diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /arch/mips/mm | |
parent | Initial commit. (diff) | |
download | linux-upstream.tar.xz linux-upstream.zip |
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
39 files changed, 12049 insertions, 0 deletions
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile new file mode 100644 index 000000000..304692391 --- /dev/null +++ b/arch/mips/mm/Makefile @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the Linux/MIPS-specific parts of the memory manager. +# + +obj-y += cache.o +obj-y += context.o +obj-y += extable.o +obj-y += fault.o +obj-y += init.o +obj-y += mmap.o +obj-y += page.o +obj-y += page-funcs.o +obj-y += pgtable.o +obj-y += tlbex.o +obj-y += tlbex-fault.o +obj-y += tlb-funcs.o + +ifdef CONFIG_CPU_MICROMIPS +obj-y += uasm-micromips.o +else +obj-y += uasm-mips.o +endif + +ifndef CONFIG_EVA +obj-y += maccess.o +endif + +obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o +obj-$(CONFIG_64BIT) += ioremap64.o pgtable-64.o +obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_DMA_NONCOHERENT) += dma-noncoherent.o + +obj-$(CONFIG_CPU_R3K_TLB) += tlb-r3k.o +obj-$(CONFIG_CPU_R4K_CACHE_TLB) += c-r4k.o cex-gen.o tlb-r4k.o +obj-$(CONFIG_CPU_R3000) += c-r3k.o +obj-$(CONFIG_CPU_SB1) += c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o +obj-$(CONFIG_CPU_CAVIUM_OCTEON) += c-octeon.o cex-oct.o tlb-r4k.o + +obj-$(CONFIG_IP22_CPU_SCACHE) += sc-ip22.o +obj-$(CONFIG_R5000_CPU_SCACHE) += sc-r5k.o +obj-$(CONFIG_RM7000_CPU_SCACHE) += sc-rm7k.o +obj-$(CONFIG_MIPS_CPU_SCACHE) += sc-mips.o +obj-$(CONFIG_SCACHE_DEBUGFS) += sc-debugfs.o + +obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c new file mode 100644 index 000000000..c7ed589de --- /dev/null +++ b/arch/mips/mm/c-octeon.c @@ -0,0 +1,350 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2005-2007 Cavium Networks + */ +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/bitops.h> +#include <linux/cpu.h> +#include <linux/io.h> + +#include <asm/bcache.h> +#include <asm/bootinfo.h> +#include <asm/cacheops.h> +#include <asm/cpu-features.h> +#include <asm/cpu-type.h> +#include <asm/page.h> +#include <asm/r4kcache.h> +#include <asm/traps.h> +#include <asm/mmu_context.h> + +#include <asm/octeon/octeon.h> + +unsigned long long cache_err_dcache[NR_CPUS]; +EXPORT_SYMBOL_GPL(cache_err_dcache); + +/* + * Octeon automatically flushes the dcache on tlb changes, so + * from Linux's viewpoint it acts much like a physically + * tagged cache. No flushing is needed + * + */ +static void octeon_flush_data_cache_page(unsigned long addr) +{ + /* Nothing to do */ +} + +static inline void octeon_local_flush_icache(void) +{ + asm volatile ("synci 0($0)"); +} + +/* + * Flush local I-cache for the specified range. + */ +static void local_octeon_flush_icache_range(unsigned long start, + unsigned long end) +{ + octeon_local_flush_icache(); +} + +/** + * octeon_flush_icache_all_cores - Flush caches as necessary for all cores + * affected by a vma. If no vma is supplied, all cores are flushed. + * + * @vma: VMA to flush or NULL to flush all icaches. + */ +static void octeon_flush_icache_all_cores(struct vm_area_struct *vma) +{ + extern void octeon_send_ipi_single(int cpu, unsigned int action); +#ifdef CONFIG_SMP + int cpu; + cpumask_t mask; +#endif + + mb(); + octeon_local_flush_icache(); +#ifdef CONFIG_SMP + preempt_disable(); + cpu = smp_processor_id(); + + /* + * If we have a vma structure, we only need to worry about + * cores it has been used on + */ + if (vma) + mask = *mm_cpumask(vma->vm_mm); + else + mask = *cpu_online_mask; + cpumask_clear_cpu(cpu, &mask); + for_each_cpu(cpu, &mask) + octeon_send_ipi_single(cpu, SMP_ICACHE_FLUSH); + + preempt_enable(); +#endif +} + + +/* + * Called to flush the icache on all cores + */ +static void octeon_flush_icache_all(void) +{ + octeon_flush_icache_all_cores(NULL); +} + + +/** + * octeon_flush_cache_mm - flush all memory associated with a memory context. + * + * @mm: Memory context to flush + */ +static void octeon_flush_cache_mm(struct mm_struct *mm) +{ + /* + * According to the R4K version of this file, CPUs without + * dcache aliases don't need to do anything here + */ +} + + +/* + * Flush a range of kernel addresses out of the icache + * + */ +static void octeon_flush_icache_range(unsigned long start, unsigned long end) +{ + octeon_flush_icache_all_cores(NULL); +} + + +/** + * octeon_flush_cache_range - Flush a range out of a vma + * + * @vma: VMA to flush + * @start: beginning address for flush + * @end: ending address for flush + */ +static void octeon_flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (vma->vm_flags & VM_EXEC) + octeon_flush_icache_all_cores(vma); +} + + +/** + * octeon_flush_cache_page - Flush a specific page of a vma + * + * @vma: VMA to flush page for + * @page: Page to flush + * @pfn: Page frame number + */ +static void octeon_flush_cache_page(struct vm_area_struct *vma, + unsigned long page, unsigned long pfn) +{ + if (vma->vm_flags & VM_EXEC) + octeon_flush_icache_all_cores(vma); +} + +static void octeon_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ + BUG(); +} + +/* + * Probe Octeon's caches + * + */ +static void probe_octeon(void) +{ + unsigned long icache_size; + unsigned long dcache_size; + unsigned int config1; + struct cpuinfo_mips *c = ¤t_cpu_data; + int cputype = current_cpu_type(); + + config1 = read_c0_config1(); + switch (cputype) { + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + c->icache.linesz = 2 << ((config1 >> 19) & 7); + c->icache.sets = 64 << ((config1 >> 22) & 7); + c->icache.ways = 1 + ((config1 >> 16) & 7); + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = + c->icache.sets * c->icache.ways * c->icache.linesz; + c->icache.waybit = ffs(icache_size / c->icache.ways) - 1; + c->dcache.linesz = 128; + if (cputype == CPU_CAVIUM_OCTEON_PLUS) + c->dcache.sets = 2; /* CN5XXX has two Dcache sets */ + else + c->dcache.sets = 1; /* CN3XXX has one Dcache set */ + c->dcache.ways = 64; + dcache_size = + c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->dcache.waybit = ffs(dcache_size / c->dcache.ways) - 1; + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_CAVIUM_OCTEON2: + c->icache.linesz = 2 << ((config1 >> 19) & 7); + c->icache.sets = 8; + c->icache.ways = 37; + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + + c->dcache.linesz = 128; + c->dcache.ways = 32; + c->dcache.sets = 8; + dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_CAVIUM_OCTEON3: + c->icache.linesz = 128; + c->icache.sets = 16; + c->icache.ways = 39; + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + + c->dcache.linesz = 128; + c->dcache.ways = 32; + c->dcache.sets = 8; + dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->options |= MIPS_CPU_PREFETCH; + break; + + default: + panic("Unsupported Cavium Networks CPU type"); + break; + } + + /* compute a couple of other cache variables */ + c->icache.waysize = icache_size / c->icache.ways; + c->dcache.waysize = dcache_size / c->dcache.ways; + + c->icache.sets = icache_size / (c->icache.linesz * c->icache.ways); + c->dcache.sets = dcache_size / (c->dcache.linesz * c->dcache.ways); + + if (smp_processor_id() == 0) { + pr_info("Primary instruction cache %ldkB, %s, %d way, " + "%d sets, linesize %d bytes.\n", + icache_size >> 10, + cpu_has_vtag_icache ? + "virtually tagged" : "physically tagged", + c->icache.ways, c->icache.sets, c->icache.linesz); + + pr_info("Primary data cache %ldkB, %d-way, %d sets, " + "linesize %d bytes.\n", + dcache_size >> 10, c->dcache.ways, + c->dcache.sets, c->dcache.linesz); + } +} + +static void octeon_cache_error_setup(void) +{ + extern char except_vec2_octeon; + set_handler(0x100, &except_vec2_octeon, 0x80); +} + +/* + * Setup the Octeon cache flush routines + * + */ +void octeon_cache_init(void) +{ + probe_octeon(); + + shm_align_mask = PAGE_SIZE - 1; + + flush_cache_all = octeon_flush_icache_all; + __flush_cache_all = octeon_flush_icache_all; + flush_cache_mm = octeon_flush_cache_mm; + flush_cache_page = octeon_flush_cache_page; + flush_cache_range = octeon_flush_cache_range; + flush_icache_all = octeon_flush_icache_all; + flush_data_cache_page = octeon_flush_data_cache_page; + flush_icache_range = octeon_flush_icache_range; + local_flush_icache_range = local_octeon_flush_icache_range; + __flush_icache_user_range = octeon_flush_icache_range; + __local_flush_icache_user_range = local_octeon_flush_icache_range; + + __flush_kernel_vmap_range = octeon_flush_kernel_vmap_range; + + build_clear_page(); + build_copy_page(); + + board_cache_error_setup = octeon_cache_error_setup; +} + +/* + * Handle a cache error exception + */ +static RAW_NOTIFIER_HEAD(co_cache_error_chain); + +int register_co_cache_error_notifier(struct notifier_block *nb) +{ + return raw_notifier_chain_register(&co_cache_error_chain, nb); +} +EXPORT_SYMBOL_GPL(register_co_cache_error_notifier); + +int unregister_co_cache_error_notifier(struct notifier_block *nb) +{ + return raw_notifier_chain_unregister(&co_cache_error_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_co_cache_error_notifier); + +static void co_cache_error_call_notifiers(unsigned long val) +{ + int rv = raw_notifier_call_chain(&co_cache_error_chain, val, NULL); + if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { + u64 dcache_err; + unsigned long coreid = cvmx_get_core_num(); + u64 icache_err = read_octeon_c0_icacheerr(); + + if (val) { + dcache_err = cache_err_dcache[coreid]; + cache_err_dcache[coreid] = 0; + } else { + dcache_err = read_octeon_c0_dcacheerr(); + } + + pr_err("Core%lu: Cache error exception:\n", coreid); + pr_err("cp0_errorepc == %lx\n", read_c0_errorepc()); + if (icache_err & 1) { + pr_err("CacheErr (Icache) == %llx\n", + (unsigned long long)icache_err); + write_octeon_c0_icacheerr(0); + } + if (dcache_err & 1) { + pr_err("CacheErr (Dcache) == %llx\n", + (unsigned long long)dcache_err); + } + } +} + +/* + * Called when the exception is recoverable + */ + +asmlinkage void cache_parity_error_octeon_recoverable(void) +{ + co_cache_error_call_notifiers(0); +} + +/* + * Called when the exception is not recoverable + */ + +asmlinkage void cache_parity_error_octeon_non_recoverable(void) +{ + co_cache_error_call_notifiers(1); + panic("Can't handle cache error: nested exception"); +} diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c new file mode 100644 index 000000000..df6755ca1 --- /dev/null +++ b/arch/mips/mm/c-r3k.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * r2300.c: R2000 and R3000 specific mmu/cache code. + * + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) + * + * with a lot of changes to make this thing work for R3000s + * Tx39XX R4k style caches added. HK + * Copyright (C) 1998, 1999, 2000 Harald Koerfgen + * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov + * Copyright (C) 2001, 2004, 2007 Maciej W. Rozycki + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> + +#include <asm/page.h> +#include <asm/mmu_context.h> +#include <asm/isadep.h> +#include <asm/io.h> +#include <asm/bootinfo.h> +#include <asm/cpu.h> + +static unsigned long icache_size, dcache_size; /* Size in bytes */ +static unsigned long icache_lsize, dcache_lsize; /* Size in bytes */ + +unsigned long r3k_cache_size(unsigned long ca_flags) +{ + unsigned long flags, status, dummy, size; + volatile unsigned long *p; + + p = (volatile unsigned long *) KSEG0; + + flags = read_c0_status(); + + /* isolate cache space */ + write_c0_status((ca_flags|flags)&~ST0_IEC); + + *p = 0xa5a55a5a; + dummy = *p; + status = read_c0_status(); + + if (dummy != 0xa5a55a5a || (status & ST0_CM)) { + size = 0; + } else { + for (size = 128; size <= 0x40000; size <<= 1) + *(p + size) = 0; + *p = -1; + for (size = 128; + (size <= 0x40000) && (*(p + size) == 0); + size <<= 1) + ; + if (size > 0x40000) + size = 0; + } + + write_c0_status(flags); + + return size * sizeof(*p); +} + +unsigned long r3k_cache_lsize(unsigned long ca_flags) +{ + unsigned long flags, status, lsize, i; + volatile unsigned long *p; + + p = (volatile unsigned long *) KSEG0; + + flags = read_c0_status(); + + /* isolate cache space */ + write_c0_status((ca_flags|flags)&~ST0_IEC); + + for (i = 0; i < 128; i++) + *(p + i) = 0; + *(volatile unsigned char *)p = 0; + for (lsize = 1; lsize < 128; lsize <<= 1) { + *(p + lsize); + status = read_c0_status(); + if (!(status & ST0_CM)) + break; + } + for (i = 0; i < 128; i += lsize) + *(volatile unsigned char *)(p + i) = 0; + + write_c0_status(flags); + + return lsize * sizeof(*p); +} + +static void r3k_probe_cache(void) +{ + dcache_size = r3k_cache_size(ST0_ISC); + if (dcache_size) + dcache_lsize = r3k_cache_lsize(ST0_ISC); + + icache_size = r3k_cache_size(ST0_ISC|ST0_SWC); + if (icache_size) + icache_lsize = r3k_cache_lsize(ST0_ISC|ST0_SWC); +} + +static void r3k_flush_icache_range(unsigned long start, unsigned long end) +{ + unsigned long size, i, flags; + volatile unsigned char *p; + + size = end - start; + if (size > icache_size || KSEGX(start) != KSEG0) { + start = KSEG0; + size = icache_size; + } + p = (char *)start; + + flags = read_c0_status(); + + /* isolate cache space */ + write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC); + + for (i = 0; i < size; i += 0x080) { + asm( "sb\t$0, 0x000(%0)\n\t" + "sb\t$0, 0x004(%0)\n\t" + "sb\t$0, 0x008(%0)\n\t" + "sb\t$0, 0x00c(%0)\n\t" + "sb\t$0, 0x010(%0)\n\t" + "sb\t$0, 0x014(%0)\n\t" + "sb\t$0, 0x018(%0)\n\t" + "sb\t$0, 0x01c(%0)\n\t" + "sb\t$0, 0x020(%0)\n\t" + "sb\t$0, 0x024(%0)\n\t" + "sb\t$0, 0x028(%0)\n\t" + "sb\t$0, 0x02c(%0)\n\t" + "sb\t$0, 0x030(%0)\n\t" + "sb\t$0, 0x034(%0)\n\t" + "sb\t$0, 0x038(%0)\n\t" + "sb\t$0, 0x03c(%0)\n\t" + "sb\t$0, 0x040(%0)\n\t" + "sb\t$0, 0x044(%0)\n\t" + "sb\t$0, 0x048(%0)\n\t" + "sb\t$0, 0x04c(%0)\n\t" + "sb\t$0, 0x050(%0)\n\t" + "sb\t$0, 0x054(%0)\n\t" + "sb\t$0, 0x058(%0)\n\t" + "sb\t$0, 0x05c(%0)\n\t" + "sb\t$0, 0x060(%0)\n\t" + "sb\t$0, 0x064(%0)\n\t" + "sb\t$0, 0x068(%0)\n\t" + "sb\t$0, 0x06c(%0)\n\t" + "sb\t$0, 0x070(%0)\n\t" + "sb\t$0, 0x074(%0)\n\t" + "sb\t$0, 0x078(%0)\n\t" + "sb\t$0, 0x07c(%0)\n\t" + : : "r" (p) ); + p += 0x080; + } + + write_c0_status(flags); +} + +static void r3k_flush_dcache_range(unsigned long start, unsigned long end) +{ + unsigned long size, i, flags; + volatile unsigned char *p; + + size = end - start; + if (size > dcache_size || KSEGX(start) != KSEG0) { + start = KSEG0; + size = dcache_size; + } + p = (char *)start; + + flags = read_c0_status(); + + /* isolate cache space */ + write_c0_status((ST0_ISC|flags)&~ST0_IEC); + + for (i = 0; i < size; i += 0x080) { + asm( "sb\t$0, 0x000(%0)\n\t" + "sb\t$0, 0x004(%0)\n\t" + "sb\t$0, 0x008(%0)\n\t" + "sb\t$0, 0x00c(%0)\n\t" + "sb\t$0, 0x010(%0)\n\t" + "sb\t$0, 0x014(%0)\n\t" + "sb\t$0, 0x018(%0)\n\t" + "sb\t$0, 0x01c(%0)\n\t" + "sb\t$0, 0x020(%0)\n\t" + "sb\t$0, 0x024(%0)\n\t" + "sb\t$0, 0x028(%0)\n\t" + "sb\t$0, 0x02c(%0)\n\t" + "sb\t$0, 0x030(%0)\n\t" + "sb\t$0, 0x034(%0)\n\t" + "sb\t$0, 0x038(%0)\n\t" + "sb\t$0, 0x03c(%0)\n\t" + "sb\t$0, 0x040(%0)\n\t" + "sb\t$0, 0x044(%0)\n\t" + "sb\t$0, 0x048(%0)\n\t" + "sb\t$0, 0x04c(%0)\n\t" + "sb\t$0, 0x050(%0)\n\t" + "sb\t$0, 0x054(%0)\n\t" + "sb\t$0, 0x058(%0)\n\t" + "sb\t$0, 0x05c(%0)\n\t" + "sb\t$0, 0x060(%0)\n\t" + "sb\t$0, 0x064(%0)\n\t" + "sb\t$0, 0x068(%0)\n\t" + "sb\t$0, 0x06c(%0)\n\t" + "sb\t$0, 0x070(%0)\n\t" + "sb\t$0, 0x074(%0)\n\t" + "sb\t$0, 0x078(%0)\n\t" + "sb\t$0, 0x07c(%0)\n\t" + : : "r" (p) ); + p += 0x080; + } + + write_c0_status(flags); +} + +static inline void r3k_flush_cache_all(void) +{ +} + +static inline void r3k___flush_cache_all(void) +{ + r3k_flush_dcache_range(KSEG0, KSEG0 + dcache_size); + r3k_flush_icache_range(KSEG0, KSEG0 + icache_size); +} + +static void r3k_flush_cache_mm(struct mm_struct *mm) +{ +} + +static void r3k_flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ +} + +static void r3k_flush_cache_page(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn) +{ + unsigned long kaddr = KSEG0ADDR(pfn << PAGE_SHIFT); + int exec = vma->vm_flags & VM_EXEC; + struct mm_struct *mm = vma->vm_mm; + pmd_t *pmdp; + pte_t *ptep; + + pr_debug("cpage[%08llx,%08lx]\n", + cpu_context(smp_processor_id(), mm), addr); + + /* No ASID => no such page in the cache. */ + if (cpu_context(smp_processor_id(), mm) == 0) + return; + + pmdp = pmd_off(mm, addr); + ptep = pte_offset_kernel(pmdp, addr); + + /* Invalid => no such page in the cache. */ + if (!(pte_val(*ptep) & _PAGE_PRESENT)) + return; + + r3k_flush_dcache_range(kaddr, kaddr + PAGE_SIZE); + if (exec) + r3k_flush_icache_range(kaddr, kaddr + PAGE_SIZE); +} + +static void local_r3k_flush_data_cache_page(void *addr) +{ +} + +static void r3k_flush_data_cache_page(unsigned long addr) +{ +} + +static void r3k_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ + BUG(); +} + +static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size) +{ + /* Catch bad driver code */ + BUG_ON(size == 0); + + iob(); + r3k_flush_dcache_range(start, start + size); +} + +void r3k_cache_init(void) +{ + extern void build_clear_page(void); + extern void build_copy_page(void); + + r3k_probe_cache(); + + flush_cache_all = r3k_flush_cache_all; + __flush_cache_all = r3k___flush_cache_all; + flush_cache_mm = r3k_flush_cache_mm; + flush_cache_range = r3k_flush_cache_range; + flush_cache_page = r3k_flush_cache_page; + flush_icache_range = r3k_flush_icache_range; + local_flush_icache_range = r3k_flush_icache_range; + __flush_icache_user_range = r3k_flush_icache_range; + __local_flush_icache_user_range = r3k_flush_icache_range; + + __flush_kernel_vmap_range = r3k_flush_kernel_vmap_range; + + local_flush_data_cache_page = local_r3k_flush_data_cache_page; + flush_data_cache_page = r3k_flush_data_cache_page; + + _dma_cache_wback_inv = r3k_dma_cache_wback_inv; + _dma_cache_wback = r3k_dma_cache_wback_inv; + _dma_cache_inv = r3k_dma_cache_wback_inv; + + pr_info("Primary instruction cache %ldkB, linesize %ld bytes.\n", + icache_size >> 10, icache_lsize); + pr_info("Primary data cache %ldkB, linesize %ld bytes.\n", + dcache_size >> 10, dcache_lsize); + + build_clear_page(); + build_copy_page(); +} diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c new file mode 100644 index 000000000..a549fa98c --- /dev/null +++ b/arch/mips/mm/c-r4k.c @@ -0,0 +1,1957 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) + * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002 Ralf Baechle (ralf@gnu.org) + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + */ +#include <linux/cpu_pm.h> +#include <linux/hardirq.h> +#include <linux/init.h> +#include <linux/highmem.h> +#include <linux/kernel.h> +#include <linux/linkage.h> +#include <linux/preempt.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <linux/bitops.h> +#include <linux/dma-map-ops.h> /* for dma_default_coherent */ + +#include <asm/bcache.h> +#include <asm/bootinfo.h> +#include <asm/cache.h> +#include <asm/cacheops.h> +#include <asm/cpu.h> +#include <asm/cpu-features.h> +#include <asm/cpu-type.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/r4kcache.h> +#include <asm/sections.h> +#include <asm/mmu_context.h> +#include <asm/cacheflush.h> /* for run_uncached() */ +#include <asm/traps.h> +#include <asm/mips-cps.h> + +/* + * Bits describing what cache ops an SMP callback function may perform. + * + * R4K_HIT - Virtual user or kernel address based cache operations. The + * active_mm must be checked before using user addresses, falling + * back to kmap. + * R4K_INDEX - Index based cache operations. + */ + +#define R4K_HIT BIT(0) +#define R4K_INDEX BIT(1) + +/** + * r4k_op_needs_ipi() - Decide if a cache op needs to be done on every core. + * @type: Type of cache operations (R4K_HIT or R4K_INDEX). + * + * Decides whether a cache op needs to be performed on every core in the system. + * This may change depending on the @type of cache operation, as well as the set + * of online CPUs, so preemption should be disabled by the caller to prevent CPU + * hotplug from changing the result. + * + * Returns: 1 if the cache operation @type should be done on every core in + * the system. + * 0 if the cache operation @type is globalized and only needs to + * be performed on a simple CPU. + */ +static inline bool r4k_op_needs_ipi(unsigned int type) +{ + /* The MIPS Coherence Manager (CM) globalizes address-based cache ops */ + if (type == R4K_HIT && mips_cm_present()) + return false; + + /* + * Hardware doesn't globalize the required cache ops, so SMP calls may + * be needed, but only if there are foreign CPUs (non-siblings with + * separate caches). + */ + /* cpu_foreign_map[] undeclared when !CONFIG_SMP */ +#ifdef CONFIG_SMP + return !cpumask_empty(&cpu_foreign_map[0]); +#else + return false; +#endif +} + +/* + * Special Variant of smp_call_function for use by cache functions: + * + * o No return value + * o collapses to normal function call on UP kernels + * o collapses to normal function call on systems with a single shared + * primary cache. + * o doesn't disable interrupts on the local CPU + */ +static inline void r4k_on_each_cpu(unsigned int type, + void (*func)(void *info), void *info) +{ + preempt_disable(); + if (r4k_op_needs_ipi(type)) + smp_call_function_many(&cpu_foreign_map[smp_processor_id()], + func, info, 1); + func(info); + preempt_enable(); +} + +/* + * Must die. + */ +static unsigned long icache_size __read_mostly; +static unsigned long dcache_size __read_mostly; +static unsigned long vcache_size __read_mostly; +static unsigned long scache_size __read_mostly; + +/* + * Dummy cache handling routines for machines without boardcaches + */ +static void cache_noop(void) {} + +static struct bcache_ops no_sc_ops = { + .bc_enable = (void *)cache_noop, + .bc_disable = (void *)cache_noop, + .bc_wback_inv = (void *)cache_noop, + .bc_inv = (void *)cache_noop +}; + +struct bcache_ops *bcops = &no_sc_ops; + +#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) +#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) + +#define R4600_HIT_CACHEOP_WAR_IMPL \ +do { \ + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && \ + cpu_is_r4600_v2_x()) \ + *(volatile unsigned long *)CKSEG1; \ + if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP)) \ + __asm__ __volatile__("nop;nop;nop;nop"); \ +} while (0) + +static void (*r4k_blast_dcache_page)(unsigned long addr); + +static inline void r4k_blast_dcache_page_dc32(unsigned long addr) +{ + R4600_HIT_CACHEOP_WAR_IMPL; + blast_dcache32_page(addr); +} + +static inline void r4k_blast_dcache_page_dc64(unsigned long addr) +{ + blast_dcache64_page(addr); +} + +static inline void r4k_blast_dcache_page_dc128(unsigned long addr) +{ + blast_dcache128_page(addr); +} + +static void r4k_blast_dcache_page_setup(void) +{ + unsigned long dc_lsize = cpu_dcache_line_size(); + + switch (dc_lsize) { + case 0: + r4k_blast_dcache_page = (void *)cache_noop; + break; + case 16: + r4k_blast_dcache_page = blast_dcache16_page; + break; + case 32: + r4k_blast_dcache_page = r4k_blast_dcache_page_dc32; + break; + case 64: + r4k_blast_dcache_page = r4k_blast_dcache_page_dc64; + break; + case 128: + r4k_blast_dcache_page = r4k_blast_dcache_page_dc128; + break; + default: + break; + } +} + +#ifndef CONFIG_EVA +#define r4k_blast_dcache_user_page r4k_blast_dcache_page +#else + +static void (*r4k_blast_dcache_user_page)(unsigned long addr); + +static void r4k_blast_dcache_user_page_setup(void) +{ + unsigned long dc_lsize = cpu_dcache_line_size(); + + if (dc_lsize == 0) + r4k_blast_dcache_user_page = (void *)cache_noop; + else if (dc_lsize == 16) + r4k_blast_dcache_user_page = blast_dcache16_user_page; + else if (dc_lsize == 32) + r4k_blast_dcache_user_page = blast_dcache32_user_page; + else if (dc_lsize == 64) + r4k_blast_dcache_user_page = blast_dcache64_user_page; +} + +#endif + +static void (* r4k_blast_dcache_page_indexed)(unsigned long addr); + +static void r4k_blast_dcache_page_indexed_setup(void) +{ + unsigned long dc_lsize = cpu_dcache_line_size(); + + if (dc_lsize == 0) + r4k_blast_dcache_page_indexed = (void *)cache_noop; + else if (dc_lsize == 16) + r4k_blast_dcache_page_indexed = blast_dcache16_page_indexed; + else if (dc_lsize == 32) + r4k_blast_dcache_page_indexed = blast_dcache32_page_indexed; + else if (dc_lsize == 64) + r4k_blast_dcache_page_indexed = blast_dcache64_page_indexed; + else if (dc_lsize == 128) + r4k_blast_dcache_page_indexed = blast_dcache128_page_indexed; +} + +void (* r4k_blast_dcache)(void); +EXPORT_SYMBOL(r4k_blast_dcache); + +static void r4k_blast_dcache_setup(void) +{ + unsigned long dc_lsize = cpu_dcache_line_size(); + + if (dc_lsize == 0) + r4k_blast_dcache = (void *)cache_noop; + else if (dc_lsize == 16) + r4k_blast_dcache = blast_dcache16; + else if (dc_lsize == 32) + r4k_blast_dcache = blast_dcache32; + else if (dc_lsize == 64) + r4k_blast_dcache = blast_dcache64; + else if (dc_lsize == 128) + r4k_blast_dcache = blast_dcache128; +} + +/* force code alignment (used for CONFIG_WAR_TX49XX_ICACHE_INDEX_INV) */ +#define JUMP_TO_ALIGN(order) \ + __asm__ __volatile__( \ + "b\t1f\n\t" \ + ".align\t" #order "\n\t" \ + "1:\n\t" \ + ) +#define CACHE32_UNROLL32_ALIGN JUMP_TO_ALIGN(10) /* 32 * 32 = 1024 */ +#define CACHE32_UNROLL32_ALIGN2 JUMP_TO_ALIGN(11) + +static inline void blast_r4600_v1_icache32(void) +{ + unsigned long flags; + + local_irq_save(flags); + blast_icache32(); + local_irq_restore(flags); +} + +static inline void tx49_blast_icache32(void) +{ + unsigned long start = INDEX_BASE; + unsigned long end = start + current_cpu_data.icache.waysize; + unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit; + unsigned long ws_end = current_cpu_data.icache.ways << + current_cpu_data.icache.waybit; + unsigned long ws, addr; + + CACHE32_UNROLL32_ALIGN2; + /* I'm in even chunk. blast odd chunks */ + for (ws = 0; ws < ws_end; ws += ws_inc) + for (addr = start + 0x400; addr < end; addr += 0x400 * 2) + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); + CACHE32_UNROLL32_ALIGN; + /* I'm in odd chunk. blast even chunks */ + for (ws = 0; ws < ws_end; ws += ws_inc) + for (addr = start; addr < end; addr += 0x400 * 2) + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); +} + +static inline void blast_icache32_r4600_v1_page_indexed(unsigned long page) +{ + unsigned long flags; + + local_irq_save(flags); + blast_icache32_page_indexed(page); + local_irq_restore(flags); +} + +static inline void tx49_blast_icache32_page_indexed(unsigned long page) +{ + unsigned long indexmask = current_cpu_data.icache.waysize - 1; + unsigned long start = INDEX_BASE + (page & indexmask); + unsigned long end = start + PAGE_SIZE; + unsigned long ws_inc = 1UL << current_cpu_data.icache.waybit; + unsigned long ws_end = current_cpu_data.icache.ways << + current_cpu_data.icache.waybit; + unsigned long ws, addr; + + CACHE32_UNROLL32_ALIGN2; + /* I'm in even chunk. blast odd chunks */ + for (ws = 0; ws < ws_end; ws += ws_inc) + for (addr = start + 0x400; addr < end; addr += 0x400 * 2) + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); + CACHE32_UNROLL32_ALIGN; + /* I'm in odd chunk. blast even chunks */ + for (ws = 0; ws < ws_end; ws += ws_inc) + for (addr = start; addr < end; addr += 0x400 * 2) + cache_unroll(32, kernel_cache, Index_Invalidate_I, + addr | ws, 32); +} + +static void (* r4k_blast_icache_page)(unsigned long addr); + +static void r4k_blast_icache_page_setup(void) +{ + unsigned long ic_lsize = cpu_icache_line_size(); + + if (ic_lsize == 0) + r4k_blast_icache_page = (void *)cache_noop; + else if (ic_lsize == 16) + r4k_blast_icache_page = blast_icache16_page; + else if (ic_lsize == 32 && current_cpu_type() == CPU_LOONGSON2EF) + r4k_blast_icache_page = loongson2_blast_icache32_page; + else if (ic_lsize == 32) + r4k_blast_icache_page = blast_icache32_page; + else if (ic_lsize == 64) + r4k_blast_icache_page = blast_icache64_page; + else if (ic_lsize == 128) + r4k_blast_icache_page = blast_icache128_page; +} + +#ifndef CONFIG_EVA +#define r4k_blast_icache_user_page r4k_blast_icache_page +#else + +static void (*r4k_blast_icache_user_page)(unsigned long addr); + +static void r4k_blast_icache_user_page_setup(void) +{ + unsigned long ic_lsize = cpu_icache_line_size(); + + if (ic_lsize == 0) + r4k_blast_icache_user_page = (void *)cache_noop; + else if (ic_lsize == 16) + r4k_blast_icache_user_page = blast_icache16_user_page; + else if (ic_lsize == 32) + r4k_blast_icache_user_page = blast_icache32_user_page; + else if (ic_lsize == 64) + r4k_blast_icache_user_page = blast_icache64_user_page; +} + +#endif + +static void (* r4k_blast_icache_page_indexed)(unsigned long addr); + +static void r4k_blast_icache_page_indexed_setup(void) +{ + unsigned long ic_lsize = cpu_icache_line_size(); + + if (ic_lsize == 0) + r4k_blast_icache_page_indexed = (void *)cache_noop; + else if (ic_lsize == 16) + r4k_blast_icache_page_indexed = blast_icache16_page_indexed; + else if (ic_lsize == 32) { + if (IS_ENABLED(CONFIG_WAR_R4600_V1_INDEX_ICACHEOP) && + cpu_is_r4600_v1_x()) + r4k_blast_icache_page_indexed = + blast_icache32_r4600_v1_page_indexed; + else if (IS_ENABLED(CONFIG_WAR_TX49XX_ICACHE_INDEX_INV)) + r4k_blast_icache_page_indexed = + tx49_blast_icache32_page_indexed; + else if (current_cpu_type() == CPU_LOONGSON2EF) + r4k_blast_icache_page_indexed = + loongson2_blast_icache32_page_indexed; + else + r4k_blast_icache_page_indexed = + blast_icache32_page_indexed; + } else if (ic_lsize == 64) + r4k_blast_icache_page_indexed = blast_icache64_page_indexed; +} + +void (* r4k_blast_icache)(void); +EXPORT_SYMBOL(r4k_blast_icache); + +static void r4k_blast_icache_setup(void) +{ + unsigned long ic_lsize = cpu_icache_line_size(); + + if (ic_lsize == 0) + r4k_blast_icache = (void *)cache_noop; + else if (ic_lsize == 16) + r4k_blast_icache = blast_icache16; + else if (ic_lsize == 32) { + if (IS_ENABLED(CONFIG_WAR_R4600_V1_INDEX_ICACHEOP) && + cpu_is_r4600_v1_x()) + r4k_blast_icache = blast_r4600_v1_icache32; + else if (IS_ENABLED(CONFIG_WAR_TX49XX_ICACHE_INDEX_INV)) + r4k_blast_icache = tx49_blast_icache32; + else if (current_cpu_type() == CPU_LOONGSON2EF) + r4k_blast_icache = loongson2_blast_icache32; + else + r4k_blast_icache = blast_icache32; + } else if (ic_lsize == 64) + r4k_blast_icache = blast_icache64; + else if (ic_lsize == 128) + r4k_blast_icache = blast_icache128; +} + +static void (* r4k_blast_scache_page)(unsigned long addr); + +static void r4k_blast_scache_page_setup(void) +{ + unsigned long sc_lsize = cpu_scache_line_size(); + + if (scache_size == 0) + r4k_blast_scache_page = (void *)cache_noop; + else if (sc_lsize == 16) + r4k_blast_scache_page = blast_scache16_page; + else if (sc_lsize == 32) + r4k_blast_scache_page = blast_scache32_page; + else if (sc_lsize == 64) + r4k_blast_scache_page = blast_scache64_page; + else if (sc_lsize == 128) + r4k_blast_scache_page = blast_scache128_page; +} + +static void (* r4k_blast_scache_page_indexed)(unsigned long addr); + +static void r4k_blast_scache_page_indexed_setup(void) +{ + unsigned long sc_lsize = cpu_scache_line_size(); + + if (scache_size == 0) + r4k_blast_scache_page_indexed = (void *)cache_noop; + else if (sc_lsize == 16) + r4k_blast_scache_page_indexed = blast_scache16_page_indexed; + else if (sc_lsize == 32) + r4k_blast_scache_page_indexed = blast_scache32_page_indexed; + else if (sc_lsize == 64) + r4k_blast_scache_page_indexed = blast_scache64_page_indexed; + else if (sc_lsize == 128) + r4k_blast_scache_page_indexed = blast_scache128_page_indexed; +} + +static void (* r4k_blast_scache)(void); + +static void r4k_blast_scache_setup(void) +{ + unsigned long sc_lsize = cpu_scache_line_size(); + + if (scache_size == 0) + r4k_blast_scache = (void *)cache_noop; + else if (sc_lsize == 16) + r4k_blast_scache = blast_scache16; + else if (sc_lsize == 32) + r4k_blast_scache = blast_scache32; + else if (sc_lsize == 64) + r4k_blast_scache = blast_scache64; + else if (sc_lsize == 128) + r4k_blast_scache = blast_scache128; +} + +static void (*r4k_blast_scache_node)(long node); + +static void r4k_blast_scache_node_setup(void) +{ + unsigned long sc_lsize = cpu_scache_line_size(); + + if (current_cpu_type() != CPU_LOONGSON64) + r4k_blast_scache_node = (void *)cache_noop; + else if (sc_lsize == 16) + r4k_blast_scache_node = blast_scache16_node; + else if (sc_lsize == 32) + r4k_blast_scache_node = blast_scache32_node; + else if (sc_lsize == 64) + r4k_blast_scache_node = blast_scache64_node; + else if (sc_lsize == 128) + r4k_blast_scache_node = blast_scache128_node; +} + +static inline void local_r4k___flush_cache_all(void * args) +{ + switch (current_cpu_type()) { + case CPU_LOONGSON2EF: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400SC: + case CPU_R4400MC: + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: + case CPU_R16000: + /* + * These caches are inclusive caches, that is, if something + * is not cached in the S-cache, we know it also won't be + * in one of the primary caches. + */ + r4k_blast_scache(); + break; + + case CPU_LOONGSON64: + /* Use get_ebase_cpunum() for both NUMA=y/n */ + r4k_blast_scache_node(get_ebase_cpunum() >> 2); + break; + + case CPU_BMIPS5000: + r4k_blast_scache(); + __sync(); + break; + + default: + r4k_blast_dcache(); + r4k_blast_icache(); + break; + } +} + +static void r4k___flush_cache_all(void) +{ + r4k_on_each_cpu(R4K_INDEX, local_r4k___flush_cache_all, NULL); +} + +/** + * has_valid_asid() - Determine if an mm already has an ASID. + * @mm: Memory map. + * @type: R4K_HIT or R4K_INDEX, type of cache op. + * + * Determines whether @mm already has an ASID on any of the CPUs which cache ops + * of type @type within an r4k_on_each_cpu() call will affect. If + * r4k_on_each_cpu() does an SMP call to a single VPE in each core, then the + * scope of the operation is confined to sibling CPUs, otherwise all online CPUs + * will need to be checked. + * + * Must be called in non-preemptive context. + * + * Returns: 1 if the CPUs affected by @type cache ops have an ASID for @mm. + * 0 otherwise. + */ +static inline int has_valid_asid(const struct mm_struct *mm, unsigned int type) +{ + unsigned int i; + const cpumask_t *mask = cpu_present_mask; + + if (cpu_has_mmid) + return cpu_context(0, mm) != 0; + + /* cpu_sibling_map[] undeclared when !CONFIG_SMP */ +#ifdef CONFIG_SMP + /* + * If r4k_on_each_cpu does SMP calls, it does them to a single VPE in + * each foreign core, so we only need to worry about siblings. + * Otherwise we need to worry about all present CPUs. + */ + if (r4k_op_needs_ipi(type)) + mask = &cpu_sibling_map[smp_processor_id()]; +#endif + for_each_cpu(i, mask) + if (cpu_context(i, mm)) + return 1; + return 0; +} + +static void r4k__flush_cache_vmap(void) +{ + r4k_blast_dcache(); +} + +static void r4k__flush_cache_vunmap(void) +{ + r4k_blast_dcache(); +} + +/* + * Note: flush_tlb_range() assumes flush_cache_range() sufficiently flushes + * whole caches when vma is executable. + */ +static inline void local_r4k_flush_cache_range(void * args) +{ + struct vm_area_struct *vma = args; + int exec = vma->vm_flags & VM_EXEC; + + if (!has_valid_asid(vma->vm_mm, R4K_INDEX)) + return; + + /* + * If dcache can alias, we must blast it since mapping is changing. + * If executable, we must ensure any dirty lines are written back far + * enough to be visible to icache. + */ + if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) + r4k_blast_dcache(); + /* If executable, blast stale lines from icache */ + if (exec) + r4k_blast_icache(); +} + +static void r4k_flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + int exec = vma->vm_flags & VM_EXEC; + + if (cpu_has_dc_aliases || exec) + r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_range, vma); +} + +static inline void local_r4k_flush_cache_mm(void * args) +{ + struct mm_struct *mm = args; + + if (!has_valid_asid(mm, R4K_INDEX)) + return; + + /* + * Kludge alert. For obscure reasons R4000SC and R4400SC go nuts if we + * only flush the primary caches but R1x000 behave sane ... + * R4000SC and R4400SC indexed S-cache ops also invalidate primary + * caches, so we can bail out early. + */ + if (current_cpu_type() == CPU_R4000SC || + current_cpu_type() == CPU_R4000MC || + current_cpu_type() == CPU_R4400SC || + current_cpu_type() == CPU_R4400MC) { + r4k_blast_scache(); + return; + } + + r4k_blast_dcache(); +} + +static void r4k_flush_cache_mm(struct mm_struct *mm) +{ + if (!cpu_has_dc_aliases) + return; + + r4k_on_each_cpu(R4K_INDEX, local_r4k_flush_cache_mm, mm); +} + +struct flush_cache_page_args { + struct vm_area_struct *vma; + unsigned long addr; + unsigned long pfn; +}; + +static inline void local_r4k_flush_cache_page(void *args) +{ + struct flush_cache_page_args *fcp_args = args; + struct vm_area_struct *vma = fcp_args->vma; + unsigned long addr = fcp_args->addr; + struct page *page = pfn_to_page(fcp_args->pfn); + int exec = vma->vm_flags & VM_EXEC; + struct mm_struct *mm = vma->vm_mm; + int map_coherent = 0; + pmd_t *pmdp; + pte_t *ptep; + void *vaddr; + + /* + * If owns no valid ASID yet, cannot possibly have gotten + * this page into the cache. + */ + if (!has_valid_asid(mm, R4K_HIT)) + return; + + addr &= PAGE_MASK; + pmdp = pmd_off(mm, addr); + ptep = pte_offset_kernel(pmdp, addr); + + /* + * If the page isn't marked valid, the page cannot possibly be + * in the cache. + */ + if (!(pte_present(*ptep))) + return; + + if ((mm == current->active_mm) && (pte_val(*ptep) & _PAGE_VALID)) + vaddr = NULL; + else { + /* + * Use kmap_coherent or kmap_atomic to do flushes for + * another ASID than the current one. + */ + map_coherent = (cpu_has_dc_aliases && + page_mapcount(page) && + !Page_dcache_dirty(page)); + if (map_coherent) + vaddr = kmap_coherent(page, addr); + else + vaddr = kmap_atomic(page); + addr = (unsigned long)vaddr; + } + + if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) { + vaddr ? r4k_blast_dcache_page(addr) : + r4k_blast_dcache_user_page(addr); + if (exec && !cpu_icache_snoops_remote_store) + r4k_blast_scache_page(addr); + } + if (exec) { + if (vaddr && cpu_has_vtag_icache && mm == current->active_mm) { + drop_mmu_context(mm); + } else + vaddr ? r4k_blast_icache_page(addr) : + r4k_blast_icache_user_page(addr); + } + + if (vaddr) { + if (map_coherent) + kunmap_coherent(); + else + kunmap_atomic(vaddr); + } +} + +static void r4k_flush_cache_page(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn) +{ + struct flush_cache_page_args args; + + args.vma = vma; + args.addr = addr; + args.pfn = pfn; + + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_cache_page, &args); +} + +static inline void local_r4k_flush_data_cache_page(void * addr) +{ + r4k_blast_dcache_page((unsigned long) addr); +} + +static void r4k_flush_data_cache_page(unsigned long addr) +{ + if (in_atomic()) + local_r4k_flush_data_cache_page((void *)addr); + else + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_data_cache_page, + (void *) addr); +} + +struct flush_icache_range_args { + unsigned long start; + unsigned long end; + unsigned int type; + bool user; +}; + +static inline void __local_r4k_flush_icache_range(unsigned long start, + unsigned long end, + unsigned int type, + bool user) +{ + if (!cpu_has_ic_fills_f_dc) { + if (type == R4K_INDEX || + (type & R4K_INDEX && end - start >= dcache_size)) { + r4k_blast_dcache(); + } else { + R4600_HIT_CACHEOP_WAR_IMPL; + if (user) + protected_blast_dcache_range(start, end); + else + blast_dcache_range(start, end); + } + } + + if (type == R4K_INDEX || + (type & R4K_INDEX && end - start > icache_size)) + r4k_blast_icache(); + else { + switch (boot_cpu_type()) { + case CPU_LOONGSON2EF: + protected_loongson2_blast_icache_range(start, end); + break; + + default: + if (user) + protected_blast_icache_range(start, end); + else + blast_icache_range(start, end); + break; + } + } +} + +static inline void local_r4k_flush_icache_range(unsigned long start, + unsigned long end) +{ + __local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX, false); +} + +static inline void local_r4k_flush_icache_user_range(unsigned long start, + unsigned long end) +{ + __local_r4k_flush_icache_range(start, end, R4K_HIT | R4K_INDEX, true); +} + +static inline void local_r4k_flush_icache_range_ipi(void *args) +{ + struct flush_icache_range_args *fir_args = args; + unsigned long start = fir_args->start; + unsigned long end = fir_args->end; + unsigned int type = fir_args->type; + bool user = fir_args->user; + + __local_r4k_flush_icache_range(start, end, type, user); +} + +static void __r4k_flush_icache_range(unsigned long start, unsigned long end, + bool user) +{ + struct flush_icache_range_args args; + unsigned long size, cache_size; + + args.start = start; + args.end = end; + args.type = R4K_HIT | R4K_INDEX; + args.user = user; + + /* + * Indexed cache ops require an SMP call. + * Consider if that can or should be avoided. + */ + preempt_disable(); + if (r4k_op_needs_ipi(R4K_INDEX) && !r4k_op_needs_ipi(R4K_HIT)) { + /* + * If address-based cache ops don't require an SMP call, then + * use them exclusively for small flushes. + */ + size = end - start; + cache_size = icache_size; + if (!cpu_has_ic_fills_f_dc) { + size *= 2; + cache_size += dcache_size; + } + if (size <= cache_size) + args.type &= ~R4K_INDEX; + } + r4k_on_each_cpu(args.type, local_r4k_flush_icache_range_ipi, &args); + preempt_enable(); + instruction_hazard(); +} + +static void r4k_flush_icache_range(unsigned long start, unsigned long end) +{ + return __r4k_flush_icache_range(start, end, false); +} + +static void r4k_flush_icache_user_range(unsigned long start, unsigned long end) +{ + return __r4k_flush_icache_range(start, end, true); +} + +#ifdef CONFIG_DMA_NONCOHERENT + +static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) +{ + /* Catch bad driver code */ + if (WARN_ON(size == 0)) + return; + + preempt_disable(); + if (cpu_has_inclusive_pcaches) { + if (size >= scache_size) { + if (current_cpu_type() != CPU_LOONGSON64) + r4k_blast_scache(); + else + r4k_blast_scache_node(pa_to_nid(addr)); + } else { + blast_scache_range(addr, addr + size); + } + preempt_enable(); + __sync(); + return; + } + + /* + * Either no secondary cache or the available caches don't have the + * subset property so we have to flush the primary caches + * explicitly. + * If we would need IPI to perform an INDEX-type operation, then + * we have to use the HIT-type alternative as IPI cannot be used + * here due to interrupts possibly being disabled. + */ + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { + r4k_blast_dcache(); + } else { + R4600_HIT_CACHEOP_WAR_IMPL; + blast_dcache_range(addr, addr + size); + } + preempt_enable(); + + bc_wback_inv(addr, size); + __sync(); +} + +static void prefetch_cache_inv(unsigned long addr, unsigned long size) +{ + unsigned int linesz = cpu_scache_line_size(); + unsigned long addr0 = addr, addr1; + + addr0 &= ~(linesz - 1); + addr1 = (addr0 + size - 1) & ~(linesz - 1); + + protected_writeback_scache_line(addr0); + if (likely(addr1 != addr0)) + protected_writeback_scache_line(addr1); + else + return; + + addr0 += linesz; + if (likely(addr1 != addr0)) + protected_writeback_scache_line(addr0); + else + return; + + addr1 -= linesz; + if (likely(addr1 > addr0)) + protected_writeback_scache_line(addr0); +} + +static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) +{ + /* Catch bad driver code */ + if (WARN_ON(size == 0)) + return; + + preempt_disable(); + + if (current_cpu_type() == CPU_BMIPS5000) + prefetch_cache_inv(addr, size); + + if (cpu_has_inclusive_pcaches) { + if (size >= scache_size) { + if (current_cpu_type() != CPU_LOONGSON64) + r4k_blast_scache(); + else + r4k_blast_scache_node(pa_to_nid(addr)); + } else { + /* + * There is no clearly documented alignment requirement + * for the cache instruction on MIPS processors and + * some processors, among them the RM5200 and RM7000 + * QED processors will throw an address error for cache + * hit ops with insufficient alignment. Solved by + * aligning the address to cache line size. + */ + blast_inv_scache_range(addr, addr + size); + } + preempt_enable(); + __sync(); + return; + } + + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { + r4k_blast_dcache(); + } else { + R4600_HIT_CACHEOP_WAR_IMPL; + blast_inv_dcache_range(addr, addr + size); + } + preempt_enable(); + + bc_inv(addr, size); + __sync(); +} +#endif /* CONFIG_DMA_NONCOHERENT */ + +static void r4k_flush_icache_all(void) +{ + if (cpu_has_vtag_icache) + r4k_blast_icache(); +} + +struct flush_kernel_vmap_range_args { + unsigned long vaddr; + int size; +}; + +static inline void local_r4k_flush_kernel_vmap_range_index(void *args) +{ + /* + * Aliases only affect the primary caches so don't bother with + * S-caches or T-caches. + */ + r4k_blast_dcache(); +} + +static inline void local_r4k_flush_kernel_vmap_range(void *args) +{ + struct flush_kernel_vmap_range_args *vmra = args; + unsigned long vaddr = vmra->vaddr; + int size = vmra->size; + + /* + * Aliases only affect the primary caches so don't bother with + * S-caches or T-caches. + */ + R4600_HIT_CACHEOP_WAR_IMPL; + blast_dcache_range(vaddr, vaddr + size); +} + +static void r4k_flush_kernel_vmap_range(unsigned long vaddr, int size) +{ + struct flush_kernel_vmap_range_args args; + + args.vaddr = (unsigned long) vaddr; + args.size = size; + + if (size >= dcache_size) + r4k_on_each_cpu(R4K_INDEX, + local_r4k_flush_kernel_vmap_range_index, NULL); + else + r4k_on_each_cpu(R4K_HIT, local_r4k_flush_kernel_vmap_range, + &args); +} + +static inline void rm7k_erratum31(void) +{ + const unsigned long ic_lsize = 32; + unsigned long addr; + + /* RM7000 erratum #31. The icache is screwed at startup. */ + write_c0_taglo(0); + write_c0_taghi(0); + + for (addr = INDEX_BASE; addr <= INDEX_BASE + 4096; addr += ic_lsize) { + __asm__ __volatile__ ( + ".set push\n\t" + ".set noreorder\n\t" + ".set mips3\n\t" + "cache\t%1, 0(%0)\n\t" + "cache\t%1, 0x1000(%0)\n\t" + "cache\t%1, 0x2000(%0)\n\t" + "cache\t%1, 0x3000(%0)\n\t" + "cache\t%2, 0(%0)\n\t" + "cache\t%2, 0x1000(%0)\n\t" + "cache\t%2, 0x2000(%0)\n\t" + "cache\t%2, 0x3000(%0)\n\t" + "cache\t%1, 0(%0)\n\t" + "cache\t%1, 0x1000(%0)\n\t" + "cache\t%1, 0x2000(%0)\n\t" + "cache\t%1, 0x3000(%0)\n\t" + ".set pop\n" + : + : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill_I)); + } +} + +static inline int alias_74k_erratum(struct cpuinfo_mips *c) +{ + unsigned int imp = c->processor_id & PRID_IMP_MASK; + unsigned int rev = c->processor_id & PRID_REV_MASK; + int present = 0; + + /* + * Early versions of the 74K do not update the cache tags on a + * vtag miss/ptag hit which can occur in the case of KSEG0/KUSEG + * aliases. In this case it is better to treat the cache as always + * having aliases. Also disable the synonym tag update feature + * where available. In this case no opportunistic tag update will + * happen where a load causes a virtual address miss but a physical + * address hit during a D-cache look-up. + */ + switch (imp) { + case PRID_IMP_74K: + if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) + present = 1; + if (rev == PRID_REV_ENCODE_332(2, 4, 0)) + write_c0_config6(read_c0_config6() | MTI_CONF6_SYND); + break; + case PRID_IMP_1074K: + if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { + present = 1; + write_c0_config6(read_c0_config6() | MTI_CONF6_SYND); + } + break; + default: + BUG(); + } + + return present; +} + +static void b5k_instruction_hazard(void) +{ + __sync(); + __sync(); + __asm__ __volatile__( + " nop; nop; nop; nop; nop; nop; nop; nop\n" + " nop; nop; nop; nop; nop; nop; nop; nop\n" + " nop; nop; nop; nop; nop; nop; nop; nop\n" + " nop; nop; nop; nop; nop; nop; nop; nop\n" + : : : "memory"); +} + +static char *way_string[] = { NULL, "direct mapped", "2-way", + "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", + "9-way", "10-way", "11-way", "12-way", + "13-way", "14-way", "15-way", "16-way", +}; + +static void probe_pcache(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config = read_c0_config(); + unsigned int prid = read_c0_prid(); + int has_74k_erratum = 0; + unsigned long config1; + unsigned int lsize; + + switch (current_cpu_type()) { + case CPU_R4600: /* QED style two way caches? */ + case CPU_R4700: + case CPU_R5000: + case CPU_NEVADA: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 2; + c->icache.waybit = __ffs(icache_size/2); + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 2; + c->dcache.waybit= __ffs(dcache_size/2); + + c->options |= MIPS_CPU_CACHE_CDEX_P; + break; + + case CPU_R5500: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 2; + c->icache.waybit= 0; + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 2; + c->dcache.waybit = 0; + + c->options |= MIPS_CPU_CACHE_CDEX_P | MIPS_CPU_PREFETCH; + break; + + case CPU_TX49XX: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 4; + c->icache.waybit= 0; + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 4; + c->dcache.waybit = 0; + + c->options |= MIPS_CPU_CACHE_CDEX_P; + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + case CPU_R4300: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 1; + c->icache.waybit = 0; /* doesn't matter */ + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 1; + c->dcache.waybit = 0; /* does not matter */ + + c->options |= MIPS_CPU_CACHE_CDEX_P; + break; + + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: + case CPU_R16000: + icache_size = 1 << (12 + ((config & R10K_CONF_IC) >> 29)); + c->icache.linesz = 64; + c->icache.ways = 2; + c->icache.waybit = 0; + + dcache_size = 1 << (12 + ((config & R10K_CONF_DC) >> 26)); + c->dcache.linesz = 32; + c->dcache.ways = 2; + c->dcache.waybit = 0; + + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_RM7000: + rm7k_erratum31(); + + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + c->icache.ways = 4; + c->icache.waybit = __ffs(icache_size / c->icache.ways); + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + c->dcache.ways = 4; + c->dcache.waybit = __ffs(dcache_size / c->dcache.ways); + + c->options |= MIPS_CPU_CACHE_CDEX_P; + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_LOONGSON2EF: + icache_size = 1 << (12 + ((config & CONF_IC) >> 9)); + c->icache.linesz = 16 << ((config & CONF_IB) >> 5); + if (prid & 0x3) + c->icache.ways = 4; + else + c->icache.ways = 2; + c->icache.waybit = 0; + + dcache_size = 1 << (12 + ((config & CONF_DC) >> 6)); + c->dcache.linesz = 16 << ((config & CONF_DB) >> 4); + if (prid & 0x3) + c->dcache.ways = 4; + else + c->dcache.ways = 2; + c->dcache.waybit = 0; + break; + + case CPU_LOONGSON64: + config1 = read_c0_config1(); + lsize = (config1 >> 19) & 7; + if (lsize) + c->icache.linesz = 2 << lsize; + else + c->icache.linesz = 0; + c->icache.sets = 64 << ((config1 >> 22) & 7); + c->icache.ways = 1 + ((config1 >> 16) & 7); + icache_size = c->icache.sets * + c->icache.ways * + c->icache.linesz; + c->icache.waybit = 0; + + lsize = (config1 >> 10) & 7; + if (lsize) + c->dcache.linesz = 2 << lsize; + else + c->dcache.linesz = 0; + c->dcache.sets = 64 << ((config1 >> 13) & 7); + c->dcache.ways = 1 + ((config1 >> 7) & 7); + dcache_size = c->dcache.sets * + c->dcache.ways * + c->dcache.linesz; + c->dcache.waybit = 0; + if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) || + (c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) + c->options |= MIPS_CPU_PREFETCH; + break; + + case CPU_CAVIUM_OCTEON3: + /* For now lie about the number of ways. */ + c->icache.linesz = 128; + c->icache.sets = 16; + c->icache.ways = 8; + c->icache.flags |= MIPS_CACHE_VTAG; + icache_size = c->icache.sets * c->icache.ways * c->icache.linesz; + + c->dcache.linesz = 128; + c->dcache.ways = 8; + c->dcache.sets = 8; + dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; + c->options |= MIPS_CPU_PREFETCH; + break; + + default: + if (!(config & MIPS_CONF_M)) + panic("Don't know how to probe P-caches on this cpu."); + + /* + * So we seem to be a MIPS32 or MIPS64 CPU + * So let's probe the I-cache ... + */ + config1 = read_c0_config1(); + + lsize = (config1 >> 19) & 7; + + /* IL == 7 is reserved */ + if (lsize == 7) + panic("Invalid icache line size"); + + c->icache.linesz = lsize ? 2 << lsize : 0; + + c->icache.sets = 32 << (((config1 >> 22) + 1) & 7); + c->icache.ways = 1 + ((config1 >> 16) & 7); + + icache_size = c->icache.sets * + c->icache.ways * + c->icache.linesz; + c->icache.waybit = __ffs(icache_size/c->icache.ways); + + if (config & MIPS_CONF_VI) + c->icache.flags |= MIPS_CACHE_VTAG; + + /* + * Now probe the MIPS32 / MIPS64 data cache. + */ + c->dcache.flags = 0; + + lsize = (config1 >> 10) & 7; + + /* DL == 7 is reserved */ + if (lsize == 7) + panic("Invalid dcache line size"); + + c->dcache.linesz = lsize ? 2 << lsize : 0; + + c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7); + c->dcache.ways = 1 + ((config1 >> 7) & 7); + + dcache_size = c->dcache.sets * + c->dcache.ways * + c->dcache.linesz; + c->dcache.waybit = __ffs(dcache_size/c->dcache.ways); + + c->options |= MIPS_CPU_PREFETCH; + break; + } + + /* + * Processor configuration sanity check for the R4000SC erratum + * #5. With page sizes larger than 32kB there is no possibility + * to get a VCE exception anymore so we don't care about this + * misconfiguration. The case is rather theoretical anyway; + * presumably no vendor is shipping his hardware in the "bad" + * configuration. + */ + if ((prid & PRID_IMP_MASK) == PRID_IMP_R4000 && + (prid & PRID_REV_MASK) < PRID_REV_R4400 && + !(config & CONF_SC) && c->icache.linesz != 16 && + PAGE_SIZE <= 0x8000) + panic("Improper R4000SC processor configuration detected"); + + /* compute a couple of other cache variables */ + c->icache.waysize = icache_size / c->icache.ways; + c->dcache.waysize = dcache_size / c->dcache.ways; + + c->icache.sets = c->icache.linesz ? + icache_size / (c->icache.linesz * c->icache.ways) : 0; + c->dcache.sets = c->dcache.linesz ? + dcache_size / (c->dcache.linesz * c->dcache.ways) : 0; + + /* + * R1x000 P-caches are odd in a positive way. They're 32kB 2-way + * virtually indexed so normally would suffer from aliases. So + * normally they'd suffer from aliases but magic in the hardware deals + * with that for us so we don't need to take care ourselves. + */ + switch (current_cpu_type()) { + case CPU_20KC: + case CPU_25KF: + case CPU_I6400: + case CPU_I6500: + case CPU_SB1: + case CPU_SB1A: + c->dcache.flags |= MIPS_CACHE_PINDEX; + break; + + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: + case CPU_R16000: + break; + + case CPU_74K: + case CPU_1074K: + has_74k_erratum = alias_74k_erratum(c); + fallthrough; + case CPU_M14KC: + case CPU_M14KEC: + case CPU_24K: + case CPU_34K: + case CPU_1004K: + case CPU_INTERAPTIV: + case CPU_P5600: + case CPU_PROAPTIV: + case CPU_M5150: + case CPU_QEMU_GENERIC: + case CPU_P6600: + case CPU_M6250: + if (!(read_c0_config7() & MIPS_CONF7_IAR) && + (c->icache.waysize > PAGE_SIZE)) + c->icache.flags |= MIPS_CACHE_ALIASES; + if (!has_74k_erratum && (read_c0_config7() & MIPS_CONF7_AR)) { + /* + * Effectively physically indexed dcache, + * thus no virtual aliases. + */ + c->dcache.flags |= MIPS_CACHE_PINDEX; + break; + } + fallthrough; + default: + if (has_74k_erratum || c->dcache.waysize > PAGE_SIZE) + c->dcache.flags |= MIPS_CACHE_ALIASES; + } + + /* Physically indexed caches don't suffer from virtual aliasing */ + if (c->dcache.flags & MIPS_CACHE_PINDEX) + c->dcache.flags &= ~MIPS_CACHE_ALIASES; + + /* + * In systems with CM the icache fills from L2 or closer caches, and + * thus sees remote stores without needing to write them back any + * further than that. + */ + if (mips_cm_present()) + c->icache.flags |= MIPS_IC_SNOOPS_REMOTE; + + switch (current_cpu_type()) { + case CPU_20KC: + /* + * Some older 20Kc chips doesn't have the 'VI' bit in + * the config register. + */ + c->icache.flags |= MIPS_CACHE_VTAG; + break; + + case CPU_ALCHEMY: + case CPU_I6400: + case CPU_I6500: + c->icache.flags |= MIPS_CACHE_IC_F_DC; + break; + + case CPU_BMIPS5000: + c->icache.flags |= MIPS_CACHE_IC_F_DC; + /* Cache aliases are handled in hardware; allow HIGHMEM */ + c->dcache.flags &= ~MIPS_CACHE_ALIASES; + break; + + case CPU_LOONGSON2EF: + /* + * LOONGSON2 has 4 way icache, but when using indexed cache op, + * one op will act on all 4 ways + */ + c->icache.ways = 1; + } + + pr_info("Primary instruction cache %ldkB, %s, %s, linesize %d bytes.\n", + icache_size >> 10, + c->icache.flags & MIPS_CACHE_VTAG ? "VIVT" : "VIPT", + way_string[c->icache.ways], c->icache.linesz); + + pr_info("Primary data cache %ldkB, %s, %s, %s, linesize %d bytes\n", + dcache_size >> 10, way_string[c->dcache.ways], + (c->dcache.flags & MIPS_CACHE_PINDEX) ? "PIPT" : "VIPT", + (c->dcache.flags & MIPS_CACHE_ALIASES) ? + "cache aliases" : "no aliases", + c->dcache.linesz); +} + +static void probe_vcache(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config2, lsize; + + if (current_cpu_type() != CPU_LOONGSON64) + return; + + config2 = read_c0_config2(); + if ((lsize = ((config2 >> 20) & 15))) + c->vcache.linesz = 2 << lsize; + else + c->vcache.linesz = lsize; + + c->vcache.sets = 64 << ((config2 >> 24) & 15); + c->vcache.ways = 1 + ((config2 >> 16) & 15); + + vcache_size = c->vcache.sets * c->vcache.ways * c->vcache.linesz; + + c->vcache.waybit = 0; + c->vcache.waysize = vcache_size / c->vcache.ways; + + pr_info("Unified victim cache %ldkB %s, linesize %d bytes.\n", + vcache_size >> 10, way_string[c->vcache.ways], c->vcache.linesz); +} + +/* + * If you even _breathe_ on this function, look at the gcc output and make sure + * it does not pop things on and off the stack for the cache sizing loop that + * executes in KSEG1 space or else you will crash and burn badly. You have + * been warned. + */ +static int probe_scache(void) +{ + unsigned long flags, addr, begin, end, pow2; + unsigned int config = read_c0_config(); + struct cpuinfo_mips *c = ¤t_cpu_data; + + if (config & CONF_SC) + return 0; + + begin = (unsigned long) &_stext; + begin &= ~((4 * 1024 * 1024) - 1); + end = begin + (4 * 1024 * 1024); + + /* + * This is such a bitch, you'd think they would make it easy to do + * this. Away you daemons of stupidity! + */ + local_irq_save(flags); + + /* Fill each size-multiple cache line with a valid tag. */ + pow2 = (64 * 1024); + for (addr = begin; addr < end; addr = (begin + pow2)) { + unsigned long *p = (unsigned long *) addr; + __asm__ __volatile__("nop" : : "r" (*p)); /* whee... */ + pow2 <<= 1; + } + + /* Load first line with zero (therefore invalid) tag. */ + write_c0_taglo(0); + write_c0_taghi(0); + __asm__ __volatile__("nop; nop; nop; nop;"); /* avoid the hazard */ + cache_op(Index_Store_Tag_I, begin); + cache_op(Index_Store_Tag_D, begin); + cache_op(Index_Store_Tag_SD, begin); + + /* Now search for the wrap around point. */ + pow2 = (128 * 1024); + for (addr = begin + (128 * 1024); addr < end; addr = begin + pow2) { + cache_op(Index_Load_Tag_SD, addr); + __asm__ __volatile__("nop; nop; nop; nop;"); /* hazard... */ + if (!read_c0_taglo()) + break; + pow2 <<= 1; + } + local_irq_restore(flags); + addr -= begin; + + scache_size = addr; + c->scache.linesz = 16 << ((config & R4K_CONF_SB) >> 22); + c->scache.ways = 1; + c->scache.waybit = 0; /* does not matter */ + + return 1; +} + +static void loongson2_sc_init(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + + scache_size = 512*1024; + c->scache.linesz = 32; + c->scache.ways = 4; + c->scache.waybit = 0; + c->scache.waysize = scache_size / (c->scache.ways); + c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways); + pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", + scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + + c->options |= MIPS_CPU_INCLUSIVE_CACHES; +} + +static void loongson3_sc_init(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config2, lsize; + + config2 = read_c0_config2(); + lsize = (config2 >> 4) & 15; + if (lsize) + c->scache.linesz = 2 << lsize; + else + c->scache.linesz = 0; + c->scache.sets = 64 << ((config2 >> 8) & 15); + c->scache.ways = 1 + (config2 & 15); + + /* Loongson-3 has 4-Scache banks, while Loongson-2K have only 2 banks */ + if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) + c->scache.sets *= 2; + else + c->scache.sets *= 4; + + scache_size = c->scache.sets * c->scache.ways * c->scache.linesz; + + c->scache.waybit = 0; + c->scache.waysize = scache_size / c->scache.ways; + pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", + scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + if (scache_size) + c->options |= MIPS_CPU_INCLUSIVE_CACHES; + return; +} + +extern int r5k_sc_init(void); +extern int rm7k_sc_init(void); +extern int mips_sc_init(void); + +static void setup_scache(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config = read_c0_config(); + int sc_present = 0; + + /* + * Do the probing thing on R4000SC and R4400SC processors. Other + * processors don't have a S-cache that would be relevant to the + * Linux memory management. + */ + switch (current_cpu_type()) { + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400SC: + case CPU_R4400MC: + sc_present = run_uncached(probe_scache); + if (sc_present) + c->options |= MIPS_CPU_CACHE_CDEX_S; + break; + + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: + case CPU_R16000: + scache_size = 0x80000 << ((config & R10K_CONF_SS) >> 16); + c->scache.linesz = 64 << ((config >> 13) & 1); + c->scache.ways = 2; + c->scache.waybit= 0; + sc_present = 1; + break; + + case CPU_R5000: + case CPU_NEVADA: +#ifdef CONFIG_R5000_CPU_SCACHE + r5k_sc_init(); +#endif + return; + + case CPU_RM7000: +#ifdef CONFIG_RM7000_CPU_SCACHE + rm7k_sc_init(); +#endif + return; + + case CPU_LOONGSON2EF: + loongson2_sc_init(); + return; + + case CPU_LOONGSON64: + loongson3_sc_init(); + return; + + case CPU_CAVIUM_OCTEON3: + /* don't need to worry about L2, fully coherent */ + return; + + default: + if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | + MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { +#ifdef CONFIG_MIPS_CPU_SCACHE + if (mips_sc_init ()) { + scache_size = c->scache.ways * c->scache.sets * c->scache.linesz; + printk("MIPS secondary cache %ldkB, %s, linesize %d bytes.\n", + scache_size >> 10, + way_string[c->scache.ways], c->scache.linesz); + + if (current_cpu_type() == CPU_BMIPS5000) + c->options |= MIPS_CPU_INCLUSIVE_CACHES; + } + +#else + if (!(c->scache.flags & MIPS_CACHE_NOT_PRESENT)) + panic("Dunno how to handle MIPS32 / MIPS64 second level cache"); +#endif + return; + } + sc_present = 0; + } + + if (!sc_present) + return; + + /* compute a couple of other cache variables */ + c->scache.waysize = scache_size / c->scache.ways; + + c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways); + + printk("Unified secondary cache %ldkB %s, linesize %d bytes.\n", + scache_size >> 10, way_string[c->scache.ways], c->scache.linesz); + + c->options |= MIPS_CPU_INCLUSIVE_CACHES; +} + +void au1x00_fixup_config_od(void) +{ + /* + * c0_config.od (bit 19) was write only (and read as 0) + * on the early revisions of Alchemy SOCs. It disables the bus + * transaction overlapping and needs to be set to fix various errata. + */ + switch (read_c0_prid()) { + case 0x00030100: /* Au1000 DA */ + case 0x00030201: /* Au1000 HA */ + case 0x00030202: /* Au1000 HB */ + case 0x01030200: /* Au1500 AB */ + /* + * Au1100 errata actually keeps silence about this bit, so we set it + * just in case for those revisions that require it to be set according + * to the (now gone) cpu table. + */ + case 0x02030200: /* Au1100 AB */ + case 0x02030201: /* Au1100 BA */ + case 0x02030202: /* Au1100 BC */ + set_c0_config(1 << 19); + break; + } +} + +/* CP0 hazard avoidance. */ +#define NXP_BARRIER() \ + __asm__ __volatile__( \ + ".set noreorder\n\t" \ + "nop; nop; nop; nop; nop; nop;\n\t" \ + ".set reorder\n\t") + +static void nxp_pr4450_fixup_config(void) +{ + unsigned long config0; + + config0 = read_c0_config(); + + /* clear all three cache coherency fields */ + config0 &= ~(0x7 | (7 << 25) | (7 << 28)); + config0 |= (((_page_cachable_default >> _CACHE_SHIFT) << 0) | + ((_page_cachable_default >> _CACHE_SHIFT) << 25) | + ((_page_cachable_default >> _CACHE_SHIFT) << 28)); + write_c0_config(config0); + NXP_BARRIER(); +} + +static int cca = -1; + +static int __init cca_setup(char *str) +{ + get_option(&str, &cca); + + return 0; +} + +early_param("cca", cca_setup); + +static void coherency_setup(void) +{ + if (cca < 0 || cca > 7) + cca = read_c0_config() & CONF_CM_CMASK; + _page_cachable_default = cca << _CACHE_SHIFT; + + pr_debug("Using cache attribute %d\n", cca); + change_c0_config(CONF_CM_CMASK, cca); + + /* + * c0_status.cu=0 specifies that updates by the sc instruction use + * the coherency mode specified by the TLB; 1 means cachable + * coherent update on write will be used. Not all processors have + * this bit and; some wire it to zero, others like Toshiba had the + * silly idea of putting something else there ... + */ + switch (current_cpu_type()) { + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + clear_c0_config(CONF_CU); + break; + /* + * We need to catch the early Alchemy SOCs with + * the write-only co_config.od bit and set it back to one on: + * Au1000 rev DA, HA, HB; Au1100 AB, BA, BC, Au1500 AB + */ + case CPU_ALCHEMY: + au1x00_fixup_config_od(); + break; + + case PRID_IMP_PR4450: + nxp_pr4450_fixup_config(); + break; + } +} + +static void r4k_cache_error_setup(void) +{ + extern char __weak except_vec2_generic; + extern char __weak except_vec2_sb1; + + switch (current_cpu_type()) { + case CPU_SB1: + case CPU_SB1A: + set_uncached_handler(0x100, &except_vec2_sb1, 0x80); + break; + + default: + set_uncached_handler(0x100, &except_vec2_generic, 0x80); + break; + } +} + +void r4k_cache_init(void) +{ + extern void build_clear_page(void); + extern void build_copy_page(void); + struct cpuinfo_mips *c = ¤t_cpu_data; + + probe_pcache(); + probe_vcache(); + setup_scache(); + + r4k_blast_dcache_page_setup(); + r4k_blast_dcache_page_indexed_setup(); + r4k_blast_dcache_setup(); + r4k_blast_icache_page_setup(); + r4k_blast_icache_page_indexed_setup(); + r4k_blast_icache_setup(); + r4k_blast_scache_page_setup(); + r4k_blast_scache_page_indexed_setup(); + r4k_blast_scache_setup(); + r4k_blast_scache_node_setup(); +#ifdef CONFIG_EVA + r4k_blast_dcache_user_page_setup(); + r4k_blast_icache_user_page_setup(); +#endif + + /* + * Some MIPS32 and MIPS64 processors have physically indexed caches. + * This code supports virtually indexed processors and will be + * unnecessarily inefficient on physically indexed processors. + */ + if (c->dcache.linesz && cpu_has_dc_aliases) + shm_align_mask = max_t( unsigned long, + c->dcache.sets * c->dcache.linesz - 1, + PAGE_SIZE - 1); + else + shm_align_mask = PAGE_SIZE-1; + + __flush_cache_vmap = r4k__flush_cache_vmap; + __flush_cache_vunmap = r4k__flush_cache_vunmap; + + flush_cache_all = cache_noop; + __flush_cache_all = r4k___flush_cache_all; + flush_cache_mm = r4k_flush_cache_mm; + flush_cache_page = r4k_flush_cache_page; + flush_cache_range = r4k_flush_cache_range; + + __flush_kernel_vmap_range = r4k_flush_kernel_vmap_range; + + flush_icache_all = r4k_flush_icache_all; + local_flush_data_cache_page = local_r4k_flush_data_cache_page; + flush_data_cache_page = r4k_flush_data_cache_page; + flush_icache_range = r4k_flush_icache_range; + local_flush_icache_range = local_r4k_flush_icache_range; + __flush_icache_user_range = r4k_flush_icache_user_range; + __local_flush_icache_user_range = local_r4k_flush_icache_user_range; + +#ifdef CONFIG_DMA_NONCOHERENT + if (dma_default_coherent) { + _dma_cache_wback_inv = (void *)cache_noop; + _dma_cache_wback = (void *)cache_noop; + _dma_cache_inv = (void *)cache_noop; + } else { + _dma_cache_wback_inv = r4k_dma_cache_wback_inv; + _dma_cache_wback = r4k_dma_cache_wback_inv; + _dma_cache_inv = r4k_dma_cache_inv; + } +#endif /* CONFIG_DMA_NONCOHERENT */ + + build_clear_page(); + build_copy_page(); + + /* + * We want to run CMP kernels on core with and without coherent + * caches. Therefore, do not use CONFIG_MIPS_CMP to decide whether + * or not to flush caches. + */ + local_r4k___flush_cache_all(NULL); + + coherency_setup(); + board_cache_error_setup = r4k_cache_error_setup; + + /* + * Per-CPU overrides + */ + switch (current_cpu_type()) { + case CPU_BMIPS4350: + case CPU_BMIPS4380: + /* No IPI is needed because all CPUs share the same D$ */ + flush_data_cache_page = r4k_blast_dcache_page; + break; + case CPU_BMIPS5000: + /* We lose our superpowers if L2 is disabled */ + if (c->scache.flags & MIPS_CACHE_NOT_PRESENT) + break; + + /* I$ fills from D$ just by emptying the write buffers */ + flush_cache_page = (void *)b5k_instruction_hazard; + flush_cache_range = (void *)b5k_instruction_hazard; + local_flush_data_cache_page = (void *)b5k_instruction_hazard; + flush_data_cache_page = (void *)b5k_instruction_hazard; + flush_icache_range = (void *)b5k_instruction_hazard; + local_flush_icache_range = (void *)b5k_instruction_hazard; + + + /* Optimization: an L2 flush implicitly flushes the L1 */ + current_cpu_data.options |= MIPS_CPU_INCLUSIVE_CACHES; + break; + case CPU_LOONGSON64: + /* Loongson-3 maintains cache coherency by hardware */ + __flush_cache_all = cache_noop; + __flush_cache_vmap = cache_noop; + __flush_cache_vunmap = cache_noop; + __flush_kernel_vmap_range = (void *)cache_noop; + flush_cache_mm = (void *)cache_noop; + flush_cache_page = (void *)cache_noop; + flush_cache_range = (void *)cache_noop; + flush_icache_all = (void *)cache_noop; + flush_data_cache_page = (void *)cache_noop; + local_flush_data_cache_page = (void *)cache_noop; + break; + } +} + +static int r4k_cache_pm_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + switch (cmd) { + case CPU_PM_ENTER_FAILED: + case CPU_PM_EXIT: + coherency_setup(); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block r4k_cache_pm_notifier_block = { + .notifier_call = r4k_cache_pm_notifier, +}; + +int __init r4k_cache_init_pm(void) +{ + return cpu_pm_register_notifier(&r4k_cache_pm_notifier_block); +} +arch_initcall(r4k_cache_init_pm); diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c new file mode 100644 index 000000000..11b3e7dda --- /dev/null +++ b/arch/mips/mm/cache.c @@ -0,0 +1,209 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994 - 2003, 06, 07 by Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2007 MIPS Technologies, Inc. + */ +#include <linux/fs.h> +#include <linux/fcntl.h> +#include <linux/kernel.h> +#include <linux/linkage.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> + +#include <asm/cacheflush.h> +#include <asm/processor.h> +#include <asm/cpu.h> +#include <asm/cpu-features.h> +#include <asm/setup.h> +#include <asm/pgtable.h> + +/* Cache operations. */ +void (*flush_cache_all)(void); +void (*__flush_cache_all)(void); +EXPORT_SYMBOL_GPL(__flush_cache_all); +void (*flush_cache_mm)(struct mm_struct *mm); +void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page, + unsigned long pfn); +void (*flush_icache_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(flush_icache_range); +void (*local_flush_icache_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(local_flush_icache_range); +void (*__flush_icache_user_range)(unsigned long start, unsigned long end); +void (*__local_flush_icache_user_range)(unsigned long start, unsigned long end); +EXPORT_SYMBOL_GPL(__local_flush_icache_user_range); + +void (*__flush_cache_vmap)(void); +void (*__flush_cache_vunmap)(void); + +void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size); +EXPORT_SYMBOL_GPL(__flush_kernel_vmap_range); + +/* MIPS specific cache operations */ +void (*local_flush_data_cache_page)(void * addr); +void (*flush_data_cache_page)(unsigned long addr); +void (*flush_icache_all)(void); + +EXPORT_SYMBOL_GPL(local_flush_data_cache_page); +EXPORT_SYMBOL(flush_data_cache_page); +EXPORT_SYMBOL(flush_icache_all); + +#ifdef CONFIG_DMA_NONCOHERENT + +/* DMA cache operations. */ +void (*_dma_cache_wback_inv)(unsigned long start, unsigned long size); +void (*_dma_cache_wback)(unsigned long start, unsigned long size); +void (*_dma_cache_inv)(unsigned long start, unsigned long size); + +#endif /* CONFIG_DMA_NONCOHERENT */ + +/* + * We could optimize the case where the cache argument is not BCACHE but + * that seems very atypical use ... + */ +SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, + unsigned int, cache) +{ + if (bytes == 0) + return 0; + if (!access_ok((void __user *) addr, bytes)) + return -EFAULT; + + __flush_icache_user_range(addr, addr + bytes); + + return 0; +} + +void __flush_dcache_page(struct page *page) +{ + struct address_space *mapping = page_mapping_file(page); + unsigned long addr; + + if (mapping && !mapping_mapped(mapping)) { + SetPageDcacheDirty(page); + return; + } + + /* + * We could delay the flush for the !page_mapping case too. But that + * case is for exec env/arg pages and those are %99 certainly going to + * get faulted into the tlb (and thus flushed) anyways. + */ + if (PageHighMem(page)) + addr = (unsigned long)kmap_atomic(page); + else + addr = (unsigned long)page_address(page); + + flush_data_cache_page(addr); + + if (PageHighMem(page)) + kunmap_atomic((void *)addr); +} + +EXPORT_SYMBOL(__flush_dcache_page); + +void __flush_anon_page(struct page *page, unsigned long vmaddr) +{ + unsigned long addr = (unsigned long) page_address(page); + + if (pages_do_alias(addr, vmaddr)) { + if (page_mapcount(page) && !Page_dcache_dirty(page)) { + void *kaddr; + + kaddr = kmap_coherent(page, vmaddr); + flush_data_cache_page((unsigned long)kaddr); + kunmap_coherent(); + } else + flush_data_cache_page(addr); + } +} + +EXPORT_SYMBOL(__flush_anon_page); + +void __update_cache(unsigned long address, pte_t pte) +{ + struct page *page; + unsigned long pfn, addr; + int exec = !pte_no_exec(pte) && !cpu_has_ic_fills_f_dc; + + pfn = pte_pfn(pte); + if (unlikely(!pfn_valid(pfn))) + return; + page = pfn_to_page(pfn); + if (Page_dcache_dirty(page)) { + if (PageHighMem(page)) + addr = (unsigned long)kmap_atomic(page); + else + addr = (unsigned long)page_address(page); + + if (exec || pages_do_alias(addr, address & PAGE_MASK)) + flush_data_cache_page(addr); + + if (PageHighMem(page)) + kunmap_atomic((void *)addr); + + ClearPageDcacheDirty(page); + } +} + +unsigned long _page_cachable_default; +EXPORT_SYMBOL(_page_cachable_default); + +#define PM(p) __pgprot(_page_cachable_default | (p)) + +static pgprot_t protection_map[16] __ro_after_init; +DECLARE_VM_GET_PAGE_PROT + +static inline void setup_protection_map(void) +{ + protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[4] = PM(_PAGE_PRESENT); + protection_map[5] = PM(_PAGE_PRESENT); + protection_map[6] = PM(_PAGE_PRESENT); + protection_map[7] = PM(_PAGE_PRESENT); + + protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | + _PAGE_NO_READ); + protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); + protection_map[12] = PM(_PAGE_PRESENT); + protection_map[13] = PM(_PAGE_PRESENT); + protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE); + protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE); +} + +#undef PM + +void cpu_cache_init(void) +{ + if (cpu_has_3k_cache) { + extern void __weak r3k_cache_init(void); + + r3k_cache_init(); + } + if (cpu_has_4k_cache) { + extern void __weak r4k_cache_init(void); + + r4k_cache_init(); + } + + if (cpu_has_octeon_cache) { + extern void __weak octeon_cache_init(void); + + octeon_cache_init(); + } + + setup_protection_map(); +} diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c new file mode 100644 index 000000000..a3c02df19 --- /dev/null +++ b/arch/mips/mm/cerr-sb1.c @@ -0,0 +1,569 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2001,2002,2003 Broadcom Corporation + */ +#include <linux/sched.h> +#include <asm/mipsregs.h> +#include <asm/sibyte/sb1250.h> +#include <asm/sibyte/sb1250_regs.h> + +#if !defined(CONFIG_SIBYTE_BUS_WATCHER) || defined(CONFIG_SIBYTE_BW_TRACE) +#include <asm/io.h> +#include <asm/sibyte/sb1250_scd.h> +#endif + +/* + * We'd like to dump the L2_ECC_TAG register on errors, but errata make + * that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.) + */ +#undef DUMP_L2_ECC_TAG_ON_ERROR + +/* SB1 definitions */ + +/* XXX should come from config1 XXX */ +#define SB1_CACHE_INDEX_MASK 0x1fe0 + +#define CP0_ERRCTL_RECOVERABLE (1 << 31) +#define CP0_ERRCTL_DCACHE (1 << 30) +#define CP0_ERRCTL_ICACHE (1 << 29) +#define CP0_ERRCTL_MULTIBUS (1 << 23) +#define CP0_ERRCTL_MC_TLB (1 << 15) +#define CP0_ERRCTL_MC_TIMEOUT (1 << 14) + +#define CP0_CERRI_TAG_PARITY (1 << 29) +#define CP0_CERRI_DATA_PARITY (1 << 28) +#define CP0_CERRI_EXTERNAL (1 << 26) + +#define CP0_CERRI_IDX_VALID(c) (!((c) & CP0_CERRI_EXTERNAL)) +#define CP0_CERRI_DATA (CP0_CERRI_DATA_PARITY) + +#define CP0_CERRD_MULTIPLE (1 << 31) +#define CP0_CERRD_TAG_STATE (1 << 30) +#define CP0_CERRD_TAG_ADDRESS (1 << 29) +#define CP0_CERRD_DATA_SBE (1 << 28) +#define CP0_CERRD_DATA_DBE (1 << 27) +#define CP0_CERRD_EXTERNAL (1 << 26) +#define CP0_CERRD_LOAD (1 << 25) +#define CP0_CERRD_STORE (1 << 24) +#define CP0_CERRD_FILLWB (1 << 23) +#define CP0_CERRD_COHERENCY (1 << 22) +#define CP0_CERRD_DUPTAG (1 << 21) + +#define CP0_CERRD_DPA_VALID(c) (!((c) & CP0_CERRD_EXTERNAL)) +#define CP0_CERRD_IDX_VALID(c) \ + (((c) & (CP0_CERRD_LOAD | CP0_CERRD_STORE)) ? (!((c) & CP0_CERRD_EXTERNAL)) : 0) +#define CP0_CERRD_CAUSES \ + (CP0_CERRD_LOAD | CP0_CERRD_STORE | CP0_CERRD_FILLWB | CP0_CERRD_COHERENCY | CP0_CERRD_DUPTAG) +#define CP0_CERRD_TYPES \ + (CP0_CERRD_TAG_STATE | CP0_CERRD_TAG_ADDRESS | CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE | CP0_CERRD_EXTERNAL) +#define CP0_CERRD_DATA (CP0_CERRD_DATA_SBE | CP0_CERRD_DATA_DBE) + +static uint32_t extract_ic(unsigned short addr, int data); +static uint32_t extract_dc(unsigned short addr, int data); + +static inline void breakout_errctl(unsigned int val) +{ + if (val & CP0_ERRCTL_RECOVERABLE) + printk(" recoverable"); + if (val & CP0_ERRCTL_DCACHE) + printk(" dcache"); + if (val & CP0_ERRCTL_ICACHE) + printk(" icache"); + if (val & CP0_ERRCTL_MULTIBUS) + printk(" multiple-buserr"); + printk("\n"); +} + +static inline void breakout_cerri(unsigned int val) +{ + if (val & CP0_CERRI_TAG_PARITY) + printk(" tag-parity"); + if (val & CP0_CERRI_DATA_PARITY) + printk(" data-parity"); + if (val & CP0_CERRI_EXTERNAL) + printk(" external"); + printk("\n"); +} + +static inline void breakout_cerrd(unsigned int val) +{ + switch (val & CP0_CERRD_CAUSES) { + case CP0_CERRD_LOAD: + printk(" load,"); + break; + case CP0_CERRD_STORE: + printk(" store,"); + break; + case CP0_CERRD_FILLWB: + printk(" fill/wb,"); + break; + case CP0_CERRD_COHERENCY: + printk(" coherency,"); + break; + case CP0_CERRD_DUPTAG: + printk(" duptags,"); + break; + default: + printk(" NO CAUSE,"); + break; + } + if (!(val & CP0_CERRD_TYPES)) + printk(" NO TYPE"); + else { + if (val & CP0_CERRD_MULTIPLE) + printk(" multi-err"); + if (val & CP0_CERRD_TAG_STATE) + printk(" tag-state"); + if (val & CP0_CERRD_TAG_ADDRESS) + printk(" tag-address"); + if (val & CP0_CERRD_DATA_SBE) + printk(" data-SBE"); + if (val & CP0_CERRD_DATA_DBE) + printk(" data-DBE"); + if (val & CP0_CERRD_EXTERNAL) + printk(" external"); + } + printk("\n"); +} + +#ifndef CONFIG_SIBYTE_BUS_WATCHER + +static void check_bus_watcher(void) +{ + uint32_t status, l2_err, memio_err; +#ifdef DUMP_L2_ECC_TAG_ON_ERROR + uint64_t l2_tag; +#endif + + /* Destructive read, clears register and interrupt */ + status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS)); + /* Bit 31 is always on, but there's no #define for that */ + if (status & ~(1UL << 31)) { + l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); +#ifdef DUMP_L2_ECC_TAG_ON_ERROR + l2_tag = in64(IOADDR(A_L2_ECC_TAG)); +#endif + memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); + printk("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); + printk("\nLast recorded signature:\n"); + printk("Request %02x from %d, answered by %d with Dcode %d\n", + (unsigned int)(G_SCD_BERR_TID(status) & 0x3f), + (int)(G_SCD_BERR_TID(status) >> 6), + (int)G_SCD_BERR_RID(status), + (int)G_SCD_BERR_DCODE(status)); +#ifdef DUMP_L2_ECC_TAG_ON_ERROR + printk("Last L2 tag w/ bad ECC: %016llx\n", l2_tag); +#endif + } else { + printk("Bus watcher indicates no error\n"); + } +} +#else +extern void check_bus_watcher(void); +#endif + +asmlinkage void sb1_cache_error(void) +{ + uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res; + unsigned long long cerr_dpa; + +#ifdef CONFIG_SIBYTE_BW_TRACE + /* Freeze the trace buffer now */ + csr_out32(M_SCD_TRACE_CFG_FREEZE, IOADDR(A_SCD_TRACE_CFG)); + printk("Trace buffer frozen\n"); +#endif + + printk("Cache error exception on CPU %x:\n", + (read_c0_prid() >> 25) & 0x7); + + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noat\n\t" + " mfc0 %0, $26\n\t" + " mfc0 %1, $27\n\t" + " mfc0 %2, $27, 1\n\t" + " dmfc0 $1, $27, 3\n\t" + " dsrl32 %3, $1, 0 \n\t" + " sll %4, $1, 0 \n\t" + " mfc0 %5, $30\n\t" + " .set pop" + : "=r" (errctl), "=r" (cerr_i), "=r" (cerr_d), + "=r" (dpahi), "=r" (dpalo), "=r" (eepc)); + + cerr_dpa = (((uint64_t)dpahi) << 32) | dpalo; + printk(" c0_errorepc == %08x\n", eepc); + printk(" c0_errctl == %08x", errctl); + breakout_errctl(errctl); + if (errctl & CP0_ERRCTL_ICACHE) { + printk(" c0_cerr_i == %08x", cerr_i); + breakout_cerri(cerr_i); + if (CP0_CERRI_IDX_VALID(cerr_i)) { + /* Check index of EPC, allowing for delay slot */ + if (((eepc & SB1_CACHE_INDEX_MASK) != (cerr_i & SB1_CACHE_INDEX_MASK)) && + ((eepc & SB1_CACHE_INDEX_MASK) != ((cerr_i & SB1_CACHE_INDEX_MASK) - 4))) + printk(" cerr_i idx doesn't match eepc\n"); + else { + res = extract_ic(cerr_i & SB1_CACHE_INDEX_MASK, + (cerr_i & CP0_CERRI_DATA) != 0); + if (!(res & cerr_i)) + printk("...didn't see indicated icache problem\n"); + } + } + } + if (errctl & CP0_ERRCTL_DCACHE) { + printk(" c0_cerr_d == %08x", cerr_d); + breakout_cerrd(cerr_d); + if (CP0_CERRD_DPA_VALID(cerr_d)) { + printk(" c0_cerr_dpa == %010llx\n", cerr_dpa); + if (!CP0_CERRD_IDX_VALID(cerr_d)) { + res = extract_dc(cerr_dpa & SB1_CACHE_INDEX_MASK, + (cerr_d & CP0_CERRD_DATA) != 0); + if (!(res & cerr_d)) + printk("...didn't see indicated dcache problem\n"); + } else { + if ((cerr_dpa & SB1_CACHE_INDEX_MASK) != (cerr_d & SB1_CACHE_INDEX_MASK)) + printk(" cerr_d idx doesn't match cerr_dpa\n"); + else { + res = extract_dc(cerr_d & SB1_CACHE_INDEX_MASK, + (cerr_d & CP0_CERRD_DATA) != 0); + if (!(res & cerr_d)) + printk("...didn't see indicated problem\n"); + } + } + } + } + + check_bus_watcher(); + + /* + * Calling panic() when a fatal cache error occurs scrambles the + * state of the system (and the cache), making it difficult to + * investigate after the fact. However, if you just stall the CPU, + * the other CPU may keep on running, which is typically very + * undesirable. + */ +#ifdef CONFIG_SB1_CERR_STALL + while (1) + ; +#else + panic("unhandled cache error"); +#endif +} + + +/* Parity lookup table. */ +static const uint8_t parity[256] = { + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, + 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 +}; + +/* Masks to select bits for Hamming parity, mask_72_64[i] for bit[i] */ +static const uint64_t mask_72_64[8] = { + 0x0738C808099264FFULL, + 0x38C808099264FF07ULL, + 0xC808099264FF0738ULL, + 0x08099264FF0738C8ULL, + 0x099264FF0738C808ULL, + 0x9264FF0738C80809ULL, + 0x64FF0738C8080992ULL, + 0xFF0738C808099264ULL +}; + +/* Calculate the parity on a range of bits */ +static char range_parity(uint64_t dword, int max, int min) +{ + char parity = 0; + int i; + dword >>= min; + for (i=max-min; i>=0; i--) { + if (dword & 0x1) + parity = !parity; + dword >>= 1; + } + return parity; +} + +/* Calculate the 4-bit even byte-parity for an instruction */ +static unsigned char inst_parity(uint32_t word) +{ + int i, j; + char parity = 0; + for (j=0; j<4; j++) { + char byte_parity = 0; + for (i=0; i<8; i++) { + if (word & 0x80000000) + byte_parity = !byte_parity; + word <<= 1; + } + parity <<= 1; + parity |= byte_parity; + } + return parity; +} + +static uint32_t extract_ic(unsigned short addr, int data) +{ + unsigned short way; + int valid; + uint32_t taghi, taglolo, taglohi; + unsigned long long taglo, va; + uint64_t tlo_tmp; + uint8_t lru; + int res = 0; + + printk("Icache index 0x%04x ", addr); + for (way = 0; way < 4; way++) { + /* Index-load-tag-I */ + __asm__ __volatile__ ( + " .set push \n\t" + " .set noreorder \n\t" + " .set mips64 \n\t" + " .set noat \n\t" + " cache 4, 0(%3) \n\t" + " mfc0 %0, $29 \n\t" + " dmfc0 $1, $28 \n\t" + " dsrl32 %1, $1, 0 \n\t" + " sll %2, $1, 0 \n\t" + " .set pop" + : "=r" (taghi), "=r" (taglohi), "=r" (taglolo) + : "r" ((way << 13) | addr)); + + taglo = ((unsigned long long)taglohi << 32) | taglolo; + if (way == 0) { + lru = (taghi >> 14) & 0xff; + printk("[Bank %d Set 0x%02x] LRU > %d %d %d %d > MRU\n", + ((addr >> 5) & 0x3), /* bank */ + ((addr >> 7) & 0x3f), /* index */ + (lru & 0x3), + ((lru >> 2) & 0x3), + ((lru >> 4) & 0x3), + ((lru >> 6) & 0x3)); + } + va = (taglo & 0xC0000FFFFFFFE000ULL) | addr; + if ((taglo & (1 << 31)) && (((taglo >> 62) & 0x3) == 3)) + va |= 0x3FFFF00000000000ULL; + valid = ((taghi >> 29) & 1); + if (valid) { + tlo_tmp = taglo & 0xfff3ff; + if (((taglo >> 10) & 1) ^ range_parity(tlo_tmp, 23, 0)) { + printk(" ** bad parity in VTag0/G/ASID\n"); + res |= CP0_CERRI_TAG_PARITY; + } + if (((taglo >> 11) & 1) ^ range_parity(taglo, 63, 24)) { + printk(" ** bad parity in R/VTag1\n"); + res |= CP0_CERRI_TAG_PARITY; + } + } + if (valid ^ ((taghi >> 27) & 1)) { + printk(" ** bad parity for valid bit\n"); + res |= CP0_CERRI_TAG_PARITY; + } + printk(" %d [VA %016llx] [Vld? %d] raw tags: %08X-%016llX\n", + way, va, valid, taghi, taglo); + + if (data) { + uint32_t datahi, insta, instb; + uint8_t predecode; + int offset; + + /* (hit all banks and ways) */ + for (offset = 0; offset < 4; offset++) { + /* Index-load-data-I */ + __asm__ __volatile__ ( + " .set push\n\t" + " .set noreorder\n\t" + " .set mips64\n\t" + " .set noat\n\t" + " cache 6, 0(%3) \n\t" + " mfc0 %0, $29, 1\n\t" + " dmfc0 $1, $28, 1\n\t" + " dsrl32 %1, $1, 0 \n\t" + " sll %2, $1, 0 \n\t" + " .set pop \n" + : "=r" (datahi), "=r" (insta), "=r" (instb) + : "r" ((way << 13) | addr | (offset << 3))); + predecode = (datahi >> 8) & 0xff; + if (((datahi >> 16) & 1) != (uint32_t)range_parity(predecode, 7, 0)) { + printk(" ** bad parity in predecode\n"); + res |= CP0_CERRI_DATA_PARITY; + } + /* XXXKW should/could check predecode bits themselves */ + if (((datahi >> 4) & 0xf) ^ inst_parity(insta)) { + printk(" ** bad parity in instruction a\n"); + res |= CP0_CERRI_DATA_PARITY; + } + if ((datahi & 0xf) ^ inst_parity(instb)) { + printk(" ** bad parity in instruction b\n"); + res |= CP0_CERRI_DATA_PARITY; + } + printk(" %05X-%08X%08X", datahi, insta, instb); + } + printk("\n"); + } + } + return res; +} + +/* Compute the ECC for a data doubleword */ +static uint8_t dc_ecc(uint64_t dword) +{ + uint64_t t; + uint32_t w; + uint8_t p; + int i; + + p = 0; + for (i = 7; i >= 0; i--) + { + p <<= 1; + t = dword & mask_72_64[i]; + w = (uint32_t)(t >> 32); + p ^= (parity[w>>24] ^ parity[(w>>16) & 0xFF] + ^ parity[(w>>8) & 0xFF] ^ parity[w & 0xFF]); + w = (uint32_t)(t & 0xFFFFFFFF); + p ^= (parity[w>>24] ^ parity[(w>>16) & 0xFF] + ^ parity[(w>>8) & 0xFF] ^ parity[w & 0xFF]); + } + return p; +} + +struct dc_state { + unsigned char val; + char *name; +}; + +static struct dc_state dc_states[] = { + { 0x00, "INVALID" }, + { 0x0f, "COH-SHD" }, + { 0x13, "NCO-E-C" }, + { 0x19, "NCO-E-D" }, + { 0x16, "COH-E-C" }, + { 0x1c, "COH-E-D" }, + { 0xff, "*ERROR*" } +}; + +#define DC_TAG_VALID(state) \ + (((state) == 0x0) || ((state) == 0xf) || ((state) == 0x13) || \ + ((state) == 0x19) || ((state) == 0x16) || ((state) == 0x1c)) + +static char *dc_state_str(unsigned char state) +{ + struct dc_state *dsc = dc_states; + while (dsc->val != 0xff) { + if (dsc->val == state) + break; + dsc++; + } + return dsc->name; +} + +static uint32_t extract_dc(unsigned short addr, int data) +{ + int valid, way; + unsigned char state; + uint32_t taghi, taglolo, taglohi; + unsigned long long taglo, pa; + uint8_t ecc, lru; + int res = 0; + + printk("Dcache index 0x%04x ", addr); + for (way = 0; way < 4; way++) { + __asm__ __volatile__ ( + " .set push\n\t" + " .set noreorder\n\t" + " .set mips64\n\t" + " .set noat\n\t" + " cache 5, 0(%3)\n\t" /* Index-load-tag-D */ + " mfc0 %0, $29, 2\n\t" + " dmfc0 $1, $28, 2\n\t" + " dsrl32 %1, $1, 0\n\t" + " sll %2, $1, 0\n\t" + " .set pop" + : "=r" (taghi), "=r" (taglohi), "=r" (taglolo) + : "r" ((way << 13) | addr)); + + taglo = ((unsigned long long)taglohi << 32) | taglolo; + pa = (taglo & 0xFFFFFFE000ULL) | addr; + if (way == 0) { + lru = (taghi >> 14) & 0xff; + printk("[Bank %d Set 0x%02x] LRU > %d %d %d %d > MRU\n", + ((addr >> 11) & 0x2) | ((addr >> 5) & 1), /* bank */ + ((addr >> 6) & 0x3f), /* index */ + (lru & 0x3), + ((lru >> 2) & 0x3), + ((lru >> 4) & 0x3), + ((lru >> 6) & 0x3)); + } + state = (taghi >> 25) & 0x1f; + valid = DC_TAG_VALID(state); + printk(" %d [PA %010llx] [state %s (%02x)] raw tags: %08X-%016llX\n", + way, pa, dc_state_str(state), state, taghi, taglo); + if (valid) { + if (((taglo >> 11) & 1) ^ range_parity(taglo, 39, 26)) { + printk(" ** bad parity in PTag1\n"); + res |= CP0_CERRD_TAG_ADDRESS; + } + if (((taglo >> 10) & 1) ^ range_parity(taglo, 25, 13)) { + printk(" ** bad parity in PTag0\n"); + res |= CP0_CERRD_TAG_ADDRESS; + } + } else { + res |= CP0_CERRD_TAG_STATE; + } + + if (data) { + uint32_t datalohi, datalolo, datahi; + unsigned long long datalo; + int offset; + char bad_ecc = 0; + + for (offset = 0; offset < 4; offset++) { + /* Index-load-data-D */ + __asm__ __volatile__ ( + " .set push\n\t" + " .set noreorder\n\t" + " .set mips64\n\t" + " .set noat\n\t" + " cache 7, 0(%3)\n\t" /* Index-load-data-D */ + " mfc0 %0, $29, 3\n\t" + " dmfc0 $1, $28, 3\n\t" + " dsrl32 %1, $1, 0 \n\t" + " sll %2, $1, 0 \n\t" + " .set pop" + : "=r" (datahi), "=r" (datalohi), "=r" (datalolo) + : "r" ((way << 13) | addr | (offset << 3))); + datalo = ((unsigned long long)datalohi << 32) | datalolo; + ecc = dc_ecc(datalo); + if (ecc != datahi) { + int bits; + bad_ecc |= 1 << (3-offset); + ecc ^= datahi; + bits = hweight8(ecc); + res |= (bits == 1) ? CP0_CERRD_DATA_SBE : CP0_CERRD_DATA_DBE; + } + printk(" %02X-%016llX", datahi, datalo); + } + printk("\n"); + if (bad_ecc) + printk(" dwords w/ bad ECC: %d %d %d %d\n", + !!(bad_ecc & 8), !!(bad_ecc & 4), + !!(bad_ecc & 2), !!(bad_ecc & 1)); + } + } + return res; +} diff --git a/arch/mips/mm/cex-gen.S b/arch/mips/mm/cex-gen.S new file mode 100644 index 000000000..45dff5cd4 --- /dev/null +++ b/arch/mips/mm/cex-gen.S @@ -0,0 +1,42 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 - 1999 Ralf Baechle + * Copyright (C) 1999 Silicon Graphics, Inc. + * + * Cache error handler + */ +#include <asm/asm.h> +#include <asm/regdef.h> +#include <asm/mipsregs.h> +#include <asm/stackframe.h> + +/* + * Game over. Go to the button. Press gently. Swear where allowed by + * legislation. + */ + LEAF(except_vec2_generic) + .set noreorder + .set noat + .set mips0 + /* + * This is a very bad place to be. Our cache error + * detection has triggered. If we have write-back data + * in the cache, we may not be able to recover. As a + * first-order desperate measure, turn off KSEG0 cacheing. + */ + mfc0 k0,CP0_CONFIG + li k1,~CONF_CM_CMASK + and k0,k0,k1 + ori k0,k0,CONF_CM_UNCACHED + mtc0 k0,CP0_CONFIG + /* Give it a few cycles to sink in... */ + nop + nop + nop + + j cache_parity_error + nop + END(except_vec2_generic) diff --git a/arch/mips/mm/cex-oct.S b/arch/mips/mm/cex-oct.S new file mode 100644 index 000000000..9029092aa --- /dev/null +++ b/arch/mips/mm/cex-oct.S @@ -0,0 +1,70 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2006 Cavium Networks + * Cache error handler + */ + +#include <asm/asm.h> +#include <asm/regdef.h> +#include <asm/mipsregs.h> +#include <asm/stackframe.h> + +/* + * Handle cache error. Indicate to the second level handler whether + * the exception is recoverable. + */ + LEAF(except_vec2_octeon) + + .set push + .set mips64r2 + .set noreorder + .set noat + + + /* due to an errata we need to read the COP0 CacheErr (Dcache) + * before any cache/DRAM access */ + + rdhwr k0, $0 /* get core_id */ + PTR_LA k1, cache_err_dcache + sll k0, k0, 3 + PTR_ADDU k1, k0, k1 /* k1 = &cache_err_dcache[core_id] */ + + dmfc0 k0, CP0_CACHEERR, 1 + sd k0, (k1) + dmtc0 $0, CP0_CACHEERR, 1 + + /* check whether this is a nested exception */ + mfc0 k1, CP0_STATUS + andi k1, k1, ST0_EXL + beqz k1, 1f + nop + j cache_parity_error_octeon_non_recoverable + nop + + /* exception is recoverable */ +1: j handle_cache_err + nop + + .set pop + END(except_vec2_octeon) + + /* We need to jump to handle_cache_err so that the previous handler + * can fit within 0x80 bytes. We also move from 0xFFFFFFFFAXXXXXXX + * space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached). */ + LEAF(handle_cache_err) + .set push + .set noreorder + .set noat + + SAVE_ALL + KMODE + jal cache_parity_error_octeon_recoverable + nop + j ret_from_exception + nop + + .set pop + END(handle_cache_err) diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S new file mode 100644 index 000000000..85c6e6a40 --- /dev/null +++ b/arch/mips/mm/cex-sb1.S @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2001,2002,2003 Broadcom Corporation + */ + +#include <asm/asm.h> +#include <asm/regdef.h> +#include <asm/mipsregs.h> +#include <asm/stackframe.h> +#include <asm/cacheops.h> +#include <asm/sibyte/board.h> + +#define C0_ERRCTL $26 /* CP0: Error info */ +#define C0_CERR_I $27 /* CP0: Icache error */ +#define C0_CERR_D $27,1 /* CP0: Dcache error */ + + /* + * Based on SiByte sample software cache-err/cerr.S + * CVS revision 1.8. Only the 'unrecoverable' case + * is changed. + */ + + .set mips64 + .set noreorder + .set noat + + /* + * sb1_cerr_vec: code to be copied to the Cache Error + * Exception vector. The code must be pushed out to memory + * (either by copying to Kseg0 and Kseg1 both, or by flushing + * the L1 and L2) since it is fetched as 0xa0000100. + * + * NOTE: Be sure this handler is at most 28 instructions long + * since the final 16 bytes of the exception vector memory + * (0x170-0x17f) are used to preserve k0, k1, and ra. + */ + +LEAF(except_vec2_sb1) + /* + * If this error is recoverable, we need to exit the handler + * without having dirtied any registers. To do this, + * save/restore k0 and k1 from low memory (Useg is direct + * mapped while ERL=1). Note that we can't save to a + * CPU-specific location without ruining a register in the + * process. This means we are vulnerable to data corruption + * whenever the handler is reentered by a second CPU. + */ + sd k0,0x170($0) + sd k1,0x178($0) + +#ifdef CONFIG_SB1_CEX_ALWAYS_FATAL + j handle_vec2_sb1 + nop +#else + /* + * M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell + * if we can fast-path out of here for a h/w-recovered error. + */ + mfc0 k1,C0_ERRCTL + bgtz k1,attempt_recovery + sll k0,k1,1 + +recovered_dcache: + /* + * Unlock CacheErr-D (which in turn unlocks CacheErr-DPA). + * Ought to log the occurrence of this recovered dcache error. + */ + b recovered + mtc0 $0,C0_CERR_D + +attempt_recovery: + /* + * k0 has C0_ERRCTL << 1, which puts 'DC' at bit 31. Any + * Dcache errors we can recover from will take more extensive + * processing. For now, they are considered "unrecoverable". + * Note that 'DC' becoming set (outside of ERL mode) will + * cause 'IC' to clear; so if there's an Icache error, we'll + * only find out about it if we recover from this error and + * continue executing. + */ + bltz k0,unrecoverable + sll k0,1 + + /* + * k0 has C0_ERRCTL << 2, which puts 'IC' at bit 31. If an + * Icache error isn't indicated, I'm not sure why we got here. + * Consider that case "unrecoverable" for now. + */ + bgez k0,unrecoverable + +attempt_icache_recovery: + /* + * External icache errors are due to uncorrectable ECC errors + * in the L2 cache or Memory Controller and cannot be + * recovered here. + */ + mfc0 k0,C0_CERR_I /* delay slot */ + li k1,1 << 26 /* ICACHE_EXTERNAL */ + and k1,k0 + bnez k1,unrecoverable + andi k0,0x1fe0 + + /* + * Since the error is internal, the 'IDX' field from + * CacheErr-I is valid and we can just invalidate all blocks + * in that set. + */ + cache Index_Invalidate_I,(0<<13)(k0) + cache Index_Invalidate_I,(1<<13)(k0) + cache Index_Invalidate_I,(2<<13)(k0) + cache Index_Invalidate_I,(3<<13)(k0) + + /* Ought to log this recovered icache error */ + +recovered: + /* Restore the saved registers */ + ld k0,0x170($0) + ld k1,0x178($0) + eret + +unrecoverable: + /* Unrecoverable Icache or Dcache error; log it and/or fail */ + j handle_vec2_sb1 + nop +#endif + +END(except_vec2_sb1) + + LEAF(handle_vec2_sb1) + mfc0 k0,CP0_CONFIG + li k1,~CONF_CM_CMASK + and k0,k0,k1 + ori k0,k0,CONF_CM_UNCACHED + mtc0 k0,CP0_CONFIG + + SSNOP + SSNOP + SSNOP + SSNOP + bnezl $0, 1f +1: + mfc0 k0, CP0_STATUS + sll k0, k0, 3 # check CU0 (kernel?) + bltz k0, 2f + nop + + /* Get a valid Kseg0 stack pointer. Any task's stack pointer + * will do, although if we ever want to resume execution we + * better not have corrupted any state. */ + get_saved_sp + move sp, k1 + +2: + j sb1_cache_error + nop + + END(handle_vec2_sb1) diff --git a/arch/mips/mm/context.c b/arch/mips/mm/context.c new file mode 100644 index 000000000..966f40066 --- /dev/null +++ b/arch/mips/mm/context.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/atomic.h> +#include <linux/mmu_context.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> + +static DEFINE_RAW_SPINLOCK(cpu_mmid_lock); + +static atomic64_t mmid_version; +static unsigned int num_mmids; +static unsigned long *mmid_map; + +static DEFINE_PER_CPU(u64, reserved_mmids); +static cpumask_t tlb_flush_pending; + +static bool asid_versions_eq(int cpu, u64 a, u64 b) +{ + return ((a ^ b) & asid_version_mask(cpu)) == 0; +} + +void get_new_mmu_context(struct mm_struct *mm) +{ + unsigned int cpu; + u64 asid; + + /* + * This function is specific to ASIDs, and should not be called when + * MMIDs are in use. + */ + if (WARN_ON(IS_ENABLED(CONFIG_DEBUG_VM) && cpu_has_mmid)) + return; + + cpu = smp_processor_id(); + asid = asid_cache(cpu); + + if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) { + if (cpu_has_vtag_icache) + flush_icache_all(); + local_flush_tlb_all(); /* start new asid cycle */ + } + + set_cpu_context(cpu, mm, asid); + asid_cache(cpu) = asid; +} +EXPORT_SYMBOL_GPL(get_new_mmu_context); + +void check_mmu_context(struct mm_struct *mm) +{ + unsigned int cpu = smp_processor_id(); + + /* + * This function is specific to ASIDs, and should not be called when + * MMIDs are in use. + */ + if (WARN_ON(IS_ENABLED(CONFIG_DEBUG_VM) && cpu_has_mmid)) + return; + + /* Check if our ASID is of an older version and thus invalid */ + if (!asid_versions_eq(cpu, cpu_context(cpu, mm), asid_cache(cpu))) + get_new_mmu_context(mm); +} +EXPORT_SYMBOL_GPL(check_mmu_context); + +static void flush_context(void) +{ + u64 mmid; + int cpu; + + /* Update the list of reserved MMIDs and the MMID bitmap */ + bitmap_zero(mmid_map, num_mmids); + + /* Reserve an MMID for kmap/wired entries */ + __set_bit(MMID_KERNEL_WIRED, mmid_map); + + for_each_possible_cpu(cpu) { + mmid = xchg_relaxed(&cpu_data[cpu].asid_cache, 0); + + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * MMID, as this is the only trace we have of + * the process it is still running. + */ + if (mmid == 0) + mmid = per_cpu(reserved_mmids, cpu); + + __set_bit(mmid & cpu_asid_mask(&cpu_data[cpu]), mmid_map); + per_cpu(reserved_mmids, cpu) = mmid; + } + + /* + * Queue a TLB invalidation for each CPU to perform on next + * context-switch + */ + cpumask_setall(&tlb_flush_pending); +} + +static bool check_update_reserved_mmid(u64 mmid, u64 newmmid) +{ + bool hit; + int cpu; + + /* + * Iterate over the set of reserved MMIDs looking for a match. + * If we find one, then we can update our mm to use newmmid + * (i.e. the same MMID in the current generation) but we can't + * exit the loop early, since we need to ensure that all copies + * of the old MMID are updated to reflect the mm. Failure to do + * so could result in us missing the reserved MMID in a future + * generation. + */ + hit = false; + for_each_possible_cpu(cpu) { + if (per_cpu(reserved_mmids, cpu) == mmid) { + hit = true; + per_cpu(reserved_mmids, cpu) = newmmid; + } + } + + return hit; +} + +static u64 get_new_mmid(struct mm_struct *mm) +{ + static u32 cur_idx = MMID_KERNEL_WIRED + 1; + u64 mmid, version, mmid_mask; + + mmid = cpu_context(0, mm); + version = atomic64_read(&mmid_version); + mmid_mask = cpu_asid_mask(&boot_cpu_data); + + if (!asid_versions_eq(0, mmid, 0)) { + u64 newmmid = version | (mmid & mmid_mask); + + /* + * If our current MMID was active during a rollover, we + * can continue to use it and this was just a false alarm. + */ + if (check_update_reserved_mmid(mmid, newmmid)) { + mmid = newmmid; + goto set_context; + } + + /* + * We had a valid MMID in a previous life, so try to re-use + * it if possible. + */ + if (!__test_and_set_bit(mmid & mmid_mask, mmid_map)) { + mmid = newmmid; + goto set_context; + } + } + + /* Allocate a free MMID */ + mmid = find_next_zero_bit(mmid_map, num_mmids, cur_idx); + if (mmid != num_mmids) + goto reserve_mmid; + + /* We're out of MMIDs, so increment the global version */ + version = atomic64_add_return_relaxed(asid_first_version(0), + &mmid_version); + + /* Note currently active MMIDs & mark TLBs as requiring flushes */ + flush_context(); + + /* We have more MMIDs than CPUs, so this will always succeed */ + mmid = find_first_zero_bit(mmid_map, num_mmids); + +reserve_mmid: + __set_bit(mmid, mmid_map); + cur_idx = mmid; + mmid |= version; +set_context: + set_cpu_context(0, mm, mmid); + return mmid; +} + +void check_switch_mmu_context(struct mm_struct *mm) +{ + unsigned int cpu = smp_processor_id(); + u64 ctx, old_active_mmid; + unsigned long flags; + + if (!cpu_has_mmid) { + check_mmu_context(mm); + write_c0_entryhi(cpu_asid(cpu, mm)); + goto setup_pgd; + } + + /* + * MMID switch fast-path, to avoid acquiring cpu_mmid_lock when it's + * unnecessary. + * + * The memory ordering here is subtle. If our active_mmids is non-zero + * and the MMID matches the current version, then we update the CPU's + * asid_cache with a relaxed cmpxchg. Racing with a concurrent rollover + * means that either: + * + * - We get a zero back from the cmpxchg and end up waiting on + * cpu_mmid_lock in check_mmu_context(). Taking the lock synchronises + * with the rollover and so we are forced to see the updated + * generation. + * + * - We get a valid MMID back from the cmpxchg, which means the + * relaxed xchg in flush_context will treat us as reserved + * because atomic RmWs are totally ordered for a given location. + */ + ctx = cpu_context(cpu, mm); + old_active_mmid = READ_ONCE(cpu_data[cpu].asid_cache); + if (!old_active_mmid || + !asid_versions_eq(cpu, ctx, atomic64_read(&mmid_version)) || + !cmpxchg_relaxed(&cpu_data[cpu].asid_cache, old_active_mmid, ctx)) { + raw_spin_lock_irqsave(&cpu_mmid_lock, flags); + + ctx = cpu_context(cpu, mm); + if (!asid_versions_eq(cpu, ctx, atomic64_read(&mmid_version))) + ctx = get_new_mmid(mm); + + WRITE_ONCE(cpu_data[cpu].asid_cache, ctx); + raw_spin_unlock_irqrestore(&cpu_mmid_lock, flags); + } + + /* + * Invalidate the local TLB if needed. Note that we must only clear our + * bit in tlb_flush_pending after this is complete, so that the + * cpu_has_shared_ftlb_entries case below isn't misled. + */ + if (cpumask_test_cpu(cpu, &tlb_flush_pending)) { + if (cpu_has_vtag_icache) + flush_icache_all(); + local_flush_tlb_all(); + cpumask_clear_cpu(cpu, &tlb_flush_pending); + } + + write_c0_memorymapid(ctx & cpu_asid_mask(&boot_cpu_data)); + + /* + * If this CPU shares FTLB entries with its siblings and one or more of + * those siblings hasn't yet invalidated its TLB following a version + * increase then we need to invalidate any TLB entries for our MMID + * that we might otherwise pick up from a sibling. + * + * We ifdef on CONFIG_SMP because cpu_sibling_map isn't defined in + * CONFIG_SMP=n kernels. + */ +#ifdef CONFIG_SMP + if (cpu_has_shared_ftlb_entries && + cpumask_intersects(&tlb_flush_pending, &cpu_sibling_map[cpu])) { + /* Ensure we operate on the new MMID */ + mtc0_tlbw_hazard(); + + /* + * Invalidate all TLB entries associated with the new + * MMID, and wait for the invalidation to complete. + */ + ginvt_mmid(); + sync_ginv(); + } +#endif + +setup_pgd: + TLBMISS_HANDLER_SETUP_PGD(mm->pgd); +} +EXPORT_SYMBOL_GPL(check_switch_mmu_context); + +static int mmid_init(void) +{ + if (!cpu_has_mmid) + return 0; + + /* + * Expect allocation after rollover to fail if we don't have at least + * one more MMID than CPUs. + */ + num_mmids = asid_first_version(0); + WARN_ON(num_mmids <= num_possible_cpus()); + + atomic64_set(&mmid_version, asid_first_version(0)); + mmid_map = bitmap_zalloc(num_mmids, GFP_KERNEL); + if (!mmid_map) + panic("Failed to allocate bitmap for %u MMIDs\n", num_mmids); + + /* Reserve an MMID for kmap/wired entries */ + __set_bit(MMID_KERNEL_WIRED, mmid_map); + + pr_info("MMID allocator initialised with %u entries\n", num_mmids); + return 0; +} +early_initcall(mmid_init); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c new file mode 100644 index 000000000..3c4fc97b9 --- /dev/null +++ b/arch/mips/mm/dma-noncoherent.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 Ani Joshi <ajoshi@unixbox.com> + * Copyright (C) 2000, 2001, 06 Ralf Baechle <ralf@linux-mips.org> + * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. + */ +#include <linux/dma-direct.h> +#include <linux/dma-map-ops.h> +#include <linux/highmem.h> + +#include <asm/cache.h> +#include <asm/cpu-type.h> +#include <asm/io.h> + +/* + * The affected CPUs below in 'cpu_needs_post_dma_flush()' can speculatively + * fill random cachelines with stale data at any time, requiring an extra + * flush post-DMA. + * + * Warning on the terminology - Linux calls an uncached area coherent; MIPS + * terminology calls memory areas with hardware maintained coherency coherent. + * + * Note that the R14000 and R16000 should also be checked for in this condition. + * However this function is only called on non-I/O-coherent systems and only the + * R10000 and R12000 are used in such systems, the SGI IP28 Indigo² rsp. + * SGI IP32 aka O2. + */ +static inline bool cpu_needs_post_dma_flush(void) +{ + switch (boot_cpu_type()) { + case CPU_R10000: + case CPU_R12000: + case CPU_BMIPS5000: + case CPU_LOONGSON2EF: + case CPU_XBURST: + return true; + default: + /* + * Presence of MAARs suggests that the CPU supports + * speculatively prefetching data, and therefore requires + * the post-DMA flush/invalidate. + */ + return cpu_has_maar; + } +} + +void arch_dma_prep_coherent(struct page *page, size_t size) +{ + dma_cache_wback_inv((unsigned long)page_address(page), size); +} + +void *arch_dma_set_uncached(void *addr, size_t size) +{ + return (void *)(__pa(addr) + UNCAC_BASE); +} + +static inline void dma_sync_virt_for_device(void *addr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + dma_cache_wback((unsigned long)addr, size); + break; + case DMA_FROM_DEVICE: + dma_cache_inv((unsigned long)addr, size); + break; + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv((unsigned long)addr, size); + break; + default: + BUG(); + } +} + +static inline void dma_sync_virt_for_cpu(void *addr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + break; + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv((unsigned long)addr, size); + break; + default: + BUG(); + } +} + +/* + * A single sg entry may refer to multiple physically contiguous pages. But + * we still need to process highmem pages individually. If highmem is not + * configured then the bulk of this loop gets optimized out. + */ +static inline void dma_sync_phys(phys_addr_t paddr, size_t size, + enum dma_data_direction dir, bool for_device) +{ + struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); + unsigned long offset = paddr & ~PAGE_MASK; + size_t left = size; + + do { + size_t len = left; + void *addr; + + if (PageHighMem(page)) { + if (offset + len > PAGE_SIZE) + len = PAGE_SIZE - offset; + } + + addr = kmap_atomic(page); + if (for_device) + dma_sync_virt_for_device(addr + offset, len, dir); + else + dma_sync_virt_for_cpu(addr + offset, len, dir); + kunmap_atomic(addr); + + offset = 0; + page++; + left -= len; + } while (left); +} + +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + dma_sync_phys(paddr, size, dir, true); +} + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + if (cpu_needs_post_dma_flush()) + dma_sync_phys(paddr, size, dir, false); +} +#endif + +#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + dev->dma_coherent = coherent; +} +#endif diff --git a/arch/mips/mm/extable.c b/arch/mips/mm/extable.c new file mode 100644 index 000000000..81bc8a34a --- /dev/null +++ b/arch/mips/mm/extable.c @@ -0,0 +1,25 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1997, 99, 2001 - 2004 Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/extable.h> +#include <linux/spinlock.h> +#include <asm/branch.h> +#include <linux/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(exception_epc(regs)); + if (fixup) { + regs->cp0_epc = fixup->nextinsn; + + return 1; + } + + return 0; +} diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c new file mode 100644 index 000000000..d7878208b --- /dev/null +++ b/arch/mips/mm/fault.c @@ -0,0 +1,332 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995 - 2000 by Ralf Baechle + */ +#include <linux/context_tracking.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/ratelimit.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kprobes.h> +#include <linux/perf_event.h> +#include <linux/uaccess.h> + +#include <asm/branch.h> +#include <asm/mmu_context.h> +#include <asm/ptrace.h> +#include <asm/highmem.h> /* For VMALLOC_END */ +#include <linux/kdebug.h> + +int show_unhandled_signals = 1; + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + */ +static void __do_page_fault(struct pt_regs *regs, unsigned long write, + unsigned long address) +{ + struct vm_area_struct * vma = NULL; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + const int field = sizeof(unsigned long) * 2; + int si_code; + vm_fault_t fault; + unsigned int flags = FAULT_FLAG_DEFAULT; + + static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); + +#if 0 + printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(), + current->comm, current->pid, field, address, write, + field, regs->cp0_epc); +#endif + +#ifdef CONFIG_KPROBES + /* + * This is to notify the fault handler of the kprobes. + */ + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, + current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) + return; +#endif + + si_code = SEGV_MAPERR; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + */ +#ifdef CONFIG_64BIT +# define VMALLOC_FAULT_TARGET no_context +#else +# define VMALLOC_FAULT_TARGET vmalloc_fault +#endif + + if (unlikely(address >= VMALLOC_START && address <= VMALLOC_END)) + goto VMALLOC_FAULT_TARGET; +#ifdef MODULE_START + if (unlikely(address >= MODULE_START && address < MODULE_END)) + goto VMALLOC_FAULT_TARGET; +#endif + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (faulthandler_disabled() || !mm) + goto bad_area_nosemaphore; + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); +retry: + vma = lock_mm_and_find_vma(mm, address, regs); + if (!vma) + goto bad_area_nosemaphore; +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ + si_code = SEGV_ACCERR; + + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + flags |= FAULT_FLAG_WRITE; + } else { + if (cpu_has_rixi) { + if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) { +#if 0 + pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] XI violation\n", + raw_smp_processor_id(), + current->comm, current->pid, + field, address, write, + field, regs->cp0_epc); +#endif + goto bad_area; + } + if (!(vma->vm_flags & VM_READ) && + exception_epc(regs) != address) { +#if 0 + pr_notice("Cpu%d[%s:%d:%0*lx:%ld:%0*lx] RI violation\n", + raw_smp_processor_id(), + current->comm, current->pid, + field, address, write, + field, regs->cp0_epc); +#endif + goto bad_area; + } + } else { + if (unlikely(!vma_is_accessible(vma))) + goto bad_area; + } + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + fault = handle_mm_fault(vma, address, flags, regs); + + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + goto no_context; + return; + } + + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + + if (unlikely(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + BUG(); + } + + if (fault & VM_FAULT_RETRY) { + flags |= FAULT_FLAG_TRIED; + + /* + * No need to mmap_read_unlock(mm) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } + + mmap_read_unlock(mm); + return; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + mmap_read_unlock(mm); + +bad_area_nosemaphore: + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + tsk->thread.cp0_badvaddr = address; + tsk->thread.error_code = write; + if (show_unhandled_signals && + unhandled_signal(tsk, SIGSEGV) && + __ratelimit(&ratelimit_state)) { + pr_info("do_page_fault(): sending SIGSEGV to %s for invalid %s %0*lx\n", + tsk->comm, + write ? "write access to" : "read access from", + field, address); + pr_info("epc = %0*lx in", field, + (unsigned long) regs->cp0_epc); + print_vma_addr(KERN_CONT " ", regs->cp0_epc); + pr_cont("\n"); + pr_info("ra = %0*lx in", field, + (unsigned long) regs->regs[31]); + print_vma_addr(KERN_CONT " ", regs->regs[31]); + pr_cont("\n"); + } + current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; + force_sig_fault(SIGSEGV, si_code, (void __user *)address); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) { + current->thread.cp0_baduaddr = address; + return; + } + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + bust_spinlocks(1); + + printk(KERN_ALERT "CPU %d Unable to handle kernel paging request at " + "virtual address %0*lx, epc == %0*lx, ra == %0*lx\n", + raw_smp_processor_id(), field, address, field, regs->cp0_epc, + field, regs->regs[31]); + die("Oops", regs); + +out_of_memory: + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ + mmap_read_unlock(mm); + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); + return; + +do_sigbus: + mmap_read_unlock(mm); + + /* Kernel mode? Handle exceptions or die */ + if (!user_mode(regs)) + goto no_context; + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ +#if 0 + printk("do_page_fault() #3: sending SIGBUS to %s for " + "invalid %s\n%0*lx (epc == %0*lx, ra == %0*lx)\n", + tsk->comm, + write ? "write access to" : "read access from", + field, address, + field, (unsigned long) regs->cp0_epc, + field, (unsigned long) regs->regs[31]); +#endif + current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; + tsk->thread.cp0_badvaddr = address; + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address); + + return; +#ifndef CONFIG_64BIT +vmalloc_fault: + { + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. + * + * Do _not_ use "tsk" here. We might be inside + * an interrupt in the middle of a task switch.. + */ + int offset = pgd_index(address); + pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + pte_t *pte_k; + + pgd = (pgd_t *) pgd_current[raw_smp_processor_id()] + offset; + pgd_k = init_mm.pgd + offset; + + if (!pgd_present(*pgd_k)) + goto no_context; + set_pgd(pgd, *pgd_k); + + p4d = p4d_offset(pgd, address); + p4d_k = p4d_offset(pgd_k, address); + if (!p4d_present(*p4d_k)) + goto no_context; + + pud = pud_offset(p4d, address); + pud_k = pud_offset(p4d_k, address); + if (!pud_present(*pud_k)) + goto no_context; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + goto no_context; + set_pmd(pmd, *pmd_k); + + pte_k = pte_offset_kernel(pmd_k, address); + if (!pte_present(*pte_k)) + goto no_context; + return; + } +#endif +} +NOKPROBE_SYMBOL(__do_page_fault); + +asmlinkage void do_page_fault(struct pt_regs *regs, + unsigned long write, unsigned long address) +{ + enum ctx_state prev_state; + + prev_state = exception_enter(); + __do_page_fault(regs, write, address); + exception_exit(prev_state); +} +NOKPROBE_SYMBOL(do_page_fault); diff --git a/arch/mips/mm/highmem.c b/arch/mips/mm/highmem.c new file mode 100644 index 000000000..57e2f08f0 --- /dev/null +++ b/arch/mips/mm/highmem.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/highmem.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <asm/fixmap.h> +#include <asm/tlbflush.h> + +unsigned long highstart_pfn, highend_pfn; + +void kmap_flush_tlb(unsigned long addr) +{ + flush_tlb_one(addr); +} +EXPORT_SYMBOL(kmap_flush_tlb); diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c new file mode 100644 index 000000000..7eaff5b07 --- /dev/null +++ b/arch/mips/mm/hugetlbpage.c @@ -0,0 +1,69 @@ +/* + * MIPS Huge TLB Page Support for Kernel. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> + * Copyright 2005, Embedded Alley Solutions, Inc. + * Matt Porter <mporter@embeddedalley.com> + * Copyright (C) 2008, 2009 Cavium Networks, Inc. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, addr); + p4d = p4d_alloc(mm, pgd, addr); + pud = pud_alloc(mm, p4d, addr); + if (pud) + pte = (pte_t *)pmd_alloc(mm, pud, addr); + + return pte; +} + +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, + unsigned long sz) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + if (pgd_present(*pgd)) { + p4d = p4d_offset(pgd, addr); + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, addr); + if (pud_present(*pud)) + pmd = pmd_offset(pud, addr); + } + } + return (pte_t *) pmd; +} + +int pmd_huge(pmd_t pmd) +{ + return (pmd_val(pmd) & _PAGE_HUGE) != 0; +} + +int pud_huge(pud_t pud) +{ + return (pud_val(pud) & _PAGE_HUGE) != 0; +} diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c new file mode 100644 index 000000000..e8660d06f --- /dev/null +++ b/arch/mips/mm/init.c @@ -0,0 +1,568 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1994 - 2000 Ralf Baechle + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. + */ +#include <linux/bug.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/pagemap.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/highmem.h> +#include <linux/swap.h> +#include <linux/proc_fs.h> +#include <linux/pfn.h> +#include <linux/hardirq.h> +#include <linux/gfp.h> +#include <linux/kcore.h> +#include <linux/initrd.h> + +#include <asm/bootinfo.h> +#include <asm/cachectl.h> +#include <asm/cpu.h> +#include <asm/dma.h> +#include <asm/maar.h> +#include <asm/mmu_context.h> +#include <asm/sections.h> +#include <asm/pgalloc.h> +#include <asm/tlb.h> +#include <asm/fixmap.h> + +/* + * We have up to 8 empty zeroed pages so we can map one of the right colour + * when needed. This is necessary only on R4000 / R4400 SC and MC versions + * where we have to avoid VCED / VECI exceptions for good performance at + * any price. Since page is never written to after the initialization we + * don't have to care about aliases on other CPUs. + */ +unsigned long empty_zero_page, zero_page_mask; +EXPORT_SYMBOL_GPL(empty_zero_page); +EXPORT_SYMBOL(zero_page_mask); + +/* + * Not static inline because used by IP27 special magic initialization code + */ +void setup_zero_pages(void) +{ + unsigned int order, i; + struct page *page; + + if (cpu_has_vce) + order = 3; + else + order = 0; + + empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); + if (!empty_zero_page) + panic("Oh boy, that early out of memory?"); + + page = virt_to_page((void *)empty_zero_page); + split_page(page, order); + for (i = 0; i < (1 << order); i++, page++) + mark_page_reserved(page); + + zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK; +} + +static void *__kmap_pgprot(struct page *page, unsigned long addr, pgprot_t prot) +{ + enum fixed_addresses idx; + unsigned int old_mmid; + unsigned long vaddr, flags, entrylo; + unsigned long old_ctx; + pte_t pte; + int tlbidx; + + BUG_ON(Page_dcache_dirty(page)); + + preempt_disable(); + pagefault_disable(); + idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); + idx += in_interrupt() ? FIX_N_COLOURS : 0; + vaddr = __fix_to_virt(FIX_CMAP_END - idx); + pte = mk_pte(page, prot); +#if defined(CONFIG_XPA) + entrylo = pte_to_entrylo(pte.pte_high); +#elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) + entrylo = pte.pte_high; +#else + entrylo = pte_to_entrylo(pte_val(pte)); +#endif + + local_irq_save(flags); + old_ctx = read_c0_entryhi(); + write_c0_entryhi(vaddr & (PAGE_MASK << 1)); + write_c0_entrylo0(entrylo); + write_c0_entrylo1(entrylo); + if (cpu_has_mmid) { + old_mmid = read_c0_memorymapid(); + write_c0_memorymapid(MMID_KERNEL_WIRED); + } +#ifdef CONFIG_XPA + if (cpu_has_xpa) { + entrylo = (pte.pte_low & _PFNX_MASK); + writex_c0_entrylo0(entrylo); + writex_c0_entrylo1(entrylo); + } +#endif + tlbidx = num_wired_entries(); + write_c0_wired(tlbidx + 1); + write_c0_index(tlbidx); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + write_c0_entryhi(old_ctx); + if (cpu_has_mmid) + write_c0_memorymapid(old_mmid); + local_irq_restore(flags); + + return (void*) vaddr; +} + +void *kmap_coherent(struct page *page, unsigned long addr) +{ + return __kmap_pgprot(page, addr, PAGE_KERNEL); +} + +void *kmap_noncoherent(struct page *page, unsigned long addr) +{ + return __kmap_pgprot(page, addr, PAGE_KERNEL_NC); +} + +void kunmap_coherent(void) +{ + unsigned int wired; + unsigned long flags, old_ctx; + + local_irq_save(flags); + old_ctx = read_c0_entryhi(); + wired = num_wired_entries() - 1; + write_c0_wired(wired); + write_c0_index(wired); + write_c0_entryhi(UNIQUE_ENTRYHI(wired)); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + write_c0_entryhi(old_ctx); + local_irq_restore(flags); + pagefault_enable(); + preempt_enable(); +} + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *vfrom, *vto; + + vto = kmap_atomic(to); + if (cpu_has_dc_aliases && + page_mapcount(from) && !Page_dcache_dirty(from)) { + vfrom = kmap_coherent(from, vaddr); + copy_page(vto, vfrom); + kunmap_coherent(); + } else { + vfrom = kmap_atomic(from); + copy_page(vto, vfrom); + kunmap_atomic(vfrom); + } + if ((!cpu_has_ic_fills_f_dc) || + pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK)) + flush_data_cache_page((unsigned long)vto); + kunmap_atomic(vto); + /* Make sure this page is cleared on other CPU's too before using it */ + smp_wmb(); +} + +void copy_to_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + if (cpu_has_dc_aliases && + page_mapcount(page) && !Page_dcache_dirty(page)) { + void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); + memcpy(vto, src, len); + kunmap_coherent(); + } else { + memcpy(dst, src, len); + if (cpu_has_dc_aliases) + SetPageDcacheDirty(page); + } + if (vma->vm_flags & VM_EXEC) + flush_cache_page(vma, vaddr, page_to_pfn(page)); +} + +void copy_from_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + if (cpu_has_dc_aliases && + page_mapcount(page) && !Page_dcache_dirty(page)) { + void *vfrom = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); + memcpy(dst, vfrom, len); + kunmap_coherent(); + } else { + memcpy(dst, src, len); + if (cpu_has_dc_aliases) + SetPageDcacheDirty(page); + } +} +EXPORT_SYMBOL_GPL(copy_from_user_page); + +void __init fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base) +{ +#ifdef CONFIG_HIGHMEM + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int i, j, k; + unsigned long vaddr; + + vaddr = start; + i = pgd_index(vaddr); + j = pud_index(vaddr); + k = pmd_index(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { + pud = (pud_t *)pgd; + for ( ; (j < PTRS_PER_PUD) && (vaddr < end); pud++, j++) { + pmd = (pmd_t *)pud; + for (; (k < PTRS_PER_PMD) && (vaddr < end); pmd++, k++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, + PAGE_SIZE); + if (!pte) + panic("%s: Failed to allocate %lu bytes align=%lx\n", + __func__, PAGE_SIZE, + PAGE_SIZE); + + set_pmd(pmd, __pmd((unsigned long)pte)); + BUG_ON(pte != pte_offset_kernel(pmd, 0)); + } + vaddr += PMD_SIZE; + } + k = 0; + } + j = 0; + } +#endif +} + +struct maar_walk_info { + struct maar_config cfg[16]; + unsigned int num_cfg; +}; + +static int maar_res_walk(unsigned long start_pfn, unsigned long nr_pages, + void *data) +{ + struct maar_walk_info *wi = data; + struct maar_config *cfg = &wi->cfg[wi->num_cfg]; + unsigned int maar_align; + + /* MAAR registers hold physical addresses right shifted by 4 bits */ + maar_align = BIT(MIPS_MAAR_ADDR_SHIFT + 4); + + /* Fill in the MAAR config entry */ + cfg->lower = ALIGN(PFN_PHYS(start_pfn), maar_align); + cfg->upper = ALIGN_DOWN(PFN_PHYS(start_pfn + nr_pages), maar_align) - 1; + cfg->attrs = MIPS_MAAR_S; + + /* Ensure we don't overflow the cfg array */ + if (!WARN_ON(wi->num_cfg >= ARRAY_SIZE(wi->cfg))) + wi->num_cfg++; + + return 0; +} + + +unsigned __weak platform_maar_init(unsigned num_pairs) +{ + unsigned int num_configured; + struct maar_walk_info wi; + + wi.num_cfg = 0; + walk_system_ram_range(0, max_pfn, &wi, maar_res_walk); + + num_configured = maar_config(wi.cfg, wi.num_cfg, num_pairs); + if (num_configured < wi.num_cfg) + pr_warn("Not enough MAAR pairs (%u) for all memory regions (%u)\n", + num_pairs, wi.num_cfg); + + return num_configured; +} + +void maar_init(void) +{ + unsigned num_maars, used, i; + phys_addr_t lower, upper, attr; + static struct { + struct maar_config cfgs[3]; + unsigned used; + } recorded = { { { 0 } }, 0 }; + + if (!cpu_has_maar) + return; + + /* Detect the number of MAARs */ + write_c0_maari(~0); + back_to_back_c0_hazard(); + num_maars = read_c0_maari() + 1; + + /* MAARs should be in pairs */ + WARN_ON(num_maars % 2); + + /* Set MAARs using values we recorded already */ + if (recorded.used) { + used = maar_config(recorded.cfgs, recorded.used, num_maars / 2); + BUG_ON(used != recorded.used); + } else { + /* Configure the required MAARs */ + used = platform_maar_init(num_maars / 2); + } + + /* Disable any further MAARs */ + for (i = (used * 2); i < num_maars; i++) { + write_c0_maari(i); + back_to_back_c0_hazard(); + write_c0_maar(0); + back_to_back_c0_hazard(); + } + + if (recorded.used) + return; + + pr_info("MAAR configuration:\n"); + for (i = 0; i < num_maars; i += 2) { + write_c0_maari(i); + back_to_back_c0_hazard(); + upper = read_c0_maar(); +#ifdef CONFIG_XPA + upper |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT; +#endif + + write_c0_maari(i + 1); + back_to_back_c0_hazard(); + lower = read_c0_maar(); +#ifdef CONFIG_XPA + lower |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT; +#endif + + attr = lower & upper; + lower = (lower & MIPS_MAAR_ADDR) << 4; + upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; + + pr_info(" [%d]: ", i / 2); + if ((attr & MIPS_MAAR_V) != MIPS_MAAR_V) { + pr_cont("disabled\n"); + continue; + } + + pr_cont("%pa-%pa", &lower, &upper); + + if (attr & MIPS_MAAR_S) + pr_cont(" speculate"); + + pr_cont("\n"); + + /* Record the setup for use on secondary CPUs */ + if (used <= ARRAY_SIZE(recorded.cfgs)) { + recorded.cfgs[recorded.used].lower = lower; + recorded.cfgs[recorded.used].upper = upper; + recorded.cfgs[recorded.used].attrs = attr; + recorded.used++; + } + } +} + +#ifndef CONFIG_NUMA +void __init paging_init(void) +{ + unsigned long max_zone_pfns[MAX_NR_ZONES]; + + pagetable_init(); + +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; +#endif +#ifdef CONFIG_ZONE_DMA32 + max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; +#endif + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; +#ifdef CONFIG_HIGHMEM + max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; + + if (cpu_has_dc_aliases && max_low_pfn != highend_pfn) { + printk(KERN_WARNING "This processor doesn't support highmem." + " %ldk highmem ignored\n", + (highend_pfn - max_low_pfn) << (PAGE_SHIFT - 10)); + max_zone_pfns[ZONE_HIGHMEM] = max_low_pfn; + } + + max_mapnr = highend_pfn ? highend_pfn : max_low_pfn; +#else + max_mapnr = max_low_pfn; +#endif + high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); + + free_area_init(max_zone_pfns); +} + +#ifdef CONFIG_64BIT +static struct kcore_list kcore_kseg0; +#endif + +static inline void __init mem_init_free_highmem(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + + if (cpu_has_dc_aliases) + return; + + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { + struct page *page = pfn_to_page(tmp); + + if (!memblock_is_memory(PFN_PHYS(tmp))) + SetPageReserved(page); + else + free_highmem_page(page); + } +#endif +} + +void __init mem_init(void) +{ + /* + * When _PFN_SHIFT is greater than PAGE_SHIFT we won't have enough PTE + * bits to hold a full 32b physical address on MIPS32 systems. + */ + BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (_PFN_SHIFT > PAGE_SHIFT)); + + maar_init(); + memblock_free_all(); + setup_zero_pages(); /* Setup zeroed pages. */ + mem_init_free_highmem(); + +#ifdef CONFIG_64BIT + if ((unsigned long) &_text > (unsigned long) CKSEG0) + /* The -4 is a hack so that user tools don't have to handle + the overflow. */ + kclist_add(&kcore_kseg0, (void *) CKSEG0, + 0x80000000 - 4, KCORE_TEXT); +#endif +} +#endif /* !CONFIG_NUMA */ + +void free_init_pages(const char *what, unsigned long begin, unsigned long end) +{ + unsigned long pfn; + + for (pfn = PFN_UP(begin); pfn < PFN_DOWN(end); pfn++) { + struct page *page = pfn_to_page(pfn); + void *addr = phys_to_virt(PFN_PHYS(pfn)); + + memset(addr, POISON_FREE_INITMEM, PAGE_SIZE); + free_reserved_page(page); + } + printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); +} + +void (*free_init_pages_eva)(void *begin, void *end) = NULL; + +void __weak __init prom_free_prom_memory(void) +{ + /* nothing to do */ +} + +void __ref free_initmem(void) +{ + prom_free_prom_memory(); + /* + * Let the platform define a specific function to free the + * init section since EVA may have used any possible mapping + * between virtual and physical addresses. + */ + if (free_init_pages_eva) + free_init_pages_eva((void *)&__init_begin, (void *)&__init_end); + else + free_initmem_default(POISON_FREE_INITMEM); +} + +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) +{ + return node_distance(cpu_to_node(from), cpu_to_node(to)); +} + +static int __init pcpu_cpu_to_node(int cpu) +{ + return cpu_to_node(cpu); +} + +void __init setup_per_cpu_areas(void) +{ + unsigned long delta; + unsigned int cpu; + int rc; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, + pcpu_cpu_distance, + pcpu_cpu_to_node); + if (rc < 0) + panic("Failed to initialize percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; +} +#endif + +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT +unsigned long pgd_current[NR_CPUS]; +#endif + +/* + * Align swapper_pg_dir in to 64K, allows its address to be loaded + * with a single LUI instruction in the TLB handlers. If we used + * __aligned(64K), its size would get rounded up to the alignment + * size, and waste space. So we place it in its own section and align + * it in the linker script. + */ +pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); +#ifndef __PAGETABLE_PUD_FOLDED +pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss; +#endif +#ifndef __PAGETABLE_PMD_FOLDED +pmd_t invalid_pmd_table[PTRS_PER_PMD] __page_aligned_bss; +EXPORT_SYMBOL_GPL(invalid_pmd_table); +#endif +pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss; +EXPORT_SYMBOL(invalid_pte_table); diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c new file mode 100644 index 000000000..b6dad2fd5 --- /dev/null +++ b/arch/mips/mm/ioremap.c @@ -0,0 +1,119 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * (C) Copyright 1995 1996 Linus Torvalds + * (C) Copyright 2001, 2002 Ralf Baechle + */ +#include <linux/export.h> +#include <asm/addrspace.h> +#include <asm/byteorder.h> +#include <linux/ioport.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/mm_types.h> +#include <linux/io.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <ioremap.h> + +#define IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL)) +#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) + +static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, + void *arg) +{ + unsigned long i; + + for (i = 0; i < nr_pages; i++) { + if (pfn_valid(start_pfn + i) && + !PageReserved(pfn_to_page(start_pfn + i))) + return 1; + } + + return 0; +} + +/* + * ioremap_prot - map bus memory into CPU space + * @phys_addr: bus address of the memory + * @size: size of the resource to map + * + * ioremap_prot gives the caller control over cache coherency attributes (CCA) + */ +void __iomem *ioremap_prot(phys_addr_t phys_addr, unsigned long size, + unsigned long prot_val) +{ + unsigned long flags = prot_val & _CACHE_MASK; + unsigned long offset, pfn, last_pfn; + struct vm_struct *area; + phys_addr_t last_addr; + unsigned long vaddr; + void __iomem *cpu_addr; + + cpu_addr = plat_ioremap(phys_addr, size, flags); + if (cpu_addr) + return cpu_addr; + + phys_addr = fixup_bigphys_addr(phys_addr, size); + + /* Don't allow wraparound or zero size */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + /* + * Map uncached objects in the low 512mb of address space using KSEG1, + * otherwise map using page tables. + */ + if (IS_LOW512(phys_addr) && IS_LOW512(last_addr) && + flags == _CACHE_UNCACHED) + return (void __iomem *) CKSEG1ADDR(phys_addr); + + /* + * Don't allow anybody to remap RAM that may be allocated by the page + * allocator, since that could lead to races & data clobbering. + */ + pfn = PFN_DOWN(phys_addr); + last_pfn = PFN_DOWN(last_addr); + if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, + __ioremap_check_ram) == 1) { + WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", + &phys_addr, &last_addr); + return NULL; + } + + /* + * Mappings have to be page-aligned + */ + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; + size = PAGE_ALIGN(last_addr + 1) - phys_addr; + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + vaddr = (unsigned long)area->addr; + + flags |= _PAGE_GLOBAL | _PAGE_PRESENT | __READABLE | __WRITEABLE; + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, + __pgprot(flags))) { + free_vm_area(area); + return NULL; + } + + return (void __iomem *)(vaddr + offset); +} +EXPORT_SYMBOL(ioremap_prot); + +void iounmap(const volatile void __iomem *addr) +{ + if (!plat_iounmap(addr) && !IS_KSEG1(addr)) + vunmap((void *)((unsigned long)addr & PAGE_MASK)); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/mips/mm/ioremap64.c b/arch/mips/mm/ioremap64.c new file mode 100644 index 000000000..15e7820d6 --- /dev/null +++ b/arch/mips/mm/ioremap64.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/io.h> +#include <ioremap.h> + +void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + unsigned long prot_val) +{ + unsigned long flags = prot_val & _CACHE_MASK; + u64 base = (flags == _CACHE_UNCACHED ? IO_BASE : UNCAC_BASE); + void __iomem *addr; + + addr = plat_ioremap(offset, size, flags); + if (!addr) + addr = (void __iomem *)(unsigned long)(base + offset); + return addr; +} +EXPORT_SYMBOL(ioremap_prot); + +void iounmap(const volatile void __iomem *addr) +{ + plat_iounmap(addr); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/mips/mm/maccess.c b/arch/mips/mm/maccess.c new file mode 100644 index 000000000..58173842c --- /dev/null +++ b/arch/mips/mm/maccess.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/uaccess.h> +#include <linux/kernel.h> + +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) +{ + /* highest bit set means kernel space */ + return (unsigned long)unsafe_src >> (BITS_PER_LONG - 1); +} diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c new file mode 100644 index 000000000..00fe90c6d --- /dev/null +++ b/arch/mips/mm/mmap.c @@ -0,0 +1,129 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2011 Wind River Systems, + * written by Ralf Baechle <ralf@linux-mips.org> + */ +#include <linux/compiler.h> +#include <linux/elf-randomize.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/export.h> +#include <linux/personality.h> +#include <linux/random.h> +#include <linux/sched/signal.h> +#include <linux/sched/mm.h> + +unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ +EXPORT_SYMBOL(shm_align_mask); + +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + shm_align_mask) & ~shm_align_mask) + \ + (((pgoff) << PAGE_SHIFT) & shm_align_mask)) + +enum mmap_allocation_direction {UP, DOWN}; + +static unsigned long arch_get_unmapped_area_common(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags, enum mmap_allocation_direction dir) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long addr = addr0; + int do_color_align; + struct vm_unmapped_area_info info; + + if (unlikely(len > TASK_SIZE)) + return -ENOMEM; + + if (flags & MAP_FIXED) { + /* Even MAP_FIXED mappings must reside within TASK_SIZE */ + if (TASK_SIZE - len < addr) + return -EINVAL; + + /* + * We do not accept a shared mapping if it would violate + * cache aliasing constraints. + */ + if ((flags & MAP_SHARED) && + ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask)) + return -EINVAL; + return addr; + } + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = 1; + + /* requesting a specific address */ + if (addr) { + if (do_color_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.length = len; + info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + + if (dir == DOWN) { + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.low_limit = PAGE_SIZE; + info.high_limit = mm->mmap_base; + addr = vm_unmapped_area(&info); + + if (!(addr & ~PAGE_MASK)) + return addr; + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + } + + info.flags = 0; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + return vm_unmapped_area(&info); +} + +unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr0, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + return arch_get_unmapped_area_common(filp, + addr0, len, pgoff, flags, UP); +} + +/* + * There is no need to export this but sched.h declares the function as + * extern so making it static here results in an error. + */ +unsigned long arch_get_unmapped_area_topdown(struct file *filp, + unsigned long addr0, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + return arch_get_unmapped_area_common(filp, + addr0, len, pgoff, flags, DOWN); +} + +bool __virt_addr_valid(const volatile void *kaddr) +{ + unsigned long vaddr = (unsigned long)kaddr; + + if ((vaddr < PAGE_OFFSET) || (vaddr >= MAP_BASE)) + return false; + + return pfn_valid(PFN_DOWN(virt_to_phys(kaddr))); +} +EXPORT_SYMBOL_GPL(__virt_addr_valid); diff --git a/arch/mips/mm/page-funcs.S b/arch/mips/mm/page-funcs.S new file mode 100644 index 000000000..43181ac0a --- /dev/null +++ b/arch/mips/mm/page-funcs.S @@ -0,0 +1,53 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Micro-assembler generated clear_page/copy_page functions. + * + * Copyright (C) 2012 MIPS Technologies, Inc. + * Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org> + */ +#include <asm/asm.h> +#include <asm/export.h> +#include <asm/regdef.h> + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +#define cpu_clear_page_function_name clear_page_cpu +#define cpu_copy_page_function_name copy_page_cpu +#else +#define cpu_clear_page_function_name clear_page +#define cpu_copy_page_function_name copy_page +#endif + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x058 bytes + * R4600 v1.7: 0x05c bytes + * R4600 v2.0: 0x060 bytes + * With prefetching, 16 word strides 0x120 bytes + */ +EXPORT(__clear_page_start) +LEAF(cpu_clear_page_function_name) +EXPORT_SYMBOL(cpu_clear_page_function_name) +1: j 1b /* Dummy, will be replaced. */ + .space 288 +END(cpu_clear_page_function_name) +EXPORT(__clear_page_end) + +/* + * Maximum sizes: + * + * R4000 128 bytes S-cache: 0x11c bytes + * R4600 v1.7: 0x080 bytes + * R4600 v2.0: 0x07c bytes + * With prefetching, 16 word strides 0x540 bytes + */ +EXPORT(__copy_page_start) +LEAF(cpu_copy_page_function_name) +EXPORT_SYMBOL(cpu_copy_page_function_name) +1: j 1b /* Dummy, will be replaced. */ + .space 1344 +END(cpu_copy_page_function_name) +EXPORT(__copy_page_end) diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c new file mode 100644 index 000000000..d3b4459d0 --- /dev/null +++ b/arch/mips/mm/page.c @@ -0,0 +1,682 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2007 Maciej W. Rozycki + * Copyright (C) 2008 Thiemo Seufer + * Copyright (C) 2012 MIPS Technologies, Inc. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> + +#include <asm/bugs.h> +#include <asm/cacheops.h> +#include <asm/cpu-type.h> +#include <asm/inst.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/prefetch.h> +#include <asm/bootinfo.h> +#include <asm/mipsregs.h> +#include <asm/mmu_context.h> +#include <asm/cpu.h> + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +#include <asm/sibyte/sb1250.h> +#include <asm/sibyte/sb1250_regs.h> +#include <asm/sibyte/sb1250_dma.h> +#endif + +#include <asm/uasm.h> + +/* Registers used in the assembled routines. */ +#define ZERO 0 +#define AT 2 +#define A0 4 +#define A1 5 +#define A2 6 +#define T0 8 +#define T1 9 +#define T2 10 +#define T3 11 +#define T9 25 +#define RA 31 + +/* Handle labels (which must be positive integers). */ +enum label_id { + label_clear_nopref = 1, + label_clear_pref, + label_copy_nopref, + label_copy_pref_both, + label_copy_pref_store, +}; + +UASM_L_LA(_clear_nopref) +UASM_L_LA(_clear_pref) +UASM_L_LA(_copy_nopref) +UASM_L_LA(_copy_pref_both) +UASM_L_LA(_copy_pref_store) + +/* We need one branch and therefore one relocation per target label. */ +static struct uasm_label labels[5]; +static struct uasm_reloc relocs[5]; + +#define cpu_is_r4600_v1_x() ((read_c0_prid() & 0xfffffff0) == 0x00002010) +#define cpu_is_r4600_v2_x() ((read_c0_prid() & 0xfffffff0) == 0x00002020) + +/* + * R6 has a limited offset of the pref instruction. + * Skip it if the offset is more than 9 bits. + */ +#define _uasm_i_pref(a, b, c, d) \ +do { \ + if (cpu_has_mips_r6) { \ + if (c <= 0xff && c >= -0x100) \ + uasm_i_pref(a, b, c, d);\ + } else { \ + uasm_i_pref(a, b, c, d); \ + } \ +} while(0) + +static int pref_bias_clear_store; +static int pref_bias_copy_load; +static int pref_bias_copy_store; + +static u32 pref_src_mode; +static u32 pref_dst_mode; + +static int clear_word_size; +static int copy_word_size; + +static int half_clear_loop_size; +static int half_copy_loop_size; + +static int cache_line_size; +#define cache_line_mask() (cache_line_size - 1) + +static inline void +pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off) +{ + if (cpu_has_64bit_gp_regs && + IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS) && + r4k_daddiu_bug()) { + if (off > 0x7fff) { + uasm_i_lui(buf, T9, uasm_rel_hi(off)); + uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); + } else + uasm_i_addiu(buf, T9, ZERO, off); + uasm_i_daddu(buf, reg1, reg2, T9); + } else { + if (off > 0x7fff) { + uasm_i_lui(buf, T9, uasm_rel_hi(off)); + uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off)); + UASM_i_ADDU(buf, reg1, reg2, T9); + } else + UASM_i_ADDIU(buf, reg1, reg2, off); + } +} + +static void set_prefetch_parameters(void) +{ + if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) + clear_word_size = 8; + else + clear_word_size = 4; + + if (cpu_has_64bit_gp_regs) + copy_word_size = 8; + else + copy_word_size = 4; + + /* + * The pref's used here are using "streaming" hints, which cause the + * copied data to be kicked out of the cache sooner. A page copy often + * ends up copying a lot more data than is commonly used, so this seems + * to make sense in terms of reducing cache pollution, but I've no real + * performance data to back this up. + */ + if (cpu_has_prefetch) { + /* + * XXX: Most prefetch bias values in here are based on + * guesswork. + */ + cache_line_size = cpu_dcache_line_size(); + switch (current_cpu_type()) { + case CPU_R5500: + case CPU_TX49XX: + /* These processors only support the Pref_Load. */ + pref_bias_copy_load = 256; + break; + + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: + case CPU_R16000: + /* + * Those values have been experimentally tuned for an + * Origin 200. + */ + pref_bias_clear_store = 512; + pref_bias_copy_load = 256; + pref_bias_copy_store = 256; + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_StoreStreamed; + break; + + case CPU_SB1: + case CPU_SB1A: + pref_bias_clear_store = 128; + pref_bias_copy_load = 128; + pref_bias_copy_store = 128; + /* + * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed + * hints are broken. + */ + if (current_cpu_type() == CPU_SB1 && + (current_cpu_data.processor_id & 0xff) < 0x02) { + pref_src_mode = Pref_Load; + pref_dst_mode = Pref_Store; + } else { + pref_src_mode = Pref_LoadStreamed; + pref_dst_mode = Pref_StoreStreamed; + } + break; + + case CPU_LOONGSON64: + /* Loongson-3 only support the Pref_Load/Pref_Store. */ + pref_bias_clear_store = 128; + pref_bias_copy_load = 128; + pref_bias_copy_store = 128; + pref_src_mode = Pref_Load; + pref_dst_mode = Pref_Store; + break; + + default: + pref_bias_clear_store = 128; + pref_bias_copy_load = 256; + pref_bias_copy_store = 128; + pref_src_mode = Pref_LoadStreamed; + if (cpu_has_mips_r6) + /* + * Bit 30 (Pref_PrepareForStore) has been + * removed from MIPS R6. Use bit 5 + * (Pref_StoreStreamed). + */ + pref_dst_mode = Pref_StoreStreamed; + else + pref_dst_mode = Pref_PrepareForStore; + break; + } + } else { + if (cpu_has_cache_cdex_s) + cache_line_size = cpu_scache_line_size(); + else if (cpu_has_cache_cdex_p) + cache_line_size = cpu_dcache_line_size(); + } + /* + * Too much unrolling will overflow the available space in + * clear_space_array / copy_page_array. + */ + half_clear_loop_size = min(16 * clear_word_size, + max(cache_line_size >> 1, + 4 * clear_word_size)); + half_copy_loop_size = min(16 * copy_word_size, + max(cache_line_size >> 1, + 4 * copy_word_size)); +} + +static void build_clear_store(u32 **buf, int off) +{ + if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) { + uasm_i_sd(buf, ZERO, off, A0); + } else { + uasm_i_sw(buf, ZERO, off, A0); + } +} + +static inline void build_clear_pref(u32 **buf, int off) +{ + if (off & cache_line_mask()) + return; + + if (pref_bias_clear_store) { + _uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off, + A0); + } else if (cache_line_size == (half_clear_loop_size << 1)) { + if (cpu_has_cache_cdex_s) { + uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); + } else if (cpu_has_cache_cdex_p) { + if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) && + cpu_is_r4600_v1_x()) { + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + } + + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && + cpu_is_r4600_v2_x()) + uasm_i_lw(buf, ZERO, ZERO, AT); + + uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); + } + } +} + +extern u32 __clear_page_start; +extern u32 __clear_page_end; +extern u32 __copy_page_start; +extern u32 __copy_page_end; + +void build_clear_page(void) +{ + int off; + u32 *buf = &__clear_page_start; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + int i; + static atomic_t run_once = ATOMIC_INIT(0); + + if (atomic_xchg(&run_once, 1)) { + return; + } + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + set_prefetch_parameters(); + + /* + * This algorithm makes the following assumptions: + * - The prefetch bias is a multiple of 2 words. + * - The prefetch bias is less than one page. + */ + BUG_ON(pref_bias_clear_store % (2 * clear_word_size)); + BUG_ON(PAGE_SIZE < pref_bias_clear_store); + + off = PAGE_SIZE - pref_bias_clear_store; + if (off > 0xffff || !pref_bias_clear_store) + pg_addiu(&buf, A2, A0, off); + else + uasm_i_ori(&buf, A2, A0, off); + + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x()) + uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000)); + + off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size) + * cache_line_size : 0; + while (off) { + build_clear_pref(&buf, -off); + off -= cache_line_size; + } + uasm_l_clear_pref(&l, buf); + do { + build_clear_pref(&buf, off); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < half_clear_loop_size); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_clear_pref(&buf, off); + if (off == -clear_word_size) + uasm_il_bne(&buf, &r, A0, A2, label_clear_pref); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < 0); + + if (pref_bias_clear_store) { + pg_addiu(&buf, A2, A0, pref_bias_clear_store); + uasm_l_clear_nopref(&l, buf); + off = 0; + do { + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < half_clear_loop_size); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + if (off == -clear_word_size) + uasm_il_bne(&buf, &r, A0, A2, + label_clear_nopref); + build_clear_store(&buf, off); + off += clear_word_size; + } while (off < 0); + } + + uasm_i_jr(&buf, RA); + uasm_i_nop(&buf); + + BUG_ON(buf > &__clear_page_end); + + uasm_resolve_relocs(relocs, labels); + + pr_debug("Synthesized clear page handler (%u instructions).\n", + (u32)(buf - &__clear_page_start)); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + for (i = 0; i < (buf - &__clear_page_start); i++) + pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]); + pr_debug("\t.set pop\n"); +} + +static void build_copy_load(u32 **buf, int reg, int off) +{ + if (cpu_has_64bit_gp_regs) { + uasm_i_ld(buf, reg, off, A1); + } else { + uasm_i_lw(buf, reg, off, A1); + } +} + +static void build_copy_store(u32 **buf, int reg, int off) +{ + if (cpu_has_64bit_gp_regs) { + uasm_i_sd(buf, reg, off, A0); + } else { + uasm_i_sw(buf, reg, off, A0); + } +} + +static inline void build_copy_load_pref(u32 **buf, int off) +{ + if (off & cache_line_mask()) + return; + + if (pref_bias_copy_load) + _uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1); +} + +static inline void build_copy_store_pref(u32 **buf, int off) +{ + if (off & cache_line_mask()) + return; + + if (pref_bias_copy_store) { + _uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off, + A0); + } else if (cache_line_size == (half_copy_loop_size << 1)) { + if (cpu_has_cache_cdex_s) { + uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0); + } else if (cpu_has_cache_cdex_p) { + if (IS_ENABLED(CONFIG_WAR_R4600_V1_HIT_CACHEOP) && + cpu_is_r4600_v1_x()) { + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + uasm_i_nop(buf); + } + + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && + cpu_is_r4600_v2_x()) + uasm_i_lw(buf, ZERO, ZERO, AT); + + uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0); + } + } +} + +void build_copy_page(void) +{ + int off; + u32 *buf = &__copy_page_start; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + int i; + static atomic_t run_once = ATOMIC_INIT(0); + + if (atomic_xchg(&run_once, 1)) { + return; + } + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + set_prefetch_parameters(); + + /* + * This algorithm makes the following assumptions: + * - All prefetch biases are multiples of 8 words. + * - The prefetch biases are less than one page. + * - The store prefetch bias isn't greater than the load + * prefetch bias. + */ + BUG_ON(pref_bias_copy_load % (8 * copy_word_size)); + BUG_ON(pref_bias_copy_store % (8 * copy_word_size)); + BUG_ON(PAGE_SIZE < pref_bias_copy_load); + BUG_ON(pref_bias_copy_store > pref_bias_copy_load); + + off = PAGE_SIZE - pref_bias_copy_load; + if (off > 0xffff || !pref_bias_copy_load) + pg_addiu(&buf, A2, A0, off); + else + uasm_i_ori(&buf, A2, A0, off); + + if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x()) + uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000)); + + off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) * + cache_line_size : 0; + while (off) { + build_copy_load_pref(&buf, -off); + off -= cache_line_size; + } + off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) * + cache_line_size : 0; + while (off) { + build_copy_store_pref(&buf, -off); + off -= cache_line_size; + } + uasm_l_copy_pref_both(&l, buf); + do { + build_copy_load_pref(&buf, off); + build_copy_load(&buf, T0, off); + build_copy_load_pref(&buf, off + copy_word_size); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load_pref(&buf, off + 2 * copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load_pref(&buf, off + 3 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < half_copy_loop_size); + pg_addiu(&buf, A1, A1, 2 * off); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_copy_load_pref(&buf, off); + build_copy_load(&buf, T0, off); + build_copy_load_pref(&buf, off + copy_word_size); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load_pref(&buf, off + 2 * copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load_pref(&buf, off + 3 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + if (off == -(4 * copy_word_size)) + uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < 0); + + if (pref_bias_copy_load - pref_bias_copy_store) { + pg_addiu(&buf, A2, A0, + pref_bias_copy_load - pref_bias_copy_store); + uasm_l_copy_pref_store(&l, buf); + off = 0; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < half_copy_loop_size); + pg_addiu(&buf, A1, A1, 2 * off); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store_pref(&buf, off); + build_copy_store(&buf, T0, off); + build_copy_store_pref(&buf, off + copy_word_size); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store_pref(&buf, off + 2 * copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store_pref(&buf, off + 3 * copy_word_size); + if (off == -(4 * copy_word_size)) + uasm_il_bne(&buf, &r, A2, A0, + label_copy_pref_store); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < 0); + } + + if (pref_bias_copy_store) { + pg_addiu(&buf, A2, A0, pref_bias_copy_store); + uasm_l_copy_nopref(&l, buf); + off = 0; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, T0, off); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < half_copy_loop_size); + pg_addiu(&buf, A1, A1, 2 * off); + pg_addiu(&buf, A0, A0, 2 * off); + off = -off; + do { + build_copy_load(&buf, T0, off); + build_copy_load(&buf, T1, off + copy_word_size); + build_copy_load(&buf, T2, off + 2 * copy_word_size); + build_copy_load(&buf, T3, off + 3 * copy_word_size); + build_copy_store(&buf, T0, off); + build_copy_store(&buf, T1, off + copy_word_size); + build_copy_store(&buf, T2, off + 2 * copy_word_size); + if (off == -(4 * copy_word_size)) + uasm_il_bne(&buf, &r, A2, A0, + label_copy_nopref); + build_copy_store(&buf, T3, off + 3 * copy_word_size); + off += 4 * copy_word_size; + } while (off < 0); + } + + uasm_i_jr(&buf, RA); + uasm_i_nop(&buf); + + BUG_ON(buf > &__copy_page_end); + + uasm_resolve_relocs(relocs, labels); + + pr_debug("Synthesized copy page handler (%u instructions).\n", + (u32)(buf - &__copy_page_start)); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + for (i = 0; i < (buf - &__copy_page_start); i++) + pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]); + pr_debug("\t.set pop\n"); +} + +#ifdef CONFIG_SIBYTE_DMA_PAGEOPS +extern void clear_page_cpu(void *page); +extern void copy_page_cpu(void *to, void *from); + +/* + * Pad descriptors to cacheline, since each is exclusively owned by a + * particular CPU. + */ +struct dmadscr { + u64 dscr_a; + u64 dscr_b; + u64 pad_a; + u64 pad_b; +} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS]; + +void clear_page(void *page) +{ + u64 to_phys = CPHYSADDR((unsigned long)page); + unsigned int cpu = smp_processor_id(); + + /* if the page is not in KSEG0, use old way */ + if ((long)KSEGX((unsigned long)page) != (long)CKSEG0) + return clear_page_cpu(page); + + page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM | + M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; + page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); + __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); + + /* + * Don't really want to do it this way, but there's no + * reliable way to delay completion detection. + */ + while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) + & M_DM_DSCR_BASE_INTERRUPT)) + ; + __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); +} +EXPORT_SYMBOL(clear_page); + +void copy_page(void *to, void *from) +{ + u64 from_phys = CPHYSADDR((unsigned long)from); + u64 to_phys = CPHYSADDR((unsigned long)to); + unsigned int cpu = smp_processor_id(); + + /* if any page is not in KSEG0, use old way */ + if ((long)KSEGX((unsigned long)to) != (long)CKSEG0 + || (long)KSEGX((unsigned long)from) != (long)CKSEG0) + return copy_page_cpu(to, from); + + page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST | + M_DM_DSCRA_INTERRUPT; + page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); + __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT))); + + /* + * Don't really want to do it this way, but there's no + * reliable way to delay completion detection. + */ + while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG))) + & M_DM_DSCR_BASE_INTERRUPT)) + ; + __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE))); +} +EXPORT_SYMBOL(copy_page); + +#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */ diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c new file mode 100644 index 000000000..61891af25 --- /dev/null +++ b/arch/mips/mm/pgtable-32.c @@ -0,0 +1,90 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003 by Ralf Baechle + */ +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/highmem.h> +#include <asm/fixmap.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +void pgd_init(unsigned long page) +{ + unsigned long *p = (unsigned long *) page; + int i; + + for (i = 0; i < USER_PTRS_PER_PGD; i+=8) { + p[i + 0] = (unsigned long) invalid_pte_table; + p[i + 1] = (unsigned long) invalid_pte_table; + p[i + 2] = (unsigned long) invalid_pte_table; + p[i + 3] = (unsigned long) invalid_pte_table; + p[i + 4] = (unsigned long) invalid_pte_table; + p[i + 5] = (unsigned long) invalid_pte_table; + p[i + 6] = (unsigned long) invalid_pte_table; + p[i + 7] = (unsigned long) invalid_pte_table; + } +} + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) +pmd_t mk_pmd(struct page *page, pgprot_t prot) +{ + pmd_t pmd; + + pmd_val(pmd) = (page_to_pfn(page) << _PFN_SHIFT) | pgprot_val(prot); + + return pmd; +} + + +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} +#endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) */ + +void __init pagetable_init(void) +{ + unsigned long vaddr; + pgd_t *pgd_base; +#ifdef CONFIG_HIGHMEM + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; +#endif + + /* Initialize the entire pgd. */ + pgd_init((unsigned long)swapper_pg_dir); + pgd_init((unsigned long)swapper_pg_dir + + sizeof(pgd_t) * USER_PTRS_PER_PGD); + + pgd_base = swapper_pg_dir; + + /* + * Fixed mappings: + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1); + fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base); + +#ifdef CONFIG_HIGHMEM + /* + * Permanent kmaps: + */ + vaddr = PKMAP_BASE; + fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); + + pgd = swapper_pg_dir + pgd_index(vaddr); + p4d = p4d_offset(pgd, vaddr); + pud = pud_offset(p4d, vaddr); + pmd = pmd_offset(pud, vaddr); + pte = pte_offset_kernel(pmd, vaddr); + pkmap_page_table = pte; +#endif +} diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c new file mode 100644 index 000000000..7536f7804 --- /dev/null +++ b/arch/mips/mm/pgtable-64.c @@ -0,0 +1,124 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999, 2000 by Silicon Graphics + * Copyright (C) 2003 by Ralf Baechle + */ +#include <linux/export.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <asm/fixmap.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +void pgd_init(unsigned long page) +{ + unsigned long *p, *end; + unsigned long entry; + +#if !defined(__PAGETABLE_PUD_FOLDED) + entry = (unsigned long)invalid_pud_table; +#elif !defined(__PAGETABLE_PMD_FOLDED) + entry = (unsigned long)invalid_pmd_table; +#else + entry = (unsigned long)invalid_pte_table; +#endif + + p = (unsigned long *) page; + end = p + PTRS_PER_PGD; + + do { + p[0] = entry; + p[1] = entry; + p[2] = entry; + p[3] = entry; + p[4] = entry; + p += 8; + p[-3] = entry; + p[-2] = entry; + p[-1] = entry; + } while (p != end); +} + +#ifndef __PAGETABLE_PMD_FOLDED +void pmd_init(unsigned long addr, unsigned long pagetable) +{ + unsigned long *p, *end; + + p = (unsigned long *) addr; + end = p + PTRS_PER_PMD; + + do { + p[0] = pagetable; + p[1] = pagetable; + p[2] = pagetable; + p[3] = pagetable; + p[4] = pagetable; + p += 8; + p[-3] = pagetable; + p[-2] = pagetable; + p[-1] = pagetable; + } while (p != end); +} +EXPORT_SYMBOL_GPL(pmd_init); +#endif + +#ifndef __PAGETABLE_PUD_FOLDED +void pud_init(unsigned long addr, unsigned long pagetable) +{ + unsigned long *p, *end; + + p = (unsigned long *)addr; + end = p + PTRS_PER_PUD; + + do { + p[0] = pagetable; + p[1] = pagetable; + p[2] = pagetable; + p[3] = pagetable; + p[4] = pagetable; + p += 8; + p[-3] = pagetable; + p[-2] = pagetable; + p[-1] = pagetable; + } while (p != end); +} +#endif + +pmd_t mk_pmd(struct page *page, pgprot_t prot) +{ + pmd_t pmd; + + pmd_val(pmd) = (page_to_pfn(page) << _PFN_SHIFT) | pgprot_val(prot); + + return pmd; +} + +void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + *pmdp = pmd; +} + +void __init pagetable_init(void) +{ + unsigned long vaddr; + pgd_t *pgd_base; + + /* Initialize the entire pgd. */ + pgd_init((unsigned long)swapper_pg_dir); +#ifndef __PAGETABLE_PUD_FOLDED + pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table); +#endif +#ifndef __PAGETABLE_PMD_FOLDED + pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); +#endif + pgd_base = swapper_pg_dir; + /* + * Fixed mappings: + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base); +} diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c new file mode 100644 index 000000000..3b7590660 --- /dev/null +++ b/arch/mips/mm/pgtable.c @@ -0,0 +1,25 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/string.h> +#include <asm/pgalloc.h> + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret, *init; + + ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER); + if (ret) { + init = pgd_offset(&init_mm, 0UL); + pgd_init((unsigned long)ret); + memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + } + + return ret; +} +EXPORT_SYMBOL_GPL(pgd_alloc); diff --git a/arch/mips/mm/physaddr.c b/arch/mips/mm/physaddr.c new file mode 100644 index 000000000..f9b8c85e9 --- /dev/null +++ b/arch/mips/mm/physaddr.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bug.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/mmdebug.h> +#include <linux/mm.h> + +#include <asm/addrspace.h> +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/dma.h> + +static inline bool __debug_virt_addr_valid(unsigned long x) +{ + /* + * MAX_DMA_ADDRESS is a virtual address that may not correspond to an + * actual physical address. Enough code relies on + * virt_to_phys(MAX_DMA_ADDRESS) that we just need to work around it + * and always return true. + */ + if (x == MAX_DMA_ADDRESS) + return true; + + return x >= PAGE_OFFSET && (KSEGX(x) < KSEG2 || + IS_ENABLED(CONFIG_EVA) || + !IS_ENABLED(CONFIG_HIGHMEM)); +} + +phys_addr_t __virt_to_phys(volatile const void *x) +{ + WARN(!__debug_virt_addr_valid((unsigned long)x), + "virt_to_phys used for non-linear address: %pK (%pS)\n", + x, x); + + return __virt_to_phys_nodebug(x); +} +EXPORT_SYMBOL(__virt_to_phys); + +phys_addr_t __phys_addr_symbol(unsigned long x) +{ + /* This is bounds checking against the kernel image only. + * __pa_symbol should only be used on kernel symbol addresses. + */ + VIRTUAL_BUG_ON(x < (unsigned long)_text || + x > (unsigned long)_end); + + return __pa_symbol_nodebug(x); +} +EXPORT_SYMBOL(__phys_addr_symbol); diff --git a/arch/mips/mm/sc-debugfs.c b/arch/mips/mm/sc-debugfs.c new file mode 100644 index 000000000..80ff39471 --- /dev/null +++ b/arch/mips/mm/sc-debugfs.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2015 Imagination Technologies + * Author: Paul Burton <paul.burton@mips.com> + */ + +#include <asm/bcache.h> +#include <asm/debug.h> +#include <linux/uaccess.h> +#include <linux/debugfs.h> +#include <linux/init.h> + +static ssize_t sc_prefetch_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + bool enabled = bc_prefetch_is_enabled(); + char buf[3]; + + buf[0] = enabled ? 'Y' : 'N'; + buf[1] = '\n'; + buf[2] = 0; + + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t sc_prefetch_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + bool enabled; + int err; + + err = kstrtobool_from_user(user_buf, count, &enabled); + if (err) + return err; + + if (enabled) + bc_prefetch_enable(); + else + bc_prefetch_disable(); + + return count; +} + +static const struct file_operations sc_prefetch_fops = { + .open = simple_open, + .llseek = default_llseek, + .read = sc_prefetch_read, + .write = sc_prefetch_write, +}; + +static int __init sc_debugfs_init(void) +{ + struct dentry *dir; + + dir = debugfs_create_dir("l2cache", mips_debugfs_dir); + debugfs_create_file("prefetch", S_IRUGO | S_IWUSR, dir, NULL, + &sc_prefetch_fops); + return 0; +} +late_initcall(sc_debugfs_init); diff --git a/arch/mips/mm/sc-ip22.c b/arch/mips/mm/sc-ip22.c new file mode 100644 index 000000000..d7238687d --- /dev/null +++ b/arch/mips/mm/sc-ip22.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sc-ip22.c: Indy cache management functions. + * + * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org), + * derived from r4xx0.c by David S. Miller (davem@davemloft.net). + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> + +#include <asm/bcache.h> +#include <asm/page.h> +#include <asm/bootinfo.h> +#include <asm/sgi/ip22.h> +#include <asm/sgi/mc.h> + +/* Secondary cache size in bytes, if present. */ +static unsigned long scache_size; + +#undef DEBUG_CACHE + +#define SC_SIZE 0x00080000 +#define SC_LINE 32 +#define CI_MASK (SC_SIZE - SC_LINE) +#define SC_INDEX(n) ((n) & CI_MASK) + +static inline void indy_sc_wipe(unsigned long first, unsigned long last) +{ + unsigned long tmp; + + __asm__ __volatile__( + " .set push # indy_sc_wipe \n" + " .set noreorder \n" + " .set mips3 \n" + " .set noat \n" + " mfc0 %2, $12 \n" + " li $1, 0x80 # Go 64 bit \n" + " mtc0 $1, $12 \n" + " \n" + " # \n" + " # Open code a dli $1, 0x9000000080000000 \n" + " # \n" + " # Required because binutils 2.25 will happily accept \n" + " # 64 bit instructions in .set mips3 mode but puke on \n" + " # 64 bit constants when generating 32 bit ELF \n" + " # \n" + " lui $1,0x9000 \n" + " dsll $1,$1,0x10 \n" + " ori $1,$1,0x8000 \n" + " dsll $1,$1,0x10 \n" + " \n" + " or %0, $1 # first line to flush \n" + " or %1, $1 # last line to flush \n" + " .set at \n" + " \n" + "1: sw $0, 0(%0) \n" + " bne %0, %1, 1b \n" + " daddu %0, 32 \n" + " \n" + " mtc0 %2, $12 # Back to 32 bit \n" + " nop # pipeline hazard \n" + " nop \n" + " nop \n" + " nop \n" + " .set pop \n" + : "=r" (first), "=r" (last), "=&r" (tmp) + : "0" (first), "1" (last)); +} + +static void indy_sc_wback_invalidate(unsigned long addr, unsigned long size) +{ + unsigned long first_line, last_line; + unsigned long flags; + +#ifdef DEBUG_CACHE + printk("indy_sc_wback_invalidate[%08lx,%08lx]", addr, size); +#endif + + /* Catch bad driver code */ + BUG_ON(size == 0); + + /* Which lines to flush? */ + first_line = SC_INDEX(addr); + last_line = SC_INDEX(addr + size - 1); + + local_irq_save(flags); + if (first_line <= last_line) { + indy_sc_wipe(first_line, last_line); + goto out; + } + + indy_sc_wipe(first_line, SC_SIZE - SC_LINE); + indy_sc_wipe(0, last_line); +out: + local_irq_restore(flags); +} + +static void indy_sc_enable(void) +{ + unsigned long addr, tmp1, tmp2; + + /* This is really cool... */ +#ifdef DEBUG_CACHE + printk("Enabling R4600 SCACHE\n"); +#endif + __asm__ __volatile__( + ".set\tpush\n\t" + ".set\tnoreorder\n\t" + ".set\tmips3\n\t" + "mfc0\t%2, $12\n\t" + "nop; nop; nop; nop;\n\t" + "li\t%1, 0x80\n\t" + "mtc0\t%1, $12\n\t" + "nop; nop; nop; nop;\n\t" + "li\t%0, 0x1\n\t" + "dsll\t%0, 31\n\t" + "lui\t%1, 0x9000\n\t" + "dsll32\t%1, 0\n\t" + "or\t%0, %1, %0\n\t" + "sb\t$0, 0(%0)\n\t" + "mtc0\t$0, $12\n\t" + "nop; nop; nop; nop;\n\t" + "mtc0\t%2, $12\n\t" + "nop; nop; nop; nop;\n\t" + ".set\tpop" + : "=r" (tmp1), "=r" (tmp2), "=r" (addr)); +} + +static void indy_sc_disable(void) +{ + unsigned long tmp1, tmp2, tmp3; + +#ifdef DEBUG_CACHE + printk("Disabling R4600 SCACHE\n"); +#endif + __asm__ __volatile__( + ".set\tpush\n\t" + ".set\tnoreorder\n\t" + ".set\tmips3\n\t" + "li\t%0, 0x1\n\t" + "dsll\t%0, 31\n\t" + "lui\t%1, 0x9000\n\t" + "dsll32\t%1, 0\n\t" + "or\t%0, %1, %0\n\t" + "mfc0\t%2, $12\n\t" + "nop; nop; nop; nop\n\t" + "li\t%1, 0x80\n\t" + "mtc0\t%1, $12\n\t" + "nop; nop; nop; nop\n\t" + "sh\t$0, 0(%0)\n\t" + "mtc0\t$0, $12\n\t" + "nop; nop; nop; nop\n\t" + "mtc0\t%2, $12\n\t" + "nop; nop; nop; nop\n\t" + ".set\tpop" + : "=r" (tmp1), "=r" (tmp2), "=r" (tmp3)); +} + +static inline int __init indy_sc_probe(void) +{ + unsigned int size = ip22_eeprom_read(&sgimc->eeprom, 17); + if (size == 0) + return 0; + + size <<= PAGE_SHIFT; + printk(KERN_INFO "R4600/R5000 SCACHE size %dK, linesize 32 bytes.\n", + size >> 10); + scache_size = size; + + return 1; +} + +/* XXX Check with wje if the Indy caches can differentiate between + writeback + invalidate and just invalidate. */ +static struct bcache_ops indy_sc_ops = { + .bc_enable = indy_sc_enable, + .bc_disable = indy_sc_disable, + .bc_wback_inv = indy_sc_wback_invalidate, + .bc_inv = indy_sc_wback_invalidate +}; + +void indy_sc_init(void) +{ + if (indy_sc_probe()) { + indy_sc_enable(); + bcops = &indy_sc_ops; + } +} diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c new file mode 100644 index 000000000..06ec304ad --- /dev/null +++ b/arch/mips/mm/sc-mips.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2006 Chris Dearman (chris@mips.com), + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> + +#include <asm/cpu-type.h> +#include <asm/mipsregs.h> +#include <asm/bcache.h> +#include <asm/cacheops.h> +#include <asm/page.h> +#include <asm/mmu_context.h> +#include <asm/r4kcache.h> +#include <asm/mips-cps.h> +#include <asm/bootinfo.h> + +/* + * MIPS32/MIPS64 L2 cache handling + */ + +/* + * Writeback and invalidate the secondary cache before DMA. + */ +static void mips_sc_wback_inv(unsigned long addr, unsigned long size) +{ + blast_scache_range(addr, addr + size); +} + +/* + * Invalidate the secondary cache before DMA. + */ +static void mips_sc_inv(unsigned long addr, unsigned long size) +{ + unsigned long lsize = cpu_scache_line_size(); + unsigned long almask = ~(lsize - 1); + + cache_op(Hit_Writeback_Inv_SD, addr & almask); + cache_op(Hit_Writeback_Inv_SD, (addr + size - 1) & almask); + blast_inv_scache_range(addr, addr + size); +} + +static void mips_sc_enable(void) +{ + /* L2 cache is permanently enabled */ +} + +static void mips_sc_disable(void) +{ + /* L2 cache is permanently enabled */ +} + +static void mips_sc_prefetch_enable(void) +{ + unsigned long pftctl; + + if (mips_cm_revision() < CM_REV_CM2_5) + return; + + /* + * If there is one or more L2 prefetch unit present then enable + * prefetching for both code & data, for all ports. + */ + pftctl = read_gcr_l2_pft_control(); + if (pftctl & CM_GCR_L2_PFT_CONTROL_NPFT) { + pftctl &= ~CM_GCR_L2_PFT_CONTROL_PAGEMASK; + pftctl |= PAGE_MASK & CM_GCR_L2_PFT_CONTROL_PAGEMASK; + pftctl |= CM_GCR_L2_PFT_CONTROL_PFTEN; + write_gcr_l2_pft_control(pftctl); + + set_gcr_l2_pft_control_b(CM_GCR_L2_PFT_CONTROL_B_PORTID | + CM_GCR_L2_PFT_CONTROL_B_CEN); + } +} + +static void mips_sc_prefetch_disable(void) +{ + if (mips_cm_revision() < CM_REV_CM2_5) + return; + + clear_gcr_l2_pft_control(CM_GCR_L2_PFT_CONTROL_PFTEN); + clear_gcr_l2_pft_control_b(CM_GCR_L2_PFT_CONTROL_B_PORTID | + CM_GCR_L2_PFT_CONTROL_B_CEN); +} + +static bool mips_sc_prefetch_is_enabled(void) +{ + unsigned long pftctl; + + if (mips_cm_revision() < CM_REV_CM2_5) + return false; + + pftctl = read_gcr_l2_pft_control(); + if (!(pftctl & CM_GCR_L2_PFT_CONTROL_NPFT)) + return false; + return !!(pftctl & CM_GCR_L2_PFT_CONTROL_PFTEN); +} + +static struct bcache_ops mips_sc_ops = { + .bc_enable = mips_sc_enable, + .bc_disable = mips_sc_disable, + .bc_wback_inv = mips_sc_wback_inv, + .bc_inv = mips_sc_inv, + .bc_prefetch_enable = mips_sc_prefetch_enable, + .bc_prefetch_disable = mips_sc_prefetch_disable, + .bc_prefetch_is_enabled = mips_sc_prefetch_is_enabled, +}; + +/* + * Check if the L2 cache controller is activated on a particular platform. + * MTI's L2 controller and the L2 cache controller of Broadcom's BMIPS + * cores both use c0_config2's bit 12 as "L2 Bypass" bit, that is the + * cache being disabled. However there is no guarantee for this to be + * true on all platforms. In an act of stupidity the spec defined bits + * 12..15 as implementation defined so below function will eventually have + * to be replaced by a platform specific probe. + */ +static inline int mips_sc_is_activated(struct cpuinfo_mips *c) +{ + unsigned int config2 = read_c0_config2(); + unsigned int tmp; + + /* Check the bypass bit (L2B) */ + switch (current_cpu_type()) { + case CPU_34K: + case CPU_74K: + case CPU_1004K: + case CPU_1074K: + case CPU_INTERAPTIV: + case CPU_PROAPTIV: + case CPU_P5600: + case CPU_BMIPS5000: + case CPU_QEMU_GENERIC: + case CPU_P6600: + if (config2 & (1 << 12)) + return 0; + } + + tmp = (config2 >> 4) & 0x0f; + if (0 < tmp && tmp <= 7) + c->scache.linesz = 2 << tmp; + else + return 0; + return 1; +} + +static int mips_sc_probe_cm3(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned long cfg = read_gcr_l2_config(); + unsigned long sets, line_sz, assoc; + + if (cfg & CM_GCR_L2_CONFIG_BYPASS) + return 0; + + sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE; + sets >>= __ffs(CM_GCR_L2_CONFIG_SET_SIZE); + if (sets) + c->scache.sets = 64 << sets; + + line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE; + line_sz >>= __ffs(CM_GCR_L2_CONFIG_LINE_SIZE); + if (line_sz) + c->scache.linesz = 2 << line_sz; + + assoc = cfg & CM_GCR_L2_CONFIG_ASSOC; + assoc >>= __ffs(CM_GCR_L2_CONFIG_ASSOC); + c->scache.ways = assoc + 1; + c->scache.waysize = c->scache.sets * c->scache.linesz; + c->scache.waybit = __ffs(c->scache.waysize); + + if (c->scache.linesz) { + c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + c->options |= MIPS_CPU_INCLUSIVE_CACHES; + return 1; + } + + return 0; +} + +static inline int mips_sc_probe(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config1, config2; + unsigned int tmp; + + /* Mark as not present until probe completed */ + c->scache.flags |= MIPS_CACHE_NOT_PRESENT; + + if (mips_cm_revision() >= CM_REV_CM3) + return mips_sc_probe_cm3(); + + /* Ignore anything but MIPSxx processors */ + if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | + MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))) + return 0; + + /* Does this MIPS32/MIPS64 CPU have a config2 register? */ + config1 = read_c0_config1(); + if (!(config1 & MIPS_CONF_M)) + return 0; + + config2 = read_c0_config2(); + + if (!mips_sc_is_activated(c)) + return 0; + + tmp = (config2 >> 8) & 0x0f; + if (tmp <= 7) + c->scache.sets = 64 << tmp; + else + return 0; + + tmp = (config2 >> 0) & 0x0f; + if (tmp <= 7) + c->scache.ways = tmp + 1; + else + return 0; + + if (current_cpu_type() == CPU_XBURST) { + switch (mips_machtype) { + /* + * According to config2 it would be 5-ways, but that is + * contradicted by all documentation. + */ + case MACH_INGENIC_JZ4770: + case MACH_INGENIC_JZ4775: + c->scache.ways = 4; + break; + + /* + * According to config2 it would be 5-ways and 512-sets, + * but that is contradicted by all documentation. + */ + case MACH_INGENIC_X1000: + case MACH_INGENIC_X1000E: + c->scache.sets = 256; + c->scache.ways = 4; + break; + } + } + + c->scache.waysize = c->scache.sets * c->scache.linesz; + c->scache.waybit = __ffs(c->scache.waysize); + + c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT; + + return 1; +} + +int mips_sc_init(void) +{ + int found = mips_sc_probe(); + if (found) { + mips_sc_enable(); + mips_sc_prefetch_enable(); + bcops = &mips_sc_ops; + } + return found; +} diff --git a/arch/mips/mm/sc-r5k.c b/arch/mips/mm/sc-r5k.c new file mode 100644 index 000000000..736615d68 --- /dev/null +++ b/arch/mips/mm/sc-r5k.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org), + * derived from r4xx0.c by David S. Miller (davem@davemloft.net). + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/mm.h> + +#include <asm/mipsregs.h> +#include <asm/bcache.h> +#include <asm/cacheops.h> +#include <asm/page.h> +#include <asm/mmu_context.h> +#include <asm/r4kcache.h> + +/* Secondary cache size in bytes, if present. */ +static unsigned long scache_size; + +#define SC_LINE 32 +#define SC_PAGE (128*SC_LINE) + +static inline void blast_r5000_scache(void) +{ + unsigned long start = INDEX_BASE; + unsigned long end = start + scache_size; + + while(start < end) { + cache_op(R5K_Page_Invalidate_S, start); + start += SC_PAGE; + } +} + +static void r5k_dma_cache_inv_sc(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + /* Catch bad driver code */ + BUG_ON(size == 0); + + if (size >= scache_size) { + blast_r5000_scache(); + return; + } + + /* On the R5000 secondary cache we cannot + * invalidate less than a page at a time. + * The secondary cache is physically indexed, write-through. + */ + a = addr & ~(SC_PAGE - 1); + end = (addr + size - 1) & ~(SC_PAGE - 1); + while (a <= end) { + cache_op(R5K_Page_Invalidate_S, a); + a += SC_PAGE; + } +} + +static void r5k_sc_enable(void) +{ + unsigned long flags; + + local_irq_save(flags); + set_c0_config(R5K_CONF_SE); + blast_r5000_scache(); + local_irq_restore(flags); +} + +static void r5k_sc_disable(void) +{ + unsigned long flags; + + local_irq_save(flags); + blast_r5000_scache(); + clear_c0_config(R5K_CONF_SE); + local_irq_restore(flags); +} + +static inline int __init r5k_sc_probe(void) +{ + unsigned long config = read_c0_config(); + + if (config & CONF_SC) + return 0; + + scache_size = (512 * 1024) << ((config & R5K_CONF_SS) >> 20); + + printk("R5000 SCACHE size %ldkB, linesize 32 bytes.\n", + scache_size >> 10); + + return 1; +} + +static struct bcache_ops r5k_sc_ops = { + .bc_enable = r5k_sc_enable, + .bc_disable = r5k_sc_disable, + .bc_wback_inv = r5k_dma_cache_inv_sc, + .bc_inv = r5k_dma_cache_inv_sc +}; + +void r5k_sc_init(void) +{ + if (r5k_sc_probe()) { + r5k_sc_enable(); + bcops = &r5k_sc_ops; + } +} diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c new file mode 100644 index 000000000..e9e3777a7 --- /dev/null +++ b/arch/mips/mm/sc-rm7k.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * sc-rm7k.c: RM7000 cache management functions. + * + * Copyright (C) 1997, 2001, 2003, 2004 Ralf Baechle (ralf@linux-mips.org) + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bitops.h> + +#include <asm/addrspace.h> +#include <asm/bcache.h> +#include <asm/cacheops.h> +#include <asm/mipsregs.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/cacheflush.h> /* for run_uncached() */ + +/* Primary cache parameters. */ +#define sc_lsize 32 +#define tc_pagesize (32*128) + +/* Secondary cache parameters. */ +#define scache_size (256*1024) /* Fixed to 256KiB on RM7000 */ + +/* Tertiary cache parameters */ +#define tc_lsize 32 + +extern unsigned long icache_way_size, dcache_way_size; +static unsigned long tcache_size; + +#include <asm/r4kcache.h> + +static int rm7k_tcache_init; + +/* + * Writeback and invalidate the primary cache dcache before DMA. + * (XXX These need to be fixed ...) + */ +static void rm7k_sc_wback_inv(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + pr_debug("rm7k_sc_wback_inv[%08lx,%08lx]", addr, size); + + /* Catch bad driver code */ + BUG_ON(size == 0); + + blast_scache_range(addr, addr + size); + + if (!rm7k_tcache_init) + return; + + a = addr & ~(tc_pagesize - 1); + end = (addr + size - 1) & ~(tc_pagesize - 1); + while(1) { + invalidate_tcache_page(a); /* Page_Invalidate_T */ + if (a == end) + break; + a += tc_pagesize; + } +} + +static void rm7k_sc_inv(unsigned long addr, unsigned long size) +{ + unsigned long end, a; + + pr_debug("rm7k_sc_inv[%08lx,%08lx]", addr, size); + + /* Catch bad driver code */ + BUG_ON(size == 0); + + blast_inv_scache_range(addr, addr + size); + + if (!rm7k_tcache_init) + return; + + a = addr & ~(tc_pagesize - 1); + end = (addr + size - 1) & ~(tc_pagesize - 1); + while(1) { + invalidate_tcache_page(a); /* Page_Invalidate_T */ + if (a == end) + break; + a += tc_pagesize; + } +} + +static void blast_rm7k_tcache(void) +{ + unsigned long start = CKSEG0ADDR(0); + unsigned long end = start + tcache_size; + + write_c0_taglo(0); + + while (start < end) { + cache_op(Page_Invalidate_T, start); + start += tc_pagesize; + } +} + +/* + * This function is executed in uncached address space. + */ +static void __rm7k_tc_enable(void) +{ + int i; + + set_c0_config(RM7K_CONF_TE); + + write_c0_taglo(0); + write_c0_taghi(0); + + for (i = 0; i < tcache_size; i += tc_lsize) + cache_op(Index_Store_Tag_T, CKSEG0ADDR(i)); +} + +static void rm7k_tc_enable(void) +{ + if (read_c0_config() & RM7K_CONF_TE) + return; + + BUG_ON(tcache_size == 0); + + run_uncached(__rm7k_tc_enable); +} + +/* + * This function is executed in uncached address space. + */ +static void __rm7k_sc_enable(void) +{ + int i; + + set_c0_config(RM7K_CONF_SE); + + write_c0_taglo(0); + write_c0_taghi(0); + + for (i = 0; i < scache_size; i += sc_lsize) + cache_op(Index_Store_Tag_SD, CKSEG0ADDR(i)); +} + +static void rm7k_sc_enable(void) +{ + if (read_c0_config() & RM7K_CONF_SE) + return; + + pr_info("Enabling secondary cache...\n"); + run_uncached(__rm7k_sc_enable); + + if (rm7k_tcache_init) + rm7k_tc_enable(); +} + +static void rm7k_tc_disable(void) +{ + unsigned long flags; + + local_irq_save(flags); + blast_rm7k_tcache(); + clear_c0_config(RM7K_CONF_TE); + local_irq_restore(flags); +} + +static void rm7k_sc_disable(void) +{ + clear_c0_config(RM7K_CONF_SE); + + if (rm7k_tcache_init) + rm7k_tc_disable(); +} + +static struct bcache_ops rm7k_sc_ops = { + .bc_enable = rm7k_sc_enable, + .bc_disable = rm7k_sc_disable, + .bc_wback_inv = rm7k_sc_wback_inv, + .bc_inv = rm7k_sc_inv +}; + +/* + * This is a probing function like the one found in c-r4k.c, we look for the + * wrap around point with different addresses. + */ +static void __probe_tcache(void) +{ + unsigned long flags, addr, begin, end, pow2; + + begin = (unsigned long) &_stext; + begin &= ~((8 * 1024 * 1024) - 1); + end = begin + (8 * 1024 * 1024); + + local_irq_save(flags); + + set_c0_config(RM7K_CONF_TE); + + /* Fill size-multiple lines with a valid tag */ + pow2 = (256 * 1024); + for (addr = begin; addr <= end; addr = (begin + pow2)) { + unsigned long *p = (unsigned long *) addr; + __asm__ __volatile__("nop" : : "r" (*p)); + pow2 <<= 1; + } + + /* Load first line with a 0 tag, to check after */ + write_c0_taglo(0); + write_c0_taghi(0); + cache_op(Index_Store_Tag_T, begin); + + /* Look for the wrap-around */ + pow2 = (512 * 1024); + for (addr = begin + (512 * 1024); addr <= end; addr = begin + pow2) { + cache_op(Index_Load_Tag_T, addr); + if (!read_c0_taglo()) + break; + pow2 <<= 1; + } + + addr -= begin; + tcache_size = addr; + + clear_c0_config(RM7K_CONF_TE); + + local_irq_restore(flags); +} + +void rm7k_sc_init(void) +{ + struct cpuinfo_mips *c = ¤t_cpu_data; + unsigned int config = read_c0_config(); + + if ((config & RM7K_CONF_SC)) + return; + + c->scache.linesz = sc_lsize; + c->scache.ways = 4; + c->scache.waybit= __ffs(scache_size / c->scache.ways); + c->scache.waysize = scache_size / c->scache.ways; + c->scache.sets = scache_size / (c->scache.linesz * c->scache.ways); + printk(KERN_INFO "Secondary cache size %dK, linesize %d bytes.\n", + (scache_size >> 10), sc_lsize); + + if (!(config & RM7K_CONF_SE)) + rm7k_sc_enable(); + + bcops = &rm7k_sc_ops; + + /* + * While we're at it let's deal with the tertiary cache. + */ + + rm7k_tcache_init = 0; + tcache_size = 0; + + if (config & RM7K_CONF_TC) + return; + + /* + * No efficient way to ask the hardware for the size of the tcache, + * so must probe for it. + */ + run_uncached(__probe_tcache); + rm7k_tc_enable(); + rm7k_tcache_init = 1; + c->tcache.linesz = tc_lsize; + c->tcache.ways = 1; + pr_info("Tertiary cache size %ldK.\n", (tcache_size >> 10)); +} diff --git a/arch/mips/mm/tlb-funcs.S b/arch/mips/mm/tlb-funcs.S new file mode 100644 index 000000000..00fef578c --- /dev/null +++ b/arch/mips/mm/tlb-funcs.S @@ -0,0 +1,40 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Micro-assembler generated tlb handler functions. + * + * Copyright (C) 2013 Broadcom Corporation. + * + * Based on mm/page-funcs.c + * Copyright (C) 2012 MIPS Technologies, Inc. + * Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org> + */ +#include <asm/asm.h> +#include <asm/export.h> +#include <asm/regdef.h> + +#define FASTPATH_SIZE 128 + +LEAF(tlbmiss_handler_setup_pgd) +1: j 1b /* Dummy, will be replaced. */ + .space 64 +END(tlbmiss_handler_setup_pgd) +EXPORT(tlbmiss_handler_setup_pgd_end) +EXPORT_SYMBOL_GPL(tlbmiss_handler_setup_pgd) + +LEAF(handle_tlbm) + .space FASTPATH_SIZE * 4 +END(handle_tlbm) +EXPORT(handle_tlbm_end) + +LEAF(handle_tlbs) + .space FASTPATH_SIZE * 4 +END(handle_tlbs) +EXPORT(handle_tlbs_end) + +LEAF(handle_tlbl) + .space FASTPATH_SIZE * 4 +END(handle_tlbl) +EXPORT(handle_tlbl_end) diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c new file mode 100644 index 000000000..53dfa2b93 --- /dev/null +++ b/arch/mips/mm/tlb-r3k.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * r2300.c: R2000 and R3000 specific mmu/cache code. + * + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) + * + * with a lot of changes to make this thing work for R3000s + * Tx39XX R4k style caches added. HK + * Copyright (C) 1998, 1999, 2000 Harald Koerfgen + * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov + * Copyright (C) 2002 Ralf Baechle + * Copyright (C) 2002 Maciej W. Rozycki + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> + +#include <asm/page.h> +#include <asm/mmu_context.h> +#include <asm/tlbmisc.h> +#include <asm/isadep.h> +#include <asm/io.h> +#include <asm/bootinfo.h> +#include <asm/cpu.h> + +#undef DEBUG_TLB + +extern void build_tlb_refill_handler(void); + +/* CP0 hazard avoidance. */ +#define BARRIER \ + __asm__ __volatile__( \ + ".set push\n\t" \ + ".set noreorder\n\t" \ + "nop\n\t" \ + ".set pop\n\t") + +/* TLB operations. */ +static void local_flush_tlb_from(int entry) +{ + unsigned long old_ctx; + + old_ctx = read_c0_entryhi() & cpu_asid_mask(¤t_cpu_data); + write_c0_entrylo0(0); + while (entry < current_cpu_data.tlbsize) { + write_c0_index(entry << 8); + write_c0_entryhi((entry | 0x80000) << 12); + entry++; /* BARRIER */ + tlb_write_indexed(); + } + write_c0_entryhi(old_ctx); +} + +void local_flush_tlb_all(void) +{ + unsigned long flags; + +#ifdef DEBUG_TLB + printk("[tlball]"); +#endif + local_irq_save(flags); + local_flush_tlb_from(8); + local_irq_restore(flags); +} + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); + struct mm_struct *mm = vma->vm_mm; + int cpu = smp_processor_id(); + + if (cpu_context(cpu, mm) != 0) { + unsigned long size, flags; + +#ifdef DEBUG_TLB + printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", + cpu_context(cpu, mm) & asid_mask, start, end); +#endif + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size <= current_cpu_data.tlbsize) { + int oldpid = read_c0_entryhi() & asid_mask; + int newpid = cpu_context(cpu, mm) & asid_mask; + + start &= PAGE_MASK; + end += PAGE_SIZE - 1; + end &= PAGE_MASK; + while (start < end) { + int idx; + + write_c0_entryhi(start | newpid); + start += PAGE_SIZE; /* BARRIER */ + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entryhi(KSEG0); + if (idx < 0) /* BARRIER */ + continue; + tlb_write_indexed(); + } + write_c0_entryhi(oldpid); + } else { + drop_mmu_context(mm); + } + local_irq_restore(flags); + } +} + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long size, flags; + +#ifdef DEBUG_TLB + printk("[tlbrange<%lu,0x%08lx,0x%08lx>]", start, end); +#endif + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + if (size <= current_cpu_data.tlbsize) { + int pid = read_c0_entryhi(); + + start &= PAGE_MASK; + end += PAGE_SIZE - 1; + end &= PAGE_MASK; + + while (start < end) { + int idx; + + write_c0_entryhi(start); + start += PAGE_SIZE; /* BARRIER */ + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entryhi(KSEG0); + if (idx < 0) /* BARRIER */ + continue; + tlb_write_indexed(); + } + write_c0_entryhi(pid); + } else { + local_flush_tlb_all(); + } + local_irq_restore(flags); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); + int cpu = smp_processor_id(); + + if (cpu_context(cpu, vma->vm_mm) != 0) { + unsigned long flags; + int oldpid, newpid, idx; + +#ifdef DEBUG_TLB + printk("[tlbpage<%lu,0x%08lx>]", cpu_context(cpu, vma->vm_mm), page); +#endif + newpid = cpu_context(cpu, vma->vm_mm) & asid_mask; + page &= PAGE_MASK; + local_irq_save(flags); + oldpid = read_c0_entryhi() & asid_mask; + write_c0_entryhi(page | newpid); + BARRIER; + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entryhi(KSEG0); + if (idx < 0) /* BARRIER */ + goto finish; + tlb_write_indexed(); + +finish: + write_c0_entryhi(oldpid); + local_irq_restore(flags); + } +} + +void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) +{ + unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); + unsigned long flags; + int idx, pid; + + /* + * Handle debugger faulting in for debugee. + */ + if (current->active_mm != vma->vm_mm) + return; + + pid = read_c0_entryhi() & asid_mask; + +#ifdef DEBUG_TLB + if ((pid != (cpu_context(cpu, vma->vm_mm) & asid_mask)) || (cpu_context(cpu, vma->vm_mm) == 0)) { + printk("update_mmu_cache: Wheee, bogus tlbpid mmpid=%lu tlbpid=%d\n", + (cpu_context(cpu, vma->vm_mm)), pid); + } +#endif + + local_irq_save(flags); + address &= PAGE_MASK; + write_c0_entryhi(address | pid); + BARRIER; + tlb_probe(); + idx = read_c0_index(); + write_c0_entrylo0(pte_val(pte)); + write_c0_entryhi(address | pid); + if (idx < 0) { /* BARRIER */ + tlb_write_random(); + } else { + tlb_write_indexed(); + } + write_c0_entryhi(pid); + local_irq_restore(flags); +} + +void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) +{ + unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); + unsigned long flags; + unsigned long old_ctx; + static unsigned long wired = 0; + + if (wired < 8) { +#ifdef DEBUG_TLB + printk("[tlbwired<entry lo0 %8x, hi %8x\n>]\n", + entrylo0, entryhi); +#endif + + local_irq_save(flags); + old_ctx = read_c0_entryhi() & asid_mask; + write_c0_entrylo0(entrylo0); + write_c0_entryhi(entryhi); + write_c0_index(wired); + wired++; /* BARRIER */ + tlb_write_indexed(); + write_c0_entryhi(old_ctx); + local_flush_tlb_all(); + local_irq_restore(flags); + } +} + +void tlb_init(void) +{ + local_flush_tlb_from(0); + build_tlb_refill_handler(); +} diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c new file mode 100644 index 000000000..1b939abbe --- /dev/null +++ b/arch/mips/mm/tlb-r4k.c @@ -0,0 +1,583 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1996 David S. Miller (davem@davemloft.net) + * Copyright (C) 1997, 1998, 1999, 2000 Ralf Baechle ralf@gnu.org + * Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2002 MIPS Technologies, Inc. All rights reserved. + */ +#include <linux/cpu_pm.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/export.h> + +#include <asm/cpu.h> +#include <asm/cpu-type.h> +#include <asm/bootinfo.h> +#include <asm/hazards.h> +#include <asm/mmu_context.h> +#include <asm/tlb.h> +#include <asm/tlbmisc.h> + +extern void build_tlb_refill_handler(void); + +/* + * LOONGSON-2 has a 4 entry itlb which is a subset of jtlb, LOONGSON-3 has + * a 4 entry itlb and a 4 entry dtlb which are subsets of jtlb. Unfortunately, + * itlb/dtlb are not totally transparent to software. + */ +static inline void flush_micro_tlb(void) +{ + switch (current_cpu_type()) { + case CPU_LOONGSON2EF: + write_c0_diag(LOONGSON_DIAG_ITLB); + break; + case CPU_LOONGSON64: + write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB); + break; + default: + break; + } +} + +static inline void flush_micro_tlb_vm(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_EXEC) + flush_micro_tlb(); +} + +void local_flush_tlb_all(void) +{ + unsigned long flags; + unsigned long old_ctx; + int entry, ftlbhighset; + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + old_ctx = read_c0_entryhi(); + htw_stop(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + + entry = num_wired_entries(); + + /* + * Blast 'em all away. + * If there are any wired entries, fall back to iterating + */ + if (cpu_has_tlbinv && !entry) { + if (current_cpu_data.tlbsizevtlb) { + write_c0_index(0); + mtc0_tlbw_hazard(); + tlbinvf(); /* invalidate VTLB */ + } + ftlbhighset = current_cpu_data.tlbsizevtlb + + current_cpu_data.tlbsizeftlbsets; + for (entry = current_cpu_data.tlbsizevtlb; + entry < ftlbhighset; + entry++) { + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlbinvf(); /* invalidate one FTLB set */ + } + } else { + while (entry < current_cpu_data.tlbsize) { + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(entry)); + write_c0_index(entry); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + entry++; + } + } + tlbw_use_hazard(); + write_c0_entryhi(old_ctx); + htw_start(); + flush_micro_tlb(); + local_irq_restore(flags); +} +EXPORT_SYMBOL(local_flush_tlb_all); + +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int cpu = smp_processor_id(); + + if (cpu_context(cpu, mm) != 0) { + unsigned long size, flags; + + local_irq_save(flags); + start = round_down(start, PAGE_SIZE << 1); + end = round_up(end, PAGE_SIZE << 1); + size = (end - start) >> (PAGE_SHIFT + 1); + if (size <= (current_cpu_data.tlbsizeftlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { + unsigned long old_entryhi, old_mmid; + int newpid = cpu_asid(cpu, mm); + + old_entryhi = read_c0_entryhi(); + if (cpu_has_mmid) { + old_mmid = read_c0_memorymapid(); + write_c0_memorymapid(newpid); + } + + htw_stop(); + while (start < end) { + int idx; + + if (cpu_has_mmid) + write_c0_entryhi(start); + else + write_c0_entryhi(start | newpid); + start += (PAGE_SIZE << 1); + mtc0_tlbw_hazard(); + tlb_probe(); + tlb_probe_hazard(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx < 0) + continue; + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + } + tlbw_use_hazard(); + write_c0_entryhi(old_entryhi); + if (cpu_has_mmid) + write_c0_memorymapid(old_mmid); + htw_start(); + } else { + drop_mmu_context(mm); + } + flush_micro_tlb(); + local_irq_restore(flags); + } +} + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long size, flags; + + local_irq_save(flags); + size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + size = (size + 1) >> 1; + if (size <= (current_cpu_data.tlbsizeftlbsets ? + current_cpu_data.tlbsize / 8 : + current_cpu_data.tlbsize / 2)) { + int pid = read_c0_entryhi(); + + start &= (PAGE_MASK << 1); + end += ((PAGE_SIZE << 1) - 1); + end &= (PAGE_MASK << 1); + htw_stop(); + + while (start < end) { + int idx; + + write_c0_entryhi(start); + start += (PAGE_SIZE << 1); + mtc0_tlbw_hazard(); + tlb_probe(); + tlb_probe_hazard(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx < 0) + continue; + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + } + tlbw_use_hazard(); + write_c0_entryhi(pid); + htw_start(); + } else { + local_flush_tlb_all(); + } + flush_micro_tlb(); + local_irq_restore(flags); +} + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + int cpu = smp_processor_id(); + + if (cpu_context(cpu, vma->vm_mm) != 0) { + unsigned long old_mmid; + unsigned long flags, old_entryhi; + int idx; + + page &= (PAGE_MASK << 1); + local_irq_save(flags); + old_entryhi = read_c0_entryhi(); + htw_stop(); + if (cpu_has_mmid) { + old_mmid = read_c0_memorymapid(); + write_c0_entryhi(page); + write_c0_memorymapid(cpu_asid(cpu, vma->vm_mm)); + } else { + write_c0_entryhi(page | cpu_asid(cpu, vma->vm_mm)); + } + mtc0_tlbw_hazard(); + tlb_probe(); + tlb_probe_hazard(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx < 0) + goto finish; + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + + finish: + write_c0_entryhi(old_entryhi); + if (cpu_has_mmid) + write_c0_memorymapid(old_mmid); + htw_start(); + flush_micro_tlb_vm(vma); + local_irq_restore(flags); + } +} + +/* + * This one is only used for pages with the global bit set so we don't care + * much about the ASID. + */ +void local_flush_tlb_one(unsigned long page) +{ + unsigned long flags; + int oldpid, idx; + + local_irq_save(flags); + oldpid = read_c0_entryhi(); + htw_stop(); + page &= (PAGE_MASK << 1); + write_c0_entryhi(page); + mtc0_tlbw_hazard(); + tlb_probe(); + tlb_probe_hazard(); + idx = read_c0_index(); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + if (idx >= 0) { + /* Make sure all entries differ. */ + write_c0_entryhi(UNIQUE_ENTRYHI(idx)); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + } + write_c0_entryhi(oldpid); + htw_start(); + flush_micro_tlb(); + local_irq_restore(flags); +} + +/* + * We will need multiple versions of update_mmu_cache(), one that just + * updates the TLB with the new pte(s), and another which also checks + * for the R4k "end of page" hardware bug and does the needy. + */ +void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) +{ + unsigned long flags; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + int idx, pid; + + /* + * Handle debugger faulting in for debugee. + */ + if (current->active_mm != vma->vm_mm) + return; + + local_irq_save(flags); + + htw_stop(); + address &= (PAGE_MASK << 1); + if (cpu_has_mmid) { + write_c0_entryhi(address); + } else { + pid = read_c0_entryhi() & cpu_asid_mask(¤t_cpu_data); + write_c0_entryhi(address | pid); + } + pgdp = pgd_offset(vma->vm_mm, address); + mtc0_tlbw_hazard(); + tlb_probe(); + tlb_probe_hazard(); + p4dp = p4d_offset(pgdp, address); + pudp = pud_offset(p4dp, address); + pmdp = pmd_offset(pudp, address); + idx = read_c0_index(); +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* this could be a huge page */ + if (pmd_huge(*pmdp)) { + unsigned long lo; + write_c0_pagemask(PM_HUGE_MASK); + ptep = (pte_t *)pmdp; + lo = pte_to_entrylo(pte_val(*ptep)); + write_c0_entrylo0(lo); + write_c0_entrylo1(lo + (HPAGE_SIZE >> 7)); + + mtc0_tlbw_hazard(); + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + tlbw_use_hazard(); + write_c0_pagemask(PM_DEFAULT_MASK); + } else +#endif + { + ptep = pte_offset_map(pmdp, address); + +#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +#ifdef CONFIG_XPA + write_c0_entrylo0(pte_to_entrylo(ptep->pte_high)); + if (cpu_has_xpa) + writex_c0_entrylo0(ptep->pte_low & _PFNX_MASK); + ptep++; + write_c0_entrylo1(pte_to_entrylo(ptep->pte_high)); + if (cpu_has_xpa) + writex_c0_entrylo1(ptep->pte_low & _PFNX_MASK); +#else + write_c0_entrylo0(ptep->pte_high); + ptep++; + write_c0_entrylo1(ptep->pte_high); +#endif +#else + write_c0_entrylo0(pte_to_entrylo(pte_val(*ptep++))); + write_c0_entrylo1(pte_to_entrylo(pte_val(*ptep))); +#endif + mtc0_tlbw_hazard(); + if (idx < 0) + tlb_write_random(); + else + tlb_write_indexed(); + } + tlbw_use_hazard(); + htw_start(); + flush_micro_tlb_vm(vma); + local_irq_restore(flags); +} + +void add_wired_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) +{ +#ifdef CONFIG_XPA + panic("Broken for XPA kernels"); +#else + unsigned int old_mmid; + unsigned long flags; + unsigned long wired; + unsigned long old_pagemask; + unsigned long old_ctx; + + local_irq_save(flags); + if (cpu_has_mmid) { + old_mmid = read_c0_memorymapid(); + write_c0_memorymapid(MMID_KERNEL_WIRED); + } + /* Save old context and create impossible VPN2 value */ + old_ctx = read_c0_entryhi(); + htw_stop(); + old_pagemask = read_c0_pagemask(); + wired = num_wired_entries(); + write_c0_wired(wired + 1); + write_c0_index(wired); + tlbw_use_hazard(); /* What is the hazard here? */ + write_c0_pagemask(pagemask); + write_c0_entryhi(entryhi); + write_c0_entrylo0(entrylo0); + write_c0_entrylo1(entrylo1); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + + write_c0_entryhi(old_ctx); + if (cpu_has_mmid) + write_c0_memorymapid(old_mmid); + tlbw_use_hazard(); /* What is the hazard here? */ + htw_start(); + write_c0_pagemask(old_pagemask); + local_flush_tlb_all(); + local_irq_restore(flags); +#endif +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +int has_transparent_hugepage(void) +{ + static unsigned int mask = -1; + + if (mask == -1) { /* first call comes during __init */ + unsigned long flags; + + local_irq_save(flags); + write_c0_pagemask(PM_HUGE_MASK); + back_to_back_c0_hazard(); + mask = read_c0_pagemask(); + write_c0_pagemask(PM_DEFAULT_MASK); + local_irq_restore(flags); + } + return mask == PM_HUGE_MASK; +} +EXPORT_SYMBOL(has_transparent_hugepage); + +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +/* + * Used for loading TLB entries before trap_init() has started, when we + * don't actually want to add a wired entry which remains throughout the + * lifetime of the system + */ + +int temp_tlb_entry; + +__init int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, + unsigned long entryhi, unsigned long pagemask) +{ + int ret = 0; + unsigned long flags; + unsigned long wired; + unsigned long old_pagemask; + unsigned long old_ctx; + + local_irq_save(flags); + /* Save old context and create impossible VPN2 value */ + htw_stop(); + old_ctx = read_c0_entryhi(); + old_pagemask = read_c0_pagemask(); + wired = num_wired_entries(); + if (--temp_tlb_entry < wired) { + printk(KERN_WARNING + "No TLB space left for add_temporary_entry\n"); + ret = -ENOSPC; + goto out; + } + + write_c0_index(temp_tlb_entry); + write_c0_pagemask(pagemask); + write_c0_entryhi(entryhi); + write_c0_entrylo0(entrylo0); + write_c0_entrylo1(entrylo1); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + + write_c0_entryhi(old_ctx); + write_c0_pagemask(old_pagemask); + htw_start(); +out: + local_irq_restore(flags); + return ret; +} + +static int ntlb; +static int __init set_ntlb(char *str) +{ + get_option(&str, &ntlb); + return 1; +} + +__setup("ntlb=", set_ntlb); + +/* + * Configure TLB (for init or after a CPU has been powered off). + */ +static void r4k_tlb_configure(void) +{ + /* + * You should never change this register: + * - On R4600 1.7 the tlbp never hits for pages smaller than + * the value in the c0_pagemask register. + * - The entire mm handling assumes the c0_pagemask register to + * be set to fixed-size pages. + */ + write_c0_pagemask(PM_DEFAULT_MASK); + back_to_back_c0_hazard(); + if (read_c0_pagemask() != PM_DEFAULT_MASK) + panic("MMU doesn't support PAGE_SIZE=0x%lx", PAGE_SIZE); + + write_c0_wired(0); + if (current_cpu_type() == CPU_R10000 || + current_cpu_type() == CPU_R12000 || + current_cpu_type() == CPU_R14000 || + current_cpu_type() == CPU_R16000) + write_c0_framemask(0); + + if (cpu_has_rixi) { + /* + * Enable the no read, no exec bits, and enable large physical + * address. + */ +#ifdef CONFIG_64BIT + set_c0_pagegrain(PG_RIE | PG_XIE | PG_ELPA); +#else + set_c0_pagegrain(PG_RIE | PG_XIE); +#endif + } + + temp_tlb_entry = current_cpu_data.tlbsize - 1; + + /* From this point on the ARC firmware is dead. */ + local_flush_tlb_all(); + + /* Did I tell you that ARC SUCKS? */ +} + +void tlb_init(void) +{ + r4k_tlb_configure(); + + if (ntlb) { + if (ntlb > 1 && ntlb <= current_cpu_data.tlbsize) { + int wired = current_cpu_data.tlbsize - ntlb; + write_c0_wired(wired); + write_c0_index(wired-1); + printk("Restricting TLB to %d entries\n", ntlb); + } else + printk("Ignoring invalid argument ntlb=%d\n", ntlb); + } + + build_tlb_refill_handler(); +} + +static int r4k_tlb_pm_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + switch (cmd) { + case CPU_PM_ENTER_FAILED: + case CPU_PM_EXIT: + r4k_tlb_configure(); + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block r4k_tlb_pm_notifier_block = { + .notifier_call = r4k_tlb_pm_notifier, +}; + +static int __init r4k_tlb_init_pm(void) +{ + return cpu_pm_register_notifier(&r4k_tlb_pm_notifier_block); +} +arch_initcall(r4k_tlb_init_pm); diff --git a/arch/mips/mm/tlbex-fault.S b/arch/mips/mm/tlbex-fault.S new file mode 100644 index 000000000..77db401fc --- /dev/null +++ b/arch/mips/mm/tlbex-fault.S @@ -0,0 +1,28 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1999 Ralf Baechle + * Copyright (C) 1999 Silicon Graphics, Inc. + */ +#include <asm/mipsregs.h> +#include <asm/regdef.h> +#include <asm/stackframe.h> + + .macro tlb_do_page_fault, write + NESTED(tlb_do_page_fault_\write, PT_SIZE, sp) + .cfi_signal_frame + SAVE_ALL docfi=1 + MFC0 a2, CP0_BADVADDR + KMODE + move a0, sp + REG_S a2, PT_BVADDR(sp) + li a1, \write + jal do_page_fault + j ret_from_exception + END(tlb_do_page_fault_\write) + .endm + + tlb_do_page_fault 0 + tlb_do_page_fault 1 diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c new file mode 100644 index 000000000..80e05ee98 --- /dev/null +++ b/arch/mips/mm/tlbex.c @@ -0,0 +1,2619 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Synthesize TLB refill handlers at runtime. + * + * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer + * Copyright (C) 2005, 2007, 2008, 2009 Maciej W. Rozycki + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2008, 2009 Cavium Networks, Inc. + * Copyright (C) 2011 MIPS Technologies, Inc. + * + * ... and the days got worse and worse and now you see + * I've gone completely out of my mind. + * + * They're coming to take me a away haha + * they're coming to take me a away hoho hihi haha + * to the funny farm where code is beautiful all the time ... + * + * (Condolences to Napoleon XIV) + */ + +#include <linux/bug.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/smp.h> +#include <linux/string.h> +#include <linux/cache.h> +#include <linux/pgtable.h> + +#include <asm/cacheflush.h> +#include <asm/cpu-type.h> +#include <asm/mmu_context.h> +#include <asm/uasm.h> +#include <asm/setup.h> +#include <asm/tlbex.h> + +static int mips_xpa_disabled; + +static int __init xpa_disable(char *s) +{ + mips_xpa_disabled = 1; + + return 1; +} + +__setup("noxpa", xpa_disable); + +/* + * TLB load/store/modify handlers. + * + * Only the fastpath gets synthesized at runtime, the slowpath for + * do_page_fault remains normal asm. + */ +extern void tlb_do_page_fault_0(void); +extern void tlb_do_page_fault_1(void); + +struct work_registers { + int r1; + int r2; + int r3; +}; + +struct tlb_reg_save { + unsigned long a; + unsigned long b; +} ____cacheline_aligned_in_smp; + +static struct tlb_reg_save handler_reg_save[NR_CPUS]; + +static inline int r45k_bvahwbug(void) +{ + /* XXX: We should probe for the presence of this bug, but we don't. */ + return 0; +} + +static inline int r4k_250MHZhwbug(void) +{ + /* XXX: We should probe for the presence of this bug, but we don't. */ + return 0; +} + +extern int sb1250_m3_workaround_needed(void); + +static inline int __maybe_unused bcm1250_m3_war(void) +{ + if (IS_ENABLED(CONFIG_SB1_PASS_2_WORKAROUNDS)) + return sb1250_m3_workaround_needed(); + return 0; +} + +static inline int __maybe_unused r10000_llsc_war(void) +{ + return IS_ENABLED(CONFIG_WAR_R10000_LLSC); +} + +static int use_bbit_insns(void) +{ + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: + return 1; + default: + return 0; + } +} + +static int use_lwx_insns(void) +{ + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON2: + case CPU_CAVIUM_OCTEON3: + return 1; + default: + return 0; + } +} +#if defined(CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE) && \ + CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0 +static bool scratchpad_available(void) +{ + return true; +} +static int scratchpad_offset(int i) +{ + /* + * CVMSEG starts at address -32768 and extends for + * CAVIUM_OCTEON_CVMSEG_SIZE 128 byte cache lines. + */ + i += 1; /* Kernel use starts at the top and works down. */ + return CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128 - (8 * i) - 32768; +} +#else +static bool scratchpad_available(void) +{ + return false; +} +static int scratchpad_offset(int i) +{ + BUG(); + /* Really unreachable, but evidently some GCC want this. */ + return 0; +} +#endif +/* + * Found by experiment: At least some revisions of the 4kc throw under + * some circumstances a machine check exception, triggered by invalid + * values in the index register. Delaying the tlbp instruction until + * after the next branch, plus adding an additional nop in front of + * tlbwi/tlbwr avoids the invalid index register values. Nobody knows + * why; it's not an issue caused by the core RTL. + * + */ +static int m4kc_tlbp_war(void) +{ + return current_cpu_type() == CPU_4KC; +} + +/* Handle labels (which must be positive integers). */ +enum label_id { + label_second_part = 1, + label_leave, + label_vmalloc, + label_vmalloc_done, + label_tlbw_hazard_0, + label_split = label_tlbw_hazard_0 + 8, + label_tlbl_goaround1, + label_tlbl_goaround2, + label_nopage_tlbl, + label_nopage_tlbs, + label_nopage_tlbm, + label_smp_pgtable_change, + label_r3000_write_probe_fail, + label_large_segbits_fault, +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + label_tlb_huge_update, +#endif +}; + +UASM_L_LA(_second_part) +UASM_L_LA(_leave) +UASM_L_LA(_vmalloc) +UASM_L_LA(_vmalloc_done) +/* _tlbw_hazard_x is handled differently. */ +UASM_L_LA(_split) +UASM_L_LA(_tlbl_goaround1) +UASM_L_LA(_tlbl_goaround2) +UASM_L_LA(_nopage_tlbl) +UASM_L_LA(_nopage_tlbs) +UASM_L_LA(_nopage_tlbm) +UASM_L_LA(_smp_pgtable_change) +UASM_L_LA(_r3000_write_probe_fail) +UASM_L_LA(_large_segbits_fault) +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT +UASM_L_LA(_tlb_huge_update) +#endif + +static int hazard_instance; + +static void uasm_bgezl_hazard(u32 **p, struct uasm_reloc **r, int instance) +{ + switch (instance) { + case 0 ... 7: + uasm_il_bgezl(p, r, 0, label_tlbw_hazard_0 + instance); + return; + default: + BUG(); + } +} + +static void uasm_bgezl_label(struct uasm_label **l, u32 **p, int instance) +{ + switch (instance) { + case 0 ... 7: + uasm_build_label(l, *p, label_tlbw_hazard_0 + instance); + break; + default: + BUG(); + } +} + +/* + * pgtable bits are assigned dynamically depending on processor feature + * and statically based on kernel configuration. This spits out the actual + * values the kernel is using. Required to make sense from disassembled + * TLB exception handlers. + */ +static void output_pgtable_bits_defines(void) +{ +#define pr_define(fmt, ...) \ + pr_debug("#define " fmt, ##__VA_ARGS__) + + pr_debug("#include <asm/asm.h>\n"); + pr_debug("#include <asm/regdef.h>\n"); + pr_debug("\n"); + + pr_define("_PAGE_PRESENT_SHIFT %d\n", _PAGE_PRESENT_SHIFT); + pr_define("_PAGE_NO_READ_SHIFT %d\n", _PAGE_NO_READ_SHIFT); + pr_define("_PAGE_WRITE_SHIFT %d\n", _PAGE_WRITE_SHIFT); + pr_define("_PAGE_ACCESSED_SHIFT %d\n", _PAGE_ACCESSED_SHIFT); + pr_define("_PAGE_MODIFIED_SHIFT %d\n", _PAGE_MODIFIED_SHIFT); +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT); +#endif +#ifdef _PAGE_NO_EXEC_SHIFT + if (cpu_has_rixi) + pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT); +#endif + pr_define("_PAGE_GLOBAL_SHIFT %d\n", _PAGE_GLOBAL_SHIFT); + pr_define("_PAGE_VALID_SHIFT %d\n", _PAGE_VALID_SHIFT); + pr_define("_PAGE_DIRTY_SHIFT %d\n", _PAGE_DIRTY_SHIFT); + pr_define("_PFN_SHIFT %d\n", _PFN_SHIFT); + pr_debug("\n"); +} + +static inline void dump_handler(const char *symbol, const void *start, const void *end) +{ + unsigned int count = (end - start) / sizeof(u32); + const u32 *handler = start; + int i; + + pr_debug("LEAF(%s)\n", symbol); + + pr_debug("\t.set push\n"); + pr_debug("\t.set noreorder\n"); + + for (i = 0; i < count; i++) + pr_debug("\t.word\t0x%08x\t\t# %p\n", handler[i], &handler[i]); + + pr_debug("\t.set\tpop\n"); + + pr_debug("\tEND(%s)\n", symbol); +} + +/* The only general purpose registers allowed in TLB handlers. */ +#define K0 26 +#define K1 27 + +/* Some CP0 registers */ +#define C0_INDEX 0, 0 +#define C0_ENTRYLO0 2, 0 +#define C0_TCBIND 2, 2 +#define C0_ENTRYLO1 3, 0 +#define C0_CONTEXT 4, 0 +#define C0_PAGEMASK 5, 0 +#define C0_PWBASE 5, 5 +#define C0_PWFIELD 5, 6 +#define C0_PWSIZE 5, 7 +#define C0_PWCTL 6, 6 +#define C0_BADVADDR 8, 0 +#define C0_PGD 9, 7 +#define C0_ENTRYHI 10, 0 +#define C0_EPC 14, 0 +#define C0_XCONTEXT 20, 0 + +#ifdef CONFIG_64BIT +# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_XCONTEXT) +#else +# define GET_CONTEXT(buf, reg) UASM_i_MFC0(buf, reg, C0_CONTEXT) +#endif + +/* The worst case length of the handler is around 18 instructions for + * R3000-style TLBs and up to 63 instructions for R4000-style TLBs. + * Maximum space available is 32 instructions for R3000 and 64 + * instructions for R4000. + * + * We deliberately chose a buffer size of 128, so we won't scribble + * over anything important on overflow before we panic. + */ +static u32 tlb_handler[128]; + +/* simply assume worst case size for labels and relocs */ +static struct uasm_label labels[128]; +static struct uasm_reloc relocs[128]; + +static int check_for_high_segbits; +static bool fill_includes_sw_bits; + +static unsigned int kscratch_used_mask; + +static inline int __maybe_unused c0_kscratch(void) +{ + return 31; +} + +static int allocate_kscratch(void) +{ + int r; + unsigned int a = cpu_data[0].kscratch_mask & ~kscratch_used_mask; + + r = ffs(a); + + if (r == 0) + return -1; + + r--; /* make it zero based */ + + kscratch_used_mask |= (1 << r); + + return r; +} + +static int scratch_reg; +int pgd_reg; +EXPORT_SYMBOL_GPL(pgd_reg); +enum vmalloc64_mode {not_refill, refill_scratch, refill_noscratch}; + +static struct work_registers build_get_work_registers(u32 **p) +{ + struct work_registers r; + + if (scratch_reg >= 0) { + /* Save in CPU local C0_KScratch? */ + UASM_i_MTC0(p, 1, c0_kscratch(), scratch_reg); + r.r1 = K0; + r.r2 = K1; + r.r3 = 1; + return r; + } + + if (num_possible_cpus() > 1) { + /* Get smp_processor_id */ + UASM_i_CPUID_MFC0(p, K0, SMP_CPUID_REG); + UASM_i_SRL_SAFE(p, K0, K0, SMP_CPUID_REGSHIFT); + + /* handler_reg_save index in K0 */ + UASM_i_SLL(p, K0, K0, ilog2(sizeof(struct tlb_reg_save))); + + UASM_i_LA(p, K1, (long)&handler_reg_save); + UASM_i_ADDU(p, K0, K0, K1); + } else { + UASM_i_LA(p, K0, (long)&handler_reg_save); + } + /* K0 now points to save area, save $1 and $2 */ + UASM_i_SW(p, 1, offsetof(struct tlb_reg_save, a), K0); + UASM_i_SW(p, 2, offsetof(struct tlb_reg_save, b), K0); + + r.r1 = K1; + r.r2 = 1; + r.r3 = 2; + return r; +} + +static void build_restore_work_registers(u32 **p) +{ + if (scratch_reg >= 0) { + uasm_i_ehb(p); + UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); + return; + } + /* K0 already points to save area, restore $1 and $2 */ + UASM_i_LW(p, 1, offsetof(struct tlb_reg_save, a), K0); + UASM_i_LW(p, 2, offsetof(struct tlb_reg_save, b), K0); +} + +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + +/* + * CONFIG_MIPS_PGD_C0_CONTEXT implies 64 bit and lack of pgd_current, + * we cannot do r3000 under these circumstances. + * + * The R3000 TLB handler is simple. + */ +static void build_r3000_tlb_refill_handler(void) +{ + long pgdc = (long)pgd_current; + u32 *p; + + memset(tlb_handler, 0, sizeof(tlb_handler)); + p = tlb_handler; + + uasm_i_mfc0(&p, K0, C0_BADVADDR); + uasm_i_lui(&p, K1, uasm_rel_hi(pgdc)); /* cp0 delay */ + uasm_i_lw(&p, K1, uasm_rel_lo(pgdc), K1); + uasm_i_srl(&p, K0, K0, 22); /* load delay */ + uasm_i_sll(&p, K0, K0, 2); + uasm_i_addu(&p, K1, K1, K0); + uasm_i_mfc0(&p, K0, C0_CONTEXT); + uasm_i_lw(&p, K1, 0, K1); /* cp0 delay */ + uasm_i_andi(&p, K0, K0, 0xffc); /* load delay */ + uasm_i_addu(&p, K1, K1, K0); + uasm_i_lw(&p, K0, 0, K1); + uasm_i_nop(&p); /* load delay */ + uasm_i_mtc0(&p, K0, C0_ENTRYLO0); + uasm_i_mfc0(&p, K1, C0_EPC); /* cp0 delay */ + uasm_i_tlbwr(&p); /* cp0 delay */ + uasm_i_jr(&p, K1); + uasm_i_rfe(&p); /* branch delay */ + + if (p > tlb_handler + 32) + panic("TLB refill handler space exceeded"); + + pr_debug("Wrote TLB refill handler (%u instructions).\n", + (unsigned int)(p - tlb_handler)); + + memcpy((void *)ebase, tlb_handler, 0x80); + local_flush_icache_range(ebase, ebase + 0x80); + dump_handler("r3000_tlb_refill", (u32 *)ebase, (u32 *)(ebase + 0x80)); +} +#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ + +/* + * The R4000 TLB handler is much more complicated. We have two + * consecutive handler areas with 32 instructions space each. + * Since they aren't used at the same time, we can overflow in the + * other one.To keep things simple, we first assume linear space, + * then we relocate it to the final handler layout as needed. + */ +static u32 final_handler[64]; + +/* + * Hazards + * + * From the IDT errata for the QED RM5230 (Nevada), processor revision 1.0: + * 2. A timing hazard exists for the TLBP instruction. + * + * stalling_instruction + * TLBP + * + * The JTLB is being read for the TLBP throughout the stall generated by the + * previous instruction. This is not really correct as the stalling instruction + * can modify the address used to access the JTLB. The failure symptom is that + * the TLBP instruction will use an address created for the stalling instruction + * and not the address held in C0_ENHI and thus report the wrong results. + * + * The software work-around is to not allow the instruction preceding the TLBP + * to stall - make it an NOP or some other instruction guaranteed not to stall. + * + * Errata 2 will not be fixed. This errata is also on the R5000. + * + * As if we MIPS hackers wouldn't know how to nop pipelines happy ... + */ +static void __maybe_unused build_tlb_probe_entry(u32 **p) +{ + switch (current_cpu_type()) { + /* Found by experiment: R4600 v2.0/R4700 needs this, too. */ + case CPU_R4600: + case CPU_R4700: + case CPU_R5000: + case CPU_NEVADA: + uasm_i_nop(p); + uasm_i_tlbp(p); + break; + + default: + uasm_i_tlbp(p); + break; + } +} + +void build_tlb_write_entry(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, + enum tlb_write_entry wmode) +{ + void(*tlbw)(u32 **) = NULL; + + switch (wmode) { + case tlb_random: tlbw = uasm_i_tlbwr; break; + case tlb_indexed: tlbw = uasm_i_tlbwi; break; + } + + if (cpu_has_mips_r2_r6) { + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(p); + tlbw(p); + return; + } + + switch (current_cpu_type()) { + case CPU_R4000PC: + case CPU_R4000SC: + case CPU_R4000MC: + case CPU_R4400PC: + case CPU_R4400SC: + case CPU_R4400MC: + /* + * This branch uses up a mtc0 hazard nop slot and saves + * two nops after the tlbw instruction. + */ + uasm_bgezl_hazard(p, r, hazard_instance); + tlbw(p); + uasm_bgezl_label(l, p, hazard_instance); + hazard_instance++; + uasm_i_nop(p); + break; + + case CPU_R4600: + case CPU_R4700: + uasm_i_nop(p); + tlbw(p); + uasm_i_nop(p); + break; + + case CPU_R5000: + case CPU_NEVADA: + uasm_i_nop(p); /* QED specifies 2 nops hazard */ + uasm_i_nop(p); /* QED specifies 2 nops hazard */ + tlbw(p); + break; + + case CPU_R4300: + case CPU_5KC: + case CPU_TX49XX: + case CPU_PR4450: + uasm_i_nop(p); + tlbw(p); + break; + + case CPU_R10000: + case CPU_R12000: + case CPU_R14000: + case CPU_R16000: + case CPU_4KC: + case CPU_4KEC: + case CPU_M14KC: + case CPU_M14KEC: + case CPU_SB1: + case CPU_SB1A: + case CPU_4KSC: + case CPU_20KC: + case CPU_25KF: + case CPU_BMIPS32: + case CPU_BMIPS3300: + case CPU_BMIPS4350: + case CPU_BMIPS4380: + case CPU_BMIPS5000: + case CPU_LOONGSON2EF: + case CPU_LOONGSON64: + case CPU_R5500: + if (m4kc_tlbp_war()) + uasm_i_nop(p); + fallthrough; + case CPU_ALCHEMY: + tlbw(p); + break; + + case CPU_RM7000: + uasm_i_nop(p); + uasm_i_nop(p); + uasm_i_nop(p); + uasm_i_nop(p); + tlbw(p); + break; + + case CPU_XBURST: + tlbw(p); + uasm_i_nop(p); + break; + + default: + panic("No TLB refill handler yet (CPU type: %d)", + current_cpu_type()); + break; + } +} +EXPORT_SYMBOL_GPL(build_tlb_write_entry); + +static __maybe_unused void build_convert_pte_to_entrylo(u32 **p, + unsigned int reg) +{ + if (_PAGE_GLOBAL_SHIFT == 0) { + /* pte_t is already in EntryLo format */ + return; + } + + if (cpu_has_rixi && _PAGE_NO_EXEC != 0) { + if (fill_includes_sw_bits) { + UASM_i_ROTR(p, reg, reg, ilog2(_PAGE_GLOBAL)); + } else { + UASM_i_SRL(p, reg, reg, ilog2(_PAGE_NO_EXEC)); + UASM_i_ROTR(p, reg, reg, + ilog2(_PAGE_GLOBAL) - ilog2(_PAGE_NO_EXEC)); + } + } else { +#ifdef CONFIG_PHYS_ADDR_T_64BIT + uasm_i_dsrl_safe(p, reg, reg, ilog2(_PAGE_GLOBAL)); +#else + UASM_i_SRL(p, reg, reg, ilog2(_PAGE_GLOBAL)); +#endif + } +} + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + +static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, + unsigned int tmp, enum label_id lid, + int restore_scratch) +{ + if (restore_scratch) { + /* + * Ensure the MFC0 below observes the value written to the + * KScratch register by the prior MTC0. + */ + if (scratch_reg >= 0) + uasm_i_ehb(p); + + /* Reset default page size */ + if (PM_DEFAULT_MASK >> 16) { + uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); + uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + uasm_il_b(p, r, lid); + } else if (PM_DEFAULT_MASK) { + uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + uasm_il_b(p, r, lid); + } else { + uasm_i_mtc0(p, 0, C0_PAGEMASK); + uasm_il_b(p, r, lid); + } + if (scratch_reg >= 0) + UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); + else + UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } else { + /* Reset default page size */ + if (PM_DEFAULT_MASK >> 16) { + uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); + uasm_i_ori(p, tmp, tmp, PM_DEFAULT_MASK & 0xffff); + uasm_il_b(p, r, lid); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + } else if (PM_DEFAULT_MASK) { + uasm_i_ori(p, tmp, 0, PM_DEFAULT_MASK); + uasm_il_b(p, r, lid); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + } else { + uasm_il_b(p, r, lid); + uasm_i_mtc0(p, 0, C0_PAGEMASK); + } + } +} + +static void build_huge_tlb_write_entry(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, + unsigned int tmp, + enum tlb_write_entry wmode, + int restore_scratch) +{ + /* Set huge page tlb entry size */ + uasm_i_lui(p, tmp, PM_HUGE_MASK >> 16); + uasm_i_ori(p, tmp, tmp, PM_HUGE_MASK & 0xffff); + uasm_i_mtc0(p, tmp, C0_PAGEMASK); + + build_tlb_write_entry(p, l, r, wmode); + + build_restore_pagemask(p, r, tmp, label_leave, restore_scratch); +} + +/* + * Check if Huge PTE is present, if so then jump to LABEL. + */ +static void +build_is_huge_pte(u32 **p, struct uasm_reloc **r, unsigned int tmp, + unsigned int pmd, int lid) +{ + UASM_i_LW(p, tmp, 0, pmd); + if (use_bbit_insns()) { + uasm_il_bbit1(p, r, tmp, ilog2(_PAGE_HUGE), lid); + } else { + uasm_i_andi(p, tmp, tmp, _PAGE_HUGE); + uasm_il_bnez(p, r, tmp, lid); + } +} + +static void build_huge_update_entries(u32 **p, unsigned int pte, + unsigned int tmp) +{ + int small_sequence; + + /* + * A huge PTE describes an area the size of the + * configured huge page size. This is twice the + * of the large TLB entry size we intend to use. + * A TLB entry half the size of the configured + * huge page size is configured into entrylo0 + * and entrylo1 to cover the contiguous huge PTE + * address space. + */ + small_sequence = (HPAGE_SIZE >> 7) < 0x10000; + + /* We can clobber tmp. It isn't used after this.*/ + if (!small_sequence) + uasm_i_lui(p, tmp, HPAGE_SIZE >> (7 + 16)); + + build_convert_pte_to_entrylo(p, pte); + UASM_i_MTC0(p, pte, C0_ENTRYLO0); /* load it */ + /* convert to entrylo1 */ + if (small_sequence) + UASM_i_ADDIU(p, pte, pte, HPAGE_SIZE >> 7); + else + UASM_i_ADDU(p, pte, pte, tmp); + + UASM_i_MTC0(p, pte, C0_ENTRYLO1); /* load it */ +} + +static void build_huge_handler_tail(u32 **p, struct uasm_reloc **r, + struct uasm_label **l, + unsigned int pte, + unsigned int ptr, + unsigned int flush) +{ +#ifdef CONFIG_SMP + UASM_i_SC(p, pte, 0, ptr); + uasm_il_beqz(p, r, pte, label_tlb_huge_update); + UASM_i_LW(p, pte, 0, ptr); /* Needed because SC killed our PTE */ +#else + UASM_i_SW(p, pte, 0, ptr); +#endif + if (cpu_has_ftlb && flush) { + BUG_ON(!cpu_has_tlbinv); + + UASM_i_MFC0(p, ptr, C0_ENTRYHI); + uasm_i_ori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_tlb_write_entry(p, l, r, tlb_indexed); + + uasm_i_xori(p, ptr, ptr, MIPS_ENTRYHI_EHINV); + UASM_i_MTC0(p, ptr, C0_ENTRYHI); + build_huge_update_entries(p, pte, ptr); + build_huge_tlb_write_entry(p, l, r, pte, tlb_random, 0); + + return; + } + + build_huge_update_entries(p, pte, ptr); + build_huge_tlb_write_entry(p, l, r, pte, tlb_indexed, 0); +} +#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */ + +#ifdef CONFIG_64BIT +/* + * TMP and PTR are scratch. + * TMP will be clobbered, PTR will hold the pmd entry. + */ +void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, + unsigned int tmp, unsigned int ptr) +{ +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + long pgdc = (long)pgd_current; +#endif + /* + * The vmalloc handling is not in the hotpath. + */ + uasm_i_dmfc0(p, tmp, C0_BADVADDR); + + if (check_for_high_segbits) { + /* + * The kernel currently implicitely assumes that the + * MIPS SEGBITS parameter for the processor is + * (PGDIR_SHIFT+PGDIR_BITS) or less, and will never + * allocate virtual addresses outside the maximum + * range for SEGBITS = (PGDIR_SHIFT+PGDIR_BITS). But + * that doesn't prevent user code from accessing the + * higher xuseg addresses. Here, we make sure that + * everything but the lower xuseg addresses goes down + * the module_alloc/vmalloc path. + */ + uasm_i_dsrl_safe(p, ptr, tmp, PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3); + uasm_il_bnez(p, r, ptr, label_vmalloc); + } else { + uasm_il_bltz(p, r, tmp, label_vmalloc); + } + /* No uasm_i_nop needed here, since the next insn doesn't touch TMP. */ + + if (pgd_reg != -1) { + /* pgd is in pgd_reg */ + if (cpu_has_ldpte) + UASM_i_MFC0(p, ptr, C0_PWBASE); + else + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); + } else { +#if defined(CONFIG_MIPS_PGD_C0_CONTEXT) + /* + * &pgd << 11 stored in CONTEXT [23..63]. + */ + UASM_i_MFC0(p, ptr, C0_CONTEXT); + + /* Clear lower 23 bits of context. */ + uasm_i_dins(p, ptr, 0, 0, 23); + + /* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */ + uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53)); + uasm_i_drotr(p, ptr, ptr, 11); +#elif defined(CONFIG_SMP) + UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG); + uasm_i_dsrl_safe(p, ptr, ptr, SMP_CPUID_PTRSHIFT); + UASM_i_LA_mostly(p, tmp, pgdc); + uasm_i_daddu(p, ptr, ptr, tmp); + uasm_i_dmfc0(p, tmp, C0_BADVADDR); + uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); +#else + UASM_i_LA_mostly(p, ptr, pgdc); + uasm_i_ld(p, ptr, uasm_rel_lo(pgdc), ptr); +#endif + } + + uasm_l_vmalloc_done(l, *p); + + /* get pgd offset in bytes */ + uasm_i_dsrl_safe(p, tmp, tmp, PGDIR_SHIFT - 3); + + uasm_i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3); + uasm_i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */ +#ifndef __PAGETABLE_PUD_FOLDED + uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + uasm_i_ld(p, ptr, 0, ptr); /* get pud pointer */ + uasm_i_dsrl_safe(p, tmp, tmp, PUD_SHIFT - 3); /* get pud offset in bytes */ + uasm_i_andi(p, tmp, tmp, (PTRS_PER_PUD - 1) << 3); + uasm_i_daddu(p, ptr, ptr, tmp); /* add in pud offset */ +#endif +#ifndef __PAGETABLE_PMD_FOLDED + uasm_i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + uasm_i_ld(p, ptr, 0, ptr); /* get pmd pointer */ + uasm_i_dsrl_safe(p, tmp, tmp, PMD_SHIFT-3); /* get pmd offset in bytes */ + uasm_i_andi(p, tmp, tmp, (PTRS_PER_PMD - 1)<<3); + uasm_i_daddu(p, ptr, ptr, tmp); /* add in pmd offset */ +#endif +} +EXPORT_SYMBOL_GPL(build_get_pmde64); + +/* + * BVADDR is the faulting address, PTR is scratch. + * PTR will hold the pgd for vmalloc. + */ +static void +build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, + unsigned int bvaddr, unsigned int ptr, + enum vmalloc64_mode mode) +{ + long swpd = (long)swapper_pg_dir; + int single_insn_swpd; + int did_vmalloc_branch = 0; + + single_insn_swpd = uasm_in_compat_space_p(swpd) && !uasm_rel_lo(swpd); + + uasm_l_vmalloc(l, *p); + + if (mode != not_refill && check_for_high_segbits) { + if (single_insn_swpd) { + uasm_il_bltz(p, r, bvaddr, label_vmalloc_done); + uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); + did_vmalloc_branch = 1; + /* fall through */ + } else { + uasm_il_bgez(p, r, bvaddr, label_large_segbits_fault); + } + } + if (!did_vmalloc_branch) { + if (single_insn_swpd) { + uasm_il_b(p, r, label_vmalloc_done); + uasm_i_lui(p, ptr, uasm_rel_hi(swpd)); + } else { + UASM_i_LA_mostly(p, ptr, swpd); + uasm_il_b(p, r, label_vmalloc_done); + if (uasm_in_compat_space_p(swpd)) + uasm_i_addiu(p, ptr, ptr, uasm_rel_lo(swpd)); + else + uasm_i_daddiu(p, ptr, ptr, uasm_rel_lo(swpd)); + } + } + if (mode != not_refill && check_for_high_segbits) { + uasm_l_large_segbits_fault(l, *p); + + if (mode == refill_scratch && scratch_reg >= 0) + uasm_i_ehb(p); + + /* + * We get here if we are an xsseg address, or if we are + * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. + * + * Ignoring xsseg (assume disabled so would generate + * (address errors?), the only remaining possibility + * is the upper xuseg addresses. On processors with + * TLB_SEGBITS <= PGDIR_SHIFT+PGDIR_BITS, these + * addresses would have taken an address error. We try + * to mimic that here by taking a load/istream page + * fault. + */ + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); + UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0); + uasm_i_jr(p, ptr); + + if (mode == refill_scratch) { + if (scratch_reg >= 0) + UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); + else + UASM_i_LW(p, 1, scratchpad_offset(0), 0); + } else { + uasm_i_nop(p); + } + } +} + +#else /* !CONFIG_64BIT */ + +/* + * TMP and PTR are scratch. + * TMP will be clobbered, PTR will hold the pgd entry. + */ +void build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) +{ + if (pgd_reg != -1) { + /* pgd is in pgd_reg */ + uasm_i_mfc0(p, ptr, c0_kscratch(), pgd_reg); + uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + } else { + long pgdc = (long)pgd_current; + + /* 32 bit SMP has smp_processor_id() stored in CONTEXT. */ +#ifdef CONFIG_SMP + uasm_i_mfc0(p, ptr, SMP_CPUID_REG); + UASM_i_LA_mostly(p, tmp, pgdc); + uasm_i_srl(p, ptr, ptr, SMP_CPUID_PTRSHIFT); + uasm_i_addu(p, ptr, tmp, ptr); +#else + UASM_i_LA_mostly(p, ptr, pgdc); +#endif + uasm_i_mfc0(p, tmp, C0_BADVADDR); /* get faulting address */ + uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); + } + uasm_i_srl(p, tmp, tmp, PGDIR_SHIFT); /* get pgd only bits */ + uasm_i_sll(p, tmp, tmp, PGD_T_LOG2); + uasm_i_addu(p, ptr, ptr, tmp); /* add in pgd offset */ +} +EXPORT_SYMBOL_GPL(build_get_pgde32); + +#endif /* !CONFIG_64BIT */ + +static void build_adjust_context(u32 **p, unsigned int ctx) +{ + unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12; + unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); + + if (shift) + UASM_i_SRL(p, ctx, ctx, shift); + uasm_i_andi(p, ctx, ctx, mask); +} + +void build_get_ptep(u32 **p, unsigned int tmp, unsigned int ptr) +{ + /* + * Bug workaround for the Nevada. It seems as if under certain + * circumstances the move from cp0_context might produce a + * bogus result when the mfc0 instruction and its consumer are + * in a different cacheline or a load instruction, probably any + * memory reference, is between them. + */ + switch (current_cpu_type()) { + case CPU_NEVADA: + UASM_i_LW(p, ptr, 0, ptr); + GET_CONTEXT(p, tmp); /* get context reg */ + break; + + default: + GET_CONTEXT(p, tmp); /* get context reg */ + UASM_i_LW(p, ptr, 0, ptr); + break; + } + + build_adjust_context(p, tmp); + UASM_i_ADDU(p, ptr, ptr, tmp); /* add in offset */ +} +EXPORT_SYMBOL_GPL(build_get_ptep); + +void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep) +{ + int pte_off_even = 0; + int pte_off_odd = sizeof(pte_t); + +#if defined(CONFIG_CPU_MIPS32) && defined(CONFIG_PHYS_ADDR_T_64BIT) + /* The low 32 bits of EntryLo is stored in pte_high */ + pte_off_even += offsetof(pte_t, pte_high); + pte_off_odd += offsetof(pte_t, pte_high); +#endif + + if (IS_ENABLED(CONFIG_XPA)) { + uasm_i_lw(p, tmp, pte_off_even, ptep); /* even pte */ + UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); + + if (cpu_has_xpa && !mips_xpa_disabled) { + uasm_i_lw(p, tmp, 0, ptep); + uasm_i_ext(p, tmp, tmp, 0, 24); + uasm_i_mthc0(p, tmp, C0_ENTRYLO0); + } + + uasm_i_lw(p, tmp, pte_off_odd, ptep); /* odd pte */ + UASM_i_ROTR(p, tmp, tmp, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, tmp, C0_ENTRYLO1); + + if (cpu_has_xpa && !mips_xpa_disabled) { + uasm_i_lw(p, tmp, sizeof(pte_t), ptep); + uasm_i_ext(p, tmp, tmp, 0, 24); + uasm_i_mthc0(p, tmp, C0_ENTRYLO1); + } + return; + } + + UASM_i_LW(p, tmp, pte_off_even, ptep); /* get even pte */ + UASM_i_LW(p, ptep, pte_off_odd, ptep); /* get odd pte */ + if (r45k_bvahwbug()) + build_tlb_probe_entry(p); + build_convert_pte_to_entrylo(p, tmp); + if (r4k_250MHZhwbug()) + UASM_i_MTC0(p, 0, C0_ENTRYLO0); + UASM_i_MTC0(p, tmp, C0_ENTRYLO0); /* load it */ + build_convert_pte_to_entrylo(p, ptep); + if (r45k_bvahwbug()) + uasm_i_mfc0(p, tmp, C0_INDEX); + if (r4k_250MHZhwbug()) + UASM_i_MTC0(p, 0, C0_ENTRYLO1); + UASM_i_MTC0(p, ptep, C0_ENTRYLO1); /* load it */ +} +EXPORT_SYMBOL_GPL(build_update_entries); + +struct mips_huge_tlb_info { + int huge_pte; + int restore_scratch; + bool need_reload_pte; +}; + +static struct mips_huge_tlb_info +build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l, + struct uasm_reloc **r, unsigned int tmp, + unsigned int ptr, int c0_scratch_reg) +{ + struct mips_huge_tlb_info rv; + unsigned int even, odd; + int vmalloc_branch_delay_filled = 0; + const int scratch = 1; /* Our extra working register */ + + rv.huge_pte = scratch; + rv.restore_scratch = 0; + rv.need_reload_pte = false; + + if (check_for_high_segbits) { + UASM_i_MFC0(p, tmp, C0_BADVADDR); + + if (pgd_reg != -1) + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); + else + UASM_i_MFC0(p, ptr, C0_CONTEXT); + + if (c0_scratch_reg >= 0) + UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg); + else + UASM_i_SW(p, scratch, scratchpad_offset(0), 0); + + uasm_i_dsrl_safe(p, scratch, tmp, + PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3); + uasm_il_bnez(p, r, scratch, label_vmalloc); + + if (pgd_reg == -1) { + vmalloc_branch_delay_filled = 1; + /* Clear lower 23 bits of context. */ + uasm_i_dins(p, ptr, 0, 0, 23); + } + } else { + if (pgd_reg != -1) + UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg); + else + UASM_i_MFC0(p, ptr, C0_CONTEXT); + + UASM_i_MFC0(p, tmp, C0_BADVADDR); + + if (c0_scratch_reg >= 0) + UASM_i_MTC0(p, scratch, c0_kscratch(), c0_scratch_reg); + else + UASM_i_SW(p, scratch, scratchpad_offset(0), 0); + + if (pgd_reg == -1) + /* Clear lower 23 bits of context. */ + uasm_i_dins(p, ptr, 0, 0, 23); + + uasm_il_bltz(p, r, tmp, label_vmalloc); + } + + if (pgd_reg == -1) { + vmalloc_branch_delay_filled = 1; + /* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */ + uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53)); + + uasm_i_drotr(p, ptr, ptr, 11); + } + +#ifdef __PAGETABLE_PMD_FOLDED +#define LOC_PTEP scratch +#else +#define LOC_PTEP ptr +#endif + + if (!vmalloc_branch_delay_filled) + /* get pgd offset in bytes */ + uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); + + uasm_l_vmalloc_done(l, *p); + + /* + * tmp ptr + * fall-through case = badvaddr *pgd_current + * vmalloc case = badvaddr swapper_pg_dir + */ + + if (vmalloc_branch_delay_filled) + /* get pgd offset in bytes */ + uasm_i_dsrl_safe(p, scratch, tmp, PGDIR_SHIFT - 3); + +#ifdef __PAGETABLE_PMD_FOLDED + GET_CONTEXT(p, tmp); /* get context reg */ +#endif + uasm_i_andi(p, scratch, scratch, (PTRS_PER_PGD - 1) << 3); + + if (use_lwx_insns()) { + UASM_i_LWX(p, LOC_PTEP, scratch, ptr); + } else { + uasm_i_daddu(p, ptr, ptr, scratch); /* add in pgd offset */ + uasm_i_ld(p, LOC_PTEP, 0, ptr); /* get pmd pointer */ + } + +#ifndef __PAGETABLE_PUD_FOLDED + /* get pud offset in bytes */ + uasm_i_dsrl_safe(p, scratch, tmp, PUD_SHIFT - 3); + uasm_i_andi(p, scratch, scratch, (PTRS_PER_PUD - 1) << 3); + + if (use_lwx_insns()) { + UASM_i_LWX(p, ptr, scratch, ptr); + } else { + uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */ + UASM_i_LW(p, ptr, 0, ptr); + } + /* ptr contains a pointer to PMD entry */ + /* tmp contains the address */ +#endif + +#ifndef __PAGETABLE_PMD_FOLDED + /* get pmd offset in bytes */ + uasm_i_dsrl_safe(p, scratch, tmp, PMD_SHIFT - 3); + uasm_i_andi(p, scratch, scratch, (PTRS_PER_PMD - 1) << 3); + GET_CONTEXT(p, tmp); /* get context reg */ + + if (use_lwx_insns()) { + UASM_i_LWX(p, scratch, scratch, ptr); + } else { + uasm_i_daddu(p, ptr, ptr, scratch); /* add in pmd offset */ + UASM_i_LW(p, scratch, 0, ptr); + } +#endif + /* Adjust the context during the load latency. */ + build_adjust_context(p, tmp); + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + uasm_il_bbit1(p, r, scratch, ilog2(_PAGE_HUGE), label_tlb_huge_update); + /* + * The in the LWX case we don't want to do the load in the + * delay slot. It cannot issue in the same cycle and may be + * speculative and unneeded. + */ + if (use_lwx_insns()) + uasm_i_nop(p); +#endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */ + + + /* build_update_entries */ + if (use_lwx_insns()) { + even = ptr; + odd = tmp; + UASM_i_LWX(p, even, scratch, tmp); + UASM_i_ADDIU(p, tmp, tmp, sizeof(pte_t)); + UASM_i_LWX(p, odd, scratch, tmp); + } else { + UASM_i_ADDU(p, ptr, scratch, tmp); /* add in offset */ + even = tmp; + odd = ptr; + UASM_i_LW(p, even, 0, ptr); /* get even pte */ + UASM_i_LW(p, odd, sizeof(pte_t), ptr); /* get odd pte */ + } + if (cpu_has_rixi) { + uasm_i_drotr(p, even, even, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ + uasm_i_drotr(p, odd, odd, ilog2(_PAGE_GLOBAL)); + } else { + uasm_i_dsrl_safe(p, even, even, ilog2(_PAGE_GLOBAL)); + UASM_i_MTC0(p, even, C0_ENTRYLO0); /* load it */ + uasm_i_dsrl_safe(p, odd, odd, ilog2(_PAGE_GLOBAL)); + } + UASM_i_MTC0(p, odd, C0_ENTRYLO1); /* load it */ + + if (c0_scratch_reg >= 0) { + uasm_i_ehb(p); + UASM_i_MFC0(p, scratch, c0_kscratch(), c0_scratch_reg); + build_tlb_write_entry(p, l, r, tlb_random); + uasm_l_leave(l, *p); + rv.restore_scratch = 1; + } else if (PAGE_SHIFT == 14 || PAGE_SHIFT == 13) { + build_tlb_write_entry(p, l, r, tlb_random); + uasm_l_leave(l, *p); + UASM_i_LW(p, scratch, scratchpad_offset(0), 0); + } else { + UASM_i_LW(p, scratch, scratchpad_offset(0), 0); + build_tlb_write_entry(p, l, r, tlb_random); + uasm_l_leave(l, *p); + rv.restore_scratch = 1; + } + + uasm_i_eret(p); /* return from trap */ + + return rv; +} + +/* + * For a 64-bit kernel, we are using the 64-bit XTLB refill exception + * because EXL == 0. If we wrap, we can also use the 32 instruction + * slots before the XTLB refill exception handler which belong to the + * unused TLB refill exception. + */ +#define MIPS64_REFILL_INSNS 32 + +static void build_r4000_tlb_refill_handler(void) +{ + u32 *p = tlb_handler; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + u32 *f; + unsigned int final_len; + struct mips_huge_tlb_info htlb_info __maybe_unused; + enum vmalloc64_mode vmalloc_mode __maybe_unused; + + memset(tlb_handler, 0, sizeof(tlb_handler)); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + memset(final_handler, 0, sizeof(final_handler)); + + if (IS_ENABLED(CONFIG_64BIT) && (scratch_reg >= 0 || scratchpad_available()) && use_bbit_insns()) { + htlb_info = build_fast_tlb_refill_handler(&p, &l, &r, K0, K1, + scratch_reg); + vmalloc_mode = refill_scratch; + } else { + htlb_info.huge_pte = K0; + htlb_info.restore_scratch = 0; + htlb_info.need_reload_pte = true; + vmalloc_mode = refill_noscratch; + /* + * create the plain linear handler + */ + if (bcm1250_m3_war()) { + unsigned int segbits = 44; + + uasm_i_dmfc0(&p, K0, C0_BADVADDR); + uasm_i_dmfc0(&p, K1, C0_ENTRYHI); + uasm_i_xor(&p, K0, K0, K1); + uasm_i_dsrl_safe(&p, K1, K0, 62); + uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); + uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); + uasm_i_or(&p, K0, K0, K1); + uasm_il_bnez(&p, &r, K0, label_leave); + /* No need for uasm_i_nop */ + } + +#ifdef CONFIG_64BIT + build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ +#else + build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ +#endif + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + build_is_huge_pte(&p, &r, K0, K1, label_tlb_huge_update); +#endif + + build_get_ptep(&p, K0, K1); + build_update_entries(&p, K0, K1); + build_tlb_write_entry(&p, &l, &r, tlb_random); + uasm_l_leave(&l, p); + uasm_i_eret(&p); /* return from trap */ + } +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + uasm_l_tlb_huge_update(&l, p); + if (htlb_info.need_reload_pte) + UASM_i_LW(&p, htlb_info.huge_pte, 0, K1); + build_huge_update_entries(&p, htlb_info.huge_pte, K1); + build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random, + htlb_info.restore_scratch); +#endif + +#ifdef CONFIG_64BIT + build_get_pgd_vmalloc64(&p, &l, &r, K0, K1, vmalloc_mode); +#endif + + /* + * Overflow check: For the 64bit handler, we need at least one + * free instruction slot for the wrap-around branch. In worst + * case, if the intended insertion point is a delay slot, we + * need three, with the second nop'ed and the third being + * unused. + */ + switch (boot_cpu_type()) { + default: + if (sizeof(long) == 4) { + fallthrough; + case CPU_LOONGSON2EF: + /* Loongson2 ebase is different than r4k, we have more space */ + if ((p - tlb_handler) > 64) + panic("TLB refill handler space exceeded"); + /* + * Now fold the handler in the TLB refill handler space. + */ + f = final_handler; + /* Simplest case, just copy the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; + break; + } else { + if (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 1) + || (((p - tlb_handler) > (MIPS64_REFILL_INSNS * 2) - 3) + && uasm_insn_has_bdelay(relocs, + tlb_handler + MIPS64_REFILL_INSNS - 3))) + panic("TLB refill handler space exceeded"); + /* + * Now fold the handler in the TLB refill handler space. + */ + f = final_handler + MIPS64_REFILL_INSNS; + if ((p - tlb_handler) <= MIPS64_REFILL_INSNS) { + /* Just copy the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, p, f); + final_len = p - tlb_handler; + } else { +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + const enum label_id ls = label_tlb_huge_update; +#else + const enum label_id ls = label_vmalloc; +#endif + u32 *split; + int ov = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(labels) && labels[i].lab != ls; i++) + ; + BUG_ON(i == ARRAY_SIZE(labels)); + split = labels[i].addr; + + /* + * See if we have overflown one way or the other. + */ + if (split > tlb_handler + MIPS64_REFILL_INSNS || + split < p - MIPS64_REFILL_INSNS) + ov = 1; + + if (ov) { + /* + * Split two instructions before the end. One + * for the branch and one for the instruction + * in the delay slot. + */ + split = tlb_handler + MIPS64_REFILL_INSNS - 2; + + /* + * If the branch would fall in a delay slot, + * we must back up an additional instruction + * so that it is no longer in a delay slot. + */ + if (uasm_insn_has_bdelay(relocs, split - 1)) + split--; + } + /* Copy first part of the handler. */ + uasm_copy_handler(relocs, labels, tlb_handler, split, f); + f += split - tlb_handler; + + if (ov) { + /* Insert branch. */ + uasm_l_split(&l, final_handler); + uasm_il_b(&f, &r, label_split); + if (uasm_insn_has_bdelay(relocs, split)) + uasm_i_nop(&f); + else { + uasm_copy_handler(relocs, labels, + split, split + 1, f); + uasm_move_labels(labels, f, f + 1, -1); + f++; + split++; + } + } + + /* Copy the rest of the handler. */ + uasm_copy_handler(relocs, labels, split, p, final_handler); + final_len = (f - (final_handler + MIPS64_REFILL_INSNS)) + + (p - split); + } + } + break; + } + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB refill handler (%u instructions).\n", + final_len); + + memcpy((void *)ebase, final_handler, 0x100); + local_flush_icache_range(ebase, ebase + 0x100); + dump_handler("r4000_tlb_refill", (u32 *)ebase, (u32 *)(ebase + 0x100)); +} + +static void setup_pw(void) +{ + unsigned int pwctl; + unsigned long pgd_i, pgd_w; +#ifndef __PAGETABLE_PMD_FOLDED + unsigned long pmd_i, pmd_w; +#endif + unsigned long pt_i, pt_w; + unsigned long pte_i, pte_w; +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + unsigned long psn; + + psn = ilog2(_PAGE_HUGE); /* bit used to indicate huge page */ +#endif + pgd_i = PGDIR_SHIFT; /* 1st level PGD */ +#ifndef __PAGETABLE_PMD_FOLDED + pgd_w = PGDIR_SHIFT - PMD_SHIFT + PGD_TABLE_ORDER; + + pmd_i = PMD_SHIFT; /* 2nd level PMD */ + pmd_w = PMD_SHIFT - PAGE_SHIFT; +#else + pgd_w = PGDIR_SHIFT - PAGE_SHIFT + PGD_TABLE_ORDER; +#endif + + pt_i = PAGE_SHIFT; /* 3rd level PTE */ + pt_w = PAGE_SHIFT - 3; + + pte_i = ilog2(_PAGE_GLOBAL); + pte_w = 0; + pwctl = 1 << 30; /* Set PWDirExt */ + +#ifndef __PAGETABLE_PMD_FOLDED + write_c0_pwfield(pgd_i << 24 | pmd_i << 12 | pt_i << 6 | pte_i); + write_c0_pwsize(1 << 30 | pgd_w << 24 | pmd_w << 12 | pt_w << 6 | pte_w); +#else + write_c0_pwfield(pgd_i << 24 | pt_i << 6 | pte_i); + write_c0_pwsize(1 << 30 | pgd_w << 24 | pt_w << 6 | pte_w); +#endif + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + pwctl |= (1 << 6 | psn); +#endif + write_c0_pwctl(pwctl); + write_c0_kpgd((long)swapper_pg_dir); + kscratch_used_mask |= (1 << 7); /* KScratch6 is used for KPGD */ +} + +static void build_loongson3_tlb_refill_handler(void) +{ + u32 *p = tlb_handler; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + memset(tlb_handler, 0, sizeof(tlb_handler)); + + if (check_for_high_segbits) { + uasm_i_dmfc0(&p, K0, C0_BADVADDR); + uasm_i_dsrl_safe(&p, K1, K0, PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3); + uasm_il_beqz(&p, &r, K1, label_vmalloc); + uasm_i_nop(&p); + + uasm_il_bgez(&p, &r, K0, label_large_segbits_fault); + uasm_i_nop(&p); + uasm_l_vmalloc(&l, p); + } + + uasm_i_dmfc0(&p, K1, C0_PGD); + + uasm_i_lddir(&p, K0, K1, 3); /* global page dir */ +#ifndef __PAGETABLE_PMD_FOLDED + uasm_i_lddir(&p, K1, K0, 1); /* middle page dir */ +#endif + uasm_i_ldpte(&p, K1, 0); /* even */ + uasm_i_ldpte(&p, K1, 1); /* odd */ + uasm_i_tlbwr(&p); + + /* restore page mask */ + if (PM_DEFAULT_MASK >> 16) { + uasm_i_lui(&p, K0, PM_DEFAULT_MASK >> 16); + uasm_i_ori(&p, K0, K0, PM_DEFAULT_MASK & 0xffff); + uasm_i_mtc0(&p, K0, C0_PAGEMASK); + } else if (PM_DEFAULT_MASK) { + uasm_i_ori(&p, K0, 0, PM_DEFAULT_MASK); + uasm_i_mtc0(&p, K0, C0_PAGEMASK); + } else { + uasm_i_mtc0(&p, 0, C0_PAGEMASK); + } + + uasm_i_eret(&p); + + if (check_for_high_segbits) { + uasm_l_large_segbits_fault(&l, p); + UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0); + uasm_i_jr(&p, K1); + uasm_i_nop(&p); + } + + uasm_resolve_relocs(relocs, labels); + memcpy((void *)(ebase + 0x80), tlb_handler, 0x80); + local_flush_icache_range(ebase + 0x80, ebase + 0x100); + dump_handler("loongson3_tlb_refill", + (u32 *)(ebase + 0x80), (u32 *)(ebase + 0x100)); +} + +static void build_setup_pgd(void) +{ + const int a0 = 4; + const int __maybe_unused a1 = 5; + const int __maybe_unused a2 = 6; + u32 *p = (u32 *)msk_isa16_mode((ulong)tlbmiss_handler_setup_pgd); +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + long pgdc = (long)pgd_current; +#endif + + memset(p, 0, tlbmiss_handler_setup_pgd_end - (char *)p); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + pgd_reg = allocate_kscratch(); +#ifdef CONFIG_MIPS_PGD_C0_CONTEXT + if (pgd_reg == -1) { + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + /* PGD << 11 in c0_Context */ + /* + * If it is a ckseg0 address, convert to a physical + * address. Shifting right by 29 and adding 4 will + * result in zero for these addresses. + * + */ + UASM_i_SRA(&p, a1, a0, 29); + UASM_i_ADDIU(&p, a1, a1, 4); + uasm_il_bnez(&p, &r, a1, label_tlbl_goaround1); + uasm_i_nop(&p); + uasm_i_dinsm(&p, a0, 0, 29, 64 - 29); + uasm_l_tlbl_goaround1(&l, p); + UASM_i_SLL(&p, a0, a0, 11); + UASM_i_MTC0(&p, a0, C0_CONTEXT); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); + } else { + /* PGD in c0_KScratch */ + if (cpu_has_ldpte) + UASM_i_MTC0(&p, a0, C0_PWBASE); + else + UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); + } +#else +#ifdef CONFIG_SMP + /* Save PGD to pgd_current[smp_processor_id()] */ + UASM_i_CPUID_MFC0(&p, a1, SMP_CPUID_REG); + UASM_i_SRL_SAFE(&p, a1, a1, SMP_CPUID_PTRSHIFT); + UASM_i_LA_mostly(&p, a2, pgdc); + UASM_i_ADDU(&p, a2, a2, a1); + UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#else + UASM_i_LA_mostly(&p, a2, pgdc); + UASM_i_SW(&p, a0, uasm_rel_lo(pgdc), a2); +#endif /* SMP */ + + /* if pgd_reg is allocated, save PGD also to scratch register */ + if (pgd_reg != -1) { + UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg); + uasm_i_jr(&p, 31); + uasm_i_ehb(&p); + } else { + uasm_i_jr(&p, 31); + uasm_i_nop(&p); + } +#endif + if (p >= (u32 *)tlbmiss_handler_setup_pgd_end) + panic("tlbmiss_handler_setup_pgd space exceeded"); + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote tlbmiss_handler_setup_pgd (%u instructions).\n", + (unsigned int)(p - (u32 *)tlbmiss_handler_setup_pgd)); + + dump_handler("tlbmiss_handler", tlbmiss_handler_setup_pgd, + tlbmiss_handler_setup_pgd_end); +} + +static void +iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr) +{ +#ifdef CONFIG_SMP + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(p, 0); +# ifdef CONFIG_PHYS_ADDR_T_64BIT + if (cpu_has_64bits) + uasm_i_lld(p, pte, 0, ptr); + else +# endif + UASM_i_LL(p, pte, 0, ptr); +#else +# ifdef CONFIG_PHYS_ADDR_T_64BIT + if (cpu_has_64bits) + uasm_i_ld(p, pte, 0, ptr); + else +# endif + UASM_i_LW(p, pte, 0, ptr); +#endif +} + +static void +iPTE_SW(u32 **p, struct uasm_reloc **r, unsigned int pte, unsigned int ptr, + unsigned int mode, unsigned int scratch) +{ + unsigned int hwmode = mode & (_PAGE_VALID | _PAGE_DIRTY); + unsigned int swmode = mode & ~hwmode; + + if (IS_ENABLED(CONFIG_XPA) && !cpu_has_64bits) { + uasm_i_lui(p, scratch, swmode >> 16); + uasm_i_or(p, pte, pte, scratch); + BUG_ON(swmode & 0xffff); + } else { + uasm_i_ori(p, pte, pte, mode); + } + +#ifdef CONFIG_SMP +# ifdef CONFIG_PHYS_ADDR_T_64BIT + if (cpu_has_64bits) + uasm_i_scd(p, pte, 0, ptr); + else +# endif + UASM_i_SC(p, pte, 0, ptr); + + if (r10000_llsc_war()) + uasm_il_beqzl(p, r, pte, label_smp_pgtable_change); + else + uasm_il_beqz(p, r, pte, label_smp_pgtable_change); + +# ifdef CONFIG_PHYS_ADDR_T_64BIT + if (!cpu_has_64bits) { + /* no uasm_i_nop needed */ + uasm_i_ll(p, pte, sizeof(pte_t) / 2, ptr); + uasm_i_ori(p, pte, pte, hwmode); + BUG_ON(hwmode & ~0xffff); + uasm_i_sc(p, pte, sizeof(pte_t) / 2, ptr); + uasm_il_beqz(p, r, pte, label_smp_pgtable_change); + /* no uasm_i_nop needed */ + uasm_i_lw(p, pte, 0, ptr); + } else + uasm_i_nop(p); +# else + uasm_i_nop(p); +# endif +#else +# ifdef CONFIG_PHYS_ADDR_T_64BIT + if (cpu_has_64bits) + uasm_i_sd(p, pte, 0, ptr); + else +# endif + UASM_i_SW(p, pte, 0, ptr); + +# ifdef CONFIG_PHYS_ADDR_T_64BIT + if (!cpu_has_64bits) { + uasm_i_lw(p, pte, sizeof(pte_t) / 2, ptr); + uasm_i_ori(p, pte, pte, hwmode); + BUG_ON(hwmode & ~0xffff); + uasm_i_sw(p, pte, sizeof(pte_t) / 2, ptr); + uasm_i_lw(p, pte, 0, ptr); + } +# endif +#endif +} + +/* + * Check if PTE is present, if not then jump to LABEL. PTR points to + * the page table where this PTE is located, PTE will be re-loaded + * with it's original value. + */ +static void +build_pte_present(u32 **p, struct uasm_reloc **r, + int pte, int ptr, int scratch, enum label_id lid) +{ + int t = scratch >= 0 ? scratch : pte; + int cur = pte; + + if (cpu_has_rixi) { + if (use_bbit_insns()) { + uasm_il_bbit0(p, r, pte, ilog2(_PAGE_PRESENT), lid); + uasm_i_nop(p); + } else { + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, 1); + uasm_il_beqz(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); + } + } else { + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, + (_PAGE_PRESENT | _PAGE_NO_READ) >> _PAGE_PRESENT_SHIFT); + uasm_i_xori(p, t, t, _PAGE_PRESENT >> _PAGE_PRESENT_SHIFT); + uasm_il_bnez(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); + } +} + +/* Make PTE valid, store result in PTR. */ +static void +build_make_valid(u32 **p, struct uasm_reloc **r, unsigned int pte, + unsigned int ptr, unsigned int scratch) +{ + unsigned int mode = _PAGE_VALID | _PAGE_ACCESSED; + + iPTE_SW(p, r, pte, ptr, mode, scratch); +} + +/* + * Check if PTE can be written to, if not branch to LABEL. Regardless + * restore PTE with value from PTR when done. + */ +static void +build_pte_writable(u32 **p, struct uasm_reloc **r, + unsigned int pte, unsigned int ptr, int scratch, + enum label_id lid) +{ + int t = scratch >= 0 ? scratch : pte; + int cur = pte; + + if (_PAGE_PRESENT_SHIFT) { + uasm_i_srl(p, t, cur, _PAGE_PRESENT_SHIFT); + cur = t; + } + uasm_i_andi(p, t, cur, + (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT); + uasm_i_xori(p, t, t, + (_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT); + uasm_il_bnez(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); + else + uasm_i_nop(p); +} + +/* Make PTE writable, update software status bits as well, then store + * at PTR. + */ +static void +build_make_write(u32 **p, struct uasm_reloc **r, unsigned int pte, + unsigned int ptr, unsigned int scratch) +{ + unsigned int mode = (_PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID + | _PAGE_DIRTY); + + iPTE_SW(p, r, pte, ptr, mode, scratch); +} + +/* + * Check if PTE can be modified, if not branch to LABEL. Regardless + * restore PTE with value from PTR when done. + */ +static void +build_pte_modifiable(u32 **p, struct uasm_reloc **r, + unsigned int pte, unsigned int ptr, int scratch, + enum label_id lid) +{ + if (use_bbit_insns()) { + uasm_il_bbit0(p, r, pte, ilog2(_PAGE_WRITE), lid); + uasm_i_nop(p); + } else { + int t = scratch >= 0 ? scratch : pte; + uasm_i_srl(p, t, pte, _PAGE_WRITE_SHIFT); + uasm_i_andi(p, t, t, 1); + uasm_il_beqz(p, r, t, lid); + if (pte == t) + /* You lose the SMP race :-(*/ + iPTE_LW(p, pte, ptr); + } +} + +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + + +/* + * R3000 style TLB load/store/modify handlers. + */ + +/* + * This places the pte into ENTRYLO0 and writes it with tlbwi. + * Then it returns. + */ +static void +build_r3000_pte_reload_tlbwi(u32 **p, unsigned int pte, unsigned int tmp) +{ + uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ + uasm_i_mfc0(p, tmp, C0_EPC); /* cp0 delay */ + uasm_i_tlbwi(p); + uasm_i_jr(p, tmp); + uasm_i_rfe(p); /* branch delay */ +} + +/* + * This places the pte into ENTRYLO0 and writes it with tlbwi + * or tlbwr as appropriate. This is because the index register + * may have the probe fail bit set as a result of a trap on a + * kseg2 access, i.e. without refill. Then it returns. + */ +static void +build_r3000_tlb_reload_write(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, unsigned int pte, + unsigned int tmp) +{ + uasm_i_mfc0(p, tmp, C0_INDEX); + uasm_i_mtc0(p, pte, C0_ENTRYLO0); /* cp0 delay */ + uasm_il_bltz(p, r, tmp, label_r3000_write_probe_fail); /* cp0 delay */ + uasm_i_mfc0(p, tmp, C0_EPC); /* branch delay */ + uasm_i_tlbwi(p); /* cp0 delay */ + uasm_i_jr(p, tmp); + uasm_i_rfe(p); /* branch delay */ + uasm_l_r3000_write_probe_fail(l, *p); + uasm_i_tlbwr(p); /* cp0 delay */ + uasm_i_jr(p, tmp); + uasm_i_rfe(p); /* branch delay */ +} + +static void +build_r3000_tlbchange_handler_head(u32 **p, unsigned int pte, + unsigned int ptr) +{ + long pgdc = (long)pgd_current; + + uasm_i_mfc0(p, pte, C0_BADVADDR); + uasm_i_lui(p, ptr, uasm_rel_hi(pgdc)); /* cp0 delay */ + uasm_i_lw(p, ptr, uasm_rel_lo(pgdc), ptr); + uasm_i_srl(p, pte, pte, 22); /* load delay */ + uasm_i_sll(p, pte, pte, 2); + uasm_i_addu(p, ptr, ptr, pte); + uasm_i_mfc0(p, pte, C0_CONTEXT); + uasm_i_lw(p, ptr, 0, ptr); /* cp0 delay */ + uasm_i_andi(p, pte, pte, 0xffc); /* load delay */ + uasm_i_addu(p, ptr, ptr, pte); + uasm_i_lw(p, pte, 0, ptr); + uasm_i_tlbp(p); /* load delay */ +} + +static void build_r3000_tlb_load_handler(void) +{ + u32 *p = (u32 *)handle_tlbl; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(p, 0, handle_tlbl_end - (char *)p); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + build_r3000_tlbchange_handler_head(&p, K0, K1); + build_pte_present(&p, &r, K0, K1, -1, label_nopage_tlbl); + uasm_i_nop(&p); /* load delay */ + build_make_valid(&p, &r, K0, K1, -1); + build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); + + uasm_l_nopage_tlbl(&l, p); + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); + uasm_i_nop(&p); + + if (p >= (u32 *)handle_tlbl_end) + panic("TLB load handler fastpath space exceeded"); + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", + (unsigned int)(p - (u32 *)handle_tlbl)); + + dump_handler("r3000_tlb_load", handle_tlbl, handle_tlbl_end); +} + +static void build_r3000_tlb_store_handler(void) +{ + u32 *p = (u32 *)handle_tlbs; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(p, 0, handle_tlbs_end - (char *)p); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + build_r3000_tlbchange_handler_head(&p, K0, K1); + build_pte_writable(&p, &r, K0, K1, -1, label_nopage_tlbs); + uasm_i_nop(&p); /* load delay */ + build_make_write(&p, &r, K0, K1, -1); + build_r3000_tlb_reload_write(&p, &l, &r, K0, K1); + + uasm_l_nopage_tlbs(&l, p); + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + uasm_i_nop(&p); + + if (p >= (u32 *)handle_tlbs_end) + panic("TLB store handler fastpath space exceeded"); + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", + (unsigned int)(p - (u32 *)handle_tlbs)); + + dump_handler("r3000_tlb_store", handle_tlbs, handle_tlbs_end); +} + +static void build_r3000_tlb_modify_handler(void) +{ + u32 *p = (u32 *)handle_tlbm; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(p, 0, handle_tlbm_end - (char *)p); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + build_r3000_tlbchange_handler_head(&p, K0, K1); + build_pte_modifiable(&p, &r, K0, K1, -1, label_nopage_tlbm); + uasm_i_nop(&p); /* load delay */ + build_make_write(&p, &r, K0, K1, -1); + build_r3000_pte_reload_tlbwi(&p, K0, K1); + + uasm_l_nopage_tlbm(&l, p); + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + uasm_i_nop(&p); + + if (p >= (u32 *)handle_tlbm_end) + panic("TLB modify handler fastpath space exceeded"); + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", + (unsigned int)(p - (u32 *)handle_tlbm)); + + dump_handler("r3000_tlb_modify", handle_tlbm, handle_tlbm_end); +} +#endif /* CONFIG_MIPS_PGD_C0_CONTEXT */ + +static bool cpu_has_tlbex_tlbp_race(void) +{ + /* + * When a Hardware Table Walker is running it can replace TLB entries + * at any time, leading to a race between it & the CPU. + */ + if (cpu_has_htw) + return true; + + /* + * If the CPU shares FTLB RAM with its siblings then our entry may be + * replaced at any time by a sibling performing a write to the FTLB. + */ + if (cpu_has_shared_ftlb_ram) + return true; + + /* In all other cases there ought to be no race condition to handle */ + return false; +} + +/* + * R4000 style TLB load/store/modify handlers. + */ +static struct work_registers +build_r4000_tlbchange_handler_head(u32 **p, struct uasm_label **l, + struct uasm_reloc **r) +{ + struct work_registers wr = build_get_work_registers(p); + +#ifdef CONFIG_64BIT + build_get_pmde64(p, l, r, wr.r1, wr.r2); /* get pmd in ptr */ +#else + build_get_pgde32(p, wr.r1, wr.r2); /* get pgd in ptr */ +#endif + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* + * For huge tlb entries, pmd doesn't contain an address but + * instead contains the tlb pte. Check the PAGE_HUGE bit and + * see if we need to jump to huge tlb processing. + */ + build_is_huge_pte(p, r, wr.r1, wr.r2, label_tlb_huge_update); +#endif + + UASM_i_MFC0(p, wr.r1, C0_BADVADDR); + UASM_i_LW(p, wr.r2, 0, wr.r2); + UASM_i_SRL(p, wr.r1, wr.r1, PAGE_SHIFT - PTE_T_LOG2); + uasm_i_andi(p, wr.r1, wr.r1, (PTRS_PER_PTE - 1) << PTE_T_LOG2); + UASM_i_ADDU(p, wr.r2, wr.r2, wr.r1); + +#ifdef CONFIG_SMP + uasm_l_smp_pgtable_change(l, *p); +#endif + iPTE_LW(p, wr.r1, wr.r2); /* get even pte */ + if (!m4kc_tlbp_war()) { + build_tlb_probe_entry(p); + if (cpu_has_tlbex_tlbp_race()) { + /* race condition happens, leaving */ + uasm_i_ehb(p); + uasm_i_mfc0(p, wr.r3, C0_INDEX); + uasm_il_bltz(p, r, wr.r3, label_leave); + uasm_i_nop(p); + } + } + return wr; +} + +static void +build_r4000_tlbchange_handler_tail(u32 **p, struct uasm_label **l, + struct uasm_reloc **r, unsigned int tmp, + unsigned int ptr) +{ + uasm_i_ori(p, ptr, ptr, sizeof(pte_t)); + uasm_i_xori(p, ptr, ptr, sizeof(pte_t)); + build_update_entries(p, tmp, ptr); + build_tlb_write_entry(p, l, r, tlb_indexed); + uasm_l_leave(l, *p); + build_restore_work_registers(p); + uasm_i_eret(p); /* return from trap */ + +#ifdef CONFIG_64BIT + build_get_pgd_vmalloc64(p, l, r, tmp, ptr, not_refill); +#endif +} + +static void build_r4000_tlb_load_handler(void) +{ + u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbl); + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + struct work_registers wr; + + memset(p, 0, handle_tlbl_end - (char *)p); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + if (bcm1250_m3_war()) { + unsigned int segbits = 44; + + uasm_i_dmfc0(&p, K0, C0_BADVADDR); + uasm_i_dmfc0(&p, K1, C0_ENTRYHI); + uasm_i_xor(&p, K0, K0, K1); + uasm_i_dsrl_safe(&p, K1, K0, 62); + uasm_i_dsrl_safe(&p, K0, K0, 12 + 1); + uasm_i_dsll_safe(&p, K0, K0, 64 + 12 + 1 - segbits); + uasm_i_or(&p, K0, K0, K1); + uasm_il_bnez(&p, &r, K0, label_leave); + /* No need for uasm_i_nop */ + } + + wr = build_r4000_tlbchange_handler_head(&p, &l, &r); + build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); + if (m4kc_tlbp_war()) + build_tlb_probe_entry(&p); + + if (cpu_has_rixi && !cpu_has_rixiex) { + /* + * If the page is not _PAGE_VALID, RI or XI could not + * have triggered it. Skip the expensive test.. + */ + if (use_bbit_insns()) { + uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), + label_tlbl_goaround1); + } else { + uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); + uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround1); + } + uasm_i_nop(&p); + + /* + * Warn if something may race with us & replace the TLB entry + * before we read it here. Everything with such races should + * also have dedicated RiXi exception handlers, so this + * shouldn't be hit. + */ + WARN(cpu_has_tlbex_tlbp_race(), "Unhandled race in RiXi path"); + + uasm_i_tlbr(&p); + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; + } + + /* Examine entrylo 0 or 1 based on ptr. */ + if (use_bbit_insns()) { + uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); + } else { + uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); + uasm_i_beqz(&p, wr.r3, 8); + } + /* load it in the delay slot*/ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); + /* load it if ptr is odd */ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); + /* + * If the entryLo (now in wr.r3) is valid (bit 1), RI or + * XI must have triggered it. + */ + if (use_bbit_insns()) { + uasm_il_bbit1(&p, &r, wr.r3, 1, label_nopage_tlbl); + uasm_i_nop(&p); + uasm_l_tlbl_goaround1(&l, p); + } else { + uasm_i_andi(&p, wr.r3, wr.r3, 2); + uasm_il_bnez(&p, &r, wr.r3, label_nopage_tlbl); + uasm_i_nop(&p); + } + uasm_l_tlbl_goaround1(&l, p); + } + build_make_valid(&p, &r, wr.r1, wr.r2, wr.r3); + build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* + * This is the entry point when build_r4000_tlbchange_handler_head + * spots a huge page. + */ + uasm_l_tlb_huge_update(&l, p); + iPTE_LW(&p, wr.r1, wr.r2); + build_pte_present(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbl); + build_tlb_probe_entry(&p); + + if (cpu_has_rixi && !cpu_has_rixiex) { + /* + * If the page is not _PAGE_VALID, RI or XI could not + * have triggered it. Skip the expensive test.. + */ + if (use_bbit_insns()) { + uasm_il_bbit0(&p, &r, wr.r1, ilog2(_PAGE_VALID), + label_tlbl_goaround2); + } else { + uasm_i_andi(&p, wr.r3, wr.r1, _PAGE_VALID); + uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); + } + uasm_i_nop(&p); + + /* + * Warn if something may race with us & replace the TLB entry + * before we read it here. Everything with such races should + * also have dedicated RiXi exception handlers, so this + * shouldn't be hit. + */ + WARN(cpu_has_tlbex_tlbp_race(), "Unhandled race in RiXi path"); + + uasm_i_tlbr(&p); + + switch (current_cpu_type()) { + case CPU_CAVIUM_OCTEON: + case CPU_CAVIUM_OCTEON_PLUS: + case CPU_CAVIUM_OCTEON2: + break; + default: + if (cpu_has_mips_r2_exec_hazard) + uasm_i_ehb(&p); + break; + } + + /* Examine entrylo 0 or 1 based on ptr. */ + if (use_bbit_insns()) { + uasm_i_bbit0(&p, wr.r2, ilog2(sizeof(pte_t)), 8); + } else { + uasm_i_andi(&p, wr.r3, wr.r2, sizeof(pte_t)); + uasm_i_beqz(&p, wr.r3, 8); + } + /* load it in the delay slot*/ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO0); + /* load it if ptr is odd */ + UASM_i_MFC0(&p, wr.r3, C0_ENTRYLO1); + /* + * If the entryLo (now in wr.r3) is valid (bit 1), RI or + * XI must have triggered it. + */ + if (use_bbit_insns()) { + uasm_il_bbit0(&p, &r, wr.r3, 1, label_tlbl_goaround2); + } else { + uasm_i_andi(&p, wr.r3, wr.r3, 2); + uasm_il_beqz(&p, &r, wr.r3, label_tlbl_goaround2); + } + if (PM_DEFAULT_MASK == 0) + uasm_i_nop(&p); + /* + * We clobbered C0_PAGEMASK, restore it. On the other branch + * it is restored in build_huge_tlb_write_entry. + */ + build_restore_pagemask(&p, &r, wr.r3, label_nopage_tlbl, 0); + + uasm_l_tlbl_goaround2(&l, p); + } + uasm_i_ori(&p, wr.r1, wr.r1, (_PAGE_ACCESSED | _PAGE_VALID)); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); +#endif + + uasm_l_nopage_tlbl(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); + build_restore_work_registers(&p); +#ifdef CONFIG_CPU_MICROMIPS + if ((unsigned long)tlb_do_page_fault_0 & 1) { + uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_0)); + uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_0)); + uasm_i_jr(&p, K0); + } else +#endif + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_0 & 0x0fffffff); + uasm_i_nop(&p); + + if (p >= (u32 *)handle_tlbl_end) + panic("TLB load handler fastpath space exceeded"); + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB load handler fastpath (%u instructions).\n", + (unsigned int)(p - (u32 *)handle_tlbl)); + + dump_handler("r4000_tlb_load", handle_tlbl, handle_tlbl_end); +} + +static void build_r4000_tlb_store_handler(void) +{ + u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbs); + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + struct work_registers wr; + + memset(p, 0, handle_tlbs_end - (char *)p); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + wr = build_r4000_tlbchange_handler_head(&p, &l, &r); + build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs); + if (m4kc_tlbp_war()) + build_tlb_probe_entry(&p); + build_make_write(&p, &r, wr.r1, wr.r2, wr.r3); + build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* + * This is the entry point when + * build_r4000_tlbchange_handler_head spots a huge page. + */ + uasm_l_tlb_huge_update(&l, p); + iPTE_LW(&p, wr.r1, wr.r2); + build_pte_writable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbs); + build_tlb_probe_entry(&p); + uasm_i_ori(&p, wr.r1, wr.r1, + _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 1); +#endif + + uasm_l_nopage_tlbs(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); + build_restore_work_registers(&p); +#ifdef CONFIG_CPU_MICROMIPS + if ((unsigned long)tlb_do_page_fault_1 & 1) { + uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1)); + uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1)); + uasm_i_jr(&p, K0); + } else +#endif + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + uasm_i_nop(&p); + + if (p >= (u32 *)handle_tlbs_end) + panic("TLB store handler fastpath space exceeded"); + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB store handler fastpath (%u instructions).\n", + (unsigned int)(p - (u32 *)handle_tlbs)); + + dump_handler("r4000_tlb_store", handle_tlbs, handle_tlbs_end); +} + +static void build_r4000_tlb_modify_handler(void) +{ + u32 *p = (u32 *)msk_isa16_mode((ulong)handle_tlbm); + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + struct work_registers wr; + + memset(p, 0, handle_tlbm_end - (char *)p); + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + wr = build_r4000_tlbchange_handler_head(&p, &l, &r); + build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm); + if (m4kc_tlbp_war()) + build_tlb_probe_entry(&p); + /* Present and writable bits set, set accessed and dirty bits. */ + build_make_write(&p, &r, wr.r1, wr.r2, wr.r3); + build_r4000_tlbchange_handler_tail(&p, &l, &r, wr.r1, wr.r2); + +#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT + /* + * This is the entry point when + * build_r4000_tlbchange_handler_head spots a huge page. + */ + uasm_l_tlb_huge_update(&l, p); + iPTE_LW(&p, wr.r1, wr.r2); + build_pte_modifiable(&p, &r, wr.r1, wr.r2, wr.r3, label_nopage_tlbm); + build_tlb_probe_entry(&p); + uasm_i_ori(&p, wr.r1, wr.r1, + _PAGE_ACCESSED | _PAGE_MODIFIED | _PAGE_VALID | _PAGE_DIRTY); + build_huge_handler_tail(&p, &r, &l, wr.r1, wr.r2, 0); +#endif + + uasm_l_nopage_tlbm(&l, p); + if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS)) + uasm_i_sync(&p, 0); + build_restore_work_registers(&p); +#ifdef CONFIG_CPU_MICROMIPS + if ((unsigned long)tlb_do_page_fault_1 & 1) { + uasm_i_lui(&p, K0, uasm_rel_hi((long)tlb_do_page_fault_1)); + uasm_i_addiu(&p, K0, K0, uasm_rel_lo((long)tlb_do_page_fault_1)); + uasm_i_jr(&p, K0); + } else +#endif + uasm_i_j(&p, (unsigned long)tlb_do_page_fault_1 & 0x0fffffff); + uasm_i_nop(&p); + + if (p >= (u32 *)handle_tlbm_end) + panic("TLB modify handler fastpath space exceeded"); + + uasm_resolve_relocs(relocs, labels); + pr_debug("Wrote TLB modify handler fastpath (%u instructions).\n", + (unsigned int)(p - (u32 *)handle_tlbm)); + + dump_handler("r4000_tlb_modify", handle_tlbm, handle_tlbm_end); +} + +static void flush_tlb_handlers(void) +{ + local_flush_icache_range((unsigned long)handle_tlbl, + (unsigned long)handle_tlbl_end); + local_flush_icache_range((unsigned long)handle_tlbs, + (unsigned long)handle_tlbs_end); + local_flush_icache_range((unsigned long)handle_tlbm, + (unsigned long)handle_tlbm_end); + local_flush_icache_range((unsigned long)tlbmiss_handler_setup_pgd, + (unsigned long)tlbmiss_handler_setup_pgd_end); +} + +static void print_htw_config(void) +{ + unsigned long config; + unsigned int pwctl; + const int field = 2 * sizeof(unsigned long); + + config = read_c0_pwfield(); + pr_debug("PWField (0x%0*lx): GDI: 0x%02lx UDI: 0x%02lx MDI: 0x%02lx PTI: 0x%02lx PTEI: 0x%02lx\n", + field, config, + (config & MIPS_PWFIELD_GDI_MASK) >> MIPS_PWFIELD_GDI_SHIFT, + (config & MIPS_PWFIELD_UDI_MASK) >> MIPS_PWFIELD_UDI_SHIFT, + (config & MIPS_PWFIELD_MDI_MASK) >> MIPS_PWFIELD_MDI_SHIFT, + (config & MIPS_PWFIELD_PTI_MASK) >> MIPS_PWFIELD_PTI_SHIFT, + (config & MIPS_PWFIELD_PTEI_MASK) >> MIPS_PWFIELD_PTEI_SHIFT); + + config = read_c0_pwsize(); + pr_debug("PWSize (0x%0*lx): PS: 0x%lx GDW: 0x%02lx UDW: 0x%02lx MDW: 0x%02lx PTW: 0x%02lx PTEW: 0x%02lx\n", + field, config, + (config & MIPS_PWSIZE_PS_MASK) >> MIPS_PWSIZE_PS_SHIFT, + (config & MIPS_PWSIZE_GDW_MASK) >> MIPS_PWSIZE_GDW_SHIFT, + (config & MIPS_PWSIZE_UDW_MASK) >> MIPS_PWSIZE_UDW_SHIFT, + (config & MIPS_PWSIZE_MDW_MASK) >> MIPS_PWSIZE_MDW_SHIFT, + (config & MIPS_PWSIZE_PTW_MASK) >> MIPS_PWSIZE_PTW_SHIFT, + (config & MIPS_PWSIZE_PTEW_MASK) >> MIPS_PWSIZE_PTEW_SHIFT); + + pwctl = read_c0_pwctl(); + pr_debug("PWCtl (0x%x): PWEn: 0x%x XK: 0x%x XS: 0x%x XU: 0x%x DPH: 0x%x HugePg: 0x%x Psn: 0x%x\n", + pwctl, + (pwctl & MIPS_PWCTL_PWEN_MASK) >> MIPS_PWCTL_PWEN_SHIFT, + (pwctl & MIPS_PWCTL_XK_MASK) >> MIPS_PWCTL_XK_SHIFT, + (pwctl & MIPS_PWCTL_XS_MASK) >> MIPS_PWCTL_XS_SHIFT, + (pwctl & MIPS_PWCTL_XU_MASK) >> MIPS_PWCTL_XU_SHIFT, + (pwctl & MIPS_PWCTL_DPH_MASK) >> MIPS_PWCTL_DPH_SHIFT, + (pwctl & MIPS_PWCTL_HUGEPG_MASK) >> MIPS_PWCTL_HUGEPG_SHIFT, + (pwctl & MIPS_PWCTL_PSN_MASK) >> MIPS_PWCTL_PSN_SHIFT); +} + +static void config_htw_params(void) +{ + unsigned long pwfield, pwsize, ptei; + unsigned int config; + + /* + * We are using 2-level page tables, so we only need to + * setup GDW and PTW appropriately. UDW and MDW will remain 0. + * The default value of GDI/UDI/MDI/PTI is 0xc. It is illegal to + * write values less than 0xc in these fields because the entire + * write will be dropped. As a result of which, we must preserve + * the original reset values and overwrite only what we really want. + */ + + pwfield = read_c0_pwfield(); + /* re-initialize the GDI field */ + pwfield &= ~MIPS_PWFIELD_GDI_MASK; + pwfield |= PGDIR_SHIFT << MIPS_PWFIELD_GDI_SHIFT; + /* re-initialize the PTI field including the even/odd bit */ + pwfield &= ~MIPS_PWFIELD_PTI_MASK; + pwfield |= PAGE_SHIFT << MIPS_PWFIELD_PTI_SHIFT; + if (CONFIG_PGTABLE_LEVELS >= 3) { + pwfield &= ~MIPS_PWFIELD_MDI_MASK; + pwfield |= PMD_SHIFT << MIPS_PWFIELD_MDI_SHIFT; + } + /* Set the PTEI right shift */ + ptei = _PAGE_GLOBAL_SHIFT << MIPS_PWFIELD_PTEI_SHIFT; + pwfield |= ptei; + write_c0_pwfield(pwfield); + /* Check whether the PTEI value is supported */ + back_to_back_c0_hazard(); + pwfield = read_c0_pwfield(); + if (((pwfield & MIPS_PWFIELD_PTEI_MASK) << MIPS_PWFIELD_PTEI_SHIFT) + != ptei) { + pr_warn("Unsupported PTEI field value: 0x%lx. HTW will not be enabled", + ptei); + /* + * Drop option to avoid HTW being enabled via another path + * (eg htw_reset()) + */ + current_cpu_data.options &= ~MIPS_CPU_HTW; + return; + } + + pwsize = ilog2(PTRS_PER_PGD) << MIPS_PWSIZE_GDW_SHIFT; + pwsize |= ilog2(PTRS_PER_PTE) << MIPS_PWSIZE_PTW_SHIFT; + if (CONFIG_PGTABLE_LEVELS >= 3) + pwsize |= ilog2(PTRS_PER_PMD) << MIPS_PWSIZE_MDW_SHIFT; + + /* Set pointer size to size of directory pointers */ + if (IS_ENABLED(CONFIG_64BIT)) + pwsize |= MIPS_PWSIZE_PS_MASK; + /* PTEs may be multiple pointers long (e.g. with XPA) */ + pwsize |= ((PTE_T_LOG2 - PGD_T_LOG2) << MIPS_PWSIZE_PTEW_SHIFT) + & MIPS_PWSIZE_PTEW_MASK; + + write_c0_pwsize(pwsize); + + /* Make sure everything is set before we enable the HTW */ + back_to_back_c0_hazard(); + + /* + * Enable HTW (and only for XUSeg on 64-bit), and disable the rest of + * the pwctl fields. + */ + config = 1 << MIPS_PWCTL_PWEN_SHIFT; + if (IS_ENABLED(CONFIG_64BIT)) + config |= MIPS_PWCTL_XU_MASK; + write_c0_pwctl(config); + pr_info("Hardware Page Table Walker enabled\n"); + + print_htw_config(); +} + +static void config_xpa_params(void) +{ +#ifdef CONFIG_XPA + unsigned int pagegrain; + + if (mips_xpa_disabled) { + pr_info("Extended Physical Addressing (XPA) disabled\n"); + return; + } + + pagegrain = read_c0_pagegrain(); + write_c0_pagegrain(pagegrain | PG_ELPA); + back_to_back_c0_hazard(); + pagegrain = read_c0_pagegrain(); + + if (pagegrain & PG_ELPA) + pr_info("Extended Physical Addressing (XPA) enabled\n"); + else + panic("Extended Physical Addressing (XPA) disabled"); +#endif +} + +static void check_pabits(void) +{ + unsigned long entry; + unsigned pabits, fillbits; + + if (!cpu_has_rixi || _PAGE_NO_EXEC == 0) { + /* + * We'll only be making use of the fact that we can rotate bits + * into the fill if the CPU supports RIXI, so don't bother + * probing this for CPUs which don't. + */ + return; + } + + write_c0_entrylo0(~0ul); + back_to_back_c0_hazard(); + entry = read_c0_entrylo0(); + + /* clear all non-PFN bits */ + entry &= ~((1 << MIPS_ENTRYLO_PFN_SHIFT) - 1); + entry &= ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI); + + /* find a lower bound on PABITS, and upper bound on fill bits */ + pabits = fls_long(entry) + 6; + fillbits = max_t(int, (int)BITS_PER_LONG - pabits, 0); + + /* minus the RI & XI bits */ + fillbits -= min_t(unsigned, fillbits, 2); + + if (fillbits >= ilog2(_PAGE_NO_EXEC)) + fill_includes_sw_bits = true; + + pr_debug("Entry* registers contain %u fill bits\n", fillbits); +} + +void build_tlb_refill_handler(void) +{ + /* + * The refill handler is generated per-CPU, multi-node systems + * may have local storage for it. The other handlers are only + * needed once. + */ + static int run_once = 0; + + if (IS_ENABLED(CONFIG_XPA) && !cpu_has_rixi) + panic("Kernels supporting XPA currently require CPUs with RIXI"); + + output_pgtable_bits_defines(); + check_pabits(); + +#ifdef CONFIG_64BIT + check_for_high_segbits = current_cpu_data.vmbits > (PGDIR_SHIFT + PGD_TABLE_ORDER + PAGE_SHIFT - 3); +#endif + + if (cpu_has_3kex) { +#ifndef CONFIG_MIPS_PGD_C0_CONTEXT + if (!run_once) { + build_setup_pgd(); + build_r3000_tlb_refill_handler(); + build_r3000_tlb_load_handler(); + build_r3000_tlb_store_handler(); + build_r3000_tlb_modify_handler(); + flush_tlb_handlers(); + run_once++; + } +#else + panic("No R3000 TLB refill handler"); +#endif + return; + } + + if (cpu_has_ldpte) + setup_pw(); + + if (!run_once) { + scratch_reg = allocate_kscratch(); + build_setup_pgd(); + build_r4000_tlb_load_handler(); + build_r4000_tlb_store_handler(); + build_r4000_tlb_modify_handler(); + if (cpu_has_ldpte) + build_loongson3_tlb_refill_handler(); + else + build_r4000_tlb_refill_handler(); + flush_tlb_handlers(); + run_once++; + } + if (cpu_has_xpa) + config_xpa_params(); + if (cpu_has_htw) + config_htw_params(); +} diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c new file mode 100644 index 000000000..75ef90486 --- /dev/null +++ b/arch/mips/mm/uasm-micromips.c @@ -0,0 +1,232 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * A small micro-assembler. It is intentionally kept simple, does only + * support a subset of instructions, and does not try to hide pipeline + * effects like branch delay slots. + * + * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer + * Copyright (C) 2005, 2007 Maciej W. Rozycki + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2012, 2013 MIPS Technologies, Inc. All rights reserved. + */ + +#include <linux/kernel.h> +#include <linux/types.h> + +#include <asm/inst.h> +#include <asm/elf.h> +#include <asm/bugs.h> +#include <asm/uasm.h> + +#define RS_MASK 0x1f +#define RS_SH 16 +#define RT_MASK 0x1f +#define RT_SH 21 +#define SCIMM_MASK 0x3ff +#define SCIMM_SH 16 + +/* This macro sets the non-variable bits of an instruction. */ +#define M(a, b, c, d, e, f) \ + ((a) << OP_SH \ + | (b) << RT_SH \ + | (c) << RS_SH \ + | (d) << RD_SH \ + | (e) << RE_SH \ + | (f) << FUNC_SH) + +#include "uasm.c" + +static const struct insn insn_table_MM[insn_invalid] = { + [insn_addu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD}, + [insn_addiu] = {M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_and] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD}, + [insn_andi] = {M(mm_andi32_op, 0, 0, 0, 0, 0), RT | RS | UIMM}, + [insn_beq] = {M(mm_beq32_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_beql] = {0, 0}, + [insn_bgez] = {M(mm_pool32i_op, mm_bgez_op, 0, 0, 0, 0), RS | BIMM}, + [insn_bgezl] = {0, 0}, + [insn_bltz] = {M(mm_pool32i_op, mm_bltz_op, 0, 0, 0, 0), RS | BIMM}, + [insn_bltzl] = {0, 0}, + [insn_bne] = {M(mm_bne32_op, 0, 0, 0, 0, 0), RT | RS | BIMM}, + [insn_cache] = {M(mm_pool32b_op, 0, 0, mm_cache_func, 0, 0), RT | RS | SIMM}, + [insn_cfc1] = {M(mm_pool32f_op, 0, 0, 0, mm_cfc1_op, mm_32f_73_op), RT | RS}, + [insn_cfcmsa] = {M(mm_pool32s_op, 0, msa_cfc_op, 0, 0, mm_32s_elm_op), RD | RE}, + [insn_ctc1] = {M(mm_pool32f_op, 0, 0, 0, mm_ctc1_op, mm_32f_73_op), RT | RS}, + [insn_ctcmsa] = {M(mm_pool32s_op, 0, msa_ctc_op, 0, 0, mm_32s_elm_op), RD | RE}, + [insn_daddu] = {0, 0}, + [insn_daddiu] = {0, 0}, + [insn_di] = {M(mm_pool32a_op, 0, 0, 0, mm_di_op, mm_pool32axf_op), RS}, + [insn_divu] = {M(mm_pool32a_op, 0, 0, 0, mm_divu_op, mm_pool32axf_op), RT | RS}, + [insn_dmfc0] = {0, 0}, + [insn_dmtc0] = {0, 0}, + [insn_dsll] = {0, 0}, + [insn_dsll32] = {0, 0}, + [insn_dsra] = {0, 0}, + [insn_dsrl] = {0, 0}, + [insn_dsrl32] = {0, 0}, + [insn_drotr] = {0, 0}, + [insn_drotr32] = {0, 0}, + [insn_dsubu] = {0, 0}, + [insn_eret] = {M(mm_pool32a_op, 0, 0, 0, mm_eret_op, mm_pool32axf_op), 0}, + [insn_ins] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_ins_op), RT | RS | RD | RE}, + [insn_ext] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_ext_op), RT | RS | RD | RE}, + [insn_j] = {M(mm_j32_op, 0, 0, 0, 0, 0), JIMM}, + [insn_jal] = {M(mm_jal32_op, 0, 0, 0, 0, 0), JIMM}, + [insn_jalr] = {M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RT | RS}, + [insn_jr] = {M(mm_pool32a_op, 0, 0, 0, mm_jalr_op, mm_pool32axf_op), RS}, + [insn_lb] = {M(mm_lb32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_ld] = {0, 0}, + [insn_lh] = {M(mm_lh32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_ll] = {M(mm_pool32c_op, 0, 0, (mm_ll_func << 1), 0, 0), RS | RT | SIMM}, + [insn_lld] = {0, 0}, + [insn_lui] = {M(mm_pool32i_op, mm_lui_op, 0, 0, 0, 0), RS | SIMM}, + [insn_lw] = {M(mm_lw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_mfc0] = {M(mm_pool32a_op, 0, 0, 0, mm_mfc0_op, mm_pool32axf_op), RT | RS | RD}, + [insn_mfhi] = {M(mm_pool32a_op, 0, 0, 0, mm_mfhi32_op, mm_pool32axf_op), RS}, + [insn_mflo] = {M(mm_pool32a_op, 0, 0, 0, mm_mflo32_op, mm_pool32axf_op), RS}, + [insn_mtc0] = {M(mm_pool32a_op, 0, 0, 0, mm_mtc0_op, mm_pool32axf_op), RT | RS | RD}, + [insn_mthi] = {M(mm_pool32a_op, 0, 0, 0, mm_mthi32_op, mm_pool32axf_op), RS}, + [insn_mtlo] = {M(mm_pool32a_op, 0, 0, 0, mm_mtlo32_op, mm_pool32axf_op), RS}, + [insn_mul] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_mul_op), RT | RS | RD}, + [insn_or] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_or32_op), RT | RS | RD}, + [insn_ori] = {M(mm_ori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM}, + [insn_pref] = {M(mm_pool32c_op, 0, 0, (mm_pref_func << 1), 0, 0), RT | RS | SIMM}, + [insn_rfe] = {0, 0}, + [insn_sc] = {M(mm_pool32c_op, 0, 0, (mm_sc_func << 1), 0, 0), RT | RS | SIMM}, + [insn_scd] = {0, 0}, + [insn_sd] = {0, 0}, + [insn_sll] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD}, + [insn_sllv] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD}, + [insn_slt] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_slt_op), RT | RS | RD}, + [insn_sltiu] = {M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_sltu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD}, + [insn_sra] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD}, + [insn_srav] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_srav_op), RT | RS | RD}, + [insn_srl] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_srl32_op), RT | RS | RD}, + [insn_srlv] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_srlv32_op), RT | RS | RD}, + [insn_rotr] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_rotr_op), RT | RS | RD}, + [insn_subu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_subu32_op), RT | RS | RD}, + [insn_sw] = {M(mm_sw32_op, 0, 0, 0, 0, 0), RT | RS | SIMM}, + [insn_sync] = {M(mm_pool32a_op, 0, 0, 0, mm_sync_op, mm_pool32axf_op), RS}, + [insn_tlbp] = {M(mm_pool32a_op, 0, 0, 0, mm_tlbp_op, mm_pool32axf_op), 0}, + [insn_tlbr] = {M(mm_pool32a_op, 0, 0, 0, mm_tlbr_op, mm_pool32axf_op), 0}, + [insn_tlbwi] = {M(mm_pool32a_op, 0, 0, 0, mm_tlbwi_op, mm_pool32axf_op), 0}, + [insn_tlbwr] = {M(mm_pool32a_op, 0, 0, 0, mm_tlbwr_op, mm_pool32axf_op), 0}, + [insn_wait] = {M(mm_pool32a_op, 0, 0, 0, mm_wait_op, mm_pool32axf_op), SCIMM}, + [insn_wsbh] = {M(mm_pool32a_op, 0, 0, 0, mm_wsbh_op, mm_pool32axf_op), RT | RS}, + [insn_xor] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_xor32_op), RT | RS | RD}, + [insn_xori] = {M(mm_xori32_op, 0, 0, 0, 0, 0), RT | RS | UIMM}, + [insn_dins] = {0, 0}, + [insn_dinsm] = {0, 0}, + [insn_syscall] = {M(mm_pool32a_op, 0, 0, 0, mm_syscall_op, mm_pool32axf_op), SCIMM}, + [insn_bbit0] = {0, 0}, + [insn_bbit1] = {0, 0}, + [insn_lwx] = {0, 0}, + [insn_ldx] = {0, 0}, +}; + +#undef M + +static inline u32 build_bimm(s32 arg) +{ + WARN(arg > 0xffff || arg < -0x10000, + KERN_WARNING "Micro-assembler field overflow\n"); + + WARN(arg & 0x3, KERN_WARNING "Invalid micro-assembler branch target\n"); + + return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 1) & 0x7fff); +} + +static inline u32 build_jimm(u32 arg) +{ + + WARN(arg & ~((JIMM_MASK << 2) | 1), + KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg >> 1) & JIMM_MASK; +} + +/* + * The order of opcode arguments is implicitly left to right, + * starting with RS and ending with FUNC or IMM. + */ +static void build_insn(u32 **buf, enum opcode opc, ...) +{ + const struct insn *ip; + va_list ap; + u32 op; + + if (opc < 0 || opc >= insn_invalid || + (opc == insn_daddiu && r4k_daddiu_bug()) || + (insn_table_MM[opc].match == 0 && insn_table_MM[opc].fields == 0)) + panic("Unsupported Micro-assembler instruction %d", opc); + + ip = &insn_table_MM[opc]; + + op = ip->match; + va_start(ap, opc); + if (ip->fields & RS) { + if (opc == insn_mfc0 || opc == insn_mtc0 || + opc == insn_cfc1 || opc == insn_ctc1) + op |= build_rt(va_arg(ap, u32)); + else + op |= build_rs(va_arg(ap, u32)); + } + if (ip->fields & RT) { + if (opc == insn_mfc0 || opc == insn_mtc0 || + opc == insn_cfc1 || opc == insn_ctc1) + op |= build_rs(va_arg(ap, u32)); + else + op |= build_rt(va_arg(ap, u32)); + } + if (ip->fields & RD) + op |= build_rd(va_arg(ap, u32)); + if (ip->fields & RE) + op |= build_re(va_arg(ap, u32)); + if (ip->fields & SIMM) + op |= build_simm(va_arg(ap, s32)); + if (ip->fields & UIMM) + op |= build_uimm(va_arg(ap, u32)); + if (ip->fields & BIMM) + op |= build_bimm(va_arg(ap, s32)); + if (ip->fields & JIMM) + op |= build_jimm(va_arg(ap, u32)); + if (ip->fields & FUNC) + op |= build_func(va_arg(ap, u32)); + if (ip->fields & SET) + op |= build_set(va_arg(ap, u32)); + if (ip->fields & SCIMM) + op |= build_scimm(va_arg(ap, u32)); + va_end(ap); + +#ifdef CONFIG_CPU_LITTLE_ENDIAN + **buf = ((op & 0xffff) << 16) | (op >> 16); +#else + **buf = op; +#endif + (*buf)++; +} + +static inline void +__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) +{ + long laddr = (long)lab->addr; + long raddr = (long)rel->addr; + + switch (rel->type) { + case R_MIPS_PC16: +#ifdef CONFIG_CPU_LITTLE_ENDIAN + *rel->addr |= (build_bimm(laddr - (raddr + 4)) << 16); +#else + *rel->addr |= build_bimm(laddr - (raddr + 4)); +#endif + break; + + default: + panic("Unsupported Micro-assembler relocation %d", + rel->type); + } +} diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c new file mode 100644 index 000000000..e15c6700c --- /dev/null +++ b/arch/mips/mm/uasm-mips.c @@ -0,0 +1,292 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * A small micro-assembler. It is intentionally kept simple, does only + * support a subset of instructions, and does not try to hide pipeline + * effects like branch delay slots. + * + * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer + * Copyright (C) 2005, 2007 Maciej W. Rozycki + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2012, 2013 MIPS Technologies, Inc. All rights reserved. + */ + +#include <linux/kernel.h> +#include <linux/types.h> + +#include <asm/inst.h> +#include <asm/elf.h> +#include <asm/bugs.h> +#include <asm/uasm.h> + +#define RS_MASK 0x1f +#define RS_SH 21 +#define RT_MASK 0x1f +#define RT_SH 16 +#define SCIMM_MASK 0xfffff +#define SCIMM_SH 6 + +/* This macro sets the non-variable bits of an instruction. */ +#define M(a, b, c, d, e, f) \ + ((a) << OP_SH \ + | (b) << RS_SH \ + | (c) << RT_SH \ + | (d) << RD_SH \ + | (e) << RE_SH \ + | (f) << FUNC_SH) + +/* This macro sets the non-variable bits of an R6 instruction. */ +#define M6(a, b, c, d, e) \ + ((a) << OP_SH \ + | (b) << RS_SH \ + | (c) << RT_SH \ + | (d) << SIMM9_SH \ + | (e) << FUNC_SH) + +#include "uasm.c" + +static const struct insn insn_table[insn_invalid] = { + [insn_addiu] = {M(addiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_addu] = {M(spec_op, 0, 0, 0, 0, addu_op), RS | RT | RD}, + [insn_and] = {M(spec_op, 0, 0, 0, 0, and_op), RS | RT | RD}, + [insn_andi] = {M(andi_op, 0, 0, 0, 0, 0), RS | RT | UIMM}, + [insn_bbit0] = {M(lwc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_bbit1] = {M(swc2_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_beq] = {M(beq_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_beql] = {M(beql_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_bgez] = {M(bcond_op, 0, bgez_op, 0, 0, 0), RS | BIMM}, + [insn_bgezl] = {M(bcond_op, 0, bgezl_op, 0, 0, 0), RS | BIMM}, + [insn_bgtz] = {M(bgtz_op, 0, 0, 0, 0, 0), RS | BIMM}, + [insn_blez] = {M(blez_op, 0, 0, 0, 0, 0), RS | BIMM}, + [insn_bltz] = {M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM}, + [insn_bltzl] = {M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM}, + [insn_bne] = {M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM}, + [insn_break] = {M(spec_op, 0, 0, 0, 0, break_op), SCIMM}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_cache] = {M(cache_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#else + [insn_cache] = {M6(spec3_op, 0, 0, 0, cache6_op), RS | RT | SIMM9}, +#endif + [insn_cfc1] = {M(cop1_op, cfc_op, 0, 0, 0, 0), RT | RD}, + [insn_cfcmsa] = {M(msa_op, 0, msa_cfc_op, 0, 0, msa_elm_op), RD | RE}, + [insn_ctc1] = {M(cop1_op, ctc_op, 0, 0, 0, 0), RT | RD}, + [insn_ctcmsa] = {M(msa_op, 0, msa_ctc_op, 0, 0, msa_elm_op), RD | RE}, + [insn_daddiu] = {M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_daddu] = {M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD}, + [insn_ddivu] = {M(spec_op, 0, 0, 0, 0, ddivu_op), RS | RT}, + [insn_ddivu_r6] = {M(spec_op, 0, 0, 0, ddivu_ddivu6_op, ddivu_op), + RS | RT | RD}, + [insn_di] = {M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT}, + [insn_dins] = {M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE}, + [insn_dinsm] = {M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE}, + [insn_dinsu] = {M(spec3_op, 0, 0, 0, 0, dinsu_op), RS | RT | RD | RE}, + [insn_divu] = {M(spec_op, 0, 0, 0, 0, divu_op), RS | RT}, + [insn_divu_r6] = {M(spec_op, 0, 0, 0, divu_divu6_op, divu_op), + RS | RT | RD}, + [insn_dmfc0] = {M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_dmodu] = {M(spec_op, 0, 0, 0, ddivu_dmodu_op, ddivu_op), + RS | RT | RD}, + [insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT}, + [insn_dmulu] = {M(spec_op, 0, 0, 0, dmultu_dmulu_op, dmultu_op), + RS | RT | RD}, + [insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE}, + [insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE}, + [insn_dsbh] = {M(spec3_op, 0, 0, 0, dsbh_op, dbshfl_op), RT | RD}, + [insn_dshd] = {M(spec3_op, 0, 0, 0, dshd_op, dbshfl_op), RT | RD}, + [insn_dsll] = {M(spec_op, 0, 0, 0, 0, dsll_op), RT | RD | RE}, + [insn_dsll32] = {M(spec_op, 0, 0, 0, 0, dsll32_op), RT | RD | RE}, + [insn_dsllv] = {M(spec_op, 0, 0, 0, 0, dsllv_op), RS | RT | RD}, + [insn_dsra] = {M(spec_op, 0, 0, 0, 0, dsra_op), RT | RD | RE}, + [insn_dsra32] = {M(spec_op, 0, 0, 0, 0, dsra32_op), RT | RD | RE}, + [insn_dsrav] = {M(spec_op, 0, 0, 0, 0, dsrav_op), RS | RT | RD}, + [insn_dsrl] = {M(spec_op, 0, 0, 0, 0, dsrl_op), RT | RD | RE}, + [insn_dsrl32] = {M(spec_op, 0, 0, 0, 0, dsrl32_op), RT | RD | RE}, + [insn_dsrlv] = {M(spec_op, 0, 0, 0, 0, dsrlv_op), RS | RT | RD}, + [insn_dsubu] = {M(spec_op, 0, 0, 0, 0, dsubu_op), RS | RT | RD}, + [insn_eret] = {M(cop0_op, cop_op, 0, 0, 0, eret_op), 0}, + [insn_ext] = {M(spec3_op, 0, 0, 0, 0, ext_op), RS | RT | RD | RE}, + [insn_ins] = {M(spec3_op, 0, 0, 0, 0, ins_op), RS | RT | RD | RE}, + [insn_j] = {M(j_op, 0, 0, 0, 0, 0), JIMM}, + [insn_jal] = {M(jal_op, 0, 0, 0, 0, 0), JIMM}, + [insn_jalr] = {M(spec_op, 0, 0, 0, 0, jalr_op), RS | RD}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_jr] = {M(spec_op, 0, 0, 0, 0, jr_op), RS}, +#else + [insn_jr] = {M(spec_op, 0, 0, 0, 0, jalr_op), RS}, +#endif + [insn_lb] = {M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lbu] = {M(lbu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_ld] = {M(ld_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lddir] = {M(lwc2_op, 0, 0, 0, lddir_op, mult_op), RS | RT | RD}, + [insn_ldpte] = {M(lwc2_op, 0, 0, 0, ldpte_op, mult_op), RS | RD}, + [insn_ldx] = {M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD}, + [insn_lh] = {M(lh_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lhu] = {M(lhu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_ll] = {M(ll_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lld] = {M(lld_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#else + [insn_ll] = {M6(spec3_op, 0, 0, 0, ll6_op), RS | RT | SIMM9}, + [insn_lld] = {M6(spec3_op, 0, 0, 0, lld6_op), RS | RT | SIMM9}, +#endif + [insn_lui] = {M(lui_op, 0, 0, 0, 0, 0), RT | SIMM}, + [insn_lw] = {M(lw_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lwu] = {M(lwu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_lwx] = {M(spec3_op, 0, 0, 0, lwx_op, lx_op), RS | RT | RD}, + [insn_mfc0] = {M(cop0_op, mfc_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_mfhc0] = {M(cop0_op, mfhc0_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_mfhi] = {M(spec_op, 0, 0, 0, 0, mfhi_op), RD}, + [insn_mflo] = {M(spec_op, 0, 0, 0, 0, mflo_op), RD}, + [insn_modu] = {M(spec_op, 0, 0, 0, divu_modu_op, divu_op), + RS | RT | RD}, + [insn_movn] = {M(spec_op, 0, 0, 0, 0, movn_op), RS | RT | RD}, + [insn_movz] = {M(spec_op, 0, 0, 0, 0, movz_op), RS | RT | RD}, + [insn_mtc0] = {M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_mthc0] = {M(cop0_op, mthc0_op, 0, 0, 0, 0), RT | RD | SET}, + [insn_mthi] = {M(spec_op, 0, 0, 0, 0, mthi_op), RS}, + [insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS}, + [insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op), + RS | RT | RD}, + [insn_muhu] = {M(spec_op, 0, 0, 0, multu_muhu_op, multu_op), + RS | RT | RD}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD}, +#else + [insn_mul] = {M(spec_op, 0, 0, 0, mult_mul_op, mult_op), RS | RT | RD}, +#endif + [insn_multu] = {M(spec_op, 0, 0, 0, 0, multu_op), RS | RT}, + [insn_nor] = {M(spec_op, 0, 0, 0, 0, nor_op), RS | RT | RD}, + [insn_or] = {M(spec_op, 0, 0, 0, 0, or_op), RS | RT | RD}, + [insn_ori] = {M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_pref] = {M(pref_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#else + [insn_pref] = {M6(spec3_op, 0, 0, 0, pref6_op), RS | RT | SIMM9}, +#endif + [insn_rfe] = {M(cop0_op, cop_op, 0, 0, 0, rfe_op), 0}, + [insn_rotr] = {M(spec_op, 1, 0, 0, 0, srl_op), RT | RD | RE}, + [insn_sb] = {M(sb_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#ifndef CONFIG_CPU_MIPSR6 + [insn_sc] = {M(sc_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_scd] = {M(scd_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, +#else + [insn_sc] = {M6(spec3_op, 0, 0, 0, sc6_op), RS | RT | SIMM9}, + [insn_scd] = {M6(spec3_op, 0, 0, 0, scd6_op), RS | RT | SIMM9}, +#endif + [insn_sd] = {M(sd_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_seleqz] = {M(spec_op, 0, 0, 0, 0, seleqz_op), RS | RT | RD}, + [insn_selnez] = {M(spec_op, 0, 0, 0, 0, selnez_op), RS | RT | RD}, + [insn_sh] = {M(sh_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_sll] = {M(spec_op, 0, 0, 0, 0, sll_op), RT | RD | RE}, + [insn_sllv] = {M(spec_op, 0, 0, 0, 0, sllv_op), RS | RT | RD}, + [insn_slt] = {M(spec_op, 0, 0, 0, 0, slt_op), RS | RT | RD}, + [insn_slti] = {M(slti_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_sltiu] = {M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_sltu] = {M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD}, + [insn_sra] = {M(spec_op, 0, 0, 0, 0, sra_op), RT | RD | RE}, + [insn_srav] = {M(spec_op, 0, 0, 0, 0, srav_op), RS | RT | RD}, + [insn_srl] = {M(spec_op, 0, 0, 0, 0, srl_op), RT | RD | RE}, + [insn_srlv] = {M(spec_op, 0, 0, 0, 0, srlv_op), RS | RT | RD}, + [insn_subu] = {M(spec_op, 0, 0, 0, 0, subu_op), RS | RT | RD}, + [insn_sw] = {M(sw_op, 0, 0, 0, 0, 0), RS | RT | SIMM}, + [insn_sync] = {M(spec_op, 0, 0, 0, 0, sync_op), RE}, + [insn_syscall] = {M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM}, + [insn_tlbp] = {M(cop0_op, cop_op, 0, 0, 0, tlbp_op), 0}, + [insn_tlbr] = {M(cop0_op, cop_op, 0, 0, 0, tlbr_op), 0}, + [insn_tlbwi] = {M(cop0_op, cop_op, 0, 0, 0, tlbwi_op), 0}, + [insn_tlbwr] = {M(cop0_op, cop_op, 0, 0, 0, tlbwr_op), 0}, + [insn_wait] = {M(cop0_op, cop_op, 0, 0, 0, wait_op), SCIMM}, + [insn_wsbh] = {M(spec3_op, 0, 0, 0, wsbh_op, bshfl_op), RT | RD}, + [insn_xor] = {M(spec_op, 0, 0, 0, 0, xor_op), RS | RT | RD}, + [insn_xori] = {M(xori_op, 0, 0, 0, 0, 0), RS | RT | UIMM}, + [insn_yield] = {M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD}, +}; + +#undef M + +static inline u32 build_bimm(s32 arg) +{ + WARN(arg > 0x1ffff || arg < -0x20000, + KERN_WARNING "Micro-assembler field overflow\n"); + + WARN(arg & 0x3, KERN_WARNING "Invalid micro-assembler branch target\n"); + + return ((arg < 0) ? (1 << 15) : 0) | ((arg >> 2) & 0x7fff); +} + +static inline u32 build_jimm(u32 arg) +{ + WARN(arg & ~(JIMM_MASK << 2), + KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg >> 2) & JIMM_MASK; +} + +/* + * The order of opcode arguments is implicitly left to right, + * starting with RS and ending with FUNC or IMM. + */ +static void build_insn(u32 **buf, enum opcode opc, ...) +{ + const struct insn *ip; + va_list ap; + u32 op; + + if (opc < 0 || opc >= insn_invalid || + (opc == insn_daddiu && r4k_daddiu_bug()) || + (insn_table[opc].match == 0 && insn_table[opc].fields == 0)) + panic("Unsupported Micro-assembler instruction %d", opc); + + ip = &insn_table[opc]; + + op = ip->match; + va_start(ap, opc); + if (ip->fields & RS) + op |= build_rs(va_arg(ap, u32)); + if (ip->fields & RT) + op |= build_rt(va_arg(ap, u32)); + if (ip->fields & RD) + op |= build_rd(va_arg(ap, u32)); + if (ip->fields & RE) + op |= build_re(va_arg(ap, u32)); + if (ip->fields & SIMM) + op |= build_simm(va_arg(ap, s32)); + if (ip->fields & UIMM) + op |= build_uimm(va_arg(ap, u32)); + if (ip->fields & BIMM) + op |= build_bimm(va_arg(ap, s32)); + if (ip->fields & JIMM) + op |= build_jimm(va_arg(ap, u32)); + if (ip->fields & FUNC) + op |= build_func(va_arg(ap, u32)); + if (ip->fields & SET) + op |= build_set(va_arg(ap, u32)); + if (ip->fields & SCIMM) + op |= build_scimm(va_arg(ap, u32)); + if (ip->fields & SIMM9) + op |= build_scimm9(va_arg(ap, u32)); + va_end(ap); + + **buf = op; + (*buf)++; +} + +static inline void +__resolve_relocs(struct uasm_reloc *rel, struct uasm_label *lab) +{ + long laddr = (long)lab->addr; + long raddr = (long)rel->addr; + + switch (rel->type) { + case R_MIPS_PC16: + *rel->addr |= build_bimm(laddr - (raddr + 4)); + break; + + default: + panic("Unsupported Micro-assembler relocation %d", + rel->type); + } +} diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c new file mode 100644 index 000000000..125140979 --- /dev/null +++ b/arch/mips/mm/uasm.c @@ -0,0 +1,644 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * A small micro-assembler. It is intentionally kept simple, does only + * support a subset of instructions, and does not try to hide pipeline + * effects like branch delay slots. + * + * Copyright (C) 2004, 2005, 2006, 2008 Thiemo Seufer + * Copyright (C) 2005, 2007 Maciej W. Rozycki + * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) + * Copyright (C) 2012, 2013 MIPS Technologies, Inc. All rights reserved. + */ + +enum fields { + RS = 0x001, + RT = 0x002, + RD = 0x004, + RE = 0x008, + SIMM = 0x010, + UIMM = 0x020, + BIMM = 0x040, + JIMM = 0x080, + FUNC = 0x100, + SET = 0x200, + SCIMM = 0x400, + SIMM9 = 0x800, +}; + +#define OP_MASK 0x3f +#define OP_SH 26 +#define RD_MASK 0x1f +#define RD_SH 11 +#define RE_MASK 0x1f +#define RE_SH 6 +#define IMM_MASK 0xffff +#define IMM_SH 0 +#define JIMM_MASK 0x3ffffff +#define JIMM_SH 0 +#define FUNC_MASK 0x3f +#define FUNC_SH 0 +#define SET_MASK 0x7 +#define SET_SH 0 +#define SIMM9_SH 7 +#define SIMM9_MASK 0x1ff + +enum opcode { + insn_addiu, insn_addu, insn_and, insn_andi, insn_bbit0, insn_bbit1, + insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bgtz, insn_blez, + insn_bltz, insn_bltzl, insn_bne, insn_break, insn_cache, insn_cfc1, + insn_cfcmsa, insn_ctc1, insn_ctcmsa, insn_daddiu, insn_daddu, insn_ddivu, + insn_ddivu_r6, insn_di, insn_dins, insn_dinsm, insn_dinsu, insn_divu, + insn_divu_r6, insn_dmfc0, insn_dmodu, insn_dmtc0, insn_dmultu, + insn_dmulu, insn_drotr, insn_drotr32, insn_dsbh, insn_dshd, insn_dsll, + insn_dsll32, insn_dsllv, insn_dsra, insn_dsra32, insn_dsrav, insn_dsrl, + insn_dsrl32, insn_dsrlv, insn_dsubu, insn_eret, insn_ext, insn_ins, + insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, insn_lbu, insn_ld, + insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld, + insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi, + insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0, + insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_muhu, insn_nor, + insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc, + insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll, + insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra, + insn_srav, insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, + insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, + insn_wsbh, insn_xor, insn_xori, insn_yield, + insn_invalid /* insn_invalid must be last */ +}; + +struct insn { + u32 match; + enum fields fields; +}; + +static inline u32 build_rs(u32 arg) +{ + WARN(arg & ~RS_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & RS_MASK) << RS_SH; +} + +static inline u32 build_rt(u32 arg) +{ + WARN(arg & ~RT_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & RT_MASK) << RT_SH; +} + +static inline u32 build_rd(u32 arg) +{ + WARN(arg & ~RD_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & RD_MASK) << RD_SH; +} + +static inline u32 build_re(u32 arg) +{ + WARN(arg & ~RE_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & RE_MASK) << RE_SH; +} + +static inline u32 build_simm(s32 arg) +{ + WARN(arg > 0x7fff || arg < -0x8000, + KERN_WARNING "Micro-assembler field overflow\n"); + + return arg & 0xffff; +} + +static inline u32 build_uimm(u32 arg) +{ + WARN(arg & ~IMM_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return arg & IMM_MASK; +} + +static inline u32 build_scimm(u32 arg) +{ + WARN(arg & ~SCIMM_MASK, + KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & SCIMM_MASK) << SCIMM_SH; +} + +static inline u32 build_scimm9(s32 arg) +{ + WARN((arg > 0xff || arg < -0x100), + KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & SIMM9_MASK) << SIMM9_SH; +} + +static inline u32 build_func(u32 arg) +{ + WARN(arg & ~FUNC_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return arg & FUNC_MASK; +} + +static inline u32 build_set(u32 arg) +{ + WARN(arg & ~SET_MASK, KERN_WARNING "Micro-assembler field overflow\n"); + + return arg & SET_MASK; +} + +static void build_insn(u32 **buf, enum opcode opc, ...); + +#define I_u1u2u3(op) \ +Ip_u1u2u3(op) \ +{ \ + build_insn(buf, insn##op, a, b, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_s3s1s2(op) \ +Ip_s3s1s2(op) \ +{ \ + build_insn(buf, insn##op, b, c, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1u3(op) \ +Ip_u2u1u3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u3u2u1(op) \ +Ip_u3u2u1(op) \ +{ \ + build_insn(buf, insn##op, c, b, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u3u1u2(op) \ +Ip_u3u1u2(op) \ +{ \ + build_insn(buf, insn##op, b, c, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u1u2s3(op) \ +Ip_u1u2s3(op) \ +{ \ + build_insn(buf, insn##op, a, b, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2s3u1(op) \ +Ip_u2s3u1(op) \ +{ \ + build_insn(buf, insn##op, c, a, b); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1s3(op) \ +Ip_u2u1s3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1msbu3(op) \ +Ip_u2u1msbu3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c+d-1, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1msb32u3(op) \ +Ip_u2u1msbu3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c+d-33, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1msb32msb3(op) \ +Ip_u2u1msbu3(op) \ +{ \ + build_insn(buf, insn##op, b, a, c+d-33, c-32); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1msbdu3(op) \ +Ip_u2u1msbu3(op) \ +{ \ + build_insn(buf, insn##op, b, a, d-1, c); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u1u2(op) \ +Ip_u1u2(op) \ +{ \ + build_insn(buf, insn##op, a, b); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u2u1(op) \ +Ip_u1u2(op) \ +{ \ + build_insn(buf, insn##op, b, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u1s2(op) \ +Ip_u1s2(op) \ +{ \ + build_insn(buf, insn##op, a, b); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_u1(op) \ +Ip_u1(op) \ +{ \ + build_insn(buf, insn##op, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +#define I_0(op) \ +Ip_0(op) \ +{ \ + build_insn(buf, insn##op); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + +I_u2u1s3(_addiu) +I_u3u1u2(_addu) +I_u2u1u3(_andi) +I_u3u1u2(_and) +I_u1u2s3(_beq) +I_u1u2s3(_beql) +I_u1s2(_bgez) +I_u1s2(_bgezl) +I_u1s2(_bgtz) +I_u1s2(_blez) +I_u1s2(_bltz) +I_u1s2(_bltzl) +I_u1u2s3(_bne) +I_u1(_break) +I_u2s3u1(_cache) +I_u1u2(_cfc1) +I_u2u1(_cfcmsa) +I_u1u2(_ctc1) +I_u2u1(_ctcmsa) +I_u1u2(_ddivu) +I_u3u1u2(_ddivu_r6) +I_u1u2u3(_dmfc0) +I_u3u1u2(_dmodu) +I_u1u2u3(_dmtc0) +I_u1u2(_dmultu) +I_u3u1u2(_dmulu) +I_u2u1s3(_daddiu) +I_u3u1u2(_daddu) +I_u1(_di); +I_u1u2(_divu) +I_u3u1u2(_divu_r6) +I_u2u1(_dsbh); +I_u2u1(_dshd); +I_u2u1u3(_dsll) +I_u2u1u3(_dsll32) +I_u3u2u1(_dsllv) +I_u2u1u3(_dsra) +I_u2u1u3(_dsra32) +I_u3u2u1(_dsrav) +I_u2u1u3(_dsrl) +I_u2u1u3(_dsrl32) +I_u3u2u1(_dsrlv) +I_u2u1u3(_drotr) +I_u2u1u3(_drotr32) +I_u3u1u2(_dsubu) +I_0(_eret) +I_u2u1msbdu3(_ext) +I_u2u1msbu3(_ins) +I_u1(_j) +I_u1(_jal) +I_u2u1(_jalr) +I_u1(_jr) +I_u2s3u1(_lb) +I_u2s3u1(_lbu) +I_u2s3u1(_ld) +I_u2s3u1(_lh) +I_u2s3u1(_lhu) +I_u2s3u1(_ll) +I_u2s3u1(_lld) +I_u1s2(_lui) +I_u2s3u1(_lw) +I_u2s3u1(_lwu) +I_u1u2u3(_mfc0) +I_u1u2u3(_mfhc0) +I_u3u1u2(_modu) +I_u3u1u2(_movn) +I_u3u1u2(_movz) +I_u1(_mfhi) +I_u1(_mflo) +I_u1u2u3(_mtc0) +I_u1u2u3(_mthc0) +I_u1(_mthi) +I_u1(_mtlo) +I_u3u1u2(_mul) +I_u1u2(_multu) +I_u3u1u2(_mulu) +I_u3u1u2(_muhu) +I_u3u1u2(_nor) +I_u3u1u2(_or) +I_u2u1u3(_ori) +I_0(_rfe) +I_u2s3u1(_sb) +I_u2s3u1(_sc) +I_u2s3u1(_scd) +I_u2s3u1(_sd) +I_u3u1u2(_seleqz) +I_u3u1u2(_selnez) +I_u2s3u1(_sh) +I_u2u1u3(_sll) +I_u3u2u1(_sllv) +I_s3s1s2(_slt) +I_u2u1s3(_slti) +I_u2u1s3(_sltiu) +I_u3u1u2(_sltu) +I_u2u1u3(_sra) +I_u3u2u1(_srav) +I_u2u1u3(_srl) +I_u3u2u1(_srlv) +I_u2u1u3(_rotr) +I_u3u1u2(_subu) +I_u2s3u1(_sw) +I_u1(_sync) +I_0(_tlbp) +I_0(_tlbr) +I_0(_tlbwi) +I_0(_tlbwr) +I_u1(_wait); +I_u2u1(_wsbh) +I_u3u1u2(_xor) +I_u2u1u3(_xori) +I_u2u1(_yield) +I_u2u1msbu3(_dins); +I_u2u1msb32u3(_dinsm); +I_u2u1msb32msb3(_dinsu); +I_u1(_syscall); +I_u1u2s3(_bbit0); +I_u1u2s3(_bbit1); +I_u3u1u2(_lwx) +I_u3u1u2(_ldx) +I_u1u2(_ldpte) +I_u2u1u3(_lddir) + +#ifdef CONFIG_CPU_CAVIUM_OCTEON +#include <asm/octeon/octeon.h> +void uasm_i_pref(u32 **buf, unsigned int a, signed int b, + unsigned int c) +{ + if (OCTEON_IS_MODEL(OCTEON_CN6XXX) && a <= 24 && a != 5) + /* + * As per erratum Core-14449, replace prefetches 0-4, + * 6-24 with 'pref 28'. + */ + build_insn(buf, insn_pref, c, 28, b); + else + build_insn(buf, insn_pref, c, a, b); +} +UASM_EXPORT_SYMBOL(uasm_i_pref); +#else +I_u2s3u1(_pref) +#endif + +/* Handle labels. */ +void uasm_build_label(struct uasm_label **lab, u32 *addr, int lid) +{ + (*lab)->addr = addr; + (*lab)->lab = lid; + (*lab)++; +} +UASM_EXPORT_SYMBOL(uasm_build_label); + +int uasm_in_compat_space_p(long addr) +{ + /* Is this address in 32bit compat space? */ + return addr == (int)addr; +} +UASM_EXPORT_SYMBOL(uasm_in_compat_space_p); + +static int uasm_rel_highest(long val) +{ +#ifdef CONFIG_64BIT + return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000; +#else + return 0; +#endif +} + +static int uasm_rel_higher(long val) +{ +#ifdef CONFIG_64BIT + return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000; +#else + return 0; +#endif +} + +int uasm_rel_hi(long val) +{ + return ((((val + 0x8000L) >> 16) & 0xffff) ^ 0x8000) - 0x8000; +} +UASM_EXPORT_SYMBOL(uasm_rel_hi); + +int uasm_rel_lo(long val) +{ + return ((val & 0xffff) ^ 0x8000) - 0x8000; +} +UASM_EXPORT_SYMBOL(uasm_rel_lo); + +void UASM_i_LA_mostly(u32 **buf, unsigned int rs, long addr) +{ + if (!uasm_in_compat_space_p(addr)) { + uasm_i_lui(buf, rs, uasm_rel_highest(addr)); + if (uasm_rel_higher(addr)) + uasm_i_daddiu(buf, rs, rs, uasm_rel_higher(addr)); + if (uasm_rel_hi(addr)) { + uasm_i_dsll(buf, rs, rs, 16); + uasm_i_daddiu(buf, rs, rs, + uasm_rel_hi(addr)); + uasm_i_dsll(buf, rs, rs, 16); + } else + uasm_i_dsll32(buf, rs, rs, 0); + } else + uasm_i_lui(buf, rs, uasm_rel_hi(addr)); +} +UASM_EXPORT_SYMBOL(UASM_i_LA_mostly); + +void UASM_i_LA(u32 **buf, unsigned int rs, long addr) +{ + UASM_i_LA_mostly(buf, rs, addr); + if (uasm_rel_lo(addr)) { + if (!uasm_in_compat_space_p(addr)) + uasm_i_daddiu(buf, rs, rs, + uasm_rel_lo(addr)); + else + uasm_i_addiu(buf, rs, rs, + uasm_rel_lo(addr)); + } +} +UASM_EXPORT_SYMBOL(UASM_i_LA); + +/* Handle relocations. */ +void uasm_r_mips_pc16(struct uasm_reloc **rel, u32 *addr, int lid) +{ + (*rel)->addr = addr; + (*rel)->type = R_MIPS_PC16; + (*rel)->lab = lid; + (*rel)++; +} +UASM_EXPORT_SYMBOL(uasm_r_mips_pc16); + +static inline void __resolve_relocs(struct uasm_reloc *rel, + struct uasm_label *lab); + +void uasm_resolve_relocs(struct uasm_reloc *rel, + struct uasm_label *lab) +{ + struct uasm_label *l; + + for (; rel->lab != UASM_LABEL_INVALID; rel++) + for (l = lab; l->lab != UASM_LABEL_INVALID; l++) + if (rel->lab == l->lab) + __resolve_relocs(rel, l); +} +UASM_EXPORT_SYMBOL(uasm_resolve_relocs); + +void uasm_move_relocs(struct uasm_reloc *rel, u32 *first, u32 *end, + long off) +{ + for (; rel->lab != UASM_LABEL_INVALID; rel++) + if (rel->addr >= first && rel->addr < end) + rel->addr += off; +} +UASM_EXPORT_SYMBOL(uasm_move_relocs); + +void uasm_move_labels(struct uasm_label *lab, u32 *first, u32 *end, + long off) +{ + for (; lab->lab != UASM_LABEL_INVALID; lab++) + if (lab->addr >= first && lab->addr < end) + lab->addr += off; +} +UASM_EXPORT_SYMBOL(uasm_move_labels); + +void uasm_copy_handler(struct uasm_reloc *rel, struct uasm_label *lab, + u32 *first, u32 *end, u32 *target) +{ + long off = (long)(target - first); + + memcpy(target, first, (end - first) * sizeof(u32)); + + uasm_move_relocs(rel, first, end, off); + uasm_move_labels(lab, first, end, off); +} +UASM_EXPORT_SYMBOL(uasm_copy_handler); + +int uasm_insn_has_bdelay(struct uasm_reloc *rel, u32 *addr) +{ + for (; rel->lab != UASM_LABEL_INVALID; rel++) { + if (rel->addr == addr + && (rel->type == R_MIPS_PC16 + || rel->type == R_MIPS_26)) + return 1; + } + + return 0; +} +UASM_EXPORT_SYMBOL(uasm_insn_has_bdelay); + +/* Convenience functions for labeled branches. */ +void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_bltz(p, reg, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_bltz); + +void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_b(p, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_b); + +void uasm_il_beq(u32 **p, struct uasm_reloc **r, unsigned int r1, + unsigned int r2, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_beq(p, r1, r2, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_beq); + +void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_beqz(p, reg, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_beqz); + +void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_beqzl(p, reg, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_beqzl); + +void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1, + unsigned int reg2, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_bne(p, reg1, reg2, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_bne); + +void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_bnez(p, reg, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_bnez); + +void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_bgezl(p, reg, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_bgezl); + +void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg, + int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_bgez(p, reg, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_bgez); + +void uasm_il_bbit0(u32 **p, struct uasm_reloc **r, unsigned int reg, + unsigned int bit, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_bbit0(p, reg, bit, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_bbit0); + +void uasm_il_bbit1(u32 **p, struct uasm_reloc **r, unsigned int reg, + unsigned int bit, int lid) +{ + uasm_r_mips_pc16(r, *p, lid); + uasm_i_bbit1(p, reg, bit, 0); +} +UASM_EXPORT_SYMBOL(uasm_il_bbit1); |