Diffstat
-rw-r--r-- | arch/arc/mm/Makefile  |   11
-rw-r--r-- | arch/arc/mm/cache.c   | 1332
-rw-r--r-- | arch/arc/mm/dma.c     |  187
-rw-r--r-- | arch/arc/mm/extable.c |   50
-rw-r--r-- | arch/arc/mm/fault.c   |  238
-rw-r--r-- | arch/arc/mm/highmem.c |  141
-rw-r--r-- | arch/arc/mm/init.c    |  239
-rw-r--r-- | arch/arc/mm/ioremap.c |  106
-rw-r--r-- | arch/arc/mm/mmap.c    |   79
-rw-r--r-- | arch/arc/mm/tlb.c     | 1008
-rw-r--r-- | arch/arc/mm/tlbex.S   |  405
11 files changed, 3796 insertions(+), 0 deletions(-)
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile new file mode 100644 index 000000000..3703a4969 --- /dev/null +++ b/arch/arc/mm/Makefile @@ -0,0 +1,11 @@ +# +# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +obj-y := extable.o ioremap.o dma.o fault.o init.o +obj-y += tlb.o tlbex.o cache.o mmap.o +obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c new file mode 100644 index 000000000..c5254c596 --- /dev/null +++ b/arch/arc/mm/cache.c @@ -0,0 +1,1332 @@ +/* + * ARC Cache Management + * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/cache.h> +#include <linux/mmu_context.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <linux/pagemap.h> +#include <asm/cacheflush.h> +#include <asm/cachectl.h> +#include <asm/setup.h> + +#ifdef CONFIG_ISA_ARCV2 +#define USE_RGN_FLSH 1 +#endif + +static int l2_line_sz; +static int ioc_exists; +int slc_enable = 1, ioc_enable = 1; +unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */ +unsigned long perip_end = 0xFFFFFFFF; /* legacy value */ + +void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page); + +void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz); +void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz); +void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz); + +char *arc_cache_mumbojumbo(int c, char *buf, int len) +{ + int n = 0; + struct cpuinfo_arc_cache *p; + +#define PR_CACHE(p, cfg, str) \ + if (!(p)->line_len) \ + n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ + else \ + n += scnprintf(buf + n, len - n, \ + str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", \ + (p)->sz_k, (p)->assoc, (p)->line_len, \ + (p)->vipt ? "VIPT" : "PIPT", \ + (p)->alias ? " aliasing" : "", \ + IS_USED_CFG(cfg)); + + PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); + PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); + + p = &cpuinfo_arc700[c].slc; + if (p->line_len) + n += scnprintf(buf + n, len - n, + "SLC\t\t: %uK, %uB Line%s\n", + p->sz_k, p->line_len, IS_USED_RUN(slc_enable)); + + n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", + perip_base, + IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) ")); + + return buf; +} + +/* + * Read the Cache Build Confuration Registers, Decode them and save into + * the cpuinfo structure for later use. 
+ * No Validation done here, simply read/convert the BCRs + */ +static void read_decode_cache_bcr_arcv2(int cpu) +{ + struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc; + struct bcr_generic sbcr; + + struct bcr_slc_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, way:2, lsz:2, sz:4; +#else + unsigned int sz:4, lsz:2, way:2, pad:24; +#endif + } slc_cfg; + + struct bcr_clust_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8; +#else + unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7; +#endif + } cbcr; + + struct bcr_volatile { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int start:4, limit:4, pad:22, order:1, disable:1; +#else + unsigned int disable:1, order:1, pad:22, limit:4, start:4; +#endif + } vol; + + + READ_BCR(ARC_REG_SLC_BCR, sbcr); + if (sbcr.ver) { + READ_BCR(ARC_REG_SLC_CFG, slc_cfg); + p_slc->sz_k = 128 << slc_cfg.sz; + l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64; + } + + READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); + if (cbcr.c) + ioc_exists = 1; + else + ioc_enable = 0; + + /* HS 2.0 didn't have AUX_VOL */ + if (cpuinfo_arc700[cpu].core.family > 0x51) { + READ_BCR(AUX_VOL, vol); + perip_base = vol.start << 28; + /* HS 3.0 has limit and strict-ordering fields */ + if (cpuinfo_arc700[cpu].core.family > 0x52) + perip_end = (vol.limit << 28) - 1; + } +} + +void read_decode_cache_bcr(void) +{ + struct cpuinfo_arc_cache *p_ic, *p_dc; + unsigned int cpu = smp_processor_id(); + struct bcr_cache { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; +#else + unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; +#endif + } ibcr, dbcr; + + p_ic = &cpuinfo_arc700[cpu].icache; + READ_BCR(ARC_REG_IC_BCR, ibcr); + + if (!ibcr.ver) + goto dc_chk; + + if (ibcr.ver <= 3) { + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; /* Fixed to 2w set assoc */ + } else if (ibcr.ver >= 4) { + p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */ + } + + p_ic->line_len = 8 << ibcr.line_len; + p_ic->sz_k = 1 << (ibcr.sz - 1); + p_ic->vipt = 1; + p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1; + +dc_chk: + p_dc = &cpuinfo_arc700[cpu].dcache; + READ_BCR(ARC_REG_DC_BCR, dbcr); + + if (!dbcr.ver) + goto slc_chk; + + if (dbcr.ver <= 3) { + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ + p_dc->vipt = 1; + p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + } else if (dbcr.ver >= 4) { + p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */ + p_dc->vipt = 0; + p_dc->alias = 0; /* PIPT so can't VIPT alias */ + } + + p_dc->line_len = 16 << dbcr.line_len; + p_dc->sz_k = 1 << (dbcr.sz - 1); + +slc_chk: + if (is_isa_arcv2()) + read_decode_cache_bcr_arcv2(cpu); +} + +/* + * Line Operation on {I,D}-Cache + */ + +#define OP_INV 0x1 +#define OP_FLUSH 0x2 +#define OP_FLUSH_N_INV 0x3 +#define OP_INV_IC 0x4 + +/* + * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3) + * + * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. + * The orig Cache Management Module "CDU" only required paddr to invalidate a + * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. + * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching + * the exact same line. + * + * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, + * paddr alone could not be used to correctly index the cache. 
+ * + * ------------------ + * MMU v1/v2 (Fixed Page Size 8k) + * ------------------ + * The solution was to provide CDU with these additonal vaddr bits. These + * would be bits [x:13], x would depend on cache-geometry, 13 comes from + * standard page size of 8k. + * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits + * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the + * orig 5 bits of paddr were anyways ignored by CDU line ops, as they + * represent the offset within cache-line. The adv of using this "clumsy" + * interface for additional info was no new reg was needed in CDU programming + * model. + * + * 17:13 represented the max num of bits passable, actual bits needed were + * fewer, based on the num-of-aliases possible. + * -for 2 alias possibility, only bit 13 needed (32K cache) + * -for 4 alias possibility, bits 14:13 needed (64K cache) + * + * ------------------ + * MMU v3 + * ------------------ + * This ver of MMU supports variable page sizes (1k-16k): although Linux will + * only support 8k (default), 16k and 4k. + * However from hardware perspective, smaller page sizes aggravate aliasing + * meaning more vaddr bits needed to disambiguate the cache-line-op ; + * the existing scheme of piggybacking won't work for certain configurations. + * Two new registers IC_PTAG and DC_PTAG inttoduced. + * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs + */ + +static inline +void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + unsigned int aux_cmd; + int num_lines; + + if (op == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + /* MMUv2 and before: paddr contains stuffed vaddrs bits */ + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; + + while (num_lines-- > 0) { + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; + } +} + +/* + * For ARC700 MMUv3 I-cache and D-cache flushes + * - ARC700 programming model requires paddr and vaddr be passed in seperate + * AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the + * caches actually alias or not. + * - For HS38, only the aliasing I-cache configuration uses the PTAG reg + * (non aliasing I-cache version doesn't; while D-cache can't possibly alias) + */ +static inline +void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + unsigned int aux_cmd, aux_tag; + int num_lines; + + if (op == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + aux_tag = ARC_REG_IC_PTAG; + } else { + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + aux_tag = ARC_REG_DC_PTAG; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. 
+ * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + /* + * MMUv3, cache ops require paddr in PTAG reg + * if V-P const for loop, PTAG can be written once outside loop + */ + if (full_page) + write_aux_reg(aux_tag, paddr); + + /* + * This is technically for MMU v4, using the MMU v3 programming model + * Special work for HS38 aliasing I-cache configuration with PAE40 + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + * Note that PTAG_HI is hoisted outside the line loop + */ + if (is_pae40_enabled() && op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + + while (num_lines-- > 0) { + if (!full_page) { + write_aux_reg(aux_tag, paddr); + paddr += L1_CACHE_BYTES; + } + + write_aux_reg(aux_cmd, vaddr); + vaddr += L1_CACHE_BYTES; + } +} + +#ifndef USE_RGN_FLSH + +/* + * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT + * Here's how cache ops are implemented + * + * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL) + * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL) + * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG + * respectively, similar to MMU v3 programming model, hence + * __cache_line_loop_v3() is used) + * + * If PAE40 is enabled, independent of aliasing considerations, the higher bits + * needs to be written into PTAG_HI + */ +static inline +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + unsigned int aux_cmd; + int num_lines; + + if (op == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). 
+ */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + /* + * For HS38 PAE40 configuration + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + */ + if (is_pae40_enabled()) { + if (op == OP_INV_IC) + /* + * Non aliasing I-cache in HS38, + * aliasing I-cache handled in __cache_line_loop_v3() + */ + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + + while (num_lines-- > 0) { + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; + } +} + +#else + +/* + * optimized flush operation which takes a region as opposed to iterating per line + */ +static inline +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op, const int full_page) +{ + unsigned int s, e; + + /* Only for Non aliasing I-cache in HS38 */ + if (op == OP_INV_IC) { + s = ARC_REG_IC_IVIR; + e = ARC_REG_IC_ENDR; + } else { + s = ARC_REG_DC_STARTR; + e = ARC_REG_DC_ENDR; + } + + if (!full_page) { + /* for any leading gap between @paddr and start of cache line */ + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + + /* + * account for any trailing gap to end of cache line + * this is equivalent to DIV_ROUND_UP() in line ops above + */ + sz += L1_CACHE_BYTES - 1; + } + + if (is_pae40_enabled()) { + /* TBD: check if crossing 4TB boundary */ + if (op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + + /* ENDR needs to be set ahead of START */ + write_aux_reg(e, paddr + sz); /* ENDR is exclusive */ + write_aux_reg(s, paddr); + + /* caller waits on DC_CTRL.FS */ +} + +#endif + +#if (CONFIG_ARC_MMU_VER < 3) +#define __cache_line_loop __cache_line_loop_v2 +#elif (CONFIG_ARC_MMU_VER == 3) +#define __cache_line_loop __cache_line_loop_v3 +#elif (CONFIG_ARC_MMU_VER > 3) +#define __cache_line_loop __cache_line_loop_v4 +#endif + +#ifdef CONFIG_ARC_HAS_DCACHE + +/*************************************************************** + * Machine specific helpers for Entire D-Cache or Per Line ops + */ + +#ifndef USE_RGN_FLSH +/* + * this version avoids extra read/write of DC_CTRL for flush or invalid ops + * in the non region flush regime (such as for ARCompact) + */ +static inline void __before_dc_op(const int op) +{ + if (op == OP_FLUSH_N_INV) { + /* Dcache provides 2 cmd: FLUSH or INV + * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE + * flush-n-inv is achieved by INV cmd but with IM=1 + * So toggle INV sub-mode depending on op request and default + */ + const unsigned int ctl = ARC_REG_DC_CTRL; + write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH); + } +} + +#else + +static inline void __before_dc_op(const int op) +{ + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int val = read_aux_reg(ctl); + + if (op == OP_FLUSH_N_INV) { + val |= DC_CTRL_INV_MODE_FLUSH; + } + + if (op != OP_INV_IC) { + /* + * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1 + * combined Flush-n-invalidate uses DC_CTRL.IM = 1 set above + */ + val &= ~DC_CTRL_RGN_OP_MSK; + if (op & OP_INV) + val |= DC_CTRL_RGN_OP_INV; + } + write_aux_reg(ctl, val); +} + +#endif + + +static inline void __after_dc_op(const int op) +{ + if (op & OP_FLUSH) { + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int reg; + + /* flush / flush-n-inv both wait */ + while ((reg = read_aux_reg(ctl)) & 
DC_CTRL_FLUSH_STATUS) + ; + + /* Switch back to default Invalidate mode */ + if (op == OP_FLUSH_N_INV) + write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH); + } +} + +/* + * Operation on Entire D-Cache + * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} + * Note that constant propagation ensures all the checks are gone + * in generated code + */ +static inline void __dc_entire_op(const int op) +{ + int aux; + + __before_dc_op(op); + + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + aux = ARC_REG_DC_IVDC; + else + aux = ARC_REG_DC_FLSH; + + write_aux_reg(aux, 0x1); + + __after_dc_op(op); +} + +static inline void __dc_disable(void) +{ + const int r = ARC_REG_DC_CTRL; + + __dc_entire_op(OP_FLUSH_N_INV); + write_aux_reg(r, read_aux_reg(r) | DC_CTRL_DIS); +} + +static void __dc_enable(void) +{ + const int r = ARC_REG_DC_CTRL; + + write_aux_reg(r, read_aux_reg(r) & ~DC_CTRL_DIS); +} + +/* For kernel mappings cache operation: index is same as paddr */ +#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) + +/* + * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback) + */ +static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op) +{ + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; + unsigned long flags; + + local_irq_save(flags); + + __before_dc_op(op); + + __cache_line_loop(paddr, vaddr, sz, op, full_page); + + __after_dc_op(op); + + local_irq_restore(flags); +} + +#else + +#define __dc_entire_op(op) +#define __dc_disable() +#define __dc_enable() +#define __dc_line_op(paddr, vaddr, sz, op) +#define __dc_line_op_k(paddr, sz, op) + +#endif /* CONFIG_ARC_HAS_DCACHE */ + +#ifdef CONFIG_ARC_HAS_ICACHE + +static inline void __ic_entire_inv(void) +{ + write_aux_reg(ARC_REG_IC_IVIC, 1); + read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ +} + +static inline void +__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz) +{ + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; + unsigned long flags; + + local_irq_save(flags); + (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC, full_page); + local_irq_restore(flags); +} + +#ifndef CONFIG_SMP + +#define __ic_line_inv_vaddr(p, v, s) __ic_line_inv_vaddr_local(p, v, s) + +#else + +struct ic_inv_args { + phys_addr_t paddr, vaddr; + int sz; +}; + +static void __ic_line_inv_vaddr_helper(void *info) +{ + struct ic_inv_args *ic_inv = info; + + __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); +} + +static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz) +{ + struct ic_inv_args ic_inv = { + .paddr = paddr, + .vaddr = vaddr, + .sz = sz + }; + + on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1); +} + +#endif /* CONFIG_SMP */ + +#else /* !CONFIG_ARC_HAS_ICACHE */ + +#define __ic_entire_inv() +#define __ic_line_inv_vaddr(pstart, vstart, sz) + +#endif /* CONFIG_ARC_HAS_ICACHE */ + +noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); + unsigned long flags; + unsigned int ctrl; + phys_addr_t end; + + spin_lock_irqsave(&lock, flags); + + /* + * The Region Flush operation is specified by CTRL.RGN_OP[11..9] 
+ * - b'000 (default) is Flush, + * - b'001 is Invalidate if CTRL.IM == 0 + * - b'001 is Flush-n-Invalidate if CTRL.IM == 1 + */ + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + if (op & OP_INV) + ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */ + else + ctrl &= ~SLC_CTRL_RGN_OP_INV; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + /* + * Lower bits are ignored, no need to clip + * END needs to be setup before START (latter triggers the operation) + * END can't be same as START, so add (l2_line_sz - 1) to sz + */ + end = paddr + sz + l2_line_sz - 1; + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_END1, upper_32_bits(end)); + + write_aux_reg(ARC_REG_SLC_RGN_END, lower_32_bits(end)); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_SLC_RGN_START1, upper_32_bits(paddr)); + + write_aux_reg(ARC_REG_SLC_RGN_START, lower_32_bits(paddr)); + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_REG_SLC_CTRL); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + spin_unlock_irqrestore(&lock, flags); +#endif +} + +noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); + + const unsigned long SLC_LINE_MASK = ~(l2_line_sz - 1); + unsigned int ctrl, cmd; + unsigned long flags; + int num_lines; + + spin_lock_irqsave(&lock, flags); + + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + cmd = op & OP_INV ? ARC_AUX_SLC_IVDL : ARC_AUX_SLC_FLDL; + + sz += paddr & ~SLC_LINE_MASK; + paddr &= SLC_LINE_MASK; + + num_lines = DIV_ROUND_UP(sz, l2_line_sz); + + while (num_lines-- > 0) { + write_aux_reg(cmd, paddr); + paddr += l2_line_sz; + } + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(ARC_REG_SLC_CTRL); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + spin_unlock_irqrestore(&lock, flags); +#endif +} + +#define slc_op(paddr, sz, op) slc_op_rgn(paddr, sz, op) + +noinline static void slc_entire_op(const int op) +{ + unsigned int ctrl, r = ARC_REG_SLC_CTRL; + + ctrl = read_aux_reg(r); + + if (!(op & OP_FLUSH)) /* i.e. 
OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + write_aux_reg(r, ctrl); + + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + write_aux_reg(ARC_REG_SLC_INVALIDATE, 0x1); + else + write_aux_reg(ARC_REG_SLC_FLUSH, 0x1); + + /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */ + read_aux_reg(r); + + /* Important to wait for flush to complete */ + while (read_aux_reg(r) & SLC_CTRL_BUSY); +} + +static inline void arc_slc_disable(void) +{ + const int r = ARC_REG_SLC_CTRL; + + slc_entire_op(OP_FLUSH_N_INV); + write_aux_reg(r, read_aux_reg(r) | SLC_CTRL_DIS); +} + +static inline void arc_slc_enable(void) +{ + const int r = ARC_REG_SLC_CTRL; + + write_aux_reg(r, read_aux_reg(r) & ~SLC_CTRL_DIS); +} + +/*********************************************************** + * Exported APIs + */ + +/* + * Handle cache congruency of kernel and userspace mappings of page when kernel + * writes-to/reads-from + * + * The idea is to defer flushing of kernel mapping after a WRITE, possible if: + * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent + * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) + * -In SMP, if hardware caches are coherent + * + * There's a corollary case, where kernel READs from a userspace mapped page. + * If the U-mapping is not congruent to to K-mapping, former needs flushing. + */ +void flush_dcache_page(struct page *page) +{ + struct address_space *mapping; + + if (!cache_is_vipt_aliasing()) { + clear_bit(PG_dc_clean, &page->flags); + return; + } + + /* don't handle anon pages here */ + mapping = page_mapping_file(page); + if (!mapping) + return; + + /* + * pagecache page, file not yet mapped to userspace + * Make a note that K-mapping is dirty + */ + if (!mapping_mapped(mapping)) { + clear_bit(PG_dc_clean, &page->flags); + } else if (page_mapcount(page)) { + + /* kernel reading from page with U-mapping */ + phys_addr_t paddr = (unsigned long)page_address(page); + unsigned long vaddr = page->index << PAGE_SHIFT; + + if (addr_not_cache_congruent(paddr, vaddr)) + __flush_dcache_page(paddr, vaddr); + } +} +EXPORT_SYMBOL(flush_dcache_page); + +/* + * DMA ops for systems with L1 cache only + * Make memory coherent with L1 cache by flushing/invalidating L1 lines + */ +static void __dma_cache_wback_inv_l1(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH_N_INV); +} + +static void __dma_cache_inv_l1(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_INV); +} + +static void __dma_cache_wback_l1(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH); +} + +/* + * DMA ops for systems with both L1 and L2 caches, but without IOC + * Both L1 and L2 lines need to be explicitly flushed/invalidated + */ +static void __dma_cache_wback_inv_slc(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH_N_INV); + slc_op(start, sz, OP_FLUSH_N_INV); +} + +static void __dma_cache_inv_slc(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_INV); + slc_op(start, sz, OP_INV); +} + +static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH); + slc_op(start, sz, OP_FLUSH); +} + +/* + * Exported DMA API + */ +void dma_cache_wback_inv(phys_addr_t start, unsigned long sz) +{ + __dma_cache_wback_inv(start, sz); +} +EXPORT_SYMBOL(dma_cache_wback_inv); + +void dma_cache_inv(phys_addr_t start, unsigned long sz) +{ + __dma_cache_inv(start, 
sz); +} +EXPORT_SYMBOL(dma_cache_inv); + +void dma_cache_wback(phys_addr_t start, unsigned long sz) +{ + __dma_cache_wback(start, sz); +} +EXPORT_SYMBOL(dma_cache_wback); + +/* + * This is API for making I/D Caches consistent when modifying + * kernel code (loadable modules, kprobes, kgdb...) + * This is called on insmod, with kernel virtual address for CODE of + * the module. ARC cache maintenance ops require PHY address thus we + * need to convert vmalloc addr to PHY addr + */ +void flush_icache_range(unsigned long kstart, unsigned long kend) +{ + unsigned int tot_sz; + + WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__); + + /* Shortcut for bigger flush ranges. + * Here we don't care if this was kernel virtual or phy addr + */ + tot_sz = kend - kstart; + if (tot_sz > PAGE_SIZE) { + flush_cache_all(); + return; + } + + /* Case: Kernel Phy addr (0x8000_0000 onwards) */ + if (likely(kstart > PAGE_OFFSET)) { + /* + * The 2nd arg despite being paddr will be used to index icache + * This is OK since no alternate virtual mappings will exist + * given the callers for this case: kprobe/kgdb in built-in + * kernel code only. + */ + __sync_icache_dcache(kstart, kstart, kend - kstart); + return; + } + + /* + * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff) + * (1) ARC Cache Maintenance ops only take Phy addr, hence special + * handling of kernel vaddr. + * + * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already), + * it still needs to handle a 2 page scenario, where the range + * straddles across 2 virtual pages and hence need for loop + */ + while (tot_sz > 0) { + unsigned int off, sz; + unsigned long phy, pfn; + + off = kstart % PAGE_SIZE; + pfn = vmalloc_to_pfn((void *)kstart); + phy = (pfn << PAGE_SHIFT) + off; + sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off); + __sync_icache_dcache(phy, kstart, sz); + kstart += sz; + tot_sz -= sz; + } +} +EXPORT_SYMBOL(flush_icache_range); + +/* + * General purpose helper to make I and D cache lines consistent. + * @paddr is phy addr of region + * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc) + * However in one instance, when called by kprobe (for a breakpt in + * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will + * use a paddr to index the cache (despite VIPT). This is fine since since a + * builtin kernel page will not have any virtual mappings. + * kprobe on loadable module will be kernel vaddr. 
+ */ +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len) +{ + __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); + __ic_line_inv_vaddr(paddr, vaddr, len); +} + +/* wrapper to compile time eliminate alignment checks in flush loop */ +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr) +{ + __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); +} + +/* + * wrapper to clearout kernel or userspace mappings of a page + * For kernel mappings @vaddr == @paddr + */ +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) +{ + __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); +} + +noinline void flush_cache_all(void) +{ + unsigned long flags; + + local_irq_save(flags); + + __ic_entire_inv(); + __dc_entire_op(OP_FLUSH_N_INV); + + local_irq_restore(flags); + +} + +#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING + +void flush_cache_mm(struct mm_struct *mm) +{ + flush_cache_all(); +} + +void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, + unsigned long pfn) +{ + phys_addr_t paddr = pfn << PAGE_SHIFT; + + u_vaddr &= PAGE_MASK; + + __flush_dcache_page(paddr, u_vaddr); + + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, u_vaddr); +} + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_cache_all(); +} + +void flush_anon_page(struct vm_area_struct *vma, struct page *page, + unsigned long u_vaddr) +{ + /* TBD: do we really need to clear the kernel mapping */ + __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr); + __flush_dcache_page((phys_addr_t)page_address(page), + (phys_addr_t)page_address(page)); + +} + +#endif + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long u_vaddr, struct vm_area_struct *vma) +{ + void *kfrom = kmap_atomic(from); + void *kto = kmap_atomic(to); + int clean_src_k_mappings = 0; + + /* + * If SRC page was already mapped in userspace AND it's U-mapping is + * not congruent with K-mapping, sync former to physical page so that + * K-mapping in memcpy below, sees the right data + * + * Note that while @u_vaddr refers to DST page's userspace vaddr, it is + * equally valid for SRC page as well + * + * For !VIPT cache, all of this gets compiled out as + * addr_not_cache_congruent() is 0 + */ + if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { + __flush_dcache_page((unsigned long)kfrom, u_vaddr); + clean_src_k_mappings = 1; + } + + copy_page(kto, kfrom); + + /* + * Mark DST page K-mapping as dirty for a later finalization by + * update_mmu_cache(). Although the finalization could have been done + * here as well (given that both vaddr/paddr are available). + * But update_mmu_cache() already has code to do that for other + * non copied user pages (e.g. read faults which wire in pagecache page + * directly). 
+ */ + clear_bit(PG_dc_clean, &to->flags); + + /* + * if SRC was already usermapped and non-congruent to kernel mapping + * sync the kernel mapping back to physical page + */ + if (clean_src_k_mappings) { + __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom); + set_bit(PG_dc_clean, &from->flags); + } else { + clear_bit(PG_dc_clean, &from->flags); + } + + kunmap_atomic(kto); + kunmap_atomic(kfrom); +} + +void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) +{ + clear_page(to); + clear_bit(PG_dc_clean, &page->flags); +} +EXPORT_SYMBOL(clear_user_page); + +/********************************************************************** + * Explicit Cache flush request from user space via syscall + * Needed for JITs which generate code on the fly + */ +SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) +{ + /* TBD: optimize this */ + flush_cache_all(); + return 0; +} + +/* + * IO-Coherency (IOC) setup rules: + * + * 1. Needs to be at system level, so only once by Master core + * Non-Masters need not be accessing caches at that time + * - They are either HALT_ON_RESET and kick started much later or + * - if run on reset, need to ensure that arc_platform_smp_wait_to_boot() + * doesn't perturb caches or coherency unit + * + * 2. caches (L1 and SLC) need to be purged (flush+inv) before setting up IOC, + * otherwise any straggler data might behave strangely post IOC enabling + * + * 3. All Caches need to be disabled when setting up IOC to elide any in-flight + * Coherency transactions + */ +noinline void __init arc_ioc_setup(void) +{ + unsigned int ioc_base, mem_sz; + + /* + * If IOC was already enabled (due to bootloader) it technically needs to + * be reconfigured with aperture base,size corresponding to Linux memory map + * which will certainly be different than uboot's. But disabling and + * reenabling IOC when DMA might be potentially active is tricky business. + * To avoid random memory issues later, just panic here and ask user to + * upgrade bootloader to one which doesn't enable IOC + */ + if (read_aux_reg(ARC_REG_IO_COH_ENABLE) & ARC_IO_COH_ENABLE_BIT) + panic("IOC already enabled, please upgrade bootloader!\n"); + + if (!ioc_enable) + return; + + /* + * As for today we don't support both IOC and ZONE_HIGHMEM enabled + * simultaneously. This happens because as of today IOC aperture covers + * only ZONE_NORMAL (low mem) and any dma transactions outside this + * region won't be HW coherent. + * If we want to use both IOC and ZONE_HIGHMEM we can use + * bounce_buffer to handle dma transactions to HIGHMEM. + * Also it is possible to modify dma_direct cache ops or increase IOC + * aperture size if we are planning to use HIGHMEM without PAE. + */ + if (IS_ENABLED(CONFIG_HIGHMEM)) + panic("IOC and HIGHMEM can't be used simultaneously"); + + /* Flush + invalidate + disable L1 dcache */ + __dc_disable(); + + /* Flush + invalidate SLC */ + if (read_aux_reg(ARC_REG_SLC_BCR)) + slc_entire_op(OP_FLUSH_N_INV); + + /* + * currently IOC Aperture covers entire DDR + * TBD: fix for PGU + 1GB of low mem + * TBD: fix for PAE + */ + mem_sz = arc_get_mem_sz(); + + if (!is_power_of_2(mem_sz) || mem_sz < 4096) + panic("IOC Aperture size must be power of 2 larger than 4KB"); + + /* + * IOC Aperture size decoded as 2 ^ (SIZE + 2) KB, + * so setting 0x11 implies 512MB, 0x12 implies 1GB... 
+ */ + write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, order_base_2(mem_sz >> 10) - 2); + + /* for now assume kernel base is start of IOC aperture */ + ioc_base = CONFIG_LINUX_RAM_BASE; + + if (ioc_base % mem_sz != 0) + panic("IOC Aperture start must be aligned to the size of the aperture"); + + write_aux_reg(ARC_REG_IO_COH_AP0_BASE, ioc_base >> 12); + write_aux_reg(ARC_REG_IO_COH_PARTIAL, ARC_IO_COH_PARTIAL_BIT); + write_aux_reg(ARC_REG_IO_COH_ENABLE, ARC_IO_COH_ENABLE_BIT); + + /* Re-enable L1 dcache */ + __dc_enable(); +} + +/* + * Cache related boot time checks/setups only needed on master CPU: + * - Geometry checks (kernel build and hardware agree: e.g. L1_CACHE_BYTES) + * Assume SMP only, so all cores will have same cache config. A check on + * one core suffices for all + * - IOC setup / dma callbacks only need to be done once + */ +void __init arc_cache_init_master(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { + struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; + + if (!ic->line_len) + panic("cache support enabled but non-existent cache\n"); + + if (ic->line_len != L1_CACHE_BYTES) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, L1_CACHE_BYTES); + + /* + * In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG + * pair to provide vaddr/paddr respectively, just as in MMU v3 + */ + if (is_isa_arcv2() && ic->alias) + _cache_line_loop_ic_fn = __cache_line_loop_v3; + else + _cache_line_loop_ic_fn = __cache_line_loop; + } + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { + struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; + + if (!dc->line_len) + panic("cache support enabled but non-existent cache\n"); + + if (dc->line_len != L1_CACHE_BYTES) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, L1_CACHE_BYTES); + + /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ + if (is_isa_arcompact()) { + int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE); + + if (dc->alias) { + if (!handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + if (CACHE_COLORS_NUM != num_colors) + panic("CACHE_COLORS_NUM not optimized for config\n"); + } else if (!dc->alias && handled) { + panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } + } + } + + /* + * Check that SMP_CACHE_BYTES (and hence ARCH_DMA_MINALIGN) is larger + * or equal to any cache line length. 
+ */ + BUILD_BUG_ON_MSG(L1_CACHE_BYTES > SMP_CACHE_BYTES, + "SMP_CACHE_BYTES must be >= any cache line length"); + if (is_isa_arcv2() && (l2_line_sz > SMP_CACHE_BYTES)) + panic("L2 Cache line [%d] > kernel Config [%d]\n", + l2_line_sz, SMP_CACHE_BYTES); + + /* Note that SLC disable not formally supported till HS 3.0 */ + if (is_isa_arcv2() && l2_line_sz && !slc_enable) + arc_slc_disable(); + + if (is_isa_arcv2() && ioc_exists) + arc_ioc_setup(); + + if (is_isa_arcv2() && l2_line_sz && slc_enable) { + __dma_cache_wback_inv = __dma_cache_wback_inv_slc; + __dma_cache_inv = __dma_cache_inv_slc; + __dma_cache_wback = __dma_cache_wback_slc; + } else { + __dma_cache_wback_inv = __dma_cache_wback_inv_l1; + __dma_cache_inv = __dma_cache_inv_l1; + __dma_cache_wback = __dma_cache_wback_l1; + } + /* + * In case of IOC (say IOC+SLC case), pointers above could still be set + * but end up not being relevant as the first function in chain is not + * called at all for @dma_direct_ops + * arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*() + */ +} + +void __ref arc_cache_init(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + char str[256]; + + pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str))); + + if (!cpu) + arc_cache_init_master(); + + /* + * In PAE regime, TLB and cache maintenance ops take wider addresses + * And even if PAE is not enabled in kernel, the upper 32-bits still need + * to be zeroed to keep the ops sane. + * As an optimization for more common !PAE enabled case, zero them out + * once at init, rather than checking/setting to 0 for every runtime op + */ + if (is_isa_arcv2() && pae40_exist_but_not_enab()) { + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) + write_aux_reg(ARC_REG_IC_PTAG_HI, 0); + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) + write_aux_reg(ARC_REG_DC_PTAG_HI, 0); + + if (l2_line_sz) { + write_aux_reg(ARC_REG_SLC_RGN_END1, 0); + write_aux_reg(ARC_REG_SLC_RGN_START1, 0); + } + } +} diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c new file mode 100644 index 000000000..c75d5c347 --- /dev/null +++ b/arch/arc/mm/dma.c @@ -0,0 +1,187 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/dma-noncoherent.h> +#include <asm/cache.h> +#include <asm/cacheflush.h> + +/* + * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c) + * - hardware IOC not available (or "dma-coherent" not set for device in DT) + * - But still handle both coherent and non-coherent requests from caller + * + * For DMA coherent hardware (IOC) generic code suffices + */ +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + unsigned long order = get_order(size); + struct page *page; + phys_addr_t paddr; + void *kvaddr; + bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT); + + /* + * __GFP_HIGHMEM flag is cleared by upper layer functions + * (in include/linux/dma-mapping.h) so we should never get a + * __GFP_HIGHMEM here. + */ + BUG_ON(gfp & __GFP_HIGHMEM); + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + + /* This is linear addr (0x8000_0000 based) */ + paddr = page_to_phys(page); + + *dma_handle = paddr; + + /* + * A coherent buffer needs MMU mapping to enforce non-cachability. + * kvaddr is kernel Virtual address (0x7000_0000 based). 
+ */ + if (need_coh) { + kvaddr = ioremap_nocache(paddr, size); + if (kvaddr == NULL) { + __free_pages(page, order); + return NULL; + } + } else { + kvaddr = (void *)(u32)paddr; + } + + /* + * Evict any existing L1 and/or L2 lines for the backing page + * in case it was used earlier as a normal "cached" page. + * Yeah this bit us - STAR 9000898266 + * + * Although core does call flush_cache_vmap(), it gets kvaddr hence + * can't be used to efficiently flush L1 and/or L2 which need paddr + * Currently flush_cache_vmap nukes the L1 cache completely which + * will be optimized as a separate commit + */ + if (need_coh) + dma_cache_wback_inv(paddr, size); + + return kvaddr; +} + +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) +{ + phys_addr_t paddr = dma_handle; + struct page *page = virt_to_page(paddr); + + if (!(attrs & DMA_ATTR_NON_CONSISTENT)) + iounmap((void __force __iomem *)vaddr); + + __free_pages(page, get_order(size)); +} + +int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + unsigned long user_count = vma_pages(vma); + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long pfn = __phys_to_pfn(dma_addr); + unsigned long off = vma->vm_pgoff; + int ret = -ENXIO; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off < count && user_count <= (count - off)) { + ret = remap_pfn_range(vma, vma->vm_start, + pfn + off, + user_count << PAGE_SHIFT, + vma->vm_page_prot); + } + + return ret; +} + +/* + * Cache operations depending on function and direction argument, inspired by + * https://lkml.org/lkml/2018/5/18/979 + * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20] + * dma-mapping: provide a generic dma-noncoherent implementation)" + * + * | map == for_device | unmap == for_cpu + * |---------------------------------------------------------------- + * TO_DEV | writeback writeback | none none + * FROM_DEV | invalidate invalidate | invalidate* invalidate* + * BIDIR | writeback+inv writeback+inv | invalidate invalidate + * + * [*] needed for CPU speculative prefetches + * + * NOTE: we don't check the validity of direction argument as it is done in + * upper layer functions (in include/linux/dma-mapping.h) + */ + +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + dma_cache_wback(paddr, size); + break; + + case DMA_FROM_DEVICE: + dma_cache_inv(paddr, size); + break; + + case DMA_BIDIRECTIONAL: + dma_cache_wback_inv(paddr, size); + break; + + default: + break; + } +} + +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + break; + + /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */ + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv(paddr, size); + break; + + default: + break; + } +} + +/* + * Plug in coherent or noncoherent dma ops + */ +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + /* + * IOC hardware snoops all DMA traffic keeping the caches consistent + * with memory - eliding need for any explicit cache maintenance of + * DMA buffers - so we can use dma_direct cache ops. 
+ */ + if (is_isa_arcv2() && ioc_enable && coherent) { + set_dma_ops(dev, &dma_direct_ops); + dev_info(dev, "use dma_direct_ops cache ops\n"); + } else { + set_dma_ops(dev, &dma_noncoherent_ops); + dev_info(dev, "use dma_noncoherent_ops cache ops\n"); + } +} diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c new file mode 100644 index 000000000..72125a34e --- /dev/null +++ b/arch/arc/mm/extable.c @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Borrowed heavily from MIPS + */ + +#include <linux/export.h> +#include <linux/extable.h> +#include <linux/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(instruction_pointer(regs)); + if (fixup) { + regs->ret = fixup->fixup; + + return 1; + } + + return 0; +} + +#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE + +unsigned long arc_clear_user_noinline(void __user *to, + unsigned long n) +{ + return __arc_clear_user(to, n); +} +EXPORT_SYMBOL(arc_clear_user_noinline); + +long arc_strncpy_from_user_noinline(char *dst, const char __user *src, + long count) +{ + return __arc_strncpy_from_user(dst, src, count); +} +EXPORT_SYMBOL(arc_strncpy_from_user_noinline); + +long arc_strnlen_user_noinline(const char __user *src, long n) +{ + return __arc_strnlen_user(src, n); +} +EXPORT_SYMBOL(arc_strnlen_user_noinline); +#endif diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c new file mode 100644 index 000000000..4e8143de3 --- /dev/null +++ b/arch/arc/mm/fault.c @@ -0,0 +1,238 @@ +/* Page Fault Handling for ARC (TLB Miss / ProtV) + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/signal.h> +#include <linux/interrupt.h> +#include <linux/sched/signal.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/uaccess.h> +#include <linux/kdebug.h> +#include <linux/perf_event.h> +#include <linux/mm_types.h> +#include <asm/pgalloc.h> +#include <asm/mmu.h> + +/* + * kernel virtual address is required to implement vmalloc/pkmap/fixmap + * Refer to asm/processor.h for System Memory Map + * + * It simply copies the PMD entry (pointer to 2nd level page table or hugepage) + * from swapper pgdir to task pgdir. The 2nd level table/page is thus shared + */ +noinline static int handle_kernel_vaddr_fault(unsigned long address) +{ + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. 
+ */ + pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd = pgd_offset_fast(current->active_mm, address); + pgd_k = pgd_offset_k(address); + + if (!pgd_present(*pgd_k)) + goto bad_area; + + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + goto bad_area; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + goto bad_area; + + set_pmd(pmd, *pmd_k); + + /* XXX: create the TLB entry here */ + return 0; + +bad_area: + return 1; +} + +void do_page_fault(unsigned long address, struct pt_regs *regs) +{ + struct vm_area_struct *vma = NULL; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + int si_code = SEGV_MAPERR; + int ret; + vm_fault_t fault; + int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + */ + if (address >= VMALLOC_START && !user_mode(regs)) { + ret = handle_kernel_vaddr_fault(address); + if (unlikely(ret)) + goto no_context; + else + return; + } + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (faulthandler_disabled() || !mm) + goto no_context; + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; +retry: + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + si_code = SEGV_ACCERR; + + /* Handle protection violation, execute on heap or stack */ + + if ((regs->ecr_vec == ECR_V_PROTV) && + (regs->ecr_cause == ECR_C_PROTV_INST_FETCH)) + goto bad_area; + + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + flags |= FAULT_FLAG_WRITE; + } else { + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. 
+ */ + fault = handle_mm_fault(vma, address, flags); + + if (unlikely(fatal_signal_pending(current))) { + + /* + * if fault retry, mmap_sem already relinquished by core mm + * so OK to return to user mode (with signal handled first) + */ + if (fault & VM_FAULT_RETRY) { + if (!user_mode(regs)) + goto no_context; + return; + } + } + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + + if (likely(!(fault & VM_FAULT_ERROR))) { + if (flags & FAULT_FLAG_ALLOW_RETRY) { + /* To avoid updating stats twice for retry case */ + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } + + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + goto retry; + } + } + + /* Fault Handled Gracefully */ + up_read(&mm->mmap_sem); + return; + } + + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + + /* no man's land */ + BUG(); + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + up_read(&mm->mmap_sem); + + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + tsk->thread.fault_address = address; + force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? + * + * (The kernel has valid exception-points in the source + * when it accesses user-memory. When it fails in one + * of those points, we find it in a table and do a jump + * to some fixup code that loads an appropriate error + * code) + */ + if (fixup_exception(regs)) + return; + + die("Oops", regs, address); + +out_of_memory: + up_read(&mm->mmap_sem); + + if (user_mode(regs)) { + pagefault_out_of_memory(); + return; + } + + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + if (!user_mode(regs)) + goto no_context; + + tsk->thread.fault_address = address; + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk); +} diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c new file mode 100644 index 000000000..77ff64a87 --- /dev/null +++ b/arch/arc/mm/highmem.c @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/bootmem.h> +#include <linux/export.h> +#include <linux/highmem.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +/* + * HIGHMEM API: + * + * kmap() API provides sleep semantics hence referred to as "permanent maps" + * It allows mapping LAST_PKMAP pages, using @last_pkmap_nr as the cursor + * for book-keeping + * + * kmap_atomic() can't sleep (calls pagefault_disable()), thus it provides + * shortlived ala "temporary mappings" which historically were implemented as + * fixmaps (compile time addr etc). Their book-keeping is done per cpu. + * + * Both these facts combined (preemption disabled and per-cpu allocation) + * means the total number of concurrent fixmaps will be limited to max + * such allocations in a single control path. 
Thus KM_TYPE_NR (another + * historic relic) is a small'ish number which caps max percpu fixmaps + * + * ARC HIGHMEM Details + * + * - the kernel vaddr space from 0x7z to 0x8z (currently used by vmalloc/module) + * is now shared between vmalloc and kmap (non overlapping though) + * + * - Both fixmap/pkmap use a dedicated page table each, hooked up to swapper PGD + * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means + * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split) + * + * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE + * slots across NR_CPUS would be more than sufficient (generic code defines + * KM_TYPE_NR as 20). + * + * - pkmap being preemptible, in theory could do with more than 256 concurrent + * mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse + * the PGD and only works with a single page table @pkmap_page_table, hence + * sets the limit + */ + +extern pte_t * pkmap_page_table; +static pte_t * fixmap_page_table; + +void *kmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return page_address(page); + + return kmap_high(page); +} +EXPORT_SYMBOL(kmap); + +void *kmap_atomic(struct page *page) +{ + int idx, cpu_idx; + unsigned long vaddr; + + preempt_disable(); + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + cpu_idx = kmap_atomic_idx_push(); + idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + vaddr = FIXMAP_ADDR(idx); + + set_pte_at(&init_mm, vaddr, fixmap_page_table + idx, + mk_pte(page, kmap_prot)); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void __kunmap_atomic(void *kv) +{ + unsigned long kvaddr = (unsigned long)kv; + + if (kvaddr >= FIXMAP_BASE && kvaddr < (FIXMAP_BASE + FIXMAP_SIZE)) { + + /* + * Because preemption is disabled, this vaddr can be associated + * with the current allocated index. + * But in case of multiple live kmap_atomic(), it still relies on + * callers to unmap in right order. + */ + int cpu_idx = kmap_atomic_idx(); + int idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + + WARN_ON(kvaddr != FIXMAP_ADDR(idx)); + + pte_clear(&init_mm, kvaddr, fixmap_page_table + idx); + local_flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE); + + kmap_atomic_idx_pop(); + } + + pagefault_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(__kunmap_atomic); + +static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr) +{ + pgd_t *pgd_k; + pud_t *pud_k; + pmd_t *pmd_k; + pte_t *pte_k; + + pgd_k = pgd_offset_k(kvaddr); + pud_k = pud_offset(pgd_k, kvaddr); + pmd_k = pmd_offset(pud_k, kvaddr); + + pte_k = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd_k, pte_k); + return pte_k; +} + +void __init kmap_init(void) +{ + /* Due to recursive include hell, we can't do this in processor.h */ + BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE)); + + BUILD_BUG_ON(KM_TYPE_NR > PTRS_PER_PTE); + pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE); + + BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE); + fixmap_page_table = alloc_kmap_pgtable(FIXMAP_BASE); +} diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c new file mode 100644 index 000000000..f890b2f9f --- /dev/null +++ b/arch/arc/mm/init.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bootmem.h> +#include <linux/memblock.h> +#ifdef CONFIG_BLK_DEV_INITRD +#include <linux/initrd.h> +#endif +#include <linux/of_fdt.h> +#include <linux/swap.h> +#include <linux/module.h> +#include <linux/highmem.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/sections.h> +#include <asm/arcregs.h> + +pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); +char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); +EXPORT_SYMBOL(empty_zero_page); + +static const unsigned long low_mem_start = CONFIG_LINUX_RAM_BASE; +static unsigned long low_mem_sz; + +#ifdef CONFIG_HIGHMEM +static unsigned long min_high_pfn, max_high_pfn; +static u64 high_mem_start; +static u64 high_mem_sz; +#endif + +#ifdef CONFIG_DISCONTIGMEM +struct pglist_data node_data[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_data); +#endif + +long __init arc_get_mem_sz(void) +{ + return low_mem_sz; +} + +/* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ +static int __init setup_mem_sz(char *str) +{ + low_mem_sz = memparse(str, NULL) & PAGE_MASK; + + /* early console might not be setup yet - it will show up later */ + pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(low_mem_sz)); + + return 0; +} +early_param("mem", setup_mem_sz); + +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ + int in_use = 0; + + if (!low_mem_sz) { + if (base != low_mem_start) + panic("CONFIG_LINUX_RAM_BASE != DT memory { }"); + + low_mem_sz = size; + in_use = 1; + } else { +#ifdef CONFIG_HIGHMEM + high_mem_start = base; + high_mem_sz = size; + in_use = 1; +#endif + } + + pr_info("Memory @ %llx [%lldM] %s\n", + base, TO_MB(size), !in_use ? "Not used":""); +} + +#ifdef CONFIG_BLK_DEV_INITRD +static int __init early_initrd(char *p) +{ + unsigned long start, size; + char *endp; + + start = memparse(p, &endp); + if (*endp == ',') { + size = memparse(endp + 1, NULL); + + initrd_start = (unsigned long)__va(start); + initrd_end = (unsigned long)__va(start + size); + } + return 0; +} +early_param("initrd", early_initrd); +#endif + +/* + * First memory setup routine called from setup_arch() + * 1. setup swapper's mm @init_mm + * 2. Count the pages we have and setup bootmem allocator + * 3. zone setup + */ +void __init setup_arch_memory(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zones_holes[MAX_NR_ZONES]; + + init_mm.start_code = (unsigned long)_text; + init_mm.end_code = (unsigned long)_etext; + init_mm.end_data = (unsigned long)_edata; + init_mm.brk = (unsigned long)_end; + + /* first page of system - kernel .vector starts here */ + min_low_pfn = ARCH_PFN_OFFSET; + + /* Last usable page of low mem */ + max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz); + +#ifdef CONFIG_FLATMEM + /* pfn_valid() uses this */ + max_mapnr = max_low_pfn - min_low_pfn; +#endif + + /*------------- bootmem allocator setup -----------------------*/ + + /* + * seed the bootmem allocator after any DT memory node parsing or + * "mem=xxx" cmdline overrides have potentially updated @arc_mem_sz + * + * Only low mem is added, otherwise we have crashes when allocating + * mem_map[] itself. NO_BOOTMEM allocates mem_map[] at the end of + * avail memory, ending in highmem with a > 32-bit address. 
However + * it then tries to memset it with a truncaed 32-bit handle, causing + * the crash + */ + + memblock_add_node(low_mem_start, low_mem_sz, 0); + memblock_reserve(CONFIG_LINUX_LINK_BASE, + __pa(_end) - CONFIG_LINUX_LINK_BASE); + +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) + memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); +#endif + + early_init_fdt_reserve_self(); + early_init_fdt_scan_reserved_mem(); + + memblock_dump_all(); + + /*----------------- node/zones setup --------------------------*/ + memset(zones_size, 0, sizeof(zones_size)); + memset(zones_holes, 0, sizeof(zones_holes)); + + zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; + zones_holes[ZONE_NORMAL] = 0; + + /* + * We can't use the helper free_area_init(zones[]) because it uses + * PAGE_OFFSET to compute the @min_low_pfn which would be wrong + * when our kernel doesn't start at PAGE_OFFSET, i.e. + * PAGE_OFFSET != CONFIG_LINUX_RAM_BASE + */ + free_area_init_node(0, /* node-id */ + zones_size, /* num pages per zone */ + min_low_pfn, /* first pfn of node */ + zones_holes); /* holes */ + +#ifdef CONFIG_HIGHMEM + /* + * Populate a new node with highmem + * + * On ARC (w/o PAE) HIGHMEM addresses are actually smaller (0 based) + * than addresses in normal ala low memory (0x8000_0000 based). + * Even with PAE, the huge peripheral space hole would waste a lot of + * mem with single mem_map[]. This warrants a mem_map per region design. + * Thus HIGHMEM on ARC is imlemented with DISCONTIGMEM. + * + * DISCONTIGMEM in turns requires multiple nodes. node 0 above is + * populated with normal memory zone while node 1 only has highmem + */ + node_set_online(1); + + min_high_pfn = PFN_DOWN(high_mem_start); + max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); + + zones_size[ZONE_NORMAL] = 0; + zones_holes[ZONE_NORMAL] = 0; + + zones_size[ZONE_HIGHMEM] = max_high_pfn - min_high_pfn; + zones_holes[ZONE_HIGHMEM] = 0; + + free_area_init_node(1, /* node-id */ + zones_size, /* num pages per zone */ + min_high_pfn, /* first pfn of node */ + zones_holes); /* holes */ + + high_memory = (void *)(min_high_pfn << PAGE_SHIFT); + kmap_init(); +#endif +} + +/* + * mem_init - initializes memory + * + * Frees up bootmem + * Calculates and displays memory available/used + */ +void __init mem_init(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + + reset_all_zones_managed_pages(); + for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); +#endif + + free_all_bootmem(); + mem_init_print_info(NULL); +} + +/* + * free_initmem: Free all the __init memory. + */ +void __ref free_initmem(void) +{ + free_initmem_default(-1); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void __init free_initrd_mem(unsigned long start, unsigned long end) +{ + free_reserved_area((void *)start, (void *)end, -1, "initrd"); +} +#endif diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c new file mode 100644 index 000000000..9881bd740 --- /dev/null +++ b/arch/arc/mm/ioremap.c @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/cache.h> + +static inline bool arc_uncached_addr_space(phys_addr_t paddr) +{ + if (is_isa_arcompact()) { + if (paddr >= ARC_UNCACHED_ADDR_SPACE) + return true; + } else if (paddr >= perip_base && paddr <= perip_end) { + return true; + } + + return false; +} + +void __iomem *ioremap(phys_addr_t paddr, unsigned long size) +{ + phys_addr_t end; + + /* Don't allow wraparound or zero size */ + end = paddr + size - 1; + if (!size || (end < paddr)) + return NULL; + + /* + * If the region is h/w uncached, MMU mapping can be elided as optim + * The cast to u32 is fine as this region can only be inside 4GB + */ + if (arc_uncached_addr_space(paddr)) + return (void __iomem *)(u32)paddr; + + return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE); +} +EXPORT_SYMBOL(ioremap); + +/* + * ioremap with access flags + * Cache semantics wise it is same as ioremap - "forced" uncached. + * However unlike vanilla ioremap which bypasses ARC MMU for addresses in + * ARC hardware uncached region, this one still goes thru the MMU as caller + * might need finer access control (R/W/X) + */ +void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, + unsigned long flags) +{ + unsigned long vaddr; + struct vm_struct *area; + phys_addr_t off, end; + pgprot_t prot = __pgprot(flags); + + /* Don't allow wraparound, zero size */ + end = paddr + size - 1; + if ((!size) || (end < paddr)) + return NULL; + + /* An early platform driver might end up here */ + if (!slab_is_available()) + return NULL; + + /* force uncached */ + prot = pgprot_noncached(prot); + + /* Mappings have to be page-aligned */ + off = paddr & ~PAGE_MASK; + paddr &= PAGE_MASK; + size = PAGE_ALIGN(end + 1) - paddr; + + /* + * Ok, go for it.. + */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + area->phys_addr = paddr; + vaddr = (unsigned long)area->addr; + if (ioremap_page_range(vaddr, vaddr + size, paddr, prot)) { + vunmap((void __force *)vaddr); + return NULL; + } + return (void __iomem *)(off + (char __iomem *)vaddr); +} +EXPORT_SYMBOL(ioremap_prot); + + +void iounmap(const void __iomem *addr) +{ + /* weird double cast to handle phys_addr_t > 32 bits */ + if (arc_uncached_addr_space((phys_addr_t)(u32)addr)) + return; + + vfree((void *)(PAGE_MASK & (unsigned long __force)addr)); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c new file mode 100644 index 000000000..2e13683df --- /dev/null +++ b/arch/arc/mm/mmap.c @@ -0,0 +1,79 @@ +/* + * ARC700 mmap + * + * (started from arm version - for VIPT alias handling) + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/sched/mm.h> + +#include <asm/cacheflush.h> + +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ + (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) + +/* + * Ensure that shared mappings are correctly aligned to + * avoid aliasing issues with VIPT caches. + * We need to ensure that + * a specific page of an object is always mapped at a multiple of + * SHMLBA bytes. 
+ */ +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if D cache aliases. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* + * We enforce the MAP_FIXED case. + */ + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c new file mode 100644 index 000000000..fa18c00b0 --- /dev/null +++ b/arch/arc/mm/tlb.c @@ -0,0 +1,1008 @@ +/* + * TLB Management (flush/create/diagnostics) for ARC700 + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * vineetg: Aug 2011 + * -Reintroduce duplicate PD fixup - some customer chips still have the issue + * + * vineetg: May 2011 + * -No need to flush_cache_page( ) for each call to update_mmu_cache() + * some of the LMBench tests improved amazingly + * = page-fault thrice as fast (75 usec to 28 usec) + * = mmap twice as fast (9.6 msec to 4.6 msec), + * = fork (5.3 msec to 3.7 msec) + * + * vineetg: April 2011 : + * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, + * helps avoid a shift when preparing PD0 from PTE + * + * vineetg: April 2011 : Preparing for MMU V3 + * -MMU v2/v3 BCRs decoded differently + * -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512 + * -tlb_entry_erase( ) can be void + * -local_flush_tlb_range( ): + * = need not "ceil" @end + * = walks MMU only if range spans < 32 entries, as opposed to 256 + * + * Vineetg: Sept 10th 2008 + * -Changes related to MMU v2 (Rel 4.8) + * + * Vineetg: Aug 29th 2008 + * -In TLB Flush operations (Metal Fix MMU) there is a explict command to + * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd, + * it fails. Thus need to load it with ANY valid value before invoking + * TLBIVUTLB cmd + * + * Vineetg: Aug 21th 2008: + * -Reduced the duration of IRQ lockouts in TLB Flush routines + * -Multiple copies of TLB erase code seperated into a "single" function + * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID + * in interrupt-safe region. + * + * Vineetg: April 23rd Bug #93131 + * Problem: tlb_flush_kernel_range() doesn't do anything if the range to + * flush is more than the size of TLB itself. 
+ * + * Rahul Trivedi : Codito Technologies 2004 + */ + +#include <linux/module.h> +#include <linux/bug.h> +#include <linux/mm_types.h> + +#include <asm/arcregs.h> +#include <asm/setup.h> +#include <asm/mmu_context.h> +#include <asm/mmu.h> + +/* Need for ARC MMU v2 + * + * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc. + * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages + * map into same set, there would be contention for the 2 ways causing severe + * Thrashing. + * + * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has + * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. + * Given this, the thrasing problem should never happen because once the 3 + * J-TLB entries are created (even though 3rd will knock out one of the prev + * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy + * + * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs. + * This is a simple design for keeping them in sync. So what do we do? + * The solution which James came up was pretty neat. It utilised the assoc + * of uTLBs by not invalidating always but only when absolutely necessary. + * + * - Existing TLB commands work as before + * - New command (TLBWriteNI) for TLB write without clearing uTLBs + * - New command (TLBIVUTLB) to invalidate uTLBs. + * + * The uTLBs need only be invalidated when pages are being removed from the + * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB + * as a result of a miss, the removed entry is still allowed to exist in the + * uTLBs as it is still valid and present in the OS page table. This allows the + * full associativity of the uTLBs to hide the limited associativity of the main + * TLB. + * + * During a miss handler, the new "TLBWriteNI" command is used to load + * entries without clearing the uTLBs. + * + * When the OS page table is updated, TLB entries that may be associated with a + * removed page are removed (flushed) from the TLB using TLBWrite. In this + * circumstance, the uTLBs must also be cleared. This is done by using the + * existing TLBWrite command. An explicit IVUTLB is also required for those + * corner cases when TLBWrite was not executed at all because the corresp + * J-TLB entry got evicted/replaced. 
+ */ + + +/* A copy of the ASID from the PID reg is kept in asid_cache */ +DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; + +static int __read_mostly pae_exists; + +/* + * Utility Routine to erase a J-TLB entry + * Caller needs to setup Index Reg (manually or via getIndex) + */ +static inline void __tlb_entry_erase(void) +{ + write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + + write_aux_reg(ARC_REG_TLBPD0, 0); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + +#if (CONFIG_ARC_MMU_VER < 4) + +static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) +{ + unsigned int idx; + + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid); + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); + idx = read_aux_reg(ARC_REG_TLBINDEX); + + return idx; +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + unsigned int idx; + + /* Locate the TLB entry for this vaddr + ASID */ + idx = tlb_entry_lkup(vaddr_n_asid); + + /* No error means entry found, zero it out */ + if (likely(!(idx & TLB_LKUP_ERR))) { + __tlb_entry_erase(); + } else { + /* Duplicate entry error */ + WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n", + vaddr_n_asid); + } +} + +/**************************************************************************** + * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs) + * + * New IVUTLB cmd in MMU v2 explictly invalidates the uTLB + * + * utlb_invalidate ( ) + * -For v2 MMU calls Flush uTLB Cmd + * -For v1 MMU does nothing (except for Metal Fix v1 MMU) + * This is because in v1 TLBWrite itself invalidate uTLBs + ***************************************************************************/ + +static void utlb_invalidate(void) +{ +#if (CONFIG_ARC_MMU_VER >= 2) + +#if (CONFIG_ARC_MMU_VER == 2) + /* MMU v2 introduced the uTLB Flush command. + * There was however an obscure hardware bug, where uTLB flush would + * fail when a prior probe for J-TLB (both totally unrelated) would + * return lkup err - because the entry didn't exist in MMU. + * The Workround was to set Index reg with some valid value, prior to + * flush. This was fixed in MMU v3 hence not needed any more + */ + unsigned int idx; + + /* make sure INDEX Reg is valid */ + idx = read_aux_reg(ARC_REG_TLBINDEX); + + /* If not write some dummy val */ + if (unlikely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBINDEX, 0xa); +#endif + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); +#endif + +} + +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) +{ + unsigned int idx; + + /* + * First verify if entry for this vaddr+ASID already exists + * This also sets up PD0 (vaddr, ASID..) for final commit + */ + idx = tlb_entry_lkup(pd0); + + /* + * If Not already present get a free slot from MMU. + * Otherwise, Probe would have located the entry and set INDEX Reg + * with existing location. This will cause Write CMD to over-write + * existing entry with new PD0 and PD1 + */ + if (likely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); + + /* setup the other half of TLB entry (pfn, rwx..) */ + write_aux_reg(ARC_REG_TLBPD1, pd1); + + /* + * Commit the Entry to MMU + * It doesn't sound safe to use the TLBWriteNI cmd here + * which doesn't flush uTLBs. I'd rather be safe than sorry. 
+ */ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + +#else /* CONFIG_ARC_MMU_VER >= 4) */ + +static void utlb_invalidate(void) +{ + /* No need since uTLB is always in sync with JTLB */ +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); +} + +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) +{ + write_aux_reg(ARC_REG_TLBPD0, pd0); + write_aux_reg(ARC_REG_TLBPD1, pd1); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); +} + +#endif + +/* + * Un-conditionally (without lookup) erase the entire MMU contents + */ + +noinline void local_flush_tlb_all(void) +{ + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned long flags; + unsigned int entry; + int num_tlb = mmu->sets * mmu->ways; + + local_irq_save(flags); + + /* Load PD0 and PD1 with template for a Blank Entry */ + write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + + write_aux_reg(ARC_REG_TLBPD0, 0); + + for (entry = 0; entry < num_tlb; entry++) { + /* write this entry to the TLB */ + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + } + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + const int stlb_idx = 0x800; + + /* Blank sTLB entry */ + write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ); + + for (entry = stlb_idx; entry < stlb_idx + 16; entry++) { + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + } + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* + * Flush the entrie MM for userland. The fastest way is to move to Next ASID + */ +noinline void local_flush_tlb_mm(struct mm_struct *mm) +{ + /* + * Small optimisation courtesy IA64 + * flush_mm called during fork,exit,munmap etc, multiple times as well. + * Only for fork( ) do we need to move parent to a new MMU ctxt, + * all other cases are NOPs, hence this check. + */ + if (atomic_read(&mm->mm_users) == 0) + return; + + /* + * - Move to a new ASID, but only if the mm is still wired in + * (Android Binder ended up calling this for vma->mm != tsk->mm, + * causing h/w - s/w ASID to get out of sync) + * - Also get_new_mmu_context() new implementation allocates a new + * ASID only if it is not allocated already - so unallocate first + */ + destroy_context(mm); + if (current->mm == mm) + get_new_mmu_context(mm); +} + +/* + * Flush a Range of TLB entries for userland. 
+ * @start is inclusive, while @end is exclusive + * Difference between this and Kernel Range Flush is + * -Here the fastest way (if range is too large) is to move to next ASID + * without doing any explicit Shootdown + * -In case of kernel Flush, entry has to be shot down explictly + */ +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + const unsigned int cpu = smp_processor_id(); + unsigned long flags; + + /* If range @start to @end is more than 32 TLB entries deep, + * its better to move to a new ASID rather than searching for + * individual entries and then shooting them down + * + * The calc above is rough, doesn't account for unaligned parts, + * since this is heuristics based anyways + */ + if (unlikely((end - start) >= PAGE_SIZE * 32)) { + local_flush_tlb_mm(vma->vm_mm); + return; + } + + /* + * @start moved to page start: this alone suffices for checking + * loop end condition below, w/o need for aligning @end to end + * e.g. 2000 to 4001 will anyhow loop twice + */ + start &= PAGE_MASK; + + local_irq_save(flags); + + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + while (start < end) { + tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu)); + start += PAGE_SIZE; + } + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* Flush the kernel TLB entries - vmalloc/modules (Global from MMU perspective) + * @start, @end interpreted as kvaddr + * Interestingly, shared TLB entries can also be flushed using just + * @start,@end alone (interpreted as user vaddr), although technically SASID + * is also needed. However our smart TLbProbe lookup takes care of that. + */ +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + + /* exactly same as above, except for TLB entry not taking ASID */ + + if (unlikely((end - start) >= PAGE_SIZE * 32)) { + local_flush_tlb_all(); + return; + } + + start &= PAGE_MASK; + + local_irq_save(flags); + while (start < end) { + tlb_entry_erase(start); + start += PAGE_SIZE; + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* + * Delete TLB entry in MMU for a given page (??? 
address) + * NOTE One TLB entry contains translation for single PAGE + */ + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + const unsigned int cpu = smp_processor_id(); + unsigned long flags; + + /* Note that it is critical that interrupts are DISABLED between + * checking the ASID and using it flush the TLB entry + */ + local_irq_save(flags); + + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu)); + utlb_invalidate(); + } + + local_irq_restore(flags); +} + +#ifdef CONFIG_SMP + +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; +}; + +static inline void ipi_flush_tlb_page(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_page(ta->ta_vma, ta->ta_start); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void ipi_flush_pmd_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} +#endif + +static inline void ipi_flush_tlb_kernel_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); +} + +void flush_tlb_all(void) +{ + on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm, + mm, 1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = uaddr + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1); +} +#endif + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct tlb_args ta = { + .ta_start = start, + .ta_end = end + }; + + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); +} +#endif + +/* + * Routine to create a TLB entry + */ +void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) +{ + unsigned long flags; + unsigned int asid_or_sasid, rwx; + unsigned long pd0; + pte_t pd1; + + /* + * create_tlb() assumes that current->mm == vma->mm, since + * -it ASID for TLB entry is fetched from MMU ASID reg (valid for curr) + * -completes the lazy write to SASID reg (again valid for curr tsk) + * + * Removing the assumption involves + * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg. + * -Fix the TLB paranoid debug code to not trigger false negatives. + * -More importantly it makes this handler inconsistent with fast-path + * TLB Refill handler which always deals with "current" + * + * Lets see the use cases when current->mm != vma->mm and we land here + * 1. 
execve->copy_strings()->__get_user_pages->handle_mm_fault + * Here VM wants to pre-install a TLB entry for user stack while + * current->mm still points to pre-execve mm (hence the condition). + * However the stack vaddr is soon relocated (randomization) and + * move_page_tables() tries to undo that TLB entry. + * Thus not creating TLB entry is not any worse. + * + * 2. ptrace(POKETEXT) causes a CoW - debugger(current) inserting a + * breakpoint in debugged task. Not creating a TLB now is not + * performance critical. + * + * Both the cases above are not good enough for code churn. + */ + if (current->active_mm != vma->vm_mm) + return; + + local_irq_save(flags); + + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr); + + vaddr &= PAGE_MASK; + + /* update this PTE credentials */ + pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); + + /* Create HW TLB(PD0,PD1) from PTE */ + + /* ASID for this task */ + asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; + + pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); + + /* + * ARC MMU provides fully orthogonal access bits for K/U mode, + * however Linux only saves 1 set to save PTE real-estate + * Here we convert 3 PTE bits into 6 MMU bits: + * -Kernel only entries have Kr Kw Kx 0 0 0 + * -User entries have mirrored K and U bits + */ + rwx = pte_val(*ptep) & PTE_BITS_RWX; + + if (pte_val(*ptep) & _PAGE_GLOBAL) + rwx <<= 3; /* r w x => Kr Kw Kx 0 0 0 */ + else + rwx |= (rwx << 3); /* r w x => Kr Kw Kx Ur Uw Ux */ + + pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1); + + tlb_entry_insert(pd0, pd1); + + local_irq_restore(flags); +} + +/* + * Called at the end of pagefault, for a userspace mapped page + * -pre-install the corresponding TLB entry into MMU + * -Finalize the delayed D-cache flush of kernel mapping of page due to + * flush_dcache_page(), copy_user_page() + * + * Note that flush (when done) involves both WBACK - so physical page is + * in sync as well as INV - so any non-congruent aliases don't remain + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, + pte_t *ptep) +{ + unsigned long vaddr = vaddr_unaligned & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; + struct page *page = pfn_to_page(pte_pfn(*ptep)); + + create_tlb(vma, vaddr, ptep); + + if (page == ZERO_PAGE(0)) { + return; + } + + /* + * Exec page : Independent of aliasing/page-color considerations, + * since icache doesn't snoop dcache on ARC, any dirty + * K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. + * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it + * so userspace sees the right data. + * (Avoids the flush for Non-exec + congruent mapping case) + */ + if ((vma->vm_flags & VM_EXEC) || + addr_not_cache_congruent(paddr, vaddr)) { + + int dirty = !test_and_set_bit(PG_dc_clean, &page->flags); + if (dirty) { + /* wback + inv dcache lines (K-mapping) */ + __flush_dcache_page(paddr, paddr); + + /* invalidate any existing icache lines (U-mapping) */ + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, vaddr); + } + } +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* + * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP + * support. + * + * Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a + * new bit "SZ" in TLB page descriptor to distinguish between them. + * Super Page size is configurable in hardware (4K to 16M), but fixed once + * RTL builds. 
+ * + * The exact THP size a Linx configuration will support is a function of: + * - MMU page size (typical 8K, RTL fixed) + * - software page walker address split between PGD:PTE:PFN (typical + * 11:8:13, but can be changed with 1 line) + * So for above default, THP size supported is 8K * (2^8) = 2M + * + * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime + * reduces to 1 level (as PTE is folded into PGD and canonically referred + * to as PMD). + * Thus THP PMD accessors are implemented in terms of PTE (just like sparc) + */ + +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) +{ + pte_t pte = __pte(pmd_val(*pmd)); + update_mmu_cache(vma, addr, &pte); +} + +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) +{ + struct list_head *lh; + pgtable_t pgtable; + + assert_spin_locked(&mm->page_table_lock); + + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + + pte_val(pgtable[0]) = 0; + pte_val(pgtable[1]) = 0; + + return pgtable; +} + +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + unsigned int cpu; + unsigned long flags; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) { + unsigned int asid = hw_pid(vma->vm_mm, cpu); + + /* No need to loop here: this will always be for 1 Huge Page */ + tlb_entry_erase(start | _PAGE_HW_SZ | asid); + } + + local_irq_restore(flags); +} + +#endif + +/* Read the Cache Build Confuration Registers, Decode them and save into + * the cpuinfo structure for later use. 
+ * No Validation is done here, simply read/convert the BCRs + */ +void read_decode_mmu_bcr(void) +{ + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned int tmp; + struct bcr_mmu_1_2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; +#else + unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; +#endif + } *mmu2; + + struct bcr_mmu_3 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, + u_itlb:4, u_dtlb:4; +#else + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4, + ways:4, ver:8; +#endif + } *mmu3; + + struct bcr_mmu_4 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif + } *mmu4; + + tmp = read_aux_reg(ARC_REG_MMU_BCR); + mmu->ver = (tmp >> 24); + + if (is_isa_arcompact()) { + if (mmu->ver <= 2) { + mmu2 = (struct bcr_mmu_1_2 *)&tmp; + mmu->pg_sz_k = TO_KB(0x2000); + mmu->sets = 1 << mmu2->sets; + mmu->ways = 1 << mmu2->ways; + mmu->u_dtlb = mmu2->u_dtlb; + mmu->u_itlb = mmu2->u_itlb; + } else { + mmu3 = (struct bcr_mmu_3 *)&tmp; + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); + mmu->sets = 1 << mmu3->sets; + mmu->ways = 1 << mmu3->ways; + mmu->u_dtlb = mmu3->u_dtlb; + mmu->u_itlb = mmu3->u_itlb; + mmu->sasid = mmu3->sasid; + } + } else { + mmu4 = (struct bcr_mmu_4 *)&tmp; + mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); + mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11); + mmu->sets = 64 << mmu4->n_entry; + mmu->ways = mmu4->n_ways * 2; + mmu->u_dtlb = mmu4->u_dtlb * 4; + mmu->u_itlb = mmu4->u_itlb * 4; + mmu->sasid = mmu4->sasid; + pae_exists = mmu->pae = mmu4->pae; + } +} + +char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) +{ + int n = 0; + struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; + char super_pg[64] = ""; + + if (p_mmu->s_pg_sz_m) + scnprintf(super_pg, 64, "%dM Super Page %s", + p_mmu->s_pg_sz_m, + IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); + + n += scnprintf(buf + n, len - n, + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", + p_mmu->ver, p_mmu->pg_sz_k, super_pg, + p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, + p_mmu->u_dtlb, p_mmu->u_itlb, + IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); + + return buf; +} + +int pae40_exist_but_not_enab(void) +{ + return pae_exists && !is_pae40_enabled(); +} + +void arc_mmu_init(void) +{ + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + char str[256]; + int compat = 0; + + pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str))); + + /* + * Can't be done in processor.h due to header include depenedencies + */ + BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE)); + + /* + * stack top size sanity check, + * Can't be done in processor.h due to header include depenedencies + */ + BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE)); + + /* + * Ensure that MMU features assumed by kernel exist in hardware. + * For older ARC700 cpus, it has to be exact match, since the MMU + * revisions were not backwards compatible (MMUv3 TLB layout changed + * so even if kernel for v2 didn't use any new cmds of v3, it would + * still not work. + * For HS cpus, MMUv4 was baseline and v5 is backwards compatible + * (will run older software). 
+ */ + if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER) + compat = 1; + else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER) + compat = 1; + + if (!compat) { + panic("MMU ver %d doesn't match kernel built for %d...\n", + mmu->ver, CONFIG_ARC_MMU_VER); + } + + if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) + panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE)) + panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n", + (unsigned long)TO_MB(HPAGE_PMD_SIZE)); + + if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) + panic("Hardware doesn't support PAE40\n"); + + /* Enable the MMU */ + write_aux_reg(ARC_REG_PID, MMU_ENABLE); + + /* In smp we use this reg for interrupt 1 scratch */ +#ifndef CONFIG_SMP + /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ + write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); +#endif + + if (pae40_exist_but_not_enab()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); +} + +/* + * TLB Programmer's Model uses Linear Indexes: 0 to {255, 511} for 128 x {2,4} + * The mapping is Column-first. + * --------------------- ----------- + * |way0|way1|way2|way3| |way0|way1| + * --------------------- ----------- + * [set0] | 0 | 1 | 2 | 3 | | 0 | 1 | + * [set1] | 4 | 5 | 6 | 7 | | 2 | 3 | + * ~ ~ ~ ~ + * [set127] | 508| 509| 510| 511| | 254| 255| + * --------------------- ----------- + * For normal operations we don't(must not) care how above works since + * MMU cmd getIndex(vaddr) abstracts that out. + * However for walking WAYS of a SET, we need to know this + */ +#define SET_WAY_TO_IDX(mmu, set, way) ((set) * mmu->ways + (way)) + +/* Handling of Duplicate PD (TLB entry) in MMU. + * -Could be due to buggy customer tapeouts or obscure kernel bugs + * -MMU complaints not at the time of duplicate PD installation, but at the + * time of lookup matching multiple ways. + * -Ideally these should never happen - but if they do - workaround by deleting + * the duplicate one. + * -Knob to be verbose abt it.(TODO: hook them up to debugfs) + */ +volatile int dup_pd_silent; /* Be slient abt it or complain (default) */ + +void do_tlb_overlap_fault(unsigned long cause, unsigned long address, + struct pt_regs *regs) +{ + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned long flags; + int set, n_ways = mmu->ways; + + n_ways = min(n_ways, 4); + BUG_ON(mmu->ways > 4); + + local_irq_save(flags); + + /* loop thru all sets of TLB */ + for (set = 0; set < mmu->sets; set++) { + + int is_valid, way; + unsigned int pd0[4]; + + /* read out all the ways of current set */ + for (way = 0, is_valid = 0; way < n_ways; way++) { + write_aux_reg(ARC_REG_TLBINDEX, + SET_WAY_TO_IDX(mmu, set, way)); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead); + pd0[way] = read_aux_reg(ARC_REG_TLBPD0); + is_valid |= pd0[way] & _PAGE_PRESENT; + pd0[way] &= PAGE_MASK; + } + + /* If all the WAYS in SET are empty, skip to next SET */ + if (!is_valid) + continue; + + /* Scan the set for duplicate ways: needs a nested loop */ + for (way = 0; way < n_ways - 1; way++) { + + int n; + + if (!pd0[way]) + continue; + + for (n = way + 1; n < n_ways; n++) { + if (pd0[way] != pd0[n]) + continue; + + if (!dup_pd_silent) + pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n", + pd0[way], set, way, n); + + /* + * clear entry @way and not @n. 
+ * This is critical to our optimised loop + */ + pd0[way] = 0; + write_aux_reg(ARC_REG_TLBINDEX, + SET_WAY_TO_IDX(mmu, set, way)); + __tlb_entry_erase(); + } + } + } + + local_irq_restore(flags); +} + +/*********************************************************************** + * Diagnostic Routines + * -Called from Low Level TLB Hanlders if things don;t look good + **********************************************************************/ + +#ifdef CONFIG_ARC_DBG_TLB_PARANOIA + +/* + * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS + * don't match + */ +void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path) +{ + pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n", + is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid); + + __asm__ __volatile__("flag 1"); +} + +void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr) +{ + unsigned int mmu_asid; + + mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff; + + /* + * At the time of a TLB miss/installation + * - HW version needs to match SW version + * - SW needs to have a valid ASID + */ + if (addr < 0x70000000 && + ((mm_asid == MM_CTXT_NO_ASID) || + (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK)))) + print_asid_mismatch(mm_asid, mmu_asid, 0); +} +#endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S new file mode 100644 index 000000000..0e1e47a67 --- /dev/null +++ b/arch/arc/mm/tlbex.S @@ -0,0 +1,405 @@ +/* + * TLB Exception Handling for ARC + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Vineetg: April 2011 : + * -MMU v1: moved out legacy code into a seperate file + * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, + * helps avoid a shift when preparing PD0 from PTE + * + * Vineetg: July 2009 + * -For MMU V2, we need not do heuristics at the time of commiting a D-TLB + * entry, so that it doesn't knock out it's I-TLB entry + * -Some more fine tuning: + * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc + * + * Vineetg: July 2009 + * -Practically rewrote the I/D TLB Miss handlers + * Now 40 and 135 instructions a peice as compared to 131 and 449 resp. + * Hence Leaner by 1.5 K + * Used Conditional arithmetic to replace excessive branching + * Also used short instructions wherever possible + * + * Vineetg: Aug 13th 2008 + * -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing + * more information in case of a Fatality + * + * Vineetg: March 25th Bug #92690 + * -Added Debug Code to check if sw-ASID == hw-ASID + + * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 + */ + +#include <linux/linkage.h> +#include <asm/entry.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/arcregs.h> +#include <asm/cache.h> +#include <asm/processor.h> +#include <asm/tlb-mmu1.h> + +#ifdef CONFIG_ISA_ARCOMPACT +;----------------------------------------------------------------- +; ARC700 Exception Handling doesn't auto-switch stack and it only provides +; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" +; +; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a +; "global" is used to free-up FIRST core reg to be able to code the rest of +; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). 
+; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 +; need to be saved as well by extending the "global" to be 4 words. Hence +; ".size ex_saved_reg1, 16" +; [All of this dance is to avoid stack switching for each TLB Miss, since we +; only need to save only a handful of regs, as opposed to complete reg file] +; +; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST +; core reg as it will not be SMP safe. +; Thus scratch AUX reg is used (and no longer used to cache task PGD). +; To save the rest of 3 regs - per cpu, the global is made "per-cpu". +; Epilogue thus has to locate the "per-cpu" storage for regs. +; To avoid cache line bouncing the per-cpu global is aligned/sized per +; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence +; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" + +; As simple as that.... +;-------------------------------------------------------------------------- + +; scratch memory to save [r0-r3] used to code TLB refill Handler +ARCFP_DATA ex_saved_reg1 + .align 1 << L1_CACHE_SHIFT + .type ex_saved_reg1, @object +#ifdef CONFIG_SMP + .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT) +ex_saved_reg1: + .zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT) +#else + .size ex_saved_reg1, 16 +ex_saved_reg1: + .zero 16 +#endif + +.macro TLBMISS_FREEUP_REGS +#ifdef CONFIG_SMP + sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with + GET_CPU_ID r0 ; get to per cpu scratch mem, + asl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu + add r0, @ex_saved_reg1, r0 +#else + st r0, [@ex_saved_reg1] + mov_s r0, @ex_saved_reg1 +#endif + st_s r1, [r0, 4] + st_s r2, [r0, 8] + st_s r3, [r0, 12] + + ; VERIFY if the ASID in MMU-PID Reg is same as + ; one in Linux data structures + + tlb_paranoid_check_asm +.endm + +.macro TLBMISS_RESTORE_REGS +#ifdef CONFIG_SMP + GET_CPU_ID r0 ; get to per cpu scratch mem + asl r0, r0, L1_CACHE_SHIFT ; each is cache line wide + add r0, @ex_saved_reg1, r0 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + lr r0, [ARC_REG_SCRATCH_DATA0] +#else + mov_s r0, @ex_saved_reg1 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + ld_s r0, [r0] +#endif +.endm + +#else /* ARCv2 */ + +.macro TLBMISS_FREEUP_REGS + PUSH r0 + PUSH r1 + PUSH r2 + PUSH r3 +.endm + +.macro TLBMISS_RESTORE_REGS + POP r3 + POP r2 + POP r1 + POP r0 +.endm + +#endif + +;============================================================================ +; Troubleshooting Stuff +;============================================================================ + +; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid +; When Creating TLB Entries, instead of doing 3 dependent loads from memory, +; we use the MMU PID Reg to get current ASID. +; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble. 
+; So we try to detect this in TLB Mis shandler + +.macro tlb_paranoid_check_asm + +#ifdef CONFIG_ARC_DBG_TLB_PARANOIA + + GET_CURR_TASK_ON_CPU r3 + ld r0, [r3, TASK_ACT_MM] + ld r0, [r0, MM_CTXT+MM_CTXT_ASID] + breq r0, 0, 55f ; Error if no ASID allocated + + lr r1, [ARC_REG_PID] + and r1, r1, 0xFF + + and r2, r0, 0xFF ; MMU PID bits only for comparison + breq r1, r2, 5f + +55: + ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode + lr r2, [erstatus] + bbit0 r2, STATUS_U_BIT, 5f + + ; We sure are in troubled waters, Flag the error, but to do so + ; need to switch to kernel mode stack to call error routine + GET_TSK_STACK_BASE r3, sp + + ; Call printk to shoutout aloud + mov r2, 1 + j print_asid_mismatch + +5: ; ASIDs match so proceed normally + nop + +#endif + +.endm + +;============================================================================ +;TLB Miss handling Code +;============================================================================ + +;----------------------------------------------------------------------------- +; This macro does the page-table lookup for the faulting address. +; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address +.macro LOAD_FAULT_PTE + + lr r2, [efa] + +#ifndef CONFIG_SMP + lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd +#else + GET_CURR_TASK_ON_CPU r1 + ld r1, [r1, TASK_ACT_MM] + ld r1, [r1, MM_PGD] +#endif + + lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD + ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr + tst r3, r3 + bz do_slow_path_pf ; if no Page Table, do page fault + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) + add2.nz r1, r1, r0 + bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk + mov.nz r0, r3 + +#endif + and r1, r3, PAGE_MASK + + ; Get the PTE entry: The idea is + ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr + ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index + ; (3) z = (pgtbl + y * 4) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */ +#else +#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ +#endif + + ; multiply in step (3) above avoided by shifting lesser in step (1) + lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) + and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) + ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40) + ; r1: PTE ptr + +2: + +.endm + +;----------------------------------------------------------------- +; Convert Linux PTE entry into TLB entry +; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu +; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) +; IN: r0 = PTE, r1 = ptr to PTE + +.macro CONV_PTE_TO_TLB + and r3, r0, PTE_BITS_RWX ; r w x + asl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only) + and.f 0, r0, _PAGE_GLOBAL + or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page) + + and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE + or r3, r3, r2 + + sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C +#ifdef CONFIG_ARC_HAS_PAE40 + ld r3, [r1, 4] ; paddr[39..32] + sr r3, [ARC_REG_TLBPD1HI] +#endif + + and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb + + lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid + + or r3, r3, r2 ; S | vaddr | {sasid|asid} + sr r3,[ARC_REG_TLBPD0] ; rewrite PD0 +.endm + +;----------------------------------------------------------------- +; Commit the TLB entry into MMU + +.macro COMMIT_ENTRY_TO_MMU +#if (CONFIG_ARC_MMU_VER < 4) + +#ifdef CONFIG_EZNPS_MTM_EXT + 
/* verify if entry for this vaddr+ASID already exists */ + sr TLBProbe, [ARC_REG_TLBCOMMAND] + lr r0, [ARC_REG_TLBINDEX] + bbit0 r0, 31, 88f +#endif + + /* Get free TLB slot: Set = computed from vaddr, way = random */ + sr TLBGetIndex, [ARC_REG_TLBCOMMAND] + + /* Commit the Write */ +#if (CONFIG_ARC_MMU_VER >= 2) /* introduced in v2 */ + sr TLBWriteNI, [ARC_REG_TLBCOMMAND] +#else + sr TLBWrite, [ARC_REG_TLBCOMMAND] +#endif + +#else + sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] +#endif + +88: +.endm + + +ARCFP_CODE ;Fast Path Code, candidate for ICCM + +;----------------------------------------------------------------------------- +; I-TLB Miss Exception Handler +;----------------------------------------------------------------------------- + +ENTRY(EV_TLBMissI) + + TLBMISS_FREEUP_REGS + + ;---------------------------------------------------------------- + ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA + LOAD_FAULT_PTE + + ;---------------------------------------------------------------- + ; VERIFY_PTE: Check if PTE permissions approp for executing code + cmp_s r2, VMALLOC_START + mov_s r2, (_PAGE_PRESENT | _PAGE_EXECUTE) + or.hs r2, r2, _PAGE_GLOBAL + + and r3, r0, r2 ; Mask out NON Flag bits from PTE + xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) + bnz do_slow_path_pf + + ; Let Linux VM know that the page was accessed + or r0, r0, _PAGE_ACCESSED ; set Accessed Bit + st_s r0, [r1] ; Write back PTE + + CONV_PTE_TO_TLB + COMMIT_ENTRY_TO_MMU + TLBMISS_RESTORE_REGS +EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation + rtie + +END(EV_TLBMissI) + +;----------------------------------------------------------------------------- +; D-TLB Miss Exception Handler +;----------------------------------------------------------------------------- + +ENTRY(EV_TLBMissD) + + TLBMISS_FREEUP_REGS + + ;---------------------------------------------------------------- + ; Get the PTE corresponding to V-addr accessed + ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA + LOAD_FAULT_PTE + + ;---------------------------------------------------------------- + ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W) + + cmp_s r2, VMALLOC_START + mov_s r2, _PAGE_PRESENT ; common bit for K/U PTE + or.hs r2, r2, _PAGE_GLOBAL ; kernel PTE only + + ; Linux PTE [RWX] bits are semantically overloaded: + ; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc) + ; -Otherwise they are user-mode permissions, and those are exactly + ; same for kernel mode as well (e.g. copy_(to|from)_user) + + lr r3, [ecr] + btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access + or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE + btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access + or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE + ; Above laddering takes care of XCHG access (both R and W) + + ; By now, r2 setup with all the Flags we need to check in PTE + and r3, r0, r2 ; Mask out NON Flag bits from PTE + brne.d r3, r2, do_slow_path_pf ; is ((pte & flags_test) == flags_test) + + ;---------------------------------------------------------------- + ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty + lr r3, [ecr] + or r0, r0, _PAGE_ACCESSED ; Accessed bit always + btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? 
+	or.nz	r0, r0, _PAGE_DIRTY		; if Write, set Dirty bit as well
+	st_s	r0, [r1]			; Write back PTE
+
+	CONV_PTE_TO_TLB
+
+#if (CONFIG_ARC_MMU_VER == 1)
+	; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of
+	; memcpy where 3 parties contend for 2 ways, leading to a livelock.
+	; But only for old MMU or one with Metal Fix
+	TLB_WRITE_HEURISTICS
+#endif
+
+	COMMIT_ENTRY_TO_MMU
+	TLBMISS_RESTORE_REGS
+EV_TLBMissD_fast_ret:	; additional label for VDK OS-kit instrumentation
+	rtie
+
+;-------- Common routine to call Linux Page Fault Handler -----------
+do_slow_path_pf:
+
+	; Restore the 4 scratch regs saved by the fast path miss handler
+	TLBMISS_RESTORE_REGS
+
+	; Slow path TLB Miss handled as a regular ARC Exception
+	; (stack switching / save the complete reg-file).
+	b	call_do_page_fault
+END(EV_TLBMissD)
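
Editor's note: the permission-bit folding performed by create_tlb() in tlb.c and by the CONV_PTE_TO_TLB macro in tlbex.S above can be tried out in isolation. The sketch below is illustrative only; the bit positions, macro names and the fold_rwx() helper are assumptions made for the example, not the kernel's definitions (the real masks live in the arch/arc headers). It simply mirrors the rule documented above: the single set of Linux r/w/x bits becomes Kr/Kw/Kx for a global (kernel-only) page, and is mirrored into both the kernel and user halves for a user page.

/* Standalone illustration (not kernel code) of the PTE -> TLB PD1
 * permission folding described above. Bit positions are assumed. */
#include <stdio.h>

#define PAGE_READ	0x1u	/* assumed bit positions, illustration only */
#define PAGE_WRITE	0x2u
#define PAGE_EXECUTE	0x4u
#define PAGE_GLOBAL	0x8u
#define PTE_BITS_RWX	(PAGE_READ | PAGE_WRITE | PAGE_EXECUTE)

static unsigned int fold_rwx(unsigned int pte)
{
	unsigned int rwx = pte & PTE_BITS_RWX;

	if (pte & PAGE_GLOBAL)
		rwx <<= 3;		/* r w x -> Kr Kw Kx 0 0 0   (kernel-only page) */
	else
		rwx |= rwx << 3;	/* r w x -> Kr Kw Kx Ur Uw Ux (user page)       */

	return rwx;
}

int main(void)
{
	/* a global kernel page with read+write vs. a user page with read+exec */
	printf("kernel rw: %#x\n", fold_rwx(PAGE_GLOBAL | PAGE_READ | PAGE_WRITE));
	printf("user   rx: %#x\n", fold_rwx(PAGE_READ | PAGE_EXECUTE));
	return 0;
}

Compiled and run, the first case prints 0x18 (only the Kr/Kw half set) and the second prints 0x2d (Kr/Kx mirrored into Ur/Ux), matching the comments in create_tlb() under these assumed bit positions.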