diff options
Diffstat (limited to 'arch/arm/mm')
105 files changed, 30417 insertions, 0 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig new file mode 100644 index 0000000000..c164cde502 --- /dev/null +++ b/arch/arm/mm/Kconfig @@ -0,0 +1,1153 @@ +# SPDX-License-Identifier: GPL-2.0 +comment "Processor Type" + +# Select CPU types depending on the architecture selected. This selects +# which CPUs we support in the kernel image, and the compiler instruction +# optimiser behaviour. + +# ARM7TDMI +config CPU_ARM7TDMI + bool + depends on !MMU + select CPU_32v4T + select CPU_ABRT_LV4T + select CPU_CACHE_V4 + select CPU_PABRT_LEGACY + help + A 32-bit RISC microprocessor based on the ARM7 processor core + which has no memory control unit and cache. + + Say Y if you want support for the ARM7TDMI processor. + Otherwise, say N. + +# ARM720T +config CPU_ARM720T + bool + select CPU_32v4T + select CPU_ABRT_LV4T + select CPU_CACHE_V4 + select CPU_CACHE_VIVT + select CPU_COPY_V4WT if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WT if MMU + help + A 32-bit RISC processor with 8kByte Cache, Write Buffer and + MMU built around an ARM7TDMI core. + + Say Y if you want support for the ARM720T processor. + Otherwise, say N. + +# ARM740T +config CPU_ARM740T + bool + depends on !MMU + select CPU_32v4T + select CPU_ABRT_LV4T + select CPU_CACHE_V4 + select CPU_CP15_MPU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + help + A 32-bit RISC processor with 8KB cache or 4KB variants, + write buffer and MPU(Protection Unit) built around + an ARM7TDMI core. + + Say Y if you want support for the ARM740T processor. + Otherwise, say N. + +# ARM9TDMI +config CPU_ARM9TDMI + bool + depends on !MMU + select CPU_32v4T + select CPU_ABRT_NOMMU + select CPU_CACHE_V4 + select CPU_PABRT_LEGACY + help + A 32-bit RISC microprocessor based on the ARM9 processor core + which has no memory control unit and cache. + + Say Y if you want support for the ARM9TDMI processor. + Otherwise, say N. + +# ARM920T +config CPU_ARM920T + bool + select CPU_32v4T + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + help + The ARM920T is licensed to be produced by numerous vendors, + and is used in the Cirrus EP93xx and the Samsung S3C2410. + + Say Y if you want support for the ARM920T processor. + Otherwise, say N. + +# ARM922T +config CPU_ARM922T + bool + select CPU_32v4T + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + help + The ARM922T is a version of the ARM920T, but with smaller + instruction and data caches. It is used in Altera's + Excalibur XA device family and the ARM Integrator. + + Say Y if you want support for the ARM922T processor. + Otherwise, say N. + +# ARM925T +config CPU_ARM925T + bool + select CPU_32v4T + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + help + The ARM925T is a mix between the ARM920T and ARM926T, but with + different instruction and data caches. It is used in TI's OMAP + device family. + + Say Y if you want support for the ARM925T processor. + Otherwise, say N. + +# ARM926T +config CPU_ARM926T + bool + select CPU_32v5 + select CPU_ABRT_EV5TJ + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + help + This is a variant of the ARM920. It has slightly different + instruction sequences for cache and TLB operations. Curiously, + there is no documentation on it at the ARM corporate website. + + Say Y if you want support for the ARM926T processor. + Otherwise, say N. + +# FA526 +config CPU_FA526 + bool + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_CACHE_FA + select CPU_CACHE_VIVT + select CPU_COPY_FA if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_FA if MMU + help + The FA526 is a version of the ARMv4 compatible processor with + Branch Target Buffer, Unified TLB and cache line size 16. + + Say Y if you want support for the FA526 processor. + Otherwise, say N. + +# ARM940T +config CPU_ARM940T + bool + depends on !MMU + select CPU_32v4T + select CPU_ABRT_NOMMU + select CPU_CACHE_VIVT + select CPU_CP15_MPU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + help + ARM940T is a member of the ARM9TDMI family of general- + purpose microprocessors with MPU and separate 4KB + instruction and 4KB data cases, each with a 4-word line + length. + + Say Y if you want support for the ARM940T processor. + Otherwise, say N. + +# ARM946E-S +config CPU_ARM946E + bool + depends on !MMU + select CPU_32v5 + select CPU_ABRT_NOMMU + select CPU_CACHE_VIVT + select CPU_CP15_MPU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + help + ARM946E-S is a member of the ARM9E-S family of high- + performance, 32-bit system-on-chip processor solutions. + The TCM and ARMv5TE 32-bit instruction set is supported. + + Say Y if you want support for the ARM946E-S processor. + Otherwise, say N. + +# ARM1020 - needs validating +config CPU_ARM1020 + bool + select CPU_32v5 + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + help + The ARM1020 is the 32K cached version of the ARM10 processor, + with an addition of a floating-point unit. + + Say Y if you want support for the ARM1020 processor. + Otherwise, say N. + +# ARM1020E - needs validating +config CPU_ARM1020E + bool + depends on n + select CPU_32v5 + select CPU_ABRT_EV4T + select CPU_CACHE_V4WT + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + +# ARM1022E +config CPU_ARM1022 + bool + select CPU_32v5 + select CPU_ABRT_EV4T + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU # can probably do better + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + help + The ARM1022E is an implementation of the ARMv5TE architecture + based upon the ARM10 integer core with a 16KiB L1 Harvard cache, + embedded trace macrocell, and a floating-point unit. + + Say Y if you want support for the ARM1022E processor. + Otherwise, say N. + +# ARM1026EJ-S +config CPU_ARM1026 + bool + select CPU_32v5 + select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10 + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU # can probably do better + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + help + The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture + based upon the ARM10 integer core. + + Say Y if you want support for the ARM1026EJ-S processor. + Otherwise, say N. + +# SA110 +config CPU_SA110 + bool + select CPU_32v3 if ARCH_RPC + select CPU_32v4 if !ARCH_RPC + select CPU_ABRT_EV4 + select CPU_CACHE_V4WB + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WB if MMU + help + The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and + is available at five speeds ranging from 100 MHz to 233 MHz. + More information is available at + <http://developer.intel.com/design/strong/sa110.htm>. + + Say Y if you want support for the SA-110 processor. + Otherwise, say N. + +# SA1100 +config CPU_SA1100 + bool + select CPU_32v4 + select CPU_ABRT_EV4 + select CPU_CACHE_V4WB + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_TLB_V4WB if MMU + +# XScale +config CPU_XSCALE + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + +# XScale Core Version 3 +config CPU_XSC3 + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_CACHE_VIVT + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + select IO_36 + +# Marvell PJ1 (Mohawk) +config CPU_MOHAWK + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_CACHE_VIVT + select CPU_COPY_V4WB if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_V4WBI if MMU + +# Feroceon +config CPU_FEROCEON + bool + select CPU_32v5 + select CPU_ABRT_EV5T + select CPU_CACHE_VIVT + select CPU_COPY_FEROCEON if MMU + select CPU_CP15_MMU + select CPU_PABRT_LEGACY + select CPU_THUMB_CAPABLE + select CPU_TLB_FEROCEON if MMU + +config CPU_FEROCEON_OLD_ID + bool "Accept early Feroceon cores with an ARM926 ID" + depends on CPU_FEROCEON && !CPU_ARM926T + default y + help + This enables the usage of some old Feroceon cores + for which the CPU ID is equal to the ARM926 ID. + Relevant for Feroceon-1850 and early Feroceon-2850. + +# Marvell PJ4 +config CPU_PJ4 + bool + select ARM_THUMBEE + select CPU_V7 + +config CPU_PJ4B + bool + select CPU_V7 + +# ARMv6 +config CPU_V6 + bool + select CPU_32v6 + select CPU_ABRT_EV6 + select CPU_CACHE_V6 + select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU + select CPU_CP15_MMU + select CPU_HAS_ASID if MMU + select CPU_PABRT_V6 + select CPU_THUMB_CAPABLE + select CPU_TLB_V6 if MMU + select SMP_ON_UP if SMP + +# ARMv6k +config CPU_V6K + bool + select CPU_32v6 + select CPU_32v6K + select CPU_ABRT_EV6 + select CPU_CACHE_V6 + select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU + select CPU_CP15_MMU + select CPU_HAS_ASID if MMU + select CPU_PABRT_V6 + select CPU_THUMB_CAPABLE + select CPU_TLB_V6 if MMU + +# ARMv7 and ARMv8 architectures +config CPU_V7 + bool + select CPU_32v6K + select CPU_32v7 + select CPU_ABRT_EV7 + select CPU_CACHE_V7 + select CPU_CACHE_VIPT + select CPU_COPY_V6 if MMU + select CPU_CP15_MMU if MMU + select CPU_CP15_MPU if !MMU + select CPU_HAS_ASID if MMU + select CPU_PABRT_V7 + select CPU_SPECTRE if MMU + select CPU_THUMB_CAPABLE + select CPU_TLB_V7 if MMU + +# ARMv7M +config CPU_V7M + bool + select CPU_32v7M + select CPU_ABRT_NOMMU + select CPU_CACHE_V7M + select CPU_CACHE_NOP + select CPU_PABRT_LEGACY + select CPU_THUMBONLY + +config CPU_THUMBONLY + bool + select CPU_THUMB_CAPABLE + # There are no CPUs available with MMU that don't implement an ARM ISA: + depends on !MMU + help + Select this if your CPU doesn't support the 32 bit ARM instructions. + +config CPU_THUMB_CAPABLE + bool + help + Select this if your CPU can support Thumb mode. + +# Figure out what processor architecture version we should be using. +# This defines the compiler instruction set which depends on the machine type. +config CPU_32v3 + bool + select CPU_USE_DOMAINS if MMU + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS + +config CPU_32v4 + bool + select CPU_USE_DOMAINS if MMU + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS + +config CPU_32v4T + bool + select CPU_USE_DOMAINS if MMU + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU + select CPU_NO_EFFICIENT_FFS + +config CPU_32v5 + bool + select CPU_USE_DOMAINS if MMU + select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU + +config CPU_32v6 + bool + select TLS_REG_EMUL if !CPU_32v6K && !MMU + +config CPU_32v6K + bool + +config CPU_32v7 + bool + +config CPU_32v7M + bool + +# The abort model +config CPU_ABRT_NOMMU + bool + +config CPU_ABRT_EV4 + bool + +config CPU_ABRT_EV4T + bool + +config CPU_ABRT_LV4T + bool + +config CPU_ABRT_EV5T + bool + +config CPU_ABRT_EV5TJ + bool + +config CPU_ABRT_EV6 + bool + +config CPU_ABRT_EV7 + bool + +config CPU_PABRT_LEGACY + bool + +config CPU_PABRT_V6 + bool + +config CPU_PABRT_V7 + bool + +# The cache model +config CPU_CACHE_V4 + bool + +config CPU_CACHE_V4WT + bool + +config CPU_CACHE_V4WB + bool + +config CPU_CACHE_V6 + bool + +config CPU_CACHE_V7 + bool + +config CPU_CACHE_NOP + bool + +config CPU_CACHE_VIVT + bool + +config CPU_CACHE_VIPT + bool + +config CPU_CACHE_FA + bool + +config CPU_CACHE_V7M + bool + +if MMU +# The copy-page model +config CPU_COPY_V4WT + bool + +config CPU_COPY_V4WB + bool + +config CPU_COPY_FEROCEON + bool + +config CPU_COPY_FA + bool + +config CPU_COPY_V6 + bool + +# This selects the TLB model +config CPU_TLB_V4WT + bool + help + ARM Architecture Version 4 TLB with writethrough cache. + +config CPU_TLB_V4WB + bool + help + ARM Architecture Version 4 TLB with writeback cache. + +config CPU_TLB_V4WBI + bool + help + ARM Architecture Version 4 TLB with writeback cache and invalidate + instruction cache entry. + +config CPU_TLB_FEROCEON + bool + help + Feroceon TLB (v4wbi with non-outer-cachable page table walks). + +config CPU_TLB_FA + bool + help + Faraday ARM FA526 architecture, unified TLB with writeback cache + and invalidate instruction cache entry. Branch target buffer is + also supported. + +config CPU_TLB_V6 + bool + +config CPU_TLB_V7 + bool + +endif + +config CPU_HAS_ASID + bool + help + This indicates whether the CPU has the ASID register; used to + tag TLB and possibly cache entries. + +config CPU_CP15 + bool + help + Processor has the CP15 register. + +config CPU_CP15_MMU + bool + select CPU_CP15 + help + Processor has the CP15 register, which has MMU related registers. + +config CPU_CP15_MPU + bool + select CPU_CP15 + help + Processor has the CP15 register, which has MPU related registers. + +config CPU_USE_DOMAINS + bool + help + This option enables or disables the use of domain switching + using the DACR (domain access control register) to protect memory + domains from each other. In Linux we use three domains: kernel, user + and IO. The domains are used to protect userspace from kernelspace + and to handle IO-space as a special type of memory by assigning + manager or client roles to running code (such as a process). + +config CPU_V7M_NUM_IRQ + int "Number of external interrupts connected to the NVIC" + depends on CPU_V7M + default 90 if ARCH_STM32 + default 112 if SOC_VF610 + default 240 + help + This option indicates the number of interrupts connected to the NVIC. + The value can be larger than the real number of interrupts supported + by the system, but must not be lower. + The default value is 240, corresponding to the maximum number of + interrupts supported by the NVIC on Cortex-M family. + + If unsure, keep default value. + +# +# CPU supports 36-bit I/O +# +config IO_36 + bool + +comment "Processor Features" + +config ARM_LPAE + bool "Support for the Large Physical Address Extension" + depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \ + !CPU_32v4 && !CPU_32v3 + select PHYS_ADDR_T_64BIT + select SWIOTLB + help + Say Y if you have an ARMv7 processor supporting the LPAE page + table format and you would like to access memory beyond the + 4GB limit. The resulting kernel image will not run on + processors without the LPA extension. + + If unsure, say N. + +config ARM_PV_FIXUP + def_bool y + depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE + +config ARM_THUMB + bool "Support Thumb user binaries" if !CPU_THUMBONLY && EXPERT + depends on CPU_THUMB_CAPABLE && !CPU_32v4 + default y + help + Say Y if you want to include kernel support for running user space + Thumb binaries. + + The Thumb instruction set is a compressed form of the standard ARM + instruction set resulting in smaller binaries at the expense of + slightly less efficient code. + + If this option is disabled, and you run userspace that switches to + Thumb mode, signal handling will not work correctly, resulting in + segmentation faults or illegal instruction aborts. + + If you don't know what this all is, saying Y is a safe choice. + +config ARM_THUMBEE + bool "Enable ThumbEE CPU extension" + depends on CPU_V7 + help + Say Y here if you have a CPU with the ThumbEE extension and code to + make use of it. Say N for code that can run on CPUs without ThumbEE. + +config ARM_VIRT_EXT + bool + default y if CPU_V7 + help + Enable the kernel to make use of the ARM Virtualization + Extensions to install hypervisors without run-time firmware + assistance. + + A compliant bootloader is required in order to make maximum + use of this feature. Refer to Documentation/arch/arm/booting.rst for + details. + +config SWP_EMULATE + bool "Emulate SWP/SWPB instructions" if !SMP + depends on CPU_V7 + default y if SMP + select HAVE_PROC_CPU if PROC_FS + help + ARMv6 architecture deprecates use of the SWP/SWPB instructions. + ARMv7 multiprocessing extensions introduce the ability to disable + these instructions, triggering an undefined instruction exception + when executed. Say Y here to enable software emulation of these + instructions for userspace (not kernel) using LDREX/STREX. + Also creates /proc/cpu/swp_emulation for statistics. + + In some older versions of glibc [<=2.8] SWP is used during futex + trylock() operations with the assumption that the code will not + be preempted. This invalid assumption may be more likely to fail + with SWP emulation enabled, leading to deadlock of the user + application. + + NOTE: when accessing uncached shared regions, LDREX/STREX rely + on an external transaction monitoring block called a global + monitor to maintain update atomicity. If your system does not + implement a global monitor, this option can cause programs that + perform SWP operations to uncached memory to deadlock. + + If unsure, say Y. + +choice + prompt "CPU Endianness" + default CPU_LITTLE_ENDIAN + +config CPU_LITTLE_ENDIAN + bool "Built little-endian kernel" + help + Say Y if you plan on running a kernel in little-endian mode. + This is the default and is used in practically all modern user + space builds. + +config CPU_BIG_ENDIAN + bool "Build big-endian kernel" + depends on !LD_IS_LLD + help + Say Y if you plan on running a kernel in big-endian mode. + This works on many machines using ARMv6 or newer processors + but requires big-endian user space. + + The only ARMv5 platform with big-endian support is + Intel IXP4xx. + +endchoice + +config CPU_ENDIAN_BE8 + bool + depends on CPU_BIG_ENDIAN + default CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M + help + Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors. + +config CPU_ENDIAN_BE32 + bool + depends on CPU_BIG_ENDIAN + default !CPU_ENDIAN_BE8 + help + Support for the BE-32 (big-endian) mode on pre-ARMv6 processors. + +config CPU_HIGH_VECTOR + depends on !MMU && CPU_CP15 && !CPU_ARM740T + bool "Select the High exception vector" + help + Say Y here to select high exception vector(0xFFFF0000~). + The exception vector can vary depending on the platform + design in nommu mode. If your platform needs to select + high exception vector, say Y. + Otherwise or if you are unsure, say N, and the low exception + vector (0x00000000~) will be used. + +config CPU_ICACHE_DISABLE + bool "Disable I-Cache (I-bit)" + depends on (CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)) || CPU_V7M + help + Say Y here to disable the processor instruction cache. Unless + you have a reason not to or are unsure, say N. + +config CPU_ICACHE_MISMATCH_WORKAROUND + bool "Workaround for I-Cache line size mismatch between CPU cores" + depends on SMP && CPU_V7 + help + Some big.LITTLE systems have I-Cache line size mismatch between + LITTLE and big cores. Say Y here to enable a workaround for + proper I-Cache support on such systems. If unsure, say N. + +config CPU_DCACHE_DISABLE + bool "Disable D-Cache (C-bit)" + depends on (CPU_CP15 && !SMP) || CPU_V7M + help + Say Y here to disable the processor data cache. Unless + you have a reason not to or are unsure, say N. + +config CPU_DCACHE_SIZE + hex + depends on CPU_ARM740T || CPU_ARM946E + default 0x00001000 if CPU_ARM740T + default 0x00002000 # default size for ARM946E-S + help + Some cores are synthesizable to have various sized cache. For + ARM946E-S case, it can vary from 0KB to 1MB. + To support such cache operations, it is efficient to know the size + before compile time. + If your SoC is configured to have a different size, define the value + here with proper conditions. + +config CPU_DCACHE_WRITETHROUGH + bool "Force write through D-cache" + depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE + default y if CPU_ARM925T + help + Say Y here to use the data cache in writethrough mode. Unless you + specifically require this or are unsure, say N. + +config CPU_CACHE_ROUND_ROBIN + bool "Round robin I and D cache replacement algorithm" + depends on (CPU_ARM926T || CPU_ARM946E || CPU_ARM1020) && (!CPU_ICACHE_DISABLE || !CPU_DCACHE_DISABLE) + help + Say Y here to use the predictable round-robin cache replacement + policy. Unless you specifically require this or are unsure, say N. + +config CPU_BPREDICT_DISABLE + bool "Disable branch prediction" + depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 || CPU_V7M + help + Say Y here to disable branch prediction. If unsure, say N. + +config CPU_SPECTRE + bool + select GENERIC_CPU_VULNERABILITIES + +config HARDEN_BRANCH_PREDICTOR + bool "Harden the branch predictor against aliasing attacks" if EXPERT + depends on CPU_SPECTRE + default y + help + Speculation attacks against some high-performance processors rely + on being able to manipulate the branch predictor for a victim + context by executing aliasing branches in the attacker context. + Such attacks can be partially mitigated against by clearing + internal branch predictor state and limiting the prediction + logic in some situations. + + This config option will take CPU-specific actions to harden + the branch predictor against aliasing attacks and may rely on + specific instruction sequences or control bits being set by + the system firmware. + + If unsure, say Y. + +config HARDEN_BRANCH_HISTORY + bool "Harden Spectre style attacks against branch history" if EXPERT + depends on CPU_SPECTRE + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. When + taking an exception, a sequence of branches overwrites the branch + history, or branch history is invalidated. + +config TLS_REG_EMUL + bool + select NEED_KUSER_HELPERS + help + An SMP system using a pre-ARMv6 processor (there are apparently + a few prototypes like that in existence) and therefore access to + that required register must be emulated. + +config NEED_KUSER_HELPERS + bool + +config KUSER_HELPERS + bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS + depends on MMU + default y + help + Warning: disabling this option may break user programs. + + Provide kuser helpers in the vector page. The kernel provides + helper code to userspace in read only form at a fixed location + in the high vector page to allow userspace to be independent of + the CPU type fitted to the system. This permits binaries to be + run on ARMv4 through to ARMv7 without modification. + + See Documentation/arch/arm/kernel_user_helpers.rst for details. + + However, the fixed address nature of these helpers can be used + by ROP (return orientated programming) authors when creating + exploits. + + If all of the binaries and libraries which run on your platform + are built specifically for your platform, and make no use of + these helpers, then you can turn this option off to hinder + such exploits. However, in that case, if a binary or library + relying on those helpers is run, it will receive a SIGILL signal, + which will terminate the program. + + Say N here only if you are absolutely certain that you do not + need these helpers; otherwise, the safe option is to say Y. + +config VDSO + bool "Enable VDSO for acceleration of some system calls" + depends on AEABI && MMU && CPU_V7 + default y if ARM_ARCH_TIMER + select HAVE_GENERIC_VDSO + select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_32 + select GENERIC_GETTIMEOFDAY + help + Place in the process address space an ELF shared object + providing fast implementations of gettimeofday and + clock_gettime. Systems that implement the ARM architected + timer will receive maximum benefit. + + You must have glibc 2.22 or later for programs to seamlessly + take advantage of this. + +config DMA_CACHE_RWFO + bool "Enable read/write for ownership DMA cache maintenance" + depends on CPU_V6K && SMP + default y + help + The Snoop Control Unit on ARM11MPCore does not detect the + cache maintenance operations and the dma_{map,unmap}_area() + functions may leave stale cache entries on other CPUs. By + enabling this option, Read or Write For Ownership in the ARMv6 + DMA cache maintenance functions is performed. These LDR/STR + instructions change the cache line state to shared or modified + so that the cache operation has the desired effect. + + Note that the workaround is only valid on processors that do + not perform speculative loads into the D-cache. For such + processors, if cache maintenance operations are not broadcast + in hardware, other workarounds are needed (e.g. cache + maintenance broadcasting in software via FIQ). + +config OUTER_CACHE + bool + +config OUTER_CACHE_SYNC + bool + select ARM_HEAVY_MB + help + The outer cache has a outer_cache_fns.sync function pointer + that can be used to drain the write buffer of the outer cache. + +config CACHE_B15_RAC + bool "Enable the Broadcom Brahma-B15 read-ahead cache controller" + depends on ARCH_BRCMSTB + default y + help + This option enables the Broadcom Brahma-B15 read-ahead cache + controller. If disabled, the read-ahead cache remains off. + +config CACHE_FEROCEON_L2 + bool "Enable the Feroceon L2 cache controller" + depends on ARCH_MV78XX0 || ARCH_MVEBU + default y + select OUTER_CACHE + help + This option enables the Feroceon L2 cache controller. + +config CACHE_FEROCEON_L2_WRITETHROUGH + bool "Force Feroceon L2 cache write through" + depends on CACHE_FEROCEON_L2 + help + Say Y here to use the Feroceon L2 cache in writethrough mode. + Unless you specifically require this, say N for writeback mode. + +config MIGHT_HAVE_CACHE_L2X0 + bool + help + This option should be selected by machines which have a L2x0 + or PL310 cache controller, but where its use is optional. + + The only effect of this option is to make CACHE_L2X0 and + related options available to the user for configuration. + + Boards or SoCs which always require the cache controller + support to be present should select CACHE_L2X0 directly + instead of this option, thus preventing the user from + inadvertently configuring a broken kernel. + +config CACHE_L2X0 + bool "Enable the L2x0 outer cache controller" if MIGHT_HAVE_CACHE_L2X0 + default MIGHT_HAVE_CACHE_L2X0 + select OUTER_CACHE + select OUTER_CACHE_SYNC + help + This option enables the L2x0 PrimeCell. + +config CACHE_L2X0_PMU + bool "L2x0 performance monitor support" if CACHE_L2X0 + depends on PERF_EVENTS + help + This option enables support for the performance monitoring features + of the L220 and PL310 outer cache controllers. + +if CACHE_L2X0 + +config PL310_ERRATA_588369 + bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines" + help + The PL310 L2 cache controller implements three types of Clean & + Invalidate maintenance operations: by Physical Address + (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC). + They are architecturally defined to behave as the execution of a + clean operation followed immediately by an invalidate operation, + both performing to the same memory location. This functionality + is not correctly implemented in PL310 prior to r2p0 (fixed in r2p0) + as clean lines are not invalidated as a result of these operations. + +config PL310_ERRATA_727915 + bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption" + help + PL310 implements the Clean & Invalidate by Way L2 cache maintenance + operation (offset 0x7FC). This operation runs in background so that + PL310 can handle normal accesses while it is in progress. Under very + rare circumstances, due to this erratum, write data can be lost when + PL310 treats a cacheable write transaction during a Clean & + Invalidate by Way operation. Revisions prior to r3p1 are affected by + this errata (fixed in r3p1). + +config PL310_ERRATA_753970 + bool "PL310 errata: cache sync operation may be faulty" + help + This option enables the workaround for the 753970 PL310 (r3p0) erratum. + + Under some condition the effect of cache sync operation on + the store buffer still remains when the operation completes. + This means that the store buffer is always asked to drain and + this prevents it from merging any further writes. The workaround + is to replace the normal offset of cache sync operation (0x730) + by another offset targeting an unmapped PL310 register 0x740. + This has the same effect as the cache sync operation: store buffer + drain and waiting for all buffers empty. + +config PL310_ERRATA_769419 + bool "PL310 errata: no automatic Store Buffer drain" + help + On revisions of the PL310 prior to r3p2, the Store Buffer does + not automatically drain. This can cause normal, non-cacheable + writes to be retained when the memory system is idle, leading + to suboptimal I/O performance for drivers using coherent DMA. + This option adds a write barrier to the cpu_idle loop so that, + on systems with an outer cache, the store buffer is drained + explicitly. + +endif + +config CACHE_TAUROS2 + bool "Enable the Tauros2 L2 cache controller" + depends on (CPU_MOHAWK || CPU_PJ4) + default y + select OUTER_CACHE + help + This option enables the Tauros2 L2 cache controller (as + found on PJ1/PJ4). + +config CACHE_UNIPHIER + bool "Enable the UniPhier outer cache controller" + depends on ARCH_UNIPHIER + select ARM_L1_CACHE_SHIFT_7 + select OUTER_CACHE + select OUTER_CACHE_SYNC + help + This option enables the UniPhier outer cache (system cache) + controller. + +config CACHE_XSC3L2 + bool "Enable the L2 cache on XScale3" + depends on CPU_XSC3 + default y + select OUTER_CACHE + help + This option enables the L2 cache on XScale3. + +config ARM_L1_CACHE_SHIFT_6 + bool + default y if CPU_V7 + help + Setting ARM L1 cache line size to 64 Bytes. + +config ARM_L1_CACHE_SHIFT_7 + bool + help + Setting ARM L1 cache line size to 128 Bytes. + +config ARM_L1_CACHE_SHIFT + int + default 7 if ARM_L1_CACHE_SHIFT_7 + default 6 if ARM_L1_CACHE_SHIFT_6 + default 5 + +config ARM_DMA_MEM_BUFFERABLE + bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K || CPU_V7M) && !CPU_V7 + default y if CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M + help + Historically, the kernel has used strongly ordered mappings to + provide DMA coherent memory. With the advent of ARMv7, mapping + memory with differing types results in unpredictable behaviour, + so on these CPUs, this option is forced on. + + Multiple mappings with differing attributes is also unpredictable + on ARMv6 CPUs, but since they do not have aggressive speculative + prefetch, no harm appears to occur. + + However, drivers may be missing the necessary barriers for ARMv6, + and therefore turning this on may result in unpredictable driver + behaviour. Therefore, we offer this as an option. + + On some of the beefier ARMv7-M machines (with DMA and write + buffers) you likely want this enabled, while those that + didn't need it until now also won't need it in the future. + + You are recommended say 'Y' here and debug any affected drivers. + +config ARM_HEAVY_MB + bool + +config DEBUG_ALIGN_RODATA + bool "Make rodata strictly non-executable" + depends on STRICT_KERNEL_RWX + default y + help + If this is set, rodata will be made explicitly non-executable. This + provides protection on the rare chance that attackers might find and + use ROP gadgets that exist in the rodata section. This adds an + additional section-aligned split of rodata from kernel text so it + can be made explicitly non-executable. This padding may waste memory + space to gain the additional protection. diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile new file mode 100644 index 0000000000..71b858c9b1 --- /dev/null +++ b/arch/arm/mm/Makefile @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux arm-specific parts of the memory manager. +# + +obj-y := extable.o fault.o init.o iomap.o +obj-y += dma-mapping$(MMUEXT).o +obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ + mmap.o pgd.o mmu.o pageattr.o +KASAN_SANITIZE_mmu.o := n + +ifneq ($(CONFIG_MMU),y) +obj-y += nommu.o +obj-$(CONFIG_ARM_MPU) += pmsa-v7.o pmsa-v8.o +endif + +obj-$(CONFIG_ARM_PTDUMP_CORE) += dump.o +obj-$(CONFIG_ARM_PTDUMP_DEBUGFS) += ptdump_debugfs.o +obj-$(CONFIG_MODULES) += proc-syms.o +KASAN_SANITIZE_physaddr.o := n +obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o + +obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o + +obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o +obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o +obj-$(CONFIG_CPU_ABRT_EV4T) += abort-ev4t.o +obj-$(CONFIG_CPU_ABRT_LV4T) += abort-lv4t.o +obj-$(CONFIG_CPU_ABRT_EV5T) += abort-ev5t.o +obj-$(CONFIG_CPU_ABRT_EV5TJ) += abort-ev5tj.o +obj-$(CONFIG_CPU_ABRT_EV6) += abort-ev6.o +obj-$(CONFIG_CPU_ABRT_EV7) += abort-ev7.o + +obj-$(CONFIG_CPU_PABRT_LEGACY) += pabort-legacy.o +obj-$(CONFIG_CPU_PABRT_V6) += pabort-v6.o +obj-$(CONFIG_CPU_PABRT_V7) += pabort-v7.o + +obj-$(CONFIG_CPU_CACHE_V4) += cache-v4.o +obj-$(CONFIG_CPU_CACHE_V4WT) += cache-v4wt.o +obj-$(CONFIG_CPU_CACHE_V4WB) += cache-v4wb.o +obj-$(CONFIG_CPU_CACHE_V6) += cache-v6.o +obj-$(CONFIG_CPU_CACHE_V7) += cache-v7.o +obj-$(CONFIG_CPU_CACHE_FA) += cache-fa.o +obj-$(CONFIG_CPU_CACHE_NOP) += cache-nop.o +obj-$(CONFIG_CPU_CACHE_V7M) += cache-v7m.o + +obj-$(CONFIG_CPU_COPY_V4WT) += copypage-v4wt.o +obj-$(CONFIG_CPU_COPY_V4WB) += copypage-v4wb.o +obj-$(CONFIG_CPU_COPY_FEROCEON) += copypage-feroceon.o +obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o context.o +obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o +obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o +obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o +obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o + +obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o +obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o +obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o +obj-$(CONFIG_CPU_TLB_FEROCEON) += tlb-v4wbi.o # reuse v4wbi TLB functions +obj-$(CONFIG_CPU_TLB_V6) += tlb-v6.o +obj-$(CONFIG_CPU_TLB_V7) += tlb-v7.o +obj-$(CONFIG_CPU_TLB_FA) += tlb-fa.o + +obj-$(CONFIG_CPU_ARM7TDMI) += proc-arm7tdmi.o +obj-$(CONFIG_CPU_ARM720T) += proc-arm720.o +obj-$(CONFIG_CPU_ARM740T) += proc-arm740.o +obj-$(CONFIG_CPU_ARM9TDMI) += proc-arm9tdmi.o +obj-$(CONFIG_CPU_ARM920T) += proc-arm920.o +obj-$(CONFIG_CPU_ARM922T) += proc-arm922.o +obj-$(CONFIG_CPU_ARM925T) += proc-arm925.o +obj-$(CONFIG_CPU_ARM926T) += proc-arm926.o +obj-$(CONFIG_CPU_ARM940T) += proc-arm940.o +obj-$(CONFIG_CPU_ARM946E) += proc-arm946.o +obj-$(CONFIG_CPU_FA526) += proc-fa526.o +obj-$(CONFIG_CPU_ARM1020) += proc-arm1020.o +obj-$(CONFIG_CPU_ARM1020E) += proc-arm1020e.o +obj-$(CONFIG_CPU_ARM1022) += proc-arm1022.o +obj-$(CONFIG_CPU_ARM1026) += proc-arm1026.o +obj-$(CONFIG_CPU_SA110) += proc-sa110.o +obj-$(CONFIG_CPU_SA1100) += proc-sa1100.o +obj-$(CONFIG_CPU_XSCALE) += proc-xscale.o +obj-$(CONFIG_CPU_XSC3) += proc-xsc3.o +obj-$(CONFIG_CPU_MOHAWK) += proc-mohawk.o +obj-$(CONFIG_CPU_FEROCEON) += proc-feroceon.o +obj-$(CONFIG_CPU_V6) += proc-v6.o +obj-$(CONFIG_CPU_V6K) += proc-v6.o +obj-$(CONFIG_CPU_V7) += proc-v7.o proc-v7-bugs.o +obj-$(CONFIG_CPU_V7M) += proc-v7m.o + +obj-$(CONFIG_OUTER_CACHE) += l2c-common.o +obj-$(CONFIG_CACHE_B15_RAC) += cache-b15-rac.o +obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o +obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o +obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o +obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o +obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o +obj-$(CONFIG_CACHE_UNIPHIER) += cache-uniphier.o + +KASAN_SANITIZE_kasan_init.o := n +obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S new file mode 100644 index 0000000000..a10bcb8959 --- /dev/null +++ b/arch/arm/mm/abort-ev4.S @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v4_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v4_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + ldr r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable userspace access + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r3, #1 << 20 @ L = 1 -> write? + orreq r1, r1, #1 << 11 @ yes. + b do_DataAbort diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S new file mode 100644 index 0000000000..14743a2f69 --- /dev/null +++ b/arch/arm/mm/abort-ev4t.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v4t_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v4t_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r3, #1 << 20 @ check write + orreq r1, r1, #1 << 11 + b do_DataAbort diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S new file mode 100644 index 0000000000..98c5231188 --- /dev/null +++ b/arch/arm/mm/abort-ev5t.S @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v5t_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v5t_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable user access + bic r1, r1, #1 << 11 @ clear bits 11 of FSR + teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? + beq do_DataAbort @ yes + tst r3, #1 << 20 @ check write + orreq r1, r1, #1 << 11 + b do_DataAbort diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S new file mode 100644 index 0000000000..fec72f4fba --- /dev/null +++ b/arch/arm/mm/abort-ev5tj.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v5tj_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .align 5 +ENTRY(v5tj_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r5, #PSR_J_BIT @ Java? + bne do_DataAbort + do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 + ldreq r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable userspace access + teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? + beq do_DataAbort @ yes + tst r3, #1 << 20 @ L = 0 -> write + orreq r1, r1, #1 << 11 @ yes. + b do_DataAbort diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S new file mode 100644 index 0000000000..836dc12992 --- /dev/null +++ b/arch/arm/mm/abort-ev6.S @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include "abort-macro.S" +/* + * Function: v6_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ + .arch armv6k + .align 5 +ENTRY(v6_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR +/* + * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR. + */ +#ifdef CONFIG_ARM_ERRATA_326103 + ldr ip, =0x4107b36 + mrc p15, 0, r3, c0, c0, 0 @ get processor id + teq ip, r3, lsr #4 @ r0 ARM1136? + bne 1f + tst r5, #PSR_J_BIT @ Java? + tsteq r5, #PSR_T_BIT @ Thumb? + bne 1f + bic r1, r1, #1 << 11 @ clear bit 11 of FSR + ldr r3, [r4] @ read aborted ARM instruction + ARM_BE8(rev r3, r3) + + teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? + beq 1f @ yes + tst r3, #1 << 20 @ L = 0 -> write + orreq r1, r1, #1 << 11 @ yes. +#endif +1: uaccess_disable ip @ disable userspace access + b do_DataAbort diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S new file mode 100644 index 0000000000..53fb41c247 --- /dev/null +++ b/arch/arm/mm/abort-ev7.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v7_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + */ + .arch armv7-a + .align 5 +ENTRY(v7_early_abort) + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + uaccess_disable ip @ disable userspace access + b do_DataAbort +ENDPROC(v7_early_abort) diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S new file mode 100644 index 0000000000..fbd60a120f --- /dev/null +++ b/arch/arm/mm/abort-lv4t.S @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: v4t_late_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4-r5, r9-r11, r13 preserved + * + * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. + */ +ENTRY(v4t_late_abort) + tst r5, #PSR_T_BIT @ check for thumb mode +#ifdef CONFIG_CPU_CP15_MMU + mrc p15, 0, r1, c5, c0, 0 @ get FSR + mrc p15, 0, r0, c6, c0, 0 @ get FAR + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR +#else + mov r0, #0 @ clear r0, r1 (no FSR/FAR) + mov r1, #0 +#endif + bne .data_thumb_abort + ldr r8, [r4] @ read arm instruction + uaccess_disable ip @ disable userspace access + tst r8, #1 << 20 @ L = 1 -> write? + orreq r1, r1, #1 << 11 @ yes. + and r7, r8, #15 << 24 + add pc, pc, r7, lsr #22 @ Now branch to the relevant processing routine + nop + +/* 0 */ b .data_arm_lateldrhpost @ ldrh rd, [rn], #m/rm +/* 1 */ b .data_arm_lateldrhpre @ ldrh rd, [rn, #m/rm] +/* 2 */ b .data_unknown +/* 3 */ b .data_unknown +/* 4 */ b .data_arm_lateldrpostconst @ ldr rd, [rn], #m +/* 5 */ b .data_arm_lateldrpreconst @ ldr rd, [rn, #m] +/* 6 */ b .data_arm_lateldrpostreg @ ldr rd, [rn], rm +/* 7 */ b .data_arm_lateldrprereg @ ldr rd, [rn, rm] +/* 8 */ b .data_arm_ldmstm @ ldm*a rn, <rlist> +/* 9 */ b .data_arm_ldmstm @ ldm*b rn, <rlist> +/* a */ b .data_unknown +/* b */ b .data_unknown +/* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m +/* d */ b do_DataAbort @ ldc rd, [rn, #m] +/* e */ b .data_unknown +/* f */ b .data_unknown + +.data_unknown_r9: + ldr r9, [sp], #4 +.data_unknown: @ Part of jumptable + mov r0, r4 + mov r1, r8 + b baddataabort + +.data_arm_ldmstm: + tst r8, #1 << 21 @ check writeback bit + beq do_DataAbort @ no writeback -> no fixup + str r9, [sp, #-4]! + mov r7, #0x11 + orr r7, r7, #0x1100 + and r6, r8, r7 + and r9, r8, r7, lsl #1 + add r6, r6, r9, lsr #1 + and r9, r8, r7, lsl #2 + add r6, r6, r9, lsr #2 + and r9, r8, r7, lsl #3 + add r6, r6, r9, lsr #3 + add r6, r6, r6, lsr #8 + add r6, r6, r6, lsr #4 + and r6, r6, #15 @ r6 = no. of registers to transfer. + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6, lsl #2 @ Undo increment + addeq r7, r7, r6, lsl #2 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 + b do_DataAbort + +.data_arm_lateldrhpre: + tst r8, #1 << 21 @ Check writeback bit + beq do_DataAbort @ No writeback -> no fixup +.data_arm_lateldrhpost: + str r9, [sp, #-4]! + and r9, r8, #0x00f @ get Rm / low nibble of immediate value + tst r8, #1 << 22 @ if (immediate offset) + andne r6, r8, #0xf00 @ { immediate high nibble + orrne r6, r9, r6, lsr #4 @ combine nibbles } else + ldreq r6, [r2, r9, lsl #2] @ { load Rm value } +.data_arm_apply_r6_and_rn: + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6 @ Undo incrmenet + addeq r7, r7, r6 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 + b do_DataAbort + +.data_arm_lateldrpreconst: + tst r8, #1 << 21 @ check writeback bit + beq do_DataAbort @ no writeback -> no fixup +.data_arm_lateldrpostconst: + movs r6, r8, lsl #20 @ Get offset + beq do_DataAbort @ zero -> no fixup + str r9, [sp, #-4]! + and r9, r8, #15 << 16 @ Extract 'n' from instruction + ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' + tst r8, #1 << 23 @ Check U bit + subne r7, r7, r6, lsr #20 @ Undo increment + addeq r7, r7, r6, lsr #20 @ Undo decrement + str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 + b do_DataAbort + +.data_arm_lateldrprereg: + tst r8, #1 << 21 @ check writeback bit + beq do_DataAbort @ no writeback -> no fixup +.data_arm_lateldrpostreg: + and r7, r8, #15 @ Extract 'm' from instruction + ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' + str r9, [sp, #-4]! + mov r9, r8, lsr #7 @ get shift count + ands r9, r9, #31 + and r7, r8, #0x70 @ get shift type + orreq r7, r7, #8 @ shift count = 0 + add pc, pc, r7 + nop + + mov r6, r6, lsl r9 @ 0: LSL #!0 + b .data_arm_apply_r6_and_rn + b .data_arm_apply_r6_and_rn @ 1: LSL #0 + nop + b .data_unknown_r9 @ 2: MUL? + nop + b .data_unknown_r9 @ 3: MUL? + nop + mov r6, r6, lsr r9 @ 4: LSR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, lsr #32 @ 5: LSR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown_r9 @ 6: MUL? + nop + b .data_unknown_r9 @ 7: MUL? + nop + mov r6, r6, asr r9 @ 8: ASR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, asr #32 @ 9: ASR #32 + b .data_arm_apply_r6_and_rn + b .data_unknown_r9 @ A: MUL? + nop + b .data_unknown_r9 @ B: MUL? + nop + mov r6, r6, ror r9 @ C: ROR #!0 + b .data_arm_apply_r6_and_rn + mov r6, r6, rrx @ D: RRX + b .data_arm_apply_r6_and_rn + b .data_unknown_r9 @ E: MUL? + nop + b .data_unknown_r9 @ F: MUL? + +.data_thumb_abort: + ldrh r8, [r4] @ read instruction + uaccess_disable ip @ disable userspace access + tst r8, #1 << 11 @ L = 1 -> write? + orreq r1, r1, #1 << 8 @ yes + and r7, r8, #15 << 12 + add pc, pc, r7, lsr #10 @ lookup in table + nop + +/* 0 */ b .data_unknown +/* 1 */ b .data_unknown +/* 2 */ b .data_unknown +/* 3 */ b .data_unknown +/* 4 */ b .data_unknown +/* 5 */ b .data_thumb_reg +/* 6 */ b do_DataAbort +/* 7 */ b do_DataAbort +/* 8 */ b do_DataAbort +/* 9 */ b do_DataAbort +/* A */ b .data_unknown +/* B */ b .data_thumb_pushpop +/* C */ b .data_thumb_ldmstm +/* D */ b .data_unknown +/* E */ b .data_unknown +/* F */ b .data_unknown + +.data_thumb_reg: + tst r8, #1 << 9 + beq do_DataAbort + tst r8, #1 << 10 @ If 'S' (signed) bit is set + movne r1, #0 @ it must be a load instr + b do_DataAbort + +.data_thumb_pushpop: + tst r8, #1 << 10 + beq .data_unknown + str r9, [sp, #-4]! + and r6, r8, #0x55 @ hweight8(r8) + R bit + and r9, r8, #0xaa + add r6, r6, r9, lsr #1 + and r9, r6, #0xcc + and r6, r6, #0x33 + add r6, r6, r9, lsr #2 + movs r7, r8, lsr #9 @ C = r8 bit 8 (R bit) + adc r6, r6, r6, lsr #4 @ high + low nibble + R bit + and r6, r6, #15 @ number of regs to transfer + ldr r7, [r2, #13 << 2] + tst r8, #1 << 11 + addeq r7, r7, r6, lsl #2 @ increment SP if PUSH + subne r7, r7, r6, lsl #2 @ decrement SP if POP + str r7, [r2, #13 << 2] + ldr r9, [sp], #4 + b do_DataAbort + +.data_thumb_ldmstm: + str r9, [sp, #-4]! + and r6, r8, #0x55 @ hweight8(r8) + and r9, r8, #0xaa + add r6, r6, r9, lsr #1 + and r9, r6, #0xcc + and r6, r6, #0x33 + add r6, r6, r9, lsr #2 + add r6, r6, r6, lsr #4 + and r9, r8, #7 << 8 + ldr r7, [r2, r9, lsr #6] + and r6, r6, #15 @ number of regs to transfer + sub r7, r7, r6, lsl #2 @ always decrement + str r7, [r2, r9, lsr #6] + ldr r9, [sp], #4 + b do_DataAbort diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S new file mode 100644 index 0000000000..bacf53fd0b --- /dev/null +++ b/arch/arm/mm/abort-macro.S @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * The ARM LDRD and Thumb LDRSB instructions use bit 20/11 (ARM/Thumb) + * differently than every other instruction, so it is set to 0 (write) + * even though the instructions are read instructions. This means that + * during an abort the instructions will be treated as a write and the + * handler will raise a signal from unwriteable locations if they + * fault. We have to specifically check for these instructions + * from the abort handlers to treat them properly. + * + */ + + .macro do_thumb_abort, fsr, pc, psr, tmp + tst \psr, #PSR_T_BIT + beq not_thumb + ldrh \tmp, [\pc] @ Read aborted Thumb instruction + uaccess_disable ip @ disable userspace access + and \tmp, \tmp, # 0xfe00 @ Mask opcode field + cmp \tmp, # 0x5600 @ Is it ldrsb? + orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes + tst \tmp, #1 << 11 @ L = 0 -> write + orreq \fsr, \fsr, #1 << 11 @ yes. + b do_DataAbort +not_thumb: + .endm + +/* + * We check for the following instruction encoding for LDRD. + * + * [27:25] == 000 + * [7:4] == 1101 + * [20] == 0 + */ + .macro teq_ldrd, tmp, insn + mov \tmp, #0x0e100000 + orr \tmp, #0x000000f0 + and \tmp, \insn, \tmp + teq \tmp, #0x000000d0 + .endm diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S new file mode 100644 index 0000000000..6e2366a263 --- /dev/null +++ b/arch/arm/mm/abort-nommu.S @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> +/* + * Function: nommu_early_abort + * + * Params : r2 = pt_regs + * : r4 = aborted context pc + * : r5 = aborted context psr + * + * Returns : r4 - r11, r13 preserved + * + * Note: There is no FSR/FAR on !CPU_CP15_MMU cores. + * Just fill zero into the registers. + */ + .align 5 +ENTRY(nommu_early_abort) + mov r0, #0 @ clear r0, r1 (no FSR/FAR) + mov r1, #0 + b do_DataAbort +ENDPROC(nommu_early_abort) diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c new file mode 100644 index 0000000000..f8dd0b3cc8 --- /dev/null +++ b/arch/arm/mm/alignment.c @@ -0,0 +1,1052 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/alignment.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2001 Russell King + * Thumb alignment fault fixups (c) 2004 MontaVista Software, Inc. + * - Adapted from gdb/sim/arm/thumbemu.c -- Thumb instruction emulation. + * Copyright (C) 1996, Cygnus Software Technologies Ltd. + */ +#include <linux/moduleparam.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/sched/debug.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/sched/signal.h> +#include <linux/uaccess.h> + +#include <asm/cp15.h> +#include <asm/system_info.h> +#include <asm/unaligned.h> +#include <asm/opcodes.h> + +#include "fault.h" +#include "mm.h" + +/* + * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998 + * /proc/sys/debug/alignment, modified and integrated into + * Linux 2.1 by Russell King + * + * Speed optimisations and better fault handling by Russell King. + * + * *** NOTE *** + * This code is not portable to processors with late data abort handling. + */ +#define CODING_BITS(i) (i & 0x0e000000) +#define COND_BITS(i) (i & 0xf0000000) + +#define LDST_I_BIT(i) (i & (1 << 26)) /* Immediate constant */ +#define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */ +#define LDST_U_BIT(i) (i & (1 << 23)) /* Add offset */ +#define LDST_W_BIT(i) (i & (1 << 21)) /* Writeback */ +#define LDST_L_BIT(i) (i & (1 << 20)) /* Load */ + +#define LDST_P_EQ_U(i) ((((i) ^ ((i) >> 1)) & (1 << 23)) == 0) + +#define LDSTHD_I_BIT(i) (i & (1 << 22)) /* double/half-word immed */ +#define LDM_S_BIT(i) (i & (1 << 22)) /* write CPSR from SPSR */ + +#define RN_BITS(i) ((i >> 16) & 15) /* Rn */ +#define RD_BITS(i) ((i >> 12) & 15) /* Rd */ +#define RM_BITS(i) (i & 15) /* Rm */ + +#define REGMASK_BITS(i) (i & 0xffff) +#define OFFSET_BITS(i) (i & 0x0fff) + +#define IS_SHIFT(i) (i & 0x0ff0) +#define SHIFT_BITS(i) ((i >> 7) & 0x1f) +#define SHIFT_TYPE(i) (i & 0x60) +#define SHIFT_LSL 0x00 +#define SHIFT_LSR 0x20 +#define SHIFT_ASR 0x40 +#define SHIFT_RORRRX 0x60 + +#define BAD_INSTR 0xdeadc0de + +/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3, either f800h,e800h,f800h */ +#define IS_T32(hi16) \ + (((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800)) + +static unsigned long ai_user; +static unsigned long ai_sys; +static void *ai_sys_last_pc; +static unsigned long ai_skipped; +static unsigned long ai_half; +static unsigned long ai_word; +static unsigned long ai_dword; +static unsigned long ai_multi; +static int ai_usermode; +static unsigned long cr_no_alignment; + +core_param(alignment, ai_usermode, int, 0600); + +#define UM_WARN (1 << 0) +#define UM_FIXUP (1 << 1) +#define UM_SIGNAL (1 << 2) + +/* Return true if and only if the ARMv6 unaligned access model is in use. */ +static bool cpu_is_v6_unaligned(void) +{ + return cpu_architecture() >= CPU_ARCH_ARMv6 && get_cr() & CR_U; +} + +static int safe_usermode(int new_usermode, bool warn) +{ + /* + * ARMv6 and later CPUs can perform unaligned accesses for + * most single load and store instructions up to word size. + * LDM, STM, LDRD and STRD still need to be handled. + * + * Ignoring the alignment fault is not an option on these + * CPUs since we spin re-faulting the instruction without + * making any progress. + */ + if (cpu_is_v6_unaligned() && !(new_usermode & (UM_FIXUP | UM_SIGNAL))) { + new_usermode |= UM_FIXUP; + + if (warn) + pr_warn("alignment: ignoring faults is unsafe on this CPU. Defaulting to fixup mode.\n"); + } + + return new_usermode; +} + +#ifdef CONFIG_PROC_FS +static const char *usermode_action[] = { + "ignored", + "warn", + "fixup", + "fixup+warn", + "signal", + "signal+warn" +}; + +static int alignment_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "User:\t\t%lu\n", ai_user); + seq_printf(m, "System:\t\t%lu (%pS)\n", ai_sys, ai_sys_last_pc); + seq_printf(m, "Skipped:\t%lu\n", ai_skipped); + seq_printf(m, "Half:\t\t%lu\n", ai_half); + seq_printf(m, "Word:\t\t%lu\n", ai_word); + if (cpu_architecture() >= CPU_ARCH_ARMv5TE) + seq_printf(m, "DWord:\t\t%lu\n", ai_dword); + seq_printf(m, "Multi:\t\t%lu\n", ai_multi); + seq_printf(m, "User faults:\t%i (%s)\n", ai_usermode, + usermode_action[ai_usermode]); + + return 0; +} + +static int alignment_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, alignment_proc_show, NULL); +} + +static ssize_t alignment_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) +{ + char mode; + + if (count > 0) { + if (get_user(mode, buffer)) + return -EFAULT; + if (mode >= '0' && mode <= '5') + ai_usermode = safe_usermode(mode - '0', true); + } + return count; +} + +static const struct proc_ops alignment_proc_ops = { + .proc_open = alignment_proc_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_release = single_release, + .proc_write = alignment_proc_write, +}; +#endif /* CONFIG_PROC_FS */ + +union offset_union { + unsigned long un; + signed long sn; +}; + +#define TYPE_ERROR 0 +#define TYPE_FAULT 1 +#define TYPE_LDST 2 +#define TYPE_DONE 3 + +#ifdef __ARMEB__ +#define BE 1 +#define FIRST_BYTE_16 "mov %1, %1, ror #8\n" +#define FIRST_BYTE_32 "mov %1, %1, ror #24\n" +#define NEXT_BYTE "ror #24" +#else +#define BE 0 +#define FIRST_BYTE_16 +#define FIRST_BYTE_32 +#define NEXT_BYTE "lsr #8" +#endif + +#define __get8_unaligned_check(ins,val,addr,err) \ + __asm__( \ + ARM( "1: "ins" %1, [%2], #1\n" ) \ + THUMB( "1: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + "2:\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %0, #1\n" \ + " b 2b\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 3b\n" \ + " .popsection\n" \ + : "=r" (err), "=&r" (val), "=r" (addr) \ + : "0" (err), "2" (addr)) + +#define __get16_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v, a = addr; \ + __get8_unaligned_check(ins,v,a,err); \ + val = v << ((BE) ? 8 : 0); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 0 : 8); \ + if (err) \ + goto fault; \ + } while (0) + +#define get16_unaligned_check(val,addr) \ + __get16_unaligned_check("ldrb",val,addr) + +#define get16t_unaligned_check(val,addr) \ + __get16_unaligned_check("ldrbt",val,addr) + +#define __get32_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v, a = addr; \ + __get8_unaligned_check(ins,v,a,err); \ + val = v << ((BE) ? 24 : 0); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 16 : 8); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 8 : 16); \ + __get8_unaligned_check(ins,v,a,err); \ + val |= v << ((BE) ? 0 : 24); \ + if (err) \ + goto fault; \ + } while (0) + +#define get32_unaligned_check(val,addr) \ + __get32_unaligned_check("ldrb",val,addr) + +#define get32t_unaligned_check(val,addr) \ + __get32_unaligned_check("ldrbt",val,addr) + +#define __put16_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v = val, a = addr; \ + __asm__( FIRST_BYTE_16 \ + ARM( "1: "ins" %1, [%2], #1\n" ) \ + THUMB( "1: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + " mov %1, %1, "NEXT_BYTE"\n" \ + "2: "ins" %1, [%2]\n" \ + "3:\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ + " .align 2\n" \ + "4: mov %0, #1\n" \ + " b 3b\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 4b\n" \ + " .long 2b, 4b\n" \ + " .popsection\n" \ + : "=r" (err), "=&r" (v), "=&r" (a) \ + : "0" (err), "1" (v), "2" (a)); \ + if (err) \ + goto fault; \ + } while (0) + +#define put16_unaligned_check(val,addr) \ + __put16_unaligned_check("strb",val,addr) + +#define put16t_unaligned_check(val,addr) \ + __put16_unaligned_check("strbt",val,addr) + +#define __put32_unaligned_check(ins,val,addr) \ + do { \ + unsigned int err = 0, v = val, a = addr; \ + __asm__( FIRST_BYTE_32 \ + ARM( "1: "ins" %1, [%2], #1\n" ) \ + THUMB( "1: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + " mov %1, %1, "NEXT_BYTE"\n" \ + ARM( "2: "ins" %1, [%2], #1\n" ) \ + THUMB( "2: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + " mov %1, %1, "NEXT_BYTE"\n" \ + ARM( "3: "ins" %1, [%2], #1\n" ) \ + THUMB( "3: "ins" %1, [%2]\n" ) \ + THUMB( " add %2, %2, #1\n" ) \ + " mov %1, %1, "NEXT_BYTE"\n" \ + "4: "ins" %1, [%2]\n" \ + "5:\n" \ + " .pushsection .text.fixup,\"ax\"\n" \ + " .align 2\n" \ + "6: mov %0, #1\n" \ + " b 5b\n" \ + " .popsection\n" \ + " .pushsection __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 1b, 6b\n" \ + " .long 2b, 6b\n" \ + " .long 3b, 6b\n" \ + " .long 4b, 6b\n" \ + " .popsection\n" \ + : "=r" (err), "=&r" (v), "=&r" (a) \ + : "0" (err), "1" (v), "2" (a)); \ + if (err) \ + goto fault; \ + } while (0) + +#define put32_unaligned_check(val,addr) \ + __put32_unaligned_check("strb", val, addr) + +#define put32t_unaligned_check(val,addr) \ + __put32_unaligned_check("strbt", val, addr) + +static void +do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs, union offset_union offset) +{ + if (!LDST_U_BIT(instr)) + offset.un = -offset.un; + + if (!LDST_P_BIT(instr)) + addr += offset.un; + + if (!LDST_P_BIT(instr) || LDST_W_BIT(instr)) + regs->uregs[RN_BITS(instr)] = addr; +} + +static int +do_alignment_ldrhstrh(unsigned long addr, u32 instr, struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + + ai_half += 1; + + if (user_mode(regs)) + goto user; + + if (LDST_L_BIT(instr)) { + unsigned long val; + get16_unaligned_check(val, addr); + + /* signed half-word? */ + if (instr & 0x40) + val = (signed long)((signed short) val); + + regs->uregs[rd] = val; + } else + put16_unaligned_check(regs->uregs[rd], addr); + + return TYPE_LDST; + + user: + if (LDST_L_BIT(instr)) { + unsigned long val; + unsigned int __ua_flags = uaccess_save_and_enable(); + + get16t_unaligned_check(val, addr); + uaccess_restore(__ua_flags); + + /* signed half-word? */ + if (instr & 0x40) + val = (signed long)((signed short) val); + + regs->uregs[rd] = val; + } else { + unsigned int __ua_flags = uaccess_save_and_enable(); + put16t_unaligned_check(regs->uregs[rd], addr); + uaccess_restore(__ua_flags); + } + + return TYPE_LDST; + + fault: + return TYPE_FAULT; +} + +static int +do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + unsigned int rd2; + int load; + + if ((instr & 0xfe000000) == 0xe8000000) { + /* ARMv7 Thumb-2 32-bit LDRD/STRD */ + rd2 = (instr >> 8) & 0xf; + load = !!(LDST_L_BIT(instr)); + } else if (((rd & 1) == 1) || (rd == 14)) + goto bad; + else { + load = ((instr & 0xf0) == 0xd0); + rd2 = rd + 1; + } + + ai_dword += 1; + + if (user_mode(regs)) + goto user; + + if (load) { + unsigned long val; + get32_unaligned_check(val, addr); + regs->uregs[rd] = val; + get32_unaligned_check(val, addr + 4); + regs->uregs[rd2] = val; + } else { + put32_unaligned_check(regs->uregs[rd], addr); + put32_unaligned_check(regs->uregs[rd2], addr + 4); + } + + return TYPE_LDST; + + user: + if (load) { + unsigned long val, val2; + unsigned int __ua_flags = uaccess_save_and_enable(); + + get32t_unaligned_check(val, addr); + get32t_unaligned_check(val2, addr + 4); + + uaccess_restore(__ua_flags); + + regs->uregs[rd] = val; + regs->uregs[rd2] = val2; + } else { + unsigned int __ua_flags = uaccess_save_and_enable(); + put32t_unaligned_check(regs->uregs[rd], addr); + put32t_unaligned_check(regs->uregs[rd2], addr + 4); + uaccess_restore(__ua_flags); + } + + return TYPE_LDST; + bad: + return TYPE_ERROR; + fault: + return TYPE_FAULT; +} + +static int +do_alignment_ldrstr(unsigned long addr, u32 instr, struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + + ai_word += 1; + + if ((!LDST_P_BIT(instr) && LDST_W_BIT(instr)) || user_mode(regs)) + goto trans; + + if (LDST_L_BIT(instr)) { + unsigned int val; + get32_unaligned_check(val, addr); + regs->uregs[rd] = val; + } else + put32_unaligned_check(regs->uregs[rd], addr); + return TYPE_LDST; + + trans: + if (LDST_L_BIT(instr)) { + unsigned int val; + unsigned int __ua_flags = uaccess_save_and_enable(); + get32t_unaligned_check(val, addr); + uaccess_restore(__ua_flags); + regs->uregs[rd] = val; + } else { + unsigned int __ua_flags = uaccess_save_and_enable(); + put32t_unaligned_check(regs->uregs[rd], addr); + uaccess_restore(__ua_flags); + } + return TYPE_LDST; + + fault: + return TYPE_FAULT; +} + +/* + * LDM/STM alignment handler. + * + * There are 4 variants of this instruction: + * + * B = rn pointer before instruction, A = rn pointer after instruction + * ------ increasing address -----> + * | | r0 | r1 | ... | rx | | + * PU = 01 B A + * PU = 11 B A + * PU = 00 A B + * PU = 10 A B + */ +static int +do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs) +{ + unsigned int rd, rn, correction, nr_regs, regbits; + unsigned long eaddr, newaddr; + + if (LDM_S_BIT(instr)) + goto bad; + + correction = 4; /* processor implementation defined */ + regs->ARM_pc += correction; + + ai_multi += 1; + + /* count the number of registers in the mask to be transferred */ + nr_regs = hweight16(REGMASK_BITS(instr)) * 4; + + rn = RN_BITS(instr); + newaddr = eaddr = regs->uregs[rn]; + + if (!LDST_U_BIT(instr)) + nr_regs = -nr_regs; + newaddr += nr_regs; + if (!LDST_U_BIT(instr)) + eaddr = newaddr; + + if (LDST_P_EQ_U(instr)) /* U = P */ + eaddr += 4; + + /* + * For alignment faults on the ARM922T/ARM920T the MMU makes + * the FSR (and hence addr) equal to the updated base address + * of the multiple access rather than the restored value. + * Switch this message off if we've got a ARM92[02], otherwise + * [ls]dm alignment faults are noisy! + */ +#if !(defined CONFIG_CPU_ARM922T) && !(defined CONFIG_CPU_ARM920T) + /* + * This is a "hint" - we already have eaddr worked out by the + * processor for us. + */ + if (addr != eaddr) { + pr_err("LDMSTM: PC = %08lx, instr = %08x, " + "addr = %08lx, eaddr = %08lx\n", + instruction_pointer(regs), instr, addr, eaddr); + show_regs(regs); + } +#endif + + if (user_mode(regs)) { + unsigned int __ua_flags = uaccess_save_and_enable(); + for (regbits = REGMASK_BITS(instr), rd = 0; regbits; + regbits >>= 1, rd += 1) + if (regbits & 1) { + if (LDST_L_BIT(instr)) { + unsigned int val; + get32t_unaligned_check(val, eaddr); + regs->uregs[rd] = val; + } else + put32t_unaligned_check(regs->uregs[rd], eaddr); + eaddr += 4; + } + uaccess_restore(__ua_flags); + } else { + for (regbits = REGMASK_BITS(instr), rd = 0; regbits; + regbits >>= 1, rd += 1) + if (regbits & 1) { + if (LDST_L_BIT(instr)) { + unsigned int val; + get32_unaligned_check(val, eaddr); + regs->uregs[rd] = val; + } else + put32_unaligned_check(regs->uregs[rd], eaddr); + eaddr += 4; + } + } + + if (LDST_W_BIT(instr)) + regs->uregs[rn] = newaddr; + if (!LDST_L_BIT(instr) || !(REGMASK_BITS(instr) & (1 << 15))) + regs->ARM_pc -= correction; + return TYPE_DONE; + +fault: + regs->ARM_pc -= correction; + return TYPE_FAULT; + +bad: + pr_err("Alignment trap: not handling ldm with s-bit set\n"); + return TYPE_ERROR; +} + +/* + * Convert Thumb ld/st instruction forms to equivalent ARM instructions so + * we can reuse ARM userland alignment fault fixups for Thumb. + * + * This implementation was initially based on the algorithm found in + * gdb/sim/arm/thumbemu.c. It is basically just a code reduction of same + * to convert only Thumb ld/st instruction forms to equivalent ARM forms. + * + * NOTES: + * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections. + * 2. If for some reason we're passed an non-ld/st Thumb instruction to + * decode, we return 0xdeadc0de. This should never happen under normal + * circumstances but if it does, we've got other problems to deal with + * elsewhere and we obviously can't fix those problems here. + */ + +static unsigned long +thumb2arm(u16 tinstr) +{ + u32 L = (tinstr & (1<<11)) >> 11; + + switch ((tinstr & 0xf800) >> 11) { + /* 6.5.1 Format 1: */ + case 0x6000 >> 11: /* 7.1.52 STR(1) */ + case 0x6800 >> 11: /* 7.1.26 LDR(1) */ + case 0x7000 >> 11: /* 7.1.55 STRB(1) */ + case 0x7800 >> 11: /* 7.1.30 LDRB(1) */ + return 0xe5800000 | + ((tinstr & (1<<12)) << (22-12)) | /* fixup */ + (L<<20) | /* L==1? */ + ((tinstr & (7<<0)) << (12-0)) | /* Rd */ + ((tinstr & (7<<3)) << (16-3)) | /* Rn */ + ((tinstr & (31<<6)) >> /* immed_5 */ + (6 - ((tinstr & (1<<12)) ? 0 : 2))); + case 0x8000 >> 11: /* 7.1.57 STRH(1) */ + case 0x8800 >> 11: /* 7.1.32 LDRH(1) */ + return 0xe1c000b0 | + (L<<20) | /* L==1? */ + ((tinstr & (7<<0)) << (12-0)) | /* Rd */ + ((tinstr & (7<<3)) << (16-3)) | /* Rn */ + ((tinstr & (7<<6)) >> (6-1)) | /* immed_5[2:0] */ + ((tinstr & (3<<9)) >> (9-8)); /* immed_5[4:3] */ + + /* 6.5.1 Format 2: */ + case 0x5000 >> 11: + case 0x5800 >> 11: + { + static const u32 subset[8] = { + 0xe7800000, /* 7.1.53 STR(2) */ + 0xe18000b0, /* 7.1.58 STRH(2) */ + 0xe7c00000, /* 7.1.56 STRB(2) */ + 0xe19000d0, /* 7.1.34 LDRSB */ + 0xe7900000, /* 7.1.27 LDR(2) */ + 0xe19000b0, /* 7.1.33 LDRH(2) */ + 0xe7d00000, /* 7.1.31 LDRB(2) */ + 0xe19000f0 /* 7.1.35 LDRSH */ + }; + return subset[(tinstr & (7<<9)) >> 9] | + ((tinstr & (7<<0)) << (12-0)) | /* Rd */ + ((tinstr & (7<<3)) << (16-3)) | /* Rn */ + ((tinstr & (7<<6)) >> (6-0)); /* Rm */ + } + + /* 6.5.1 Format 3: */ + case 0x4800 >> 11: /* 7.1.28 LDR(3) */ + /* NOTE: This case is not technically possible. We're + * loading 32-bit memory data via PC relative + * addressing mode. So we can and should eliminate + * this case. But I'll leave it here for now. + */ + return 0xe59f0000 | + ((tinstr & (7<<8)) << (12-8)) | /* Rd */ + ((tinstr & 255) << (2-0)); /* immed_8 */ + + /* 6.5.1 Format 4: */ + case 0x9000 >> 11: /* 7.1.54 STR(3) */ + case 0x9800 >> 11: /* 7.1.29 LDR(4) */ + return 0xe58d0000 | + (L<<20) | /* L==1? */ + ((tinstr & (7<<8)) << (12-8)) | /* Rd */ + ((tinstr & 255) << 2); /* immed_8 */ + + /* 6.6.1 Format 1: */ + case 0xc000 >> 11: /* 7.1.51 STMIA */ + case 0xc800 >> 11: /* 7.1.25 LDMIA */ + { + u32 Rn = (tinstr & (7<<8)) >> 8; + u32 W = ((L<<Rn) & (tinstr&255)) ? 0 : 1<<21; + + return 0xe8800000 | W | (L<<20) | (Rn<<16) | + (tinstr&255); + } + + /* 6.6.1 Format 2: */ + case 0xb000 >> 11: /* 7.1.48 PUSH */ + case 0xb800 >> 11: /* 7.1.47 POP */ + if ((tinstr & (3 << 9)) == 0x0400) { + static const u32 subset[4] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe92d4000, /* STMDB sp!,{registers,lr} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + 0xe8bd8000 /* LDMIA sp!,{registers,pc} */ + }; + return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] | + (tinstr & 255); /* register_list */ + } + fallthrough; /* for illegal instruction case */ + + default: + return BAD_INSTR; + } +} + +/* + * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction + * handlable by ARM alignment handler, also find the corresponding handler, + * so that we can reuse ARM userland alignment fault fixups for Thumb. + * + * @pinstr: original Thumb-2 instruction; returns new handlable instruction + * @regs: register context. + * @poffset: return offset from faulted addr for later writeback + * + * NOTES: + * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections. + * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt) + */ +static void * +do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs, + union offset_union *poffset) +{ + u32 instr = *pinstr; + u16 tinst1 = (instr >> 16) & 0xffff; + u16 tinst2 = instr & 0xffff; + + switch (tinst1 & 0xffe0) { + /* A6.3.5 Load/Store multiple */ + case 0xe880: /* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */ + case 0xe8a0: /* ...above writeback version */ + case 0xe900: /* STMDB/STMFD, LDMDB/LDMEA */ + case 0xe920: /* ...above writeback version */ + /* no need offset decision since handler calculates it */ + return do_alignment_ldmstm; + + case 0xf840: /* POP/PUSH T3 (single register) */ + if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) { + u32 L = !!(LDST_L_BIT(instr)); + const u32 subset[2] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + }; + *pinstr = subset[L] | (1<<RD_BITS(instr)); + return do_alignment_ldmstm; + } + /* Else fall through for illegal instruction case */ + break; + + /* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */ + case 0xe860: + case 0xe960: + case 0xe8e0: + case 0xe9e0: + poffset->un = (tinst2 & 0xff) << 2; + fallthrough; + + case 0xe940: + case 0xe9c0: + return do_alignment_ldrdstrd; + + /* + * No need to handle load/store instructions up to word size + * since ARMv6 and later CPUs can perform unaligned accesses. + */ + default: + break; + } + return NULL; +} + +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst) +{ + u32 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = get_kernel_nofault(instr, ip); + + *inst = __mem_to_opcode_arm(instr); + + return fault; +} + +static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) +{ + u16 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = get_kernel_nofault(instr, ip); + + *inst = __mem_to_opcode_thumb16(instr); + + return fault; +} + +static int +do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + union offset_union offset; + unsigned long instrptr; + int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs); + unsigned int type; + u32 instr = 0; + u16 tinstr = 0; + int isize = 4; + int thumb2_32b = 0; + int fault; + + if (interrupts_enabled(regs)) + local_irq_enable(); + + instrptr = instruction_pointer(regs); + + if (thumb_mode(regs)) { + u16 *ptr = (u16 *)(instrptr & ~1); + + fault = alignment_get_thumb(regs, ptr, &tinstr); + if (!fault) { + if (cpu_architecture() >= CPU_ARCH_ARMv7 && + IS_T32(tinstr)) { + /* Thumb-2 32-bit */ + u16 tinst2; + fault = alignment_get_thumb(regs, ptr + 1, &tinst2); + instr = __opcode_thumb32_compose(tinstr, tinst2); + thumb2_32b = 1; + } else { + isize = 2; + instr = thumb2arm(tinstr); + } + } + } else { + fault = alignment_get_arm(regs, (void *)instrptr, &instr); + } + + if (fault) { + type = TYPE_FAULT; + goto bad_or_fault; + } + + if (user_mode(regs)) + goto user; + + ai_sys += 1; + ai_sys_last_pc = (void *)instruction_pointer(regs); + + fixup: + + regs->ARM_pc += isize; + + switch (CODING_BITS(instr)) { + case 0x00000000: /* 3.13.4 load/store instruction extensions */ + if (LDSTHD_I_BIT(instr)) + offset.un = (instr & 0xf00) >> 4 | (instr & 15); + else + offset.un = regs->uregs[RM_BITS(instr)]; + + if ((instr & 0x000000f0) == 0x000000b0 || /* LDRH, STRH */ + (instr & 0x001000f0) == 0x001000f0) /* LDRSH */ + handler = do_alignment_ldrhstrh; + else if ((instr & 0x001000f0) == 0x000000d0 || /* LDRD */ + (instr & 0x001000f0) == 0x000000f0) /* STRD */ + handler = do_alignment_ldrdstrd; + else if ((instr & 0x01f00ff0) == 0x01000090) /* SWP */ + goto swp; + else + goto bad; + break; + + case 0x04000000: /* ldr or str immediate */ + if (COND_BITS(instr) == 0xf0000000) /* NEON VLDn, VSTn */ + goto bad; + offset.un = OFFSET_BITS(instr); + handler = do_alignment_ldrstr; + break; + + case 0x06000000: /* ldr or str register */ + offset.un = regs->uregs[RM_BITS(instr)]; + + if (IS_SHIFT(instr)) { + unsigned int shiftval = SHIFT_BITS(instr); + + switch(SHIFT_TYPE(instr)) { + case SHIFT_LSL: + offset.un <<= shiftval; + break; + + case SHIFT_LSR: + offset.un >>= shiftval; + break; + + case SHIFT_ASR: + offset.sn >>= shiftval; + break; + + case SHIFT_RORRRX: + if (shiftval == 0) { + offset.un >>= 1; + if (regs->ARM_cpsr & PSR_C_BIT) + offset.un |= 1 << 31; + } else + offset.un = offset.un >> shiftval | + offset.un << (32 - shiftval); + break; + } + } + handler = do_alignment_ldrstr; + break; + + case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */ + if (thumb2_32b) { + offset.un = 0; + handler = do_alignment_t32_to_handler(&instr, regs, &offset); + } else { + offset.un = 0; + handler = do_alignment_ldmstm; + } + break; + + default: + goto bad; + } + + if (!handler) + goto bad; + type = handler(addr, instr, regs); + + if (type == TYPE_ERROR || type == TYPE_FAULT) { + regs->ARM_pc -= isize; + goto bad_or_fault; + } + + if (type == TYPE_LDST) + do_alignment_finish_ldst(addr, instr, regs, offset); + + if (thumb_mode(regs)) + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + + return 0; + + bad_or_fault: + if (type == TYPE_ERROR) + goto bad; + /* + * We got a fault - fix it up, or die. + */ + do_bad_area(addr, fsr, regs); + return 0; + + swp: + pr_err("Alignment trap: not handling swp instruction\n"); + + bad: + /* + * Oops, we didn't handle the instruction. + */ + pr_err("Alignment trap: not handling instruction " + "%0*x at [<%08lx>]\n", + isize << 1, + isize == 2 ? tinstr : instr, instrptr); + ai_skipped += 1; + return 1; + + user: + ai_user += 1; + + if (ai_usermode & UM_WARN) + printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*x " + "Address=0x%08lx FSR 0x%03x\n", current->comm, + task_pid_nr(current), instrptr, + isize << 1, + isize == 2 ? tinstr : instr, + addr, fsr); + + if (ai_usermode & UM_FIXUP) + goto fixup; + + if (ai_usermode & UM_SIGNAL) { + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)addr); + } else { + /* + * We're about to disable the alignment trap and return to + * user space. But if an interrupt occurs before actually + * reaching user space, then the IRQ vector entry code will + * notice that we were still in kernel space and therefore + * the alignment trap won't be re-enabled in that case as it + * is presumed to be always on from kernel space. + * Let's prevent that race by disabling interrupts here (they + * are disabled on the way back to user space anyway in + * entry-common.S) and disable the alignment trap only if + * there is no work pending for this thread. + */ + raw_local_irq_disable(); + if (!(read_thread_flags() & _TIF_WORK_MASK)) + set_cr(cr_no_alignment); + } + + return 0; +} + +static int __init noalign_setup(char *__unused) +{ + set_cr(__clear_cr(CR_A)); + return 1; +} +__setup("noalign", noalign_setup); + +/* + * This needs to be done after sysctl_init_bases(), otherwise sys/ will be + * overwritten. Actually, this shouldn't be in sys/ at all since + * it isn't a sysctl, and it doesn't contain sysctl information. + * We now locate it in /proc/cpu/alignment instead. + */ +static int __init alignment_init(void) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *res; + + res = proc_create("cpu/alignment", S_IWUSR | S_IRUGO, NULL, + &alignment_proc_ops); + if (!res) + return -ENOMEM; +#endif + + if (cpu_is_v6_unaligned()) { + set_cr(__clear_cr(CR_A)); + ai_usermode = safe_usermode(ai_usermode, false); + } + + cr_no_alignment = get_cr() & ~CR_A; + + hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN, + "alignment exception"); + + /* + * ARMv6K and ARMv7 use fault status 3 (0b00011) as Access Flag section + * fault, not as alignment error. + * + * TODO: handle ARMv6K properly. Runtime check for 'K' extension is + * needed. + */ + if (cpu_architecture() <= CPU_ARCH_ARMv6) { + hook_fault_code(3, do_alignment, SIGBUS, BUS_ADRALN, + "alignment exception"); + } + + return 0; +} + +fs_initcall(alignment_init); diff --git a/arch/arm/mm/cache-b15-rac.c b/arch/arm/mm/cache-b15-rac.c new file mode 100644 index 0000000000..9c1172f268 --- /dev/null +++ b/arch/arm/mm/cache-b15-rac.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Broadcom Brahma-B15 CPU read-ahead cache management functions + * + * Copyright (C) 2015-2016 Broadcom + */ + +#include <linux/err.h> +#include <linux/spinlock.h> +#include <linux/io.h> +#include <linux/bitops.h> +#include <linux/of_address.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/syscore_ops.h> +#include <linux/reboot.h> + +#include <asm/cacheflush.h> +#include <asm/hardware/cache-b15-rac.h> + +extern void v7_flush_kern_cache_all(void); + +/* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */ +#define RAC_CONFIG0_REG (0x78) +#define RACENPREF_MASK (0x3) +#define RACPREFINST_SHIFT (0) +#define RACENINST_SHIFT (2) +#define RACPREFDATA_SHIFT (4) +#define RACENDATA_SHIFT (6) +#define RAC_CPU_SHIFT (8) +#define RACCFG_MASK (0xff) +#define RAC_CONFIG1_REG (0x7c) +/* Brahma-B15 is a quad-core only design */ +#define B15_RAC_FLUSH_REG (0x80) +/* Brahma-B53 is an octo-core design */ +#define B53_RAC_FLUSH_REG (0x84) +#define FLUSH_RAC (1 << 0) + +/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */ +#define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \ + RACENPREF_MASK << RACENINST_SHIFT | \ + 1 << RACPREFDATA_SHIFT | \ + RACENPREF_MASK << RACENDATA_SHIFT) + +#define RAC_ENABLED 0 +/* Special state where we want to bypass the spinlock and call directly + * into the v7 cache maintenance operations during suspend/resume + */ +#define RAC_SUSPENDED 1 + +static void __iomem *b15_rac_base; +static DEFINE_SPINLOCK(rac_lock); + +static u32 rac_config0_reg; +static u32 rac_flush_offset; + +/* Initialization flag to avoid checking for b15_rac_base, and to prevent + * multi-platform kernels from crashing here as well. + */ +static unsigned long b15_rac_flags; + +static inline u32 __b15_rac_disable(void) +{ + u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG); + __raw_writel(0, b15_rac_base + RAC_CONFIG0_REG); + dmb(); + return val; +} + +static inline void __b15_rac_flush(void) +{ + u32 reg; + + __raw_writel(FLUSH_RAC, b15_rac_base + rac_flush_offset); + do { + /* This dmb() is required to force the Bus Interface Unit + * to clean outstanding writes, and forces an idle cycle + * to be inserted. + */ + dmb(); + reg = __raw_readl(b15_rac_base + rac_flush_offset); + } while (reg & FLUSH_RAC); +} + +static inline u32 b15_rac_disable_and_flush(void) +{ + u32 reg; + + reg = __b15_rac_disable(); + __b15_rac_flush(); + return reg; +} + +static inline void __b15_rac_enable(u32 val) +{ + __raw_writel(val, b15_rac_base + RAC_CONFIG0_REG); + /* dsb() is required here to be consistent with __flush_icache_all() */ + dsb(); +} + +#define BUILD_RAC_CACHE_OP(name, bar) \ +void b15_flush_##name(void) \ +{ \ + unsigned int do_flush; \ + u32 val = 0; \ + \ + if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) { \ + v7_flush_##name(); \ + bar; \ + return; \ + } \ + \ + spin_lock(&rac_lock); \ + do_flush = test_bit(RAC_ENABLED, &b15_rac_flags); \ + if (do_flush) \ + val = b15_rac_disable_and_flush(); \ + v7_flush_##name(); \ + if (!do_flush) \ + bar; \ + else \ + __b15_rac_enable(val); \ + spin_unlock(&rac_lock); \ +} + +#define nobarrier + +/* The readahead cache present in the Brahma-B15 CPU is a special piece of + * hardware after the integrated L2 cache of the B15 CPU complex whose purpose + * is to prefetch instruction and/or data with a line size of either 64 bytes + * or 256 bytes. The rationale is that the data-bus of the CPU interface is + * optimized for 256-bytes transactions, and enabling the readahead cache + * provides a significant performance boost we want it enabled (typically + * twice the performance for a memcpy benchmark application). + * + * The readahead cache is transparent for Modified Virtual Addresses + * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and + * DCCIMVAC. + * + * It is however not transparent for the following cache maintenance + * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU which is precisely + * what we are patching here with our BUILD_RAC_CACHE_OP here. + */ +BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier); + +static void b15_rac_enable(void) +{ + unsigned int cpu; + u32 enable = 0; + + for_each_possible_cpu(cpu) + enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT)); + + b15_rac_disable_and_flush(); + __b15_rac_enable(enable); +} + +static int b15_rac_reboot_notifier(struct notifier_block *nb, + unsigned long action, + void *data) +{ + /* During kexec, we are not yet migrated on the boot CPU, so we need to + * make sure we are SMP safe here. Once the RAC is disabled, flag it as + * suspended such that the hotplug notifier returns early. + */ + if (action == SYS_RESTART) { + spin_lock(&rac_lock); + b15_rac_disable_and_flush(); + clear_bit(RAC_ENABLED, &b15_rac_flags); + set_bit(RAC_SUSPENDED, &b15_rac_flags); + spin_unlock(&rac_lock); + } + + return NOTIFY_DONE; +} + +static struct notifier_block b15_rac_reboot_nb = { + .notifier_call = b15_rac_reboot_notifier, +}; + +/* The CPU hotplug case is the most interesting one, we basically need to make + * sure that the RAC is disabled for the entire system prior to having a CPU + * die, in particular prior to this dying CPU having exited the coherency + * domain. + * + * Once this CPU is marked dead, we can safely re-enable the RAC for the + * remaining CPUs in the system which are still online. + * + * Offlining a CPU is the problematic case, onlining a CPU is not much of an + * issue since the CPU and its cache-level hierarchy will start filling with + * the RAC disabled, so L1 and L2 only. + * + * In this function, we should NOT have to verify any unsafe setting/condition + * b15_rac_base: + * + * It is protected by the RAC_ENABLED flag which is cleared by default, and + * being cleared when initial procedure is done. b15_rac_base had been set at + * that time. + * + * RAC_ENABLED: + * There is a small timing windows, in b15_rac_init(), between + * cpuhp_setup_state_*() + * ... + * set RAC_ENABLED + * However, there is no hotplug activity based on the Linux booting procedure. + * + * Since we have to disable RAC for all cores, we keep RAC on as long as as + * possible (disable it as late as possible) to gain the cache benefit. + * + * Thus, dying/dead states are chosen here + * + * We are choosing not do disable the RAC on a per-CPU basis, here, if we did + * we would want to consider disabling it as early as possible to benefit the + * other active CPUs. + */ + +/* Running on the dying CPU */ +static int b15_rac_dying_cpu(unsigned int cpu) +{ + /* During kexec/reboot, the RAC is disabled via the reboot notifier + * return early here. + */ + if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) + return 0; + + spin_lock(&rac_lock); + + /* Indicate that we are starting a hotplug procedure */ + __clear_bit(RAC_ENABLED, &b15_rac_flags); + + /* Disable the readahead cache and save its value to a global */ + rac_config0_reg = b15_rac_disable_and_flush(); + + spin_unlock(&rac_lock); + + return 0; +} + +/* Running on a non-dying CPU */ +static int b15_rac_dead_cpu(unsigned int cpu) +{ + /* During kexec/reboot, the RAC is disabled via the reboot notifier + * return early here. + */ + if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) + return 0; + + spin_lock(&rac_lock); + + /* And enable it */ + __b15_rac_enable(rac_config0_reg); + __set_bit(RAC_ENABLED, &b15_rac_flags); + + spin_unlock(&rac_lock); + + return 0; +} + +static int b15_rac_suspend(void) +{ + /* Suspend the read-ahead cache oeprations, forcing our cache + * implementation to fallback to the regular ARMv7 calls. + * + * We are guaranteed to be running on the boot CPU at this point and + * with every other CPU quiesced, so setting RAC_SUSPENDED is not racy + * here. + */ + rac_config0_reg = b15_rac_disable_and_flush(); + set_bit(RAC_SUSPENDED, &b15_rac_flags); + + return 0; +} + +static void b15_rac_resume(void) +{ + /* Coming out of a S3 suspend/resume cycle, the read-ahead cache + * register RAC_CONFIG0_REG will be restored to its default value, make + * sure we re-enable it and set the enable flag, we are also guaranteed + * to run on the boot CPU, so not racy again. + */ + __b15_rac_enable(rac_config0_reg); + clear_bit(RAC_SUSPENDED, &b15_rac_flags); +} + +static struct syscore_ops b15_rac_syscore_ops = { + .suspend = b15_rac_suspend, + .resume = b15_rac_resume, +}; + +static int __init b15_rac_init(void) +{ + struct device_node *dn, *cpu_dn; + int ret = 0, cpu; + u32 reg, en_mask = 0; + + dn = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl"); + if (!dn) + return -ENODEV; + + if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n")) + goto out; + + b15_rac_base = of_iomap(dn, 0); + if (!b15_rac_base) { + pr_err("failed to remap BIU control base\n"); + ret = -ENOMEM; + goto out; + } + + cpu_dn = of_get_cpu_node(0, NULL); + if (!cpu_dn) { + ret = -ENODEV; + goto out; + } + + if (of_device_is_compatible(cpu_dn, "brcm,brahma-b15")) + rac_flush_offset = B15_RAC_FLUSH_REG; + else if (of_device_is_compatible(cpu_dn, "brcm,brahma-b53")) + rac_flush_offset = B53_RAC_FLUSH_REG; + else { + pr_err("Unsupported CPU\n"); + of_node_put(cpu_dn); + ret = -EINVAL; + goto out; + } + of_node_put(cpu_dn); + + ret = register_reboot_notifier(&b15_rac_reboot_nb); + if (ret) { + pr_err("failed to register reboot notifier\n"); + iounmap(b15_rac_base); + goto out; + } + + if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, + "arm/cache-b15-rac:dead", + NULL, b15_rac_dead_cpu); + if (ret) + goto out_unmap; + + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING, + "arm/cache-b15-rac:dying", + NULL, b15_rac_dying_cpu); + if (ret) + goto out_cpu_dead; + } + + if (IS_ENABLED(CONFIG_PM_SLEEP)) + register_syscore_ops(&b15_rac_syscore_ops); + + spin_lock(&rac_lock); + reg = __raw_readl(b15_rac_base + RAC_CONFIG0_REG); + for_each_possible_cpu(cpu) + en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT)); + WARN(reg & en_mask, "Read-ahead cache not previously disabled\n"); + + b15_rac_enable(); + set_bit(RAC_ENABLED, &b15_rac_flags); + spin_unlock(&rac_lock); + + pr_info("%pOF: Broadcom Brahma-B15 readahead cache\n", dn); + + goto out; + +out_cpu_dead: + cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING); +out_unmap: + unregister_reboot_notifier(&b15_rac_reboot_nb); + iounmap(b15_rac_base); +out: + of_node_put(dn); + return ret; +} +arch_initcall(b15_rac_init); diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S new file mode 100644 index 0000000000..71c64e92de --- /dev/null +++ b/arch/arm/mm/cache-fa.S @@ -0,0 +1,246 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/cache-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on cache-v4wb.S: + * Copyright (C) 1997-2002 Russell king + * + * Processors: FA520 FA526 FA626 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/page.h> + +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The total size of the data cache. + */ +#ifdef CONFIG_ARCH_GEMINI +#define CACHE_DSIZE 8192 +#else +#define CACHE_DSIZE 16384 +#endif + +/* FIXME: put optimal value here. Current one is just estimation */ +#define CACHE_DLIMIT (CACHE_DSIZE * 2) + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(fa_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(fa_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(fa_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(fa_flush_kern_cache_all) + mov ip, #0 + mov r2, #VM_EXEC +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean/invalidate D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(fa_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT @ total size >= limit? + bhs __flush_whole_cache @ flush whole D cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I line + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(fa_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - addr - kernel address + * - size - size of region + */ +ENTRY(fa_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +fa_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + tst r1, #CACHE_DLINESIZE - 1 + bic r1, r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +fa_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + ret lr + +/* + * dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(fa_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(fa_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq fa_dma_clean_range + bcs fa_dma_inv_range + b fa_dma_flush_range +ENDPROC(fa_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(fa_dma_unmap_area) + ret lr +ENDPROC(fa_dma_unmap_area) + + .globl fa_flush_kern_cache_louis + .equ fa_flush_kern_cache_louis, fa_flush_kern_cache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions fa diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c new file mode 100644 index 0000000000..25dbd84a1a --- /dev/null +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch/arm/mm/cache-feroceon-l2.c - Feroceon L2 cache controller support + * + * Copyright (C) 2008 Marvell Semiconductor + * + * References: + * - Unified Layer 2 Cache for Feroceon CPU Cores, + * Document ID MV-S104858-00, Rev. A, October 23 2007. + */ + +#include <linux/init.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/highmem.h> +#include <linux/io.h> +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/hardware/cache-feroceon-l2.h> + +#define L2_WRITETHROUGH_KIRKWOOD BIT(4) + +/* + * Low-level cache maintenance operations. + * + * As well as the regular 'clean/invalidate/flush L2 cache line by + * MVA' instructions, the Feroceon L2 cache controller also features + * 'clean/invalidate L2 range by MVA' operations. + * + * Cache range operations are initiated by writing the start and + * end addresses to successive cp15 registers, and process every + * cache line whose first byte address lies in the inclusive range + * [start:end]. + * + * The cache range operations stall the CPU pipeline until completion. + * + * The range operations require two successive cp15 writes, in + * between which we don't want to be preempted. + */ + +static inline unsigned long l2_get_va(unsigned long paddr) +{ +#ifdef CONFIG_HIGHMEM + /* + * Because range ops can't be done on physical addresses, + * we simply install a virtual mapping for it only for the + * TLB lookup to occur, hence no need to flush the untouched + * memory mapping afterwards (note: a cache flush may happen + * in some circumstances depending on the path taken in kunmap_atomic). + */ + void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT); + return (unsigned long)vaddr + (paddr & ~PAGE_MASK); +#else + return __phys_to_virt(paddr); +#endif +} + +static inline void l2_put_va(unsigned long vaddr) +{ +#ifdef CONFIG_HIGHMEM + kunmap_atomic((void *)vaddr); +#endif +} + +static inline void l2_clean_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr)); +} + +static inline void l2_clean_pa_range(unsigned long start, unsigned long end) +{ + unsigned long va_start, va_end, flags; + + /* + * Make sure 'start' and 'end' reference the same page, as + * L2 is PIPT and range operations only do a TLB lookup on + * the start address. + */ + BUG_ON((start ^ end) >> PAGE_SHIFT); + + va_start = l2_get_va(start); + va_end = va_start + (end - start); + raw_local_irq_save(flags); + __asm__("mcr p15, 1, %0, c15, c9, 4\n\t" + "mcr p15, 1, %1, c15, c9, 5" + : : "r" (va_start), "r" (va_end)); + raw_local_irq_restore(flags); + l2_put_va(va_start); +} + +static inline void l2_clean_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr)); +} + +static inline void l2_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr)); +} + +static inline void l2_inv_pa_range(unsigned long start, unsigned long end) +{ + unsigned long va_start, va_end, flags; + + /* + * Make sure 'start' and 'end' reference the same page, as + * L2 is PIPT and range operations only do a TLB lookup on + * the start address. + */ + BUG_ON((start ^ end) >> PAGE_SHIFT); + + va_start = l2_get_va(start); + va_end = va_start + (end - start); + raw_local_irq_save(flags); + __asm__("mcr p15, 1, %0, c15, c11, 4\n\t" + "mcr p15, 1, %1, c15, c11, 5" + : : "r" (va_start), "r" (va_end)); + raw_local_irq_restore(flags); + l2_put_va(va_start); +} + +static inline void l2_inv_all(void) +{ + __asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0)); +} + +/* + * Linux primitives. + * + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. + */ +#define CACHE_LINE_SIZE 32 +#define MAX_RANGE_SIZE 1024 + +static int l2_wt_override; + +static unsigned long calc_range_end(unsigned long start, unsigned long end) +{ + unsigned long range_end; + + BUG_ON(start & (CACHE_LINE_SIZE - 1)); + BUG_ON(end & (CACHE_LINE_SIZE - 1)); + + /* + * Try to process all cache lines between 'start' and 'end'. + */ + range_end = end; + + /* + * Limit the number of cache lines processed at once, + * since cache range operations stall the CPU pipeline + * until completion. + */ + if (range_end > start + MAX_RANGE_SIZE) + range_end = start + MAX_RANGE_SIZE; + + /* + * Cache range operations can't straddle a page boundary. + */ + if (range_end > (start | (PAGE_SIZE - 1)) + 1) + range_end = (start | (PAGE_SIZE - 1)) + 1; + + return range_end; +} + +static void feroceon_l2_inv_range(unsigned long start, unsigned long end) +{ + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (start < end && end & (CACHE_LINE_SIZE - 1)) { + l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); + end &= ~(CACHE_LINE_SIZE - 1); + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + unsigned long range_end = calc_range_end(start, end); + l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + + dsb(); +} + +static void feroceon_l2_clean_range(unsigned long start, unsigned long end) +{ + /* + * If L2 is forced to WT, the L2 will always be clean and we + * don't need to do anything here. + */ + if (!l2_wt_override) { + start &= ~(CACHE_LINE_SIZE - 1); + end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + } + + dsb(); +} + +static void feroceon_l2_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); + while (start != end) { + unsigned long range_end = calc_range_end(start, end); + if (!l2_wt_override) + l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE); + l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE); + start = range_end; + } + + dsb(); +} + + +/* + * Routines to disable and re-enable the D-cache and I-cache at run + * time. These are necessary because the L2 cache can only be enabled + * or disabled while the L1 Dcache and Icache are both disabled. + */ +static int __init flush_and_disable_dcache(void) +{ + u32 cr; + + cr = get_cr(); + if (cr & CR_C) { + unsigned long flags; + + raw_local_irq_save(flags); + flush_cache_all(); + set_cr(cr & ~CR_C); + raw_local_irq_restore(flags); + return 1; + } + return 0; +} + +static void __init enable_dcache(void) +{ + u32 cr; + + cr = get_cr(); + set_cr(cr | CR_C); +} + +static void __init __invalidate_icache(void) +{ + __asm__("mcr p15, 0, %0, c7, c5, 0" : : "r" (0)); +} + +static int __init invalidate_and_disable_icache(void) +{ + u32 cr; + + cr = get_cr(); + if (cr & CR_I) { + set_cr(cr & ~CR_I); + __invalidate_icache(); + return 1; + } + return 0; +} + +static void __init enable_icache(void) +{ + u32 cr; + + cr = get_cr(); + set_cr(cr | CR_I); +} + +static inline u32 read_extra_features(void) +{ + u32 u; + + __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u)); + + return u; +} + +static inline void write_extra_features(u32 u) +{ + __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); +} + +static void __init disable_l2_prefetch(void) +{ + u32 u; + + /* + * Read the CPU Extra Features register and verify that the + * Disable L2 Prefetch bit is set. + */ + u = read_extra_features(); + if (!(u & 0x01000000)) { + pr_info("Feroceon L2: Disabling L2 prefetch.\n"); + write_extra_features(u | 0x01000000); + } +} + +static void __init enable_l2(void) +{ + u32 u; + + u = read_extra_features(); + if (!(u & 0x00400000)) { + int i, d; + + pr_info("Feroceon L2: Enabling L2\n"); + + d = flush_and_disable_dcache(); + i = invalidate_and_disable_icache(); + l2_inv_all(); + write_extra_features(u | 0x00400000); + if (i) + enable_icache(); + if (d) + enable_dcache(); + } else + pr_err(FW_BUG + "Feroceon L2: bootloader left the L2 cache on!\n"); +} + +void __init feroceon_l2_init(int __l2_wt_override) +{ + l2_wt_override = __l2_wt_override; + + disable_l2_prefetch(); + + outer_cache.inv_range = feroceon_l2_inv_range; + outer_cache.clean_range = feroceon_l2_clean_range; + outer_cache.flush_range = feroceon_l2_flush_range; + + enable_l2(); + + pr_info("Feroceon L2: Cache support initialised%s.\n", + l2_wt_override ? ", in WT override mode" : ""); +} +#ifdef CONFIG_OF +static const struct of_device_id feroceon_ids[] __initconst = { + { .compatible = "marvell,kirkwood-cache"}, + { .compatible = "marvell,feroceon-cache"}, + {} +}; + +int __init feroceon_of_init(void) +{ + struct device_node *node; + void __iomem *base; + bool l2_wt_override = false; + +#if defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + l2_wt_override = true; +#endif + + node = of_find_matching_node(NULL, feroceon_ids); + if (node && of_device_is_compatible(node, "marvell,kirkwood-cache")) { + base = of_iomap(node, 0); + if (!base) + return -ENOMEM; + + if (l2_wt_override) + writel(readl(base) | L2_WRITETHROUGH_KIRKWOOD, base); + else + writel(readl(base) & ~L2_WRITETHROUGH_KIRKWOOD, base); + } + + feroceon_l2_init(l2_wt_override); + + return 0; +} +#endif diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c new file mode 100644 index 0000000000..993fefdc16 --- /dev/null +++ b/arch/arm/mm/cache-l2x0-pmu.c @@ -0,0 +1,565 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * L220/L310 cache controller support + * + * Copyright (C) 2016 ARM Limited + */ +#include <linux/errno.h> +#include <linux/hrtimer.h> +#include <linux/io.h> +#include <linux/list.h> +#include <linux/perf_event.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/types.h> + +#include <asm/hardware/cache-l2x0.h> + +#define PMU_NR_COUNTERS 2 + +static void __iomem *l2x0_base; +static struct pmu *l2x0_pmu; +static cpumask_t pmu_cpu; + +static const char *l2x0_name; + +static ktime_t l2x0_pmu_poll_period; +static struct hrtimer l2x0_pmu_hrtimer; + +/* + * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0. + * Registers controlling these are laid out in pairs, in descending order, i.e. + * the register for Counter1 comes first, followed by the register for + * Counter0. + * We ensure that idx 0 -> Counter0, and idx1 -> Counter1. + */ +static struct perf_event *events[PMU_NR_COUNTERS]; + +/* Find an unused counter */ +static int l2x0_pmu_find_idx(void) +{ + int i; + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (!events[i]) + return i; + } + + return -1; +} + +/* How many counters are allocated? */ +static int l2x0_pmu_num_active_counters(void) +{ + int i, cnt = 0; + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (events[i]) + cnt++; + } + + return cnt; +} + +static void l2x0_pmu_counter_config_write(int idx, u32 val) +{ + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx); +} + +static u32 l2x0_pmu_counter_read(int idx) +{ + return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx); +} + +static void l2x0_pmu_counter_write(int idx, u32 val) +{ + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx); +} + +static void __l2x0_pmu_enable(void) +{ + u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL); + val |= L2X0_EVENT_CNT_CTRL_ENABLE; + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL); +} + +static void __l2x0_pmu_disable(void) +{ + u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL); + val &= ~L2X0_EVENT_CNT_CTRL_ENABLE; + writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL); +} + +static void l2x0_pmu_enable(struct pmu *pmu) +{ + if (l2x0_pmu_num_active_counters() == 0) + return; + + __l2x0_pmu_enable(); +} + +static void l2x0_pmu_disable(struct pmu *pmu) +{ + if (l2x0_pmu_num_active_counters() == 0) + return; + + __l2x0_pmu_disable(); +} + +static void warn_if_saturated(u32 count) +{ + if (count != 0xffffffff) + return; + + pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n"); +} + +static void l2x0_pmu_event_read(struct perf_event *event) +{ + struct hw_perf_event *hw = &event->hw; + u64 prev_count, new_count, mask; + + do { + prev_count = local64_read(&hw->prev_count); + new_count = l2x0_pmu_counter_read(hw->idx); + } while (local64_xchg(&hw->prev_count, new_count) != prev_count); + + mask = GENMASK_ULL(31, 0); + local64_add((new_count - prev_count) & mask, &event->count); + + warn_if_saturated(new_count); +} + +static void l2x0_pmu_event_configure(struct perf_event *event) +{ + struct hw_perf_event *hw = &event->hw; + + /* + * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we + * will *always* lose some number of events when a counter saturates, + * and have no way of detecting how many were lost. + * + * To minimize the impact of this, we try to maximize the period by + * always starting counters at zero. To ensure that group ratios are + * representative, we poll periodically to avoid counters saturating. + * See l2x0_pmu_poll(). + */ + local64_set(&hw->prev_count, 0); + l2x0_pmu_counter_write(hw->idx, 0); +} + +static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer) +{ + unsigned long flags; + int i; + + local_irq_save(flags); + __l2x0_pmu_disable(); + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + struct perf_event *event = events[i]; + + if (!event) + continue; + + l2x0_pmu_event_read(event); + l2x0_pmu_event_configure(event); + } + + __l2x0_pmu_enable(); + local_irq_restore(flags); + + hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period); + return HRTIMER_RESTART; +} + + +static void __l2x0_pmu_event_enable(int idx, u32 event) +{ + u32 val; + + val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT; + val |= L2X0_EVENT_CNT_CFG_INT_DISABLED; + l2x0_pmu_counter_config_write(idx, val); +} + +static void l2x0_pmu_event_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE)); + l2x0_pmu_event_configure(event); + } + + hw->state = 0; + + __l2x0_pmu_event_enable(hw->idx, hw->config_base); +} + +static void __l2x0_pmu_event_disable(int idx) +{ + u32 val; + + val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT; + val |= L2X0_EVENT_CNT_CFG_INT_DISABLED; + l2x0_pmu_counter_config_write(idx, val); +} + +static void l2x0_pmu_event_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED)) + return; + + __l2x0_pmu_event_disable(hw->idx); + + hw->state |= PERF_HES_STOPPED; + + if (flags & PERF_EF_UPDATE) { + l2x0_pmu_event_read(event); + hw->state |= PERF_HES_UPTODATE; + } +} + +static int l2x0_pmu_event_add(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + int idx = l2x0_pmu_find_idx(); + + if (idx == -1) + return -EAGAIN; + + /* + * Pin the timer, so that the overflows are handled by the chosen + * event->cpu (this is the same one as presented in "cpumask" + * attribute). + */ + if (l2x0_pmu_num_active_counters() == 0) + hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period, + HRTIMER_MODE_REL_PINNED); + + events[idx] = event; + hw->idx = idx; + + l2x0_pmu_event_configure(event); + + hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + + if (flags & PERF_EF_START) + l2x0_pmu_event_start(event, 0); + + return 0; +} + +static void l2x0_pmu_event_del(struct perf_event *event, int flags) +{ + struct hw_perf_event *hw = &event->hw; + + l2x0_pmu_event_stop(event, PERF_EF_UPDATE); + + events[hw->idx] = NULL; + hw->idx = -1; + + if (l2x0_pmu_num_active_counters() == 0) + hrtimer_cancel(&l2x0_pmu_hrtimer); +} + +static bool l2x0_pmu_group_is_valid(struct perf_event *event) +{ + struct pmu *pmu = event->pmu; + struct perf_event *leader = event->group_leader; + struct perf_event *sibling; + int num_hw = 0; + + if (leader->pmu == pmu) + num_hw++; + else if (!is_software_event(leader)) + return false; + + for_each_sibling_event(sibling, leader) { + if (sibling->pmu == pmu) + num_hw++; + else if (!is_software_event(sibling)) + return false; + } + + return num_hw <= PMU_NR_COUNTERS; +} + +static int l2x0_pmu_event_init(struct perf_event *event) +{ + struct hw_perf_event *hw = &event->hw; + + if (event->attr.type != l2x0_pmu->type) + return -ENOENT; + + if (is_sampling_event(event) || + event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK) + return -EINVAL; + + hw->config_base = event->attr.config; + + if (!l2x0_pmu_group_is_valid(event)) + return -EINVAL; + + event->cpu = cpumask_first(&pmu_cpu); + + return 0; +} + +struct l2x0_event_attribute { + struct device_attribute attr; + unsigned int config; + bool pl310_only; +}; + +#define L2X0_EVENT_ATTR(_name, _config, _pl310_only) \ + (&((struct l2x0_event_attribute[]) {{ \ + .attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL), \ + .config = _config, \ + .pl310_only = _pl310_only, \ + }})[0].attr.attr) + +#define L220_PLUS_EVENT_ATTR(_name, _config) \ + L2X0_EVENT_ATTR(_name, _config, false) + +#define PL310_EVENT_ATTR(_name, _config) \ + L2X0_EVENT_ATTR(_name, _config, true) + +static ssize_t l2x0_pmu_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct l2x0_event_attribute *lattr; + + lattr = container_of(attr, typeof(*lattr), attr); + return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config); +} + +static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj, + struct attribute *attr, + int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct pmu *pmu = dev_get_drvdata(dev); + struct l2x0_event_attribute *lattr; + + lattr = container_of(attr, typeof(*lattr), attr.attr); + + if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0) + return attr->mode; + + return 0; +} + +static struct attribute *l2x0_pmu_event_attrs[] = { + L220_PLUS_EVENT_ATTR(co, 0x1), + L220_PLUS_EVENT_ATTR(drhit, 0x2), + L220_PLUS_EVENT_ATTR(drreq, 0x3), + L220_PLUS_EVENT_ATTR(dwhit, 0x4), + L220_PLUS_EVENT_ATTR(dwreq, 0x5), + L220_PLUS_EVENT_ATTR(dwtreq, 0x6), + L220_PLUS_EVENT_ATTR(irhit, 0x7), + L220_PLUS_EVENT_ATTR(irreq, 0x8), + L220_PLUS_EVENT_ATTR(wa, 0x9), + PL310_EVENT_ATTR(ipfalloc, 0xa), + PL310_EVENT_ATTR(epfhit, 0xb), + PL310_EVENT_ATTR(epfalloc, 0xc), + PL310_EVENT_ATTR(srrcvd, 0xd), + PL310_EVENT_ATTR(srconf, 0xe), + PL310_EVENT_ATTR(epfrcvd, 0xf), + NULL +}; + +static struct attribute_group l2x0_pmu_event_attrs_group = { + .name = "events", + .attrs = l2x0_pmu_event_attrs, + .is_visible = l2x0_pmu_event_attr_is_visible, +}; + +static ssize_t l2x0_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return cpumap_print_to_pagebuf(true, buf, &pmu_cpu); +} + +static struct device_attribute l2x0_pmu_cpumask_attr = + __ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL); + +static struct attribute *l2x0_pmu_cpumask_attrs[] = { + &l2x0_pmu_cpumask_attr.attr, + NULL, +}; + +static struct attribute_group l2x0_pmu_cpumask_attr_group = { + .attrs = l2x0_pmu_cpumask_attrs, +}; + +static const struct attribute_group *l2x0_pmu_attr_groups[] = { + &l2x0_pmu_event_attrs_group, + &l2x0_pmu_cpumask_attr_group, + NULL, +}; + +static void l2x0_pmu_reset(void) +{ + int i; + + __l2x0_pmu_disable(); + + for (i = 0; i < PMU_NR_COUNTERS; i++) + __l2x0_pmu_event_disable(i); +} + +static int l2x0_pmu_offline_cpu(unsigned int cpu) +{ + unsigned int target; + + if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu)) + return 0; + + target = cpumask_any_but(cpu_online_mask, cpu); + if (target >= nr_cpu_ids) + return 0; + + perf_pmu_migrate_context(l2x0_pmu, cpu, target); + cpumask_set_cpu(target, &pmu_cpu); + + return 0; +} + +void l2x0_pmu_suspend(void) +{ + int i; + + if (!l2x0_pmu) + return; + + l2x0_pmu_disable(l2x0_pmu); + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (events[i]) + l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE); + } + +} + +void l2x0_pmu_resume(void) +{ + int i; + + if (!l2x0_pmu) + return; + + l2x0_pmu_reset(); + + for (i = 0; i < PMU_NR_COUNTERS; i++) { + if (events[i]) + l2x0_pmu_event_start(events[i], PERF_EF_RELOAD); + } + + l2x0_pmu_enable(l2x0_pmu); +} + +void __init l2x0_pmu_register(void __iomem *base, u32 part) +{ + /* + * Determine whether we support the PMU, and choose the name for sysfs. + * This is also used by l2x0_pmu_event_attr_is_visible to determine + * which events to display, as the PL310 PMU supports a superset of + * L220 events. + * + * The L210 PMU has a different programmer's interface, and is not + * supported by this driver. + * + * We must defer registering the PMU until the perf subsystem is up and + * running, so just stash the name and base, and leave that to another + * initcall. + */ + switch (part & L2X0_CACHE_ID_PART_MASK) { + case L2X0_CACHE_ID_PART_L220: + l2x0_name = "l2c_220"; + break; + case L2X0_CACHE_ID_PART_L310: + l2x0_name = "l2c_310"; + break; + default: + return; + } + + l2x0_base = base; +} + +static __init int l2x0_pmu_init(void) +{ + int ret; + + if (!l2x0_base) + return 0; + + l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL); + if (!l2x0_pmu) { + pr_warn("Unable to allocate L2x0 PMU\n"); + return -ENOMEM; + } + + *l2x0_pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .pmu_enable = l2x0_pmu_enable, + .pmu_disable = l2x0_pmu_disable, + .read = l2x0_pmu_event_read, + .start = l2x0_pmu_event_start, + .stop = l2x0_pmu_event_stop, + .add = l2x0_pmu_event_add, + .del = l2x0_pmu_event_del, + .event_init = l2x0_pmu_event_init, + .attr_groups = l2x0_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + }; + + l2x0_pmu_reset(); + + /* + * We always use a hrtimer rather than an interrupt. + * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll. + * + * Polling once a second allows the counters to fill up to 1/128th on a + * quad-core test chip with cores clocked at 400MHz. Hopefully this + * leaves sufficient headroom to avoid overflow on production silicon + * at higher frequencies. + */ + l2x0_pmu_poll_period = ms_to_ktime(1000); + hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + l2x0_pmu_hrtimer.function = l2x0_pmu_poll; + + cpumask_set_cpu(0, &pmu_cpu); + ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE, + "perf/arm/l2x0:online", NULL, + l2x0_pmu_offline_cpu); + if (ret) + goto out_pmu; + + ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1); + if (ret) + goto out_cpuhp; + + return 0; + +out_cpuhp: + cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE); +out_pmu: + kfree(l2x0_pmu); + l2x0_pmu = NULL; + return ret; +} +device_initcall(l2x0_pmu_init); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c new file mode 100644 index 0000000000..43d91bfd23 --- /dev/null +++ b/arch/arm/mm/cache-l2x0.c @@ -0,0 +1,1825 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch/arm/mm/cache-l2x0.c - L210/L220/L310 cache controller support + * + * Copyright (C) 2007 ARM Limited + */ +#include <linux/cpu.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/log2.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> + +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/hardware/cache-l2x0.h> +#include <asm/hardware/cache-aurora-l2.h> +#include "cache-tauros3.h" + +struct l2c_init_data { + const char *type; + unsigned way_size_0; + unsigned num_lock; + void (*of_parse)(const struct device_node *, u32 *, u32 *); + void (*enable)(void __iomem *, unsigned); + void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); + void (*save)(void __iomem *); + void (*configure)(void __iomem *); + void (*unlock)(void __iomem *, unsigned); + struct outer_cache_fns outer_cache; +}; + +#define CACHE_LINE_SIZE 32 + +static void __iomem *l2x0_base; +static const struct l2c_init_data *l2x0_data; +static DEFINE_RAW_SPINLOCK(l2x0_lock); +static u32 l2x0_way_mask; /* Bitmask of active ways */ +static u32 l2x0_size; +static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; + +struct l2x0_regs l2x0_saved_regs; + +static bool l2x0_bresp_disable; +static bool l2x0_flz_disable; + +/* + * Common code for all cache controllers. + */ +static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask) +{ + /* wait for cache operation by line or way to complete */ + while (readl_relaxed(reg) & mask) + cpu_relax(); +} + +/* + * By default, we write directly to secure registers. Platforms must + * override this if they are running non-secure. + */ +static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg) +{ + if (val == readl_relaxed(base + reg)) + return; + if (outer_cache.write_sec) + outer_cache.write_sec(val, reg); + else + writel_relaxed(val, base + reg); +} + +/* + * This should only be called when we have a requirement that the + * register be written due to a work-around, as platforms running + * in non-secure mode may not be able to access this register. + */ +static inline void l2c_set_debug(void __iomem *base, unsigned long val) +{ + l2c_write_sec(val, base, L2X0_DEBUG_CTRL); +} + +static void __l2c_op_way(void __iomem *reg) +{ + writel_relaxed(l2x0_way_mask, reg); + l2c_wait_mask(reg, l2x0_way_mask); +} + +static inline void l2c_unlock(void __iomem *base, unsigned num) +{ + unsigned i; + + for (i = 0; i < num; i++) { + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE + + i * L2X0_LOCKDOWN_STRIDE); + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE + + i * L2X0_LOCKDOWN_STRIDE); + } +} + +static void l2c_configure(void __iomem *base) +{ + l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL); +} + +/* + * Enable the L2 cache controller. This function must only be + * called when the cache controller is known to be disabled. + */ +static void l2c_enable(void __iomem *base, unsigned num_lock) +{ + unsigned long flags; + + if (outer_cache.configure) + outer_cache.configure(&l2x0_saved_regs); + else + l2x0_data->configure(base); + + l2x0_data->unlock(base, num_lock); + + local_irq_save(flags); + __l2c_op_way(base + L2X0_INV_WAY); + writel_relaxed(0, base + sync_reg_offset); + l2c_wait_mask(base + sync_reg_offset, 1); + local_irq_restore(flags); + + l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL); +} + +static void l2c_disable(void) +{ + void __iomem *base = l2x0_base; + + l2x0_pmu_suspend(); + + outer_cache.flush_all(); + l2c_write_sec(0, base, L2X0_CTRL); + dsb(st); +} + +static void l2c_save(void __iomem *base) +{ + l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} + +static void l2c_resume(void) +{ + void __iomem *base = l2x0_base; + + /* Do not touch the controller if already enabled. */ + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) + l2c_enable(base, l2x0_data->num_lock); + + l2x0_pmu_resume(); +} + +/* + * L2C-210 specific code. + * + * The L2C-2x0 PA, set/way and sync operations are atomic, but we must + * ensure that no background operation is running. The way operations + * are all background tasks. + * + * While a background operation is in progress, any new operation is + * ignored (unspecified whether this causes an error.) Thankfully, not + * used on SMP. + * + * Never has a different sync register other than L2X0_CACHE_SYNC, but + * we use sync_reg_offset here so we can share some of this with L2C-310. + */ +static void __l2c210_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + sync_reg_offset); +} + +static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end) +{ + while (start < end) { + writel_relaxed(start, reg); + start += CACHE_LINE_SIZE; + } +} + +static void l2c210_inv_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); + } + + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_all(void) +{ + void __iomem *base = l2x0_base; + + BUG_ON(!irqs_disabled()); + + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + __l2c210_cache_sync(base); +} + +static void l2c210_sync(void) +{ + __l2c210_cache_sync(l2x0_base); +} + +static const struct l2c_init_data l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c_enable, + .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c_unlock, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c_resume, + }, +}; + +/* + * L2C-220 specific code. + * + * All operations are background operations: they have to be waited for. + * Conflicting requests generate a slave error (which will cause an + * imprecise abort.) Never uses sync_reg_offset, so we hard-code the + * sync register here. + * + * However, we can re-use the l2c210_resume call. + */ +static inline void __l2c220_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + L2X0_CACHE_SYNC); + l2c_wait_mask(base + L2X0_CACHE_SYNC, 1); +} + +static void l2c220_op_way(void __iomem *base, unsigned reg) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + reg); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end, unsigned long flags) +{ + raw_spinlock_t *lock = &l2x0_lock; + + while (start < end) { + unsigned long blk_end = start + min(end - start, 4096UL); + + while (start < blk_end) { + l2c_wait_mask(reg, 1); + writel_relaxed(start, reg); + start += CACHE_LINE_SIZE; + } + + if (blk_end < end) { + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); + } + } + + return flags; +} + +static void l2c220_inv_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); + } + } + + flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + start &= ~(CACHE_LINE_SIZE - 1); + if ((end - start) >= l2x0_size) { + l2c220_op_way(base, L2X0_CLEAN_WAY); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + start &= ~(CACHE_LINE_SIZE - 1); + if ((end - start) >= l2x0_size) { + l2c220_op_way(base, L2X0_CLEAN_INV_WAY); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_all(void) +{ + l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY); +} + +static void l2c220_sync(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c220_cache_sync(l2x0_base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_enable(void __iomem *base, unsigned num_lock) +{ + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. + */ + l2x0_saved_regs.aux_ctrl |= L220_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, num_lock); +} + +static void l2c220_unlock(void __iomem *base, unsigned num_lock) +{ + if (readl_relaxed(base + L2X0_AUX_CTRL) & L220_AUX_CTRL_NS_LOCKDOWN) + l2c_unlock(base, num_lock); +} + +static const struct l2c_init_data l2c220_data = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c220_enable, + .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c220_unlock, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c_resume, + }, +}; + +/* + * L2C-310 specific code. + * + * Very similar to L2C-210, the PA, set/way and sync operations are atomic, + * and the way operations are all background tasks. However, issuing an + * operation while a background operation is in progress results in a + * SLVERR response. We can reuse: + * + * __l2c210_cache_sync (using sync_reg_offset) + * l2c210_sync + * l2c210_inv_range (if 588369 is not applicable) + * l2c210_clean_range + * l2c210_flush_range (if 588369 is not applicable) + * l2c210_flush_all (if 727915 is not applicable) + * + * Errata: + * 588369: PL310 R0P0->R1P0, fixed R2P0. + * Affects: all clean+invalidate operations + * clean and invalidate skips the invalidate step, so we need to issue + * separate operations. We also require the above debug workaround + * enclosing this code fragment on affected parts. On unaffected parts, + * we must not use this workaround without the debug register writes + * to avoid exposing a problem similar to 727915. + * + * 727915: PL310 R2P0->R3P0, fixed R3P1. + * Affects: clean+invalidate by way + * clean and invalidate by way runs in the background, and a store can + * hit the line between the clean operation and invalidate operation, + * resulting in the store being lost. + * + * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2. + * Affects: 8x64-bit (double fill) line fetches + * double fill line fetches can fail to cause dirty data to be evicted + * from the cache before the new data overwrites the second line. + * + * 753970: PL310 R3P0, fixed R3P1. + * Affects: sync + * prevents merging writes after the sync operation, until another L2C + * operation is performed (or a number of other conditions.) + * + * 769419: PL310 R0P0->R3P1, fixed R3P2. + * Affects: store buffer + * store buffer is not automatically drained. + */ +static void l2c310_inv_range_erratum(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + unsigned long flags; + + /* Erratum 588369 for both clean+invalidate operations */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + l2c_set_debug(base, 0x03); + + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(end, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(end, base + L2X0_INV_LINE_PA); + } + + l2c_set_debug(base, 0x00); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + } + + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c310_flush_range_erratum(unsigned long start, unsigned long end) +{ + raw_spinlock_t *lock = &l2x0_lock; + unsigned long flags; + void __iomem *base = l2x0_base; + + raw_spin_lock_irqsave(lock, flags); + while (start < end) { + unsigned long blk_end = start + min(end - start, 4096UL); + + l2c_set_debug(base, 0x03); + while (start < blk_end) { + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + l2c_set_debug(base, 0x00); + + if (blk_end < end) { + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); + } + } + raw_spin_unlock_irqrestore(lock, flags); + __l2c210_cache_sync(base); +} + +static void l2c310_flush_all_erratum(void) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + l2c_set_debug(base, 0x03); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + l2c_set_debug(base, 0x00); + __l2c210_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void __init l2c310_save(void __iomem *base) +{ + unsigned revision; + + l2c_save(base); + + l2x0_saved_regs.tag_latency = readl_relaxed(base + + L310_TAG_LATENCY_CTRL); + l2x0_saved_regs.data_latency = readl_relaxed(base + + L310_DATA_LATENCY_CTRL); + l2x0_saved_regs.filter_end = readl_relaxed(base + + L310_ADDR_FILTER_END); + l2x0_saved_regs.filter_start = readl_relaxed(base + + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + /* From r2p0, there is Prefetch offset/control register */ + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base + + L310_PREFETCH_CTRL); + + /* From r3p0, there is Power control register */ + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2x0_saved_regs.pwr_ctrl = readl_relaxed(base + + L310_POWER_CTRL); +} + +static void l2c310_configure(void __iomem *base) +{ + unsigned revision; + + l2c_configure(base); + + /* restore pl310 setup */ + l2c_write_sec(l2x0_saved_regs.tag_latency, base, + L310_TAG_LATENCY_CTRL); + l2c_write_sec(l2x0_saved_regs.data_latency, base, + L310_DATA_LATENCY_CTRL); + l2c_write_sec(l2x0_saved_regs.filter_end, base, + L310_ADDR_FILTER_END); + l2c_write_sec(l2x0_saved_regs.filter_start, base, + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base, + L310_PREFETCH_CTRL); + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base, + L310_POWER_CTRL); +} + +static int l2c310_starting_cpu(unsigned int cpu) +{ + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + return 0; +} + +static int l2c310_dying_cpu(unsigned int cpu) +{ + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + return 0; +} + +static void __init l2c310_enable(void __iomem *base, unsigned num_lock) +{ + unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; + bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; + u32 aux = l2x0_saved_regs.aux_ctrl; + + if (rev >= L310_CACHE_ID_RTL_R2P0) { + if (cortex_a9 && !l2x0_bresp_disable) { + aux |= L310_AUX_CTRL_EARLY_BRESP; + pr_info("L2C-310 enabling early BRESP for Cortex-A9\n"); + } else if (aux & L310_AUX_CTRL_EARLY_BRESP) { + pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n"); + aux &= ~L310_AUX_CTRL_EARLY_BRESP; + } + } + + if (cortex_a9 && !l2x0_flz_disable) { + u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL); + u32 acr = get_auxcr(); + + pr_debug("Cortex-A9 ACR=0x%08x\n", acr); + + if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO)) + pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n"); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3))) + pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n"); + + if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) { + aux |= L310_AUX_CTRL_FULL_LINE_ZERO; + pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n"); + } + } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) { + pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n"); + aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP); + } + + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. + */ + l2x0_saved_regs.aux_ctrl = aux | L310_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, num_lock); + + /* Read back resulting AUX_CTRL value as it could have been altered. */ + aux = readl_relaxed(base + L2X0_AUX_CTRL); + + if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) { + u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL); + + pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n", + aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "", + aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "", + 1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK)); + } + + /* r3p0 or later has power control register */ + if (rev >= L310_CACHE_ID_RTL_R3P0) { + u32 power_ctrl; + + power_ctrl = readl_relaxed(base + L310_POWER_CTRL); + pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n", + power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis", + power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis"); + } + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) + cpuhp_setup_state(CPUHP_AP_ARM_L2X0_STARTING, + "arm/l2x0:starting", l2c310_starting_cpu, + l2c310_dying_cpu); +} + +static void __init l2c310_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) +{ + unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK; + const char *errata[8]; + unsigned n = 0; + + if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) && + revision < L310_CACHE_ID_RTL_R2P0 && + /* For bcm compatibility */ + fns->inv_range == l2c210_inv_range) { + fns->inv_range = l2c310_inv_range_erratum; + fns->flush_range = l2c310_flush_range_erratum; + errata[n++] = "588369"; + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) && + revision >= L310_CACHE_ID_RTL_R2P0 && + revision < L310_CACHE_ID_RTL_R3P1) { + fns->flush_all = l2c310_flush_all_erratum; + errata[n++] = "727915"; + } + + if (revision >= L310_CACHE_ID_RTL_R3P0 && + revision < L310_CACHE_ID_RTL_R3P2) { + u32 val = l2x0_saved_regs.prefetch_ctrl; + if (val & L310_PREFETCH_CTRL_DBL_LINEFILL) { + val &= ~L310_PREFETCH_CTRL_DBL_LINEFILL; + l2x0_saved_regs.prefetch_ctrl = val; + errata[n++] = "752271"; + } + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) && + revision == L310_CACHE_ID_RTL_R3P0) { + sync_reg_offset = L2X0_DUMMY_REG; + errata[n++] = "753970"; + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_769419)) + errata[n++] = "769419"; + + if (n) { + unsigned i; + + pr_info("L2C-310 errat%s", n > 1 ? "a" : "um"); + for (i = 0; i < n; i++) + pr_cont(" %s", errata[i]); + pr_cont(" enabled\n"); + } +} + +static void l2c310_disable(void) +{ + /* + * If full-line-of-zeros is enabled, we must first disable it in the + * Cortex-A9 auxiliary control register before disabling the L2 cache. + */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + + l2c_disable(); +} + +static void l2c310_resume(void) +{ + l2c_resume(); + + /* Re-enable full-line-of-zeros for Cortex-A9 */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); +} + +static void l2c310_unlock(void __iomem *base, unsigned num_lock) +{ + if (readl_relaxed(base + L2X0_AUX_CTRL) & L310_AUX_CTRL_NS_LOCKDOWN) + l2c_unlock(base, num_lock); +} + +static const struct l2c_init_data l2c310_init_fns __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .configure = l2c310_configure, + .unlock = l2c310_unlock, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +static int __init __l2c_init(const struct l2c_init_data *data, + u32 aux_val, u32 aux_mask, u32 cache_id, bool nosync) +{ + struct outer_cache_fns fns; + unsigned way_size_bits, ways; + u32 aux, old_aux; + + /* + * Save the pointer globally so that callbacks which do not receive + * context from callers can access the structure. + */ + l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL); + if (!l2x0_data) + return -ENOMEM; + + /* + * Sanity check the aux values. aux_mask is the bits we preserve + * from reading the hardware register, and aux_val is the bits we + * set. + */ + if (aux_val & aux_mask) + pr_alert("L2C: platform provided aux values permit register corruption.\n"); + + old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + aux &= aux_mask; + aux |= aux_val; + + if (old_aux != aux) + pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, aux); + + /* Determine the number of ways */ + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + case L2X0_CACHE_ID_PART_L310: + if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16)) + pr_warn("L2C: DT/platform tries to modify or specify cache size\n"); + if (aux & (1 << 16)) + ways = 16; + else + ways = 8; + break; + + case L2X0_CACHE_ID_PART_L210: + case L2X0_CACHE_ID_PART_L220: + ways = (aux >> 13) & 0xf; + break; + + case AURORA_CACHE_ID: + ways = (aux >> 13) & 0xf; + ways = 2 << ((ways + 1) >> 2); + break; + + default: + /* Assume unknown chips have 8 ways */ + ways = 8; + break; + } + + l2x0_way_mask = (1 << ways) - 1; + + /* + * way_size_0 is the size that a way_size value of zero would be + * given the calculation: way_size = way_size_0 << way_size_bits. + * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k, + * then way_size_0 would be 8k. + * + * L2 cache size = number of ways * way size. + */ + way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >> + L2C_AUX_CTRL_WAY_SIZE_SHIFT; + l2x0_size = ways * (data->way_size_0 << way_size_bits); + + fns = data->outer_cache; + fns.write_sec = outer_cache.write_sec; + fns.configure = outer_cache.configure; + if (data->fixup) + data->fixup(l2x0_base, cache_id, &fns); + if (nosync) { + pr_info("L2C: disabling outer sync\n"); + fns.sync = NULL; + } + + /* + * Check if l2x0 controller is already enabled. If we are booting + * in non-secure mode accessing the below registers will fault. + */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + l2x0_saved_regs.aux_ctrl = aux; + + data->enable(l2x0_base, data->num_lock); + } + + outer_cache = fns; + + /* + * It is strange to save the register state before initialisation, + * but hey, this is what the DT implementations decided to do. + */ + if (data->save) + data->save(l2x0_base); + + /* Re-read it in case some bits are reserved. */ + aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + + pr_info("%s cache controller enabled, %d ways, %d kB\n", + data->type, ways, l2x0_size >> 10); + pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", + data->type, cache_id, aux); + + l2x0_pmu_register(l2x0_base, cache_id); + + return 0; +} + +void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +{ + const struct l2c_init_data *data; + u32 cache_id; + + l2x0_base = base; + + cache_id = readl_relaxed(base + L2X0_CACHE_ID); + + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + default: + case L2X0_CACHE_ID_PART_L210: + data = &l2c210_data; + break; + + case L2X0_CACHE_ID_PART_L220: + data = &l2c220_data; + break; + + case L2X0_CACHE_ID_PART_L310: + data = &l2c310_init_fns; + break; + } + + /* Read back current (default) hardware configuration */ + if (data->save) + data->save(l2x0_base); + + __l2c_init(data, aux_val, aux_mask, cache_id, false); +} + +#ifdef CONFIG_OF +static int l2_wt_override; + +/* Aurora don't have the cache ID register available, so we have to + * pass it though the device tree */ +static u32 cache_id_part_number_from_dt; + +/** + * l2x0_cache_size_of_parse() - read cache size parameters from DT + * @np: the device tree node for the l2 cache + * @aux_val: pointer to machine-supplied auxilary register value, to + * be augmented by the call (bits to be set to 1) + * @aux_mask: pointer to machine-supplied auxilary register mask, to + * be augmented by the call (bits to be set to 0) + * @associativity: variable to return the calculated associativity in + * @max_way_size: the maximum size in bytes for the cache ways + */ +static int __init l2x0_cache_size_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask, + u32 *associativity, + u32 max_way_size) +{ + u32 mask = 0, val = 0; + u32 cache_size = 0, sets = 0; + u32 way_size_bits = 1; + u32 way_size = 0; + u32 block_size = 0; + u32 line_size = 0; + + of_property_read_u32(np, "cache-size", &cache_size); + of_property_read_u32(np, "cache-sets", &sets); + of_property_read_u32(np, "cache-block-size", &block_size); + of_property_read_u32(np, "cache-line-size", &line_size); + + if (!cache_size || !sets) + return -ENODEV; + + /* All these l2 caches have the same line = block size actually */ + if (!line_size) { + if (block_size) { + /* If linesize is not given, it is equal to blocksize */ + line_size = block_size; + } else { + /* Fall back to known size */ + pr_warn("L2C OF: no cache block/line size given: " + "falling back to default size %d bytes\n", + CACHE_LINE_SIZE); + line_size = CACHE_LINE_SIZE; + } + } + + if (line_size != CACHE_LINE_SIZE) + pr_warn("L2C OF: DT supplied line size %d bytes does " + "not match hardware line size of %d bytes\n", + line_size, + CACHE_LINE_SIZE); + + /* + * Since: + * set size = cache size / sets + * ways = cache size / (sets * line size) + * way size = cache size / (cache size / (sets * line size)) + * way size = sets * line size + * associativity = ways = cache size / way size + */ + way_size = sets * line_size; + *associativity = cache_size / way_size; + + if (way_size > max_way_size) { + pr_err("L2C OF: set size %dKB is too large\n", way_size); + return -EINVAL; + } + + pr_info("L2C OF: override cache size: %d bytes (%dKB)\n", + cache_size, cache_size >> 10); + pr_info("L2C OF: override line size: %d bytes\n", line_size); + pr_info("L2C OF: override way size: %d bytes (%dKB)\n", + way_size, way_size >> 10); + pr_info("L2C OF: override associativity: %d\n", *associativity); + + /* + * Calculates the bits 17:19 to set for way size: + * 512KB -> 6, 256KB -> 5, ... 16KB -> 1 + */ + way_size_bits = ilog2(way_size >> 10) - 3; + if (way_size_bits < 1 || way_size_bits > 6) { + pr_err("L2C OF: cache way size illegal: %dKB is not mapped\n", + way_size); + return -EINVAL; + } + + mask |= L2C_AUX_CTRL_WAY_SIZE_MASK; + val |= (way_size_bits << L2C_AUX_CTRL_WAY_SIZE_SHIFT); + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; + + return 0; +} + +static void __init l2x0_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[2] = { 0, 0 }; + u32 tag = 0; + u32 dirty = 0; + u32 val = 0, mask = 0; + u32 assoc; + int ret; + + of_property_read_u32(np, "arm,tag-latency", &tag); + if (tag) { + mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; + val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; + } + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1]) { + mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | + L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; + val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | + ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); + } + + of_property_read_u32(np, "arm,dirty-latency", &dirty); + if (dirty) { + mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; + val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; + } + + if (of_property_read_bool(np, "arm,parity-enable")) { + mask &= ~L2C_AUX_CTRL_PARITY_ENABLE; + val |= L2C_AUX_CTRL_PARITY_ENABLE; + } else if (of_property_read_bool(np, "arm,parity-disable")) { + mask &= ~L2C_AUX_CTRL_PARITY_ENABLE; + } + + if (of_property_read_bool(np, "arm,shared-override")) { + mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE; + val |= L2C_AUX_CTRL_SHARED_OVERRIDE; + } + + ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K); + if (ret) + return; + + if (assoc > 8) { + pr_err("l2x0 of: cache setting yield too high associativity\n"); + pr_err("l2x0 of: %d calculated, max 8\n", assoc); + } else { + mask |= L2X0_AUX_CTRL_ASSOC_MASK; + val |= (assoc << L2X0_AUX_CTRL_ASSOC_SHIFT); + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c_enable, + .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c_unlock, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c_resume, + }, +}; + +static const struct l2c_init_data of_l2c220_data __initconst = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c220_enable, + .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c220_unlock, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c_resume, + }, +}; + +static void __init l2c310_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[3] = { 0, 0, 0 }; + u32 tag[3] = { 0, 0, 0 }; + u32 filter[2] = { 0, 0 }; + u32 assoc; + u32 prefetch; + u32 power; + u32 val; + int ret; + + of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); + if (tag[0] && tag[1] && tag[2]) + l2x0_saved_regs.tag_latency = + L310_LATENCY_CTRL_RD(tag[0] - 1) | + L310_LATENCY_CTRL_WR(tag[1] - 1) | + L310_LATENCY_CTRL_SETUP(tag[2] - 1); + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1] && data[2]) + l2x0_saved_regs.data_latency = + L310_LATENCY_CTRL_RD(data[0] - 1) | + L310_LATENCY_CTRL_WR(data[1] - 1) | + L310_LATENCY_CTRL_SETUP(data[2] - 1); + + of_property_read_u32_array(np, "arm,filter-ranges", + filter, ARRAY_SIZE(filter)); + if (filter[1]) { + l2x0_saved_regs.filter_end = + ALIGN(filter[0] + filter[1], SZ_1M); + l2x0_saved_regs.filter_start = (filter[0] & ~(SZ_1M - 1)) + | L310_ADDR_FILTER_EN; + } + + ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K); + if (!ret) { + switch (assoc) { + case 16: + *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; + *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16; + *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; + break; + case 8: + *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK; + *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK; + break; + default: + pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n", + assoc); + break; + } + } + + if (of_property_read_bool(np, "arm,shared-override")) { + *aux_val |= L2C_AUX_CTRL_SHARED_OVERRIDE; + *aux_mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE; + } + + if (of_property_read_bool(np, "arm,parity-enable")) { + *aux_val |= L2C_AUX_CTRL_PARITY_ENABLE; + *aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE; + } else if (of_property_read_bool(np, "arm,parity-disable")) { + *aux_val &= ~L2C_AUX_CTRL_PARITY_ENABLE; + *aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE; + } + + if (of_property_read_bool(np, "arm,early-bresp-disable")) + l2x0_bresp_disable = true; + + if (of_property_read_bool(np, "arm,full-line-zero-disable")) + l2x0_flz_disable = true; + + prefetch = l2x0_saved_regs.prefetch_ctrl; + + ret = of_property_read_u32(np, "arm,double-linefill", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL; + else + prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,double-linefill property value is missing\n"); + } + + ret = of_property_read_u32(np, "arm,double-linefill-incr", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_INCR; + else + prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_INCR; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,double-linefill-incr property value is missing\n"); + } + + ret = of_property_read_u32(np, "arm,double-linefill-wrap", &val); + if (ret == 0) { + if (!val) + prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP; + else + prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,double-linefill-wrap property value is missing\n"); + } + + ret = of_property_read_u32(np, "arm,prefetch-drop", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_PREFETCH_DROP; + else + prefetch &= ~L310_PREFETCH_CTRL_PREFETCH_DROP; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,prefetch-drop property value is missing\n"); + } + + ret = of_property_read_u32(np, "arm,prefetch-offset", &val); + if (ret == 0) { + prefetch &= ~L310_PREFETCH_CTRL_OFFSET_MASK; + prefetch |= val & L310_PREFETCH_CTRL_OFFSET_MASK; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n"); + } + + ret = of_property_read_u32(np, "prefetch-data", &val); + if (ret == 0) { + if (val) { + prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH; + *aux_val |= L310_PREFETCH_CTRL_DATA_PREFETCH; + } else { + prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH; + *aux_val &= ~L310_PREFETCH_CTRL_DATA_PREFETCH; + } + *aux_mask &= ~L310_PREFETCH_CTRL_DATA_PREFETCH; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF prefetch-data property value is missing\n"); + } + + ret = of_property_read_u32(np, "prefetch-instr", &val); + if (ret == 0) { + if (val) { + prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH; + *aux_val |= L310_PREFETCH_CTRL_INSTR_PREFETCH; + } else { + prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH; + *aux_val &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH; + } + *aux_mask &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF prefetch-instr property value is missing\n"); + } + + l2x0_saved_regs.prefetch_ctrl = prefetch; + + power = l2x0_saved_regs.pwr_ctrl | + L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN; + + ret = of_property_read_u32(np, "arm,dynamic-clock-gating", &val); + if (!ret) { + if (!val) + power &= ~L310_DYNAMIC_CLK_GATING_EN; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF dynamic-clock-gating property value is missing or invalid\n"); + } + ret = of_property_read_u32(np, "arm,standby-mode", &val); + if (!ret) { + if (!val) + power &= ~L310_STNDBY_MODE_EN; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF standby-mode property value is missing or invalid\n"); + } + + l2x0_saved_regs.pwr_ctrl = power; +} + +static const struct l2c_init_data of_l2c310_data __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .configure = l2c310_configure, + .unlock = l2c310_unlock, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +/* + * This is a variant of the of_l2c310_data with .sync set to + * NULL. Outer sync operations are not needed when the system is I/O + * coherent, and potentially harmful in certain situations (PCIe/PL310 + * deadlock on Armada 375/38x due to hardware I/O coherency). The + * other operations are kept because they are infrequent (therefore do + * not cause the deadlock in practice) and needed for secondary CPU + * boot and other power management activities. + */ +static const struct l2c_init_data of_l2c310_coherent_data __initconst = { + .type = "L2C-310 Coherent", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .configure = l2c310_configure, + .unlock = l2c310_unlock, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .resume = l2c310_resume, + }, +}; + +/* + * Note that the end addresses passed to Linux primitives are + * noninclusive, while the hardware cache range operations use + * inclusive start and end addresses. + */ +static unsigned long aurora_range_end(unsigned long start, unsigned long end) +{ + /* + * Limit the number of cache lines processed at once, + * since cache range operations stall the CPU pipeline + * until completion. + */ + if (end > start + AURORA_MAX_RANGE_SIZE) + end = start + AURORA_MAX_RANGE_SIZE; + + /* + * Cache range operations can't straddle a page boundary. + */ + if (end > PAGE_ALIGN(start+1)) + end = PAGE_ALIGN(start+1); + + return end; +} + +static void aurora_pa_range(unsigned long start, unsigned long end, + unsigned long offset) +{ + void __iomem *base = l2x0_base; + unsigned long range_end; + unsigned long flags; + + /* + * round start and end adresses up to cache line size + */ + start &= ~(CACHE_LINE_SIZE - 1); + end = ALIGN(end, CACHE_LINE_SIZE); + + /* + * perform operation on all full cache lines between 'start' and 'end' + */ + while (start < end) { + range_end = aurora_range_end(start, end); + + raw_spin_lock_irqsave(&l2x0_lock, flags); + writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG); + writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + writel_relaxed(0, base + AURORA_SYNC_REG); + start = range_end; + } +} +static void aurora_inv_range(unsigned long start, unsigned long end) +{ + aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG); +} + +static void aurora_clean_range(unsigned long start, unsigned long end) +{ + /* + * If L2 is forced to WT, the L2 will always be clean and we + * don't need to do anything here. + */ + if (!l2_wt_override) + aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG); +} + +static void aurora_flush_range(unsigned long start, unsigned long end) +{ + if (l2_wt_override) + aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG); + else + aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG); +} + +static void aurora_flush_all(void) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + /* clean all ways */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + writel_relaxed(0, base + AURORA_SYNC_REG); +} + +static void aurora_cache_sync(void) +{ + writel_relaxed(0, l2x0_base + AURORA_SYNC_REG); +} + +static void aurora_disable(void) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + writel_relaxed(0, base + AURORA_SYNC_REG); + l2c_write_sec(0, base, L2X0_CTRL); + dsb(st); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void aurora_save(void __iomem *base) +{ + l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL); + l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL); +} + +/* + * For Aurora cache in no outer mode, enable via the CP15 coprocessor + * broadcasting of cache commands to L2. + */ +static void __init aurora_enable_no_outer(void __iomem *base, + unsigned num_lock) +{ + u32 u; + + asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u)); + u |= AURORA_CTRL_FW; /* Set the FW bit */ + asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u)); + + isb(); + + l2c_enable(base, num_lock); +} + +static void __init aurora_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) +{ + sync_reg_offset = AURORA_SYNC_REG; +} + +static void __init aurora_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; + u32 mask = AURORA_ACR_REPLACEMENT_MASK; + + of_property_read_u32(np, "cache-id-part", + &cache_id_part_number_from_dt); + + /* Determine and save the write policy */ + l2_wt_override = of_property_read_bool(np, "wt-override"); + + if (l2_wt_override) { + val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; + mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; + } + + if (of_property_read_bool(np, "marvell,ecc-enable")) { + mask |= AURORA_ACR_ECC_EN; + val |= AURORA_ACR_ECC_EN; + } + + if (of_property_read_bool(np, "arm,parity-enable")) { + mask |= AURORA_ACR_PARITY_EN; + val |= AURORA_ACR_PARITY_EN; + } else if (of_property_read_bool(np, "arm,parity-disable")) { + mask |= AURORA_ACR_PARITY_EN; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_aurora_with_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = l2c_enable, + .fixup = aurora_fixup, + .save = aurora_save, + .configure = l2c_configure, + .unlock = l2c_unlock, + .outer_cache = { + .inv_range = aurora_inv_range, + .clean_range = aurora_clean_range, + .flush_range = aurora_flush_range, + .flush_all = aurora_flush_all, + .disable = aurora_disable, + .sync = aurora_cache_sync, + .resume = l2c_resume, + }, +}; + +static const struct l2c_init_data of_aurora_no_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = aurora_enable_no_outer, + .fixup = aurora_fixup, + .save = aurora_save, + .configure = l2c_configure, + .unlock = l2c_unlock, + .outer_cache = { + .resume = l2c_resume, + }, +}; + +/* + * For certain Broadcom SoCs, depending on the address range, different offsets + * need to be added to the address before passing it to L2 for + * invalidation/clean/flush + * + * Section Address Range Offset EMI + * 1 0x00000000 - 0x3FFFFFFF 0x80000000 VC + * 2 0x40000000 - 0xBFFFFFFF 0x40000000 SYS + * 3 0xC0000000 - 0xFFFFFFFF 0x80000000 VC + * + * When the start and end addresses have crossed two different sections, we + * need to break the L2 operation into two, each within its own section. + * For example, if we need to invalidate addresses starts at 0xBFFF0000 and + * ends at 0xC0001000, we need do invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and 2) + * 0xC0000000 - 0xC0001000 + * + * Note 1: + * By breaking a single L2 operation into two, we may potentially suffer some + * performance hit, but keep in mind the cross section case is very rare + * + * Note 2: + * We do not need to handle the case when the start address is in + * Section 1 and the end address is in Section 3, since it is not a valid use + * case + * + * Note 3: + * Section 1 in practical terms can no longer be used on rev A2. Because of + * that the code does not need to handle section 1 at all. + * + */ +#define BCM_SYS_EMI_START_ADDR 0x40000000UL +#define BCM_VC_EMI_SEC3_START_ADDR 0xC0000000UL + +#define BCM_SYS_EMI_OFFSET 0x40000000UL +#define BCM_VC_EMI_OFFSET 0x80000000UL + +static inline int bcm_addr_is_sys_emi(unsigned long addr) +{ + return (addr >= BCM_SYS_EMI_START_ADDR) && + (addr < BCM_VC_EMI_SEC3_START_ADDR); +} + +static inline unsigned long bcm_l2_phys_addr(unsigned long addr) +{ + if (bcm_addr_is_sys_emi(addr)) + return addr + BCM_SYS_EMI_OFFSET; + else + return addr + BCM_VC_EMI_OFFSET; +} + +static void bcm_inv_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2c210_inv_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2c210_inv_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_clean_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2c210_clean_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2c210_clean_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +static void bcm_flush_range(unsigned long start, unsigned long end) +{ + unsigned long new_start, new_end; + + BUG_ON(start < BCM_SYS_EMI_START_ADDR); + + if (unlikely(end <= start)) + return; + + if ((end - start) >= l2x0_size) { + outer_cache.flush_all(); + return; + } + + new_start = bcm_l2_phys_addr(start); + new_end = bcm_l2_phys_addr(end); + + /* normal case, no cross section between start and end */ + if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { + l2c210_flush_range(new_start, new_end); + return; + } + + /* They cross sections, so it can only be a cross from section + * 2 to section 3 + */ + l2c210_flush_range(new_start, + bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); + l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + new_end); +} + +/* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */ +static const struct l2c_init_data of_bcm_l2x0_data __initconst = { + .type = "BCM-L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .save = l2c310_save, + .configure = l2c310_configure, + .unlock = l2c310_unlock, + .outer_cache = { + .inv_range = bcm_inv_range, + .clean_range = bcm_clean_range, + .flush_range = bcm_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +static void __init tauros3_save(void __iomem *base) +{ + l2c_save(base); + + l2x0_saved_regs.aux2_ctrl = + readl_relaxed(base + TAUROS3_AUX2_CTRL); + l2x0_saved_regs.prefetch_ctrl = + readl_relaxed(base + L310_PREFETCH_CTRL); +} + +static void tauros3_configure(void __iomem *base) +{ + l2c_configure(base); + writel_relaxed(l2x0_saved_regs.aux2_ctrl, + base + TAUROS3_AUX2_CTRL); + writel_relaxed(l2x0_saved_regs.prefetch_ctrl, + base + L310_PREFETCH_CTRL); +} + +static const struct l2c_init_data of_tauros3_data __initconst = { + .type = "Tauros3", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c_enable, + .save = tauros3_save, + .configure = tauros3_configure, + .unlock = l2c_unlock, + /* Tauros3 broadcasts L1 cache operations to L2 */ + .outer_cache = { + .resume = l2c_resume, + }, +}; + +#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns } +static const struct of_device_id l2x0_ids[] __initconst = { + L2C_ID("arm,l210-cache", of_l2c210_data), + L2C_ID("arm,l220-cache", of_l2c220_data), + L2C_ID("arm,pl310-cache", of_l2c310_data), + L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), + L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data), + L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data), + L2C_ID("marvell,tauros3-cache", of_tauros3_data), + /* Deprecated IDs */ + L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), + {} +}; + +int __init l2x0_of_init(u32 aux_val, u32 aux_mask) +{ + const struct l2c_init_data *data; + struct device_node *np; + struct resource res; + u32 cache_id, old_aux; + u32 cache_level = 2; + bool nosync = false; + + np = of_find_matching_node(NULL, l2x0_ids); + if (!np) + return -ENODEV; + + if (of_address_to_resource(np, 0, &res)) + return -ENODEV; + + l2x0_base = ioremap(res.start, resource_size(&res)); + if (!l2x0_base) + return -ENOMEM; + + l2x0_saved_regs.phy_base = res.start; + + data = of_match_node(l2x0_ids, np)->data; + + if (of_device_is_compatible(np, "arm,pl310-cache") && + of_property_read_bool(np, "arm,io-coherent")) + data = &of_l2c310_coherent_data; + + old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + if (old_aux != ((old_aux & aux_mask) | aux_val)) { + pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, (old_aux & aux_mask) | aux_val); + } else if (aux_mask != ~0U && aux_val != 0) { + pr_alert("L2C: platform provided aux values match the hardware, so have no effect. Please remove them.\n"); + } + + /* All L2 caches are unified, so this property should be specified */ + if (!of_property_read_bool(np, "cache-unified")) + pr_err("L2C: device tree omits to specify unified cache\n"); + + if (of_property_read_u32(np, "cache-level", &cache_level)) + pr_err("L2C: device tree omits to specify cache-level\n"); + + if (cache_level != 2) + pr_err("L2C: device tree specifies invalid cache level\n"); + + nosync = of_property_read_bool(np, "arm,outer-sync-disable"); + + /* Read back current (default) hardware configuration */ + if (data->save) + data->save(l2x0_base); + + /* L2 configuration can only be changed if the cache is disabled */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + if (data->of_parse) + data->of_parse(np, &aux_val, &aux_mask); + + if (cache_id_part_number_from_dt) + cache_id = cache_id_part_number_from_dt; + else + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); + + return __l2c_init(data, aux_val, aux_mask, cache_id, nosync); +} +#endif diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S new file mode 100644 index 0000000000..72d939ef87 --- /dev/null +++ b/arch/arm/mm/cache-nop.S @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> + +#include "proc-macros.S" + +ENTRY(nop_flush_icache_all) + ret lr +ENDPROC(nop_flush_icache_all) + + .globl nop_flush_kern_cache_all + .equ nop_flush_kern_cache_all, nop_flush_icache_all + + .globl nop_flush_kern_cache_louis + .equ nop_flush_kern_cache_louis, nop_flush_icache_all + + .globl nop_flush_user_cache_all + .equ nop_flush_user_cache_all, nop_flush_icache_all + + .globl nop_flush_user_cache_range + .equ nop_flush_user_cache_range, nop_flush_icache_all + + .globl nop_coherent_kern_range + .equ nop_coherent_kern_range, nop_flush_icache_all + +ENTRY(nop_coherent_user_range) + mov r0, 0 + ret lr +ENDPROC(nop_coherent_user_range) + + .globl nop_flush_kern_dcache_area + .equ nop_flush_kern_dcache_area, nop_flush_icache_all + + .globl nop_dma_flush_range + .equ nop_dma_flush_range, nop_flush_icache_all + + .globl nop_dma_map_area + .equ nop_dma_map_area, nop_flush_icache_all + + .globl nop_dma_unmap_area + .equ nop_dma_unmap_area, nop_flush_icache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions nop diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c new file mode 100644 index 0000000000..b1e1aba602 --- /dev/null +++ b/arch/arm/mm/cache-tauros2.c @@ -0,0 +1,303 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch/arm/mm/cache-tauros2.c - Tauros2 L2 cache controller support + * + * Copyright (C) 2008 Marvell Semiconductor + * + * References: + * - PJ1 CPU Core Datasheet, + * Document ID MV-S104837-01, Rev 0.7, January 24 2008. + * - PJ4 CPU Core Datasheet, + * Document ID MV-S105190-00, Rev 0.7, March 14 2008. + */ + +#include <linux/init.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/hardware/cache-tauros2.h> + +/* CP15 PJ4 Control configuration register */ +#define CCR_L2C_PREFETCH_DISABLE BIT(24) +#define CCR_L2C_ECC_ENABLE BIT(23) +#define CCR_L2C_WAY7_4_DISABLE BIT(21) +#define CCR_L2C_BURST8_ENABLE BIT(20) + +/* + * When Tauros2 is used on a CPU that supports the v7 hierarchical + * cache operations, the cache handling code in proc-v7.S takes care + * of everything, including handling DMA coherency. + * + * So, we only need to register outer cache operations here if we're + * being used on a pre-v7 CPU, and we only need to build support for + * outer cache operations into the kernel image if the kernel has been + * configured to support a pre-v7 CPU. + */ +#ifdef CONFIG_CPU_32v5 +/* + * Low-level cache maintenance operations. + */ +static inline void tauros2_clean_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c11, 3" : : "r" (addr)); +} + +static inline void tauros2_clean_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c15, 3" : : "r" (addr)); +} + +static inline void tauros2_inv_pa(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c7, 3" : : "r" (addr)); +} + + +/* + * Linux primitives. + * + * Note that the end addresses passed to Linux primitives are + * noninclusive. + */ +#define CACHE_LINE_SIZE 32 + +static void tauros2_inv_range(unsigned long start, unsigned long end) +{ + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + tauros2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1)); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (end & (CACHE_LINE_SIZE - 1)) { + tauros2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1)); + end &= ~(CACHE_LINE_SIZE - 1); + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < end) { + tauros2_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_clean_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + tauros2_clean_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_flush_range(unsigned long start, unsigned long end) +{ + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + tauros2_clean_inv_pa(start); + start += CACHE_LINE_SIZE; + } + + dsb(); +} + +static void tauros2_disable(void) +{ + __asm__ __volatile__ ( + "mcr p15, 1, %0, c7, c11, 0 @L2 Cache Clean All\n\t" + "mrc p15, 0, %0, c1, c0, 0\n\t" + "bic %0, %0, #(1 << 26)\n\t" + "mcr p15, 0, %0, c1, c0, 0 @Disable L2 Cache\n\t" + : : "r" (0x0)); +} + +static void tauros2_resume(void) +{ + __asm__ __volatile__ ( + "mcr p15, 1, %0, c7, c7, 0 @L2 Cache Invalidate All\n\t" + "mrc p15, 0, %0, c1, c0, 0\n\t" + "orr %0, %0, #(1 << 26)\n\t" + "mcr p15, 0, %0, c1, c0, 0 @Enable L2 Cache\n\t" + : : "r" (0x0)); +} +#endif + +static inline u32 __init read_extra_features(void) +{ + u32 u; + + __asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u)); + + return u; +} + +static inline void __init write_extra_features(u32 u) +{ + __asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u)); +} + +static inline int __init cpuid_scheme(void) +{ + return !!((processor_id & 0x000f0000) == 0x000f0000); +} + +static inline u32 __init read_mmfr3(void) +{ + u32 mmfr3; + + __asm__("mrc p15, 0, %0, c0, c1, 7\n" : "=r" (mmfr3)); + + return mmfr3; +} + +static inline u32 __init read_actlr(void) +{ + u32 actlr; + + __asm__("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr)); + + return actlr; +} + +static inline void __init write_actlr(u32 actlr) +{ + __asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr)); +} + +static void enable_extra_feature(unsigned int features) +{ + u32 u; + + u = read_extra_features(); + + if (features & CACHE_TAUROS2_PREFETCH_ON) + u &= ~CCR_L2C_PREFETCH_DISABLE; + else + u |= CCR_L2C_PREFETCH_DISABLE; + pr_info("Tauros2: %s L2 prefetch.\n", + (features & CACHE_TAUROS2_PREFETCH_ON) + ? "Enabling" : "Disabling"); + + if (features & CACHE_TAUROS2_LINEFILL_BURST8) + u |= CCR_L2C_BURST8_ENABLE; + else + u &= ~CCR_L2C_BURST8_ENABLE; + pr_info("Tauros2: %s burst8 line fill.\n", + (features & CACHE_TAUROS2_LINEFILL_BURST8) + ? "Enabling" : "Disabling"); + + write_extra_features(u); +} + +static void __init tauros2_internal_init(unsigned int features) +{ + char *mode = NULL; + + enable_extra_feature(features); + +#ifdef CONFIG_CPU_32v5 + if ((processor_id & 0xff0f0000) == 0x56050000) { + u32 feat; + + /* + * v5 CPUs with Tauros2 have the L2 cache enable bit + * located in the CPU Extra Features register. + */ + feat = read_extra_features(); + if (!(feat & 0x00400000)) { + pr_info("Tauros2: Enabling L2 cache.\n"); + write_extra_features(feat | 0x00400000); + } + + mode = "ARMv5"; + outer_cache.inv_range = tauros2_inv_range; + outer_cache.clean_range = tauros2_clean_range; + outer_cache.flush_range = tauros2_flush_range; + outer_cache.disable = tauros2_disable; + outer_cache.resume = tauros2_resume; + } +#endif + +#ifdef CONFIG_CPU_32v7 + /* + * Check whether this CPU has support for the v7 hierarchical + * cache ops. (PJ4 is in its v7 personality mode if the MMFR3 + * register indicates support for the v7 hierarchical cache + * ops.) + * + * (Although strictly speaking there may exist CPUs that + * implement the v7 cache ops but are only ARMv6 CPUs (due to + * not complying with all of the other ARMv7 requirements), + * there are no real-life examples of Tauros2 being used on + * such CPUs as of yet.) + */ + if (cpuid_scheme() && (read_mmfr3() & 0xf) == 1) { + u32 actlr; + + /* + * When Tauros2 is used in an ARMv7 system, the L2 + * enable bit is located in the Auxiliary System Control + * Register (which is the only register allowed by the + * ARMv7 spec to contain fine-grained cache control bits). + */ + actlr = read_actlr(); + if (!(actlr & 0x00000002)) { + pr_info("Tauros2: Enabling L2 cache.\n"); + write_actlr(actlr | 0x00000002); + } + + mode = "ARMv7"; + } +#endif + + if (mode == NULL) { + pr_crit("Tauros2: Unable to detect CPU mode.\n"); + return; + } + + pr_info("Tauros2: L2 cache support initialised " + "in %s mode.\n", mode); +} + +#ifdef CONFIG_OF +static const struct of_device_id tauros2_ids[] __initconst = { + { .compatible = "marvell,tauros2-cache"}, + {} +}; +#endif + +void __init tauros2_init(unsigned int features) +{ +#ifdef CONFIG_OF + struct device_node *node; + int ret; + unsigned int f; + + node = of_find_matching_node(NULL, tauros2_ids); + if (!node) { + pr_info("Not found marvell,tauros2-cache, disable it\n"); + } else { + ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f); + if (ret) { + pr_info("Not found marvell,tauros-cache-features property, " + "disable extra features\n"); + features = 0; + } else + features = f; + } +#endif + tauros2_internal_init(features); +} diff --git a/arch/arm/mm/cache-tauros3.h b/arch/arm/mm/cache-tauros3.h new file mode 100644 index 0000000000..fa5f926799 --- /dev/null +++ b/arch/arm/mm/cache-tauros3.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Marvell Tauros3 cache controller includes + * + * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> + * + * based on GPL'ed 2.6 kernel sources + * (c) Marvell International Ltd. + */ + +#ifndef __ASM_ARM_HARDWARE_TAUROS3_H +#define __ASM_ARM_HARDWARE_TAUROS3_H + +/* + * Marvell Tauros3 L2CC is compatible with PL310 r0p0 + * but with PREFETCH_CTRL (r2p0) and an additional event counter. + * Also, there is AUX2_CTRL for some Marvell specific control. + */ + +#define TAUROS3_EVENT_CNT2_CFG 0x224 +#define TAUROS3_EVENT_CNT2_VAL 0x228 +#define TAUROS3_INV_ALL 0x780 +#define TAUROS3_CLEAN_ALL 0x784 +#define TAUROS3_AUX2_CTRL 0x820 + +/* Registers shifts and masks */ +#define TAUROS3_AUX2_CTRL_LINEFILL_BURST8_EN (1 << 2) + +#endif diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c new file mode 100644 index 0000000000..84a2f17ff3 --- /dev/null +++ b/arch/arm/mm/cache-uniphier.c @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2015-2016 Socionext Inc. + * Author: Masahiro Yamada <yamada.masahiro@socionext.com> + */ + +#define pr_fmt(fmt) "uniphier: " fmt + +#include <linux/bitops.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/log2.h> +#include <linux/of_address.h> +#include <linux/slab.h> +#include <asm/hardware/cache-uniphier.h> +#include <asm/outercache.h> + +/* control registers */ +#define UNIPHIER_SSCC 0x0 /* Control Register */ +#define UNIPHIER_SSCC_BST BIT(20) /* UCWG burst read */ +#define UNIPHIER_SSCC_ACT BIT(19) /* Inst-Data separate */ +#define UNIPHIER_SSCC_WTG BIT(18) /* WT gathering on */ +#define UNIPHIER_SSCC_PRD BIT(17) /* enable pre-fetch */ +#define UNIPHIER_SSCC_ON BIT(0) /* enable cache */ +#define UNIPHIER_SSCLPDAWCR 0x30 /* Unified/Data Active Way Control */ +#define UNIPHIER_SSCLPIAWCR 0x34 /* Instruction Active Way Control */ + +/* revision registers */ +#define UNIPHIER_SSCID 0x0 /* ID Register */ + +/* operation registers */ +#define UNIPHIER_SSCOPE 0x244 /* Cache Operation Primitive Entry */ +#define UNIPHIER_SSCOPE_CM_INV 0x0 /* invalidate */ +#define UNIPHIER_SSCOPE_CM_CLEAN 0x1 /* clean */ +#define UNIPHIER_SSCOPE_CM_FLUSH 0x2 /* flush */ +#define UNIPHIER_SSCOPE_CM_SYNC 0x8 /* sync (drain bufs) */ +#define UNIPHIER_SSCOPE_CM_FLUSH_PREFETCH 0x9 /* flush p-fetch buf */ +#define UNIPHIER_SSCOQM 0x248 /* Cache Operation Queue Mode */ +#define UNIPHIER_SSCOQM_S_MASK (0x3 << 17) +#define UNIPHIER_SSCOQM_S_RANGE (0x0 << 17) +#define UNIPHIER_SSCOQM_S_ALL (0x1 << 17) +#define UNIPHIER_SSCOQM_CE BIT(15) /* notify completion */ +#define UNIPHIER_SSCOQM_CM_INV 0x0 /* invalidate */ +#define UNIPHIER_SSCOQM_CM_CLEAN 0x1 /* clean */ +#define UNIPHIER_SSCOQM_CM_FLUSH 0x2 /* flush */ +#define UNIPHIER_SSCOQAD 0x24c /* Cache Operation Queue Address */ +#define UNIPHIER_SSCOQSZ 0x250 /* Cache Operation Queue Size */ +#define UNIPHIER_SSCOPPQSEF 0x25c /* Cache Operation Queue Set Complete*/ +#define UNIPHIER_SSCOPPQSEF_FE BIT(1) +#define UNIPHIER_SSCOPPQSEF_OE BIT(0) +#define UNIPHIER_SSCOLPQS 0x260 /* Cache Operation Queue Status */ +#define UNIPHIER_SSCOLPQS_EF BIT(2) +#define UNIPHIER_SSCOLPQS_EST BIT(1) +#define UNIPHIER_SSCOLPQS_QST BIT(0) + +/* Is the operation region specified by address range? */ +#define UNIPHIER_SSCOQM_S_IS_RANGE(op) \ + ((op & UNIPHIER_SSCOQM_S_MASK) == UNIPHIER_SSCOQM_S_RANGE) + +/** + * struct uniphier_cache_data - UniPhier outer cache specific data + * + * @ctrl_base: virtual base address of control registers + * @rev_base: virtual base address of revision registers + * @op_base: virtual base address of operation registers + * @way_ctrl_base: virtual address of the way control registers for this + * SoC revision + * @way_mask: each bit specifies if the way is present + * @nsets: number of associativity sets + * @line_size: line size in bytes + * @range_op_max_size: max size that can be handled by a single range operation + * @list: list node to include this level in the whole cache hierarchy + */ +struct uniphier_cache_data { + void __iomem *ctrl_base; + void __iomem *rev_base; + void __iomem *op_base; + void __iomem *way_ctrl_base; + u32 way_mask; + u32 nsets; + u32 line_size; + u32 range_op_max_size; + struct list_head list; +}; + +/* + * List of the whole outer cache hierarchy. This list is only modified during + * the early boot stage, so no mutex is taken for the access to the list. + */ +static LIST_HEAD(uniphier_cache_list); + +/** + * __uniphier_cache_sync - perform a sync point for a particular cache level + * + * @data: cache controller specific data + */ +static void __uniphier_cache_sync(struct uniphier_cache_data *data) +{ + /* This sequence need not be atomic. Do not disable IRQ. */ + writel_relaxed(UNIPHIER_SSCOPE_CM_SYNC, + data->op_base + UNIPHIER_SSCOPE); + /* need a read back to confirm */ + readl_relaxed(data->op_base + UNIPHIER_SSCOPE); +} + +/** + * __uniphier_cache_maint_common - run a queue operation for a particular level + * + * @data: cache controller specific data + * @start: start address of range operation (don't care for "all" operation) + * @size: data size of range operation (don't care for "all" operation) + * @operation: flags to specify the desired cache operation + */ +static void __uniphier_cache_maint_common(struct uniphier_cache_data *data, + unsigned long start, + unsigned long size, + u32 operation) +{ + unsigned long flags; + + /* + * No spin lock is necessary here because: + * + * [1] This outer cache controller is able to accept maintenance + * operations from multiple CPUs at a time in an SMP system; if a + * maintenance operation is under way and another operation is issued, + * the new one is stored in the queue. The controller performs one + * operation after another. If the queue is full, the status register, + * UNIPHIER_SSCOPPQSEF, indicates that the queue registration has + * failed. The status registers, UNIPHIER_{SSCOPPQSEF, SSCOLPQS}, have + * different instances for each CPU, i.e. each CPU can track the status + * of the maintenance operations triggered by itself. + * + * [2] The cache command registers, UNIPHIER_{SSCOQM, SSCOQAD, SSCOQSZ, + * SSCOQWN}, are shared between multiple CPUs, but the hardware still + * guarantees the registration sequence is atomic; the write access to + * them are arbitrated by the hardware. The first accessor to the + * register, UNIPHIER_SSCOQM, holds the access right and it is released + * by reading the status register, UNIPHIER_SSCOPPQSEF. While one CPU + * is holding the access right, other CPUs fail to register operations. + * One CPU should not hold the access right for a long time, so local + * IRQs should be disabled while the following sequence. + */ + local_irq_save(flags); + + /* clear the complete notification flag */ + writel_relaxed(UNIPHIER_SSCOLPQS_EF, data->op_base + UNIPHIER_SSCOLPQS); + + do { + /* set cache operation */ + writel_relaxed(UNIPHIER_SSCOQM_CE | operation, + data->op_base + UNIPHIER_SSCOQM); + + /* set address range if needed */ + if (likely(UNIPHIER_SSCOQM_S_IS_RANGE(operation))) { + writel_relaxed(start, data->op_base + UNIPHIER_SSCOQAD); + writel_relaxed(size, data->op_base + UNIPHIER_SSCOQSZ); + } + } while (unlikely(readl_relaxed(data->op_base + UNIPHIER_SSCOPPQSEF) & + (UNIPHIER_SSCOPPQSEF_FE | UNIPHIER_SSCOPPQSEF_OE))); + + /* wait until the operation is completed */ + while (likely(readl_relaxed(data->op_base + UNIPHIER_SSCOLPQS) != + UNIPHIER_SSCOLPQS_EF)) + cpu_relax(); + + local_irq_restore(flags); +} + +static void __uniphier_cache_maint_all(struct uniphier_cache_data *data, + u32 operation) +{ + __uniphier_cache_maint_common(data, 0, 0, + UNIPHIER_SSCOQM_S_ALL | operation); + + __uniphier_cache_sync(data); +} + +static void __uniphier_cache_maint_range(struct uniphier_cache_data *data, + unsigned long start, unsigned long end, + u32 operation) +{ + unsigned long size; + + /* + * If the start address is not aligned, + * perform a cache operation for the first cache-line + */ + start = start & ~(data->line_size - 1); + + size = end - start; + + if (unlikely(size >= (unsigned long)(-data->line_size))) { + /* this means cache operation for all range */ + __uniphier_cache_maint_all(data, operation); + return; + } + + /* + * If the end address is not aligned, + * perform a cache operation for the last cache-line + */ + size = ALIGN(size, data->line_size); + + while (size) { + unsigned long chunk_size = min_t(unsigned long, size, + data->range_op_max_size); + + __uniphier_cache_maint_common(data, start, chunk_size, + UNIPHIER_SSCOQM_S_RANGE | operation); + + start += chunk_size; + size -= chunk_size; + } + + __uniphier_cache_sync(data); +} + +static void __uniphier_cache_enable(struct uniphier_cache_data *data, bool on) +{ + u32 val = 0; + + if (on) + val = UNIPHIER_SSCC_WTG | UNIPHIER_SSCC_PRD | UNIPHIER_SSCC_ON; + + writel_relaxed(val, data->ctrl_base + UNIPHIER_SSCC); +} + +static void __init __uniphier_cache_set_active_ways( + struct uniphier_cache_data *data) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + writel_relaxed(data->way_mask, data->way_ctrl_base + 4 * cpu); +} + +static void uniphier_cache_maint_range(unsigned long start, unsigned long end, + u32 operation) +{ + struct uniphier_cache_data *data; + + list_for_each_entry(data, &uniphier_cache_list, list) + __uniphier_cache_maint_range(data, start, end, operation); +} + +static void uniphier_cache_maint_all(u32 operation) +{ + struct uniphier_cache_data *data; + + list_for_each_entry(data, &uniphier_cache_list, list) + __uniphier_cache_maint_all(data, operation); +} + +static void uniphier_cache_inv_range(unsigned long start, unsigned long end) +{ + uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_INV); +} + +static void uniphier_cache_clean_range(unsigned long start, unsigned long end) +{ + uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_CLEAN); +} + +static void uniphier_cache_flush_range(unsigned long start, unsigned long end) +{ + uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_FLUSH); +} + +static void __init uniphier_cache_inv_all(void) +{ + uniphier_cache_maint_all(UNIPHIER_SSCOQM_CM_INV); +} + +static void uniphier_cache_flush_all(void) +{ + uniphier_cache_maint_all(UNIPHIER_SSCOQM_CM_FLUSH); +} + +static void uniphier_cache_disable(void) +{ + struct uniphier_cache_data *data; + + list_for_each_entry_reverse(data, &uniphier_cache_list, list) + __uniphier_cache_enable(data, false); + + uniphier_cache_flush_all(); +} + +static void __init uniphier_cache_enable(void) +{ + struct uniphier_cache_data *data; + + uniphier_cache_inv_all(); + + list_for_each_entry(data, &uniphier_cache_list, list) { + __uniphier_cache_enable(data, true); + __uniphier_cache_set_active_ways(data); + } +} + +static void uniphier_cache_sync(void) +{ + struct uniphier_cache_data *data; + + list_for_each_entry(data, &uniphier_cache_list, list) + __uniphier_cache_sync(data); +} + +static const struct of_device_id uniphier_cache_match[] __initconst = { + { .compatible = "socionext,uniphier-system-cache" }, + { /* sentinel */ } +}; + +static int __init __uniphier_cache_init(struct device_node *np, + unsigned int *cache_level) +{ + struct uniphier_cache_data *data; + u32 level, cache_size; + struct device_node *next_np; + int ret = 0; + + if (!of_match_node(uniphier_cache_match, np)) { + pr_err("L%d: not compatible with uniphier cache\n", + *cache_level); + return -EINVAL; + } + + if (of_property_read_u32(np, "cache-level", &level)) { + pr_err("L%d: cache-level is not specified\n", *cache_level); + return -EINVAL; + } + + if (level != *cache_level) { + pr_err("L%d: cache-level is unexpected value %d\n", + *cache_level, level); + return -EINVAL; + } + + if (!of_property_read_bool(np, "cache-unified")) { + pr_err("L%d: cache-unified is not specified\n", *cache_level); + return -EINVAL; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + if (of_property_read_u32(np, "cache-line-size", &data->line_size) || + !is_power_of_2(data->line_size)) { + pr_err("L%d: cache-line-size is unspecified or invalid\n", + *cache_level); + ret = -EINVAL; + goto err; + } + + if (of_property_read_u32(np, "cache-sets", &data->nsets) || + !is_power_of_2(data->nsets)) { + pr_err("L%d: cache-sets is unspecified or invalid\n", + *cache_level); + ret = -EINVAL; + goto err; + } + + if (of_property_read_u32(np, "cache-size", &cache_size) || + cache_size == 0 || cache_size % (data->nsets * data->line_size)) { + pr_err("L%d: cache-size is unspecified or invalid\n", + *cache_level); + ret = -EINVAL; + goto err; + } + + data->way_mask = GENMASK(cache_size / data->nsets / data->line_size - 1, + 0); + + data->ctrl_base = of_iomap(np, 0); + if (!data->ctrl_base) { + pr_err("L%d: failed to map control register\n", *cache_level); + ret = -ENOMEM; + goto err; + } + + data->rev_base = of_iomap(np, 1); + if (!data->rev_base) { + pr_err("L%d: failed to map revision register\n", *cache_level); + ret = -ENOMEM; + goto err; + } + + data->op_base = of_iomap(np, 2); + if (!data->op_base) { + pr_err("L%d: failed to map operation register\n", *cache_level); + ret = -ENOMEM; + goto err; + } + + data->way_ctrl_base = data->ctrl_base + 0xc00; + + if (*cache_level == 2) { + u32 revision = readl(data->rev_base + UNIPHIER_SSCID); + /* + * The size of range operation is limited to (1 << 22) or less + * for PH-sLD8 or older SoCs. + */ + if (revision <= 0x16) + data->range_op_max_size = (u32)1 << 22; + + /* + * Unfortunatly, the offset address of active way control base + * varies from SoC to SoC. + */ + switch (revision) { + case 0x11: /* sLD3 */ + data->way_ctrl_base = data->ctrl_base + 0x870; + break; + case 0x12: /* LD4 */ + case 0x16: /* sld8 */ + data->way_ctrl_base = data->ctrl_base + 0x840; + break; + default: + break; + } + } + + data->range_op_max_size -= data->line_size; + + INIT_LIST_HEAD(&data->list); + list_add_tail(&data->list, &uniphier_cache_list); /* no mutex */ + + /* + * OK, this level has been successfully initialized. Look for the next + * level cache. Do not roll back even if the initialization of the + * next level cache fails because we want to continue with available + * cache levels. + */ + next_np = of_find_next_cache_node(np); + if (next_np) { + (*cache_level)++; + ret = __uniphier_cache_init(next_np, cache_level); + } + of_node_put(next_np); + + return ret; +err: + iounmap(data->op_base); + iounmap(data->rev_base); + iounmap(data->ctrl_base); + kfree(data); + + return ret; +} + +int __init uniphier_cache_init(void) +{ + struct device_node *np = NULL; + unsigned int cache_level; + int ret = 0; + + /* look for level 2 cache */ + while ((np = of_find_matching_node(np, uniphier_cache_match))) + if (!of_property_read_u32(np, "cache-level", &cache_level) && + cache_level == 2) + break; + + if (!np) + return -ENODEV; + + ret = __uniphier_cache_init(np, &cache_level); + of_node_put(np); + + if (ret) { + /* + * Error out iif L2 initialization fails. Continue with any + * error on L3 or outer because they are optional. + */ + if (cache_level == 2) { + pr_err("failed to initialize L2 cache\n"); + return ret; + } + + cache_level--; + ret = 0; + } + + outer_cache.inv_range = uniphier_cache_inv_range; + outer_cache.clean_range = uniphier_cache_clean_range; + outer_cache.flush_range = uniphier_cache_flush_range; + outer_cache.flush_all = uniphier_cache_flush_all; + outer_cache.disable = uniphier_cache_disable; + outer_cache.sync = uniphier_cache_sync; + + uniphier_cache_enable(); + + pr_info("enabled outer cache (cache level: %d)\n", cache_level); + + return ret; +} diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S new file mode 100644 index 0000000000..7787057e49 --- /dev/null +++ b/arch/arm/mm/cache-v4.S @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/cache-v4.S + * + * Copyright (C) 1997-2002 Russell king + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(v4_flush_icache_all) + ret lr +ENDPROC(v4_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + * + * - mm - mm_struct describing address space + */ +ENTRY(v4_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v4_flush_kern_cache_all) +#ifdef CONFIG_CPU_CP15 + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ flush ID cache + ret lr +#else + /* FALLTHROUGH */ +#endif + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v4_flush_user_cache_range) +#ifdef CONFIG_CPU_CP15 + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ flush ID cache + ret lr +#else + /* FALLTHROUGH */ +#endif + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_coherent_user_range) + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v4_flush_kern_dcache_area) + /* FALLTHROUGH */ + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4_dma_flush_range) +#ifdef CONFIG_CPU_CP15 + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ flush ID cache +#endif + ret lr + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4_dma_unmap_area) + teq r2, #DMA_TO_DEVICE + bne v4_dma_flush_range + /* FALLTHROUGH */ + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4_dma_map_area) + ret lr +ENDPROC(v4_dma_unmap_area) +ENDPROC(v4_dma_map_area) + + .globl v4_flush_kern_cache_louis + .equ v4_flush_kern_cache_louis, v4_flush_kern_cache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4 diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S new file mode 100644 index 0000000000..ad382cee0f --- /dev/null +++ b/arch/arm/mm/cache-v4wb.S @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/cache-v4wb.S + * + * Copyright (C) 1997-2002 Russell king + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The total size of the data cache. + */ +#if defined(CONFIG_CPU_SA110) +# define CACHE_DSIZE 16384 +#elif defined(CONFIG_CPU_SA1100) +# define CACHE_DSIZE 8192 +#else +# error Unknown cache size +#endif + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + * + * Size Clean (ticks) Dirty (ticks) + * 4096 21 20 21 53 55 54 + * 8192 40 41 40 106 100 102 + * 16384 77 77 76 140 140 138 + * 32768 150 149 150 214 216 212 <--- + * 65536 296 297 296 351 358 361 + * 131072 591 591 591 656 657 651 + * Whole 132 136 132 221 217 207 <--- + */ +#define CACHE_DLIMIT (CACHE_DSIZE * 4) + + .data + .align 2 +flush_base: + .long FLUSH_BASE + .text + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(v4wb_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(v4wb_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular address + * space. + */ +ENTRY(v4wb_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v4wb_flush_kern_cache_all) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache +__flush_whole_cache: + ldr r3, =flush_base + ldr r1, [r3, #0] + eor r1, r1, #CACHE_DSIZE + str r1, [r3, #0] + add r2, r1, #CACHE_DSIZE +1: ldr r3, [r1], #32 + cmp r1, r2 + blo 1b +#ifdef FLUSH_BASE_MINICACHE + add r2, r2, #FLUSH_BASE_MINICACHE - FLUSH_BASE + sub r1, r2, #512 @ only 512 bytes +1: ldr r3, [r1], #32 + cmp r1, r2 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v4wb_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + tst r2, #VM_EXEC @ executable region? + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + + cmp r3, #CACHE_DLIMIT @ total size >= limit? + bhs __flush_whole_cache @ flush whole D cache + +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v4wb_flush_kern_dcache_area) + add r1, r0, r1 + /* fall through */ + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wb_coherent_kern_range) + /* fall through */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wb_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +v4wb_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean (write back) the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +v4wb_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * This is actually the same as v4wb_coherent_kern_range() + */ + .globl v4wb_dma_flush_range + .set v4wb_dma_flush_range, v4wb_coherent_kern_range + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wb_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq v4wb_dma_clean_range + bcs v4wb_dma_inv_range + b v4wb_dma_flush_range +ENDPROC(v4wb_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wb_dma_unmap_area) + ret lr +ENDPROC(v4wb_dma_unmap_area) + + .globl v4wb_flush_kern_cache_louis + .equ v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4wb diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S new file mode 100644 index 0000000000..0b290c25a9 --- /dev/null +++ b/arch/arm/mm/cache-v4wt.S @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/cache-v4wt.S + * + * Copyright (C) 1997-2002 Russell king + * + * ARMv4 write through cache operations support. + * + * We assume that the write buffer is not enabled. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/page.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 8 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + * + * *** This needs benchmarking + */ +#define CACHE_DLIMIT 16384 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(v4wt_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(v4wt_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(v4wt_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(v4wt_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive, page aligned) + * - end - end address (exclusive, page aligned) + * - flags - vma_area_struct flags describing address space + */ +ENTRY(v4wt_flush_user_cache_range) + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wt_coherent_kern_range) + /* FALLTRHOUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(v4wt_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v4wt_flush_kern_dcache_area) + mov r2, #0 + mcr p15, 0, r2, c7, c5, 0 @ invalidate I cache + add r1, r0, r1 + /* fallthrough */ + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +v4wt_dma_inv_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ + .globl v4wt_dma_flush_range + .equ v4wt_dma_flush_range, v4wt_dma_inv_range + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wt_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v4wt_dma_inv_range + /* FALLTHROUGH */ + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v4wt_dma_map_area) + ret lr +ENDPROC(v4wt_dma_unmap_area) +ENDPROC(v4wt_dma_map_area) + + .globl v4wt_flush_kern_cache_louis + .equ v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v4wt diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S new file mode 100644 index 0000000000..250c83bf71 --- /dev/null +++ b/arch/arm/mm/cache-v6.S @@ -0,0 +1,334 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/cache-v6.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This is the "shell" of the ARMv6 processor support. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/unwind.h> + +#include "proc-macros.S" + +#define HARVARD_CACHE +#define CACHE_LINE_SIZE 32 +#define D_CACHE_LINE_SIZE 32 +#define BTB_FLUSH_SIZE 8 + +.arch armv6 + +/* + * v6_flush_icache_all() + * + * Flush the whole I-cache. + * + * ARM1136 erratum 411920 - Invalidate Instruction Cache operation can fail. + * This erratum is present in 1136, 1156 and 1176. It does not affect the + * MPCore. + * + * Registers: + * r0 - set to 0 + * r1 - corrupted + */ +ENTRY(v6_flush_icache_all) + mov r0, #0 +#ifdef CONFIG_ARM_ERRATA_411920 + mrs r1, cpsr + cpsid ifa @ disable interrupts + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate entire I-cache + msr cpsr_cx, r1 @ restore interrupts + .rept 11 @ ARM Ltd recommends at least + nop @ 11 NOPs + .endr +#else + mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache +#endif + ret lr +ENDPROC(v6_flush_icache_all) + +/* + * v6_flush_cache_all() + * + * Flush the entire cache. + * + * It is assumed that: + */ +ENTRY(v6_flush_kern_cache_all) + mov r0, #0 +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 0 @ D cache clean+invalidate +#ifndef CONFIG_ARM_ERRATA_411920 + mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate +#else + b v6_flush_icache_all +#endif +#else + mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate +#endif + ret lr + +/* + * v6_flush_cache_all() + * + * Flush all TLB entries in a particular address space + * + * - mm - mm_struct describing address space + */ +ENTRY(v6_flush_user_cache_all) + /*FALLTHROUGH*/ + +/* + * v6_flush_cache_range(start, end, flags) + * + * Flush a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vm_area_struct flags describing address space + * + * It is assumed that: + * - we have a VIPT cache. + */ +ENTRY(v6_flush_user_cache_range) + ret lr + +/* + * v6_coherent_kern_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v6_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * v6_coherent_user_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v6_coherent_user_range) + UNWIND(.fnstart ) +#ifdef HARVARD_CACHE + bic r0, r0, #CACHE_LINE_SIZE - 1 +1: + USER( mcr p15, 0, r0, c7, c10, 1 ) @ clean D line + add r0, r0, #CACHE_LINE_SIZE + cmp r0, r1 + blo 1b +#endif + mov r0, #0 +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer +#ifndef CONFIG_ARM_ERRATA_411920 + mcr p15, 0, r0, c7, c5, 0 @ I+BTB cache invalidate +#else + b v6_flush_icache_all +#endif +#else + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB +#endif + ret lr + +/* + * Fault handling for the cache operation above. If the virtual address in r0 + * isn't mapped, fail with -EFAULT. + */ +9001: + mov r0, #-EFAULT + ret lr + UNWIND(.fnend ) +ENDPROC(v6_coherent_user_range) +ENDPROC(v6_coherent_kern_range) + +/* + * v6_flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v6_flush_kern_dcache_area) + add r1, r0, r1 + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line +#else + mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate unified line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 + blo 1b +#ifdef HARVARD_CACHE + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 +#endif + ret lr + + +/* + * v6_dma_inv_range(start,end) + * + * Invalidate the data cache within the specified region; we will + * be performing a DMA operation in this region and we want to + * purge old data in the cache. + * + * - start - virtual start address of region + * - end - virtual end address of region + */ +v6_dma_inv_range: +#ifdef CONFIG_DMA_CACHE_RWFO + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership +#endif + tst r0, #D_CACHE_LINE_SIZE - 1 + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +#ifdef HARVARD_CACHE + mcrne p15, 0, r0, c7, c10, 1 @ clean D line +#else + mcrne p15, 0, r0, c7, c11, 1 @ clean unified line +#endif + tst r1, #D_CACHE_LINE_SIZE - 1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrbne r2, [r1, #-1] @ read for ownership + strbne r2, [r1, #-1] @ write for ownership +#endif + bic r1, r1, #D_CACHE_LINE_SIZE - 1 +#ifdef HARVARD_CACHE + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D line +#else + mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line +#endif +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D line +#else + mcr p15, 0, r0, c7, c7, 1 @ invalidate unified line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrlo r2, [r0] @ read for ownership + strlo r2, [r0] @ write for ownership +#endif + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + ret lr + +/* + * v6_dma_clean_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +v6_dma_clean_range: + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: +#ifdef CONFIG_DMA_CACHE_RWFO + ldr r2, [r0] @ read for ownership +#endif +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c10, 1 @ clean D line +#else + mcr p15, 0, r0, c7, c11, 1 @ clean unified line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + ret lr + +/* + * v6_dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v6_dma_flush_range) +#ifdef CONFIG_DMA_CACHE_RWFO + ldrb r2, [r0] @ read for ownership + strb r2, [r0] @ write for ownership +#endif + bic r0, r0, #D_CACHE_LINE_SIZE - 1 +1: +#ifdef HARVARD_CACHE + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line +#else + mcr p15, 0, r0, c7, c15, 1 @ clean & invalidate line +#endif + add r0, r0, #D_CACHE_LINE_SIZE + cmp r0, r1 +#ifdef CONFIG_DMA_CACHE_RWFO + ldrblo r2, [r0] @ read for ownership + strblo r2, [r0] @ write for ownership +#endif + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v6_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v6_dma_inv_range +#ifndef CONFIG_DMA_CACHE_RWFO + b v6_dma_clean_range +#else + teq r2, #DMA_TO_DEVICE + beq v6_dma_clean_range + b v6_dma_flush_range +#endif +ENDPROC(v6_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v6_dma_unmap_area) +#ifndef CONFIG_DMA_CACHE_RWFO + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v6_dma_inv_range +#endif + ret lr +ENDPROC(v6_dma_unmap_area) + + .globl v6_flush_kern_cache_louis + .equ v6_flush_kern_cache_louis, v6_flush_kern_cache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v6 diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S new file mode 100644 index 0000000000..127afe2096 --- /dev/null +++ b/arch/arm/mm/cache-v7.S @@ -0,0 +1,482 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/cache-v7.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2005 ARM Ltd. + * + * This is the "shell" of the ARMv7 processor support. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/unwind.h> +#include <asm/hardware/cache-b15-rac.h> + +#include "proc-macros.S" + +.arch armv7-a + +#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND +.globl icache_size + .data + .align 2 +icache_size: + .long 64 + .text +#endif +/* + * The secondary kernel init calls v7_flush_dcache_all before it enables + * the L1; however, the L1 comes out of reset in an undefined state, so + * the clean + invalidate performed by v7_flush_dcache_all causes a bunch + * of cache lines with uninitialized data and uninitialized tags to get + * written out to memory, which does really unpleasant things to the main + * processor. We fix this by performing an invalidate, rather than a + * clean + invalidate, before jumping into the kernel. + * + * This function needs to be called for both secondary cores startup and + * primary core resume procedures. + */ +ENTRY(v7_invalidate_l1) + mov r0, #0 + mcr p15, 2, r0, c0, c0, 0 @ select L1 data cache in CSSELR + isb + mrc p15, 1, r0, c0, c0, 0 @ read cache geometry from CCSIDR + + movw r3, #0x3ff + and r3, r3, r0, lsr #3 @ 'Associativity' in CCSIDR[12:3] + clz r1, r3 @ WayShift + mov r2, #1 + mov r3, r3, lsl r1 @ NumWays-1 shifted into bits [31:...] + movs r1, r2, lsl r1 @ #1 shifted left by same amount + moveq r1, #1 @ r1 needs value > 0 even if only 1 way + + and r2, r0, #0x7 + add r2, r2, #4 @ SetShift + +1: movw ip, #0x7fff + and r0, ip, r0, lsr #13 @ 'NumSets' in CCSIDR[27:13] + +2: mov ip, r0, lsl r2 @ NumSet << SetShift + orr ip, ip, r3 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift) + mcr p15, 0, ip, c7, c6, 2 + subs r0, r0, #1 @ Set-- + bpl 2b + subs r3, r3, r1 @ Way-- + bcc 3f + mrc p15, 1, r0, c0, c0, 0 @ re-read cache geometry from CCSIDR + b 1b +3: dsb st + isb + ret lr +ENDPROC(v7_invalidate_l1) + +/* + * v7_flush_icache_all() + * + * Flush the whole I-cache. + * + * Registers: + * r0 - set to 0 + */ +ENTRY(v7_flush_icache_all) + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ret lr +ENDPROC(v7_flush_icache_all) + + /* + * v7_flush_dcache_louis() + * + * Flush the D-cache up to the Level of Unification Inner Shareable + * + * Corrupted registers: r0-r6, r9-r10 + */ + +ENTRY(v7_flush_dcache_louis) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr, r0 = clidr +ALT_SMP(mov r3, r0, lsr #20) @ move LoUIS into position +ALT_UP( mov r3, r0, lsr #26) @ move LoUU into position + ands r3, r3, #7 << 1 @ extract LoU*2 field from clidr + bne start_flush_levels @ LoU != 0, start flushing +#ifdef CONFIG_ARM_ERRATA_643719 +ALT_SMP(mrc p15, 0, r2, c0, c0, 0) @ read main ID register +ALT_UP( ret lr) @ LoUU is zero, so nothing to do + movw r1, #:lower16:(0x410fc090 >> 4) @ ID of ARM Cortex A9 r0p? + movt r1, #:upper16:(0x410fc090 >> 4) + teq r1, r2, lsr #4 @ test for errata affected core and if so... + moveq r3, #1 << 1 @ fix LoUIS value + beq start_flush_levels @ start flushing cache levels +#endif + ret lr +ENDPROC(v7_flush_dcache_louis) + +/* + * v7_flush_dcache_all() + * + * Flush the whole D-cache. + * + * Corrupted registers: r0-r6, r9-r10 + * + * - mm - mm_struct describing address space + */ +ENTRY(v7_flush_dcache_all) + dmb @ ensure ordering with previous memory accesses + mrc p15, 1, r0, c0, c0, 1 @ read clidr + mov r3, r0, lsr #23 @ move LoC into position + ands r3, r3, #7 << 1 @ extract LoC*2 from clidr + beq finished @ if loc is 0, then no need to clean +start_flush_levels: + mov r10, #0 @ start clean at cache level 0 +flush_levels: + add r2, r10, r10, lsr #1 @ work out 3x current cache level + mov r1, r0, lsr r2 @ extract cache type bits from clidr + and r1, r1, #7 @ mask of the bits for current cache only + cmp r1, #2 @ see what cache we have at this level + blt skip @ skip if no cache, or just i-cache +#ifdef CONFIG_PREEMPTION + save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic +#endif + mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + isb @ isb to sych the new cssr&csidr + mrc p15, 1, r1, c0, c0, 0 @ read the new csidr +#ifdef CONFIG_PREEMPTION + restore_irqs_notrace r9 +#endif + and r2, r1, #7 @ extract the length of the cache lines + add r2, r2, #4 @ add 4 (line length offset) + movw r4, #0x3ff + ands r4, r4, r1, lsr #3 @ find maximum number on the way size + clz r5, r4 @ find bit position of way size increment + movw r6, #0x7fff + and r1, r6, r1, lsr #13 @ extract max number of the index size + mov r6, #1 + movne r4, r4, lsl r5 @ # of ways shifted into bits [31:...] + movne r6, r6, lsl r5 @ 1 shifted left by same amount +loop1: + mov r9, r1 @ create working copy of max index +loop2: + mov r5, r9, lsl r2 @ factor set number into r5 + orr r5, r5, r4 @ factor way number into r5 + orr r5, r5, r10 @ factor cache level into r5 + mcr p15, 0, r5, c7, c14, 2 @ clean & invalidate by set/way + subs r9, r9, #1 @ decrement the index + bge loop2 + subs r4, r4, r6 @ decrement the way + bcs loop1 +skip: + add r10, r10, #2 @ increment cache number + cmp r3, r10 +#ifdef CONFIG_ARM_ERRATA_814220 + dsb +#endif + bgt flush_levels +finished: + mov r10, #0 @ switch back to cache level 0 + mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr + dsb st + isb + ret lr +ENDPROC(v7_flush_dcache_all) + +/* + * v7_flush_cache_all() + * + * Flush the entire cache system. + * The data cache flush is now achieved using atomic clean / invalidates + * working outwards from L1 cache. This is done using Set/Way based cache + * maintenance instructions. + * The instruction cache can still be invalidated back to the point of + * unification in a single instruction. + * + */ +ENTRY(v7_flush_kern_cache_all) + stmfd sp!, {r4-r6, r9-r10, lr} + bl v7_flush_dcache_all + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ldmfd sp!, {r4-r6, r9-r10, lr} + ret lr +ENDPROC(v7_flush_kern_cache_all) + + /* + * v7_flush_kern_cache_louis(void) + * + * Flush the data cache up to Level of Unification Inner Shareable. + * Invalidate the I-cache to the point of unification. + */ +ENTRY(v7_flush_kern_cache_louis) + stmfd sp!, {r4-r6, r9-r10, lr} + bl v7_flush_dcache_louis + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate + ldmfd sp!, {r4-r6, r9-r10, lr} + ret lr +ENDPROC(v7_flush_kern_cache_louis) + +/* + * v7_flush_cache_all() + * + * Flush all TLB entries in a particular address space + * + * - mm - mm_struct describing address space + */ +ENTRY(v7_flush_user_cache_all) + /*FALLTHROUGH*/ + +/* + * v7_flush_cache_range(start, end, flags) + * + * Flush a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vm_area_struct flags describing address space + * + * It is assumed that: + * - we have a VIPT cache. + */ +ENTRY(v7_flush_user_cache_range) + ret lr +ENDPROC(v7_flush_user_cache_all) +ENDPROC(v7_flush_user_cache_range) + +/* + * v7_coherent_kern_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * v7_coherent_user_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7_coherent_user_range) + UNWIND(.fnstart ) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + USER( mcr p15, 0, r12, c7, c11, 1 ) @ clean D line to the point of unification + add r12, r12, r2 + cmp r12, r1 + blo 1b + dsb ishst +#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND + ldr r3, =icache_size + ldr r2, [r3, #0] +#else + icache_line_size r2, r3 +#endif + sub r3, r2, #1 + bic r12, r0, r3 +2: + USER( mcr p15, 0, r12, c7, c5, 1 ) @ invalidate I line + add r12, r12, r2 + cmp r12, r1 + blo 2b + mov r0, #0 + ALT_SMP(mcr p15, 0, r0, c7, c1, 6) @ invalidate BTB Inner Shareable + ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB + dsb ishst + isb + ret lr + +/* + * Fault handling for the cache operation above. If the virtual address in r0 + * isn't mapped, fail with -EFAULT. + */ +9001: +#ifdef CONFIG_ARM_ERRATA_775420 + dsb +#endif + mov r0, #-EFAULT + ret lr + UNWIND(.fnend ) +ENDPROC(v7_coherent_kern_range) +ENDPROC(v7_coherent_user_range) + +/* + * v7_flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v7_flush_kern_dcache_area) + dcache_line_size r2, r3 + add r1, r0, r1 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line / unified line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7_flush_kern_dcache_area) + +/* + * v7_dma_inv_range(start,end) + * + * Invalidate the data cache within the specified region; we will + * be performing a DMA operation in this region and we want to + * purge old data in the cache. + * + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7_dma_inv_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + tst r0, r3 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif + mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + addne r0, r0, r2 + + tst r1, r3 + bic r1, r1, r3 + mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line + cmp r0, r1 +1: + mcrlo p15, 0, r0, c7, c6, 1 @ invalidate D / U line + addlo r0, r0, r2 + cmplo r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7_dma_inv_range) + +/* + * v7_dma_clean_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7_dma_clean_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + mcr p15, 0, r0, c7, c10, 1 @ clean D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7_dma_clean_range) + +/* + * v7_dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v7_dma_flush_range) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +#ifdef CONFIG_ARM_ERRATA_764369 + ALT_SMP(W(dsb)) + ALT_UP(W(nop)) +#endif +1: + mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7_dma_flush_range) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v7_dma_inv_range + b v7_dma_clean_range +ENDPROC(v7_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v7_dma_inv_range + ret lr +ENDPROC(v7_dma_unmap_area) + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v7 + + /* The Broadcom Brahma-B15 read-ahead cache requires some modifications + * to the v7_cache_fns, we only override the ones we need + */ +#ifndef CONFIG_CACHE_B15_RAC + globl_equ b15_flush_kern_cache_all, v7_flush_kern_cache_all +#endif + globl_equ b15_flush_icache_all, v7_flush_icache_all + globl_equ b15_flush_kern_cache_louis, v7_flush_kern_cache_louis + globl_equ b15_flush_user_cache_all, v7_flush_user_cache_all + globl_equ b15_flush_user_cache_range, v7_flush_user_cache_range + globl_equ b15_coherent_kern_range, v7_coherent_kern_range + globl_equ b15_coherent_user_range, v7_coherent_user_range + globl_equ b15_flush_kern_dcache_area, v7_flush_kern_dcache_area + + globl_equ b15_dma_map_area, v7_dma_map_area + globl_equ b15_dma_unmap_area, v7_dma_unmap_area + globl_equ b15_dma_flush_range, v7_dma_flush_range + + define_cache_functions b15 diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S new file mode 100644 index 0000000000..eb60b5e5e2 --- /dev/null +++ b/arch/arm/mm/cache-v7m.S @@ -0,0 +1,456 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/cache-v7m.S + * + * Based on linux/arch/arm/mm/cache-v7.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2005 ARM Ltd. + * + * This is the "shell" of the ARMv7M processor support. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <asm/unwind.h> +#include <asm/v7m.h> + +#include "proc-macros.S" + +.arch armv7-m + +/* Generic V7M read/write macros for memory mapped cache operations */ +.macro v7m_cache_read, rt, reg + movw \rt, #:lower16:BASEADDR_V7M_SCB + \reg + movt \rt, #:upper16:BASEADDR_V7M_SCB + \reg + ldr \rt, [\rt] +.endm + +.macro v7m_cacheop, rt, tmp, op, c = al + movw\c \tmp, #:lower16:BASEADDR_V7M_SCB + \op + movt\c \tmp, #:upper16:BASEADDR_V7M_SCB + \op + str\c \rt, [\tmp] +.endm + + +.macro read_ccsidr, rt + v7m_cache_read \rt, V7M_SCB_CCSIDR +.endm + +.macro read_clidr, rt + v7m_cache_read \rt, V7M_SCB_CLIDR +.endm + +.macro write_csselr, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_CSSELR +.endm + +/* + * dcisw: Invalidate data cache by set/way + */ +.macro dcisw, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCISW +.endm + +/* + * dccisw: Clean and invalidate data cache by set/way + */ +.macro dccisw, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCCISW +.endm + +/* + * dccimvac: Clean and invalidate data cache line by MVA to PoC. + */ +.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo +.macro dccimvac\c, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCCIMVAC, \c +.endm +.endr + +/* + * dcimvac: Invalidate data cache line by MVA to PoC + */ +.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo +.macro dcimvac\c, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c +.endm +.endr + +/* + * dccmvau: Clean data cache line by MVA to PoU + */ +.macro dccmvau, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAU +.endm + +/* + * dccmvac: Clean data cache line by MVA to PoC + */ +.macro dccmvac, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAC +.endm + +/* + * icimvau: Invalidate instruction caches by MVA to PoU + */ +.macro icimvau, rt, tmp + v7m_cacheop \rt, \tmp, V7M_SCB_ICIMVAU +.endm + +/* + * Invalidate the icache, inner shareable if SMP, invalidate BTB for UP. + * rt data ignored by ICIALLU(IS), so can be used for the address + */ +.macro invalidate_icache, rt + v7m_cacheop \rt, \rt, V7M_SCB_ICIALLU + mov \rt, #0 +.endm + +/* + * Invalidate the BTB, inner shareable if SMP. + * rt data ignored by BPIALL, so it can be used for the address + */ +.macro invalidate_bp, rt + v7m_cacheop \rt, \rt, V7M_SCB_BPIALL + mov \rt, #0 +.endm + +ENTRY(v7m_invalidate_l1) + mov r0, #0 + + write_csselr r0, r1 + read_ccsidr r0 + + movw r1, #0x7fff + and r2, r1, r0, lsr #13 + + movw r1, #0x3ff + + and r3, r1, r0, lsr #3 @ NumWays - 1 + add r2, r2, #1 @ NumSets + + and r0, r0, #0x7 + add r0, r0, #4 @ SetShift + + clz r1, r3 @ WayShift + add r4, r3, #1 @ NumWays +1: sub r2, r2, #1 @ NumSets-- + mov r3, r4 @ Temp = NumWays +2: subs r3, r3, #1 @ Temp-- + mov r5, r3, lsl r1 + mov r6, r2, lsl r0 + orr r5, r5, r6 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift) + dcisw r5, r6 + bgt 2b + cmp r2, #0 + bgt 1b + dsb st + isb + ret lr +ENDPROC(v7m_invalidate_l1) + +/* + * v7m_flush_icache_all() + * + * Flush the whole I-cache. + * + * Registers: + * r0 - set to 0 + */ +ENTRY(v7m_flush_icache_all) + invalidate_icache r0 + ret lr +ENDPROC(v7m_flush_icache_all) + +/* + * v7m_flush_dcache_all() + * + * Flush the whole D-cache. + * + * Corrupted registers: r0-r7, r9-r11 + */ +ENTRY(v7m_flush_dcache_all) + dmb @ ensure ordering with previous memory accesses + read_clidr r0 + mov r3, r0, lsr #23 @ move LoC into position + ands r3, r3, #7 << 1 @ extract LoC*2 from clidr + beq finished @ if loc is 0, then no need to clean +start_flush_levels: + mov r10, #0 @ start clean at cache level 0 +flush_levels: + add r2, r10, r10, lsr #1 @ work out 3x current cache level + mov r1, r0, lsr r2 @ extract cache type bits from clidr + and r1, r1, #7 @ mask of the bits for current cache only + cmp r1, #2 @ see what cache we have at this level + blt skip @ skip if no cache, or just i-cache +#ifdef CONFIG_PREEMPTION + save_and_disable_irqs_notrace r9 @ make cssr&csidr read atomic +#endif + write_csselr r10, r1 @ set current cache level + isb @ isb to sych the new cssr&csidr + read_ccsidr r1 @ read the new csidr +#ifdef CONFIG_PREEMPTION + restore_irqs_notrace r9 +#endif + and r2, r1, #7 @ extract the length of the cache lines + add r2, r2, #4 @ add 4 (line length offset) + movw r4, #0x3ff + ands r4, r4, r1, lsr #3 @ find maximum number on the way size + clz r5, r4 @ find bit position of way size increment + movw r7, #0x7fff + ands r7, r7, r1, lsr #13 @ extract max number of the index size +loop1: + mov r9, r7 @ create working copy of max index +loop2: + lsl r6, r4, r5 + orr r11, r10, r6 @ factor way and cache number into r11 + lsl r6, r9, r2 + orr r11, r11, r6 @ factor index number into r11 + dccisw r11, r6 @ clean/invalidate by set/way + subs r9, r9, #1 @ decrement the index + bge loop2 + subs r4, r4, #1 @ decrement the way + bge loop1 +skip: + add r10, r10, #2 @ increment cache number + cmp r3, r10 + bgt flush_levels +finished: + mov r10, #0 @ switch back to cache level 0 + write_csselr r10, r3 @ select current cache level in cssr + dsb st + isb + ret lr +ENDPROC(v7m_flush_dcache_all) + +/* + * v7m_flush_cache_all() + * + * Flush the entire cache system. + * The data cache flush is now achieved using atomic clean / invalidates + * working outwards from L1 cache. This is done using Set/Way based cache + * maintenance instructions. + * The instruction cache can still be invalidated back to the point of + * unification in a single instruction. + * + */ +ENTRY(v7m_flush_kern_cache_all) + stmfd sp!, {r4-r7, r9-r11, lr} + bl v7m_flush_dcache_all + invalidate_icache r0 + ldmfd sp!, {r4-r7, r9-r11, lr} + ret lr +ENDPROC(v7m_flush_kern_cache_all) + +/* + * v7m_flush_cache_all() + * + * Flush all TLB entries in a particular address space + * + * - mm - mm_struct describing address space + */ +ENTRY(v7m_flush_user_cache_all) + /*FALLTHROUGH*/ + +/* + * v7m_flush_cache_range(start, end, flags) + * + * Flush a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - flags - vm_area_struct flags describing address space + * + * It is assumed that: + * - we have a VIPT cache. + */ +ENTRY(v7m_flush_user_cache_range) + ret lr +ENDPROC(v7m_flush_user_cache_all) +ENDPROC(v7m_flush_user_cache_range) + +/* + * v7m_coherent_kern_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7m_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * v7m_coherent_user_range(start,end) + * + * Ensure that the I and D caches are coherent within specified + * region. This is typically used when code has been written to + * a memory region, and will be executed. + * + * - start - virtual start address of region + * - end - virtual end address of region + * + * It is assumed that: + * - the Icache does not read data from the write buffer + */ +ENTRY(v7m_coherent_user_range) + UNWIND(.fnstart ) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +1: +/* + * We use open coded version of dccmvau otherwise USER() would + * point at movw instruction. + */ + dccmvau r12, r3 + add r12, r12, r2 + cmp r12, r1 + blo 1b + dsb ishst + icache_line_size r2, r3 + sub r3, r2, #1 + bic r12, r0, r3 +2: + icimvau r12, r3 + add r12, r12, r2 + cmp r12, r1 + blo 2b + invalidate_bp r0 + dsb ishst + isb + ret lr + UNWIND(.fnend ) +ENDPROC(v7m_coherent_kern_range) +ENDPROC(v7m_coherent_user_range) + +/* + * v7m_flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure that the data held in the page kaddr is written back + * to the page in question. + * + * - addr - kernel address + * - size - region size + */ +ENTRY(v7m_flush_kern_dcache_area) + dcache_line_size r2, r3 + add r1, r0, r1 + sub r3, r2, #1 + bic r0, r0, r3 +1: + dccimvac r0, r3 @ clean & invalidate D line / unified line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7m_flush_kern_dcache_area) + +/* + * v7m_dma_inv_range(start,end) + * + * Invalidate the data cache within the specified region; we will + * be performing a DMA operation in this region and we want to + * purge old data in the cache. + * + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7m_dma_inv_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + tst r0, r3 + bic r0, r0, r3 + dccimvacne r0, r3 + addne r0, r0, r2 + subne r3, r2, #1 @ restore r3, corrupted by v7m's dccimvac + tst r1, r3 + bic r1, r1, r3 + dccimvacne r1, r3 + cmp r0, r1 +1: + dcimvaclo r0, r3 + addlo r0, r0, r2 + cmplo r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7m_dma_inv_range) + +/* + * v7m_dma_clean_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +v7m_dma_clean_range: + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +1: + dccmvac r0, r3 @ clean D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7m_dma_clean_range) + +/* + * v7m_dma_flush_range(start,end) + * - start - virtual start address of region + * - end - virtual end address of region + */ +ENTRY(v7m_dma_flush_range) + dcache_line_size r2, r3 + sub r3, r2, #1 + bic r0, r0, r3 +1: + dccimvac r0, r3 @ clean & invalidate D / U line + add r0, r0, r2 + cmp r0, r1 + blo 1b + dsb st + ret lr +ENDPROC(v7m_dma_flush_range) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7m_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_FROM_DEVICE + beq v7m_dma_inv_range + b v7m_dma_clean_range +ENDPROC(v7m_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(v7m_dma_unmap_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + bne v7m_dma_inv_range + ret lr +ENDPROC(v7m_dma_unmap_area) + + .globl v7m_flush_kern_cache_louis + .equ v7m_flush_kern_cache_louis, v7m_flush_kern_cache_all + + __INITDATA + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions v7m diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c new file mode 100644 index 0000000000..d20d7af02d --- /dev/null +++ b/arch/arm/mm/cache-xsc3l2.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch/arm/mm/cache-xsc3l2.c - XScale3 L2 cache controller support + * + * Copyright (C) 2007 ARM Limited + */ +#include <linux/init.h> +#include <linux/highmem.h> +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/cacheflush.h> + +#define CR_L2 (1 << 26) + +#define CACHE_LINE_SIZE 32 +#define CACHE_LINE_SHIFT 5 +#define CACHE_WAY_PER_SET 8 + +#define CACHE_WAY_SIZE(l2ctype) (8192 << (((l2ctype) >> 8) & 0xf)) +#define CACHE_SET_SIZE(l2ctype) (CACHE_WAY_SIZE(l2ctype) >> CACHE_LINE_SHIFT) + +static inline int xsc3_l2_present(void) +{ + unsigned long l2ctype; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + return !!(l2ctype & 0xf8); +} + +static inline void xsc3_l2_clean_mva(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr)); +} + +static inline void xsc3_l2_inv_mva(unsigned long addr) +{ + __asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr)); +} + +static inline void xsc3_l2_inv_all(void) +{ + unsigned long l2ctype, set_way; + int set, way; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) { + for (way = 0; way < CACHE_WAY_PER_SET; way++) { + set_way = (way << 29) | (set << 5); + __asm__("mcr p15, 1, %0, c7, c11, 2" : : "r"(set_way)); + } + } + + dsb(); +} + +static inline void l2_unmap_va(unsigned long va) +{ +#ifdef CONFIG_HIGHMEM + if (va != -1) + kunmap_atomic((void *)va); +#endif +} + +static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va) +{ +#ifdef CONFIG_HIGHMEM + unsigned long va = prev_va & PAGE_MASK; + unsigned long pa_offset = pa << (32 - PAGE_SHIFT); + if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) { + /* + * Switching to a new page. Because cache ops are + * using virtual addresses only, we must put a mapping + * in place for it. + */ + l2_unmap_va(prev_va); + va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT); + } + return va + (pa_offset >> (32 - PAGE_SHIFT)); +#else + return __phys_to_virt(pa); +#endif +} + +static void xsc3_l2_inv_range(unsigned long start, unsigned long end) +{ + unsigned long vaddr; + + if (start == 0 && end == -1ul) { + xsc3_l2_inv_all(); + return; + } + + vaddr = -1; /* to force the first mapping */ + + /* + * Clean and invalidate partial first cache line. + */ + if (start & (CACHE_LINE_SIZE - 1)) { + vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); + start = (start | (CACHE_LINE_SIZE - 1)) + 1; + } + + /* + * Invalidate all full cache lines between 'start' and 'end'. + */ + while (start < (end & ~(CACHE_LINE_SIZE - 1))) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_inv_mva(vaddr); + start += CACHE_LINE_SIZE; + } + + /* + * Clean and invalidate partial last cache line. + */ + if (start < end) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); + } + + l2_unmap_va(vaddr); + + dsb(); +} + +static void xsc3_l2_clean_range(unsigned long start, unsigned long end) +{ + unsigned long vaddr; + + vaddr = -1; /* to force the first mapping */ + + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_clean_mva(vaddr); + start += CACHE_LINE_SIZE; + } + + l2_unmap_va(vaddr); + + dsb(); +} + +/* + * optimize L2 flush all operation by set/way format + */ +static inline void xsc3_l2_flush_all(void) +{ + unsigned long l2ctype, set_way; + int set, way; + + __asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype)); + + for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) { + for (way = 0; way < CACHE_WAY_PER_SET; way++) { + set_way = (way << 29) | (set << 5); + __asm__("mcr p15, 1, %0, c7, c15, 2" : : "r"(set_way)); + } + } + + dsb(); +} + +static void xsc3_l2_flush_range(unsigned long start, unsigned long end) +{ + unsigned long vaddr; + + if (start == 0 && end == -1ul) { + xsc3_l2_flush_all(); + return; + } + + vaddr = -1; /* to force the first mapping */ + + start &= ~(CACHE_LINE_SIZE - 1); + while (start < end) { + vaddr = l2_map_va(start, vaddr); + xsc3_l2_clean_mva(vaddr); + xsc3_l2_inv_mva(vaddr); + start += CACHE_LINE_SIZE; + } + + l2_unmap_va(vaddr); + + dsb(); +} + +static int __init xsc3_l2_init(void) +{ + if (!cpu_is_xsc3() || !xsc3_l2_present()) + return 0; + + if (get_cr() & CR_L2) { + pr_info("XScale3 L2 cache enabled.\n"); + xsc3_l2_inv_all(); + + outer_cache.inv_range = xsc3_l2_inv_range; + outer_cache.clean_range = xsc3_l2_clean_range; + outer_cache.flush_range = xsc3_l2_flush_range; + } + + return 0; +} +core_initcall(xsc3_l2_init); diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c new file mode 100644 index 0000000000..4204ffa2d1 --- /dev/null +++ b/arch/arm/mm/context.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/context.c + * + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/percpu.h> + +#include <asm/mmu_context.h> +#include <asm/smp_plat.h> +#include <asm/thread_notify.h> +#include <asm/tlbflush.h> +#include <asm/proc-fns.h> + +/* + * On ARMv6, we have the following structure in the Context ID: + * + * 31 7 0 + * +-------------------------+-----------+ + * | process ID | ASID | + * +-------------------------+-----------+ + * | context ID | + * +-------------------------------------+ + * + * The ASID is used to tag entries in the CPU caches and TLBs. + * The context ID is used by debuggers and trace logic, and + * should be unique within all running processes. + * + * In big endian operation, the two 32 bit words are swapped if accessed + * by non-64-bit operations. + */ +#define ASID_FIRST_VERSION (1ULL << ASID_BITS) +#define NUM_USER_ASIDS ASID_FIRST_VERSION + +static DEFINE_RAW_SPINLOCK(cpu_asid_lock); +static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); +static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); + +static DEFINE_PER_CPU(atomic64_t, active_asids); +static DEFINE_PER_CPU(u64, reserved_asids); +static cpumask_t tlb_flush_pending; + +#ifdef CONFIG_ARM_ERRATA_798181 +void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm, + cpumask_t *mask) +{ + int cpu; + unsigned long flags; + u64 context_id, asid; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + context_id = mm->context.id.counter; + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + /* + * We only need to send an IPI if the other CPUs are + * running the same ASID as the one being invalidated. + */ + asid = per_cpu(active_asids, cpu).counter; + if (asid == 0) + asid = per_cpu(reserved_asids, cpu); + if (context_id == asid) + cpumask_set_cpu(cpu, mask); + } + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); +} +#endif + +#ifdef CONFIG_ARM_LPAE +/* + * With LPAE, the ASID and page tables are updated atomicly, so there is + * no need for a reserved set of tables (the active ASID tracking prevents + * any issues across a rollover). + */ +#define cpu_set_reserved_ttbr0() +#else +static void cpu_set_reserved_ttbr0(void) +{ + u32 ttb; + /* + * Copy TTBR1 into TTBR0. + * This points at swapper_pg_dir, which contains only global + * entries so any speculative walks are perfectly safe. + */ + asm volatile( + " mrc p15, 0, %0, c2, c0, 1 @ read TTBR1\n" + " mcr p15, 0, %0, c2, c0, 0 @ set TTBR0\n" + : "=r" (ttb)); + isb(); +} +#endif + +#ifdef CONFIG_PID_IN_CONTEXTIDR +static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd, + void *t) +{ + u32 contextidr; + pid_t pid; + struct thread_info *thread = t; + + if (cmd != THREAD_NOTIFY_SWITCH) + return NOTIFY_DONE; + + pid = task_pid_nr(thread_task(thread)) << ASID_BITS; + asm volatile( + " mrc p15, 0, %0, c13, c0, 1\n" + " and %0, %0, %2\n" + " orr %0, %0, %1\n" + " mcr p15, 0, %0, c13, c0, 1\n" + : "=r" (contextidr), "+r" (pid) + : "I" (~ASID_MASK)); + isb(); + + return NOTIFY_OK; +} + +static struct notifier_block contextidr_notifier_block = { + .notifier_call = contextidr_notifier, +}; + +static int __init contextidr_notifier_init(void) +{ + return thread_register_notifier(&contextidr_notifier_block); +} +arch_initcall(contextidr_notifier_init); +#endif + +static void flush_context(unsigned int cpu) +{ + int i; + u64 asid; + + /* Update the list of reserved ASIDs and the ASID bitmap. */ + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + for_each_possible_cpu(i) { + asid = atomic64_xchg(&per_cpu(active_asids, i), 0); + /* + * If this CPU has already been through a + * rollover, but hasn't run another task in + * the meantime, we must preserve its reserved + * ASID, as this is the only trace we have of + * the process it is still running. + */ + if (asid == 0) + asid = per_cpu(reserved_asids, i); + __set_bit(asid & ~ASID_MASK, asid_map); + per_cpu(reserved_asids, i) = asid; + } + + /* Queue a TLB invalidate and flush the I-cache if necessary. */ + cpumask_setall(&tlb_flush_pending); + + if (icache_is_vivt_asid_tagged()) + __flush_icache_all(); +} + +static bool check_update_reserved_asid(u64 asid, u64 newasid) +{ + int cpu; + bool hit = false; + + /* + * Iterate over the set of reserved ASIDs looking for a match. + * If we find one, then we can update our mm to use newasid + * (i.e. the same ASID in the current generation) but we can't + * exit the loop early, since we need to ensure that all copies + * of the old ASID are updated to reflect the mm. Failure to do + * so could result in us missing the reserved ASID in a future + * generation. + */ + for_each_possible_cpu(cpu) { + if (per_cpu(reserved_asids, cpu) == asid) { + hit = true; + per_cpu(reserved_asids, cpu) = newasid; + } + } + + return hit; +} + +static u64 new_context(struct mm_struct *mm, unsigned int cpu) +{ + static u32 cur_idx = 1; + u64 asid = atomic64_read(&mm->context.id); + u64 generation = atomic64_read(&asid_generation); + + if (asid != 0) { + u64 newasid = generation | (asid & ~ASID_MASK); + + /* + * If our current ASID was active during a rollover, we + * can continue to use it and this was just a false alarm. + */ + if (check_update_reserved_asid(asid, newasid)) + return newasid; + + /* + * We had a valid ASID in a previous life, so try to re-use + * it if possible., + */ + asid &= ~ASID_MASK; + if (!__test_and_set_bit(asid, asid_map)) + return newasid; + } + + /* + * Allocate a free ASID. If we can't find one, take a note of the + * currently active ASIDs and mark the TLBs as requiring flushes. + * We always count from ASID #1, as we reserve ASID #0 to switch + * via TTBR0 and to avoid speculative page table walks from hitting + * in any partial walk caches, which could be populated from + * overlapping level-1 descriptors used to map both the module + * area and the userspace stack. + */ + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx); + if (asid == NUM_USER_ASIDS) { + generation = atomic64_add_return(ASID_FIRST_VERSION, + &asid_generation); + flush_context(cpu); + asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1); + } + + __set_bit(asid, asid_map); + cur_idx = asid; + cpumask_clear(mm_cpumask(mm)); + return asid | generation; +} + +void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) +{ + unsigned long flags; + unsigned int cpu = smp_processor_id(); + u64 asid; + + check_vmalloc_seq(mm); + + /* + * We cannot update the pgd and the ASID atomicly with classic + * MMU, so switch exclusively to global mappings to avoid + * speculative page table walking with the wrong TTBR. + */ + cpu_set_reserved_ttbr0(); + + asid = atomic64_read(&mm->context.id); + if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) + && atomic64_xchg(&per_cpu(active_asids, cpu), asid)) + goto switch_mm_fastpath; + + raw_spin_lock_irqsave(&cpu_asid_lock, flags); + /* Check that our ASID belongs to the current generation. */ + asid = atomic64_read(&mm->context.id); + if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) { + asid = new_context(mm, cpu); + atomic64_set(&mm->context.id, asid); + } + + if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { + local_flush_bp_all(); + local_flush_tlb_all(); + } + + atomic64_set(&per_cpu(active_asids, cpu), asid); + cpumask_set_cpu(cpu, mm_cpumask(mm)); + raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); + +switch_mm_fastpath: + cpu_switch_mm(mm->pgd, mm); +} diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c new file mode 100644 index 0000000000..7e28c26f5a --- /dev/null +++ b/arch/arm/mm/copypage-fa.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/lib/copypage-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on copypage-v4wb.S: + * Copyright (C) 1995-1999 Russell King + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * Faraday optimised copy_user_page + */ +static void fa_copy_user_page(void *kto, const void *kfrom) +{ + int tmp; + + asm volatile ("\ +1: ldmia %1!, {r3, r4, ip, lr} @ 4\n\ + stmia %0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + ldmia %1!, {r3, r4, ip, lr} @ 4\n\ + stmia %0, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + subs %2, %2, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, %2, c7, c10, 4 @ 1 drain WB" + : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp) + : "2" (PAGE_SIZE / 32) + : "r3", "r4", "ip", "lr"); +} + +void fa_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + fa_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * Faraday optimised clear_user_page + * + * Same story as above. + */ +void fa_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + stmia %0, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\ + add %0, %0, #16 @ 1\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns fa_user_fns __initdata = { + .cpu_clear_user_highpage = fa_clear_user_highpage, + .cpu_copy_user_highpage = fa_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-feroceon.c b/arch/arm/mm/copypage-feroceon.c new file mode 100644 index 0000000000..5fc8ef1e66 --- /dev/null +++ b/arch/arm/mm/copypage-feroceon.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/copypage-feroceon.S + * + * Copyright (C) 2008 Marvell Semiconductors + * + * This handles copy_user_highpage and clear_user_page on Feroceon + * more optimally than the generic implementations. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +static void feroceon_copy_user_page(void *kto, const void *kfrom) +{ + int tmp; + + asm volatile ("\ +.arch armv5te \n\ +1: ldmia %1!, {r2 - r7, ip, lr} \n\ + pld [%1, #0] \n\ + pld [%1, #32] \n\ + pld [%1, #64] \n\ + pld [%1, #96] \n\ + pld [%1, #128] \n\ + pld [%1, #160] \n\ + pld [%1, #192] \n\ + stmia %0, {r2 - r7, ip, lr} \n\ + ldmia %1!, {r2 - r7, ip, lr} \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + stmia %0, {r2 - r7, ip, lr} \n\ + ldmia %1!, {r2 - r7, ip, lr} \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + stmia %0, {r2 - r7, ip, lr} \n\ + ldmia %1!, {r2 - r7, ip, lr} \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + stmia %0, {r2 - r7, ip, lr} \n\ + ldmia %1!, {r2 - r7, ip, lr} \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + stmia %0, {r2 - r7, ip, lr} \n\ + ldmia %1!, {r2 - r7, ip, lr} \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + stmia %0, {r2 - r7, ip, lr} \n\ + ldmia %1!, {r2 - r7, ip, lr} \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + stmia %0, {r2 - r7, ip, lr} \n\ + ldmia %1!, {r2 - r7, ip, lr} \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + stmia %0, {r2 - r7, ip, lr} \n\ + subs %2, %2, #(32 * 8) \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + bne 1b \n\ + mcr p15, 0, %2, c7, c10, 4 @ drain WB" + : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp) + : "2" (PAGE_SIZE) + : "r2", "r3", "r4", "r5", "r6", "r7", "ip", "lr"); +} + +void feroceon_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + flush_cache_page(vma, vaddr, page_to_pfn(from)); + feroceon_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile ("\ + mov r1, %2 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ + mov r4, #0 \n\ + mov r5, #0 \n\ + mov r6, #0 \n\ + mov r7, #0 \n\ + mov ip, #0 \n\ + mov lr, #0 \n\ +1: stmia %0, {r2-r7, ip, lr} \n\ + subs r1, r1, #1 \n\ + mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\ + add %0, %0, #32 \n\ + bne 1b \n\ + mcr p15, 0, r1, c7, c10, 4 @ drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "r4", "r5", "r6", "r7", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns feroceon_user_fns __initdata = { + .cpu_clear_user_highpage = feroceon_clear_user_highpage, + .cpu_copy_user_highpage = feroceon_copy_user_highpage, +}; + diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c new file mode 100644 index 0000000000..7ddd82b9fe --- /dev/null +++ b/arch/arm/mm/copypage-v4mc.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/lib/copypage-armv4mc.S + * + * Copyright (C) 1995-2005 Russell King + * + * This handles the mini data cache, as found on SA11x0 and XScale + * processors. When we copy a user page page, we map it in such a way + * that accesses to this page will not touch the main data cache, but + * will be cached in the mini data cache. This prevents us thrashing + * the main data cache on page faults. + */ +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> + +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> + +#include "mm.h" + +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_MT_MINICACHE) + +static DEFINE_RAW_SPINLOCK(minicache_lock); + +/* + * ARMv4 mini-dcache optimised copy_user_highpage + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + * + * Note: We rely on all ARMv4 processors implementing the "invalidate D line" + * instruction. If your processor does not supply this, you have to write your + * own copy_user_highpage that does the right thing. + */ +static void mc_copy_user_page(void *from, void *to) +{ + int tmp; + + asm volatile ("\ + .syntax unified\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4\n\ +1: mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4+1\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs %2, %2, #1 @ 1\n\ + stmia %1!, {r2, r3, ip, lr} @ 4\n\ + ldmiane %0!, {r2, r3, ip, lr} @ 4\n\ + bne 1b @ " + : "+&r" (from), "+&r" (to), "=&r" (tmp) + : "2" (PAGE_SIZE / 64) + : "r2", "r3", "ip", "lr"); +} + +void v4_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + struct folio *src = page_folio(from); + void *kto = kmap_atomic(to); + + if (!test_and_set_bit(PG_dcache_clean, &src->flags)) + __flush_dcache_folio(folio_flush_mapping(src), src); + + raw_spin_lock(&minicache_lock); + + set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot)); + + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); + + raw_spin_unlock(&minicache_lock); + + kunmap_atomic(kto); +} + +/* + * ARMv4 optimised clear_user_page + */ +void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns v4_mc_user_fns __initdata = { + .cpu_clear_user_highpage = v4_mc_clear_user_highpage, + .cpu_copy_user_highpage = v4_mc_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-v4wb.c b/arch/arm/mm/copypage-v4wb.c new file mode 100644 index 0000000000..c3581b2264 --- /dev/null +++ b/arch/arm/mm/copypage-v4wb.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/copypage-v4wb.c + * + * Copyright (C) 1995-1999 Russell King + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * ARMv4 optimised copy_user_highpage + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + * + * Note: We rely on all ARMv4 processors implementing the "invalidate D line" + * instruction. If your processor does not supply this, you have to write your + * own copy_user_highpage that does the right thing. + */ +static void v4wb_copy_user_page(void *kto, const void *kfrom) +{ + int tmp; + + asm volatile ("\ + .syntax unified\n\ + ldmia %1!, {r3, r4, ip, lr} @ 4\n\ +1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r3, r4, ip, lr} @ 4\n\ + ldmia %1!, {r3, r4, ip, lr} @ 4+1\n\ + stmia %0!, {r3, r4, ip, lr} @ 4\n\ + ldmia %1!, {r3, r4, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r3, r4, ip, lr} @ 4\n\ + ldmia %1!, {r3, r4, ip, lr} @ 4\n\ + subs %2, %2, #1 @ 1\n\ + stmia %0!, {r3, r4, ip, lr} @ 4\n\ + ldmiane %1!, {r3, r4, ip, lr} @ 4\n\ + bne 1b @ 1\n\ + mcr p15, 0, %1, c7, c10, 4 @ 1 drain WB" + : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp) + : "2" (PAGE_SIZE / 64) + : "r3", "r4", "ip", "lr"); +} + +void v4wb_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + flush_cache_page(vma, vaddr, page_to_pfn(from)); + v4wb_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * ARMv4 optimised clear_user_page + * + * Same story as above. + */ +void v4wb_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns v4wb_user_fns __initdata = { + .cpu_clear_user_highpage = v4wb_clear_user_highpage, + .cpu_copy_user_highpage = v4wb_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-v4wt.c b/arch/arm/mm/copypage-v4wt.c new file mode 100644 index 0000000000..1fb1073330 --- /dev/null +++ b/arch/arm/mm/copypage-v4wt.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/copypage-v4wt.S + * + * Copyright (C) 1995-1999 Russell King + * + * This is for CPUs with a writethrough cache and 'flush ID cache' is + * the only supported cache operation. + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * ARMv4 optimised copy_user_highpage + * + * Since we have writethrough caches, we don't have to worry about + * dirty data in the cache. However, we do have to ensure that + * subsequent reads are up to date. + */ +static void v4wt_copy_user_page(void *kto, const void *kfrom) +{ + int tmp; + + asm volatile ("\ + .syntax unified\n\ + ldmia %1!, {r3, r4, ip, lr} @ 4\n\ +1: stmia %0!, {r3, r4, ip, lr} @ 4\n\ + ldmia %1!, {r3, r4, ip, lr} @ 4+1\n\ + stmia %0!, {r3, r4, ip, lr} @ 4\n\ + ldmia %1!, {r3, r4, ip, lr} @ 4\n\ + stmia %0!, {r3, r4, ip, lr} @ 4\n\ + ldmia %1!, {r3, r4, ip, lr} @ 4\n\ + subs %2, %2, #1 @ 1\n\ + stmia %0!, {r3, r4, ip, lr} @ 4\n\ + ldmiane %1!, {r3, r4, ip, lr} @ 4\n\ + bne 1b @ 1\n\ + mcr p15, 0, %2, c7, c7, 0 @ flush ID cache" + : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp) + : "2" (PAGE_SIZE / 64) + : "r3", "r4", "ip", "lr"); +} + +void v4wt_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + v4wt_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * ARMv4 optimised clear_user_page + * + * Same story as above. + */ +void v4wt_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ + mov r1, %2 @ 1\n\ + mov r2, #0 @ 1\n\ + mov r3, #0 @ 1\n\ + mov ip, #0 @ 1\n\ + mov lr, #0 @ 1\n\ +1: stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + stmia %0!, {r2, r3, ip, lr} @ 4\n\ + subs r1, r1, #1 @ 1\n\ + bne 1b @ 1\n\ + mcr p15, 0, r2, c7, c7, 0 @ flush ID cache" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 64) + : "r1", "r2", "r3", "ip", "lr"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns v4wt_user_fns __initdata = { + .cpu_clear_user_highpage = v4wt_clear_user_highpage, + .cpu_copy_user_highpage = v4wt_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c new file mode 100644 index 0000000000..a1a71f36d8 --- /dev/null +++ b/arch/arm/mm/copypage-v6.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/copypage-v6.c + * + * Copyright (C) 2002 Deep Blue Solutions Ltd, All Rights Reserved. + */ +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> + +#include <asm/shmparam.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/cachetype.h> + +#include "mm.h" + +#if SHMLBA > 16384 +#error FIX ME +#endif + +static DEFINE_RAW_SPINLOCK(v6_lock); + +/* + * Copy the user page. No aliasing to deal with so we can just + * attack the kernel's existing mapping of these pages. + */ +static void v6_copy_user_highpage_nonaliasing(struct page *to, + struct page *from, unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kfrom = kmap_atomic(from); + kto = kmap_atomic(to); + copy_page(kto, kfrom); + kunmap_atomic(kto); + kunmap_atomic(kfrom); +} + +/* + * Clear the user page. No aliasing to deal with so we can just + * attack the kernel's existing mapping of this page. + */ +static void v6_clear_user_highpage_nonaliasing(struct page *page, unsigned long vaddr) +{ + void *kaddr = kmap_atomic(page); + clear_page(kaddr); + kunmap_atomic(kaddr); +} + +/* + * Discard data in the kernel mapping for the new page. + * FIXME: needs this MCRR to be supported. + */ +static void discard_old_kernel_data(void *kto) +{ + __asm__("mcrr p15, 0, %1, %0, c6 @ 0xec401f06" + : + : "r" (kto), + "r" ((unsigned long)kto + PAGE_SIZE - 1) + : "cc"); +} + +/* + * Copy the page, taking account of the cache colour. + */ +static void v6_copy_user_highpage_aliasing(struct page *to, + struct page *from, unsigned long vaddr, struct vm_area_struct *vma) +{ + struct folio *src = page_folio(from); + unsigned int offset = CACHE_COLOUR(vaddr); + unsigned long kfrom, kto; + + if (!test_and_set_bit(PG_dcache_clean, &src->flags)) + __flush_dcache_folio(folio_flush_mapping(src), src); + + /* FIXME: not highmem safe */ + discard_old_kernel_data(page_address(to)); + + /* + * Now copy the page using the same cache colour as the + * pages ultimate destination. + */ + raw_spin_lock(&v6_lock); + + kfrom = COPYPAGE_V6_FROM + (offset << PAGE_SHIFT); + kto = COPYPAGE_V6_TO + (offset << PAGE_SHIFT); + + set_top_pte(kfrom, mk_pte(from, PAGE_KERNEL)); + set_top_pte(kto, mk_pte(to, PAGE_KERNEL)); + + copy_page((void *)kto, (void *)kfrom); + + raw_spin_unlock(&v6_lock); +} + +/* + * Clear the user page. We need to deal with the aliasing issues, + * so remap the kernel page into the same cache colour as the user + * page. + */ +static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vaddr) +{ + unsigned long to = COPYPAGE_V6_TO + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + + /* FIXME: not highmem safe */ + discard_old_kernel_data(page_address(page)); + + /* + * Now clear the page using the same cache colour as + * the pages ultimate destination. + */ + raw_spin_lock(&v6_lock); + + set_top_pte(to, mk_pte(page, PAGE_KERNEL)); + clear_page((void *)to); + + raw_spin_unlock(&v6_lock); +} + +struct cpu_user_fns v6_user_fns __initdata = { + .cpu_clear_user_highpage = v6_clear_user_highpage_nonaliasing, + .cpu_copy_user_highpage = v6_copy_user_highpage_nonaliasing, +}; + +static int __init v6_userpage_init(void) +{ + if (cache_is_vipt_aliasing()) { + cpu_user.cpu_clear_user_highpage = v6_clear_user_highpage_aliasing; + cpu_user.cpu_copy_user_highpage = v6_copy_user_highpage_aliasing; + } + + return 0; +} + +core_initcall(v6_userpage_init); diff --git a/arch/arm/mm/copypage-xsc3.c b/arch/arm/mm/copypage-xsc3.c new file mode 100644 index 0000000000..c86e79677f --- /dev/null +++ b/arch/arm/mm/copypage-xsc3.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/copypage-xsc3.S + * + * Copyright (C) 2004 Intel Corp. + * + * Adapted for 3rd gen XScale core, no more mini-dcache + * Author: Matt Gilbert (matthew.m.gilbert@intel.com) + */ +#include <linux/init.h> +#include <linux/highmem.h> + +/* + * General note: + * We don't really want write-allocate cache behaviour for these functions + * since that will just eat through 8K of the cache. + */ + +/* + * XSC3 optimised copy_user_highpage + * + * The source page may have some clean entries in the cache already, but we + * can safely ignore them - break_cow() will flush them out of the cache + * if we eventually end up using our copied page. + * + */ +static void xsc3_mc_copy_user_page(void *kto, const void *kfrom) +{ + int tmp; + + asm volatile ("\ +.arch xscale \n\ + pld [%1, #0] \n\ + pld [%1, #32] \n\ +1: pld [%1, #64] \n\ + pld [%1, #96] \n\ + \n\ +2: ldrd r2, r3, [%1], #8 \n\ + ldrd r4, r5, [%1], #8 \n\ + mcr p15, 0, %0, c7, c6, 1 @ invalidate\n\ + strd r2, r3, [%0], #8 \n\ + ldrd r2, r3, [%1], #8 \n\ + strd r4, r5, [%0], #8 \n\ + ldrd r4, r5, [%1], #8 \n\ + strd r2, r3, [%0], #8 \n\ + strd r4, r5, [%0], #8 \n\ + ldrd r2, r3, [%1], #8 \n\ + ldrd r4, r5, [%1], #8 \n\ + mcr p15, 0, %0, c7, c6, 1 @ invalidate\n\ + strd r2, r3, [%0], #8 \n\ + ldrd r2, r3, [%1], #8 \n\ + subs %2, %2, #1 \n\ + strd r4, r5, [%0], #8 \n\ + ldrd r4, r5, [%1], #8 \n\ + strd r2, r3, [%0], #8 \n\ + strd r4, r5, [%0], #8 \n\ + bgt 1b \n\ + beq 2b " + : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp) + : "2" (PAGE_SIZE / 64 - 1) + : "r2", "r3", "r4", "r5"); +} + +void xsc3_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + void *kto, *kfrom; + + kto = kmap_atomic(to); + kfrom = kmap_atomic(from); + flush_cache_page(vma, vaddr, page_to_pfn(from)); + xsc3_mc_copy_user_page(kto, kfrom); + kunmap_atomic(kfrom); + kunmap_atomic(kto); +} + +/* + * XScale optimised clear_user_page + */ +void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile ("\ +.arch xscale \n\ + mov r1, %2 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ +1: mcr p15, 0, %0, c7, c6, 1 @ invalidate line\n\ + strd r2, r3, [%0], #8 \n\ + strd r2, r3, [%0], #8 \n\ + strd r2, r3, [%0], #8 \n\ + strd r2, r3, [%0], #8 \n\ + subs r1, r1, #1 \n\ + bne 1b" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns xsc3_mc_user_fns __initdata = { + .cpu_clear_user_highpage = xsc3_mc_clear_user_highpage, + .cpu_copy_user_highpage = xsc3_mc_copy_user_highpage, +}; diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c new file mode 100644 index 0000000000..f1e29d3e81 --- /dev/null +++ b/arch/arm/mm/copypage-xscale.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/lib/copypage-xscale.S + * + * Copyright (C) 1995-2005 Russell King + * + * This handles the mini data cache, as found on SA11x0 and XScale + * processors. When we copy a user page page, we map it in such a way + * that accesses to this page will not touch the main data cache, but + * will be cached in the mini data cache. This prevents us thrashing + * the main data cache on page faults. + */ +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/highmem.h> +#include <linux/pagemap.h> + +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> + +#include "mm.h" + +#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \ + L_PTE_MT_MINICACHE) + +static DEFINE_RAW_SPINLOCK(minicache_lock); + +/* + * XScale mini-dcache optimised copy_user_highpage + * + * We flush the destination cache lines just before we write the data into the + * corresponding address. Since the Dcache is read-allocate, this removes the + * Dcache aliasing issue. The writes will be forwarded to the write buffer, + * and merged as appropriate. + */ +static void mc_copy_user_page(void *from, void *to) +{ + int tmp; + + /* + * Strangely enough, best performance is achieved + * when prefetching destination as well. (NP) + */ + asm volatile ("\ +.arch xscale \n\ + pld [%0, #0] \n\ + pld [%0, #32] \n\ + pld [%1, #0] \n\ + pld [%1, #32] \n\ +1: pld [%0, #64] \n\ + pld [%0, #96] \n\ + pld [%1, #64] \n\ + pld [%1, #96] \n\ +2: ldrd r2, r3, [%0], #8 \n\ + ldrd r4, r5, [%0], #8 \n\ + mov ip, %1 \n\ + strd r2, r3, [%1], #8 \n\ + ldrd r2, r3, [%0], #8 \n\ + strd r4, r5, [%1], #8 \n\ + ldrd r4, r5, [%0], #8 \n\ + strd r2, r3, [%1], #8 \n\ + strd r4, r5, [%1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + ldrd r2, r3, [%0], #8 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + ldrd r4, r5, [%0], #8 \n\ + mov ip, %1 \n\ + strd r2, r3, [%1], #8 \n\ + ldrd r2, r3, [%0], #8 \n\ + strd r4, r5, [%1], #8 \n\ + ldrd r4, r5, [%0], #8 \n\ + strd r2, r3, [%1], #8 \n\ + strd r4, r5, [%1], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs %2, %2, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bgt 1b \n\ + beq 2b " + : "+&r" (from), "+&r" (to), "=&r" (tmp) + : "2" (PAGE_SIZE / 64 - 1) + : "r2", "r3", "r4", "r5", "ip"); +} + +void xscale_mc_copy_user_highpage(struct page *to, struct page *from, + unsigned long vaddr, struct vm_area_struct *vma) +{ + struct folio *src = page_folio(from); + void *kto = kmap_atomic(to); + + if (!test_and_set_bit(PG_dcache_clean, &src->flags)) + __flush_dcache_folio(folio_flush_mapping(src), src); + + raw_spin_lock(&minicache_lock); + + set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot)); + + mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto); + + raw_spin_unlock(&minicache_lock); + + kunmap_atomic(kto); +} + +/* + * XScale optimised clear_user_page + */ +void +xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr) +{ + void *ptr, *kaddr = kmap_atomic(page); + asm volatile("\ +.arch xscale \n\ + mov r1, %2 \n\ + mov r2, #0 \n\ + mov r3, #0 \n\ +1: mov ip, %0 \n\ + strd r2, r3, [%0], #8 \n\ + strd r2, r3, [%0], #8 \n\ + strd r2, r3, [%0], #8 \n\ + strd r2, r3, [%0], #8 \n\ + mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ + subs r1, r1, #1 \n\ + mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ + bne 1b" + : "=r" (ptr) + : "0" (kaddr), "I" (PAGE_SIZE / 32) + : "r1", "r2", "r3", "ip"); + kunmap_atomic(kaddr); +} + +struct cpu_user_fns xscale_mc_user_fns __initdata = { + .cpu_clear_user_highpage = xscale_mc_clear_user_highpage, + .cpu_copy_user_highpage = xscale_mc_copy_user_highpage, +}; diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c new file mode 100644 index 0000000000..cfd9c933d2 --- /dev/null +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Based on linux/arch/arm/mm/dma-mapping.c + * + * Copyright (C) 2000-2004 Russell King + */ + +#include <linux/dma-map-ops.h> +#include <asm/cachetype.h> +#include <asm/cacheflush.h> +#include <asm/outercache.h> +#include <asm/cp15.h> + +#include "dma.h" + +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + dmac_map_area(__va(paddr), size, dir); + + if (dir == DMA_FROM_DEVICE) + outer_inv_range(paddr, paddr + size); + else + outer_clean_range(paddr, paddr + size); +} + +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + if (dir != DMA_TO_DEVICE) { + outer_inv_range(paddr, paddr + size); + dmac_unmap_area(__va(paddr), size, dir); + } +} + +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + if (IS_ENABLED(CONFIG_CPU_V7M)) { + /* + * Cache support for v7m is optional, so can be treated as + * coherent if no cache has been detected. Note that it is not + * enough to check if MPU is in use or not since in absense of + * MPU system memory map is used. + */ + dev->dma_coherent = cacheid ? coherent : true; + } else { + /* + * Assume coherent DMA in case MMU/MPU has not been set up. + */ + dev->dma_coherent = (get_cr() & CR_M) ? coherent : true; + } +} diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c new file mode 100644 index 0000000000..5409225b4a --- /dev/null +++ b/arch/arm/mm/dma-mapping.c @@ -0,0 +1,1822 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/dma-mapping.c + * + * Copyright (C) 2000-2004 Russell King + * + * DMA uncached mapping support. + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/genalloc.h> +#include <linux/gfp.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/dma-direct.h> +#include <linux/dma-map-ops.h> +#include <linux/highmem.h> +#include <linux/memblock.h> +#include <linux/slab.h> +#include <linux/iommu.h> +#include <linux/io.h> +#include <linux/vmalloc.h> +#include <linux/sizes.h> +#include <linux/cma.h> + +#include <asm/page.h> +#include <asm/highmem.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> +#include <asm/mach/arch.h> +#include <asm/dma-iommu.h> +#include <asm/mach/map.h> +#include <asm/system_info.h> +#include <asm/xen/xen-ops.h> + +#include "dma.h" +#include "mm.h" + +struct arm_dma_alloc_args { + struct device *dev; + size_t size; + gfp_t gfp; + pgprot_t prot; + const void *caller; + bool want_vaddr; + int coherent_flag; +}; + +struct arm_dma_free_args { + struct device *dev; + size_t size; + void *cpu_addr; + struct page *page; + bool want_vaddr; +}; + +#define NORMAL 0 +#define COHERENT 1 + +struct arm_dma_allocator { + void *(*alloc)(struct arm_dma_alloc_args *args, + struct page **ret_page); + void (*free)(struct arm_dma_free_args *args); +}; + +struct arm_dma_buffer { + struct list_head list; + void *virt; + struct arm_dma_allocator *allocator; +}; + +static LIST_HEAD(arm_dma_bufs); +static DEFINE_SPINLOCK(arm_dma_bufs_lock); + +static struct arm_dma_buffer *arm_dma_buffer_find(void *virt) +{ + struct arm_dma_buffer *buf, *found = NULL; + unsigned long flags; + + spin_lock_irqsave(&arm_dma_bufs_lock, flags); + list_for_each_entry(buf, &arm_dma_bufs, list) { + if (buf->virt == virt) { + list_del(&buf->list); + found = buf; + break; + } + } + spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); + return found; +} + +/* + * The DMA API is built upon the notion of "buffer ownership". A buffer + * is either exclusively owned by the CPU (and therefore may be accessed + * by it) or exclusively owned by the DMA device. These helper functions + * represent the transitions between these two ownership states. + * + * Note, however, that on later ARMs, this notion does not work due to + * speculative prefetches. We model our approach on the assumption that + * the CPU does do speculative prefetches, which means we clean caches + * before transfers and delay cache invalidation until transfer completion. + * + */ + +static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag) +{ + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + if (PageHighMem(page)) { + phys_addr_t base = __pfn_to_phys(page_to_pfn(page)); + phys_addr_t end = base + size; + while (size > 0) { + void *ptr = kmap_atomic(page); + memset(ptr, 0, PAGE_SIZE); + if (coherent_flag != COHERENT) + dmac_flush_range(ptr, ptr + PAGE_SIZE); + kunmap_atomic(ptr); + page++; + size -= PAGE_SIZE; + } + if (coherent_flag != COHERENT) + outer_flush_range(base, end); + } else { + void *ptr = page_address(page); + memset(ptr, 0, size); + if (coherent_flag != COHERENT) { + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); + } + } +} + +/* + * Allocate a DMA buffer for 'dev' of size 'size' using the + * specified gfp mask. Note that 'size' must be page aligned. + */ +static struct page *__dma_alloc_buffer(struct device *dev, size_t size, + gfp_t gfp, int coherent_flag) +{ + unsigned long order = get_order(size); + struct page *page, *p, *e; + + page = alloc_pages(gfp, order); + if (!page) + return NULL; + + /* + * Now split the huge page and free the excess pages + */ + split_page(page, order); + for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) + __free_page(p); + + __dma_clear_buffer(page, size, coherent_flag); + + return page; +} + +/* + * Free a DMA buffer. 'size' must be page aligned. + */ +static void __dma_free_buffer(struct page *page, size_t size) +{ + struct page *e = page + (size >> PAGE_SHIFT); + + while (page < e) { + __free_page(page); + page++; + } +} + +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page, + const void *caller, bool want_vaddr, + int coherent_flag, gfp_t gfp); + +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller, bool want_vaddr); + +#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K +static struct gen_pool *atomic_pool __ro_after_init; + +static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE; + +static int __init early_coherent_pool(char *p) +{ + atomic_pool_size = memparse(p, &p); + return 0; +} +early_param("coherent_pool", early_coherent_pool); + +/* + * Initialise the coherent pool for atomic allocations. + */ +static int __init atomic_pool_init(void) +{ + pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL); + gfp_t gfp = GFP_KERNEL | GFP_DMA; + struct page *page; + void *ptr; + + atomic_pool = gen_pool_create(PAGE_SHIFT, -1); + if (!atomic_pool) + goto out; + /* + * The atomic pool is only used for non-coherent allocations + * so we must pass NORMAL for coherent_flag. + */ + if (dev_get_cma_area(NULL)) + ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot, + &page, atomic_pool_init, true, NORMAL, + GFP_KERNEL); + else + ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot, + &page, atomic_pool_init, true); + if (ptr) { + int ret; + + ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr, + page_to_phys(page), + atomic_pool_size, -1); + if (ret) + goto destroy_genpool; + + gen_pool_set_algo(atomic_pool, + gen_pool_first_fit_order_align, + NULL); + pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n", + atomic_pool_size / 1024); + return 0; + } + +destroy_genpool: + gen_pool_destroy(atomic_pool); + atomic_pool = NULL; +out: + pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n", + atomic_pool_size / 1024); + return -ENOMEM; +} +/* + * CMA is activated by core_initcall, so we must be called after it. + */ +postcore_initcall(atomic_pool_init); + +#ifdef CONFIG_CMA_AREAS +struct dma_contig_early_reserve { + phys_addr_t base; + unsigned long size; +}; + +static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; + +static int dma_mmu_remap_num __initdata; + +#ifdef CONFIG_DMA_CMA +void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ + dma_mmu_remap[dma_mmu_remap_num].base = base; + dma_mmu_remap[dma_mmu_remap_num].size = size; + dma_mmu_remap_num++; +} +#endif + +void __init dma_contiguous_remap(void) +{ + int i; + for (i = 0; i < dma_mmu_remap_num; i++) { + phys_addr_t start = dma_mmu_remap[i].base; + phys_addr_t end = start + dma_mmu_remap[i].size; + struct map_desc map; + unsigned long addr; + + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; + if (start >= end) + continue; + + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_DMA_READY; + + /* + * Clear previous low-memory mapping to ensure that the + * TLB does not see any conflicting entries, then flush + * the TLB of the old entries before creating new mappings. + * + * This ensures that any speculatively loaded TLB entries + * (even though they may be rare) can not cause any problems, + * and ensures that this code is architecturally compliant. + */ + for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); + addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + + flush_tlb_kernel_range(__phys_to_virt(start), + __phys_to_virt(end)); + + iotable_init(&map, 1); + } +} +#endif + +static int __dma_update_pte(pte_t *pte, unsigned long addr, void *data) +{ + struct page *page = virt_to_page((void *)addr); + pgprot_t prot = *(pgprot_t *)data; + + set_pte_ext(pte, mk_pte(page, prot), 0); + return 0; +} + +static void __dma_remap(struct page *page, size_t size, pgprot_t prot) +{ + unsigned long start = (unsigned long) page_address(page); + unsigned end = start + size; + + apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); + flush_tlb_kernel_range(start, end); +} + +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller, bool want_vaddr) +{ + struct page *page; + void *ptr = NULL; + /* + * __alloc_remap_buffer is only called when the device is + * non-coherent + */ + page = __dma_alloc_buffer(dev, size, gfp, NORMAL); + if (!page) + return NULL; + if (!want_vaddr) + goto out; + + ptr = dma_common_contiguous_remap(page, size, prot, caller); + if (!ptr) { + __dma_free_buffer(page, size); + return NULL; + } + + out: + *ret_page = page; + return ptr; +} + +static void *__alloc_from_pool(size_t size, struct page **ret_page) +{ + unsigned long val; + void *ptr = NULL; + + if (!atomic_pool) { + WARN(1, "coherent pool not initialised!\n"); + return NULL; + } + + val = gen_pool_alloc(atomic_pool, size); + if (val) { + phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val); + + *ret_page = phys_to_page(phys); + ptr = (void *)val; + } + + return ptr; +} + +static bool __in_atomic_pool(void *start, size_t size) +{ + return gen_pool_has_addr(atomic_pool, (unsigned long)start, size); +} + +static int __free_from_pool(void *start, size_t size) +{ + if (!__in_atomic_pool(start, size)) + return 0; + + gen_pool_free(atomic_pool, (unsigned long)start, size); + + return 1; +} + +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page, + const void *caller, bool want_vaddr, + int coherent_flag, gfp_t gfp) +{ + unsigned long order = get_order(size); + size_t count = size >> PAGE_SHIFT; + struct page *page; + void *ptr = NULL; + + page = dma_alloc_from_contiguous(dev, count, order, gfp & __GFP_NOWARN); + if (!page) + return NULL; + + __dma_clear_buffer(page, size, coherent_flag); + + if (!want_vaddr) + goto out; + + if (PageHighMem(page)) { + ptr = dma_common_contiguous_remap(page, size, prot, caller); + if (!ptr) { + dma_release_from_contiguous(dev, page, count); + return NULL; + } + } else { + __dma_remap(page, size, prot); + ptr = page_address(page); + } + + out: + *ret_page = page; + return ptr; +} + +static void __free_from_contiguous(struct device *dev, struct page *page, + void *cpu_addr, size_t size, bool want_vaddr) +{ + if (want_vaddr) { + if (PageHighMem(page)) + dma_common_free_remap(cpu_addr, size); + else + __dma_remap(page, size, PAGE_KERNEL); + } + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); +} + +static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot) +{ + prot = (attrs & DMA_ATTR_WRITE_COMBINE) ? + pgprot_writecombine(prot) : + pgprot_dmacoherent(prot); + return prot; +} + +static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, + struct page **ret_page) +{ + struct page *page; + /* __alloc_simple_buffer is only called when the device is coherent */ + page = __dma_alloc_buffer(dev, size, gfp, COHERENT); + if (!page) + return NULL; + + *ret_page = page; + return page_address(page); +} + +static void *simple_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_simple_buffer(args->dev, args->size, args->gfp, + ret_page); +} + +static void simple_allocator_free(struct arm_dma_free_args *args) +{ + __dma_free_buffer(args->page, args->size); +} + +static struct arm_dma_allocator simple_allocator = { + .alloc = simple_allocator_alloc, + .free = simple_allocator_free, +}; + +static void *cma_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_from_contiguous(args->dev, args->size, args->prot, + ret_page, args->caller, + args->want_vaddr, args->coherent_flag, + args->gfp); +} + +static void cma_allocator_free(struct arm_dma_free_args *args) +{ + __free_from_contiguous(args->dev, args->page, args->cpu_addr, + args->size, args->want_vaddr); +} + +static struct arm_dma_allocator cma_allocator = { + .alloc = cma_allocator_alloc, + .free = cma_allocator_free, +}; + +static void *pool_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_from_pool(args->size, ret_page); +} + +static void pool_allocator_free(struct arm_dma_free_args *args) +{ + __free_from_pool(args->cpu_addr, args->size); +} + +static struct arm_dma_allocator pool_allocator = { + .alloc = pool_allocator_alloc, + .free = pool_allocator_free, +}; + +static void *remap_allocator_alloc(struct arm_dma_alloc_args *args, + struct page **ret_page) +{ + return __alloc_remap_buffer(args->dev, args->size, args->gfp, + args->prot, ret_page, args->caller, + args->want_vaddr); +} + +static void remap_allocator_free(struct arm_dma_free_args *args) +{ + if (args->want_vaddr) + dma_common_free_remap(args->cpu_addr, args->size); + + __dma_free_buffer(args->page, args->size); +} + +static struct arm_dma_allocator remap_allocator = { + .alloc = remap_allocator_alloc, + .free = remap_allocator_free, +}; + +static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, pgprot_t prot, bool is_coherent, + unsigned long attrs, const void *caller) +{ + u64 mask = min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); + struct page *page = NULL; + void *addr; + bool allowblock, cma; + struct arm_dma_buffer *buf; + struct arm_dma_alloc_args args = { + .dev = dev, + .size = PAGE_ALIGN(size), + .gfp = gfp, + .prot = prot, + .caller = caller, + .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), + .coherent_flag = is_coherent ? COHERENT : NORMAL, + }; + +#ifdef CONFIG_DMA_API_DEBUG + u64 limit = (mask + 1) & ~mask; + if (limit && size >= limit) { + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", + size, mask); + return NULL; + } +#endif + + buf = kzalloc(sizeof(*buf), + gfp & ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM)); + if (!buf) + return NULL; + + if (mask < 0xffffffffULL) + gfp |= GFP_DMA; + + args.gfp = gfp; + + *handle = DMA_MAPPING_ERROR; + allowblock = gfpflags_allow_blocking(gfp); + cma = allowblock ? dev_get_cma_area(dev) : NULL; + + if (cma) + buf->allocator = &cma_allocator; + else if (is_coherent) + buf->allocator = &simple_allocator; + else if (allowblock) + buf->allocator = &remap_allocator; + else + buf->allocator = &pool_allocator; + + addr = buf->allocator->alloc(&args, &page); + + if (page) { + unsigned long flags; + + *handle = phys_to_dma(dev, page_to_phys(page)); + buf->virt = args.want_vaddr ? addr : page; + + spin_lock_irqsave(&arm_dma_bufs_lock, flags); + list_add(&buf->list, &arm_dma_bufs); + spin_unlock_irqrestore(&arm_dma_bufs_lock, flags); + } else { + kfree(buf); + } + + return args.want_vaddr ? addr : page; +} + +/* + * Free a buffer as defined by the above mapping. + */ +static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs, + bool is_coherent) +{ + struct page *page = phys_to_page(dma_to_phys(dev, handle)); + struct arm_dma_buffer *buf; + struct arm_dma_free_args args = { + .dev = dev, + .size = PAGE_ALIGN(size), + .cpu_addr = cpu_addr, + .page = page, + .want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0), + }; + + buf = arm_dma_buffer_find(cpu_addr); + if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr)) + return; + + buf->allocator->free(&args); + kfree(buf); +} + +static void dma_cache_maint_page(struct page *page, unsigned long offset, + size_t size, enum dma_data_direction dir, + void (*op)(const void *, size_t, int)) +{ + unsigned long pfn; + size_t left = size; + + pfn = page_to_pfn(page) + offset / PAGE_SIZE; + offset %= PAGE_SIZE; + + /* + * A single sg entry may refer to multiple physically contiguous + * pages. But we still need to process highmem pages individually. + * If highmem is not configured then the bulk of this loop gets + * optimized out. + */ + do { + size_t len = left; + void *vaddr; + + page = pfn_to_page(pfn); + + if (PageHighMem(page)) { + if (len + offset > PAGE_SIZE) + len = PAGE_SIZE - offset; + + if (cache_is_vipt_nonaliasing()) { + vaddr = kmap_atomic(page); + op(vaddr + offset, len, dir); + kunmap_atomic(vaddr); + } else { + vaddr = kmap_high_get(page); + if (vaddr) { + op(vaddr + offset, len, dir); + kunmap_high(page); + } + } + } else { + vaddr = page_address(page) + offset; + op(vaddr, len, dir); + } + offset = 0; + pfn++; + left -= len; + } while (left); +} + +/* + * Make an area consistent for devices. + * Note: Drivers should NOT use this function directly. + * Use the driver DMA support - see dma-mapping.h (dma_sync_*) + */ +static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr; + + dma_cache_maint_page(page, off, size, dir, dmac_map_area); + + paddr = page_to_phys(page) + off; + if (dir == DMA_FROM_DEVICE) { + outer_inv_range(paddr, paddr + size); + } else { + outer_clean_range(paddr, paddr + size); + } + /* FIXME: non-speculating: flush on bidirectional mappings? */ +} + +static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = page_to_phys(page) + off; + + /* FIXME: non-speculating: not required */ + /* in any case, don't bother invalidating if DMA to device */ + if (dir != DMA_TO_DEVICE) { + outer_inv_range(paddr, paddr + size); + + dma_cache_maint_page(page, off, size, dir, dmac_unmap_area); + } + + /* + * Mark the D-cache clean for these pages to avoid extra flushing. + */ + if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) { + struct folio *folio = pfn_folio(paddr / PAGE_SIZE); + size_t offset = offset_in_folio(folio, paddr); + + for (;;) { + size_t sz = folio_size(folio) - offset; + + if (size < sz) + break; + if (!offset) + set_bit(PG_dcache_clean, &folio->flags); + offset = 0; + size -= sz; + if (!size) + break; + folio = folio_next(folio); + } + } +} + +#ifdef CONFIG_ARM_DMA_USE_IOMMU + +static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs) +{ + int prot = 0; + + if (attrs & DMA_ATTR_PRIVILEGED) + prot |= IOMMU_PRIV; + + switch (dir) { + case DMA_BIDIRECTIONAL: + return prot | IOMMU_READ | IOMMU_WRITE; + case DMA_TO_DEVICE: + return prot | IOMMU_READ; + case DMA_FROM_DEVICE: + return prot | IOMMU_WRITE; + default: + return prot; + } +} + +/* IOMMU */ + +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping); + +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, + size_t size) +{ + unsigned int order = get_order(size); + unsigned int align = 0; + unsigned int count, start; + size_t mapping_size = mapping->bits << PAGE_SHIFT; + unsigned long flags; + dma_addr_t iova; + int i; + + if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT) + order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT; + + count = PAGE_ALIGN(size) >> PAGE_SHIFT; + align = (1 << order) - 1; + + spin_lock_irqsave(&mapping->lock, flags); + for (i = 0; i < mapping->nr_bitmaps; i++) { + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) + continue; + + bitmap_set(mapping->bitmaps[i], start, count); + break; + } + + /* + * No unused range found. Try to extend the existing mapping + * and perform a second attempt to reserve an IO virtual + * address range of size bytes. + */ + if (i == mapping->nr_bitmaps) { + if (extend_iommu_mapping(mapping)) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_MAPPING_ERROR; + } + + start = bitmap_find_next_zero_area(mapping->bitmaps[i], + mapping->bits, 0, count, align); + + if (start > mapping->bits) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_MAPPING_ERROR; + } + + bitmap_set(mapping->bitmaps[i], start, count); + } + spin_unlock_irqrestore(&mapping->lock, flags); + + iova = mapping->base + (mapping_size * i); + iova += start << PAGE_SHIFT; + + return iova; +} + +static inline void __free_iova(struct dma_iommu_mapping *mapping, + dma_addr_t addr, size_t size) +{ + unsigned int start, count; + size_t mapping_size = mapping->bits << PAGE_SHIFT; + unsigned long flags; + dma_addr_t bitmap_base; + u32 bitmap_index; + + if (!size) + return; + + bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size; + BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions); + + bitmap_base = mapping->base + mapping_size * bitmap_index; + + start = (addr - bitmap_base) >> PAGE_SHIFT; + + if (addr + size > bitmap_base + mapping_size) { + /* + * The address range to be freed reaches into the iova + * range of the next bitmap. This should not happen as + * we don't allow this in __alloc_iova (at the + * moment). + */ + BUG(); + } else + count = size >> PAGE_SHIFT; + + spin_lock_irqsave(&mapping->lock, flags); + bitmap_clear(mapping->bitmaps[bitmap_index], start, count); + spin_unlock_irqrestore(&mapping->lock, flags); +} + +/* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */ +static const int iommu_order_array[] = { 9, 8, 4, 0 }; + +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, + gfp_t gfp, unsigned long attrs, + int coherent_flag) +{ + struct page **pages; + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i = 0; + int order_idx = 0; + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, GFP_KERNEL); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) + { + unsigned long order = get_order(size); + struct page *page; + + page = dma_alloc_from_contiguous(dev, count, order, + gfp & __GFP_NOWARN); + if (!page) + goto error; + + __dma_clear_buffer(page, size, coherent_flag); + + for (i = 0; i < count; i++) + pages[i] = page + i; + + return pages; + } + + /* Go straight to 4K chunks if caller says it's OK. */ + if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES) + order_idx = ARRAY_SIZE(iommu_order_array) - 1; + + /* + * IOMMU can map any pages, so himem can also be used here + */ + gfp |= __GFP_NOWARN | __GFP_HIGHMEM; + + while (count) { + int j, order; + + order = iommu_order_array[order_idx]; + + /* Drop down when we get small */ + if (__fls(count) < order) { + order_idx++; + continue; + } + + if (order) { + /* See if it's easy to allocate a high-order chunk */ + pages[i] = alloc_pages(gfp | __GFP_NORETRY, order); + + /* Go down a notch at first sign of pressure */ + if (!pages[i]) { + order_idx++; + continue; + } + } else { + pages[i] = alloc_pages(gfp, 0); + if (!pages[i]) + goto error; + } + + if (order) { + split_page(pages[i], order); + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + } + + __dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag); + i += 1 << order; + count -= 1 << order; + } + + return pages; +error: + while (i--) + if (pages[i]) + __free_pages(pages[i], 0); + kvfree(pages); + return NULL; +} + +static int __iommu_free_buffer(struct device *dev, struct page **pages, + size_t size, unsigned long attrs) +{ + int count = size >> PAGE_SHIFT; + int i; + + if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { + dma_release_from_contiguous(dev, pages[0], count); + } else { + for (i = 0; i < count; i++) + if (pages[i]) + __free_pages(pages[i], 0); + } + + kvfree(pages); + return 0; +} + +/* + * Create a mapping in device IO address space for specified pages + */ +static dma_addr_t +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size, + unsigned long attrs) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + dma_addr_t dma_addr, iova; + int i; + + dma_addr = __alloc_iova(mapping, size); + if (dma_addr == DMA_MAPPING_ERROR) + return dma_addr; + + iova = dma_addr; + for (i = 0; i < count; ) { + int ret; + + unsigned int next_pfn = page_to_pfn(pages[i]) + 1; + phys_addr_t phys = page_to_phys(pages[i]); + unsigned int len, j; + + for (j = i + 1; j < count; j++, next_pfn++) + if (page_to_pfn(pages[j]) != next_pfn) + break; + + len = (j - i) << PAGE_SHIFT; + ret = iommu_map(mapping->domain, iova, phys, len, + __dma_info_to_prot(DMA_BIDIRECTIONAL, attrs), + GFP_KERNEL); + if (ret < 0) + goto fail; + iova += len; + i = j; + } + return dma_addr; +fail: + iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); + __free_iova(mapping, dma_addr, size); + return DMA_MAPPING_ERROR; +} + +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + + /* + * add optional in-page offset from iova to size and align + * result to page size + */ + size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); + iova &= PAGE_MASK; + + iommu_unmap(mapping->domain, iova, size); + __free_iova(mapping, iova, size); + return 0; +} + +static struct page **__atomic_get_pages(void *addr) +{ + struct page *page; + phys_addr_t phys; + + phys = gen_pool_virt_to_phys(atomic_pool, (unsigned long)addr); + page = phys_to_page(phys); + + return (struct page **)page; +} + +static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs) +{ + if (__in_atomic_pool(cpu_addr, PAGE_SIZE)) + return __atomic_get_pages(cpu_addr); + + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) + return cpu_addr; + + return dma_common_find_pages(cpu_addr); +} + +static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp, + dma_addr_t *handle, int coherent_flag, + unsigned long attrs) +{ + struct page *page; + void *addr; + + if (coherent_flag == COHERENT) + addr = __alloc_simple_buffer(dev, size, gfp, &page); + else + addr = __alloc_from_pool(size, &page); + if (!addr) + return NULL; + + *handle = __iommu_create_mapping(dev, &page, size, attrs); + if (*handle == DMA_MAPPING_ERROR) + goto err_mapping; + + return addr; + +err_mapping: + __free_from_pool(addr, size); + return NULL; +} + +static void __iommu_free_atomic(struct device *dev, void *cpu_addr, + dma_addr_t handle, size_t size, int coherent_flag) +{ + __iommu_remove_mapping(dev, handle, size); + if (coherent_flag == COHERENT) + __dma_free_buffer(virt_to_page(cpu_addr), size); + else + __free_from_pool(cpu_addr, size); +} + +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, unsigned long attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); + struct page **pages; + void *addr = NULL; + int coherent_flag = dev->dma_coherent ? COHERENT : NORMAL; + + *handle = DMA_MAPPING_ERROR; + size = PAGE_ALIGN(size); + + if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp)) + return __iommu_alloc_simple(dev, size, gfp, handle, + coherent_flag, attrs); + + pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag); + if (!pages) + return NULL; + + *handle = __iommu_create_mapping(dev, pages, size, attrs); + if (*handle == DMA_MAPPING_ERROR) + goto err_buffer; + + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) + return pages; + + addr = dma_common_pages_remap(pages, size, prot, + __builtin_return_address(0)); + if (!addr) + goto err_mapping; + + return addr; + +err_mapping: + __iommu_remove_mapping(dev, *handle, size); +err_buffer: + __iommu_free_buffer(dev, pages, size, attrs); + return NULL; +} + +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + int err; + + if (!pages) + return -ENXIO; + + if (vma->vm_pgoff >= nr_pages) + return -ENXIO; + + if (!dev->dma_coherent) + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); + + err = vm_map_pages(vma, pages, nr_pages); + if (err) + pr_err("Remapping memory failed: %d\n", err); + + return err; +} + +/* + * free a page as defined by the above mapping. + * Must not be called with IRQs disabled. + */ +static void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs) +{ + int coherent_flag = dev->dma_coherent ? COHERENT : NORMAL; + struct page **pages; + size = PAGE_ALIGN(size); + + if (coherent_flag == COHERENT || __in_atomic_pool(cpu_addr, size)) { + __iommu_free_atomic(dev, cpu_addr, handle, size, coherent_flag); + return; + } + + pages = __iommu_get_pages(cpu_addr, attrs); + if (!pages) { + WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr); + return; + } + + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) + dma_common_free_remap(cpu_addr, size); + + __iommu_remove_mapping(dev, handle, size); + __iommu_free_buffer(dev, pages, size, attrs); +} + +static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, unsigned long attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page **pages = __iommu_get_pages(cpu_addr, attrs); + + if (!pages) + return -ENXIO; + + return sg_alloc_table_from_pages(sgt, pages, count, 0, size, + GFP_KERNEL); +} + +/* + * Map a part of the scatter-gather list into contiguous io address space + */ +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, + size_t size, dma_addr_t *handle, + enum dma_data_direction dir, unsigned long attrs) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + dma_addr_t iova, iova_base; + int ret = 0; + unsigned int count; + struct scatterlist *s; + int prot; + + size = PAGE_ALIGN(size); + *handle = DMA_MAPPING_ERROR; + + iova_base = iova = __alloc_iova(mapping, size); + if (iova == DMA_MAPPING_ERROR) + return -ENOMEM; + + for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { + phys_addr_t phys = page_to_phys(sg_page(s)); + unsigned int len = PAGE_ALIGN(s->offset + s->length); + + if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); + + prot = __dma_info_to_prot(dir, attrs); + + ret = iommu_map(mapping->domain, iova, phys, len, prot, + GFP_KERNEL); + if (ret < 0) + goto fail; + count += len >> PAGE_SHIFT; + iova += len; + } + *handle = iova_base; + + return 0; +fail: + iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); + __free_iova(mapping, iova_base, size); + return ret; +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + */ +static int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *s = sg, *dma = sg, *start = sg; + int i, count = 0, ret; + unsigned int offset = s->offset; + unsigned int size = s->offset + s->length; + unsigned int max = dma_get_max_seg_size(dev); + + for (i = 1; i < nents; i++) { + s = sg_next(s); + + s->dma_length = 0; + + if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { + ret = __map_sg_chunk(dev, start, size, + &dma->dma_address, dir, attrs); + if (ret < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + size = offset = s->offset; + start = s; + dma = sg_next(dma); + count += 1; + } + size += s->length; + } + ret = __map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs); + if (ret < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + return count+1; + +bad_mapping: + for_each_sg(sg, s, count, i) + __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); + if (ret == -ENOMEM) + return ret; + return -EINVAL; +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +static void arm_iommu_unmap_sg(struct device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_len(s)) + __iommu_remove_mapping(dev, sg_dma_address(s), + sg_dma_len(s)); + if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + __dma_page_dev_to_cpu(sg_page(s), s->offset, + s->length, dir); + } +} + +/** + * arm_iommu_sync_sg_for_cpu + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +static void arm_iommu_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + if (dev->dma_coherent) + return; + + for_each_sg(sg, s, nents, i) + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + +} + +/** + * arm_iommu_sync_sg_for_device + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +static void arm_iommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + if (dev->dma_coherent) + return; + + for_each_sg(sg, s, nents, i) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); +} + +/** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + dma_addr_t dma_addr; + int ret, prot, len = PAGE_ALIGN(size + offset); + + if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + __dma_page_cpu_to_dev(page, offset, size, dir); + + dma_addr = __alloc_iova(mapping, len); + if (dma_addr == DMA_MAPPING_ERROR) + return dma_addr; + + prot = __dma_info_to_prot(dir, attrs); + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, + prot, GFP_KERNEL); + if (ret < 0) + goto fail; + + return dma_addr + offset; +fail: + __free_iova(mapping, dma_addr, len); + return DMA_MAPPING_ERROR; +} + +/** + * arm_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + dma_addr_t iova = handle & PAGE_MASK; + struct page *page; + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + if (!dev->dma_coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { + page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + __dma_page_dev_to_cpu(page, offset, size, dir); + } + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +/** + * arm_iommu_map_resource - map a device resource for DMA + * @dev: valid struct device pointer + * @phys_addr: physical address of resource + * @size: size of resource to map + * @dir: DMA transfer direction + */ +static dma_addr_t arm_iommu_map_resource(struct device *dev, + phys_addr_t phys_addr, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + dma_addr_t dma_addr; + int ret, prot; + phys_addr_t addr = phys_addr & PAGE_MASK; + unsigned int offset = phys_addr & ~PAGE_MASK; + size_t len = PAGE_ALIGN(size + offset); + + dma_addr = __alloc_iova(mapping, len); + if (dma_addr == DMA_MAPPING_ERROR) + return dma_addr; + + prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO; + + ret = iommu_map(mapping->domain, dma_addr, addr, len, prot, GFP_KERNEL); + if (ret < 0) + goto fail; + + return dma_addr + offset; +fail: + __free_iova(mapping, dma_addr, len); + return DMA_MAPPING_ERROR; +} + +/** + * arm_iommu_unmap_resource - unmap a device DMA resource + * @dev: valid struct device pointer + * @dma_handle: DMA address to resource + * @size: size of resource to map + * @dir: DMA transfer direction + */ +static void arm_iommu_unmap_resource(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + dma_addr_t iova = dma_handle & PAGE_MASK; + unsigned int offset = dma_handle & ~PAGE_MASK; + size_t len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +static void arm_iommu_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + dma_addr_t iova = handle & PAGE_MASK; + struct page *page; + unsigned int offset = handle & ~PAGE_MASK; + + if (dev->dma_coherent || !iova) + return; + + page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static void arm_iommu_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + dma_addr_t iova = handle & PAGE_MASK; + struct page *page; + unsigned int offset = handle & ~PAGE_MASK; + + if (dev->dma_coherent || !iova) + return; + + page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +static const struct dma_map_ops iommu_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + .get_sgtable = arm_iommu_get_sgtable, + + .map_page = arm_iommu_map_page, + .unmap_page = arm_iommu_unmap_page, + .sync_single_for_cpu = arm_iommu_sync_single_for_cpu, + .sync_single_for_device = arm_iommu_sync_single_for_device, + + .map_sg = arm_iommu_map_sg, + .unmap_sg = arm_iommu_unmap_sg, + .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, + .sync_sg_for_device = arm_iommu_sync_sg_for_device, + + .map_resource = arm_iommu_map_resource, + .unmap_resource = arm_iommu_unmap_resource, +}; + +/** + * arm_iommu_create_mapping + * @bus: pointer to the bus holding the client device (for IOMMU calls) + * @base: start address of the valid IO address space + * @size: maximum size of the valid IO address space + * + * Creates a mapping structure which holds information about used/unused + * IO address ranges, which is required to perform memory allocation and + * mapping with IOMMU aware functions. + * + * The client device need to be attached to the mapping with + * arm_iommu_attach_device function. + */ +struct dma_iommu_mapping * +arm_iommu_create_mapping(const struct bus_type *bus, dma_addr_t base, u64 size) +{ + unsigned int bits = size >> PAGE_SHIFT; + unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long); + struct dma_iommu_mapping *mapping; + int extensions = 1; + int err = -ENOMEM; + + /* currently only 32-bit DMA address space is supported */ + if (size > DMA_BIT_MASK(32) + 1) + return ERR_PTR(-ERANGE); + + if (!bitmap_size) + return ERR_PTR(-EINVAL); + + if (bitmap_size > PAGE_SIZE) { + extensions = bitmap_size / PAGE_SIZE; + bitmap_size = PAGE_SIZE; + } + + mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); + if (!mapping) + goto err; + + mapping->bitmap_size = bitmap_size; + mapping->bitmaps = kcalloc(extensions, sizeof(unsigned long *), + GFP_KERNEL); + if (!mapping->bitmaps) + goto err2; + + mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL); + if (!mapping->bitmaps[0]) + goto err3; + + mapping->nr_bitmaps = 1; + mapping->extensions = extensions; + mapping->base = base; + mapping->bits = BITS_PER_BYTE * bitmap_size; + + spin_lock_init(&mapping->lock); + + mapping->domain = iommu_domain_alloc(bus); + if (!mapping->domain) + goto err4; + + kref_init(&mapping->kref); + return mapping; +err4: + kfree(mapping->bitmaps[0]); +err3: + kfree(mapping->bitmaps); +err2: + kfree(mapping); +err: + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(arm_iommu_create_mapping); + +static void release_iommu_mapping(struct kref *kref) +{ + int i; + struct dma_iommu_mapping *mapping = + container_of(kref, struct dma_iommu_mapping, kref); + + iommu_domain_free(mapping->domain); + for (i = 0; i < mapping->nr_bitmaps; i++) + kfree(mapping->bitmaps[i]); + kfree(mapping->bitmaps); + kfree(mapping); +} + +static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) +{ + int next_bitmap; + + if (mapping->nr_bitmaps >= mapping->extensions) + return -EINVAL; + + next_bitmap = mapping->nr_bitmaps; + mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size, + GFP_ATOMIC); + if (!mapping->bitmaps[next_bitmap]) + return -ENOMEM; + + mapping->nr_bitmaps++; + + return 0; +} + +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) +{ + if (mapping) + kref_put(&mapping->kref, release_iommu_mapping); +} +EXPORT_SYMBOL_GPL(arm_iommu_release_mapping); + +static int __arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping) +{ + int err; + + err = iommu_attach_device(mapping->domain, dev); + if (err) + return err; + + kref_get(&mapping->kref); + to_dma_iommu_mapping(dev) = mapping; + + pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev)); + return 0; +} + +/** + * arm_iommu_attach_device + * @dev: valid struct device pointer + * @mapping: io address space mapping structure (returned from + * arm_iommu_create_mapping) + * + * Attaches specified io address space mapping to the provided device. + * This replaces the dma operations (dma_map_ops pointer) with the + * IOMMU aware version. + * + * More than one client might be attached to the same io address space + * mapping. + */ +int arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping) +{ + int err; + + err = __arm_iommu_attach_device(dev, mapping); + if (err) + return err; + + set_dma_ops(dev, &iommu_ops); + return 0; +} +EXPORT_SYMBOL_GPL(arm_iommu_attach_device); + +/** + * arm_iommu_detach_device + * @dev: valid struct device pointer + * + * Detaches the provided device from a previously attached map. + * This overwrites the dma_ops pointer with appropriate non-IOMMU ops. + */ +void arm_iommu_detach_device(struct device *dev) +{ + struct dma_iommu_mapping *mapping; + + mapping = to_dma_iommu_mapping(dev); + if (!mapping) { + dev_warn(dev, "Not attached\n"); + return; + } + + iommu_detach_device(mapping->domain, dev); + kref_put(&mapping->kref, release_iommu_mapping); + to_dma_iommu_mapping(dev) = NULL; + set_dma_ops(dev, NULL); + + pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev)); +} +EXPORT_SYMBOL_GPL(arm_iommu_detach_device); + +static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + struct dma_iommu_mapping *mapping; + + mapping = arm_iommu_create_mapping(dev->bus, dma_base, size); + if (IS_ERR(mapping)) { + pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n", + size, dev_name(dev)); + return; + } + + if (__arm_iommu_attach_device(dev, mapping)) { + pr_warn("Failed to attached device %s to IOMMU_mapping\n", + dev_name(dev)); + arm_iommu_release_mapping(mapping); + return; + } + + set_dma_ops(dev, &iommu_ops); +} + +static void arm_teardown_iommu_dma_ops(struct device *dev) +{ + struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); + + if (!mapping) + return; + + arm_iommu_detach_device(dev); + arm_iommu_release_mapping(mapping); +} + +#else + +static void arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ +} + +static void arm_teardown_iommu_dma_ops(struct device *dev) { } + +#endif /* CONFIG_ARM_DMA_USE_IOMMU */ + +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent) +{ + /* + * Due to legacy code that sets the ->dma_coherent flag from a bus + * notifier we can't just assign coherent to the ->dma_coherent flag + * here, but instead have to make sure we only set but never clear it + * for now. + */ + if (coherent) + dev->dma_coherent = true; + + /* + * Don't override the dma_ops if they have already been set. Ideally + * this should be the only location where dma_ops are set, remove this + * check when all other callers of set_dma_ops will have disappeared. + */ + if (dev->dma_ops) + return; + + if (iommu) + arm_setup_iommu_dma_ops(dev, dma_base, size, iommu, coherent); + + xen_setup_dma_ops(dev); + dev->archdata.dma_ops_setup = true; +} + +void arch_teardown_dma_ops(struct device *dev) +{ + if (!dev->archdata.dma_ops_setup) + return; + + arm_teardown_iommu_dma_ops(dev); + /* Let arch_setup_dma_ops() start again from scratch upon re-probe */ + set_dma_ops(dev, NULL); +} + +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + __dma_page_cpu_to_dev(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), + size, dir); +} + +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + __dma_page_dev_to_cpu(phys_to_page(paddr), paddr & (PAGE_SIZE - 1), + size, dir); +} + +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) +{ + return __dma_alloc(dev, size, dma_handle, gfp, + __get_dma_pgprot(attrs, PAGE_KERNEL), false, + attrs, __builtin_return_address(0)); +} + +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, unsigned long attrs) +{ + __arm_dma_free(dev, size, cpu_addr, dma_handle, attrs, false); +} diff --git a/arch/arm/mm/dma.h b/arch/arm/mm/dma.h new file mode 100644 index 0000000000..aaef64b7f1 --- /dev/null +++ b/arch/arm/mm/dma.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef DMA_H +#define DMA_H + +#include <asm/glue-cache.h> + +#ifndef MULTI_CACHE +#define dmac_map_area __glue(_CACHE,_dma_map_area) +#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +extern void dmac_map_area(const void *, size_t, int); +extern void dmac_unmap_area(const void *, size_t, int); + +#else + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +#define dmac_map_area cpu_cache.dma_map_area +#define dmac_unmap_area cpu_cache.dma_unmap_area + +#endif + +#endif diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c new file mode 100644 index 0000000000..a9381095ab --- /dev/null +++ b/arch/arm/mm/dump.c @@ -0,0 +1,479 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Debug helper to dump the current kernel pagetables of the system + * so that we can see what the various memory ranges are set to. + * + * Derived from x86 implementation: + * (C) Copyright 2008 Intel Corporation + * + * Author: Arjan van de Ven <arjan@linux.intel.com> + */ +#include <linux/debugfs.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/seq_file.h> + +#include <asm/domain.h> +#include <asm/fixmap.h> +#include <asm/page.h> +#include <asm/ptdump.h> + +static struct addr_marker address_markers[] = { +#ifdef CONFIG_KASAN + { KASAN_SHADOW_START, "Kasan shadow start"}, + { KASAN_SHADOW_END, "Kasan shadow end"}, +#endif + { MODULES_VADDR, "Modules" }, + { PAGE_OFFSET, "Kernel Mapping" }, + { 0, "vmalloc() Area" }, + { FDT_FIXED_BASE, "FDT Area" }, + { FIXADDR_START, "Fixmap Area" }, + { VECTORS_BASE, "Vectors" }, + { VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" }, + { -1, NULL }, +}; + +#define pt_dump_seq_printf(m, fmt, args...) \ +({ \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +#define pt_dump_seq_puts(m, fmt) \ +({ \ + if (m) \ + seq_printf(m, fmt); \ +}) + +struct pg_state { + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned level; + u64 current_prot; + bool check_wx; + unsigned long wx_pages; + const char *current_domain; +}; + +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; + bool ro_bit; + bool nx_bit; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = L_PTE_USER, + .val = L_PTE_USER, + .set = "USR", + .clear = " ", + }, { + .mask = L_PTE_RDONLY, + .val = L_PTE_RDONLY, + .set = "ro", + .clear = "RW", + .ro_bit = true, + }, { + .mask = L_PTE_XN, + .val = L_PTE_XN, + .set = "NX", + .clear = "x ", + .nx_bit = true, + }, { + .mask = L_PTE_SHARED, + .val = L_PTE_SHARED, + .set = "SHD", + .clear = " ", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_UNCACHED, + .set = "SO/UNCACHED", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_BUFFERABLE, + .set = "MEM/BUFFERABLE/WC", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITETHROUGH, + .set = "MEM/CACHED/WT", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITEBACK, + .set = "MEM/CACHED/WBRA", +#ifndef CONFIG_ARM_LPAE + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_MINICACHE, + .set = "MEM/MINICACHE", +#endif + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_WRITEALLOC, + .set = "MEM/CACHED/WBWA", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_SHARED, + .set = "DEV/SHARED", +#ifndef CONFIG_ARM_LPAE + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_NONSHARED, + .set = "DEV/NONSHARED", +#endif + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_WC, + .set = "DEV/WC", + }, { + .mask = L_PTE_MT_MASK, + .val = L_PTE_MT_DEV_CACHED, + .set = "DEV/CACHED", + }, +}; + +static const struct prot_bits section_bits[] = { +#ifdef CONFIG_ARM_LPAE + { + .mask = PMD_SECT_USER, + .val = PMD_SECT_USER, + .set = "USR", + }, { + .mask = L_PMD_SECT_RDONLY | PMD_SECT_AP2, + .val = L_PMD_SECT_RDONLY | PMD_SECT_AP2, + .set = "ro", + .clear = "RW", + .ro_bit = true, +#elif __LINUX_ARM_ARCH__ >= 6 + { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_APX | PMD_SECT_AP_WRITE, + .set = " ro", + .ro_bit = true, + }, { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_WRITE, + .set = " RW", + }, { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ, + .set = "USR ro", + }, { + .mask = PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .set = "USR RW", +#else /* ARMv4/ARMv5 */ + /* These are approximate */ + { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = 0, + .set = " ro", + .ro_bit = true, + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_WRITE, + .set = " RW", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ, + .set = "USR ro", + }, { + .mask = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .val = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE, + .set = "USR RW", +#endif + }, { + .mask = PMD_SECT_XN, + .val = PMD_SECT_XN, + .set = "NX", + .clear = "x ", + .nx_bit = true, + }, { + .mask = PMD_SECT_S, + .val = PMD_SECT_S, + .set = "SHD", + .clear = " ", + }, +}; + +struct pg_level { + const char *name; + const struct prot_bits *bits; + size_t num; + u64 mask; + const struct prot_bits *ro_bit; + const struct prot_bits *nx_bit; +}; + +static struct pg_level pg_level[] = { + { + }, { /* pgd */ + }, { /* p4d */ + }, { /* pud */ + }, { /* pmd */ + .name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD", + .bits = section_bits, + .num = ARRAY_SIZE(section_bits), + }, { /* pte */ + .name = "PTE", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), + }, +}; + +static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t num) +{ + unsigned i; + + for (i = 0; i < num; i++, bits++) { + const char *s; + + if ((st->current_prot & bits->mask) == bits->val) + s = bits->set; + else + s = bits->clear; + + if (s) + pt_dump_seq_printf(st->seq, " %s", s); + } +} + +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + if ((st->current_prot & pg_level[st->level].ro_bit->mask) == + pg_level[st->level].ro_bit->val) + return; + if ((st->current_prot & pg_level[st->level].nx_bit->mask) == + pg_level[st->level].nx_bit->val) + return; + + WARN_ONCE(1, "arm/mm: Found insecure W+X mapping at address %pS\n", + (void *)st->start_address); + + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +} + +static void note_page(struct pg_state *st, unsigned long addr, + unsigned int level, u64 val, const char *domain) +{ + static const char units[] = "KMGTPE"; + u64 prot = val & pg_level[level].mask; + + if (!st->level) { + st->level = level; + st->current_prot = prot; + st->current_domain = domain; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || level != st->level || + domain != st->current_domain || + addr >= st->marker[1].start_address) { + const char *unit = units; + unsigned long delta; + + if (st->current_prot) { + note_prot_wx(st, addr); + pt_dump_seq_printf(st->seq, "0x%08lx-0x%08lx ", + st->start_address, addr); + + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pg_level[st->level].name); + if (st->current_domain) + pt_dump_seq_printf(st->seq, " %s", + st->current_domain); + if (pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num); + pt_dump_seq_printf(st->seq, "\n"); + } + + if (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", + st->marker->name); + } + st->start_address = addr; + st->current_prot = prot; + st->current_domain = domain; + st->level = level; + } +} + +static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start, + const char *domain) +{ + pte_t *pte = pte_offset_kernel(pmd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PTE; i++, pte++) { + addr = start + i * PAGE_SIZE; + note_page(st, addr, 5, pte_val(*pte), domain); + } +} + +static const char *get_domain_name(pmd_t *pmd) +{ +#ifndef CONFIG_ARM_LPAE + switch (pmd_val(*pmd) & PMD_DOMAIN_MASK) { + case PMD_DOMAIN(DOMAIN_KERNEL): + return "KERNEL "; + case PMD_DOMAIN(DOMAIN_USER): + return "USER "; + case PMD_DOMAIN(DOMAIN_IO): + return "IO "; + case PMD_DOMAIN(DOMAIN_VECTORS): + return "VECTORS"; + default: + return "unknown"; + } +#endif + return NULL; +} + +static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) +{ + pmd_t *pmd = pmd_offset(pud, 0); + unsigned long addr; + unsigned i; + const char *domain; + + for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { + addr = start + i * PMD_SIZE; + domain = get_domain_name(pmd); + if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd)) + note_page(st, addr, 4, pmd_val(*pmd), domain); + else + walk_pte(st, pmd, addr, domain); + + if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) { + addr += SECTION_SIZE; + pmd++; + domain = get_domain_name(pmd); + note_page(st, addr, 4, pmd_val(*pmd), domain); + } + } +} + +static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start) +{ + pud_t *pud = pud_offset(p4d, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_PUD; i++, pud++) { + addr = start + i * PUD_SIZE; + if (!pud_none(*pud)) { + walk_pmd(st, pud, addr); + } else { + note_page(st, addr, 3, pud_val(*pud), NULL); + } + } +} + +static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start) +{ + p4d_t *p4d = p4d_offset(pgd, 0); + unsigned long addr; + unsigned i; + + for (i = 0; i < PTRS_PER_P4D; i++, p4d++) { + addr = start + i * P4D_SIZE; + if (!p4d_none(*p4d)) { + walk_pud(st, p4d, addr); + } else { + note_page(st, addr, 2, p4d_val(*p4d), NULL); + } + } +} + +static void walk_pgd(struct pg_state *st, struct mm_struct *mm, + unsigned long start) +{ + pgd_t *pgd = pgd_offset(mm, 0UL); + unsigned i; + unsigned long addr; + + for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { + addr = start + i * PGDIR_SIZE; + if (!pgd_none(*pgd)) { + walk_p4d(st, pgd, addr); + } else { + note_page(st, addr, 1, pgd_val(*pgd), NULL); + } + } +} + +void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info) +{ + struct pg_state st = { + .seq = m, + .marker = info->markers, + .check_wx = false, + }; + + walk_pgd(&st, info->mm, info->base_addr); + note_page(&st, 0, 0, 0, NULL); +} + +static void __init ptdump_initialize(void) +{ + unsigned i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + if (pg_level[i].bits) + for (j = 0; j < pg_level[i].num; j++) { + pg_level[i].mask |= pg_level[i].bits[j].mask; + if (pg_level[i].bits[j].ro_bit) + pg_level[i].ro_bit = &pg_level[i].bits[j]; + if (pg_level[i].bits[j].nx_bit) + pg_level[i].nx_bit = &pg_level[i].bits[j]; + } +#ifdef CONFIG_KASAN + address_markers[4].start_address = VMALLOC_START; +#else + address_markers[2].start_address = VMALLOC_START; +#endif +} + +static struct ptdump_info kernel_ptdump_info = { + .mm = &init_mm, + .markers = address_markers, + .base_addr = 0, +}; + +void ptdump_check_wx(void) +{ + struct pg_state st = { + .seq = NULL, + .marker = (struct addr_marker[]) { + { 0, NULL}, + { -1, NULL}, + }, + .check_wx = true, + }; + + walk_pgd(&st, &init_mm, 0); + note_page(&st, 0, 0, 0, NULL); + if (st.wx_pages) + pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", + st.wx_pages); + else + pr_info("Checked W+X mappings: passed, no W+X pages found\n"); +} + +static int __init ptdump_init(void) +{ + ptdump_initialize(); + ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); + return 0; +} +__initcall(ptdump_init); diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c new file mode 100644 index 0000000000..fc33564597 --- /dev/null +++ b/arch/arm/mm/extable.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/arm/mm/extable.c + */ +#include <linux/extable.h> +#include <linux/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(instruction_pointer(regs)); + if (fixup) { + regs->ARM_pc = fixup->fixup; +#ifdef CONFIG_THUMB2_KERNEL + /* Clear the IT state to avoid nasty surprises in the fixup */ + regs->ARM_cpsr &= ~PSR_IT_MASK; +#endif + } + + return fixup != NULL; +} diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c new file mode 100644 index 0000000000..2286c2ea60 --- /dev/null +++ b/arch/arm/mm/fault-armv.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/fault-armv.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2002 Russell King + */ +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bitops.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/pagemap.h> +#include <linux/gfp.h> + +#include <asm/bugs.h> +#include <asm/cacheflush.h> +#include <asm/cachetype.h> +#include <asm/tlbflush.h> + +#include "mm.h" + +static pteval_t shared_pte_mask = L_PTE_MT_BUFFERABLE; + +#if __LINUX_ARM_ARCH__ < 6 +/* + * We take the easy way out of this problem - we make the + * PTE uncacheable. However, we leave the write buffer on. + * + * Note that the pte lock held when calling update_mmu_cache must also + * guard the pte (somewhere else in the same mm) that we modify here. + * Therefore those configurations which might call adjust_pte (those + * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock. + */ +static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn, pte_t *ptep) +{ + pte_t entry = *ptep; + int ret; + + /* + * If this page is present, it's actually being shared. + */ + ret = pte_present(entry); + + /* + * If this page isn't present, or is already setup to + * fault (ie, is old), we can safely ignore any issues. + */ + if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) { + flush_cache_page(vma, address, pfn); + outer_flush_range((pfn << PAGE_SHIFT), + (pfn << PAGE_SHIFT) + PAGE_SIZE); + pte_val(entry) &= ~L_PTE_MT_MASK; + pte_val(entry) |= shared_pte_mask; + set_pte_at(vma->vm_mm, address, ptep, entry); + flush_tlb_page(vma, address); + } + + return ret; +} + +#if USE_SPLIT_PTE_PTLOCKS +/* + * If we are using split PTE locks, then we need to take the page + * lock here. Otherwise we are using shared mm->page_table_lock + * which is already locked, thus cannot take it. + */ +static inline void do_pte_lock(spinlock_t *ptl) +{ + /* + * Use nested version here to indicate that we are already + * holding one similar spinlock. + */ + spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); +} + +static inline void do_pte_unlock(spinlock_t *ptl) +{ + spin_unlock(ptl); +} +#else /* !USE_SPLIT_PTE_PTLOCKS */ +static inline void do_pte_lock(spinlock_t *ptl) {} +static inline void do_pte_unlock(spinlock_t *ptl) {} +#endif /* USE_SPLIT_PTE_PTLOCKS */ + +static int adjust_pte(struct vm_area_struct *vma, unsigned long address, + unsigned long pfn) +{ + spinlock_t *ptl; + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int ret; + + pgd = pgd_offset(vma->vm_mm, address); + if (pgd_none_or_clear_bad(pgd)) + return 0; + + p4d = p4d_offset(pgd, address); + if (p4d_none_or_clear_bad(p4d)) + return 0; + + pud = pud_offset(p4d, address); + if (pud_none_or_clear_bad(pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (pmd_none_or_clear_bad(pmd)) + return 0; + + /* + * This is called while another page table is mapped, so we + * must use the nested version. This also means we need to + * open-code the spin-locking. + */ + pte = pte_offset_map_nolock(vma->vm_mm, pmd, address, &ptl); + if (!pte) + return 0; + + do_pte_lock(ptl); + + ret = do_adjust_pte(vma, address, pfn, pte); + + do_pte_unlock(ptl); + pte_unmap(pte); + + return ret; +} + +static void +make_coherent(struct address_space *mapping, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned long pfn) +{ + struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *mpnt; + unsigned long offset; + pgoff_t pgoff; + int aliases = 0; + + pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); + + /* + * If we have any shared mappings that are in the same mm + * space, then we need to handle them specially to maintain + * cache coherency. + */ + flush_dcache_mmap_lock(mapping); + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { + /* + * If this VMA is not in our MM, we can ignore it. + * Note that we intentionally mask out the VMA + * that we are fixing up. + */ + if (mpnt->vm_mm != mm || mpnt == vma) + continue; + if (!(mpnt->vm_flags & VM_MAYSHARE)) + continue; + offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; + aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn); + } + flush_dcache_mmap_unlock(mapping); + if (aliases) + do_adjust_pte(vma, addr, pfn, ptep); +} + +/* + * Take care of architecture specific things when placing a new PTE into + * a page table, or changing an existing PTE. Basically, there are two + * things that we need to take care of: + * + * 1. If PG_dcache_clean is not set for the page, we need to ensure + * that any cache entries for the kernels virtual memory + * range are written back to the page. + * 2. If we have multiple shared mappings of the same space in + * an object, we need to deal with the cache aliasing issues. + * + * Note that the pte lock will be held. + */ +void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, unsigned int nr) +{ + unsigned long pfn = pte_pfn(*ptep); + struct address_space *mapping; + struct folio *folio; + + if (!pfn_valid(pfn)) + return; + + /* + * The zero page is never written to, so never has any dirty + * cache lines, and therefore never needs to be flushed. + */ + if (is_zero_pfn(pfn)) + return; + + folio = page_folio(pfn_to_page(pfn)); + mapping = folio_flush_mapping(folio); + if (!test_and_set_bit(PG_dcache_clean, &folio->flags)) + __flush_dcache_folio(mapping, folio); + if (mapping) { + if (cache_is_vivt()) + make_coherent(mapping, vma, addr, ptep, pfn); + else if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); + } +} +#endif /* __LINUX_ARM_ARCH__ < 6 */ + +/* + * Check whether the write buffer has physical address aliasing + * issues. If it has, we need to avoid them for the case where + * we have several shared mappings of the same object in user + * space. + */ +static int __init check_writebuffer(unsigned long *p1, unsigned long *p2) +{ + register unsigned long zero = 0, one = 1, val; + + local_irq_disable(); + mb(); + *p1 = one; + mb(); + *p2 = zero; + mb(); + val = *p1; + mb(); + local_irq_enable(); + return val != zero; +} + +void __init check_writebuffer_bugs(void) +{ + struct page *page; + const char *reason; + unsigned long v = 1; + + pr_info("CPU: Testing write buffer coherency: "); + + page = alloc_page(GFP_KERNEL); + if (page) { + unsigned long *p1, *p2; + pgprot_t prot = __pgprot_modify(PAGE_KERNEL, + L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE); + + p1 = vmap(&page, 1, VM_IOREMAP, prot); + p2 = vmap(&page, 1, VM_IOREMAP, prot); + + if (p1 && p2) { + v = check_writebuffer(p1, p2); + reason = "enabling work-around"; + } else { + reason = "unable to map memory\n"; + } + + vunmap(p1); + vunmap(p2); + put_page(page); + } else { + reason = "unable to grab page\n"; + } + + if (v) { + pr_cont("failed, %s\n", reason); + shared_pte_mask = L_PTE_MT_UNCACHED; + } else { + pr_cont("ok\n"); + } +} diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c new file mode 100644 index 0000000000..fef62e4a9e --- /dev/null +++ b/arch/arm/mm/fault.c @@ -0,0 +1,611 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/fault.c + * + * Copyright (C) 1995 Linus Torvalds + * Modifications for ARM processor (c) 1995-2004 Russell King + */ +#include <linux/extable.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/hardirq.h> +#include <linux/init.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/page-flags.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> +#include <linux/highmem.h> +#include <linux/perf_event.h> +#include <linux/kfence.h> + +#include <asm/system_misc.h> +#include <asm/system_info.h> +#include <asm/tlbflush.h> + +#include "fault.h" + +#ifdef CONFIG_MMU + +/* + * This is useful to dump out the page tables associated with + * 'addr' in mm 'mm'. + */ +void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + + if (!mm) + mm = &init_mm; + + pgd = pgd_offset(mm, addr); + printk("%s[%08lx] *pgd=%08llx", lvl, addr, (long long)pgd_val(*pgd)); + + do { + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + p4d = p4d_offset(pgd, addr); + if (p4d_none(*p4d)) + break; + + if (p4d_bad(*p4d)) { + pr_cont("(bad)"); + break; + } + + pud = pud_offset(p4d, addr); + if (PTRS_PER_PUD != 1) + pr_cont(", *pud=%08llx", (long long)pud_val(*pud)); + + if (pud_none(*pud)) + break; + + if (pud_bad(*pud)) { + pr_cont("(bad)"); + break; + } + + pmd = pmd_offset(pud, addr); + if (PTRS_PER_PMD != 1) + pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd)); + + if (pmd_none(*pmd)) + break; + + if (pmd_bad(*pmd)) { + pr_cont("(bad)"); + break; + } + + /* We must not map this if we have highmem enabled */ + if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT))) + break; + + pte = pte_offset_map(pmd, addr); + if (!pte) + break; + + pr_cont(", *pte=%08llx", (long long)pte_val(*pte)); +#ifndef CONFIG_ARM_LPAE + pr_cont(", *ppte=%08llx", + (long long)pte_val(pte[PTE_HWTABLE_PTRS])); +#endif + pte_unmap(pte); + } while(0); + + pr_cont("\n"); +} +#else /* CONFIG_MMU */ +void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr) +{ } +#endif /* CONFIG_MMU */ + +static inline bool is_write_fault(unsigned int fsr) +{ + return (fsr & FSR_WRITE) && !(fsr & FSR_CM); +} + +static inline bool is_translation_fault(unsigned int fsr) +{ + int fs = fsr_fs(fsr); +#ifdef CONFIG_ARM_LPAE + if ((fs & FS_MMU_NOLL_MASK) == FS_TRANS_NOLL) + return true; +#else + if (fs == FS_L1_TRANS || fs == FS_L2_TRANS) + return true; +#endif + return false; +} + +static void die_kernel_fault(const char *msg, struct mm_struct *mm, + unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + bust_spinlocks(1); + pr_alert("8<--- cut here ---\n"); + pr_alert("Unable to handle kernel %s at virtual address %08lx when %s\n", + msg, addr, fsr & FSR_LNX_PF ? "execute" : + fsr & FSR_WRITE ? "write" : "read"); + + show_pte(KERN_ALERT, mm, addr); + die("Oops", regs, fsr); + bust_spinlocks(0); + make_task_dead(SIGKILL); +} + +/* + * Oops. The kernel tried to access some page that wasn't present. + */ +static void +__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + const char *msg; + /* + * Are we prepared to handle this kernel fault? + */ + if (fixup_exception(regs)) + return; + + /* + * No handler, we'll have to terminate things with extreme prejudice. + */ + if (addr < PAGE_SIZE) { + msg = "NULL pointer dereference"; + } else { + if (is_translation_fault(fsr) && + kfence_handle_page_fault(addr, is_write_fault(fsr), regs)) + return; + + msg = "paging request"; + } + + die_kernel_fault(msg, mm, addr, fsr, regs); +} + +/* + * Something tried to access memory that isn't in our memory map.. + * User mode accesses just cause a SIGSEGV + */ +static void +__do_user_fault(unsigned long addr, unsigned int fsr, unsigned int sig, + int code, struct pt_regs *regs) +{ + struct task_struct *tsk = current; + + if (addr > TASK_SIZE) + harden_branch_predictor(); + +#ifdef CONFIG_DEBUG_USER + if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || + ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { + pr_err("8<--- cut here ---\n"); + pr_err("%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n", + tsk->comm, sig, addr, fsr); + show_pte(KERN_ERR, tsk->mm, addr); + show_regs(regs); + } +#endif +#ifndef CONFIG_KUSER_HELPERS + if ((sig == SIGSEGV) && ((addr & PAGE_MASK) == 0xffff0000)) + printk_ratelimited(KERN_DEBUG + "%s: CONFIG_KUSER_HELPERS disabled at 0x%08lx\n", + tsk->comm, addr); +#endif + + tsk->thread.address = addr; + tsk->thread.error_code = fsr; + tsk->thread.trap_no = 14; + force_sig_fault(sig, code, (void __user *)addr); +} + +void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->active_mm; + + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (user_mode(regs)) + __do_user_fault(addr, fsr, SIGSEGV, SEGV_MAPERR, regs); + else + __do_kernel_fault(mm, addr, fsr, regs); +} + +#ifdef CONFIG_MMU +#define VM_FAULT_BADMAP ((__force vm_fault_t)0x010000) +#define VM_FAULT_BADACCESS ((__force vm_fault_t)0x020000) + +static inline bool is_permission_fault(unsigned int fsr) +{ + int fs = fsr_fs(fsr); +#ifdef CONFIG_ARM_LPAE + if ((fs & FS_MMU_NOLL_MASK) == FS_PERM_NOLL) + return true; +#else + if (fs == FS_L1_PERM || fs == FS_L2_PERM) + return true; +#endif + return false; +} + +static int __kprobes +do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int sig, code; + vm_fault_t fault; + unsigned int flags = FAULT_FLAG_DEFAULT; + unsigned long vm_flags = VM_ACCESS_FLAGS; + + if (kprobe_page_fault(regs, fsr)) + return 0; + + + /* Enable interrupts if they were enabled in the parent context. */ + if (interrupts_enabled(regs)) + local_irq_enable(); + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (faulthandler_disabled() || !mm) + goto no_context; + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + + if (is_write_fault(fsr)) { + flags |= FAULT_FLAG_WRITE; + vm_flags = VM_WRITE; + } + + if (fsr & FSR_LNX_PF) { + vm_flags = VM_EXEC; + + if (is_permission_fault(fsr) && !user_mode(regs)) + die_kernel_fault("execution of memory", + mm, addr, fsr, regs); + } + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + +retry: + vma = lock_mm_and_find_vma(mm, addr, regs); + if (unlikely(!vma)) { + fault = VM_FAULT_BADMAP; + goto bad_area; + } + + /* + * ok, we have a good vm_area for this memory access, check the + * permissions on the VMA allow for the fault which occurred. + */ + if (!(vma->vm_flags & vm_flags)) + fault = VM_FAULT_BADACCESS; + else + fault = handle_mm_fault(vma, addr & PAGE_MASK, flags, regs); + + /* If we need to retry but a fatal signal is pending, handle the + * signal first. We do not need to release the mmap_lock because + * it would already be released in __lock_page_or_retry in + * mm/filemap.c. */ + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + goto no_context; + return 0; + } + + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return 0; + + if (!(fault & VM_FAULT_ERROR)) { + if (fault & VM_FAULT_RETRY) { + flags |= FAULT_FLAG_TRIED; + goto retry; + } + } + + mmap_read_unlock(mm); + + /* + * Handle the "normal" case first - VM_FAULT_MAJOR + */ + if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS)))) + return 0; + +bad_area: + /* + * If we are in kernel mode at this point, we + * have no context to handle this fault with. + */ + if (!user_mode(regs)) + goto no_context; + + if (fault & VM_FAULT_OOM) { + /* + * We ran out of memory, call the OOM killer, and return to + * userspace (which will retry the fault, or kill us if we + * got oom-killed) + */ + pagefault_out_of_memory(); + return 0; + } + + if (fault & VM_FAULT_SIGBUS) { + /* + * We had some memory, but were unable to + * successfully fix up this page fault. + */ + sig = SIGBUS; + code = BUS_ADRERR; + } else { + /* + * Something tried to access memory that + * isn't in our memory map.. + */ + sig = SIGSEGV; + code = fault == VM_FAULT_BADACCESS ? + SEGV_ACCERR : SEGV_MAPERR; + } + + __do_user_fault(addr, fsr, sig, code, regs); + return 0; + +no_context: + __do_kernel_fault(mm, addr, fsr, regs); + return 0; +} +#else /* CONFIG_MMU */ +static int +do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + return 0; +} +#endif /* CONFIG_MMU */ + +/* + * First Level Translation Fault Handler + * + * We enter here because the first level page table doesn't contain + * a valid entry for the address. + * + * If the address is in kernel space (>= TASK_SIZE), then we are + * probably faulting in the vmalloc() area. + * + * If the init_task's first level page tables contains the relevant + * entry, we copy the it to this task. If not, we send the process + * a signal, fixup the exception, or oops the kernel. + * + * NOTE! We MUST NOT take any locks for this case. We may be in an + * interrupt or a critical region, and should only copy the information + * from the master page table, nothing more. + */ +#ifdef CONFIG_MMU +static int __kprobes +do_translation_fault(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + unsigned int index; + pgd_t *pgd, *pgd_k; + p4d_t *p4d, *p4d_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + if (addr < TASK_SIZE) + return do_page_fault(addr, fsr, regs); + + if (user_mode(regs)) + goto bad_area; + + index = pgd_index(addr); + + pgd = cpu_get_pgd() + index; + pgd_k = init_mm.pgd + index; + + p4d = p4d_offset(pgd, addr); + p4d_k = p4d_offset(pgd_k, addr); + + if (p4d_none(*p4d_k)) + goto bad_area; + if (!p4d_present(*p4d)) + set_p4d(p4d, *p4d_k); + + pud = pud_offset(p4d, addr); + pud_k = pud_offset(p4d_k, addr); + + if (pud_none(*pud_k)) + goto bad_area; + if (!pud_present(*pud)) + set_pud(pud, *pud_k); + + pmd = pmd_offset(pud, addr); + pmd_k = pmd_offset(pud_k, addr); + +#ifdef CONFIG_ARM_LPAE + /* + * Only one hardware entry per PMD with LPAE. + */ + index = 0; +#else + /* + * On ARM one Linux PGD entry contains two hardware entries (see page + * tables layout in pgtable.h). We normally guarantee that we always + * fill both L1 entries. But create_mapping() doesn't follow the rule. + * It can create inidividual L1 entries, so here we have to call + * pmd_none() check for the entry really corresponded to address, not + * for the first of pair. + */ + index = (addr >> SECTION_SHIFT) & 1; +#endif + if (pmd_none(pmd_k[index])) + goto bad_area; + + copy_pmd(pmd, pmd_k); + return 0; + +bad_area: + do_bad_area(addr, fsr, regs); + return 0; +} +#else /* CONFIG_MMU */ +static int +do_translation_fault(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + return 0; +} +#endif /* CONFIG_MMU */ + +/* + * Some section permission faults need to be handled gracefully. + * They can happen due to a __{get,put}_user during an oops. + */ +#ifndef CONFIG_ARM_LPAE +static int +do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + do_bad_area(addr, fsr, regs); + return 0; +} +#endif /* CONFIG_ARM_LPAE */ + +/* + * This abort handler always returns "fault". + */ +static int +do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + return 1; +} + +struct fsr_info { + int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs); + int sig; + int code; + const char *name; +}; + +/* FSR definition */ +#ifdef CONFIG_ARM_LPAE +#include "fsr-3level.c" +#else +#include "fsr-2level.c" +#endif + +void __init +hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), + int sig, int code, const char *name) +{ + if (nr < 0 || nr >= ARRAY_SIZE(fsr_info)) + BUG(); + + fsr_info[nr].fn = fn; + fsr_info[nr].sig = sig; + fsr_info[nr].code = code; + fsr_info[nr].name = name; +} + +/* + * Dispatch a data abort to the relevant handler. + */ +asmlinkage void +do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) +{ + const struct fsr_info *inf = fsr_info + fsr_fs(fsr); + + if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) + return; + + pr_alert("8<--- cut here ---\n"); + pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n", + inf->name, fsr, addr); + show_pte(KERN_ALERT, current->mm, addr); + + arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr, + fsr, 0); +} + +void __init +hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *), + int sig, int code, const char *name) +{ + if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info)) + BUG(); + + ifsr_info[nr].fn = fn; + ifsr_info[nr].sig = sig; + ifsr_info[nr].code = code; + ifsr_info[nr].name = name; +} + +asmlinkage void +do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) +{ + const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr); + + if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs)) + return; + + pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", + inf->name, ifsr, addr); + + arm_notify_die("", regs, inf->sig, inf->code, (void __user *)addr, + ifsr, 0); +} + +/* + * Abort handler to be used only during first unmasking of asynchronous aborts + * on the boot CPU. This makes sure that the machine will not die if the + * firmware/bootloader left an imprecise abort pending for us to trip over. + */ +static int __init early_abort_handler(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during " + "first unmask, this is most likely caused by a " + "firmware/bootloader bug.\n", fsr); + + return 0; +} + +void __init early_abt_enable(void) +{ + fsr_info[FSR_FS_AEA].fn = early_abort_handler; + local_abt_enable(); + fsr_info[FSR_FS_AEA].fn = do_bad; +} + +#ifndef CONFIG_ARM_LPAE +static int __init exceptions_init(void) +{ + if (cpu_architecture() >= CPU_ARCH_ARMv6) { + hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR, + "I-cache maintenance fault"); + } + + if (cpu_architecture() >= CPU_ARCH_ARMv7) { + /* + * TODO: Access flag faults introduced in ARMv6K. + * Runtime check for 'K' extension is needed + */ + hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR, + "section access flag fault"); + hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR, + "section access flag fault"); + } + + return 0; +} + +arch_initcall(exceptions_init); +#endif diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h new file mode 100644 index 0000000000..e8f8c19025 --- /dev/null +++ b/arch/arm/mm/fault.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_ARM_FAULT_H +#define __ARCH_ARM_FAULT_H + +/* + * Fault status register encodings. We steal bit 31 for our own purposes. + */ +#define FSR_LNX_PF (1 << 31) +#define FSR_CM (1 << 13) +#define FSR_WRITE (1 << 11) +#define FSR_FS4 (1 << 10) +#define FSR_FS3_0 (15) +#define FSR_FS5_0 (0x3f) + +#ifdef CONFIG_ARM_LPAE +#define FSR_FS_AEA 17 +#define FS_TRANS_NOLL 0x4 +#define FS_PERM_NOLL 0xC +#define FS_MMU_NOLL_MASK 0x3C + +static inline int fsr_fs(unsigned int fsr) +{ + return fsr & FSR_FS5_0; +} +#else +#define FSR_FS_AEA 22 +#define FS_L1_TRANS 0x5 +#define FS_L2_TRANS 0x7 +#define FS_L1_PERM 0xD +#define FS_L2_PERM 0xF + +static inline int fsr_fs(unsigned int fsr) +{ + return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6; +} +#endif + +void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs); +void early_abt_enable(void); +asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr, + struct pt_regs *regs); +asmlinkage void do_PrefetchAbort(unsigned long addr, unsigned int ifsr, + struct pt_regs *regs); + +#endif /* __ARCH_ARM_FAULT_H */ diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c new file mode 100644 index 0000000000..d19d140a10 --- /dev/null +++ b/arch/arm/mm/flush.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/flush.c + * + * Copyright (C) 1995-2002 Russell King + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/highmem.h> + +#include <asm/cacheflush.h> +#include <asm/cachetype.h> +#include <asm/highmem.h> +#include <asm/smp_plat.h> +#include <asm/tlbflush.h> +#include <linux/hugetlb.h> + +#include "mm.h" + +#ifdef CONFIG_ARM_HEAVY_MB +void (*soc_mb)(void); + +void arm_heavy_mb(void) +{ +#ifdef CONFIG_OUTER_CACHE_SYNC + if (outer_cache.sync) + outer_cache.sync(); +#endif + if (soc_mb) + soc_mb(); +} +EXPORT_SYMBOL(arm_heavy_mb); +#endif + +#ifdef CONFIG_CPU_CACHE_VIPT + +static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) +{ + unsigned long to = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + const int zero = 0; + + set_top_pte(to, pfn_pte(pfn, PAGE_KERNEL)); + + asm( "mcrr p15, 0, %1, %0, c14\n" + " mcr p15, 0, %2, c7, c10, 4" + : + : "r" (to), "r" (to + PAGE_SIZE - 1), "r" (zero) + : "cc"); +} + +static void flush_icache_alias(unsigned long pfn, unsigned long vaddr, unsigned long len) +{ + unsigned long va = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT); + unsigned long offset = vaddr & (PAGE_SIZE - 1); + unsigned long to; + + set_top_pte(va, pfn_pte(pfn, PAGE_KERNEL)); + to = va + offset; + flush_icache_range(to, to + len); +} + +void flush_cache_mm(struct mm_struct *mm) +{ + if (cache_is_vivt()) { + vivt_flush_cache_mm(mm); + return; + } + + if (cache_is_vipt_aliasing()) { + asm( "mcr p15, 0, %0, c7, c14, 0\n" + " mcr p15, 0, %0, c7, c10, 4" + : + : "r" (0) + : "cc"); + } +} + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) +{ + if (cache_is_vivt()) { + vivt_flush_cache_range(vma, start, end); + return; + } + + if (cache_is_vipt_aliasing()) { + asm( "mcr p15, 0, %0, c7, c14, 0\n" + " mcr p15, 0, %0, c7, c10, 4" + : + : "r" (0) + : "cc"); + } + + if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); +} + +void flush_cache_pages(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn, unsigned int nr) +{ + if (cache_is_vivt()) { + vivt_flush_cache_pages(vma, user_addr, pfn, nr); + return; + } + + if (cache_is_vipt_aliasing()) { + flush_pfn_alias(pfn, user_addr); + __flush_icache_all(); + } + + if (vma->vm_flags & VM_EXEC && icache_is_vivt_asid_tagged()) + __flush_icache_all(); +} + +#else +#define flush_pfn_alias(pfn,vaddr) do { } while (0) +#define flush_icache_alias(pfn,vaddr,len) do { } while (0) +#endif + +#define FLAG_PA_IS_EXEC 1 +#define FLAG_PA_CORE_IN_MM 2 + +static void flush_ptrace_access_other(void *args) +{ + __flush_icache_all(); +} + +static inline +void __flush_ptrace_access(struct page *page, unsigned long uaddr, void *kaddr, + unsigned long len, unsigned int flags) +{ + if (cache_is_vivt()) { + if (flags & FLAG_PA_CORE_IN_MM) { + unsigned long addr = (unsigned long)kaddr; + __cpuc_coherent_kern_range(addr, addr + len); + } + return; + } + + if (cache_is_vipt_aliasing()) { + flush_pfn_alias(page_to_pfn(page), uaddr); + __flush_icache_all(); + return; + } + + /* VIPT non-aliasing D-cache */ + if (flags & FLAG_PA_IS_EXEC) { + unsigned long addr = (unsigned long)kaddr; + if (icache_is_vipt_aliasing()) + flush_icache_alias(page_to_pfn(page), uaddr, len); + else + __cpuc_coherent_kern_range(addr, addr + len); + if (cache_ops_need_broadcast()) + smp_call_function(flush_ptrace_access_other, + NULL, 1); + } +} + +static +void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *kaddr, unsigned long len) +{ + unsigned int flags = 0; + if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) + flags |= FLAG_PA_CORE_IN_MM; + if (vma->vm_flags & VM_EXEC) + flags |= FLAG_PA_IS_EXEC; + __flush_ptrace_access(page, uaddr, kaddr, len, flags); +} + +void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, + void *kaddr, unsigned long len) +{ + unsigned int flags = FLAG_PA_CORE_IN_MM|FLAG_PA_IS_EXEC; + + __flush_ptrace_access(page, uaddr, kaddr, len, flags); +} + +/* + * Copy user data from/to a page which is mapped into a different + * processes address space. Really, we want to allow our "user + * space" model to handle this. + * + * Note that this code needs to run on the current CPU. + */ +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ +#ifdef CONFIG_SMP + preempt_disable(); +#endif + memcpy(dst, src, len); + flush_ptrace_access(vma, page, uaddr, dst, len); +#ifdef CONFIG_SMP + preempt_enable(); +#endif +} + +void __flush_dcache_folio(struct address_space *mapping, struct folio *folio) +{ + /* + * Writeback any data associated with the kernel mapping of this + * page. This ensures that data in the physical page is mutually + * coherent with the kernels mapping. + */ + if (!folio_test_highmem(folio)) { + __cpuc_flush_dcache_area(folio_address(folio), + folio_size(folio)); + } else { + unsigned long i; + if (cache_is_vipt_nonaliasing()) { + for (i = 0; i < folio_nr_pages(folio); i++) { + void *addr = kmap_local_folio(folio, + i * PAGE_SIZE); + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_local(addr); + } + } else { + for (i = 0; i < folio_nr_pages(folio); i++) { + void *addr = kmap_high_get(folio_page(folio, i)); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(folio_page(folio, i)); + } + } + } + } + + /* + * If this is a page cache page, and we have an aliasing VIPT cache, + * we only need to do one flush - which would be at the relevant + * userspace colour, which is congruent with page->index. + */ + if (mapping && cache_is_vipt_aliasing()) + flush_pfn_alias(folio_pfn(folio), folio_pos(folio)); +} + +static void __flush_dcache_aliases(struct address_space *mapping, struct folio *folio) +{ + struct mm_struct *mm = current->active_mm; + struct vm_area_struct *vma; + pgoff_t pgoff, pgoff_end; + + /* + * There are possible user space mappings of this page: + * - VIVT cache: we need to also write back and invalidate all user + * data in the current VM view associated with this page. + * - aliasing VIPT: we only need to find one mapping of this page. + */ + pgoff = folio->index; + pgoff_end = pgoff + folio_nr_pages(folio) - 1; + + flush_dcache_mmap_lock(mapping); + vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff_end) { + unsigned long start, offset, pfn; + unsigned int nr; + + /* + * If this VMA is not in our MM, we can ignore it. + */ + if (vma->vm_mm != mm) + continue; + if (!(vma->vm_flags & VM_MAYSHARE)) + continue; + + start = vma->vm_start; + pfn = folio_pfn(folio); + nr = folio_nr_pages(folio); + offset = pgoff - vma->vm_pgoff; + if (offset > -nr) { + pfn -= offset; + nr += offset; + } else { + start += offset * PAGE_SIZE; + } + if (start + nr * PAGE_SIZE > vma->vm_end) + nr = (vma->vm_end - start) / PAGE_SIZE; + + flush_cache_pages(vma, start, pfn, nr); + } + flush_dcache_mmap_unlock(mapping); +} + +#if __LINUX_ARM_ARCH__ >= 6 +void __sync_icache_dcache(pte_t pteval) +{ + unsigned long pfn; + struct folio *folio; + struct address_space *mapping; + + if (cache_is_vipt_nonaliasing() && !pte_exec(pteval)) + /* only flush non-aliasing VIPT caches for exec mappings */ + return; + pfn = pte_pfn(pteval); + if (!pfn_valid(pfn)) + return; + + folio = page_folio(pfn_to_page(pfn)); + if (cache_is_vipt_aliasing()) + mapping = folio_flush_mapping(folio); + else + mapping = NULL; + + if (!test_and_set_bit(PG_dcache_clean, &folio->flags)) + __flush_dcache_folio(mapping, folio); + + if (pte_exec(pteval)) + __flush_icache_all(); +} +#endif + +/* + * Ensure cache coherency between kernel mapping and userspace mapping + * of this page. + * + * We have three cases to consider: + * - VIPT non-aliasing cache: fully coherent so nothing required. + * - VIVT: fully aliasing, so we need to handle every alias in our + * current VM view. + * - VIPT aliasing: need to handle one alias in our current VM view. + * + * If we need to handle aliasing: + * If the page only exists in the page cache and there are no user + * space mappings, we can be lazy and remember that we may have dirty + * kernel cache lines for later. Otherwise, we assume we have + * aliasing mappings. + * + * Note that we disable the lazy flush for SMP configurations where + * the cache maintenance operations are not automatically broadcasted. + */ +void flush_dcache_folio(struct folio *folio) +{ + struct address_space *mapping; + + /* + * The zero page is never written to, so never has any dirty + * cache lines, and therefore never needs to be flushed. + */ + if (is_zero_pfn(folio_pfn(folio))) + return; + + if (!cache_ops_need_broadcast() && cache_is_vipt_nonaliasing()) { + if (test_bit(PG_dcache_clean, &folio->flags)) + clear_bit(PG_dcache_clean, &folio->flags); + return; + } + + mapping = folio_flush_mapping(folio); + + if (!cache_ops_need_broadcast() && + mapping && !folio_mapped(folio)) + clear_bit(PG_dcache_clean, &folio->flags); + else { + __flush_dcache_folio(mapping, folio); + if (mapping && cache_is_vivt()) + __flush_dcache_aliases(mapping, folio); + else if (mapping) + __flush_icache_all(); + set_bit(PG_dcache_clean, &folio->flags); + } +} +EXPORT_SYMBOL(flush_dcache_folio); + +void flush_dcache_page(struct page *page) +{ + flush_dcache_folio(page_folio(page)); +} +EXPORT_SYMBOL(flush_dcache_page); +/* + * Flush an anonymous page so that users of get_user_pages() + * can safely access the data. The expected sequence is: + * + * get_user_pages() + * -> flush_anon_page + * memcpy() to/from page + * if written to page, flush_dcache_page() + */ +void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr); +void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) +{ + unsigned long pfn; + + /* VIPT non-aliasing caches need do nothing */ + if (cache_is_vipt_nonaliasing()) + return; + + /* + * Write back and invalidate userspace mapping. + */ + pfn = page_to_pfn(page); + if (cache_is_vivt()) { + flush_cache_page(vma, vmaddr, pfn); + } else { + /* + * For aliasing VIPT, we can flush an alias of the + * userspace address only. + */ + flush_pfn_alias(pfn, vmaddr); + __flush_icache_all(); + } + + /* + * Invalidate kernel mapping. No data should be contained + * in this mapping of the page. FIXME: this is overkill + * since we actually ask for a write-back and invalidate. + */ + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); +} diff --git a/arch/arm/mm/fsr-2level.c b/arch/arm/mm/fsr-2level.c new file mode 100644 index 0000000000..f2be951972 --- /dev/null +++ b/arch/arm/mm/fsr-2level.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +static struct fsr_info fsr_info[] = { + /* + * The following are the standard ARMv3 and ARMv4 aborts. ARMv5 + * defines these to be "precise" aborts. + */ + { do_bad, SIGSEGV, 0, "vector exception" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, + { do_bad, SIGKILL, 0, "terminal exception" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment exception" }, + { do_bad, SIGBUS, 0, "external abort on linefetch" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, + { do_bad, SIGBUS, 0, "external abort on linefetch" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, + /* + * The following are "imprecise" aborts, which are signalled by bit + * 10 of the FSR, and may not be recoverable. These are only + * supported if the CPU abort handler supports bit 10. + */ + { do_bad, SIGBUS, 0, "unknown 16" }, + { do_bad, SIGBUS, 0, "unknown 17" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "lock abort" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 21" }, + { do_bad, SIGBUS, BUS_OBJERR, "imprecise external abort" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 23" }, + { do_bad, SIGBUS, 0, "dcache parity error" }, /* xscale */ + { do_bad, SIGBUS, 0, "unknown 25" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "unknown 28" }, + { do_bad, SIGBUS, 0, "unknown 29" }, + { do_bad, SIGBUS, 0, "unknown 30" }, + { do_bad, SIGBUS, 0, "unknown 31" }, +}; + +static struct fsr_info ifsr_info[] = { + { do_bad, SIGBUS, 0, "unknown 0" }, + { do_bad, SIGBUS, 0, "unknown 1" }, + { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section access flag fault" }, + { do_bad, SIGBUS, 0, "unknown 4" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "section translation fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "page translation fault" }, + { do_bad, SIGBUS, 0, "external abort on non-linefetch" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "section domain fault" }, + { do_bad, SIGBUS, 0, "unknown 10" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "page domain fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_sect_fault, SIGSEGV, SEGV_ACCERR, "section permission fault" }, + { do_bad, SIGBUS, 0, "external abort on translation" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "page permission fault" }, + { do_bad, SIGBUS, 0, "unknown 16" }, + { do_bad, SIGBUS, 0, "unknown 17" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "unknown 20" }, + { do_bad, SIGBUS, 0, "unknown 21" }, + { do_bad, SIGBUS, 0, "unknown 22" }, + { do_bad, SIGBUS, 0, "unknown 23" }, + { do_bad, SIGBUS, 0, "unknown 24" }, + { do_bad, SIGBUS, 0, "unknown 25" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "unknown 28" }, + { do_bad, SIGBUS, 0, "unknown 29" }, + { do_bad, SIGBUS, 0, "unknown 30" }, + { do_bad, SIGBUS, 0, "unknown 31" }, +}; diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c new file mode 100644 index 0000000000..d0ae296365 --- /dev/null +++ b/arch/arm/mm/fsr-3level.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +static struct fsr_info fsr_info[] = { + { do_bad, SIGBUS, 0, "unknown 0" }, + { do_bad, SIGBUS, 0, "unknown 1" }, + { do_bad, SIGBUS, 0, "unknown 2" }, + { do_bad, SIGBUS, 0, "unknown 3" }, + { do_bad, SIGBUS, 0, "reserved translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, + { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, + { do_bad, SIGBUS, 0, "reserved access flag fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 access flag fault" }, + { do_bad, SIGBUS, 0, "reserved permission fault" }, + { do_bad, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, + { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, + { do_bad, SIGBUS, 0, "synchronous external abort" }, + { do_bad, SIGBUS, 0, "asynchronous external abort" }, + { do_bad, SIGBUS, 0, "unknown 18" }, + { do_bad, SIGBUS, 0, "unknown 19" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous abort (translation table walk)" }, + { do_bad, SIGBUS, 0, "synchronous parity error" }, + { do_bad, SIGBUS, 0, "asynchronous parity error" }, + { do_bad, SIGBUS, 0, "unknown 26" }, + { do_bad, SIGBUS, 0, "unknown 27" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk" }, + { do_bad, SIGBUS, 0, "unknown 32" }, + { do_bad, SIGBUS, BUS_ADRALN, "alignment fault" }, + { do_bad, SIGBUS, 0, "debug event" }, + { do_bad, SIGBUS, 0, "unknown 35" }, + { do_bad, SIGBUS, 0, "unknown 36" }, + { do_bad, SIGBUS, 0, "unknown 37" }, + { do_bad, SIGBUS, 0, "unknown 38" }, + { do_bad, SIGBUS, 0, "unknown 39" }, + { do_bad, SIGBUS, 0, "unknown 40" }, + { do_bad, SIGBUS, 0, "unknown 41" }, + { do_bad, SIGBUS, 0, "unknown 42" }, + { do_bad, SIGBUS, 0, "unknown 43" }, + { do_bad, SIGBUS, 0, "unknown 44" }, + { do_bad, SIGBUS, 0, "unknown 45" }, + { do_bad, SIGBUS, 0, "unknown 46" }, + { do_bad, SIGBUS, 0, "unknown 47" }, + { do_bad, SIGBUS, 0, "unknown 48" }, + { do_bad, SIGBUS, 0, "unknown 49" }, + { do_bad, SIGBUS, 0, "unknown 50" }, + { do_bad, SIGBUS, 0, "unknown 51" }, + { do_bad, SIGBUS, 0, "implementation fault (lockdown abort)" }, + { do_bad, SIGBUS, 0, "unknown 53" }, + { do_bad, SIGBUS, 0, "unknown 54" }, + { do_bad, SIGBUS, 0, "unknown 55" }, + { do_bad, SIGBUS, 0, "unknown 56" }, + { do_bad, SIGBUS, 0, "unknown 57" }, + { do_bad, SIGBUS, 0, "implementation fault (coprocessor abort)" }, + { do_bad, SIGBUS, 0, "unknown 59" }, + { do_bad, SIGBUS, 0, "unknown 60" }, + { do_bad, SIGBUS, 0, "unknown 61" }, + { do_bad, SIGBUS, 0, "unknown 62" }, + { do_bad, SIGBUS, 0, "unknown 63" }, +}; + +#define ifsr_info fsr_info diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c new file mode 100644 index 0000000000..dd7a0277c5 --- /dev/null +++ b/arch/arm/mm/hugetlbpage.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch/arm/mm/hugetlbpage.c + * + * Copyright (C) 2012 ARM Ltd. + * + * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches + */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +/* + * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot + * of type casting from pmd_t * to pte_t *. + */ + +int pud_huge(pud_t pud) +{ + return 0; +} + +int pmd_huge(pmd_t pmd) +{ + return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); +} diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c new file mode 100644 index 0000000000..448e57c6f6 --- /dev/null +++ b/arch/arm/mm/idmap.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/mm_types.h> +#include <linux/pgtable.h> + +#include <asm/cputype.h> +#include <asm/idmap.h> +#include <asm/hwcap.h> +#include <asm/pgalloc.h> +#include <asm/sections.h> +#include <asm/system_info.h> + +/* + * Note: accesses outside of the kernel image and the identity map area + * are not supported on any CPU using the idmap tables as its current + * page tables. + */ +pgd_t *idmap_pgd __ro_after_init; +long long arch_phys_to_idmap_offset __ro_after_init; + +#ifdef CONFIG_ARM_LPAE +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pmd_t *pmd; + unsigned long next; + + if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) { + pmd = pmd_alloc_one(&init_mm, addr); + if (!pmd) { + pr_warn("Failed to allocate identity pmd.\n"); + return; + } + /* + * Copy the original PMD to ensure that the PMD entries for + * the kernel image are preserved. + */ + if (!pud_none(*pud)) + memcpy(pmd, pmd_offset(pud, 0), + PTRS_PER_PMD * sizeof(pmd_t)); + pud_populate(&init_mm, pud, pmd); + pmd += pmd_index(addr); + } else + pmd = pmd_offset(pud, addr); + + do { + next = pmd_addr_end(addr, end); + *pmd = __pmd((addr & PMD_MASK) | prot); + flush_pmd_entry(pmd); + } while (pmd++, addr = next, addr != end); +} +#else /* !CONFIG_ARM_LPAE */ +static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end, + unsigned long prot) +{ + pmd_t *pmd = pmd_offset(pud, addr); + + addr = (addr & PMD_MASK) | prot; + pmd[0] = __pmd(addr); + addr += SECTION_SIZE; + pmd[1] = __pmd(addr); + flush_pmd_entry(pmd); +} +#endif /* CONFIG_ARM_LPAE */ + +static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end, + unsigned long prot) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + idmap_add_pmd(pud, addr, next, prot); + } while (pud++, addr = next, addr != end); +} + +static void identity_mapping_add(pgd_t *pgd, const char *text_start, + const char *text_end, unsigned long prot) +{ + unsigned long addr, end; + unsigned long next; + + addr = virt_to_idmap(text_start); + end = virt_to_idmap(text_end); + pr_info("Setting up static identity map for 0x%lx - 0x%lx\n", addr, end); + + prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF; + + if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale_family()) + prot |= PMD_BIT4; + + pgd += pgd_index(addr); + do { + next = pgd_addr_end(addr, end); + idmap_add_pud(pgd, addr, next, prot); + } while (pgd++, addr = next, addr != end); +} + +extern char __idmap_text_start[], __idmap_text_end[]; + +static int __init init_static_idmap(void) +{ + idmap_pgd = pgd_alloc(&init_mm); + if (!idmap_pgd) + return -ENOMEM; + + identity_mapping_add(idmap_pgd, __idmap_text_start, + __idmap_text_end, 0); + + /* Flush L1 for the hardware to see this page table content */ + if (!(elf_hwcap & HWCAP_LPAE)) + flush_cache_louis(); + + return 0; +} +early_initcall(init_static_idmap); + +/* + * In order to soft-boot, we need to switch to a 1:1 mapping for the + * cpu_reset functions. This will then ensure that we have predictable + * results when turning off the mmu. + */ +void setup_mm_for_reboot(void) +{ + /* Switch to the identity mapping. */ + cpu_switch_mm(idmap_pgd, &init_mm); + local_flush_bp_all(); + +#ifdef CONFIG_CPU_HAS_ASID + /* + * We don't have a clean ASID for the identity mapping, which + * may clash with virtual addresses of the previous page tables + * and therefore potentially in the TLB. + */ + local_flush_tlb_all(); +#endif +} diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c new file mode 100644 index 0000000000..a42e4cd11d --- /dev/null +++ b/arch/arm/mm/init.c @@ -0,0 +1,488 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/init.c + * + * Copyright (C) 1995-2005 Russell King + */ +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/swap.h> +#include <linux/init.h> +#include <linux/mman.h> +#include <linux/sched/signal.h> +#include <linux/sched/task.h> +#include <linux/export.h> +#include <linux/nodemask.h> +#include <linux/initrd.h> +#include <linux/of_fdt.h> +#include <linux/highmem.h> +#include <linux/gfp.h> +#include <linux/memblock.h> +#include <linux/dma-map-ops.h> +#include <linux/sizes.h> +#include <linux/stop_machine.h> +#include <linux/swiotlb.h> + +#include <asm/cp15.h> +#include <asm/mach-types.h> +#include <asm/memblock.h> +#include <asm/page.h> +#include <asm/prom.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/set_memory.h> +#include <asm/system_info.h> +#include <asm/tlb.h> +#include <asm/fixmap.h> +#include <asm/ptdump.h> + +#include <asm/mach/arch.h> +#include <asm/mach/map.h> + +#include "mm.h" + +#ifdef CONFIG_CPU_CP15_MMU +unsigned long __init __clear_cr(unsigned long mask) +{ + cr_alignment = cr_alignment & ~mask; + return cr_alignment; +} +#endif + +#ifdef CONFIG_BLK_DEV_INITRD +static int __init parse_tag_initrd(const struct tag *tag) +{ + pr_warn("ATAG_INITRD is deprecated; " + "please update your bootloader.\n"); + phys_initrd_start = __virt_to_phys(tag->u.initrd.start); + phys_initrd_size = tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD, parse_tag_initrd); + +static int __init parse_tag_initrd2(const struct tag *tag) +{ + phys_initrd_start = tag->u.initrd.start; + phys_initrd_size = tag->u.initrd.size; + return 0; +} + +__tagtable(ATAG_INITRD2, parse_tag_initrd2); +#endif + +static void __init find_limits(unsigned long *min, unsigned long *max_low, + unsigned long *max_high) +{ + *max_low = PFN_DOWN(memblock_get_current_limit()); + *min = PFN_UP(memblock_start_of_DRAM()); + *max_high = PFN_DOWN(memblock_end_of_DRAM()); +} + +#ifdef CONFIG_ZONE_DMA + +phys_addr_t arm_dma_zone_size __read_mostly; +EXPORT_SYMBOL(arm_dma_zone_size); + +/* + * The DMA mask corresponding to the maximum bus address allocatable + * using GFP_DMA. The default here places no restriction on DMA + * allocations. This must be the smallest DMA mask in the system, + * so a successful GFP_DMA allocation will always satisfy this. + */ +phys_addr_t arm_dma_limit; +unsigned long arm_dma_pfn_limit; +#endif + +void __init setup_dma_zone(const struct machine_desc *mdesc) +{ +#ifdef CONFIG_ZONE_DMA + if (mdesc->dma_zone_size) { + arm_dma_zone_size = mdesc->dma_zone_size; + arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; + } else + arm_dma_limit = 0xffffffff; + arm_dma_pfn_limit = arm_dma_limit >> PAGE_SHIFT; +#endif +} + +static void __init zone_sizes_init(unsigned long min, unsigned long max_low, + unsigned long max_high) +{ + unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0 }; + +#ifdef CONFIG_ZONE_DMA + max_zone_pfn[ZONE_DMA] = min(arm_dma_pfn_limit, max_low); +#endif + max_zone_pfn[ZONE_NORMAL] = max_low; +#ifdef CONFIG_HIGHMEM + max_zone_pfn[ZONE_HIGHMEM] = max_high; +#endif + free_area_init(max_zone_pfn); +} + +#ifdef CONFIG_HAVE_ARCH_PFN_VALID +int pfn_valid(unsigned long pfn) +{ + phys_addr_t addr = __pfn_to_phys(pfn); + unsigned long pageblock_size = PAGE_SIZE * pageblock_nr_pages; + + if (__phys_to_pfn(addr) != pfn) + return 0; + + /* + * If address less than pageblock_size bytes away from a present + * memory chunk there still will be a memory map entry for it + * because we round freed memory map to the pageblock boundaries. + */ + if (memblock_overlaps_region(&memblock.memory, + ALIGN_DOWN(addr, pageblock_size), + pageblock_size)) + return 1; + + return 0; +} +EXPORT_SYMBOL(pfn_valid); +#endif + +static bool arm_memblock_steal_permitted = true; + +phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) +{ + phys_addr_t phys; + + BUG_ON(!arm_memblock_steal_permitted); + + phys = memblock_phys_alloc(size, align); + if (!phys) + panic("Failed to steal %pa bytes at %pS\n", + &size, (void *)_RET_IP_); + + memblock_phys_free(phys, size); + memblock_remove(phys, size); + + return phys; +} + +#ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND +void check_cpu_icache_size(int cpuid) +{ + u32 size, ctr; + + asm("mrc p15, 0, %0, c0, c0, 1" : "=r" (ctr)); + + size = 1 << ((ctr & 0xf) + 2); + if (cpuid != 0 && icache_size != size) + pr_info("CPU%u: detected I-Cache line size mismatch, workaround enabled\n", + cpuid); + if (icache_size > size) + icache_size = size; +} +#endif + +void __init arm_memblock_init(const struct machine_desc *mdesc) +{ + /* Register the kernel text, kernel data and initrd with memblock. */ + memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); + + reserve_initrd_mem(); + + arm_mm_memblock_reserve(); + + /* reserve any platform specific memblock areas */ + if (mdesc->reserve) + mdesc->reserve(); + + early_init_fdt_scan_reserved_mem(); + + /* reserve memory for DMA contiguous allocations */ + dma_contiguous_reserve(arm_dma_limit); + + arm_memblock_steal_permitted = false; + memblock_dump_all(); +} + +void __init bootmem_init(void) +{ + memblock_allow_resize(); + + find_limits(&min_low_pfn, &max_low_pfn, &max_pfn); + + early_memtest((phys_addr_t)min_low_pfn << PAGE_SHIFT, + (phys_addr_t)max_low_pfn << PAGE_SHIFT); + + /* + * sparse_init() tries to allocate memory from memblock, so must be + * done after the fixed reservations + */ + sparse_init(); + + /* + * Now free the memory - free_area_init needs + * the sparse mem_map arrays initialized by sparse_init() + * for memmap_init_zone(), otherwise all PFNs are invalid. + */ + zone_sizes_init(min_low_pfn, max_low_pfn, max_pfn); +} + +/* + * Poison init memory with an undefined instruction (ARM) or a branch to an + * undefined instruction (Thumb). + */ +static inline void poison_init_mem(void *s, size_t count) +{ + u32 *p = (u32 *)s; + for (; count != 0; count -= 4) + *p++ = 0xe7fddef0; +} + +static void __init free_highpages(void) +{ +#ifdef CONFIG_HIGHMEM + unsigned long max_low = max_low_pfn; + phys_addr_t range_start, range_end; + u64 i; + + /* set highmem page free */ + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + &range_start, &range_end, NULL) { + unsigned long start = PFN_UP(range_start); + unsigned long end = PFN_DOWN(range_end); + + /* Ignore complete lowmem entries */ + if (end <= max_low) + continue; + + /* Truncate partial highmem entries */ + if (start < max_low) + start = max_low; + + for (; start < end; start++) + free_highmem_page(pfn_to_page(start)); + } +#endif +} + +/* + * mem_init() marks the free areas in the mem_map and tells us how much + * memory is free. This is done after various parts of the system have + * claimed their memory after the kernel image. + */ +void __init mem_init(void) +{ +#ifdef CONFIG_ARM_LPAE + swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE); +#endif + + set_max_mapnr(pfn_to_page(max_pfn) - mem_map); + + /* this will put all unused low memory onto the freelists */ + memblock_free_all(); + +#ifdef CONFIG_SA1111 + /* now that our DMA memory is actually so designated, we can free it */ + free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, -1, NULL); +#endif + + free_highpages(); + + /* + * Check boundaries twice: Some fundamental inconsistencies can + * be detected at build time already. + */ +#ifdef CONFIG_MMU + BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + BUG_ON(TASK_SIZE > MODULES_VADDR); +#endif + +#ifdef CONFIG_HIGHMEM + BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); + BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); +#endif +} + +#ifdef CONFIG_STRICT_KERNEL_RWX +struct section_perm { + const char *name; + unsigned long start; + unsigned long end; + pmdval_t mask; + pmdval_t prot; + pmdval_t clear; +}; + +/* First section-aligned location at or after __start_rodata. */ +extern char __start_rodata_section_aligned[]; + +static struct section_perm nx_perms[] = { + /* Make pages tables, etc before _stext RW (set NX). */ + { + .name = "pre-text NX", + .start = PAGE_OFFSET, + .end = (unsigned long)_stext, + .mask = ~PMD_SECT_XN, + .prot = PMD_SECT_XN, + }, + /* Make init RW (set NX). */ + { + .name = "init NX", + .start = (unsigned long)__init_begin, + .end = (unsigned long)_sdata, + .mask = ~PMD_SECT_XN, + .prot = PMD_SECT_XN, + }, + /* Make rodata NX (set RO in ro_perms below). */ + { + .name = "rodata NX", + .start = (unsigned long)__start_rodata_section_aligned, + .end = (unsigned long)__init_begin, + .mask = ~PMD_SECT_XN, + .prot = PMD_SECT_XN, + }, +}; + +static struct section_perm ro_perms[] = { + /* Make kernel code and rodata RX (set RO). */ + { + .name = "text/rodata RO", + .start = (unsigned long)_stext, + .end = (unsigned long)__init_begin, +#ifdef CONFIG_ARM_LPAE + .mask = ~(L_PMD_SECT_RDONLY | PMD_SECT_AP2), + .prot = L_PMD_SECT_RDONLY | PMD_SECT_AP2, +#else + .mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE), + .prot = PMD_SECT_APX | PMD_SECT_AP_WRITE, + .clear = PMD_SECT_AP_WRITE, +#endif + }, +}; + +/* + * Updates section permissions only for the current mm (sections are + * copied into each mm). During startup, this is the init_mm. Is only + * safe to be called with preemption disabled, as under stop_machine(). + */ +static inline void section_update(unsigned long addr, pmdval_t mask, + pmdval_t prot, struct mm_struct *mm) +{ + pmd_t *pmd; + + pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), addr), addr), addr); + +#ifdef CONFIG_ARM_LPAE + pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot); +#else + if (addr & SECTION_SIZE) + pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot); + else + pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot); +#endif + flush_pmd_entry(pmd); + local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE); +} + +/* Make sure extended page tables are in use. */ +static inline bool arch_has_strict_perms(void) +{ + if (cpu_architecture() < CPU_ARCH_ARMv6) + return false; + + return !!(get_cr() & CR_XP); +} + +static void set_section_perms(struct section_perm *perms, int n, bool set, + struct mm_struct *mm) +{ + size_t i; + unsigned long addr; + + if (!arch_has_strict_perms()) + return; + + for (i = 0; i < n; i++) { + if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || + !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { + pr_err("BUG: %s section %lx-%lx not aligned to %lx\n", + perms[i].name, perms[i].start, perms[i].end, + SECTION_SIZE); + continue; + } + + for (addr = perms[i].start; + addr < perms[i].end; + addr += SECTION_SIZE) + section_update(addr, perms[i].mask, + set ? perms[i].prot : perms[i].clear, mm); + } + +} + +/** + * update_sections_early intended to be called only through stop_machine + * framework and executed by only one CPU while all other CPUs will spin and + * wait, so no locking is required in this function. + */ +static void update_sections_early(struct section_perm perms[], int n) +{ + struct task_struct *t, *s; + + for_each_process(t) { + if (t->flags & PF_KTHREAD) + continue; + for_each_thread(t, s) + if (s->mm) + set_section_perms(perms, n, true, s->mm); + } + set_section_perms(perms, n, true, current->active_mm); + set_section_perms(perms, n, true, &init_mm); +} + +static int __fix_kernmem_perms(void *unused) +{ + update_sections_early(nx_perms, ARRAY_SIZE(nx_perms)); + return 0; +} + +static void fix_kernmem_perms(void) +{ + stop_machine(__fix_kernmem_perms, NULL, NULL); +} + +static int __mark_rodata_ro(void *unused) +{ + update_sections_early(ro_perms, ARRAY_SIZE(ro_perms)); + return 0; +} + +void mark_rodata_ro(void) +{ + stop_machine(__mark_rodata_ro, NULL, NULL); + debug_checkwx(); +} + +#else +static inline void fix_kernmem_perms(void) { } +#endif /* CONFIG_STRICT_KERNEL_RWX */ + +void free_initmem(void) +{ + fix_kernmem_perms(); + + poison_init_mem(__init_begin, __init_end - __init_begin); + if (!machine_is_integrator() && !machine_is_cintegrator()) + free_initmem_default(-1); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start == initrd_start) + start = round_down(start, PAGE_SIZE); + if (end == initrd_end) + end = round_up(end, PAGE_SIZE); + + poison_init_mem((void *)start, PAGE_ALIGN(end) - start); + free_reserved_area((void *)start, (void *)end, -1, "initrd"); +} +#endif diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c new file mode 100644 index 0000000000..415d0a4542 --- /dev/null +++ b/arch/arm/mm/iomap.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/arm/mm/iomap.c + * + * Map IO port and PCI memory spaces so that {read,write}[bwl] can + * be used to access this memory. + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/ioport.h> +#include <linux/io.h> + +#include <asm/vga.h> + +unsigned long vga_base; +EXPORT_SYMBOL(vga_base); + +#ifdef __io +void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return __io(port); +} +EXPORT_SYMBOL(ioport_map); + +void ioport_unmap(void __iomem *addr) +{ +} +EXPORT_SYMBOL(ioport_unmap); +#endif + +#ifdef CONFIG_PCI +unsigned long pcibios_min_io = 0x1000; +EXPORT_SYMBOL(pcibios_min_io); + +unsigned long pcibios_min_mem = 0x01000000; +EXPORT_SYMBOL(pcibios_min_mem); + +void pci_iounmap(struct pci_dev *dev, void __iomem *addr) +{ + if ((unsigned long)addr >= VMALLOC_START && + (unsigned long)addr < VMALLOC_END) + iounmap(addr); +} +EXPORT_SYMBOL(pci_iounmap); +#endif diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c new file mode 100644 index 0000000000..2129070065 --- /dev/null +++ b/arch/arm/mm/ioremap.c @@ -0,0 +1,496 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/ioremap.c + * + * Re-map IO memory to kernel address space so that we can access it. + * + * (C) Copyright 1995 1996 Linus Torvalds + * + * Hacked for ARM by Phil Blundell <philb@gnu.org> + * Hacked to allow all architectures to build, and various cleanups + * by Russell King + * + * This allows a driver to remap an arbitrary region of bus memory into + * virtual space. One should *only* use readl, writel, memcpy_toio and + * so on with such remapped areas. + * + * Because the ARM only has a 32-bit address space we can't address the + * whole of the (physical) PCI space at once. PCI huge-mode addressing + * allows us to circumvent this restriction by splitting PCI space into + * two 2GB chunks and mapping only one at a time into processor memory. + * We use MMU protection domains to trap any attempt to access the bank + * that is not currently mapped. (This isn't fully implemented yet.) + */ +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/io.h> +#include <linux/sizes.h> +#include <linux/memblock.h> + +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/cacheflush.h> +#include <asm/early_ioremap.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> +#include <asm/set_memory.h> +#include <asm/system_info.h> + +#include <asm/mach/map.h> +#include <asm/mach/pci.h> +#include "mm.h" + + +LIST_HEAD(static_vmlist); + +static struct static_vm *find_static_vm_paddr(phys_addr_t paddr, + size_t size, unsigned int mtype) +{ + struct static_vm *svm; + struct vm_struct *vm; + + list_for_each_entry(svm, &static_vmlist, list) { + vm = &svm->vm; + if (!(vm->flags & VM_ARM_STATIC_MAPPING)) + continue; + if ((vm->flags & VM_ARM_MTYPE_MASK) != VM_ARM_MTYPE(mtype)) + continue; + + if (vm->phys_addr > paddr || + paddr + size - 1 > vm->phys_addr + vm->size - 1) + continue; + + return svm; + } + + return NULL; +} + +struct static_vm *find_static_vm_vaddr(void *vaddr) +{ + struct static_vm *svm; + struct vm_struct *vm; + + list_for_each_entry(svm, &static_vmlist, list) { + vm = &svm->vm; + + /* static_vmlist is ascending order */ + if (vm->addr > vaddr) + break; + + if (vm->addr <= vaddr && vm->addr + vm->size > vaddr) + return svm; + } + + return NULL; +} + +void __init add_static_vm_early(struct static_vm *svm) +{ + struct static_vm *curr_svm; + struct vm_struct *vm; + void *vaddr; + + vm = &svm->vm; + vm_area_add_early(vm); + vaddr = vm->addr; + + list_for_each_entry(curr_svm, &static_vmlist, list) { + vm = &curr_svm->vm; + + if (vm->addr > vaddr) + break; + } + list_add_tail(&svm->list, &curr_svm->list); +} + +int ioremap_page(unsigned long virt, unsigned long phys, + const struct mem_type *mtype) +{ + return ioremap_page_range(virt, virt + PAGE_SIZE, phys, + __pgprot(mtype->prot_pte)); +} +EXPORT_SYMBOL(ioremap_page); + +void __check_vmalloc_seq(struct mm_struct *mm) +{ + int seq; + + do { + seq = atomic_read(&init_mm.context.vmalloc_seq); + memcpy(pgd_offset(mm, VMALLOC_START), + pgd_offset_k(VMALLOC_START), + sizeof(pgd_t) * (pgd_index(VMALLOC_END) - + pgd_index(VMALLOC_START))); + /* + * Use a store-release so that other CPUs that observe the + * counter's new value are guaranteed to see the results of the + * memcpy as well. + */ + atomic_set_release(&mm->context.vmalloc_seq, seq); + } while (seq != atomic_read(&init_mm.context.vmalloc_seq)); +} + +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) +/* + * Section support is unsafe on SMP - If you iounmap and ioremap a region, + * the other CPUs will not see this change until their next context switch. + * Meanwhile, (eg) if an interrupt comes in on one of those other CPUs + * which requires the new ioremap'd region to be referenced, the CPU will + * reference the _old_ region. + * + * Note that get_vm_area_caller() allocates a guard 4K page, so we need to + * mask the size back to 1MB aligned or we will overflow in the loop below. + */ +static void unmap_area_sections(unsigned long virt, unsigned long size) +{ + unsigned long addr = virt, end = virt + (size & ~(SZ_1M - 1)); + pmd_t *pmdp = pmd_off_k(addr); + + do { + pmd_t pmd = *pmdp; + + if (!pmd_none(pmd)) { + /* + * Clear the PMD from the page table, and + * increment the vmalloc sequence so others + * notice this change. + * + * Note: this is still racy on SMP machines. + */ + pmd_clear(pmdp); + atomic_inc_return_release(&init_mm.context.vmalloc_seq); + + /* + * Free the page table, if there was one. + */ + if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE) + pte_free_kernel(&init_mm, pmd_page_vaddr(pmd)); + } + + addr += PMD_SIZE; + pmdp += 2; + } while (addr < end); + + /* + * Ensure that the active_mm is up to date - we want to + * catch any use-after-iounmap cases. + */ + check_vmalloc_seq(current->active_mm); + + flush_tlb_kernel_range(virt, end); +} + +static int +remap_area_sections(unsigned long virt, unsigned long pfn, + size_t size, const struct mem_type *type) +{ + unsigned long addr = virt, end = virt + size; + pmd_t *pmd = pmd_off_k(addr); + + /* + * Remove and free any PTE-based mapping, and + * sync the current kernel mapping. + */ + unmap_area_sections(virt, size); + + do { + pmd[0] = __pmd(__pfn_to_phys(pfn) | type->prot_sect); + pfn += SZ_1M >> PAGE_SHIFT; + pmd[1] = __pmd(__pfn_to_phys(pfn) | type->prot_sect); + pfn += SZ_1M >> PAGE_SHIFT; + flush_pmd_entry(pmd); + + addr += PMD_SIZE; + pmd += 2; + } while (addr < end); + + return 0; +} + +static int +remap_area_supersections(unsigned long virt, unsigned long pfn, + size_t size, const struct mem_type *type) +{ + unsigned long addr = virt, end = virt + size; + pmd_t *pmd = pmd_off_k(addr); + + /* + * Remove and free any PTE-based mapping, and + * sync the current kernel mapping. + */ + unmap_area_sections(virt, size); + do { + unsigned long super_pmd_val, i; + + super_pmd_val = __pfn_to_phys(pfn) | type->prot_sect | + PMD_SECT_SUPER; + super_pmd_val |= ((pfn >> (32 - PAGE_SHIFT)) & 0xf) << 20; + + for (i = 0; i < 8; i++) { + pmd[0] = __pmd(super_pmd_val); + pmd[1] = __pmd(super_pmd_val); + flush_pmd_entry(pmd); + + addr += PMD_SIZE; + pmd += 2; + } + + pfn += SUPERSECTION_SIZE >> PAGE_SHIFT; + } while (addr < end); + + return 0; +} +#endif + +static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, + unsigned long offset, size_t size, unsigned int mtype, void *caller) +{ + const struct mem_type *type; + int err; + unsigned long addr; + struct vm_struct *area; + phys_addr_t paddr = __pfn_to_phys(pfn); + +#ifndef CONFIG_ARM_LPAE + /* + * High mappings must be supersection aligned + */ + if (pfn >= 0x100000 && (paddr & ~SUPERSECTION_MASK)) + return NULL; +#endif + + type = get_mem_type(mtype); + if (!type) + return NULL; + + /* + * Page align the mapping size, taking account of any offset. + */ + size = PAGE_ALIGN(offset + size); + + /* + * Try to reuse one of the static mapping whenever possible. + */ + if (size && !(sizeof(phys_addr_t) == 4 && pfn >= 0x100000)) { + struct static_vm *svm; + + svm = find_static_vm_paddr(paddr, size, mtype); + if (svm) { + addr = (unsigned long)svm->vm.addr; + addr += paddr - svm->vm.phys_addr; + return (void __iomem *) (offset + addr); + } + } + + /* + * Don't allow RAM to be mapped with mismatched attributes - this + * causes problems with ARMv6+ + */ + if (WARN_ON(memblock_is_map_memory(PFN_PHYS(pfn)) && + mtype != MT_MEMORY_RW)) + return NULL; + + area = get_vm_area_caller(size, VM_IOREMAP, caller); + if (!area) + return NULL; + addr = (unsigned long)area->addr; + area->phys_addr = paddr; + +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) + if (DOMAIN_IO == 0 && + (((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) || + cpu_is_xsc3()) && pfn >= 0x100000 && + !((paddr | size | addr) & ~SUPERSECTION_MASK)) { + area->flags |= VM_ARM_SECTION_MAPPING; + err = remap_area_supersections(addr, pfn, size, type); + } else if (!((paddr | size | addr) & ~PMD_MASK)) { + area->flags |= VM_ARM_SECTION_MAPPING; + err = remap_area_sections(addr, pfn, size, type); + } else +#endif + err = ioremap_page_range(addr, addr + size, paddr, + __pgprot(type->prot_pte)); + + if (err) { + vunmap((void *)addr); + return NULL; + } + + flush_cache_vmap(addr, addr + size); + return (void __iomem *) (offset + addr); +} + +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) +{ + phys_addr_t last_addr; + unsigned long offset = phys_addr & ~PAGE_MASK; + unsigned long pfn = __phys_to_pfn(phys_addr); + + /* + * Don't allow wraparound or zero size + */ + last_addr = phys_addr + size - 1; + if (!size || last_addr < phys_addr) + return NULL; + + return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, + caller); +} + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses + * directly. + * + * NOTE! We need to allow non-page-aligned mappings too: we will obviously + * have to convert them into an offset in a page-aligned mapping, but the + * caller shouldn't need to know that small detail. + */ +void __iomem * +__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, + unsigned int mtype) +{ + return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(__arm_ioremap_pfn); + +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, + unsigned int, void *) = + __arm_ioremap_caller; + +void __iomem *ioremap(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space as memory. Needed when the kernel wants to execute + * code in external memory. This is needed for reprogramming source + * clocks that would affect normal memory for example. Please see + * CONFIG_GENERIC_ALLOCATOR for allocating external memory. + */ +void __iomem * +__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached) +{ + unsigned int mtype; + + if (cached) + mtype = MT_MEMORY_RWX; + else + mtype = MT_MEMORY_RWX_NONCACHED; + + return __arm_ioremap_caller(phys_addr, size, mtype, + __builtin_return_address(0)); +} + +void __arm_iomem_set_ro(void __iomem *ptr, size_t size) +{ + set_memory_ro((unsigned long)ptr, PAGE_ALIGN(size) / PAGE_SIZE); +} + +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size) +{ + return (__force void *)arch_ioremap_caller(phys_addr, size, + MT_MEMORY_RW, + __builtin_return_address(0)); +} + +void iounmap(volatile void __iomem *io_addr) +{ + void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr); + struct static_vm *svm; + + /* If this is a static mapping, we must leave it alone */ + svm = find_static_vm_vaddr(addr); + if (svm) + return; + +#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) + { + struct vm_struct *vm; + + vm = find_vm_area(addr); + + /* + * If this is a section based mapping we need to handle it + * specially as the VM subsystem does not know how to handle + * such a beast. + */ + if (vm && (vm->flags & VM_ARM_SECTION_MAPPING)) + unmap_area_sections((unsigned long)vm->addr, vm->size); + } +#endif + + vunmap(addr); +} +EXPORT_SYMBOL(iounmap); + +#if defined(CONFIG_PCI) || IS_ENABLED(CONFIG_PCMCIA) +static int pci_ioremap_mem_type = MT_DEVICE; + +void pci_ioremap_set_mem_type(int mem_type) +{ + pci_ioremap_mem_type = mem_type; +} + +int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr) +{ + unsigned long vaddr = (unsigned long)PCI_IOBASE + res->start; + + if (!(res->flags & IORESOURCE_IO)) + return -EINVAL; + + if (res->end > IO_SPACE_LIMIT) + return -EINVAL; + + return ioremap_page_range(vaddr, vaddr + resource_size(res), phys_addr, + __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte)); +} +EXPORT_SYMBOL(pci_remap_iospace); + +void __iomem *pci_remap_cfgspace(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_UNCACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL_GPL(pci_remap_cfgspace); +#endif + +/* + * Must be called after early_fixmap_init + */ +void __init early_ioremap_init(void) +{ + early_ioremap_setup(); +} + +bool arch_memremap_can_ram_remap(resource_size_t offset, size_t size, + unsigned long flags) +{ + unsigned long pfn = PHYS_PFN(offset); + + return memblock_is_map_memory(pfn); +} diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c new file mode 100644 index 0000000000..24d71b5db6 --- /dev/null +++ b/arch/arm/mm/kasan_init.c @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file contains kasan initialization code for ARM. + * + * Copyright (c) 2018 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com> + * Author: Linus Walleij <linus.walleij@linaro.org> + */ + +#define pr_fmt(fmt) "kasan: " fmt +#include <linux/kasan.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/sched/task.h> +#include <linux/start_kernel.h> +#include <linux/pgtable.h> +#include <asm/cputype.h> +#include <asm/highmem.h> +#include <asm/mach/map.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/procinfo.h> +#include <asm/proc-fns.h> + +#include "mm.h" + +static pgd_t tmp_pgd_table[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); + +pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss; + +static __init void *kasan_alloc_block(size_t size) +{ + return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_NOLEAKTRACE, NUMA_NO_NODE); +} + +static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, + unsigned long end, bool early) +{ + unsigned long next; + pte_t *ptep = pte_offset_kernel(pmdp, addr); + + do { + pte_t entry; + void *p; + + next = addr + PAGE_SIZE; + + if (!early) { + if (!pte_none(READ_ONCE(*ptep))) + continue; + + p = kasan_alloc_block(PAGE_SIZE); + if (!p) { + panic("%s failed to allocate shadow page for address 0x%lx\n", + __func__, addr); + return; + } + memset(p, KASAN_SHADOW_INIT, PAGE_SIZE); + entry = pfn_pte(virt_to_pfn(p), + __pgprot(pgprot_val(PAGE_KERNEL))); + } else if (pte_none(READ_ONCE(*ptep))) { + /* + * The early shadow memory is mapping all KASan + * operations to one and the same page in memory, + * "kasan_early_shadow_page" so that the instrumentation + * will work on a scratch area until we can set up the + * proper KASan shadow memory. + */ + entry = pfn_pte(virt_to_pfn(kasan_early_shadow_page), + __pgprot(_L_PTE_DEFAULT | L_PTE_DIRTY | L_PTE_XN)); + } else { + /* + * Early shadow mappings are PMD_SIZE aligned, so if the + * first entry is already set, they must all be set. + */ + return; + } + + set_pte_at(&init_mm, addr, ptep, entry); + } while (ptep++, addr = next, addr != end); +} + +/* + * The pmd (page middle directory) is only used on LPAE + */ +static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, + unsigned long end, bool early) +{ + unsigned long next; + pmd_t *pmdp = pmd_offset(pudp, addr); + + do { + if (pmd_none(*pmdp)) { + /* + * We attempt to allocate a shadow block for the PMDs + * used by the PTEs for this address if it isn't already + * allocated. + */ + void *p = early ? kasan_early_shadow_pte : + kasan_alloc_block(PAGE_SIZE); + + if (!p) { + panic("%s failed to allocate shadow block for address 0x%lx\n", + __func__, addr); + return; + } + pmd_populate_kernel(&init_mm, pmdp, p); + flush_pmd_entry(pmdp); + } + + next = pmd_addr_end(addr, end); + kasan_pte_populate(pmdp, addr, next, early); + } while (pmdp++, addr = next, addr != end); +} + +static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, + bool early) +{ + unsigned long next; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + + pgdp = pgd_offset_k(addr); + + do { + /* + * Allocate and populate the shadow block of p4d folded into + * pud folded into pmd if it doesn't already exist + */ + if (!early && pgd_none(*pgdp)) { + void *p = kasan_alloc_block(PAGE_SIZE); + + if (!p) { + panic("%s failed to allocate shadow block for address 0x%lx\n", + __func__, addr); + return; + } + pgd_populate(&init_mm, pgdp, p); + } + + next = pgd_addr_end(addr, end); + /* + * We just immediately jump over the p4d and pud page + * directories since we believe ARM32 will never gain four + * nor five level page tables. + */ + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); + + kasan_pmd_populate(pudp, addr, next, early); + } while (pgdp++, addr = next, addr != end); +} + +extern struct proc_info_list *lookup_processor_type(unsigned int); + +void __init kasan_early_init(void) +{ + struct proc_info_list *list; + + /* + * locate processor in the list of supported processor + * types. The linker builds this table for us from the + * entries in arch/arm/mm/proc-*.S + */ + list = lookup_processor_type(read_cpuid_id()); + if (list) { +#ifdef MULTI_CPU + processor = *list->proc; +#endif + } + + BUILD_BUG_ON((KASAN_SHADOW_END - (1UL << 29)) != KASAN_SHADOW_OFFSET); + /* + * We walk the page table and set all of the shadow memory to point + * to the scratch page. + */ + kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, true); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + for (; start && start < end; start += PMD_SIZE) + pmd_clear(pmd_off_k(start)); +} + +static int __init create_mapping(void *start, void *end) +{ + void *shadow_start, *shadow_end; + + shadow_start = kasan_mem_to_shadow(start); + shadow_end = kasan_mem_to_shadow(end); + + pr_info("Mapping kernel virtual memory block: %px-%px at shadow: %px-%px\n", + start, end, shadow_start, shadow_end); + + kasan_pgd_populate((unsigned long)shadow_start & PAGE_MASK, + PAGE_ALIGN((unsigned long)shadow_end), false); + return 0; +} + +void __init kasan_init(void) +{ + phys_addr_t pa_start, pa_end; + u64 i; + + /* + * We are going to perform proper setup of shadow memory. + * + * At first we should unmap early shadow (clear_pgds() call bellow). + * However, instrumented code can't execute without shadow memory. + * + * To keep the early shadow memory MMU tables around while setting up + * the proper shadow memory, we copy swapper_pg_dir (the initial page + * table) to tmp_pgd_table and use that to keep the early shadow memory + * mapped until the full shadow setup is finished. Then we swap back + * to the proper swapper_pg_dir. + */ + + memcpy(tmp_pgd_table, swapper_pg_dir, sizeof(tmp_pgd_table)); +#ifdef CONFIG_ARM_LPAE + /* We need to be in the same PGD or this won't work */ + BUILD_BUG_ON(pgd_index(KASAN_SHADOW_START) != + pgd_index(KASAN_SHADOW_END)); + memcpy(tmp_pmd_table, + (void*)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_START)), + sizeof(tmp_pmd_table)); + set_pgd(&tmp_pgd_table[pgd_index(KASAN_SHADOW_START)], + __pgd(__pa(tmp_pmd_table) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); +#endif + cpu_switch_mm(tmp_pgd_table, &init_mm); + local_flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC)) + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START), + kasan_mem_to_shadow((void *)VMALLOC_END)); + + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_END), + kasan_mem_to_shadow((void *)-1UL) + 1); + + for_each_mem_range(i, &pa_start, &pa_end) { + void *start = __va(pa_start); + void *end = __va(pa_end); + + /* Do not attempt to shadow highmem */ + if (pa_start >= arm_lowmem_limit) { + pr_info("Skip highmem block at %pa-%pa\n", &pa_start, &pa_end); + continue; + } + if (pa_end > arm_lowmem_limit) { + pr_info("Truncating shadow for memory block at %pa-%pa to lowmem region at %pa\n", + &pa_start, &pa_end, &arm_lowmem_limit); + end = __va(arm_lowmem_limit); + } + if (start >= end) { + pr_info("Skipping invalid memory block %pa-%pa (virtual %p-%p)\n", + &pa_start, &pa_end, start, end); + continue; + } + + create_mapping(start, end); + } + + /* + * 1. The module global variables are in MODULES_VADDR ~ MODULES_END, + * so we need to map this area if CONFIG_KASAN_VMALLOC=n. With + * VMALLOC support KASAN will manage this region dynamically, + * refer to kasan_populate_vmalloc() and ARM's implementation of + * module_alloc(). + * 2. PKMAP_BASE ~ PKMAP_BASE+PMD_SIZE's shadow and MODULES_VADDR + * ~ MODULES_END's shadow is in the same PMD_SIZE, so we can't + * use kasan_populate_zero_shadow. + */ + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && IS_ENABLED(CONFIG_MODULES)) + create_mapping((void *)MODULES_VADDR, (void *)(MODULES_END)); + create_mapping((void *)PKMAP_BASE, (void *)(PKMAP_BASE + PMD_SIZE)); + + /* + * KAsan may reuse the contents of kasan_early_shadow_pte directly, so + * we should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte_at(&init_mm, KASAN_SHADOW_START + i*PAGE_SIZE, + &kasan_early_shadow_pte[i], + pfn_pte(virt_to_pfn(kasan_early_shadow_page), + __pgprot(pgprot_val(PAGE_KERNEL) + | L_PTE_RDONLY))); + + cpu_switch_mm(swapper_pg_dir, &init_mm); + local_flush_tlb_all(); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + pr_info("Kernel address sanitizer initialized\n"); + init_task.kasan_depth = 0; +} diff --git a/arch/arm/mm/l2c-common.c b/arch/arm/mm/l2c-common.c new file mode 100644 index 0000000000..073b435ae0 --- /dev/null +++ b/arch/arm/mm/l2c-common.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2010 ARM Ltd. + * Written by Catalin Marinas <catalin.marinas@arm.com> + */ +#include <linux/bug.h> +#include <linux/smp.h> +#include <asm/outercache.h> + +void outer_disable(void) +{ + WARN_ON(!irqs_disabled()); + WARN_ON(num_online_cpus() > 1); + + if (outer_cache.disable) + outer_cache.disable(); +} diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S new file mode 100644 index 0000000000..fc01f1b185 --- /dev/null +++ b/arch/arm/mm/l2c-l2x0-resume.S @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * L2C-310 early resume code. This can be used by platforms to restore + * the settings of their L2 cache controller before restoring the + * processor state. + * + * This code can only be used to if you are running in the secure world. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/hardware/cache-l2x0.h> + + .text + +ENTRY(l2c310_early_resume) + adr r0, 1f + ldr r2, [r0] + add r0, r2, r0 + + ldmia r0, {r1, r2, r3, r4, r5, r6, r7, r8} + @ r1 = phys address of L2C-310 controller + @ r2 = aux_ctrl + @ r3 = tag_latency + @ r4 = data_latency + @ r5 = filter_start + @ r6 = filter_end + @ r7 = prefetch_ctrl + @ r8 = pwr_ctrl + + @ Check that the address has been initialised + teq r1, #0 + reteq lr + + @ The prefetch and power control registers are revision dependent + @ and can be written whether or not the L2 cache is enabled + ldr r0, [r1, #L2X0_CACHE_ID] + and r0, r0, #L2X0_CACHE_ID_RTL_MASK + cmp r0, #L310_CACHE_ID_RTL_R2P0 + strcs r7, [r1, #L310_PREFETCH_CTRL] + cmp r0, #L310_CACHE_ID_RTL_R3P0 + strcs r8, [r1, #L310_POWER_CTRL] + + @ Don't setup the L2 cache if it is already enabled + ldr r0, [r1, #L2X0_CTRL] + tst r0, #L2X0_CTRL_EN + retne lr + + str r3, [r1, #L310_TAG_LATENCY_CTRL] + str r4, [r1, #L310_DATA_LATENCY_CTRL] + str r6, [r1, #L310_ADDR_FILTER_END] + str r5, [r1, #L310_ADDR_FILTER_START] + + str r2, [r1, #L2X0_AUX_CTRL] + mov r9, #L2X0_CTRL_EN + str r9, [r1, #L2X0_CTRL] + ret lr +ENDPROC(l2c310_early_resume) + + .align +1: .long l2x0_saved_regs - . diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h new file mode 100644 index 0000000000..4193163167 --- /dev/null +++ b/arch/arm/mm/mm.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifdef CONFIG_MMU +#include <linux/list.h> +#include <linux/vmalloc.h> +#include <linux/pgtable.h> + +/* the upper-most page table pointer */ +extern pmd_t *top_pmd; + +extern int icache_size; + +/* + * 0xffff8000 to 0xffffffff is reserved for any ARM architecture + * specific hacks for copying pages efficiently, while 0xffff4000 + * is reserved for VIPT aliasing flushing by generic code. + * + * Note that we don't allow VIPT aliasing caches with SMP. + */ +#define COPYPAGE_MINICACHE 0xffff8000 +#define COPYPAGE_V6_FROM 0xffff8000 +#define COPYPAGE_V6_TO 0xffffc000 +/* PFN alias flushing, for VIPT caches */ +#define FLUSH_ALIAS_START 0xffff4000 + +static inline void set_top_pte(unsigned long va, pte_t pte) +{ + pte_t *ptep = pte_offset_kernel(top_pmd, va); + set_pte_ext(ptep, pte, 0); + local_flush_tlb_kernel_page(va); +} + +static inline pte_t get_top_pte(unsigned long va) +{ + pte_t *ptep = pte_offset_kernel(top_pmd, va); + return *ptep; +} + +struct mem_type { + pteval_t prot_pte; + pteval_t prot_pte_s2; + pmdval_t prot_l1; + pmdval_t prot_sect; + unsigned int domain; +}; + +const struct mem_type *get_mem_type(unsigned int type); + +void __flush_dcache_folio(struct address_space *mapping, struct folio *folio); + +/* + * ARM specific vm_struct->flags bits. + */ + +/* (super)section-mapped I/O regions used by ioremap()/iounmap() */ +#define VM_ARM_SECTION_MAPPING 0x80000000 + +/* permanent static mappings from iotable_init() */ +#define VM_ARM_STATIC_MAPPING 0x40000000 + +/* empty mapping */ +#define VM_ARM_EMPTY_MAPPING 0x20000000 + +/* mapping type (attributes) for permanent static mappings */ +#define VM_ARM_MTYPE(mt) ((mt) << 20) +#define VM_ARM_MTYPE_MASK (0x1f << 20) + + +struct static_vm { + struct vm_struct vm; + struct list_head list; +}; + +extern struct list_head static_vmlist; +extern struct static_vm *find_static_vm_vaddr(void *vaddr); +extern __init void add_static_vm_early(struct static_vm *svm); + +#endif + +#ifdef CONFIG_ZONE_DMA +extern phys_addr_t arm_dma_limit; +extern unsigned long arm_dma_pfn_limit; +#else +#define arm_dma_limit ((phys_addr_t)~0) +#define arm_dma_pfn_limit (~0ul >> PAGE_SHIFT) +#endif + +extern phys_addr_t arm_lowmem_limit; + +void __init bootmem_init(void); +void arm_mm_memblock_reserve(void); +#ifdef CONFIG_CMA_AREAS +void dma_contiguous_remap(void); +#else +static inline void dma_contiguous_remap(void) { } +#endif + +unsigned long __clear_cr(unsigned long mask); diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c new file mode 100644 index 0000000000..a0f8a0ca07 --- /dev/null +++ b/arch/arm/mm/mmap.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/arm/mm/mmap.c + */ +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/shm.h> +#include <linux/sched/signal.h> +#include <linux/sched/mm.h> +#include <linux/io.h> +#include <linux/personality.h> +#include <linux/random.h> +#include <asm/cachetype.h> + +#define COLOUR_ALIGN(addr,pgoff) \ + ((((addr)+SHMLBA-1)&~(SHMLBA-1)) + \ + (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1))) + +/* + * We need to ensure that shared mappings are correctly aligned to + * avoid aliasing issues with VIPT caches. We need to ensure that + * a specific page of an object is always mapped at a multiple of + * SHMLBA bytes. + * + * We unconditionally provide this function for all cases, however + * in the VIVT case, we optimise out the alignment rules. + */ +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* + * We enforce the MAP_FIXED case. + */ + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if either the I or D + * caches alias. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* requested length too big for entire address space */ + if (len > TASK_SIZE) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + /* requesting a specific address */ + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = FIRST_USER_ADDRESS; + info.high_limit = mm->mmap_base; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +/* + * You really shouldn't be using read() or write() on /dev/mem. This + * might go away in the future. + */ +int valid_phys_addr_range(phys_addr_t addr, size_t size) +{ + if (addr < PHYS_OFFSET) + return 0; + if (addr + size > __pa(high_memory - 1) + 1) + return 0; + + return 1; +} + +/* + * Do not allow /dev/mem mappings beyond the supported physical range. + */ +int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) +{ + return (pfn + (size >> PAGE_SHIFT)) <= (1 + (PHYS_MASK >> PAGE_SHIFT)); +} diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c new file mode 100644 index 0000000000..674ed71573 --- /dev/null +++ b/arch/arm/mm/mmu.c @@ -0,0 +1,1819 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/mmu.c + * + * Copyright (C) 1995-2005 Russell King + */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/mman.h> +#include <linux/nodemask.h> +#include <linux/memblock.h> +#include <linux/fs.h> +#include <linux/vmalloc.h> +#include <linux/sizes.h> + +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/cachetype.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/smp_plat.h> +#include <asm/tcm.h> +#include <asm/tlb.h> +#include <asm/highmem.h> +#include <asm/system_info.h> +#include <asm/traps.h> +#include <asm/procinfo.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/kasan_def.h> + +#include <asm/mach/arch.h> +#include <asm/mach/map.h> +#include <asm/mach/pci.h> +#include <asm/fixmap.h> + +#include "fault.h" +#include "mm.h" + +extern unsigned long __atags_pointer; + +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + +/* + * The pmd table for the upper-most set of pages. + */ +pmd_t *top_pmd; + +pmdval_t user_pmd_table = _PAGE_USER_TABLE; + +#define CPOLICY_UNCACHED 0 +#define CPOLICY_BUFFERED 1 +#define CPOLICY_WRITETHROUGH 2 +#define CPOLICY_WRITEBACK 3 +#define CPOLICY_WRITEALLOC 4 + +static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK; +static unsigned int ecc_mask __initdata = 0; +pgprot_t pgprot_user; +pgprot_t pgprot_kernel; + +EXPORT_SYMBOL(pgprot_user); +EXPORT_SYMBOL(pgprot_kernel); + +struct cachepolicy { + const char policy[16]; + unsigned int cr_mask; + pmdval_t pmd; + pteval_t pte; +}; + +static struct cachepolicy cache_policies[] __initdata = { + { + .policy = "uncached", + .cr_mask = CR_W|CR_C, + .pmd = PMD_SECT_UNCACHED, + .pte = L_PTE_MT_UNCACHED, + }, { + .policy = "buffered", + .cr_mask = CR_C, + .pmd = PMD_SECT_BUFFERED, + .pte = L_PTE_MT_BUFFERABLE, + }, { + .policy = "writethrough", + .cr_mask = 0, + .pmd = PMD_SECT_WT, + .pte = L_PTE_MT_WRITETHROUGH, + }, { + .policy = "writeback", + .cr_mask = 0, + .pmd = PMD_SECT_WB, + .pte = L_PTE_MT_WRITEBACK, + }, { + .policy = "writealloc", + .cr_mask = 0, + .pmd = PMD_SECT_WBWA, + .pte = L_PTE_MT_WRITEALLOC, + } +}; + +#ifdef CONFIG_CPU_CP15 +static unsigned long initial_pmd_value __initdata = 0; + +/* + * Initialise the cache_policy variable with the initial state specified + * via the "pmd" value. This is used to ensure that on ARMv6 and later, + * the C code sets the page tables up with the same policy as the head + * assembly code, which avoids an illegal state where the TLBs can get + * confused. See comments in early_cachepolicy() for more information. + */ +void __init init_default_cache_policy(unsigned long pmd) +{ + int i; + + initial_pmd_value = pmd; + + pmd &= PMD_SECT_CACHE_MASK; + + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) + if (cache_policies[i].pmd == pmd) { + cachepolicy = i; + break; + } + + if (i == ARRAY_SIZE(cache_policies)) + pr_err("ERROR: could not find cache policy\n"); +} + +/* + * These are useful for identifying cache coherency problems by allowing + * the cache or the cache and writebuffer to be turned off. (Note: the + * write buffer should not be on and the cache off). + */ +static int __init early_cachepolicy(char *p) +{ + int i, selected = -1; + + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { + int len = strlen(cache_policies[i].policy); + + if (memcmp(p, cache_policies[i].policy, len) == 0) { + selected = i; + break; + } + } + + if (selected == -1) + pr_err("ERROR: unknown or unsupported cache policy\n"); + + /* + * This restriction is partly to do with the way we boot; it is + * unpredictable to have memory mapped using two different sets of + * memory attributes (shared, type, and cache attribs). We can not + * change these attributes once the initial assembly has setup the + * page tables. + */ + if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) { + pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n", + cache_policies[cachepolicy].policy); + return 0; + } + + if (selected != cachepolicy) { + unsigned long cr = __clear_cr(cache_policies[selected].cr_mask); + cachepolicy = selected; + flush_cache_all(); + set_cr(cr); + } + return 0; +} +early_param("cachepolicy", early_cachepolicy); + +static int __init early_nocache(char *__unused) +{ + char *p = "buffered"; + pr_warn("nocache is deprecated; use cachepolicy=%s\n", p); + early_cachepolicy(p); + return 0; +} +early_param("nocache", early_nocache); + +static int __init early_nowrite(char *__unused) +{ + char *p = "uncached"; + pr_warn("nowb is deprecated; use cachepolicy=%s\n", p); + early_cachepolicy(p); + return 0; +} +early_param("nowb", early_nowrite); + +#ifndef CONFIG_ARM_LPAE +static int __init early_ecc(char *p) +{ + if (memcmp(p, "on", 2) == 0) + ecc_mask = PMD_PROTECTION; + else if (memcmp(p, "off", 3) == 0) + ecc_mask = 0; + return 0; +} +early_param("ecc", early_ecc); +#endif + +#else /* ifdef CONFIG_CPU_CP15 */ + +static int __init early_cachepolicy(char *p) +{ + pr_warn("cachepolicy kernel parameter not supported without cp15\n"); + return 0; +} +early_param("cachepolicy", early_cachepolicy); + +static int __init noalign_setup(char *__unused) +{ + pr_warn("noalign kernel parameter not supported without cp15\n"); + return 1; +} +__setup("noalign", noalign_setup); + +#endif /* ifdef CONFIG_CPU_CP15 / else */ + +#define PROT_PTE_DEVICE L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN +#define PROT_PTE_S2_DEVICE PROT_PTE_DEVICE +#define PROT_SECT_DEVICE PMD_TYPE_SECT|PMD_SECT_AP_WRITE + +static struct mem_type mem_types[] __ro_after_init = { + [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED | + L_PTE_SHARED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_CACHED] = { /* ioremap_cache */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB, + .domain = DOMAIN_IO, + }, + [MT_DEVICE_WC] = { /* ioremap_wc */ + .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PROT_SECT_DEVICE, + .domain = DOMAIN_IO, + }, + [MT_UNCACHED] = { + .prot_pte = PROT_PTE_DEVICE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_IO, + }, + [MT_CACHECLEAN] = { + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, +#ifndef CONFIG_ARM_LPAE + [MT_MINICLEAN] = { + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE, + .domain = DOMAIN_KERNEL, + }, +#endif + [MT_LOW_VECTORS] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_VECTORS, + }, + [MT_HIGH_VECTORS] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_USER | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_VECTORS, + }, + [MT_MEMORY_RWX] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RW] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN | L_PTE_RDONLY, + .prot_l1 = PMD_TYPE_TABLE, +#ifdef CONFIG_ARM_LPAE + .prot_sect = PMD_TYPE_SECT | L_PMD_SECT_RDONLY | PMD_SECT_AP2, +#else + .prot_sect = PMD_TYPE_SECT, +#endif + .domain = DOMAIN_KERNEL, + }, + [MT_ROM] = { + .prot_sect = PMD_TYPE_SECT, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RWX_NONCACHED] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_MT_BUFFERABLE, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RW_DTCM] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RWX_ITCM] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_RW_SO] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_MT_UNCACHED | L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S | + PMD_SECT_UNCACHED | PMD_SECT_XN, + .domain = DOMAIN_KERNEL, + }, + [MT_MEMORY_DMA_READY] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_XN, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_KERNEL, + }, +}; + +const struct mem_type *get_mem_type(unsigned int type) +{ + return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL; +} +EXPORT_SYMBOL(get_mem_type); + +static pte_t *(*pte_offset_fixmap)(pmd_t *dir, unsigned long addr); + +static pte_t bm_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS] + __aligned(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE) __initdata; + +static pte_t * __init pte_offset_early_fixmap(pmd_t *dir, unsigned long addr) +{ + return &bm_pte[pte_index(addr)]; +} + +static pte_t *pte_offset_late_fixmap(pmd_t *dir, unsigned long addr) +{ + return pte_offset_kernel(dir, addr); +} + +static inline pmd_t * __init fixmap_pmd(unsigned long addr) +{ + return pmd_off_k(addr); +} + +void __init early_fixmap_init(void) +{ + pmd_t *pmd; + + /* + * The early fixmap range spans multiple pmds, for which + * we are not prepared: + */ + BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT) + != FIXADDR_TOP >> PMD_SHIFT); + + pmd = fixmap_pmd(FIXADDR_TOP); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + pte_offset_fixmap = pte_offset_early_fixmap; +} + +/* + * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range(). + * As a result, this can only be called with preemption disabled, as under + * stop_machine(). + */ +void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) +{ + unsigned long vaddr = __fix_to_virt(idx); + pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr); + + /* Make sure fixmap region does not exceed available allocation. */ + BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START); + BUG_ON(idx >= __end_of_fixed_addresses); + + /* We support only device mappings before pgprot_kernel is set. */ + if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) && + pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0)) + return; + + if (pgprot_val(prot)) + set_pte_at(NULL, vaddr, pte, + pfn_pte(phys >> PAGE_SHIFT, prot)); + else + pte_clear(NULL, vaddr, pte); + local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); +} + +static pgprot_t protection_map[16] __ro_after_init = { + [VM_NONE] = __PAGE_NONE, + [VM_READ] = __PAGE_READONLY, + [VM_WRITE] = __PAGE_COPY, + [VM_WRITE | VM_READ] = __PAGE_COPY, + [VM_EXEC] = __PAGE_READONLY_EXEC, + [VM_EXEC | VM_READ] = __PAGE_READONLY_EXEC, + [VM_EXEC | VM_WRITE] = __PAGE_COPY_EXEC, + [VM_EXEC | VM_WRITE | VM_READ] = __PAGE_COPY_EXEC, + [VM_SHARED] = __PAGE_NONE, + [VM_SHARED | VM_READ] = __PAGE_READONLY, + [VM_SHARED | VM_WRITE] = __PAGE_SHARED, + [VM_SHARED | VM_WRITE | VM_READ] = __PAGE_SHARED, + [VM_SHARED | VM_EXEC] = __PAGE_READONLY_EXEC, + [VM_SHARED | VM_EXEC | VM_READ] = __PAGE_READONLY_EXEC, + [VM_SHARED | VM_EXEC | VM_WRITE] = __PAGE_SHARED_EXEC, + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = __PAGE_SHARED_EXEC +}; +DECLARE_VM_GET_PAGE_PROT + +/* + * Adjust the PMD section entries according to the CPU in use. + */ +static void __init build_mem_type_table(void) +{ + struct cachepolicy *cp; + unsigned int cr = get_cr(); + pteval_t user_pgprot, kern_pgprot, vecs_pgprot; + int cpu_arch = cpu_architecture(); + int i; + + if (cpu_arch < CPU_ARCH_ARMv6) { +#if defined(CONFIG_CPU_DCACHE_DISABLE) + if (cachepolicy > CPOLICY_BUFFERED) + cachepolicy = CPOLICY_BUFFERED; +#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH) + if (cachepolicy > CPOLICY_WRITETHROUGH) + cachepolicy = CPOLICY_WRITETHROUGH; +#endif + } + if (cpu_arch < CPU_ARCH_ARMv5) { + if (cachepolicy >= CPOLICY_WRITEALLOC) + cachepolicy = CPOLICY_WRITEBACK; + ecc_mask = 0; + } + + if (is_smp()) { + if (cachepolicy != CPOLICY_WRITEALLOC) { + pr_warn("Forcing write-allocate cache policy for SMP\n"); + cachepolicy = CPOLICY_WRITEALLOC; + } + if (!(initial_pmd_value & PMD_SECT_S)) { + pr_warn("Forcing shared mappings for SMP\n"); + initial_pmd_value |= PMD_SECT_S; + } + } + + /* + * Strip out features not present on earlier architectures. + * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those + * without extended page tables don't have the 'Shared' bit. + */ + if (cpu_arch < CPU_ARCH_ARMv5) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_TEX(7); + if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3()) + for (i = 0; i < ARRAY_SIZE(mem_types); i++) + mem_types[i].prot_sect &= ~PMD_SECT_S; + + /* + * ARMv5 and lower, bit 4 must be set for page tables (was: cache + * "update-able on write" bit on ARM610). However, Xscale and + * Xscale3 require this bit to be cleared. + */ + if (cpu_is_xscale_family()) { + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + mem_types[i].prot_sect &= ~PMD_BIT4; + mem_types[i].prot_l1 &= ~PMD_BIT4; + } + } else if (cpu_arch < CPU_ARCH_ARMv6) { + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + if (mem_types[i].prot_l1) + mem_types[i].prot_l1 |= PMD_BIT4; + if (mem_types[i].prot_sect) + mem_types[i].prot_sect |= PMD_BIT4; + } + } + + /* + * Mark the device areas according to the CPU/architecture. + */ + if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) { + if (!cpu_is_xsc3()) { + /* + * Mark device regions on ARMv6+ as execute-never + * to prevent speculative instruction fetches. + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN; + + /* Also setup NX memory mapping */ + mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN; + } + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* + * For ARMv7 with TEX remapping, + * - shared device is SXCB=1100 + * - nonshared device is SXCB=0100 + * - write combine device mem is SXCB=0001 + * (Uncached Normal memory) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } else if (cpu_is_xsc3()) { + /* + * For Xscale3, + * - shared device is TEXCB=00101 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Inner/Outer Uncacheable in xsc3 parlance) + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } else { + /* + * For ARMv6 and ARMv7 without TEX remapping, + * - shared device is TEXCB=00001 + * - nonshared device is TEXCB=01000 + * - write combine device mem is TEXCB=00100 + * (Uncached Normal in ARMv6 parlance). + */ + mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED; + mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2); + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1); + } + } else { + /* + * On others, write combining is "Uncached/Buffered" + */ + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE; + } + + /* + * Now deal with the memory-type mappings + */ + cp = &cache_policies[cachepolicy]; + vecs_pgprot = kern_pgprot = user_pgprot = cp->pte; + +#ifndef CONFIG_ARM_LPAE + /* + * We don't use domains on ARMv6 (since this causes problems with + * v6/v7 kernels), so we must use a separate memory type for user + * r/o, kernel r/w to map the vectors page. + */ + if (cpu_arch == CPU_ARCH_ARMv6) + vecs_pgprot |= L_PTE_MT_VECTORS; + + /* + * Check is it with support for the PXN bit + * in the Short-descriptor translation table format descriptors. + */ + if (cpu_arch == CPU_ARCH_ARMv7 && + (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) >= 4) { + user_pmd_table |= PMD_PXNTABLE; + } +#endif + + /* + * ARMv6 and above have extended page tables. + */ + if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) { +#ifndef CONFIG_ARM_LPAE + /* + * Mark cache clean areas and XIP ROM read only + * from SVC mode and no access from userspace. + */ + mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; +#endif + + /* + * If the initial page tables were created with the S bit + * set, then we need to do the same here for the same + * reasons given in early_cachepolicy(). + */ + if (initial_pmd_value & PMD_SECT_S) { + user_pgprot |= L_PTE_SHARED; + kern_pgprot |= L_PTE_SHARED; + vecs_pgprot |= L_PTE_SHARED; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED; + } + } + + /* + * Non-cacheable Normal - intended for memory areas that must + * not cause dirty cache line writebacks when used + */ + if (cpu_arch >= CPU_ARCH_ARMv6) { + if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) { + /* Non-cacheable Normal is XCB = 001 */ + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= + PMD_SECT_BUFFERED; + } else { + /* For both ARMv6 and non-TEX-remapping ARMv7 */ + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= + PMD_SECT_TEX(1); + } + } else { + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE; + } + +#ifdef CONFIG_ARM_LPAE + /* + * Do not generate access flag faults for the kernel mappings. + */ + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + mem_types[i].prot_pte |= PTE_EXT_AF; + if (mem_types[i].prot_sect) + mem_types[i].prot_sect |= PMD_SECT_AF; + } + kern_pgprot |= PTE_EXT_AF; + vecs_pgprot |= PTE_EXT_AF; + + /* + * Set PXN for user mappings + */ + user_pgprot |= PTE_EXT_PXN; +#endif + + for (i = 0; i < 16; i++) { + pteval_t v = pgprot_val(protection_map[i]); + protection_map[i] = __pgprot(v | user_pgprot); + } + + mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot; + mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot; + + pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot); + pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | + L_PTE_DIRTY | kern_pgprot); + + mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; + mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; + mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask; + mem_types[MT_ROM].prot_sect |= cp->pmd; + + switch (cp->pmd) { + case PMD_SECT_WT: + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT; + break; + case PMD_SECT_WB: + case PMD_SECT_WBWA: + mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB; + break; + } + pr_info("Memory policy: %sData cache %s\n", + ecc_mask ? "ECC enabled, " : "", cp->policy); + + for (i = 0; i < ARRAY_SIZE(mem_types); i++) { + struct mem_type *t = &mem_types[i]; + if (t->prot_l1) + t->prot_l1 |= PMD_DOMAIN(t->domain); + if (t->prot_sect) + t->prot_sect |= PMD_DOMAIN(t->domain); + } +} + +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!pfn_valid(pfn)) + return pgprot_noncached(vma_prot); + else if (file->f_flags & O_SYNC) + return pgprot_writecombine(vma_prot); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); +#endif + +#define vectors_base() (vectors_high() ? 0xffff0000 : 0) + +static void __init *early_alloc(unsigned long sz) +{ + void *ptr = memblock_alloc(sz, sz); + + if (!ptr) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, sz, sz); + + return ptr; +} + +static void *__init late_alloc(unsigned long sz) +{ + void *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_HIGHMEM, + get_order(sz)); + + if (!ptdesc || !pagetable_pte_ctor(ptdesc)) + BUG(); + return ptdesc_to_virt(ptdesc); +} + +static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr, + unsigned long prot, + void *(*alloc)(unsigned long sz)) +{ + if (pmd_none(*pmd)) { + pte_t *pte = alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE); + __pmd_populate(pmd, __pa(pte), prot); + } + BUG_ON(pmd_bad(*pmd)); + return pte_offset_kernel(pmd, addr); +} + +static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, + unsigned long prot) +{ + return arm_pte_alloc(pmd, addr, prot, early_alloc); +} + +static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, + unsigned long end, unsigned long pfn, + const struct mem_type *type, + void *(*alloc)(unsigned long sz), + bool ng) +{ + pte_t *pte = arm_pte_alloc(pmd, addr, type->prot_l1, alloc); + do { + set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), + ng ? PTE_EXT_NG : 0); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void __init __map_init_section(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type, bool ng) +{ + pmd_t *p = pmd; + +#ifndef CONFIG_ARM_LPAE + /* + * In classic MMU format, puds and pmds are folded in to + * the pgds. pmd_offset gives the PGD entry. PGDs refer to a + * group of L1 entries making up one logical pointer to + * an L2 table (2MB), where as PMDs refer to the individual + * L1 entries (1MB). Hence increment to get the correct + * offset for odd 1MB sections. + * (See arch/arm/include/asm/pgtable-2level.h) + */ + if (addr & SECTION_SIZE) + pmd++; +#endif + do { + *pmd = __pmd(phys | type->prot_sect | (ng ? PMD_SECT_nG : 0)); + phys += SECTION_SIZE; + } while (pmd++, addr += SECTION_SIZE, addr != end); + + flush_pmd_entry(p); +} + +static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type, + void *(*alloc)(unsigned long sz), bool ng) +{ + pmd_t *pmd = pmd_offset(pud, addr); + unsigned long next; + + do { + /* + * With LPAE, we must loop over to map + * all the pmds for the given range. + */ + next = pmd_addr_end(addr, end); + + /* + * Try a section mapping - addr, next and phys must all be + * aligned to a section boundary. + */ + if (type->prot_sect && + ((addr | next | phys) & ~SECTION_MASK) == 0) { + __map_init_section(pmd, addr, next, phys, type, ng); + } else { + alloc_init_pte(pmd, addr, next, + __phys_to_pfn(phys), type, alloc, ng); + } + + phys += next - addr; + + } while (pmd++, addr = next, addr != end); +} + +static void __init alloc_init_pud(p4d_t *p4d, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type, + void *(*alloc)(unsigned long sz), bool ng) +{ + pud_t *pud = pud_offset(p4d, addr); + unsigned long next; + + do { + next = pud_addr_end(addr, end); + alloc_init_pmd(pud, addr, next, phys, type, alloc, ng); + phys += next - addr; + } while (pud++, addr = next, addr != end); +} + +static void __init alloc_init_p4d(pgd_t *pgd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type, + void *(*alloc)(unsigned long sz), bool ng) +{ + p4d_t *p4d = p4d_offset(pgd, addr); + unsigned long next; + + do { + next = p4d_addr_end(addr, end); + alloc_init_pud(p4d, addr, next, phys, type, alloc, ng); + phys += next - addr; + } while (p4d++, addr = next, addr != end); +} + +#ifndef CONFIG_ARM_LPAE +static void __init create_36bit_mapping(struct mm_struct *mm, + struct map_desc *md, + const struct mem_type *type, + bool ng) +{ + unsigned long addr, length, end; + phys_addr_t phys; + pgd_t *pgd; + + addr = md->virtual; + phys = __pfn_to_phys(md->pfn); + length = PAGE_ALIGN(md->length); + + if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) { + pr_err("MM: CPU does not support supersection mapping for 0x%08llx at 0x%08lx\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); + return; + } + + /* N.B. ARMv6 supersections are only defined to work with domain 0. + * Since domain assignments can in fact be arbitrary, the + * 'domain == 0' check below is required to insure that ARMv6 + * supersections are only allocated for domain 0 regardless + * of the actual domain assignments in use. + */ + if (type->domain) { + pr_err("MM: invalid domain in supersection mapping for 0x%08llx at 0x%08lx\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); + return; + } + + if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) { + pr_err("MM: cannot create mapping for 0x%08llx at 0x%08lx invalid alignment\n", + (long long)__pfn_to_phys((u64)md->pfn), addr); + return; + } + + /* + * Shift bits [35:32] of address into bits [23:20] of PMD + * (See ARMv6 spec). + */ + phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20); + + pgd = pgd_offset(mm, addr); + end = addr + length; + do { + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); + pmd_t *pmd = pmd_offset(pud, addr); + int i; + + for (i = 0; i < 16; i++) + *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER | + (ng ? PMD_SECT_nG : 0)); + + addr += SUPERSECTION_SIZE; + phys += SUPERSECTION_SIZE; + pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT; + } while (addr != end); +} +#endif /* !CONFIG_ARM_LPAE */ + +static void __init __create_mapping(struct mm_struct *mm, struct map_desc *md, + void *(*alloc)(unsigned long sz), + bool ng) +{ + unsigned long addr, length, end; + phys_addr_t phys; + const struct mem_type *type; + pgd_t *pgd; + + type = &mem_types[md->type]; + +#ifndef CONFIG_ARM_LPAE + /* + * Catch 36-bit addresses + */ + if (md->pfn >= 0x100000) { + create_36bit_mapping(mm, md, type, ng); + return; + } +#endif + + addr = md->virtual & PAGE_MASK; + phys = __pfn_to_phys(md->pfn); + length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); + + if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) { + pr_warn("BUG: map for 0x%08llx at 0x%08lx can not be mapped using pages, ignoring.\n", + (long long)__pfn_to_phys(md->pfn), addr); + return; + } + + pgd = pgd_offset(mm, addr); + end = addr + length; + do { + unsigned long next = pgd_addr_end(addr, end); + + alloc_init_p4d(pgd, addr, next, phys, type, alloc, ng); + + phys += next - addr; + addr = next; + } while (pgd++, addr != end); +} + +/* + * Create the page directory entries and any necessary + * page tables for the mapping specified by `md'. We + * are able to cope here with varying sizes and address + * offsets, and we take full advantage of sections and + * supersections. + */ +static void __init create_mapping(struct map_desc *md) +{ + if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) { + pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); + return; + } + + if (md->type == MT_DEVICE && + md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && + (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { + pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", + (long long)__pfn_to_phys((u64)md->pfn), md->virtual); + } + + __create_mapping(&init_mm, md, early_alloc, false); +} + +void __init create_mapping_late(struct mm_struct *mm, struct map_desc *md, + bool ng) +{ +#ifdef CONFIG_ARM_LPAE + p4d_t *p4d; + pud_t *pud; + + p4d = p4d_alloc(mm, pgd_offset(mm, md->virtual), md->virtual); + if (WARN_ON(!p4d)) + return; + pud = pud_alloc(mm, p4d, md->virtual); + if (WARN_ON(!pud)) + return; + pmd_alloc(mm, pud, 0); +#endif + __create_mapping(mm, md, late_alloc, ng); +} + +/* + * Create the architecture specific mappings + */ +void __init iotable_init(struct map_desc *io_desc, int nr) +{ + struct map_desc *md; + struct vm_struct *vm; + struct static_vm *svm; + + if (!nr) + return; + + svm = memblock_alloc(sizeof(*svm) * nr, __alignof__(*svm)); + if (!svm) + panic("%s: Failed to allocate %zu bytes align=0x%zx\n", + __func__, sizeof(*svm) * nr, __alignof__(*svm)); + + for (md = io_desc; nr; md++, nr--) { + create_mapping(md); + + vm = &svm->vm; + vm->addr = (void *)(md->virtual & PAGE_MASK); + vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK)); + vm->phys_addr = __pfn_to_phys(md->pfn); + vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING; + vm->flags |= VM_ARM_MTYPE(md->type); + vm->caller = iotable_init; + add_static_vm_early(svm++); + } +} + +void __init vm_reserve_area_early(unsigned long addr, unsigned long size, + void *caller) +{ + struct vm_struct *vm; + struct static_vm *svm; + + svm = memblock_alloc(sizeof(*svm), __alignof__(*svm)); + if (!svm) + panic("%s: Failed to allocate %zu bytes align=0x%zx\n", + __func__, sizeof(*svm), __alignof__(*svm)); + + vm = &svm->vm; + vm->addr = (void *)addr; + vm->size = size; + vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING; + vm->caller = caller; + add_static_vm_early(svm); +} + +#ifndef CONFIG_ARM_LPAE + +/* + * The Linux PMD is made of two consecutive section entries covering 2MB + * (see definition in include/asm/pgtable-2level.h). However a call to + * create_mapping() may optimize static mappings by using individual + * 1MB section mappings. This leaves the actual PMD potentially half + * initialized if the top or bottom section entry isn't used, leaving it + * open to problems if a subsequent ioremap() or vmalloc() tries to use + * the virtual space left free by that unused section entry. + * + * Let's avoid the issue by inserting dummy vm entries covering the unused + * PMD halves once the static mappings are in place. + */ + +static void __init pmd_empty_section_gap(unsigned long addr) +{ + vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap); +} + +static void __init fill_pmd_gaps(void) +{ + struct static_vm *svm; + struct vm_struct *vm; + unsigned long addr, next = 0; + pmd_t *pmd; + + list_for_each_entry(svm, &static_vmlist, list) { + vm = &svm->vm; + addr = (unsigned long)vm->addr; + if (addr < next) + continue; + + /* + * Check if this vm starts on an odd section boundary. + * If so and the first section entry for this PMD is free + * then we block the corresponding virtual address. + */ + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr); + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr & PMD_MASK); + } + + /* + * Then check if this vm ends on an odd section boundary. + * If so and the second section entry for this PMD is empty + * then we block the corresponding virtual address. + */ + addr += vm->size; + if ((addr & ~PMD_MASK) == SECTION_SIZE) { + pmd = pmd_off_k(addr) + 1; + if (pmd_none(*pmd)) + pmd_empty_section_gap(addr); + } + + /* no need to look at any vm entry until we hit the next PMD */ + next = (addr + PMD_SIZE - 1) & PMD_MASK; + } +} + +#else +#define fill_pmd_gaps() do { } while (0) +#endif + +#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H) +static void __init pci_reserve_io(void) +{ + struct static_vm *svm; + + svm = find_static_vm_vaddr((void *)PCI_IO_VIRT_BASE); + if (svm) + return; + + vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io); +} +#else +#define pci_reserve_io() do { } while (0) +#endif + +#ifdef CONFIG_DEBUG_LL +void __init debug_ll_io_init(void) +{ + struct map_desc map; + + debug_ll_addr(&map.pfn, &map.virtual); + if (!map.pfn || !map.virtual) + return; + map.pfn = __phys_to_pfn(map.pfn); + map.virtual &= PAGE_MASK; + map.length = PAGE_SIZE; + map.type = MT_DEVICE; + iotable_init(&map, 1); +} +#endif + +static unsigned long __initdata vmalloc_size = 240 * SZ_1M; + +/* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. This can be used to increase (or decrease) the vmalloc + * area - the default is 240MiB. + */ +static int __init early_vmalloc(char *arg) +{ + unsigned long vmalloc_reserve = memparse(arg, NULL); + unsigned long vmalloc_max; + + if (vmalloc_reserve < SZ_16M) { + vmalloc_reserve = SZ_16M; + pr_warn("vmalloc area is too small, limiting to %luMiB\n", + vmalloc_reserve >> 20); + } + + vmalloc_max = VMALLOC_END - (PAGE_OFFSET + SZ_32M + VMALLOC_OFFSET); + if (vmalloc_reserve > vmalloc_max) { + vmalloc_reserve = vmalloc_max; + pr_warn("vmalloc area is too big, limiting to %luMiB\n", + vmalloc_reserve >> 20); + } + + vmalloc_size = vmalloc_reserve; + return 0; +} +early_param("vmalloc", early_vmalloc); + +phys_addr_t arm_lowmem_limit __initdata = 0; + +void __init adjust_lowmem_bounds(void) +{ + phys_addr_t block_start, block_end, memblock_limit = 0; + u64 vmalloc_limit, i; + phys_addr_t lowmem_limit = 0; + + /* + * Let's use our own (unoptimized) equivalent of __pa() that is + * not affected by wrap-arounds when sizeof(phys_addr_t) == 4. + * The result is used as the upper bound on physical memory address + * and may itself be outside the valid range for which phys_addr_t + * and therefore __pa() is defined. + */ + vmalloc_limit = (u64)VMALLOC_END - vmalloc_size - VMALLOC_OFFSET - + PAGE_OFFSET + PHYS_OFFSET; + + /* + * The first usable region must be PMD aligned. Mark its start + * as MEMBLOCK_NOMAP if it isn't + */ + for_each_mem_range(i, &block_start, &block_end) { + if (!IS_ALIGNED(block_start, PMD_SIZE)) { + phys_addr_t len; + + len = round_up(block_start, PMD_SIZE) - block_start; + memblock_mark_nomap(block_start, len); + } + break; + } + + for_each_mem_range(i, &block_start, &block_end) { + if (block_start < vmalloc_limit) { + if (block_end > lowmem_limit) + /* + * Compare as u64 to ensure vmalloc_limit does + * not get truncated. block_end should always + * fit in phys_addr_t so there should be no + * issue with assignment. + */ + lowmem_limit = min_t(u64, + vmalloc_limit, + block_end); + + /* + * Find the first non-pmd-aligned page, and point + * memblock_limit at it. This relies on rounding the + * limit down to be pmd-aligned, which happens at the + * end of this function. + * + * With this algorithm, the start or end of almost any + * bank can be non-pmd-aligned. The only exception is + * that the start of the bank 0 must be section- + * aligned, since otherwise memory would need to be + * allocated when mapping the start of bank 0, which + * occurs before any free memory is mapped. + */ + if (!memblock_limit) { + if (!IS_ALIGNED(block_start, PMD_SIZE)) + memblock_limit = block_start; + else if (!IS_ALIGNED(block_end, PMD_SIZE)) + memblock_limit = lowmem_limit; + } + + } + } + + arm_lowmem_limit = lowmem_limit; + + high_memory = __va(arm_lowmem_limit - 1) + 1; + + if (!memblock_limit) + memblock_limit = arm_lowmem_limit; + + /* + * Round the memblock limit down to a pmd size. This + * helps to ensure that we will allocate memory from the + * last full pmd, which should be mapped. + */ + memblock_limit = round_down(memblock_limit, PMD_SIZE); + + if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) { + if (memblock_end_of_DRAM() > arm_lowmem_limit) { + phys_addr_t end = memblock_end_of_DRAM(); + + pr_notice("Ignoring RAM at %pa-%pa\n", + &memblock_limit, &end); + pr_notice("Consider using a HIGHMEM enabled kernel.\n"); + + memblock_remove(memblock_limit, end - memblock_limit); + } + } + + memblock_set_current_limit(memblock_limit); +} + +static __init void prepare_page_table(void) +{ + unsigned long addr; + phys_addr_t end; + + /* + * Clear out all the mappings below the kernel image. + */ +#ifdef CONFIG_KASAN + /* + * KASan's shadow memory inserts itself between the TASK_SIZE + * and MODULES_VADDR. Do not clear the KASan shadow memory mappings. + */ + for (addr = 0; addr < KASAN_SHADOW_START; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + /* + * Skip over the KASan shadow area. KASAN_SHADOW_END is sometimes + * equal to MODULES_VADDR and then we exit the pmd clearing. If we + * are using a thumb-compiled kernel, there there will be 8MB more + * to clear as KASan always offset to 16 MB below MODULES_VADDR. + */ + for (addr = KASAN_SHADOW_END; addr < MODULES_VADDR; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); +#else + for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); +#endif + +#ifdef CONFIG_XIP_KERNEL + /* The XIP kernel is mapped in the module area -- skip over it */ + addr = ((unsigned long)_exiprom + PMD_SIZE - 1) & PMD_MASK; +#endif + for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + + /* + * Find the end of the first block of lowmem. + */ + end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; + if (end >= arm_lowmem_limit) + end = arm_lowmem_limit; + + /* + * Clear out all the kernel space mappings, except for the first + * memory bank, up to the vmalloc region. + */ + for (addr = __phys_to_virt(end); + addr < VMALLOC_START; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); +} + +#ifdef CONFIG_ARM_LPAE +/* the first page is reserved for pgd */ +#define SWAPPER_PG_DIR_SIZE (PAGE_SIZE + \ + PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t)) +#else +#define SWAPPER_PG_DIR_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) +#endif + +/* + * Reserve the special regions of memory + */ +void __init arm_mm_memblock_reserve(void) +{ + /* + * Reserve the page tables. These are already in use, + * and can only be in node 0. + */ + memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE); + +#ifdef CONFIG_SA1111 + /* + * Because of the SA1111 DMA bug, we want to preserve our + * precious DMA-able memory... + */ + memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); +#endif +} + +/* + * Set up the device mappings. Since we clear out the page tables for all + * mappings above VMALLOC_START, except early fixmap, we might remove debug + * device mappings. This means earlycon can be used to debug this function + * Any other function or debugging method which may touch any device _will_ + * crash the kernel. + */ +static void __init devicemaps_init(const struct machine_desc *mdesc) +{ + struct map_desc map; + unsigned long addr; + void *vectors; + + /* + * Allocate the vector page early. + */ + vectors = early_alloc(PAGE_SIZE * 2); + + early_trap_init(vectors); + + /* + * Clear page table except top pmd used by early fixmaps + */ + for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + + if (__atags_pointer) { + /* create a read-only mapping of the device tree */ + map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); + map.virtual = FDT_FIXED_BASE; + map.length = FDT_FIXED_SIZE; + map.type = MT_MEMORY_RO; + create_mapping(&map); + } + + /* + * Map the kernel if it is XIP. + * It is always first in the modulearea. + */ +#ifdef CONFIG_XIP_KERNEL + map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK); + map.virtual = MODULES_VADDR; + map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK; + map.type = MT_ROM; + create_mapping(&map); +#endif + + /* + * Map the cache flushing regions. + */ +#ifdef FLUSH_BASE + map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS); + map.virtual = FLUSH_BASE; + map.length = SZ_1M; + map.type = MT_CACHECLEAN; + create_mapping(&map); +#endif +#ifdef FLUSH_BASE_MINICACHE + map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M); + map.virtual = FLUSH_BASE_MINICACHE; + map.length = SZ_1M; + map.type = MT_MINICLEAN; + create_mapping(&map); +#endif + + /* + * Create a mapping for the machine vectors at the high-vectors + * location (0xffff0000). If we aren't using high-vectors, also + * create a mapping at the low-vectors virtual address. + */ + map.pfn = __phys_to_pfn(virt_to_phys(vectors)); + map.virtual = 0xffff0000; + map.length = PAGE_SIZE; +#ifdef CONFIG_KUSER_HELPERS + map.type = MT_HIGH_VECTORS; +#else + map.type = MT_LOW_VECTORS; +#endif + create_mapping(&map); + + if (!vectors_high()) { + map.virtual = 0; + map.length = PAGE_SIZE * 2; + map.type = MT_LOW_VECTORS; + create_mapping(&map); + } + + /* Now create a kernel read-only mapping */ + map.pfn += 1; + map.virtual = 0xffff0000 + PAGE_SIZE; + map.length = PAGE_SIZE; + map.type = MT_LOW_VECTORS; + create_mapping(&map); + + /* + * Ask the machine support to map in the statically mapped devices. + */ + if (mdesc->map_io) + mdesc->map_io(); + else + debug_ll_io_init(); + fill_pmd_gaps(); + + /* Reserve fixed i/o space in VMALLOC region */ + pci_reserve_io(); + + /* + * Finally flush the caches and tlb to ensure that we're in a + * consistent state wrt the writebuffer. This also ensures that + * any write-allocated cache lines in the vector page are written + * back. After this point, we can start to touch devices again. + */ + local_flush_tlb_all(); + flush_cache_all(); + + /* Enable asynchronous aborts */ + early_abt_enable(); +} + +static void __init kmap_init(void) +{ +#ifdef CONFIG_HIGHMEM + pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE), + PKMAP_BASE, _PAGE_KERNEL_TABLE); +#endif + + early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START, + _PAGE_KERNEL_TABLE); +} + +static void __init map_lowmem(void) +{ + phys_addr_t start, end; + u64 i; + + /* Map all the lowmem memory banks. */ + for_each_mem_range(i, &start, &end) { + struct map_desc map; + + pr_debug("map lowmem start: 0x%08llx, end: 0x%08llx\n", + (long long)start, (long long)end); + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; + if (start >= end) + break; + + /* + * If our kernel image is in the VMALLOC area we need to remove + * the kernel physical memory from lowmem since the kernel will + * be mapped separately. + * + * The kernel will typically be at the very start of lowmem, + * but any placement relative to memory ranges is possible. + * + * If the memblock contains the kernel, we have to chisel out + * the kernel memory from it and map each part separately. We + * get 6 different theoretical cases: + * + * +--------+ +--------+ + * +-- start --+ +--------+ | Kernel | | Kernel | + * | | | Kernel | | case 2 | | case 5 | + * | | | case 1 | +--------+ | | +--------+ + * | Memory | +--------+ | | | Kernel | + * | range | +--------+ | | | case 6 | + * | | | Kernel | +--------+ | | +--------+ + * | | | case 3 | | Kernel | | | + * +-- end ----+ +--------+ | case 4 | | | + * +--------+ +--------+ + */ + + /* Case 5: kernel covers range, don't map anything, should be rare */ + if ((start > kernel_sec_start) && (end < kernel_sec_end)) + break; + + /* Cases where the kernel is starting inside the range */ + if ((kernel_sec_start >= start) && (kernel_sec_start <= end)) { + /* Case 6: kernel is embedded in the range, we need two mappings */ + if ((start < kernel_sec_start) && (end > kernel_sec_end)) { + /* Map memory below the kernel */ + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = kernel_sec_start - start; + map.type = MT_MEMORY_RW; + create_mapping(&map); + /* Map memory above the kernel */ + map.pfn = __phys_to_pfn(kernel_sec_end); + map.virtual = __phys_to_virt(kernel_sec_end); + map.length = end - kernel_sec_end; + map.type = MT_MEMORY_RW; + create_mapping(&map); + break; + } + /* Case 1: kernel and range start at the same address, should be common */ + if (kernel_sec_start == start) + start = kernel_sec_end; + /* Case 3: kernel and range end at the same address, should be rare */ + if (kernel_sec_end == end) + end = kernel_sec_start; + } else if ((kernel_sec_start < start) && (kernel_sec_end > start) && (kernel_sec_end < end)) { + /* Case 2: kernel ends inside range, starts below it */ + start = kernel_sec_end; + } else if ((kernel_sec_start > start) && (kernel_sec_start < end) && (kernel_sec_end > end)) { + /* Case 4: kernel starts inside range, ends above it */ + end = kernel_sec_start; + } + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_RW; + create_mapping(&map); + } +} + +static void __init map_kernel(void) +{ + /* + * We use the well known kernel section start and end and split the area in the + * middle like this: + * . . + * | RW memory | + * +----------------+ kernel_x_start + * | Executable | + * | kernel memory | + * +----------------+ kernel_x_end / kernel_nx_start + * | Non-executable | + * | kernel memory | + * +----------------+ kernel_nx_end + * | RW memory | + * . . + * + * Notice that we are dealing with section sized mappings here so all of this + * will be bumped to the closest section boundary. This means that some of the + * non-executable part of the kernel memory is actually mapped as executable. + * This will only persist until we turn on proper memory management later on + * and we remap the whole kernel with page granularity. + */ + phys_addr_t kernel_x_start = kernel_sec_start; + phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + phys_addr_t kernel_nx_start = kernel_x_end; + phys_addr_t kernel_nx_end = kernel_sec_end; + struct map_desc map; + + map.pfn = __phys_to_pfn(kernel_x_start); + map.virtual = __phys_to_virt(kernel_x_start); + map.length = kernel_x_end - kernel_x_start; + map.type = MT_MEMORY_RWX; + create_mapping(&map); + + /* If the nx part is small it may end up covered by the tail of the RWX section */ + if (kernel_x_end == kernel_nx_end) + return; + + map.pfn = __phys_to_pfn(kernel_nx_start); + map.virtual = __phys_to_virt(kernel_nx_start); + map.length = kernel_nx_end - kernel_nx_start; + map.type = MT_MEMORY_RW; + create_mapping(&map); +} + +#ifdef CONFIG_ARM_PV_FIXUP +typedef void pgtables_remap(long long offset, unsigned long pgd); +pgtables_remap lpae_pgtables_remap_asm; + +/* + * early_paging_init() recreates boot time page table setup, allowing machines + * to switch over to a high (>4G) address space on LPAE systems + */ +static void __init early_paging_init(const struct machine_desc *mdesc) +{ + pgtables_remap *lpae_pgtables_remap; + unsigned long pa_pgd; + unsigned int cr, ttbcr; + long long offset; + + if (!mdesc->pv_fixup) + return; + + offset = mdesc->pv_fixup(); + if (offset == 0) + return; + + /* + * Offset the kernel section physical offsets so that the kernel + * mapping will work out later on. + */ + kernel_sec_start += offset; + kernel_sec_end += offset; + + /* + * Get the address of the remap function in the 1:1 identity + * mapping setup by the early page table assembly code. We + * must get this prior to the pv update. The following barrier + * ensures that this is complete before we fixup any P:V offsets. + */ + lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); + pa_pgd = __pa(swapper_pg_dir); + barrier(); + + pr_info("Switching physical address space to 0x%08llx\n", + (u64)PHYS_OFFSET + offset); + + /* Re-set the phys pfn offset, and the pv offset */ + __pv_offset += offset; + __pv_phys_pfn_offset += PFN_DOWN(offset); + + /* Run the patch stub to update the constants */ + fixup_pv_table(&__pv_table_begin, + (&__pv_table_end - &__pv_table_begin) << 2); + + /* + * We changing not only the virtual to physical mapping, but also + * the physical addresses used to access memory. We need to flush + * all levels of cache in the system with caching disabled to + * ensure that all data is written back, and nothing is prefetched + * into the caches. We also need to prevent the TLB walkers + * allocating into the caches too. Note that this is ARMv7 LPAE + * specific. + */ + cr = get_cr(); + set_cr(cr & ~(CR_I | CR_C)); + asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr)); + asm volatile("mcr p15, 0, %0, c2, c0, 2" + : : "r" (ttbcr & ~(3 << 8 | 3 << 10))); + flush_cache_all(); + + /* + * Fixup the page tables - this must be in the idmap region as + * we need to disable the MMU to do this safely, and hence it + * needs to be assembly. It's fairly simple, as we're using the + * temporary tables setup by the initial assembly code. + */ + lpae_pgtables_remap(offset, pa_pgd); + + /* Re-enable the caches and cacheable TLB walks */ + asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); + set_cr(cr); +} + +#else + +static void __init early_paging_init(const struct machine_desc *mdesc) +{ + long long offset; + + if (!mdesc->pv_fixup) + return; + + offset = mdesc->pv_fixup(); + if (offset == 0) + return; + + pr_crit("Physical address space modification is only to support Keystone2.\n"); + pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n"); + pr_crit("feature. Your kernel may crash now, have a good day.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); +} + +#endif + +static void __init early_fixmap_shutdown(void) +{ + int i; + unsigned long va = fix_to_virt(__end_of_permanent_fixed_addresses - 1); + + pte_offset_fixmap = pte_offset_late_fixmap; + pmd_clear(fixmap_pmd(va)); + local_flush_tlb_kernel_page(va); + + for (i = 0; i < __end_of_permanent_fixed_addresses; i++) { + pte_t *pte; + struct map_desc map; + + map.virtual = fix_to_virt(i); + pte = pte_offset_early_fixmap(pmd_off_k(map.virtual), map.virtual); + + /* Only i/o device mappings are supported ATM */ + if (pte_none(*pte) || + (pte_val(*pte) & L_PTE_MT_MASK) != L_PTE_MT_DEV_SHARED) + continue; + + map.pfn = pte_pfn(*pte); + map.type = MT_DEVICE; + map.length = PAGE_SIZE; + + create_mapping(&map); + } +} + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps, and sets up the zero page, bad page and bad page tables. + */ +void __init paging_init(const struct machine_desc *mdesc) +{ + void *zero_page; + + pr_debug("physical kernel sections: 0x%08llx-0x%08llx\n", + kernel_sec_start, kernel_sec_end); + + prepare_page_table(); + map_lowmem(); + memblock_set_current_limit(arm_lowmem_limit); + pr_debug("lowmem limit is %08llx\n", (long long)arm_lowmem_limit); + /* + * After this point early_alloc(), i.e. the memblock allocator, can + * be used + */ + map_kernel(); + dma_contiguous_remap(); + early_fixmap_shutdown(); + devicemaps_init(mdesc); + kmap_init(); + tcm_init(); + + top_pmd = pmd_off_k(0xffff0000); + + /* allocate the zero page. */ + zero_page = early_alloc(PAGE_SIZE); + + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + __flush_dcache_folio(NULL, page_folio(empty_zero_page)); +} + +void __init early_mm_init(const struct machine_desc *mdesc) +{ + build_mem_type_table(); + early_paging_init(mdesc); +} + +void set_ptes(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pteval, unsigned int nr) +{ + unsigned long ext = 0; + + if (addr < TASK_SIZE && pte_valid_user(pteval)) { + if (!pte_special(pteval)) + __sync_icache_dcache(pteval); + ext |= PTE_EXT_NG; + } + + for (;;) { + set_pte_ext(ptep, pteval, ext); + if (--nr == 0) + break; + ptep++; + pte_val(pteval) += PAGE_SIZE; + } +} diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c new file mode 100644 index 0000000000..c415f3859b --- /dev/null +++ b/arch/arm/mm/nommu.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/nommu.c + * + * ARM uCLinux supporting functions. + */ +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/io.h> +#include <linux/memblock.h> +#include <linux/kernel.h> + +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/sections.h> +#include <asm/page.h> +#include <asm/setup.h> +#include <asm/traps.h> +#include <asm/mach/arch.h> +#include <asm/cputype.h> +#include <asm/mpu.h> +#include <asm/procinfo.h> +#include <asm/idmap.h> + +#include "mm.h" + +unsigned long vectors_base; + +/* + * empty_zero_page is a special page that is used for + * zero-initialized data and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + +#ifdef CONFIG_ARM_MPU +struct mpu_rgn_info mpu_rgn_info; +#endif + +#ifdef CONFIG_CPU_CP15 +#ifdef CONFIG_CPU_HIGH_VECTOR +unsigned long setup_vectors_base(void) +{ + unsigned long reg = get_cr(); + + set_cr(reg | CR_V); + return 0xffff0000; +} +#else /* CONFIG_CPU_HIGH_VECTOR */ +/* Write exception base address to VBAR */ +static inline void set_vbar(unsigned long val) +{ + asm("mcr p15, 0, %0, c12, c0, 0" : : "r" (val) : "cc"); +} + +/* + * Security extensions, bits[7:4], permitted values, + * 0b0000 - not implemented, 0b0001/0b0010 - implemented + */ +static inline bool security_extensions_enabled(void) +{ + /* Check CPUID Identification Scheme before ID_PFR1 read */ + if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) + return cpuid_feature_extract(CPUID_EXT_PFR1, 4) || + cpuid_feature_extract(CPUID_EXT_PFR1, 20); + return 0; +} + +unsigned long setup_vectors_base(void) +{ + unsigned long base = 0, reg = get_cr(); + + set_cr(reg & ~CR_V); + if (security_extensions_enabled()) { + if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM)) + base = CONFIG_DRAM_BASE; + set_vbar(base); + } else if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM)) { + if (CONFIG_DRAM_BASE != 0) + pr_err("Security extensions not enabled, vectors cannot be remapped to RAM, vectors base will be 0x00000000\n"); + } + + return base; +} +#endif /* CONFIG_CPU_HIGH_VECTOR */ +#endif /* CONFIG_CPU_CP15 */ + +void __init arm_mm_memblock_reserve(void) +{ +#ifndef CONFIG_CPU_V7M + vectors_base = IS_ENABLED(CONFIG_CPU_CP15) ? setup_vectors_base() : 0; + /* + * Register the exception vector page. + * some architectures which the DRAM is the exception vector to trap, + * alloc_page breaks with error, although it is not NULL, but "0." + */ + memblock_reserve(vectors_base, 2 * PAGE_SIZE); +#else /* ifndef CONFIG_CPU_V7M */ + /* + * There is no dedicated vector page on V7-M. So nothing needs to be + * reserved here. + */ +#endif + /* + * In any case, always ensure address 0 is never used as many things + * get very confused if 0 is returned as a legitimate address. + */ + memblock_reserve(0, 1); +} + +static void __init adjust_lowmem_bounds_mpu(void) +{ + unsigned long pmsa = read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA; + + switch (pmsa) { + case MMFR0_PMSAv7: + pmsav7_adjust_lowmem_bounds(); + break; + case MMFR0_PMSAv8: + pmsav8_adjust_lowmem_bounds(); + break; + default: + break; + } +} + +static void __init mpu_setup(void) +{ + unsigned long pmsa = read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA; + + switch (pmsa) { + case MMFR0_PMSAv7: + pmsav7_setup(); + break; + case MMFR0_PMSAv8: + pmsav8_setup(); + break; + default: + break; + } +} + +void __init adjust_lowmem_bounds(void) +{ + phys_addr_t end; + adjust_lowmem_bounds_mpu(); + end = memblock_end_of_DRAM(); + high_memory = __va(end - 1) + 1; + memblock_set_current_limit(end); +} + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps, and sets up the zero page, bad page and bad page tables. + */ +void __init paging_init(const struct machine_desc *mdesc) +{ + void *zero_page; + + early_trap_init((void *)vectors_base); + mpu_setup(); + + /* allocate the zero page. */ + zero_page = (void *)memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!zero_page) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + flush_dcache_page(empty_zero_page); +} + +/* + * We don't need to do anything here for nommu machines. + */ +void setup_mm_for_reboot(void) +{ +} + +void flush_dcache_folio(struct folio *folio) +{ + __cpuc_flush_dcache_area(folio_address(folio), folio_size(folio)); +} +EXPORT_SYMBOL(flush_dcache_folio); + +void flush_dcache_page(struct page *page) +{ + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); +} +EXPORT_SYMBOL(flush_dcache_page); + +void copy_to_user_page(struct vm_area_struct *vma, struct page *page, + unsigned long uaddr, void *dst, const void *src, + unsigned long len) +{ + memcpy(dst, src, len); + if (vma->vm_flags & VM_EXEC) + __cpuc_coherent_user_range(uaddr, uaddr + len); +} + +void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, + size_t size, unsigned int mtype) +{ + if (pfn >= (0x100000000ULL >> PAGE_SHIFT)) + return NULL; + return (void __iomem *) (offset + (pfn << PAGE_SHIFT)); +} +EXPORT_SYMBOL(__arm_ioremap_pfn); + +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) +{ + return (void __iomem *)phys_addr; +} + +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); + +void __iomem *ioremap(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +#ifdef CONFIG_PCI + +#include <asm/mach/map.h> + +void __iomem *pci_remap_cfgspace(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_UNCACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL_GPL(pci_remap_cfgspace); +#endif + +void *arch_memremap_wb(phys_addr_t phys_addr, size_t size) +{ + return (void *)phys_addr; +} + +void iounmap(volatile void __iomem *io_addr) +{ +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/arm/mm/pabort-legacy.S b/arch/arm/mm/pabort-legacy.S new file mode 100644 index 0000000000..b2ffce4201 --- /dev/null +++ b/arch/arm/mm/pabort-legacy.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Function: legacy_pabort + * + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current prefetch abort. + */ + + .align 5 +ENTRY(legacy_pabort) + mov r0, r4 + mov r1, #5 + b do_PrefetchAbort +ENDPROC(legacy_pabort) diff --git a/arch/arm/mm/pabort-v6.S b/arch/arm/mm/pabort-v6.S new file mode 100644 index 0000000000..8686265dc9 --- /dev/null +++ b/arch/arm/mm/pabort-v6.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Function: v6_pabort + * + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current prefetch abort. + */ + + .align 5 +ENTRY(v6_pabort) + mov r0, r4 + mrc p15, 0, r1, c5, c0, 1 @ get IFSR + b do_PrefetchAbort +ENDPROC(v6_pabort) diff --git a/arch/arm/mm/pabort-v7.S b/arch/arm/mm/pabort-v7.S new file mode 100644 index 0000000000..9c70b1a21d --- /dev/null +++ b/arch/arm/mm/pabort-v7.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Function: v7_pabort + * + * Params : r2 = pt_regs + * : r4 = address of aborted instruction + * : r5 = psr for parent context + * + * Returns : r4 - r11, r13 preserved + * + * Purpose : obtain information about current prefetch abort. + */ + + .align 5 +ENTRY(v7_pabort) + mrc p15, 0, r0, c6, c0, 2 @ get IFAR + mrc p15, 0, r1, c5, c0, 1 @ get IFSR + b do_PrefetchAbort +ENDPROC(v7_pabort) diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c new file mode 100644 index 0000000000..064ad508c1 --- /dev/null +++ b/arch/arm/mm/pageattr.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2014, The Linux Foundation. All rights reserved. + */ +#include <linux/mm.h> +#include <linux/module.h> + +#include <asm/tlbflush.h> +#include <asm/set_memory.h> + +struct page_change_data { + pgprot_t set_mask; + pgprot_t clear_mask; +}; + +static int change_page_range(pte_t *ptep, unsigned long addr, void *data) +{ + struct page_change_data *cdata = data; + pte_t pte = *ptep; + + pte = clear_pte_bit(pte, cdata->clear_mask); + pte = set_pte_bit(pte, cdata->set_mask); + + set_pte_ext(ptep, pte, 0); + return 0; +} + +static bool range_in_range(unsigned long start, unsigned long size, + unsigned long range_start, unsigned long range_end) +{ + return start >= range_start && start < range_end && + size <= range_end - start; +} + +/* + * This function assumes that the range is mapped with PAGE_SIZE pages. + */ +static int __change_memory_common(unsigned long start, unsigned long size, + pgprot_t set_mask, pgprot_t clear_mask) +{ + struct page_change_data data; + int ret; + + data.set_mask = set_mask; + data.clear_mask = clear_mask; + + ret = apply_to_page_range(&init_mm, start, size, change_page_range, + &data); + + flush_tlb_kernel_range(start, start + size); + return ret; +} + +static int change_memory_common(unsigned long addr, int numpages, + pgprot_t set_mask, pgprot_t clear_mask) +{ + unsigned long start = addr & PAGE_MASK; + unsigned long end = PAGE_ALIGN(addr) + numpages * PAGE_SIZE; + unsigned long size = end - start; + + WARN_ON_ONCE(start != addr); + + if (!size) + return 0; + + if (!range_in_range(start, size, MODULES_VADDR, MODULES_END) && + !range_in_range(start, size, VMALLOC_START, VMALLOC_END)) + return -EINVAL; + + return __change_memory_common(start, size, set_mask, clear_mask); +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + return change_memory_common(addr, numpages, + __pgprot(L_PTE_RDONLY), + __pgprot(0)); +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + return change_memory_common(addr, numpages, + __pgprot(0), + __pgprot(L_PTE_RDONLY)); +} + +int set_memory_nx(unsigned long addr, int numpages) +{ + return change_memory_common(addr, numpages, + __pgprot(L_PTE_XN), + __pgprot(0)); +} + +int set_memory_x(unsigned long addr, int numpages) +{ + return change_memory_common(addr, numpages, + __pgprot(0), + __pgprot(L_PTE_XN)); +} + +int set_memory_valid(unsigned long addr, int numpages, int enable) +{ + if (enable) + return __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(L_PTE_VALID), + __pgprot(0)); + else + return __change_memory_common(addr, PAGE_SIZE * numpages, + __pgprot(0), + __pgprot(L_PTE_VALID)); +} diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c new file mode 100644 index 0000000000..f8e9bc58a8 --- /dev/null +++ b/arch/arm/mm/pgd.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/pgd.c + * + * Copyright (C) 1998-2005 Russell King + */ +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/highmem.h> +#include <linux/slab.h> + +#include <asm/cp15.h> +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/tlbflush.h> + +#include "mm.h" + +#ifdef CONFIG_ARM_LPAE +#define __pgd_alloc() kmalloc_array(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL) +#define __pgd_free(pgd) kfree(pgd) +#else +#define __pgd_alloc() (pgd_t *)__get_free_pages(GFP_KERNEL, 2) +#define __pgd_free(pgd) free_pages((unsigned long)pgd, 2) +#endif + +/* + * need to get a 16k page for level 1 + */ +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *new_pgd, *init_pgd; + p4d_t *new_p4d, *init_p4d; + pud_t *new_pud, *init_pud; + pmd_t *new_pmd, *init_pmd; + pte_t *new_pte, *init_pte; + + new_pgd = __pgd_alloc(); + if (!new_pgd) + goto no_pgd; + + memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); + + /* + * Copy over the kernel and IO PGD entries + */ + init_pgd = pgd_offset_k(0); + memcpy(new_pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + + clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); + +#ifdef CONFIG_ARM_LPAE + /* + * Allocate PMD table for modules and pkmap mappings. + */ + new_p4d = p4d_alloc(mm, new_pgd + pgd_index(MODULES_VADDR), + MODULES_VADDR); + if (!new_p4d) + goto no_p4d; + + new_pud = pud_alloc(mm, new_p4d, MODULES_VADDR); + if (!new_pud) + goto no_pud; + + new_pmd = pmd_alloc(mm, new_pud, 0); + if (!new_pmd) + goto no_pmd; +#ifdef CONFIG_KASAN + /* + * Copy PMD table for KASAN shadow mappings. + */ + init_pgd = pgd_offset_k(TASK_SIZE); + init_p4d = p4d_offset(init_pgd, TASK_SIZE); + init_pud = pud_offset(init_p4d, TASK_SIZE); + init_pmd = pmd_offset(init_pud, TASK_SIZE); + new_pmd = pmd_offset(new_pud, TASK_SIZE); + memcpy(new_pmd, init_pmd, + (pmd_index(MODULES_VADDR) - pmd_index(TASK_SIZE)) + * sizeof(pmd_t)); + clean_dcache_area(new_pmd, PTRS_PER_PMD * sizeof(pmd_t)); +#endif /* CONFIG_KASAN */ +#endif /* CONFIG_LPAE */ + + if (!vectors_high()) { + /* + * On ARM, first page must always be allocated since it + * contains the machine vectors. The vectors are always high + * with LPAE. + */ + new_p4d = p4d_alloc(mm, new_pgd, 0); + if (!new_p4d) + goto no_p4d; + + new_pud = pud_alloc(mm, new_p4d, 0); + if (!new_pud) + goto no_pud; + + new_pmd = pmd_alloc(mm, new_pud, 0); + if (!new_pmd) + goto no_pmd; + + new_pte = pte_alloc_map(mm, new_pmd, 0); + if (!new_pte) + goto no_pte; + +#ifndef CONFIG_ARM_LPAE + /* + * Modify the PTE pointer to have the correct domain. This + * needs to be the vectors domain to avoid the low vectors + * being unmapped. + */ + pmd_val(*new_pmd) &= ~PMD_DOMAIN_MASK; + pmd_val(*new_pmd) |= PMD_DOMAIN(DOMAIN_VECTORS); +#endif + + init_p4d = p4d_offset(init_pgd, 0); + init_pud = pud_offset(init_p4d, 0); + init_pmd = pmd_offset(init_pud, 0); + init_pte = pte_offset_map(init_pmd, 0); + set_pte_ext(new_pte + 0, init_pte[0], 0); + set_pte_ext(new_pte + 1, init_pte[1], 0); + pte_unmap(init_pte); + pte_unmap(new_pte); + } + + return new_pgd; + +no_pte: + pmd_free(mm, new_pmd); + mm_dec_nr_pmds(mm); +no_pmd: + pud_free(mm, new_pud); +no_pud: + p4d_free(mm, new_p4d); +no_p4d: + __pgd_free(new_pgd); +no_pgd: + return NULL; +} + +void pgd_free(struct mm_struct *mm, pgd_t *pgd_base) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pgtable_t pte; + + if (!pgd_base) + return; + + pgd = pgd_base + pgd_index(0); + if (pgd_none_or_clear_bad(pgd)) + goto no_pgd; + + p4d = p4d_offset(pgd, 0); + if (p4d_none_or_clear_bad(p4d)) + goto no_p4d; + + pud = pud_offset(p4d, 0); + if (pud_none_or_clear_bad(pud)) + goto no_pud; + + pmd = pmd_offset(pud, 0); + if (pmd_none_or_clear_bad(pmd)) + goto no_pmd; + + pte = pmd_pgtable(*pmd); + pmd_clear(pmd); + pte_free(mm, pte); + mm_dec_nr_ptes(mm); +no_pmd: + pud_clear(pud); + pmd_free(mm, pmd); + mm_dec_nr_pmds(mm); +no_pud: + p4d_clear(p4d); + pud_free(mm, pud); +no_p4d: + pgd_clear(pgd); + p4d_free(mm, p4d); +no_pgd: +#ifdef CONFIG_ARM_LPAE + /* + * Free modules/pkmap or identity pmd tables. + */ + for (pgd = pgd_base; pgd < pgd_base + PTRS_PER_PGD; pgd++) { + if (pgd_none_or_clear_bad(pgd)) + continue; + if (pgd_val(*pgd) & L_PGD_SWAPPER) + continue; + p4d = p4d_offset(pgd, 0); + if (p4d_none_or_clear_bad(p4d)) + continue; + pud = pud_offset(p4d, 0); + if (pud_none_or_clear_bad(pud)) + continue; + pmd = pmd_offset(pud, 0); + pud_clear(pud); + pmd_free(mm, pmd); + mm_dec_nr_pmds(mm); + p4d_clear(p4d); + pud_free(mm, pud); + mm_dec_nr_puds(mm); + pgd_clear(pgd); + p4d_free(mm, p4d); + } +#endif + __pgd_free(pgd_base); +} diff --git a/arch/arm/mm/physaddr.c b/arch/arm/mm/physaddr.c new file mode 100644 index 0000000000..3f263c840e --- /dev/null +++ b/arch/arm/mm/physaddr.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bug.h> +#include <linux/export.h> +#include <linux/types.h> +#include <linux/mmdebug.h> +#include <linux/mm.h> + +#include <asm/sections.h> +#include <asm/page.h> +#include <asm/fixmap.h> +#include <asm/dma.h> + +#include "mm.h" + +static inline bool __virt_addr_valid(unsigned long x) +{ + /* + * high_memory does not get immediately defined, and there + * are early callers of __pa() against PAGE_OFFSET + */ + if (!high_memory && x >= PAGE_OFFSET) + return true; + + if (high_memory && x >= PAGE_OFFSET && x < (unsigned long)high_memory) + return true; + + /* + * MAX_DMA_ADDRESS is a virtual address that may not correspond to an + * actual physical address. Enough code relies on __pa(MAX_DMA_ADDRESS) + * that we just need to work around it and always return true. + */ + if (x == MAX_DMA_ADDRESS) + return true; + + return false; +} + +phys_addr_t __virt_to_phys(unsigned long x) +{ + WARN(!__virt_addr_valid(x), + "virt_to_phys used for non-linear address: %pK (%pS)\n", + (void *)x, (void *)x); + + return __virt_to_phys_nodebug(x); +} +EXPORT_SYMBOL(__virt_to_phys); + +phys_addr_t __phys_addr_symbol(unsigned long x) +{ + /* This is bounds checking against the kernel image only. + * __pa_symbol should only be used on kernel symbol addresses. + */ + VIRTUAL_BUG_ON(x < (unsigned long)KERNEL_START || + x > (unsigned long)KERNEL_END); + + return __pa_symbol_nodebug(x); +} +EXPORT_SYMBOL(__phys_addr_symbol); diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c new file mode 100644 index 0000000000..59d916ccdf --- /dev/null +++ b/arch/arm/mm/pmsa-v7.c @@ -0,0 +1,476 @@ +/* + * Based on linux/arch/arm/mm/nommu.c + * + * ARM PMSAv7 supporting functions. + */ + +#include <linux/bitops.h> +#include <linux/memblock.h> +#include <linux/string.h> + +#include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/mpu.h> +#include <asm/sections.h> + +#include "mm.h" + +struct region { + phys_addr_t base; + phys_addr_t size; + unsigned long subreg; +}; + +static struct region __initdata mem[MPU_MAX_REGIONS]; +#ifdef CONFIG_XIP_KERNEL +static struct region __initdata xip[MPU_MAX_REGIONS]; +#endif + +static unsigned int __initdata mpu_min_region_order; +static unsigned int __initdata mpu_max_regions; + +static int __init __mpu_min_region_order(void); +static int __init __mpu_max_regions(void); + +#ifndef CONFIG_CPU_V7M + +#define DRBAR __ACCESS_CP15(c6, 0, c1, 0) +#define IRBAR __ACCESS_CP15(c6, 0, c1, 1) +#define DRSR __ACCESS_CP15(c6, 0, c1, 2) +#define IRSR __ACCESS_CP15(c6, 0, c1, 3) +#define DRACR __ACCESS_CP15(c6, 0, c1, 4) +#define IRACR __ACCESS_CP15(c6, 0, c1, 5) +#define RNGNR __ACCESS_CP15(c6, 0, c2, 0) + +/* Region number */ +static inline void rgnr_write(u32 v) +{ + write_sysreg(v, RNGNR); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static inline void dracr_write(u32 v) +{ + write_sysreg(v, DRACR); +} + +/* Region size register */ +static inline void drsr_write(u32 v) +{ + write_sysreg(v, DRSR); +} + +/* Region base address register */ +static inline void drbar_write(u32 v) +{ + write_sysreg(v, DRBAR); +} + +static inline u32 drbar_read(void) +{ + return read_sysreg(DRBAR); +} +/* Optional instruction-side region attributes */ + +/* I-side Region access control register */ +static inline void iracr_write(u32 v) +{ + write_sysreg(v, IRACR); +} + +/* I-side Region size register */ +static inline void irsr_write(u32 v) +{ + write_sysreg(v, IRSR); +} + +/* I-side Region base address register */ +static inline void irbar_write(u32 v) +{ + write_sysreg(v, IRBAR); +} + +static inline u32 irbar_read(void) +{ + return read_sysreg(IRBAR); +} + +#else + +static inline void rgnr_write(u32 v) +{ + writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv7_RNR); +} + +/* Data-side / unified region attributes */ + +/* Region access control register */ +static inline void dracr_write(u32 v) +{ + u32 rsr = readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RASR) & GENMASK(15, 0); + + writel_relaxed((v << 16) | rsr, BASEADDR_V7M_SCB + PMSAv7_RASR); +} + +/* Region size register */ +static inline void drsr_write(u32 v) +{ + u32 racr = readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RASR) & GENMASK(31, 16); + + writel_relaxed(v | racr, BASEADDR_V7M_SCB + PMSAv7_RASR); +} + +/* Region base address register */ +static inline void drbar_write(u32 v) +{ + writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv7_RBAR); +} + +static inline u32 drbar_read(void) +{ + return readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RBAR); +} + +/* ARMv7-M only supports a unified MPU, so I-side operations are nop */ + +static inline void iracr_write(u32 v) {} +static inline void irsr_write(u32 v) {} +static inline void irbar_write(u32 v) {} +static inline unsigned long irbar_read(void) {return 0;} + +#endif + +static bool __init try_split_region(phys_addr_t base, phys_addr_t size, struct region *region) +{ + unsigned long subreg, bslots, sslots; + phys_addr_t abase = base & ~(size - 1); + phys_addr_t asize = base + size - abase; + phys_addr_t p2size = 1 << __fls(asize); + phys_addr_t bdiff, sdiff; + + if (p2size != asize) + p2size *= 2; + + bdiff = base - abase; + sdiff = p2size - asize; + subreg = p2size / PMSAv7_NR_SUBREGS; + + if ((bdiff % subreg) || (sdiff % subreg)) + return false; + + bslots = bdiff / subreg; + sslots = sdiff / subreg; + + if (bslots || sslots) { + int i; + + if (subreg < PMSAv7_MIN_SUBREG_SIZE) + return false; + + if (bslots + sslots > PMSAv7_NR_SUBREGS) + return false; + + for (i = 0; i < bslots; i++) + _set_bit(i, ®ion->subreg); + + for (i = 1; i <= sslots; i++) + _set_bit(PMSAv7_NR_SUBREGS - i, ®ion->subreg); + } + + region->base = abase; + region->size = p2size; + + return true; +} + +static int __init allocate_region(phys_addr_t base, phys_addr_t size, + unsigned int limit, struct region *regions) +{ + int count = 0; + phys_addr_t diff = size; + int attempts = MPU_MAX_REGIONS; + + while (diff) { + /* Try cover region as is (maybe with help of subregions) */ + if (try_split_region(base, size, ®ions[count])) { + count++; + base += size; + diff -= size; + size = diff; + } else { + /* + * Maximum aligned region might overflow phys_addr_t + * if "base" is 0. Hence we keep everything below 4G + * until we take the smaller of the aligned region + * size ("asize") and rounded region size ("p2size"), + * one of which is guaranteed to be smaller than the + * maximum physical address. + */ + phys_addr_t asize = (base - 1) ^ base; + phys_addr_t p2size = (1 << __fls(diff)) - 1; + + size = asize < p2size ? asize + 1 : p2size + 1; + } + + if (count > limit) + break; + + if (!attempts) + break; + + attempts--; + } + + return count; +} + +/* MPU initialisation functions */ +void __init pmsav7_adjust_lowmem_bounds(void) +{ + phys_addr_t specified_mem_size = 0, total_mem_size = 0; + phys_addr_t mem_start; + phys_addr_t mem_end; + phys_addr_t reg_start, reg_end; + unsigned int mem_max_regions; + bool first = true; + int num; + u64 i; + + /* Free-up PMSAv7_PROBE_REGION */ + mpu_min_region_order = __mpu_min_region_order(); + + /* How many regions are supported */ + mpu_max_regions = __mpu_max_regions(); + + mem_max_regions = min((unsigned int)MPU_MAX_REGIONS, mpu_max_regions); + + /* We need to keep one slot for background region */ + mem_max_regions--; + +#ifndef CONFIG_CPU_V7M + /* ... and one for vectors */ + mem_max_regions--; +#endif + +#ifdef CONFIG_XIP_KERNEL + /* plus some regions to cover XIP ROM */ + num = allocate_region(CONFIG_XIP_PHYS_ADDR, __pa(_exiprom) - CONFIG_XIP_PHYS_ADDR, + mem_max_regions, xip); + + mem_max_regions -= num; +#endif + + for_each_mem_range(i, ®_start, ®_end) { + if (first) { + phys_addr_t phys_offset = PHYS_OFFSET; + + /* + * Initially only use memory continuous from + * PHYS_OFFSET */ + if (reg_start != phys_offset) + panic("First memory bank must be contiguous from PHYS_OFFSET"); + + mem_start = reg_start; + mem_end = reg_end; + specified_mem_size = mem_end - mem_start; + first = false; + } else { + /* + * memblock auto merges contiguous blocks, remove + * all blocks afterwards in one go (we can't remove + * blocks separately while iterating) + */ + pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", + &mem_end, ®_start); + memblock_remove(reg_start, 0 - reg_start); + break; + } + } + + memset(mem, 0, sizeof(mem)); + num = allocate_region(mem_start, specified_mem_size, mem_max_regions, mem); + + for (i = 0; i < num; i++) { + unsigned long subreg = mem[i].size / PMSAv7_NR_SUBREGS; + + total_mem_size += mem[i].size - subreg * hweight_long(mem[i].subreg); + + pr_debug("MPU: base %pa size %pa disable subregions: %*pbl\n", + &mem[i].base, &mem[i].size, PMSAv7_NR_SUBREGS, &mem[i].subreg); + } + + if (total_mem_size != specified_mem_size) { + pr_warn("Truncating memory from %pa to %pa (MPU region constraints)", + &specified_mem_size, &total_mem_size); + memblock_remove(mem_start + total_mem_size, + specified_mem_size - total_mem_size); + } +} + +static int __init __mpu_max_regions(void) +{ + /* + * We don't support a different number of I/D side regions so if we + * have separate instruction and data memory maps then return + * whichever side has a smaller number of supported regions. + */ + u32 dregions, iregions, mpuir; + + mpuir = read_cpuid_mputype(); + + dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; + + /* Check for separate d-side and i-side memory maps */ + if (mpuir & MPUIR_nU) + iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION; + + /* Use the smallest of the two maxima */ + return min(dregions, iregions); +} + +static int __init mpu_iside_independent(void) +{ + /* MPUIR.nU specifies whether there is *not* a unified memory map */ + return read_cpuid_mputype() & MPUIR_nU; +} + +static int __init __mpu_min_region_order(void) +{ + u32 drbar_result, irbar_result; + + /* We've kept a region free for this probing */ + rgnr_write(PMSAv7_PROBE_REGION); + isb(); + /* + * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum + * region order + */ + drbar_write(0xFFFFFFFC); + drbar_result = irbar_result = drbar_read(); + drbar_write(0x0); + /* If the MPU is non-unified, we use the larger of the two minima*/ + if (mpu_iside_independent()) { + irbar_write(0xFFFFFFFC); + irbar_result = irbar_read(); + irbar_write(0x0); + } + isb(); /* Ensure that MPU region operations have completed */ + /* Return whichever result is larger */ + + return __ffs(max(drbar_result, irbar_result)); +} + +static int __init mpu_setup_region(unsigned int number, phys_addr_t start, + unsigned int size_order, unsigned int properties, + unsigned int subregions, bool need_flush) +{ + u32 size_data; + + /* We kept a region free for probing resolution of MPU regions*/ + if (number > mpu_max_regions + || number >= MPU_MAX_REGIONS) + return -ENOENT; + + if (size_order > 32) + return -ENOMEM; + + if (size_order < mpu_min_region_order) + return -ENOMEM; + + /* Writing N to bits 5:1 (RSR_SZ) specifies region size 2^N+1 */ + size_data = ((size_order - 1) << PMSAv7_RSR_SZ) | 1 << PMSAv7_RSR_EN; + size_data |= subregions << PMSAv7_RSR_SD; + + if (need_flush) + flush_cache_all(); + + dsb(); /* Ensure all previous data accesses occur with old mappings */ + rgnr_write(number); + isb(); + drbar_write(start); + dracr_write(properties); + isb(); /* Propagate properties before enabling region */ + drsr_write(size_data); + + /* Check for independent I-side registers */ + if (mpu_iside_independent()) { + irbar_write(start); + iracr_write(properties); + isb(); + irsr_write(size_data); + } + isb(); + + /* Store region info (we treat i/d side the same, so only store d) */ + mpu_rgn_info.rgns[number].dracr = properties; + mpu_rgn_info.rgns[number].drbar = start; + mpu_rgn_info.rgns[number].drsr = size_data; + + mpu_rgn_info.used++; + + return 0; +} + +/* +* Set up default MPU regions, doing nothing if there is no MPU +*/ +void __init pmsav7_setup(void) +{ + int i, region = 0, err = 0; + + /* Setup MPU (order is important) */ + + /* Background */ + err |= mpu_setup_region(region++, 0, 32, + PMSAv7_ACR_XN | PMSAv7_RGN_STRONGLY_ORDERED | PMSAv7_AP_PL1RW_PL0RW, + 0, false); + +#ifdef CONFIG_XIP_KERNEL + /* ROM */ + for (i = 0; i < ARRAY_SIZE(xip); i++) { + /* + * In case we overwrite RAM region we set earlier in + * head-nommu.S (which is cachable) all subsequent + * data access till we setup RAM bellow would be done + * with BG region (which is uncachable), thus we need + * to clean and invalidate cache. + */ + bool need_flush = region == PMSAv7_RAM_REGION; + + if (!xip[i].size) + continue; + + err |= mpu_setup_region(region++, xip[i].base, ilog2(xip[i].size), + PMSAv7_AP_PL1RO_PL0NA | PMSAv7_RGN_NORMAL, + xip[i].subreg, need_flush); + } +#endif + + /* RAM */ + for (i = 0; i < ARRAY_SIZE(mem); i++) { + if (!mem[i].size) + continue; + + err |= mpu_setup_region(region++, mem[i].base, ilog2(mem[i].size), + PMSAv7_AP_PL1RW_PL0RW | PMSAv7_RGN_NORMAL, + mem[i].subreg, false); + } + + /* Vectors */ +#ifndef CONFIG_CPU_V7M + err |= mpu_setup_region(region++, vectors_base, ilog2(2 * PAGE_SIZE), + PMSAv7_AP_PL1RW_PL0NA | PMSAv7_RGN_NORMAL, + 0, false); +#endif + if (err) { + panic("MPU region initialization failure! %d", err); + } else { + pr_info("Using ARMv7 PMSA Compliant MPU. " + "Region independence: %s, Used %d of %d regions\n", + mpu_iside_independent() ? "Yes" : "No", + mpu_rgn_info.used, mpu_max_regions); + } +} diff --git a/arch/arm/mm/pmsa-v8.c b/arch/arm/mm/pmsa-v8.c new file mode 100644 index 0000000000..28cdc54684 --- /dev/null +++ b/arch/arm/mm/pmsa-v8.c @@ -0,0 +1,308 @@ +/* + * Based on linux/arch/arm/pmsa-v7.c + * + * ARM PMSAv8 supporting functions. + */ + +#include <linux/memblock.h> +#include <linux/range.h> + +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/mpu.h> + +#include <asm/page.h> +#include <asm/sections.h> + +#include "mm.h" + +#ifndef CONFIG_CPU_V7M + +#define PRSEL __ACCESS_CP15(c6, 0, c2, 1) +#define PRBAR __ACCESS_CP15(c6, 0, c3, 0) +#define PRLAR __ACCESS_CP15(c6, 0, c3, 1) + +static inline u32 prlar_read(void) +{ + return read_sysreg(PRLAR); +} + +static inline u32 prbar_read(void) +{ + return read_sysreg(PRBAR); +} + +static inline void prsel_write(u32 v) +{ + write_sysreg(v, PRSEL); +} + +static inline void prbar_write(u32 v) +{ + write_sysreg(v, PRBAR); +} + +static inline void prlar_write(u32 v) +{ + write_sysreg(v, PRLAR); +} +#else + +static inline u32 prlar_read(void) +{ + return readl_relaxed(BASEADDR_V7M_SCB + PMSAv8_RLAR); +} + +static inline u32 prbar_read(void) +{ + return readl_relaxed(BASEADDR_V7M_SCB + PMSAv8_RBAR); +} + +static inline void prsel_write(u32 v) +{ + writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RNR); +} + +static inline void prbar_write(u32 v) +{ + writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RBAR); +} + +static inline void prlar_write(u32 v) +{ + writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RLAR); +} + +#endif + +static struct range __initdata io[MPU_MAX_REGIONS]; +static struct range __initdata mem[MPU_MAX_REGIONS]; + +static unsigned int __initdata mpu_max_regions; + +static __init bool is_region_fixed(int number) +{ + switch (number) { + case PMSAv8_XIP_REGION: + case PMSAv8_KERNEL_REGION: + return true; + default: + return false; + } +} + +void __init pmsav8_adjust_lowmem_bounds(void) +{ + phys_addr_t mem_end; + phys_addr_t reg_start, reg_end; + bool first = true; + u64 i; + + for_each_mem_range(i, ®_start, ®_end) { + if (first) { + phys_addr_t phys_offset = PHYS_OFFSET; + + /* + * Initially only use memory continuous from + * PHYS_OFFSET */ + if (reg_start != phys_offset) + panic("First memory bank must be contiguous from PHYS_OFFSET"); + mem_end = reg_end; + first = false; + } else { + /* + * memblock auto merges contiguous blocks, remove + * all blocks afterwards in one go (we can't remove + * blocks separately while iterating) + */ + pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", + &mem_end, ®_start); + memblock_remove(reg_start, 0 - reg_start); + break; + } + } +} + +static int __init __mpu_max_regions(void) +{ + static int max_regions; + u32 mpuir; + + if (max_regions) + return max_regions; + + mpuir = read_cpuid_mputype(); + + max_regions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION; + + return max_regions; +} + +static int __init __pmsav8_setup_region(unsigned int number, u32 bar, u32 lar) +{ + if (number > mpu_max_regions + || number >= MPU_MAX_REGIONS) + return -ENOENT; + + dsb(); + prsel_write(number); + isb(); + prbar_write(bar); + prlar_write(lar); + + mpu_rgn_info.rgns[number].prbar = bar; + mpu_rgn_info.rgns[number].prlar = lar; + + mpu_rgn_info.used++; + + return 0; +} + +static int __init pmsav8_setup_ram(unsigned int number, phys_addr_t start,phys_addr_t end) +{ + u32 bar, lar; + + if (is_region_fixed(number)) + return -EINVAL; + + bar = start; + lar = (end - 1) & ~(PMSAv8_MINALIGN - 1); + + bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED; + lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN; + + return __pmsav8_setup_region(number, bar, lar); +} + +static int __init pmsav8_setup_io(unsigned int number, phys_addr_t start,phys_addr_t end) +{ + u32 bar, lar; + + if (is_region_fixed(number)) + return -EINVAL; + + bar = start; + lar = (end - 1) & ~(PMSAv8_MINALIGN - 1); + + bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN; + lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN; + + return __pmsav8_setup_region(number, bar, lar); +} + +static int __init pmsav8_setup_fixed(unsigned int number, phys_addr_t start,phys_addr_t end) +{ + u32 bar, lar; + + if (!is_region_fixed(number)) + return -EINVAL; + + bar = start; + lar = (end - 1) & ~(PMSAv8_MINALIGN - 1); + + bar |= PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED; + lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN; + + prsel_write(number); + isb(); + + if (prbar_read() != bar || prlar_read() != lar) + return -EINVAL; + + /* Reserved region was set up early, we just need a record for secondaries */ + mpu_rgn_info.rgns[number].prbar = bar; + mpu_rgn_info.rgns[number].prlar = lar; + + mpu_rgn_info.used++; + + return 0; +} + +#ifndef CONFIG_CPU_V7M +static int __init pmsav8_setup_vector(unsigned int number, phys_addr_t start,phys_addr_t end) +{ + u32 bar, lar; + + if (number == PMSAv8_KERNEL_REGION) + return -EINVAL; + + bar = start; + lar = (end - 1) & ~(PMSAv8_MINALIGN - 1); + + bar |= PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED; + lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN; + + return __pmsav8_setup_region(number, bar, lar); +} +#endif + +void __init pmsav8_setup(void) +{ + int i, err = 0; + int region = PMSAv8_KERNEL_REGION; + + /* How many regions are supported ? */ + mpu_max_regions = __mpu_max_regions(); + + /* RAM: single chunk of memory */ + add_range(mem, ARRAY_SIZE(mem), 0, memblock.memory.regions[0].base, + memblock.memory.regions[0].base + memblock.memory.regions[0].size); + + /* IO: cover full 4G range */ + add_range(io, ARRAY_SIZE(io), 0, 0, 0xffffffff); + + /* RAM and IO: exclude kernel */ + subtract_range(mem, ARRAY_SIZE(mem), __pa(KERNEL_START), __pa(KERNEL_END)); + subtract_range(io, ARRAY_SIZE(io), __pa(KERNEL_START), __pa(KERNEL_END)); + +#ifdef CONFIG_XIP_KERNEL + /* RAM and IO: exclude xip */ + subtract_range(mem, ARRAY_SIZE(mem), CONFIG_XIP_PHYS_ADDR, __pa(_exiprom)); + subtract_range(io, ARRAY_SIZE(io), CONFIG_XIP_PHYS_ADDR, __pa(_exiprom)); +#endif + +#ifndef CONFIG_CPU_V7M + /* RAM and IO: exclude vectors */ + subtract_range(mem, ARRAY_SIZE(mem), vectors_base, vectors_base + 2 * PAGE_SIZE); + subtract_range(io, ARRAY_SIZE(io), vectors_base, vectors_base + 2 * PAGE_SIZE); +#endif + /* IO: exclude RAM */ + for (i = 0; i < ARRAY_SIZE(mem); i++) + subtract_range(io, ARRAY_SIZE(io), mem[i].start, mem[i].end); + + /* Now program MPU */ + +#ifdef CONFIG_XIP_KERNEL + /* ROM */ + err |= pmsav8_setup_fixed(PMSAv8_XIP_REGION, CONFIG_XIP_PHYS_ADDR, __pa(_exiprom)); +#endif + /* Kernel */ + err |= pmsav8_setup_fixed(region++, __pa(KERNEL_START), __pa(KERNEL_END)); + + + /* IO */ + for (i = 0; i < ARRAY_SIZE(io); i++) { + if (!io[i].end) + continue; + + err |= pmsav8_setup_io(region++, io[i].start, io[i].end); + } + + /* RAM */ + for (i = 0; i < ARRAY_SIZE(mem); i++) { + if (!mem[i].end) + continue; + + err |= pmsav8_setup_ram(region++, mem[i].start, mem[i].end); + } + + /* Vectors */ +#ifndef CONFIG_CPU_V7M + err |= pmsav8_setup_vector(region++, vectors_base, vectors_base + 2 * PAGE_SIZE); +#endif + if (err) + pr_warn("MPU region initialization failure! %d", err); + else + pr_info("Using ARM PMSAv8 Compliant MPU. Used %d of %d regions\n", + mpu_rgn_info.used, mpu_max_regions); +} diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S new file mode 100644 index 0000000000..6837cf7a48 --- /dev/null +++ b/arch/arm/mm/proc-arm1020.S @@ -0,0 +1,515 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-arm1020.S: MMU functions for ARM1020 + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * These are the low level assembler for performing cache and TLB + * functions on the arm1020. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1020_proc_init() + */ +ENTRY(cpu_arm1020_proc_init) + ret lr + +/* + * cpu_arm1020_proc_fin() + */ +ENTRY(cpu_arm1020_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm1020_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm1020_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm1020_reset) + .popsection + +/* + * cpu_arm1020_do_idle() + */ + .align 5 +ENTRY(cpu_arm1020_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + ret lr + +/* ================================= CACHE ================================ */ + + .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm1020_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + ret lr +ENDPROC(arm1020_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1020_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1020_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + mcr p15, 0, ip, c7, c10, 4 @ drain WB + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1020_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, ip, c7, c10, 4 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020_coherent_kern_range) + /* FALLTRHOUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcr p15, 0, ip, c7, c10, 4 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm1020_flush_kern_dcache_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1020_dma_inv_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, ip, c7, c10, 4 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, ip, c7, c10, 4 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + mcrne p15, 0, ip, c7, c10, 4 @ drain WB +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1020_dma_clean_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 + mcr p15, 0, ip, c7, c10, 4 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1020_dma_clean_range + bcs arm1020_dma_inv_range + b arm1020_dma_flush_range +ENDPROC(arm1020_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020_dma_unmap_area) + ret lr +ENDPROC(arm1020_dma_unmap_area) + + .globl arm1020_flush_kern_cache_louis + .equ arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1020 + + .align 5 +ENTRY(cpu_arm1020_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, ip, c7, c10, 4 @ drain WB + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1020_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1020_switch_mm) +#ifdef CONFIG_MMU +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r3, c7, c10, 4 + mov r1, #0xF @ 16 segments +1: mov r3, #0x3F @ 64 entries +2: mov ip, r3, LSL #26 @ shift up entry + orr ip, ip, r1, LSL #5 @ shift in/up index + mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry + mov ip, #0 + mcr p15, 0, ip, c7, c10, 4 + subs r3, r3, #1 + cmp r3, #0 + bge 2b @ entries 3F to 0 + subs r1, r1, #1 + cmp r1, #0 + bge 1b @ segments 15 to 0 + +#endif + mov r1, #0 +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif /* CONFIG_MMU */ + ret lr + +/* + * cpu_arm1020_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1020_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 4 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif /* CONFIG_MMU */ + ret lr + + .type __arm1020_setup, #function +__arm1020_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + adr r5, arm1020_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.. .... .... .... +#endif + ret lr + .size __arm1020_setup, . - __arm1020_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + */ + .type arm1020_crval, #object +arm1020_crval: + crval clear=0x0000593f, mmuset=0x00003935, ucset=0x00001930 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1020, dabort=v4t_early_abort, pabort=legacy_pabort + + + .section ".rodata" + + string cpu_arch_name, "armv5t" + string cpu_elf_name, "v5" + + .type cpu_arm1020_name, #object +cpu_arm1020_name: + .ascii "ARM1020" +#ifndef CONFIG_CPU_ICACHE_DISABLE + .ascii "i" +#endif +#ifndef CONFIG_CPU_DCACHE_DISABLE + .ascii "d" +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + .ascii "(wt)" +#else + .ascii "(wb)" +#endif +#endif +#ifndef CONFIG_CPU_BPREDICT_DISABLE + .ascii "B" +#endif +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + .ascii "RR" +#endif + .ascii "\0" + .size cpu_arm1020_name, . - cpu_arm1020_name + + .align + + .section ".proc.info.init", "a" + + .type __arm1020_proc_info,#object +__arm1020_proc_info: + .long 0x4104a200 @ ARM 1020T (Architecture v5T) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __arm1020_setup, __arm1020_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm1020_name + .long arm1020_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1020_cache_fns + .size __arm1020_proc_info, . - __arm1020_proc_info diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S new file mode 100644 index 0000000000..df49b10250 --- /dev/null +++ b/arch/arm/mm/proc-arm1020e.S @@ -0,0 +1,475 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-arm1020e.S: MMU functions for ARM1020 + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * These are the low level assembler for performing cache and TLB + * functions on the arm1020e. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1020e_proc_init() + */ +ENTRY(cpu_arm1020e_proc_init) + ret lr + +/* + * cpu_arm1020e_proc_fin() + */ +ENTRY(cpu_arm1020e_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm1020e_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm1020e_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm1020e_reset) + .popsection + +/* + * cpu_arm1020e_do_idle() + */ + .align 5 +ENTRY(cpu_arm1020e_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + ret lr + +/* ================================= CACHE ================================ */ + + .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm1020e_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + ret lr +ENDPROC(arm1020e_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1020e_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1020e_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1020e_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020e_coherent_kern_range) + /* FALLTHROUGH */ +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020e_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm1020e_flush_kern_dcache_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1020e_dma_inv_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1020e_dma_clean_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1020e_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020e_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1020e_dma_clean_range + bcs arm1020e_dma_inv_range + b arm1020e_dma_flush_range +ENDPROC(arm1020e_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1020e_dma_unmap_area) + ret lr +ENDPROC(arm1020e_dma_unmap_area) + + .globl arm1020e_flush_kern_cache_louis + .equ arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1020e + + .align 5 +ENTRY(cpu_arm1020e_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1020e_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1020e_switch_mm) +#ifdef CONFIG_MMU +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r3, c7, c10, 4 + mov r1, #0xF @ 16 segments +1: mov r3, #0x3F @ 64 entries +2: mov ip, r3, LSL #26 @ shift up entry + orr ip, ip, r1, LSL #5 @ shift in/up index + mcr p15, 0, ip, c7, c14, 2 @ Clean & Inval DCache entry + mov ip, #0 + subs r3, r3, #1 + cmp r3, #0 + bge 2b @ entries 3F to 0 + subs r1, r1, #1 + cmp r1, #0 + bge 1b @ segments 15 to 0 + +#endif + mov r1, #0 +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif + ret lr + +/* + * cpu_arm1020e_set_pte(ptep, pte) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1020e_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#endif /* CONFIG_MMU */ + ret lr + + .type __arm1020e_setup, #function +__arm1020e_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm1020e_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.. .... .... .... +#endif + ret lr + .size __arm1020e_setup, . - __arm1020e_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + */ + .type arm1020e_crval, #object +arm1020e_crval: + crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1020e, dabort=v4t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_arm1020e_name, "ARM1020E" + + .align + + .section ".proc.info.init", "a" + + .type __arm1020e_proc_info,#object +__arm1020e_proc_info: + .long 0x4105a200 @ ARM 1020TE (Architecture v5TE) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __arm1020e_setup, __arm1020e_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP + .long cpu_arm1020e_name + .long arm1020e_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1020e_cache_fns + .size __arm1020e_proc_info, . - __arm1020e_proc_info diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S new file mode 100644 index 0000000000..e89ce467f6 --- /dev/null +++ b/arch/arm/mm/proc-arm1022.S @@ -0,0 +1,469 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-arm1022.S: MMU functions for ARM1022E + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM1022E. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1022_proc_init() + */ +ENTRY(cpu_arm1022_proc_init) + ret lr + +/* + * cpu_arm1022_proc_fin() + */ +ENTRY(cpu_arm1022_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm1022_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm1022_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm1022_reset) + .popsection + +/* + * cpu_arm1022_do_idle() + */ + .align 5 +ENTRY(cpu_arm1022_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + ret lr + +/* ================================= CACHE ================================ */ + + .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm1022_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + ret lr +ENDPROC(arm1022_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1022_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1022_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1022_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1022_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1022_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm1022_flush_kern_dcache_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1022_dma_inv_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1022_dma_clean_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1022_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1022_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1022_dma_clean_range + bcs arm1022_dma_inv_range + b arm1022_dma_flush_range +ENDPROC(arm1022_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1022_dma_unmap_area) + ret lr +ENDPROC(arm1022_dma_unmap_area) + + .globl arm1022_flush_kern_cache_louis + .equ arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1022 + + .align 5 +ENTRY(cpu_arm1022_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1022_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1022_switch_mm) +#ifdef CONFIG_MMU +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 16 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 15 to 0 +#endif + mov r1, #0 +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif + ret lr + +/* + * cpu_arm1022_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1022_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#endif /* CONFIG_MMU */ + ret lr + + .type __arm1022_setup, #function +__arm1022_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm1022_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.............. +#endif + ret lr + .size __arm1022_setup, . - __arm1022_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + * + */ + .type arm1022_crval, #object +arm1022_crval: + crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1022, dabort=v4t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_arm1022_name, "ARM1022" + + .align + + .section ".proc.info.init", "a" + + .type __arm1022_proc_info,#object +__arm1022_proc_info: + .long 0x4105a220 @ ARM 1022E (v5TE) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __arm1022_setup, __arm1022_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP + .long cpu_arm1022_name + .long arm1022_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1022_cache_fns + .size __arm1022_proc_info, . - __arm1022_proc_info diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S new file mode 100644 index 0000000000..7fdd1a205e --- /dev/null +++ b/arch/arm/mm/proc-arm1026.S @@ -0,0 +1,463 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-arm1026.S: MMU functions for ARM1026EJ-S + * + * Copyright (C) 2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM1026EJ-S. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 16 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 32768 + + .text +/* + * cpu_arm1026_proc_init() + */ +ENTRY(cpu_arm1026_proc_init) + ret lr + +/* + * cpu_arm1026_proc_fin() + */ +ENTRY(cpu_arm1026_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm1026_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm1026_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm1026_reset) + .popsection + +/* + * cpu_arm1026_do_idle() + */ + .align 5 +ENTRY(cpu_arm1026_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + ret lr + +/* ================================= CACHE ================================ */ + + .align 5 + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm1026_flush_icache_all) +#ifndef CONFIG_CPU_ICACHE_DISABLE + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache +#endif + ret lr +ENDPROC(arm1026_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm1026_flush_user_cache_all) + /* FALLTHROUGH */ +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm1026_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate + bne 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for this space + */ +ENTRY(arm1026_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + tst r2, #VM_EXEC +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache +#endif + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1026_coherent_kern_range) + /* FALLTHROUGH */ +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1026_coherent_user_range) + mov ip, #0 + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm1026_flush_kern_dcache_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1026_dma_inv_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm1026_dma_clean_range: + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm1026_dma_flush_range) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1026_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm1026_dma_clean_range + bcs arm1026_dma_inv_range + b arm1026_dma_flush_range +ENDPROC(arm1026_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm1026_dma_unmap_area) + ret lr +ENDPROC(arm1026_dma_unmap_area) + + .globl arm1026_flush_kern_cache_louis + .equ arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm1026 + + .align 5 +ENTRY(cpu_arm1026_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_DISABLE + mov ip, #0 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm1026_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm1026_switch_mm) +#ifdef CONFIG_MMU + mov r1, #0 +#ifndef CONFIG_CPU_DCACHE_DISABLE +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test, clean, invalidate + bne 1b +#endif +#ifndef CONFIG_CPU_ICACHE_DISABLE + mcr p15, 0, r1, c7, c5, 0 @ invalidate I cache +#endif + mcr p15, 0, r1, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs +#endif + ret lr + +/* + * cpu_arm1026_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm1026_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_DISABLE + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif +#endif /* CONFIG_MMU */ + ret lr + + .type __arm1026_setup, #function +__arm1026_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 + mcr p15, 0, r4, c2, c0 @ load page table pointer +#endif +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ explicitly disable writeback + mcr p15, 7, r0, c15, c0, 0 +#endif + adr r5, arm1026_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .R.. .... .... .... +#endif + ret lr + .size __arm1026_setup, . - __arm1026_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..11 0101 + * + */ + .type arm1026_crval, #object +arm1026_crval: + crval clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001934 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm1026, dabort=v5t_early_abort, pabort=legacy_pabort + + .section .rodata + + string cpu_arch_name, "armv5tej" + string cpu_elf_name, "v5" + .align + string cpu_arm1026_name, "ARM1026EJ-S" + .align + + .section ".proc.info.init", "a" + + .type __arm1026_proc_info,#object +__arm1026_proc_info: + .long 0x4106a260 @ ARM 1026EJ-S (v5TEJ) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __arm1026_setup, __arm1026_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA + .long cpu_arm1026_name + .long arm1026_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm1026_cache_fns + .size __arm1026_proc_info, . - __arm1026_proc_info diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S new file mode 100644 index 0000000000..3b687e6dd9 --- /dev/null +++ b/arch/arm/mm/proc-arm720.S @@ -0,0 +1,205 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-arm720.S: MMU functions for ARM720 + * + * Copyright (C) 2000 Steve Hill (sjhill@cotw.com) + * Rob Scott (rscott@mtrob.fdns.net) + * Copyright (C) 2000 ARM Limited, Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2004. + * + * These are the low level assembler for performing cache and TLB + * functions on the ARM720T. The ARM720T has a writethrough IDC + * cache, so we don't need to clean it. + * + * Changelog: + * 05-09-2000 SJH Created by moving 720 specific functions + * out of 'proc-arm6,7.S' per RMK discussion + * 07-25-2000 SJH Added idle function. + * 08-25-2000 DBS Updated for integration of ARM Ltd version. + * 04-20-2004 HSC modified for non-paged memory management mode. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * Function: arm720_proc_init (void) + * : arm720_proc_fin (void) + * + * Notes : This processor does not require these + */ +ENTRY(cpu_arm720_dcache_clean_area) +ENTRY(cpu_arm720_proc_init) + ret lr + +ENTRY(cpu_arm720_proc_fin) + mrc p15, 0, r0, c1, c0, 0 + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * Function: arm720_proc_do_idle(void) + * Params : r0 = unused + * Purpose : put the processor in proper idle mode + */ +ENTRY(cpu_arm720_do_idle) + ret lr + +/* + * Function: arm720_switch_mm(unsigned long pgd_phys) + * Params : pgd_phys Physical address of page table + * Purpose : Perform a task switch, saving the old process' state and restoring + * the new. + */ +ENTRY(cpu_arm720_switch_mm) +#ifdef CONFIG_MMU + mov r1, #0 + mcr p15, 0, r1, c7, c7, 0 @ invalidate cache + mcr p15, 0, r0, c2, c0, 0 @ update page table ptr + mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4) +#endif + ret lr + +/* + * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) + * Params : r0 = Address to set + * : r1 = value to set + * Purpose : Set a PTE and flush it out of any WB cache + */ + .align 5 +ENTRY(cpu_arm720_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 +#endif + ret lr + +/* + * Function: arm720_reset + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm720_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate cache +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ flush TLB (v4) +#endif + mrc p15, 0, ip, c1, c0, 0 @ get ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x2100 @ ..v....s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm720_reset) + .popsection + + .type __arm710_setup, #function +__arm710_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ invalidate caches +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) +#endif + mrc p15, 0, r0, c1, c0 @ get control register + ldr r5, arm710_cr1_clear + bic r0, r0, r5 + ldr r5, arm710_cr1_set + orr r0, r0, r5 + ret lr @ __ret (head.S) + .size __arm710_setup, . - __arm710_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .... 0001 ..11 1101 + * + */ + .type arm710_cr1_clear, #object + .type arm710_cr1_set, #object +arm710_cr1_clear: + .word 0x0f3f +arm710_cr1_set: + .word 0x013d + + .type __arm720_setup, #function +__arm720_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7, 0 @ invalidate caches +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7, 0 @ flush TLB (v4) +#endif + adr r5, arm720_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register + bic r0, r0, r5 + orr r0, r0, r6 + ret lr @ __ret (head.S) + .size __arm720_setup, . - __arm720_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..1. 1001 ..11 1101 + * + */ + .type arm720_crval, #object +arm720_crval: + crval clear=0x00002f3f, mmuset=0x0000213d, ucset=0x00000130 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm720, dabort=v4t_late_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm710_name, "ARM710T" + string cpu_arm720_name, "ARM720T" + + .align + +/* + * See <asm/procinfo.h> for a definition of this structure. + */ + + .section ".proc.info.init", "a" + +.macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn \cpu_flush, __\name\()_proc_info @ cpu_flush + .long cpu_arch_name @ arch_name + .long cpu_elf_name @ elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB @ elf_hwcap + .long \cpu_name + .long arm720_processor_functions + .long v4_tlb_fns + .long v4wt_user_fns + .long v4_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm720_proc_info arm710, 0x41807100, 0xffffff00, cpu_arm710_name, __arm710_setup + arm720_proc_info arm720, 0x41807200, 0xffffff00, cpu_arm720_name, __arm720_setup diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S new file mode 100644 index 0000000000..f2ec3bc608 --- /dev/null +++ b/arch/arm/mm/proc-arm740.S @@ -0,0 +1,147 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/arm740.S: utility functions for ARM740 + * + * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + + .text +/* + * cpu_arm740_proc_init() + * cpu_arm740_do_idle() + * cpu_arm740_dcache_clean_area() + * cpu_arm740_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm740_proc_init) +ENTRY(cpu_arm740_do_idle) +ENTRY(cpu_arm740_dcache_clean_area) +ENTRY(cpu_arm740_switch_mm) + ret lr + +/* + * cpu_arm740_proc_fin() + */ +ENTRY(cpu_arm740_proc_fin) + mrc p15, 0, r0, c1, c0, 0 + bic r0, r0, #0x3f000000 @ bank/f/lock/s + bic r0, r0, #0x0000000c @ w-buffer/cache + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm740_reset(loc) + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm740_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c0, 0 @ invalidate cache + mrc p15, 0, ip, c1, c0, 0 @ get ctrl register + bic ip, ip, #0x0000000c @ ............wc.. + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm740_reset) + .popsection + + .type __arm740_setup, #function +__arm740_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c0, 0 @ invalidate caches + + mcr p15, 0, r0, c6, c3 @ disable area 3~7 + mcr p15, 0, r0, c6, c4 + mcr p15, 0, r0, c6, c5 + mcr p15, 0, r0, c6, c6 + mcr p15, 0, r0, c6, c7 + + mov r0, #0x0000003F @ base = 0, size = 4GB + mcr p15, 0, r0, c6, c0 @ set area 0, default + + ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM + ldr r3, =(CONFIG_DRAM_SIZE >> 12) @ size of RAM (must be >= 4KB) + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r4, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit + mcr p15, 0, r0, c6, c1 @ set area 1, RAM + + ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH + ldr r3, =(CONFIG_FLASH_SIZE >> 12) @ size of FLASH (must be >= 4KB) + cmp r3, #0 + moveq r0, #0 + beq 2f + mov r4, #10 @ 11 is the minimum (4KB) +1: add r4, r4, #1 @ area size *= 2 + movs r3, r3, lsr #1 + bne 1b @ count not zero r-shift + orr r0, r0, r4, lsl #1 @ the area register value + orr r0, r0, #1 @ set enable bit +2: mcr p15, 0, r0, c6, c2 @ set area 2, ROM/FLASH + + mov r0, #0x06 + mcr p15, 0, r0, c2, c0 @ Region 1&2 cacheable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #0x00 @ disable whole write buffer +#else + mov r0, #0x02 @ Region 1 write bufferred +#endif + mcr p15, 0, r0, c3, c0 + + mov r0, #0x10000 + sub r0, r0, #1 @ r0 = 0xffff + mcr p15, 0, r0, c5, c0 @ all read/write access + + mrc p15, 0, r0, c1, c0 @ get control register + bic r0, r0, #0x3F000000 @ set to standard caching mode + @ need some benchmark + orr r0, r0, #0x0000000d @ MPU/Cache/WB + + ret lr + + .size __arm740_setup, . - __arm740_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm740, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_arm740_name, "ARM740T" + + .align + + .section ".proc.info.init", "a" + .type __arm740_proc_info,#object +__arm740_proc_info: + .long 0x41807400 + .long 0xfffffff0 + .long 0 + .long 0 + initfn __arm740_setup, __arm740_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT + .long cpu_arm740_name + .long arm740_processor_functions + .long 0 + .long 0 + .long v4_cache_fns @ cache model + .size __arm740_proc_info, . - __arm740_proc_info diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S new file mode 100644 index 0000000000..01bbe7576c --- /dev/null +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/proc-arm7tdmi.S: utility functions for ARM7TDMI + * + * Copyright (C) 2003-2006 Hyok S. Choi <hyok.choi@samsung.com> + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + + .text +/* + * cpu_arm7tdmi_proc_init() + * cpu_arm7tdmi_do_idle() + * cpu_arm7tdmi_dcache_clean_area() + * cpu_arm7tdmi_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm7tdmi_proc_init) +ENTRY(cpu_arm7tdmi_do_idle) +ENTRY(cpu_arm7tdmi_dcache_clean_area) +ENTRY(cpu_arm7tdmi_switch_mm) + ret lr + +/* + * cpu_arm7tdmi_proc_fin() + */ +ENTRY(cpu_arm7tdmi_proc_fin) + ret lr + +/* + * Function: cpu_arm7tdmi_reset(loc) + * Params : loc(r0) address to jump to + * Purpose : Sets up everything for a reset and jump to the location for soft reset. + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm7tdmi_reset) + ret r0 +ENDPROC(cpu_arm7tdmi_reset) + .popsection + + .type __arm7tdmi_setup, #function +__arm7tdmi_setup: + ret lr + .size __arm7tdmi_setup, . - __arm7tdmi_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm7tdmi, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm7tdmi_name, "ARM7TDMI" + string cpu_triscenda7_name, "Triscend-A7x" + string cpu_at91_name, "Atmel-AT91M40xxx" + string cpu_s3c3410_name, "Samsung-S3C3410" + string cpu_s3c44b0x_name, "Samsung-S3C44B0x" + string cpu_s3c4510b_name, "Samsung-S3C4510B" + string cpu_s3c4530_name, "Samsung-S3C4530" + string cpu_netarm_name, "NETARM" + + .align + + .section ".proc.info.init", "a" + +.macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \ + extra_hwcaps=0 + .type __\name\()_proc_info, #object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long 0 + .long 0 + initfn __arm7tdmi_setup, __\name\()_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps ) + .long \cpu_name + .long arm7tdmi_processor_functions + .long 0 + .long 0 + .long v4_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm7tdmi_proc_info arm7tdmi, 0x41007700, 0xfff8ff00, \ + cpu_arm7tdmi_name + arm7tdmi_proc_info triscenda7, 0x0001d2ff, 0x0001ffff, \ + cpu_triscenda7_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info at91, 0x14000040, 0xfff000e0, \ + cpu_at91_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c4510b, 0x36365000, 0xfffff000, \ + cpu_s3c4510b_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c4530, 0x4c000000, 0xfff000e0, \ + cpu_s3c4530_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c3410, 0x34100000, 0xffff0000, \ + cpu_s3c3410_name, extra_hwcaps=HWCAP_THUMB + arm7tdmi_proc_info s3c44b0x, 0x44b00000, 0xffff0000, \ + cpu_s3c44b0x_name, extra_hwcaps=HWCAP_THUMB diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S new file mode 100644 index 0000000000..a234cd8ba5 --- /dev/null +++ b/arch/arm/mm/proc-arm920.S @@ -0,0 +1,466 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-arm920.S: MMU functions for ARM920 + * + * Copyright (C) 1999,2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * These are the low level assembler for performing cache and TLB + * functions on the arm920. + * + * CONFIG_CPU_ARM920_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 8 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 65536 + + + .text +/* + * cpu_arm920_proc_init() + */ +ENTRY(cpu_arm920_proc_init) + ret lr + +/* + * cpu_arm920_proc_fin() + */ +ENTRY(cpu_arm920_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm920_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm920_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm920_reset) + .popsection + +/* + * cpu_arm920_do_idle() + */ + .align 5 +ENTRY(cpu_arm920_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + ret lr + + +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm920_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(arm920_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(arm920_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm920_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags for address space + */ +ENTRY(arm920_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm920_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm920_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm920_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm920_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm920_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm920_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm920_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm920_dma_clean_range + bcs arm920_dma_inv_range + b arm920_dma_flush_range +ENDPROC(arm920_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm920_dma_unmap_area) + ret lr +ENDPROC(arm920_dma_unmap_area) + + .globl arm920_flush_kern_cache_louis + .equ arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm920 +#endif + + +ENTRY(cpu_arm920_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm920_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm920_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +@ && 'Clean & Invalidate whole DCache' +@ && Re-written to use Index Ops. +@ && Uses registers r1, r3 and ip + + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + ret lr + +/* + * cpu_arm920_set_pte(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm920_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + ret lr + +/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ +.globl cpu_arm920_suspend_size +.equ cpu_arm920_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_arm920_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_arm920_do_suspend) + +ENTRY(cpu_arm920_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_arm920_do_resume) +#endif + + .type __arm920_setup, #function +__arm920_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm920_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + ret lr + .size __arm920_setup, . - __arm920_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 0001 ..11 0101 + * + */ + .type arm920_crval, #object +arm920_crval: + crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm920, dabort=v4t_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm920_name, "ARM920T" + + .align + + .section ".proc.info.init", "a" + + .type __arm920_proc_info,#object +__arm920_proc_info: + .long 0x41009200 + .long 0xff00fff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __arm920_setup, __arm920_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm920_name + .long arm920_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + .long arm920_cache_fns +#else + .long v4wt_cache_fns +#endif + .size __arm920_proc_info, . - __arm920_proc_info diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S new file mode 100644 index 0000000000..53c029dcfd --- /dev/null +++ b/arch/arm/mm/proc-arm922.S @@ -0,0 +1,444 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-arm922.S: MMU functions for ARM922 + * + * Copyright (C) 1999,2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * Copyright (C) 2001 Altera Corporation + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * These are the low level assembler for performing cache and TLB + * functions on the arm922. + * + * CONFIG_CPU_ARM922_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 32 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 4 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 64 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. (I think this should + * be 32768). + */ +#define CACHE_DLIMIT 8192 + + + .text +/* + * cpu_arm922_proc_init() + */ +ENTRY(cpu_arm922_proc_init) + ret lr + +/* + * cpu_arm922_proc_fin() + */ +ENTRY(cpu_arm922_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm922_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm922_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm922_reset) + .popsection + +/* + * cpu_arm922_do_idle() + */ + .align 5 +ENTRY(cpu_arm922_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + ret lr + + +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm922_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(arm922_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm922_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm922_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 8 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm922_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm922_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm922_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm922_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm922_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm922_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm922_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm922_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm922_dma_clean_range + bcs arm922_dma_inv_range + b arm922_dma_flush_range +ENDPROC(arm922_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm922_dma_unmap_area) + ret lr +ENDPROC(arm922_dma_unmap_area) + + .globl arm922_flush_kern_cache_louis + .equ arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm922 +#endif + + +ENTRY(cpu_arm922_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm922_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm922_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +@ && 'Clean & Invalidate whole DCache' +@ && Re-written to use Index Ops. +@ && Uses registers r1, r3 and ip + + mov r1, #(CACHE_DSEGMENTS - 1) << 5 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 5 + bcs 1b @ segments 7 to 0 +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + ret lr + +/* + * cpu_arm922_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm922_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif /* CONFIG_MMU */ + ret lr + + .type __arm922_setup, #function +__arm922_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, arm922_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + ret lr + .size __arm922_setup, . - __arm922_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 0001 ..11 0101 + * + */ + .type arm922_crval, #object +arm922_crval: + crval clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm922, dabort=v4t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm922_name, "ARM922T" + + .align + + .section ".proc.info.init", "a" + + .type __arm922_proc_info,#object +__arm922_proc_info: + .long 0x41009220 + .long 0xff00fff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __arm922_setup, __arm922_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm922_name + .long arm922_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + .long arm922_cache_fns +#else + .long v4wt_cache_fns +#endif + .size __arm922_proc_info, . - __arm922_proc_info diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S new file mode 100644 index 0000000000..0bfad62ea8 --- /dev/null +++ b/arch/arm/mm/proc-arm925.S @@ -0,0 +1,509 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/arm925.S: MMU functions for ARM925 + * + * Copyright (C) 1999,2000 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * Copyright (C) 2002 RidgeRun, Inc. + * Copyright (C) 2002-2003 MontaVista Software, Inc. + * + * Update for Linux-2.6 and cache flush improvements + * Copyright (C) 2004 Nokia Corporation by Tony Lindgren <tony@atomide.com> + * + * hacked for non-paged-MM by Hyok S. Choi, 2004. + * + * These are the low level assembler for performing cache and TLB + * functions on the arm925. + * + * CONFIG_CPU_ARM925_CPU_IDLE -> nohlt + * + * Some additional notes based on deciphering the TI TRM on OMAP-5910: + * + * NOTE1: The TI925T Configuration Register bit "D-cache clean and flush + * entry mode" must be 0 to flush the entries in both segments + * at once. This is the default value. See TRM 2-20 and 2-24 for + * more information. + * + * NOTE2: Default is the "D-cache clean and flush entry mode". It looks + * like the "Transparent mode" must be on for partial cache flushes + * to work in this mode. This mode only works with 16-bit external + * memory. See TRM 2-24 for more information. + * + * NOTE3: Write-back cache flushing seems to be flakey with devices using + * direct memory access, such as USB OHCI. The workaround is to use + * write-through cache with CONFIG_CPU_DCACHE_WRITETHROUGH (this is + * the default for OMAP-1510). + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * The size of one data cache line. + */ +#define CACHE_DLINESIZE 16 + +/* + * The number of data cache segments. + */ +#define CACHE_DSEGMENTS 2 + +/* + * The number of lines in a cache segment. + */ +#define CACHE_DENTRIES 256 + +/* + * This is the size at which it becomes more efficient to + * clean the whole cache, rather than using the individual + * cache line maintenance instructions. + */ +#define CACHE_DLIMIT 8192 + + .text +/* + * cpu_arm925_proc_init() + */ +ENTRY(cpu_arm925_proc_init) + ret lr + +/* + * cpu_arm925_proc_fin() + */ +ENTRY(cpu_arm925_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm925_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm925_reset) + /* Send software reset to MPU and DSP */ + mov ip, #0xff000000 + orr ip, ip, #0x00fe0000 + orr ip, ip, #0x0000ce00 + mov r4, #1 + strh r4, [ip, #0x10] +ENDPROC(cpu_arm925_reset) + .popsection + + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 + +/* + * cpu_arm925_do_idle() + * + * Called with IRQs disabled + */ + .align 10 +ENTRY(cpu_arm925_do_idle) + mov r0, #0 + mrc p15, 0, r1, c1, c0, 0 @ Read control register + mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer + bic r2, r1, #1 << 12 + mcr p15, 0, r2, c1, c0, 0 @ Disable I cache + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable + ret lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm925_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(arm925_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm925_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm925_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + /* Flush entries in both segments at once, see NOTE1 above */ + mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment +2: mcr p15, 0, r3, c7, c14, 2 @ clean+invalidate D index + subs r3, r3, #1 << 4 + bcs 2b @ entries 255 to 0 +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm925_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#else + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#endif + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm925_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm925_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm925_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm925_dma_inv_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +#endif + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm925_dma_clean_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm925_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry +#else + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm925_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm925_dma_clean_range + bcs arm925_dma_inv_range + b arm925_dma_flush_range +ENDPROC(arm925_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm925_dma_unmap_area) + ret lr +ENDPROC(arm925_dma_unmap_area) + + .globl arm925_flush_kern_cache_louis + .equ arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm925 + +ENTRY(cpu_arm925_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm925_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm925_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + /* Flush entries in bothe segments at once, see NOTE1 above */ + mov r3, #(CACHE_DENTRIES - 1) << 4 @ 256 entries in segment +2: mcr p15, 0, r3, c7, c14, 2 @ clean & invalidate D index + subs r3, r3, #1 << 4 + bcs 2b @ entries 255 to 0 +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + ret lr + +/* + * cpu_arm925_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm925_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif /* CONFIG_MMU */ + ret lr + + .type __arm925_setup, #function +__arm925_setup: + mov r0, #0 + + /* Transparent on, D-cache clean & flush mode. See NOTE2 above */ + orr r0,r0,#1 << 1 @ transparent mode on + mcr p15, 0, r0, c15, c1, 0 @ write TI config register + + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ disable write-back on caches explicitly + mcr p15, 7, r0, c15, c0, 0 +#endif + + adr r5, arm925_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .1.. .... .... .... +#endif + ret lr + .size __arm925_setup, . - __arm925_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 0001 ..11 1101 + * + */ + .type arm925_crval, #object +arm925_crval: + crval clear=0x00007f3f, mmuset=0x0000313d, ucset=0x00001130 + + __INITDATA + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm925, dabort=v4t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm925_name, "ARM925T" + + .align + + .section ".proc.info.init", "a" + +.macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __arm925_setup, __\name\()_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm925_name + .long arm925_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm925_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm925_proc_info arm925, 0x54029250, 0xfffffff0, cpu_arm925_name + arm925_proc_info arm915, 0x54029150, 0xfffffff0, cpu_arm925_name diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S new file mode 100644 index 0000000000..0487a2c343 --- /dev/null +++ b/arch/arm/mm/proc-arm926.S @@ -0,0 +1,488 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-arm926.S: MMU functions for ARM926EJ-S + * + * Copyright (C) 1999-2001 ARM Limited + * Copyright (C) 2000 Deep Blue Solutions Ltd. + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * These are the low level assembler for performing cache and TLB + * functions on the arm926. + * + * CONFIG_CPU_ARM926_CPU_IDLE -> nohlt + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define CACHE_DLIMIT 16384 + +/* + * the cache line size of the I and D cache + */ +#define CACHE_DLINESIZE 32 + + .text +/* + * cpu_arm926_proc_init() + */ +ENTRY(cpu_arm926_proc_init) + ret lr + +/* + * cpu_arm926_proc_fin() + */ +ENTRY(cpu_arm926_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm926_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm926_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm926_reset) + .popsection + +/* + * cpu_arm926_do_idle() + * + * Called with IRQs disabled + */ + .align 10 +ENTRY(cpu_arm926_do_idle) + mov r0, #0 + mrc p15, 0, r1, c1, c0, 0 @ Read control register + mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer + bic r2, r1, #1 << 12 + mrs r3, cpsr @ Disable FIQs while Icache + orr ip, r3, #PSR_F_BIT @ is disabled + msr cpsr_c, ip + mcr p15, 0, r2, c1, c0, 0 @ Disable I cache + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable + msr cpsr_c, r3 @ Restore FIQ state + ret lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm926_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(arm926_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(arm926_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm926_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate + bne 1b +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm926_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#else + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#endif + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm926_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm926_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm926_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm926_dma_inv_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +#endif + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +arm926_dma_clean_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm926_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry +#else + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm926_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm926_dma_clean_range + bcs arm926_dma_inv_range + b arm926_dma_flush_range +ENDPROC(arm926_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm926_dma_unmap_area) + ret lr +ENDPROC(arm926_dma_unmap_area) + + .globl arm926_flush_kern_cache_louis + .equ arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm926 + +ENTRY(cpu_arm926_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_arm926_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_arm926_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else +@ && 'Clean & Invalidate whole DCache' +1: mrc p15, 0, APSR_nzcv, c7, c14, 3 @ test,clean,invalidate + bne 1b +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + ret lr + +/* + * cpu_arm926_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_arm926_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + ret lr + +/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ +.globl cpu_arm926_suspend_size +.equ cpu_arm926_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_arm926_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_arm926_do_suspend) + +ENTRY(cpu_arm926_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_arm926_do_resume) +#endif + + .type __arm926_setup, #function +__arm926_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #4 @ disable write-back on caches explicitly + mcr p15, 7, r0, c15, c0, 0 +#endif + + adr r5, arm926_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x4000 @ .1.. .... .... .... +#endif + ret lr + .size __arm926_setup, . - __arm926_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 0001 ..11 0101 + * + */ + .type arm926_crval, #object +arm926_crval: + crval clear=0x00007f3f, mmuset=0x00003135, ucset=0x00001134 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm926, dabort=v5tj_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv5tej" + string cpu_elf_name, "v5" + string cpu_arm926_name, "ARM926EJ-S" + + .align + + .section ".proc.info.init", "a" + + .type __arm926_proc_info,#object +__arm926_proc_info: + .long 0x41069260 @ ARM926EJ-S (v5TEJ) + .long 0xff0ffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __arm926_setup, __arm926_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA + .long cpu_arm926_name + .long arm926_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long arm926_cache_fns + .size __arm926_proc_info, . - __arm926_proc_info diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S new file mode 100644 index 0000000000..cf9bfcc825 --- /dev/null +++ b/arch/arm/mm/proc-arm940.S @@ -0,0 +1,360 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/arm940.S: utility functions for ARM940T + * + * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* ARM940T has a 4KB DCache comprising 256 lines of 4 words */ +#define CACHE_DLINESIZE 16 +#define CACHE_DSEGMENTS 4 +#define CACHE_DENTRIES 64 + + .text +/* + * cpu_arm940_proc_init() + * cpu_arm940_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm940_proc_init) +ENTRY(cpu_arm940_switch_mm) + ret lr + +/* + * cpu_arm940_proc_fin() + */ +ENTRY(cpu_arm940_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x00001000 @ i-cache + bic r0, r0, #0x00000004 @ d-cache + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm940_reset(loc) + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm940_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ flush I cache + mcr p15, 0, ip, c7, c6, 0 @ flush D cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x00000005 @ .............c.p + bic ip, ip, #0x00001000 @ i-cache + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm940_reset) + .popsection + +/* + * cpu_arm940_do_idle() + */ + .align 5 +ENTRY(cpu_arm940_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + ret lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm940_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(arm940_flush_icache_all) + +/* + * flush_user_cache_all() + */ +ENTRY(arm940_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm940_flush_kern_cache_all) + mov r2, #VM_EXEC + /* FALLTHROUGH */ + +/* + * flush_user_cache_range(start, end, flags) + * + * There is no efficient way to flush a range of cache entries + * in the specified address range. Thus, flushes all. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ +ENTRY(arm940_flush_user_cache_range) + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ flush D cache +#else + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 3 to 0 +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm940_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm940_coherent_user_range) + /* FALLTHROUGH */ + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(arm940_flush_kern_dcache_area) + mov r0, #0 + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * There is no efficient way to invalidate a specifid virtual + * address range. Thus, invalidates all. + * + * - start - virtual start address + * - end - virtual end address + */ +arm940_dma_inv_range: + mov ip, #0 + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c6, 2 @ flush D entry + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * There is no efficient way to clean a specifid virtual + * address range. Thus, cleans all. + * + * - start - virtual start address + * - end - virtual end address + */ +arm940_dma_clean_range: +ENTRY(cpu_arm940_dcache_clean_area) + mov ip, #0 +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: mcr p15, 0, r3, c7, c10, 2 @ clean D entry + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 +#endif + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * There is no efficient way to clean and invalidate a specifid + * virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm940_dma_flush_range) + mov ip, #0 + mov r1, #(CACHE_DSEGMENTS - 1) << 4 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries +2: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r3, c7, c14, 2 @ clean/flush D entry +#else + mcr p15, 0, r3, c7, c6, 2 @ invalidate D entry +#endif + subs r3, r3, #1 << 26 + bcs 2b @ entries 63 to 0 + subs r1, r1, #1 << 4 + bcs 1b @ segments 7 to 0 + mcr p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm940_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm940_dma_clean_range + bcs arm940_dma_inv_range + b arm940_dma_flush_range +ENDPROC(arm940_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm940_dma_unmap_area) + ret lr +ENDPROC(arm940_dma_unmap_area) + + .globl arm940_flush_kern_cache_louis + .equ arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm940 + + .type __arm940_setup, #function +__arm940_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + + mcr p15, 0, r0, c6, c3, 0 @ disable data area 3~7 + mcr p15, 0, r0, c6, c4, 0 + mcr p15, 0, r0, c6, c5, 0 + mcr p15, 0, r0, c6, c6, 0 + mcr p15, 0, r0, c6, c7, 0 + + mcr p15, 0, r0, c6, c3, 1 @ disable instruction area 3~7 + mcr p15, 0, r0, c6, c4, 1 + mcr p15, 0, r0, c6, c5, 1 + mcr p15, 0, r0, c6, c6, 1 + mcr p15, 0, r0, c6, c7, 1 + + mov r0, #0x0000003F @ base = 0, size = 4GB + mcr p15, 0, r0, c6, c0, 0 @ set area 0, default + mcr p15, 0, r0, c6, c0, 1 + + ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM + ldr r7, =CONFIG_DRAM_SIZE >> 12 @ size of RAM (must be >= 4KB) + pr_val r3, r0, r7, #1 + mcr p15, 0, r3, c6, c1, 0 @ set area 1, RAM + mcr p15, 0, r3, c6, c1, 1 + + ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH + ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) + pr_val r3, r0, r6, #1 + mcr p15, 0, r3, c6, c2, 0 @ set area 2, ROM/FLASH + mcr p15, 0, r3, c6, c2, 1 + + mov r0, #0x06 + mcr p15, 0, r0, c2, c0, 0 @ Region 1&2 cacheable + mcr p15, 0, r0, c2, c0, 1 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #0x00 @ disable whole write buffer +#else + mov r0, #0x02 @ Region 1 write bufferred +#endif + mcr p15, 0, r0, c3, c0, 0 + + mov r0, #0x10000 + sub r0, r0, #1 @ r0 = 0xffff + mcr p15, 0, r0, c5, c0, 0 @ all read/write access + mcr p15, 0, r0, c5, c0, 1 + + mrc p15, 0, r0, c1, c0 @ get control register + orr r0, r0, #0x00001000 @ I-cache + orr r0, r0, #0x00000005 @ MPU/D-cache + + ret lr + + .size __arm940_setup, . - __arm940_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm940, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm940_name, "ARM940T" + + .align + + .section ".proc.info.init", "a" + + .type __arm940_proc_info,#object +__arm940_proc_info: + .long 0x41009400 + .long 0xff00fff0 + .long 0 + initfn __arm940_setup, __arm940_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm940_name + .long arm940_processor_functions + .long 0 + .long 0 + .long arm940_cache_fns + .size __arm940_proc_info, . - __arm940_proc_info + diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S new file mode 100644 index 0000000000..6fb3898ad1 --- /dev/null +++ b/arch/arm/mm/proc-arm946.S @@ -0,0 +1,415 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/arm946.S: utility functions for ARM946E-S + * + * Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com) + * + * (Many of cache codes are from proc-arm926.S) + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * ARM946E-S is synthesizable to have 0KB to 1MB sized D-Cache, + * comprising 256 lines of 32 bytes (8 words). + */ +#define CACHE_DSIZE (CONFIG_CPU_DCACHE_SIZE) /* typically 8KB. */ +#define CACHE_DLINESIZE 32 /* fixed */ +#define CACHE_DSEGMENTS 4 /* fixed */ +#define CACHE_DENTRIES (CACHE_DSIZE / CACHE_DSEGMENTS / CACHE_DLINESIZE) +#define CACHE_DLIMIT (CACHE_DSIZE * 4) /* benchmark needed */ + + .text +/* + * cpu_arm946_proc_init() + * cpu_arm946_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm946_proc_init) +ENTRY(cpu_arm946_switch_mm) + ret lr + +/* + * cpu_arm946_proc_fin() + */ +ENTRY(cpu_arm946_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x00001000 @ i-cache + bic r0, r0, #0x00000004 @ d-cache + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_arm946_reset(loc) + * Params : r0 = address to jump to + * Notes : This sets up everything for a reset + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm946_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ flush I cache + mcr p15, 0, ip, c7, c6, 0 @ flush D cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x00000005 @ .............c.p + bic ip, ip, #0x00001000 @ i-cache + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_arm946_reset) + .popsection + +/* + * cpu_arm946_do_idle() + */ + .align 5 +ENTRY(cpu_arm946_do_idle) + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + ret lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(arm946_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(arm946_flush_icache_all) + +/* + * flush_user_cache_all() + */ +ENTRY(arm946_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(arm946_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ flush D cache +#else + mov r1, #(CACHE_DSEGMENTS - 1) << 29 @ 4 segments +1: orr r3, r1, #(CACHE_DENTRIES - 1) << 4 @ n entries +2: mcr p15, 0, r3, c7, c14, 2 @ clean/flush D index + subs r3, r3, #1 << 4 + bcs 2b @ entries n to 0 + subs r1, r1, #1 << 29 + bcs 1b @ segments 3 to 0 +#endif + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ flush I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + * (same as arm926) + */ +ENTRY(arm946_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bhs __flush_whole_cache + +1: tst r2, #VM_EXEC +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#else + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE +#endif + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(arm946_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * (same as arm926) + */ +ENTRY(arm946_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + * (same as arm926) + */ +ENTRY(arm946_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * (same as arm926) + */ +arm946_dma_inv_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +#endif + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +arm946_dma_clean_range: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +ENTRY(arm946_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry +#else + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry +#endif + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm946_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq arm946_dma_clean_range + bcs arm946_dma_inv_range + b arm946_dma_flush_range +ENDPROC(arm946_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(arm946_dma_unmap_area) + ret lr +ENDPROC(arm946_dma_unmap_area) + + .globl arm946_flush_kern_cache_louis + .equ arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions arm946 + +ENTRY(cpu_arm946_dcache_clean_area) +#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + + .type __arm946_setup, #function +__arm946_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c6, 0 @ invalidate D cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + + mcr p15, 0, r0, c6, c3, 0 @ disable memory region 3~7 + mcr p15, 0, r0, c6, c4, 0 + mcr p15, 0, r0, c6, c5, 0 + mcr p15, 0, r0, c6, c6, 0 + mcr p15, 0, r0, c6, c7, 0 + + mov r0, #0x0000003F @ base = 0, size = 4GB + mcr p15, 0, r0, c6, c0, 0 @ set region 0, default + + ldr r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM + ldr r7, =CONFIG_DRAM_SIZE @ size of RAM (must be >= 4KB) + pr_val r3, r0, r7, #1 + mcr p15, 0, r3, c6, c1, 0 + + ldr r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH + ldr r7, =CONFIG_FLASH_SIZE @ size of FLASH (must be >= 4KB) + pr_val r3, r0, r7, #1 + mcr p15, 0, r3, c6, c2, 0 + + mov r0, #0x06 + mcr p15, 0, r0, c2, c0, 0 @ region 1,2 d-cacheable + mcr p15, 0, r0, c2, c0, 1 @ region 1,2 i-cacheable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mov r0, #0x00 @ disable whole write buffer +#else + mov r0, #0x02 @ region 1 write bufferred +#endif + mcr p15, 0, r0, c3, c0, 0 + +/* + * Access Permission Settings for future permission control by PU. + * + * priv. user + * region 0 (whole) rw -- : b0001 + * region 1 (RAM) rw rw : b0011 + * region 2 (FLASH) rw r- : b0010 + * region 3~7 (none) -- -- : b0000 + */ + mov r0, #0x00000031 + orr r0, r0, #0x00000200 + mcr p15, 0, r0, c5, c0, 2 @ set data access permission + mcr p15, 0, r0, c5, c0, 3 @ set inst. access permission + + mrc p15, 0, r0, c1, c0 @ get control register + orr r0, r0, #0x00001000 @ I-cache + orr r0, r0, #0x00000005 @ MPU/D-cache +#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN + orr r0, r0, #0x00004000 @ .1.. .... .... .... +#endif + ret lr + + .size __arm946_setup, . - __arm946_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm946, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5t" + string cpu_arm946_name, "ARM946E-S" + + .align + + .section ".proc.info.init", "a" + .type __arm946_proc_info,#object +__arm946_proc_info: + .long 0x41009460 + .long 0xff00fff0 + .long 0 + .long 0 + initfn __arm946_setup, __arm946_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB + .long cpu_arm946_name + .long arm946_processor_functions + .long 0 + .long 0 + .long arm946_cache_fns + .size __arm946_proc_info, . - __arm946_proc_info + diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S new file mode 100644 index 0000000000..a054c0e9c0 --- /dev/null +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/proc-arm9tdmi.S: utility functions for ARM9TDMI + * + * Copyright (C) 2003-2006 Hyok S. Choi <hyok.choi@samsung.com> + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + + .text +/* + * cpu_arm9tdmi_proc_init() + * cpu_arm9tdmi_do_idle() + * cpu_arm9tdmi_dcache_clean_area() + * cpu_arm9tdmi_switch_mm() + * + * These are not required. + */ +ENTRY(cpu_arm9tdmi_proc_init) +ENTRY(cpu_arm9tdmi_do_idle) +ENTRY(cpu_arm9tdmi_dcache_clean_area) +ENTRY(cpu_arm9tdmi_switch_mm) + ret lr + +/* + * cpu_arm9tdmi_proc_fin() + */ +ENTRY(cpu_arm9tdmi_proc_fin) + ret lr + +/* + * Function: cpu_arm9tdmi_reset(loc) + * Params : loc(r0) address to jump to + * Purpose : Sets up everything for a reset and jump to the location for soft reset. + */ + .pushsection .idmap.text, "ax" +ENTRY(cpu_arm9tdmi_reset) + ret r0 +ENDPROC(cpu_arm9tdmi_reset) + .popsection + + .type __arm9tdmi_setup, #function +__arm9tdmi_setup: + ret lr + .size __arm9tdmi_setup, . - __arm9tdmi_setup + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions arm9tdmi, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + + string cpu_arch_name, "armv4t" + string cpu_elf_name, "v4" + string cpu_arm9tdmi_name, "ARM9TDMI" + string cpu_p2001_name, "P2001" + + .align + + .section ".proc.info.init", "a" + +.macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req + .type __\name\()_proc_info, #object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long 0 + .long 0 + initfn __arm9tdmi_setup, __\name\()_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT + .long \cpu_name + .long arm9tdmi_processor_functions + .long 0 + .long 0 + .long v4_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + arm9tdmi_proc_info arm9tdmi, 0x41009900, 0xfff8ff00, cpu_arm9tdmi_name + arm9tdmi_proc_info p2001, 0x41029000, 0xffffffff, cpu_p2001_name diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S new file mode 100644 index 0000000000..2c73e0d47d --- /dev/null +++ b/arch/arm/mm/proc-fa526.S @@ -0,0 +1,213 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526 + * + * Written by : Luke Lee + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * These are the low level assembler for performing cache and TLB + * functions on the fa526. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +#define CACHE_DLINESIZE 16 + + .text +/* + * cpu_fa526_proc_init() + */ +ENTRY(cpu_fa526_proc_init) + ret lr + +/* + * cpu_fa526_proc_fin() + */ +ENTRY(cpu_fa526_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + nop + nop + ret lr + +/* + * cpu_fa526_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 4 + .pushsection .idmap.text, "ax" +ENTRY(cpu_fa526_reset) +/* TODO: Use CP8 if possible... */ + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + bic ip, ip, #0x0800 @ BTB off + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + nop + nop + ret r0 +ENDPROC(cpu_fa526_reset) + .popsection + +/* + * cpu_fa526_do_idle() + */ + .align 4 +ENTRY(cpu_fa526_do_idle) + ret lr + + +ENTRY(cpu_fa526_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_fa526_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 4 +ENTRY(cpu_fa526_switch_mm) +#ifdef CONFIG_MMU + mov ip, #0 +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache +#else + mcr p15, 0, ip, c7, c14, 0 @ clean and invalidate whole D cache +#endif + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c5, 6 @ invalidate BTB since mm changed + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB +#endif + ret lr + +/* + * cpu_fa526_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 4 +ENTRY(cpu_fa526_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + ret lr + + .type __fa526_setup, #function +__fa526_setup: + /* On return of this routine, r0 must carry correct flags for CFG register */ + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + mcr p15, 0, r0, c7, c5, 5 @ invalidate IScratchpad RAM + + mov r0, #1 + mcr p15, 0, r0, c1, c1, 0 @ turn-on ECR + + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB All + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + + mov r0, #0x1f @ Domains 0, 1 = manager, 2 = client + mcr p15, 0, r0, c3, c0 @ load domain access register + + mrc p15, 0, r0, c1, c0 @ get control register v4 + ldr r5, fa526_cr1_clear + bic r0, r0, r5 + ldr r5, fa526_cr1_set + orr r0, r0, r5 + ret lr + .size __fa526_setup, . - __fa526_setup + + /* + * .RVI ZFRS BLDP WCAM + * ..11 1001 .111 1101 + * + */ + .type fa526_cr1_clear, #object + .type fa526_cr1_set, #object +fa526_cr1_clear: + .word 0x3f3f +fa526_cr1_set: + .word 0x397D + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions fa526, dabort=v4_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_fa526_name, "FA526" + + .align + + .section ".proc.info.init", "a" + + .type __fa526_proc_info,#object +__fa526_proc_info: + .long 0x66015261 + .long 0xff01fff1 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __fa526_setup, __fa526_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF + .long cpu_fa526_name + .long fa526_processor_functions + .long fa_tlb_fns + .long fa_user_fns + .long fa_cache_fns + .size __fa526_proc_info, . - __fa526_proc_info diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S new file mode 100644 index 0000000000..072ff9b451 --- /dev/null +++ b/arch/arm/mm/proc-feroceon.S @@ -0,0 +1,617 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-feroceon.S: MMU functions for Feroceon + * + * Heavily based on proc-arm926.S + * Maintainer: Assaf Hoffman <hoffman@marvell.com> + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be invalidated + * using the single invalidate entry instructions. Anything larger + * than this, and we go for the whole cache. + * + * This value should be chosen such that we choose the cheapest + * alternative. + */ +#define CACHE_DLIMIT 16384 + +/* + * the cache line size of the I and D cache + */ +#define CACHE_DLINESIZE 32 + + .bss + .align 3 +__cache_params_loc: + .space 8 + + .text +__cache_params: + .word __cache_params_loc + +/* + * cpu_feroceon_proc_init() + */ +ENTRY(cpu_feroceon_proc_init) + mrc p15, 0, r0, c0, c0, 1 @ read cache type register + ldr r1, __cache_params + mov r2, #(16 << 5) + tst r0, #(1 << 16) @ get way + mov r0, r0, lsr #18 @ get cache size order + movne r3, #((4 - 1) << 30) @ 4-way + and r0, r0, #0xf + moveq r3, #0 @ 1-way + mov r2, r2, lsl r0 @ actual cache size + movne r2, r2, lsr #2 @ turned into # of sets + sub r2, r2, #(1 << 5) + stmia r1, {r2, r3} +#ifdef CONFIG_VFP + mov r1, #1 @ disable quirky VFP + str_l r1, VFP_arch_feroceon, r2 +#endif + ret lr + +/* + * cpu_feroceon_proc_fin() + */ +ENTRY(cpu_feroceon_proc_fin) +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mov r0, #0 + mcr p15, 1, r0, c15, c9, 0 @ clean L2 + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_feroceon_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_feroceon_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_feroceon_reset) + .popsection + +/* + * cpu_feroceon_do_idle() + * + * Called with IRQs disabled + */ + .align 5 +ENTRY(cpu_feroceon_do_idle) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer + mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt + ret lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(feroceon_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(feroceon_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ + .align 5 +ENTRY(feroceon_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(feroceon_flush_kern_cache_all) + mov r2, #VM_EXEC + +__flush_whole_cache: + ldr r1, __cache_params + ldmia r1, {r1, r3} +1: orr ip, r1, r3 +2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way + subs ip, ip, #(1 << 30) @ next way + bcs 2b + subs r1, r1, #(1 << 5) @ next set + bcs 1b + + tst r2, #VM_EXEC + mov ip, #0 + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + */ + .align 5 +ENTRY(feroceon_flush_user_cache_range) + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mov ip, #0 + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ + .align 5 +ENTRY(feroceon_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(feroceon_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ + .align 5 +ENTRY(feroceon_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + + .align 5 +ENTRY(feroceon_range_flush_kern_dcache_area) + mrs r2, cpsr + add r1, r0, #PAGE_SZ - CACHE_DLINESIZE @ top addr is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start + mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top + msr cpsr_c, r2 @ restore interrupts + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ + .align 5 +feroceon_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + bic r0, r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + + .align 5 +feroceon_range_dma_inv_range: + mrs r2, cpsr + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c14, 0 @ D inv range start + mcr p15, 5, r1, c15, c14, 1 @ D inv range top + msr cpsr_c, r2 @ restore interrupts + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ + .align 5 +feroceon_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + + .align 5 +feroceon_range_dma_clean_range: + mrs r2, cpsr + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c13, 0 @ D clean range start + mcr p15, 5, r1, c15, c13, 1 @ D clean range top + msr cpsr_c, r2 @ restore interrupts + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ + .align 5 +ENTRY(feroceon_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + + .align 5 +ENTRY(feroceon_range_dma_flush_range) + mrs r2, cpsr + cmp r1, r0 + subne r1, r1, #1 @ top address is inclusive + orr r3, r2, #PSR_I_BIT + msr cpsr_c, r3 @ disable interrupts + mcr p15, 5, r0, c15, c15, 0 @ D clean/inv range start + mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top + msr cpsr_c, r2 @ restore interrupts + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq feroceon_dma_clean_range + bcs feroceon_dma_inv_range + b feroceon_dma_flush_range +ENDPROC(feroceon_dma_map_area) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_range_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq feroceon_range_dma_clean_range + bcs feroceon_range_dma_inv_range + b feroceon_range_dma_flush_range +ENDPROC(feroceon_range_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(feroceon_dma_unmap_area) + ret lr +ENDPROC(feroceon_dma_unmap_area) + + .globl feroceon_flush_kern_cache_louis + .equ feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions feroceon + +.macro range_alias basename + .globl feroceon_range_\basename + .type feroceon_range_\basename , %function + .equ feroceon_range_\basename , feroceon_\basename +.endm + +/* + * Most of the cache functions are unchanged for this case. + * Export suitable alias symbols for the unchanged functions: + */ + range_alias flush_icache_all + range_alias flush_user_cache_all + range_alias flush_kern_cache_all + range_alias flush_kern_cache_louis + range_alias flush_user_cache_range + range_alias coherent_kern_range + range_alias coherent_user_range + range_alias dma_unmap_area + + define_cache_functions feroceon_range + + .align 5 +ENTRY(cpu_feroceon_dcache_clean_area) +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mov r2, r0 + mov r3, r1 +#endif +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) +1: mcr p15, 1, r2, c15, c9, 1 @ clean L2 entry + add r2, r2, #CACHE_DLINESIZE + subs r3, r3, #CACHE_DLINESIZE + bhi 1b +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_feroceon_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_feroceon_switch_mm) +#ifdef CONFIG_MMU + /* + * Note: we wish to call __flush_whole_cache but we need to preserve + * lr to do so. The only way without touching main memory is to + * use r2 which is normally used to test the VM_EXEC flag, and + * compensate locally for the skipped ops if it is not set. + */ + mov r2, lr @ abuse r2 to preserve lr + bl __flush_whole_cache + @ if r2 contains the VM_EXEC bit then the next 2 ops are done already + tst r2, #VM_EXEC + mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcreq p15, 0, ip, c7, c10, 4 @ drain WB + + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + ret r2 +#else + ret lr +#endif + +/* + * cpu_feroceon_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_feroceon_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry +#if defined(CONFIG_CACHE_FEROCEON_L2) && \ + !defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) + mcr p15, 1, r0, c15, c9, 1 @ clean L2 entry +#endif + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + ret lr + +/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */ +.globl cpu_feroceon_suspend_size +.equ cpu_feroceon_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_feroceon_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c13, c0, 0 @ PID + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c1, c0, 0 @ Control register + stmia r0, {r4 - r6} + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_feroceon_do_suspend) + +ENTRY(cpu_feroceon_do_resume) + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I+D caches + ldmia r0, {r4 - r6} + mcr p15, 0, r4, c13, c0, 0 @ PID + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + mcr p15, 0, r1, c2, c0, 0 @ TTB address + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_feroceon_do_resume) +#endif + + .type __feroceon_setup, #function +__feroceon_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + adr r5, feroceon_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + ret lr + .size __feroceon_setup, . - __feroceon_setup + + /* + * B + * R P + * .RVI UFRS BLDP WCAM + * .011 .001 ..11 0101 + * + */ + .type feroceon_crval, #object +feroceon_crval: + crval clear=0x0000773f, mmuset=0x00003135, ucset=0x00001134 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions feroceon, dabort=v5t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_feroceon_name, "Feroceon" + string cpu_88fr531_name, "Feroceon 88FR531-vd" + string cpu_88fr571_name, "Feroceon 88FR571-vd" + string cpu_88fr131_name, "Feroceon 88FR131" + + .align + + .section ".proc.info.init", "a" + +.macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __feroceon_setup, __\name\()_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long \cpu_name + .long feroceon_processor_functions + .long v4wbi_tlb_fns + .long feroceon_user_fns + .long \cache + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + +#ifdef CONFIG_CPU_FEROCEON_OLD_ID + feroceon_proc_info feroceon_old_id, 0x41009260, 0xff00fff0, \ + cpu_name=cpu_feroceon_name, cache=feroceon_cache_fns +#endif + + feroceon_proc_info 88fr531, 0x56055310, 0xfffffff0, cpu_88fr531_name, \ + cache=feroceon_cache_fns + feroceon_proc_info 88fr571, 0x56155710, 0xfffffff0, cpu_88fr571_name, \ + cache=feroceon_range_cache_fns + feroceon_proc_info 88fr131, 0x56251310, 0xfffffff0, cpu_88fr131_name, \ + cache=feroceon_range_cache_fns diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S new file mode 100644 index 0000000000..e43f6d716b --- /dev/null +++ b/arch/arm/mm/proc-macros.S @@ -0,0 +1,387 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * We need constants.h for: + * VMA_VM_MM + * VMA_VM_FLAGS + * VM_EXEC + */ +#include <asm/asm-offsets.h> +#include <asm/pgtable.h> +#include <asm/thread_info.h> + +#ifdef CONFIG_CPU_V7M +#include <asm/v7m.h> +#endif + +/* + * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm) + */ + .macro vma_vm_mm, rd, rn + ldr \rd, [\rn, #VMA_VM_MM] + .endm + +/* + * vma_vm_flags - get vma->vm_flags + */ + .macro vma_vm_flags, rd, rn + ldr \rd, [\rn, #VMA_VM_FLAGS] + .endm + +/* + * act_mm - get current->active_mm + */ + .macro act_mm, rd + get_current \rd + .if (TSK_ACTIVE_MM > IMM12_MASK) + add \rd, \rd, #TSK_ACTIVE_MM & ~IMM12_MASK + .endif + ldr \rd, [\rd, #TSK_ACTIVE_MM & IMM12_MASK] + .endm + +/* + * mmid - get context id from mm pointer (mm->context.id) + * note, this field is 64bit, so in big-endian the two words are swapped too. + */ + .macro mmid, rd, rn +#ifdef __ARMEB__ + ldr \rd, [\rn, #MM_CONTEXT_ID + 4 ] +#else + ldr \rd, [\rn, #MM_CONTEXT_ID] +#endif + .endm + +/* + * mask_asid - mask the ASID from the context ID + */ + .macro asid, rd, rn + and \rd, \rn, #255 + .endm + + .macro crval, clear, mmuset, ucset +#ifdef CONFIG_MMU + .word \clear + .word \mmuset +#else + .word \clear + .word \ucset +#endif + .endm + +/* + * dcache_line_size - get the minimum D-cache line size from the CTR register + * on ARMv7. + */ + .macro dcache_line_size, reg, tmp +#ifdef CONFIG_CPU_V7M + movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR + movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR + ldr \tmp, [\tmp] +#else + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr +#endif + lsr \tmp, \tmp, #16 + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm + +/* + * icache_line_size - get the minimum I-cache line size from the CTR register + * on ARMv7. + */ + .macro icache_line_size, reg, tmp +#ifdef CONFIG_CPU_V7M + movw \tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR + movt \tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR + ldr \tmp, [\tmp] +#else + mrc p15, 0, \tmp, c0, c0, 1 @ read ctr +#endif + and \tmp, \tmp, #0xf @ cache line size encoding + mov \reg, #4 @ bytes per word + mov \reg, \reg, lsl \tmp @ actual cache line size + .endm + +/* + * Sanity check the PTE configuration for the code below - which makes + * certain assumptions about how these bits are laid out. + */ +#ifdef CONFIG_MMU +#if L_PTE_SHARED != PTE_EXT_SHARED +#error PTE shared bit mismatch +#endif +#if !defined (CONFIG_ARM_LPAE) && \ + (L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\ + L_PTE_PRESENT) > L_PTE_SHARED +#error Invalid Linux PTE bit settings +#endif +#endif /* CONFIG_MMU */ + +/* + * The ARMv6 and ARMv7 set_pte_ext translation function. + * + * Permission translation: + * YUWD APX AP1 AP0 SVC User + * 0xxx 0 0 0 no acc no acc + * 100x 1 0 1 r/o no acc + * 10x0 1 0 1 r/o no acc + * 1011 0 0 1 r/w no acc + * 110x 1 1 1 r/o r/o + * 11x0 1 1 1 r/o r/o + * 1111 0 1 1 r/w r/w + */ + .macro armv6_mt_table pfx +\pfx\()_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE + .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long 0x00 @ L_PTE_MT_MINICACHE (not present) + .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC + .long 0x00 @ unused + .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX @ L_PTE_MT_VECTORS + .endm + + .macro armv6_set_pte_ext pfx + str r1, [r0], #2048 @ linux version + + bic r3, r1, #0x000003fc + bic r3, r3, #PTE_TYPE_MASK + orr r3, r3, r2 + orr r3, r3, #PTE_EXT_AP0 | 2 + + adr ip, \pfx\()_mt_table + and r2, r1, #L_PTE_MT_MASK + ldr r2, [ip, r2] + + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_DIRTY|L_PTE_RDONLY + orrne r3, r3, #PTE_EXT_APX + + tst r1, #L_PTE_USER + orrne r3, r3, #PTE_EXT_AP1 + tstne r3, #PTE_EXT_APX + + @ user read-only -> kernel read-only + bicne r3, r3, #PTE_EXT_AP0 + + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN + + eor r3, r3, r2 + + tst r1, #L_PTE_YOUNG + tstne r1, #L_PTE_PRESENT + moveq r3, #0 + tstne r1, #L_PTE_NONE + movne r3, #0 + + str r3, [r0] + mcr p15, 0, r0, c7, c10, 1 @ flush_pte + .endm + + +/* + * The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function, + * covering most CPUs except Xscale and Xscale 3. + * + * Permission translation: + * YUWD AP SVC User + * 0xxx 0x00 no acc no acc + * 100x 0x00 r/o no acc + * 10x0 0x00 r/o no acc + * 1011 0x55 r/w no acc + * 110x 0xaa r/w r/o + * 11x0 0xaa r/w r/o + * 1111 0xff r/w r/w + */ + .macro armv3_set_pte_ext wc_disable=1 + str r1, [r0], #2048 @ linux version + + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits + bic r2, r2, #PTE_TYPE_MASK + orr r2, r2, #PTE_TYPE_SMALL + + tst r3, #L_PTE_USER @ user? + orrne r2, r2, #PTE_SMALL_AP_URO_SRW + + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? + orreq r2, r2, #PTE_SMALL_AP_UNO_SRW + + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? + movne r2, #0 + + .if \wc_disable +#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + tst r2, #PTE_CACHEABLE + bicne r2, r2, #PTE_BUFFERABLE +#endif + .endif + str r2, [r0] @ hardware version + .endm + + +/* + * Xscale set_pte_ext translation, split into two halves to cope + * with work-arounds. r3 must be preserved by code between these + * two macros. + * + * Permission translation: + * YUWD AP SVC User + * 0xxx 00 no acc no acc + * 100x 00 r/o no acc + * 10x0 00 r/o no acc + * 1011 01 r/w no acc + * 110x 10 r/w r/o + * 11x0 10 r/w r/o + * 1111 11 r/w r/w + */ + .macro xscale_set_pte_ext_prologue + str r1, [r0] @ linux version + + eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY + + bic r2, r1, #PTE_SMALL_AP_MASK @ keep C, B bits + orr r2, r2, #PTE_TYPE_EXT @ extended page + + tst r3, #L_PTE_USER @ user? + orrne r2, r2, #PTE_EXT_AP_URO_SRW @ yes -> user r/o, system r/w + + tst r3, #L_PTE_RDONLY | L_PTE_DIRTY @ write and dirty? + orreq r2, r2, #PTE_EXT_AP_UNO_SRW @ yes -> user n/a, system r/w + @ combined with user -> user r/w + .endm + + .macro xscale_set_pte_ext_epilogue + tst r3, #L_PTE_PRESENT | L_PTE_YOUNG @ present and young? + movne r2, #0 @ no -> fault + + str r2, [r0, #2048]! @ hardware version + mov ip, #0 + mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + .endm + +.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0 +/* + * If we are building for big.Little with branch predictor hardening, + * we need the processor function tables to remain available after boot. + */ +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) + .section ".rodata" +#endif + .type \name\()_processor_functions, #object + .align 2 +ENTRY(\name\()_processor_functions) + .word \dabort + .word \pabort + .word cpu_\name\()_proc_init + .word \bugs + .word cpu_\name\()_proc_fin + .word cpu_\name\()_reset + .word cpu_\name\()_do_idle + .word cpu_\name\()_dcache_clean_area + .word cpu_\name\()_switch_mm + + .if \nommu + .word 0 + .else + .word cpu_\name\()_set_pte_ext + .endif + + .if \suspend + .word cpu_\name\()_suspend_size +#ifdef CONFIG_ARM_CPU_SUSPEND + .word cpu_\name\()_do_suspend + .word cpu_\name\()_do_resume +#else + .word 0 + .word 0 +#endif + .else + .word 0 + .word 0 + .word 0 + .endif + + .size \name\()_processor_functions, . - \name\()_processor_functions +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) + .previous +#endif +.endm + +.macro define_cache_functions name:req + .align 2 + .type \name\()_cache_fns, #object +ENTRY(\name\()_cache_fns) + .long \name\()_flush_icache_all + .long \name\()_flush_kern_cache_all + .long \name\()_flush_kern_cache_louis + .long \name\()_flush_user_cache_all + .long \name\()_flush_user_cache_range + .long \name\()_coherent_kern_range + .long \name\()_coherent_user_range + .long \name\()_flush_kern_dcache_area + .long \name\()_dma_map_area + .long \name\()_dma_unmap_area + .long \name\()_dma_flush_range + .size \name\()_cache_fns, . - \name\()_cache_fns +.endm + +.macro define_tlb_functions name:req, flags_up:req, flags_smp + .type \name\()_tlb_fns, #object + .align 2 +ENTRY(\name\()_tlb_fns) + .long \name\()_flush_user_tlb_range + .long \name\()_flush_kern_tlb_range + .ifnb \flags_smp + ALT_SMP(.long \flags_smp ) + ALT_UP(.long \flags_up ) + .else + .long \flags_up + .endif + .size \name\()_tlb_fns, . - \name\()_tlb_fns +.endm + +.macro globl_equ x, y + .globl \x + .equ \x, \y +.endm + +.macro initfn, func, base + .long \func - \base +.endm + + /* + * Macro to calculate the log2 size for the protection region + * registers. This calculates rd = log2(size) - 1. tmp must + * not be the same register as rd. + */ +.macro pr_sz, rd, size, tmp + mov \tmp, \size, lsr #12 + mov \rd, #11 +1: movs \tmp, \tmp, lsr #1 + addne \rd, \rd, #1 + bne 1b +.endm + + /* + * Macro to generate a protection region register value + * given a pre-masked address, size, and enable bit. + * Corrupts size. + */ +.macro pr_val, dest, addr, size, enable + pr_sz \dest, \size, \size @ calculate log2(size) - 1 + orr \dest, \addr, \dest, lsl #1 @ mask in the region size + orr \dest, \dest, \enable +.endm diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S new file mode 100644 index 0000000000..1645ccaffe --- /dev/null +++ b/arch/arm/mm/proc-mohawk.S @@ -0,0 +1,444 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core + * + * PJ1 (codename Mohawk) is a hybrid of the xscale3 and Marvell's own core. + * + * Heavily based on proc-arm926.S and proc-xsc3.S + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. If the + * area is larger than this, then we flush the whole cache. + */ +#define CACHE_DLIMIT 32768 + +/* + * The cache line size of the L1 D cache. + */ +#define CACHE_DLINESIZE 32 + +/* + * cpu_mohawk_proc_init() + */ +ENTRY(cpu_mohawk_proc_init) + ret lr + +/* + * cpu_mohawk_proc_fin() + */ +ENTRY(cpu_mohawk_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...iz........... + bic r0, r0, #0x0006 @ .............ca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_mohawk_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + * + * (same as arm926) + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_mohawk_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x0007 @ .............cam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_mohawk_reset) + .popsection + +/* + * cpu_mohawk_do_idle() + * + * Called with IRQs disabled + */ + .align 5 +ENTRY(cpu_mohawk_do_idle) + mov r0, #0 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt + ret lr + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(mohawk_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(mohawk_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Clean and invalidate all cache entries in a particular + * address space. + */ +ENTRY(mohawk_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(mohawk_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer + ret lr + +/* + * flush_user_cache_range(start, end, flags) + * + * Clean and invalidate a range of cache entries in the + * specified address range. + * + * - start - start address (inclusive) + * - end - end address (exclusive) + * - flags - vm_flags describing address space + * + * (same as arm926) + */ +ENTRY(mohawk_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #CACHE_DLIMIT + bgt __flush_whole_cache +1: tst r2, #VM_EXEC + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D entry + mcrne p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c10, 4 @ drain WB + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(mohawk_coherent_kern_range) + /* FALLTHROUGH */ + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start, end. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as arm926) + */ +ENTRY(mohawk_coherent_user_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ invalidate I entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + mov r0, #0 + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(mohawk_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +mohawk_dma_inv_range: + tst r0, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHE_DLINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + * + * (same as v4wb) + */ +mohawk_dma_clean_range: + bic r0, r0, #CACHE_DLINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(mohawk_dma_flush_range) + bic r0, r0, #CACHE_DLINESIZE - 1 +1: + mcr p15, 0, r0, c7, c14, 1 @ clean+invalidate D entry + add r0, r0, #CACHE_DLINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(mohawk_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq mohawk_dma_clean_range + bcs mohawk_dma_inv_range + b mohawk_dma_flush_range +ENDPROC(mohawk_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(mohawk_dma_unmap_area) + ret lr +ENDPROC(mohawk_dma_unmap_area) + + .globl mohawk_flush_kern_cache_louis + .equ mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions mohawk + +ENTRY(cpu_mohawk_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHE_DLINESIZE + subs r1, r1, #CACHE_DLINESIZE + bhi 1b + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr + +/* + * cpu_mohawk_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_mohawk_switch_mm) + mov ip, #0 + mcr p15, 0, ip, c7, c14, 0 @ clean & invalidate all D cache + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c10, 4 @ drain WB + orr r0, r0, #0x18 @ cache the page table in L2 + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + ret lr + +/* + * cpu_mohawk_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_mohawk_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB + ret lr +#endif + +.globl cpu_mohawk_suspend_size +.equ cpu_mohawk_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_mohawk_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmia sp!, {r4 - r9, pc} +ENDPROC(cpu_mohawk_do_suspend) + +ENTRY(cpu_mohawk_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer + mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. + mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + orr r1, r1, #0x18 @ cache the page table in L2 + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_mohawk_do_resume) +#endif + + .type __mohawk_setup, #function +__mohawk_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs + orr r4, r4, #0x18 @ cache the page table in L2 + mcr p15, 0, r4, c2, c0, 0 @ load page table pointer + + mov r0, #0 @ don't allow CP access + mcr p15, 0, r0, c15, c1, 0 @ write CP access register + + adr r5, mohawk_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register + bic r0, r0, r5 + orr r0, r0, r6 + ret lr + + .size __mohawk_setup, . - __mohawk_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * .011 1001 ..00 0101 + * + */ + .type mohawk_crval, #object +mohawk_crval: + crval clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions mohawk, dabort=v5t_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_mohawk_name, "Marvell 88SV331x" + + .align + + .section ".proc.info.init", "a" + + .type __88sv331x_proc_info,#object +__88sv331x_proc_info: + .long 0x56158000 @ Marvell 88SV331x (MOHAWK) + .long 0xfffff000 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_BIT4 | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __mohawk_setup, __88sv331x_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_mohawk_name + .long mohawk_processor_functions + .long v4wbi_tlb_fns + .long v4wb_user_fns + .long mohawk_cache_fns + .size __88sv331x_proc_info, . - __88sv331x_proc_info diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S new file mode 100644 index 0000000000..4071f7a61c --- /dev/null +++ b/arch/arm/mm/proc-sa110.S @@ -0,0 +1,222 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/proc-sa110.S + * + * Copyright (C) 1997-2002 Russell King + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * MMU functions for SA110 + * + * These are the low level assembler for performing cache and TLB + * functions on the StrongARM-110. + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <mach/hardware.h> +#include <asm/pgtable-hwdef.h> +#include <asm/ptrace.h> + +#include "proc-macros.S" + +/* + * the cache line size of the I and D cache + */ +#define DCACHELINESIZE 32 + + .text + +/* + * cpu_sa110_proc_init() + */ +ENTRY(cpu_sa110_proc_init) + mov r0, #0 + mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching + ret lr + +/* + * cpu_sa110_proc_fin() + */ +ENTRY(cpu_sa110_proc_fin) + mov r0, #0 + mcr p15, 0, r0, c15, c2, 2 @ Disable clock switching + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_sa110_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_sa110_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_sa110_reset) + .popsection + +/* + * cpu_sa110_do_idle(type) + * + * Cause the processor to idle + * + * type: call type: + * 0 = slow idle + * 1 = fast idle + * 2 = switch to slow processor clock + * 3 = switch to fast processor clock + */ + .align 5 + +ENTRY(cpu_sa110_do_idle) + mcr p15, 0, ip, c15, c2, 2 @ disable clock switching + ldr r1, =UNCACHEABLE_ADDR @ load from uncacheable loc + ldr r1, [r1, #0] @ force switch to MCLK + mov r0, r0 @ safety + mov r0, r0 @ safety + mov r0, r0 @ safety + mcr p15, 0, r0, c15, c8, 2 @ Wait for interrupt, cache aligned + mov r0, r0 @ safety + mov r0, r0 @ safety + mov r0, r0 @ safety + mcr p15, 0, r0, c15, c1, 2 @ enable clock switching + ret lr + +/* ================================= CACHE ================================ */ + +/* + * cpu_sa110_dcache_clean_area(addr,sz) + * + * Clean the specified entry of any caches such that the MMU + * translation fetches will obtain correct data. + * + * addr: cache-unaligned virtual address + */ + .align 5 +ENTRY(cpu_sa110_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #DCACHELINESIZE + subs r1, r1, #DCACHELINESIZE + bhi 1b + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_sa110_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_sa110_switch_mm) +#ifdef CONFIG_MMU + str lr, [sp, #-4]! + bl v4wb_flush_kern_cache_all @ clears IP + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + ldr pc, [sp], #4 +#else + ret lr +#endif + +/* + * cpu_sa110_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_sa110_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + ret lr + + .type __sa110_setup, #function +__sa110_setup: + mov r10, #0 + mcr p15, 0, r10, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r10, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r10, c8, c7 @ invalidate I,D TLBs on v4 +#endif + + adr r5, sa110_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + ret lr + .size __sa110_setup, . - __sa110_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..01 0001 ..11 1101 + * + */ + .type sa110_crval, #object +sa110_crval: + crval clear=0x00003f3f, mmuset=0x0000113d, ucset=0x00001130 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions sa110, dabort=v4_early_abort, pabort=legacy_pabort + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_sa110_name, "StrongARM-110" + + .align + + .section ".proc.info.init", "a" + + .type __sa110_proc_info,#object +__sa110_proc_info: + .long 0x4401a100 + .long 0xfffffff0 + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __sa110_setup, __sa110_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT + .long cpu_sa110_name + .long sa110_processor_functions + .long v4wb_tlb_fns + .long v4wb_user_fns + .long v4wb_cache_fns + .size __sa110_proc_info, . - __sa110_proc_info diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S new file mode 100644 index 0000000000..e723bd4119 --- /dev/null +++ b/arch/arm/mm/proc-sa1100.S @@ -0,0 +1,270 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/proc-sa1100.S + * + * Copyright (C) 1997-2002 Russell King + * hacked for non-paged-MM by Hyok S. Choi, 2003. + * + * MMU functions for SA110 + * + * These are the low level assembler for performing cache and TLB + * functions on the StrongARM-1100 and StrongARM-1110. + * + * Note that SA1100 and SA1110 share everything but their name and CPU ID. + * + * 12-jun-2000, Erik Mouw (J.A.K.Mouw@its.tudelft.nl): + * Flush the read buffer at context switches + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <mach/hardware.h> +#include <asm/pgtable-hwdef.h> + +#include "proc-macros.S" + +/* + * the cache line size of the I and D cache + */ +#define DCACHELINESIZE 32 + + .section .text + +/* + * cpu_sa1100_proc_init() + */ +ENTRY(cpu_sa1100_proc_init) + mov r0, #0 + mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching + mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland + ret lr + +/* + * cpu_sa1100_proc_fin() + * + * Prepare the CPU for reset: + * - Disable interrupts + * - Clean and turn off caches. + */ +ENTRY(cpu_sa1100_proc_fin) + mcr p15, 0, ip, c15, c2, 2 @ Disable clock switching + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x000e @ ............wca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_sa1100_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_sa1100_reset) + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches + mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs +#endif + mrc p15, 0, ip, c1, c0, 0 @ ctrl register + bic ip, ip, #0x000f @ ............wcam + bic ip, ip, #0x1100 @ ...i...s........ + mcr p15, 0, ip, c1, c0, 0 @ ctrl register + ret r0 +ENDPROC(cpu_sa1100_reset) + .popsection + +/* + * cpu_sa1100_do_idle(type) + * + * Cause the processor to idle + * + * type: call type: + * 0 = slow idle + * 1 = fast idle + * 2 = switch to slow processor clock + * 3 = switch to fast processor clock + */ + .align 5 +ENTRY(cpu_sa1100_do_idle) + mov r0, r0 @ 4 nop padding + mov r0, r0 + mov r0, r0 + mov r0, r0 @ 4 nop padding + mov r0, r0 + mov r0, r0 + mov r0, #0 + ldr r1, =UNCACHEABLE_ADDR @ ptr to uncacheable address + @ --- aligned to a cache line + mcr p15, 0, r0, c15, c2, 2 @ disable clock switching + ldr r1, [r1, #0] @ force switch to MCLK + mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt + mov r0, r0 @ safety + mcr p15, 0, r0, c15, c1, 2 @ enable clock switching + ret lr + +/* ================================= CACHE ================================ */ + +/* + * cpu_sa1100_dcache_clean_area(addr,sz) + * + * Clean the specified entry of any caches such that the MMU + * translation fetches will obtain correct data. + * + * addr: cache-unaligned virtual address + */ + .align 5 +ENTRY(cpu_sa1100_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #DCACHELINESIZE + subs r1, r1, #DCACHELINESIZE + bhi 1b + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_sa1100_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_sa1100_switch_mm) +#ifdef CONFIG_MMU + str lr, [sp, #-4]! + bl v4wb_flush_kern_cache_all @ clears IP + mcr p15, 0, ip, c9, c0, 0 @ invalidate RB + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + ldr pc, [sp], #4 +#else + ret lr +#endif + +/* + * cpu_sa1100_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ + .align 5 +ENTRY(cpu_sa1100_set_pte_ext) +#ifdef CONFIG_MMU + armv3_set_pte_ext wc_disable=0 + mov r0, r0 + mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c10, 4 @ drain WB +#endif + ret lr + +.globl cpu_sa1100_suspend_size +.equ cpu_sa1100_suspend_size, 4 * 3 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_sa1100_do_suspend) + stmfd sp!, {r4 - r6, lr} + mrc p15, 0, r4, c3, c0, 0 @ domain ID + mrc p15, 0, r5, c13, c0, 0 @ PID + mrc p15, 0, r6, c1, c0, 0 @ control reg + stmia r0, {r4 - r6} @ store cp regs + ldmfd sp!, {r4 - r6, pc} +ENDPROC(cpu_sa1100_do_suspend) + +ENTRY(cpu_sa1100_do_resume) + ldmia r0, {r4 - r6} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ flush I+D TLBs + mcr p15, 0, ip, c7, c7, 0 @ flush I&D cache + mcr p15, 0, ip, c9, c0, 0 @ invalidate RB + mcr p15, 0, ip, c9, c0, 5 @ allow user space to use RB + + mcr p15, 0, r4, c3, c0, 0 @ domain ID + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r5, c13, c0, 0 @ PID + mov r0, r6 @ control register + b cpu_resume_mmu +ENDPROC(cpu_sa1100_do_resume) +#endif + + .type __sa1100_setup, #function +__sa1100_setup: + mov r0, #0 + mcr p15, 0, r0, c7, c7 @ invalidate I,D caches on v4 + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer on v4 +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7 @ invalidate I,D TLBs on v4 +#endif + adr r5, sa1100_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0 @ get control register v4 + bic r0, r0, r5 + orr r0, r0, r6 + ret lr + .size __sa1100_setup, . - __sa1100_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 0001 ..11 1101 + * + */ + .type sa1100_crval, #object +sa1100_crval: + crval clear=0x00003f3f, mmuset=0x0000313d, ucset=0x00001130 + + __INITDATA + +/* + * SA1100 and SA1110 share the same function calls + */ + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions sa1100, dabort=v4_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv4" + string cpu_elf_name, "v4" + string cpu_sa1100_name, "StrongARM-1100" + string cpu_sa1110_name, "StrongARM-1110" + + .align + + .section ".proc.info.init", "a" + +.macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __sa1100_setup, __\name\()_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT + .long \cpu_name + .long sa1100_processor_functions + .long v4wb_tlb_fns + .long v4_mc_user_fns + .long v4wb_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + sa1100_proc_info sa1100, 0x4401a110, 0xfffffff0, cpu_sa1100_name + sa1100_proc_info sa1110, 0x6901b110, 0xfffffff0, cpu_sa1110_name diff --git a/arch/arm/mm/proc-syms.c b/arch/arm/mm/proc-syms.c new file mode 100644 index 0000000000..e21249548e --- /dev/null +++ b/arch/arm/mm/proc-syms.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/arm/mm/proc-syms.c + * + * Copyright (C) 2000-2002 Russell King + */ +#include <linux/module.h> +#include <linux/mm.h> + +#include <asm/cacheflush.h> +#include <asm/proc-fns.h> +#include <asm/tlbflush.h> +#include <asm/page.h> + +#ifndef MULTI_CPU +EXPORT_SYMBOL(cpu_dcache_clean_area); +#ifdef CONFIG_MMU +EXPORT_SYMBOL(cpu_set_pte_ext); +#endif +#else +EXPORT_SYMBOL(processor); +#endif + +#ifndef MULTI_CACHE +EXPORT_SYMBOL(__cpuc_flush_kern_all); +EXPORT_SYMBOL(__cpuc_flush_user_all); +EXPORT_SYMBOL(__cpuc_flush_user_range); +EXPORT_SYMBOL(__cpuc_coherent_kern_range); +EXPORT_SYMBOL(__cpuc_flush_dcache_area); +#else +EXPORT_SYMBOL(cpu_cache); +#endif + +#ifdef CONFIG_MMU +#ifndef MULTI_USER +EXPORT_SYMBOL(__cpu_clear_user_highpage); +EXPORT_SYMBOL(__cpu_copy_user_highpage); +#else +EXPORT_SYMBOL(cpu_user); +#endif +#endif + +/* + * No module should need to touch the TLB (and currently + * no modules do. We export this for "loadkernel" support + * (booting a new kernel from within a running kernel.) + */ +#ifdef MULTI_TLB +EXPORT_SYMBOL(cpu_tlb); +#endif diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S new file mode 100644 index 0000000000..203dff89ab --- /dev/null +++ b/arch/arm/mm/proc-v6.S @@ -0,0 +1,299 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/proc-v6.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Modified by Catalin Marinas for noMMU support + * + * This is the "shell" of the ARMv6 processor support. + */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> + +#include "proc-macros.S" + +#define D_CACHE_LINE_SIZE 32 + +#define TTB_C (1 << 0) +#define TTB_S (1 << 1) +#define TTB_IMP (1 << 2) +#define TTB_RGN_NC (0 << 3) +#define TTB_RGN_WBWA (1 << 3) +#define TTB_RGN_WT (2 << 3) +#define TTB_RGN_WB (3 << 3) + +#define TTB_FLAGS_UP TTB_RGN_WBWA +#define PMD_FLAGS_UP PMD_SECT_WB +#define TTB_FLAGS_SMP TTB_RGN_WBWA|TTB_S +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S + +.arch armv6 + +ENTRY(cpu_v6_proc_init) + ret lr + +ENTRY(cpu_v6_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x0006 @ .............ca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_v6_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_v6_reset) + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x1 @ ...............m + mcr p15, 0, r1, c1, c0, 0 @ disable MMU + mov r1, #0 + mcr p15, 0, r1, c7, c5, 4 @ ISB + ret r0 +ENDPROC(cpu_v6_reset) + .popsection + +/* + * cpu_v6_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. + */ +ENTRY(cpu_v6_do_idle) + mov r1, #0 + mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode + mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt + ret lr + +ENTRY(cpu_v6_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #D_CACHE_LINE_SIZE + subs r1, r1, #D_CACHE_LINE_SIZE + bhi 1b + ret lr + +/* + * cpu_v6_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys + * + * - pgd_phys - physical address of new TTB + * + * It is assumed that: + * - we are not using split page tables + */ +ENTRY(cpu_v6_switch_mm) +#ifdef CONFIG_MMU + mov r2, #0 + mmid r1, r1 @ get mm->context.id + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) + mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB + mcr p15, 0, r2, c7, c10, 4 @ drain write buffer + mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + bic r2, r2, #0xff @ extract the PID + and r1, r1, #0xff + orr r1, r1, r2 @ insert into new context ID +#endif + mcr p15, 0, r1, c13, c0, 1 @ set context ID +#endif + ret lr + +/* + * cpu_v6_set_pte_ext(ptep, pte, ext) + * + * Set a level 2 translation table entry. + * + * - ptep - pointer to level 2 translation table entry + * (hardware version is stored at -1024 bytes) + * - pte - PTE value to store + * - ext - value for extended PTE bits + */ + armv6_mt_table cpu_v6 + +ENTRY(cpu_v6_set_pte_ext) +#ifdef CONFIG_MMU + armv6_set_pte_ext cpu_v6 +#endif + ret lr + +/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ +.globl cpu_v6_suspend_size +.equ cpu_v6_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v6_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU + mrc p15, 0, r5, c3, c0, 0 @ Domain ID + mrc p15, 0, r6, c2, c0, 1 @ Translation table base 1 +#endif + mrc p15, 0, r7, c1, c0, 1 @ auxiliary control register + mrc p15, 0, r8, c1, c0, 2 @ co-processor access control + mrc p15, 0, r9, c1, c0, 0 @ control register + stmia r0, {r4 - r9} + ldmfd sp!, {r4- r9, pc} +ENDPROC(cpu_v6_do_suspend) + +ENTRY(cpu_v6_do_resume) + mov ip, #0 + mcr p15, 0, ip, c7, c14, 0 @ clean+invalidate D cache + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c7, c15, 0 @ clean+invalidate cache + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID + ldmia r0, {r4 - r9} + mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID +#ifdef CONFIG_MMU + mcr p15, 0, r5, c3, c0, 0 @ Domain ID + ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) + ALT_UP(orr r1, r1, #TTB_FLAGS_UP) + mcr p15, 0, r1, c2, c0, 0 @ Translation table base 0 + mcr p15, 0, r6, c2, c0, 1 @ Translation table base 1 + mcr p15, 0, ip, c2, c0, 2 @ TTB control register +#endif + mcr p15, 0, r7, c1, c0, 1 @ auxiliary control register + mcr p15, 0, r8, c1, c0, 2 @ co-processor access control + mcr p15, 0, ip, c7, c5, 4 @ ISB + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_v6_do_resume) +#endif + + string cpu_v6_name, "ARMv6-compatible processor" + + .align + +/* + * __v6_setup + * + * Initialise TLB, Caches, and MMU state ready to switch the MMU + * on. Return in r0 the new CP15 C1 control register setting. + * + * We automatically detect if we have a Harvard cache, and use the + * Harvard cache control instructions insead of the unified cache + * control instructions. + * + * This should be able to cover all ARMv6 cores. + * + * It is assumed that: + * - cache type register is implemented + */ +__v6_setup: +#ifdef CONFIG_SMP + ALT_SMP(mrc p15, 0, r0, c1, c0, 1) @ Enable SMP/nAMP mode + ALT_UP(nop) + orr r0, r0, #0x20 + ALT_SMP(mcr p15, 0, r0, c1, c0, 1) + ALT_UP(nop) +#endif + + mov r0, #0 + mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + mcr p15, 0, r0, c7, c15, 0 @ clean+invalidate cache +#ifdef CONFIG_MMU + mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs + mcr p15, 0, r0, c2, c0, 2 @ TTB control register + ALT_SMP(orr r4, r4, #TTB_FLAGS_SMP) + ALT_UP(orr r4, r4, #TTB_FLAGS_UP) + ALT_SMP(orr r8, r8, #TTB_FLAGS_SMP) + ALT_UP(orr r8, r8, #TTB_FLAGS_UP) + mcr p15, 0, r8, c2, c0, 1 @ load TTB1 +#endif /* CONFIG_MMU */ + mcr p15, 0, r0, c7, c10, 4 @ drain write buffer and + @ complete invalidations + adr r5, v6_crval + ldmia r5, {r5, r6} + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables + mrc p15, 0, r0, c1, c0, 0 @ read control register + bic r0, r0, r5 @ clear bits them + orr r0, r0, r6 @ set them +#ifdef CONFIG_ARM_ERRATA_364296 + /* + * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data + * corruption with hit-under-miss enabled). The conditional code below + * (setting the undocumented bit 31 in the auxiliary control register + * and the FI bit in the control register) disables hit-under-miss + * without putting the processor into full low interrupt latency mode. + */ + ldr r6, =0x4107b362 @ id for ARM1136 r0p2 + mrc p15, 0, r5, c0, c0, 0 @ get processor id + teq r5, r6 @ check for the faulty core + mrceq p15, 0, r5, c1, c0, 1 @ load aux control reg + orreq r5, r5, #(1 << 31) @ set the undocumented bit 31 + mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg + orreq r0, r0, #(1 << 21) @ low interrupt latency configuration +#endif + ret lr @ return to head.S:__ret + + /* + * V X F I D LR + * .... ...E PUI. .T.T 4RVI ZFRS BLDP WCAM + * rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 0 110 0011 1.00 .111 1101 < we want + */ + .type v6_crval, #object +v6_crval: + crval clear=0x01e0fb7f, mmuset=0x00c0387d, ucset=0x00c0187c + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions v6, dabort=v6_early_abort, pabort=v6_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv6" + string cpu_elf_name, "v6" + .align + + .section ".proc.info.init", "a" + + /* + * Match any ARMv6 processor core. + */ + .type __v6_proc_info, #object +__v6_proc_info: + .long 0x0007b000 + .long 0x0007f000 + ALT_SMP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_SMP) + ALT_UP(.long \ + PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS_UP) + .long PMD_TYPE_SECT | \ + PMD_SECT_XN | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __v6_setup, __v6_proc_info + .long cpu_arch_name + .long cpu_elf_name + /* See also feat_v6_fixup() for HWCAP_TLS */ + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS + .long cpu_v6_name + .long v6_processor_functions + .long v6wbi_tlb_fns + .long v6_user_fns + .long v6_cache_fns + .size __v6_proc_info, . - __v6_proc_info diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S new file mode 100644 index 0000000000..0a3083ad19 --- /dev/null +++ b/arch/arm/mm/proc-v7-2level.S @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * arch/arm/mm/proc-v7-2level.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + */ + +#define TTB_S (1 << 1) +#define TTB_RGN_NC (0 << 3) +#define TTB_RGN_OC_WBWA (1 << 3) +#define TTB_RGN_OC_WT (2 << 3) +#define TTB_RGN_OC_WB (3 << 3) +#define TTB_NOS (1 << 5) +#define TTB_IRGN_NC ((0 << 0) | (0 << 6)) +#define TTB_IRGN_WBWA ((0 << 0) | (1 << 6)) +#define TTB_IRGN_WT ((1 << 0) | (0 << 6)) +#define TTB_IRGN_WB ((1 << 0) | (1 << 6)) + +/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ +#define TTB_FLAGS_UP TTB_IRGN_WB|TTB_RGN_OC_WB +#define PMD_FLAGS_UP PMD_SECT_WB + +/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ +#define TTB_FLAGS_SMP TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA +#define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S + +.arch armv7-a + +/* + * cpu_v7_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys + * + * - pgd_phys - physical address of new TTB + * + * It is assumed that: + * - we are not using split page tables + * + * Note that we always need to flush BTAC/BTB if IBE is set + * even on Cortex-A8 revisions not affected by 430973. + * If IBE is not set, the flush BTAC/BTB won't do anything. + */ +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + mmid r1, r1 @ get mm->context.id + ALT_SMP(orr r0, r0, #TTB_FLAGS_SMP) + ALT_UP(orr r0, r0, #TTB_FLAGS_UP) +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrc p15, 0, r2, c13, c0, 1 @ read current context ID + lsr r2, r2, #8 @ extract the PID + bfi r1, r2, #8, #24 @ insert into new context ID +#endif +#ifdef CONFIG_ARM_ERRATA_754322 + dsb +#endif + mcr p15, 0, r1, c13, c0, 1 @ set context ID + isb + mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 + isb +#endif + bx lr +ENDPROC(cpu_v7_switch_mm) + +/* + * cpu_v7_set_pte_ext(ptep, pte) + * + * Set a level 2 translation table entry. + * + * - ptep - pointer to level 2 translation table entry + * (hardware version is stored at +2048 bytes) + * - pte - PTE value to store + * - ext - value for extended PTE bits + */ +ENTRY(cpu_v7_set_pte_ext) +#ifdef CONFIG_MMU + str r1, [r0] @ linux version + + bic r3, r1, #0x000003f0 + bic r3, r3, #PTE_TYPE_MASK + orr r3, r3, r2 + orr r3, r3, #PTE_EXT_AP0 | 2 + + tst r1, #1 << 4 + orrne r3, r3, #PTE_EXT_TEX(1) + + eor r1, r1, #L_PTE_DIRTY + tst r1, #L_PTE_RDONLY | L_PTE_DIRTY + orrne r3, r3, #PTE_EXT_APX + + tst r1, #L_PTE_USER + orrne r3, r3, #PTE_EXT_AP1 + + tst r1, #L_PTE_XN + orrne r3, r3, #PTE_EXT_XN + + tst r1, #L_PTE_YOUNG + tstne r1, #L_PTE_VALID + eorne r1, r1, #L_PTE_NONE + tstne r1, #L_PTE_NONE + moveq r3, #0 + + ARM( str r3, [r0, #2048]! ) + THUMB( add r0, r0, #2048 ) + THUMB( str r3, [r0] ) + ALT_SMP(W(nop)) + ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte +#endif + bx lr +ENDPROC(cpu_v7_set_pte_ext) + + /* + * Memory region attributes with SCTLR.TRE=1 + * + * n = TEX[0],C,B + * TR = PRRR[2n+1:2n] - memory type + * IR = NMRR[2n+1:2n] - inner cacheable property + * OR = NMRR[2n+17:2n+16] - outer cacheable property + * + * n TR IR OR + * UNCACHED 000 00 + * BUFFERABLE 001 10 00 00 + * WRITETHROUGH 010 10 10 10 + * WRITEBACK 011 10 11 11 + * reserved 110 + * WRITEALLOC 111 10 01 01 + * DEV_SHARED 100 01 + * DEV_NONSHARED 100 01 + * DEV_WC 001 10 + * DEV_CACHED 011 10 + * + * Other attributes: + * + * DS0 = PRRR[16] = 0 - device shareable property + * DS1 = PRRR[17] = 1 - device shareable property + * NS0 = PRRR[18] = 0 - normal shareable property + * NS1 = PRRR[19] = 1 - normal shareable property + * NOS = PRRR[24+n] = 1 - not outer shareable + */ +.equ PRRR, 0xff0a81a8 +.equ NMRR, 0x40e040e0 + + /* + * Macro for setting up the TTBRx and TTBCR registers. + * - \ttb0 and \ttb1 updated with the corresponding flags. + */ + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp + mcr p15, 0, \zero, c2, c0, 2 @ TTB control register + ALT_SMP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_UP) + ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) + mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 + .endm + + /* AT + * TFR EV X F I D LR S + * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 01 0 110 0011 1100 .111 1101 < we want + */ + .align 2 + .type v7_crval, #object +v7_crval: + crval clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S new file mode 100644 index 0000000000..131984462d --- /dev/null +++ b/arch/arm/mm/proc-v7-3level.S @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * arch/arm/mm/proc-v7-3level.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * Copyright (C) 2011 ARM Ltd. + * Author: Catalin Marinas <catalin.marinas@arm.com> + * based on arch/arm/mm/proc-v7-2level.S + */ +#include <asm/assembler.h> + +#define TTB_IRGN_NC (0 << 8) +#define TTB_IRGN_WBWA (1 << 8) +#define TTB_IRGN_WT (2 << 8) +#define TTB_IRGN_WB (3 << 8) +#define TTB_RGN_NC (0 << 10) +#define TTB_RGN_OC_WBWA (1 << 10) +#define TTB_RGN_OC_WT (2 << 10) +#define TTB_RGN_OC_WB (3 << 10) +#define TTB_S (3 << 12) +#define TTB_EAE (1 << 31) + +/* PTWs cacheable, inner WB not shareable, outer WB not shareable */ +#define TTB_FLAGS_UP (TTB_IRGN_WB|TTB_RGN_OC_WB) +#define PMD_FLAGS_UP (PMD_SECT_WB) + +/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */ +#define TTB_FLAGS_SMP (TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA) +#define PMD_FLAGS_SMP (PMD_SECT_WBWA|PMD_SECT_S) + +#ifndef __ARMEB__ +# define rpgdl r0 +# define rpgdh r1 +#else +# define rpgdl r1 +# define rpgdh r0 +#endif + +/* + * cpu_v7_switch_mm(pgd_phys, tsk) + * + * Set the translation table base pointer to be pgd_phys (physical address of + * the new TTB). + */ +ENTRY(cpu_v7_switch_mm) +#ifdef CONFIG_MMU + mmid r2, r2 + asid r2, r2 + orr rpgdh, rpgdh, r2, lsl #(48 - 32) @ upper 32-bits of pgd + mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 + isb +#endif + ret lr +ENDPROC(cpu_v7_switch_mm) + +#ifdef __ARMEB__ +#define rl r3 +#define rh r2 +#else +#define rl r2 +#define rh r3 +#endif + +/* + * cpu_v7_set_pte_ext(ptep, pte) + * + * Set a level 2 translation table entry. + * - ptep - pointer to level 3 translation table entry + * - pte - PTE value to store (64-bit in r2 and r3) + */ +ENTRY(cpu_v7_set_pte_ext) +#ifdef CONFIG_MMU + tst rl, #L_PTE_VALID + beq 1f + tst rh, #1 << (57 - 32) @ L_PTE_NONE + bicne rl, #L_PTE_VALID + bne 1f + + eor ip, rh, #1 << (55 - 32) @ toggle L_PTE_DIRTY in temp reg to + @ test for !L_PTE_DIRTY || L_PTE_RDONLY + tst ip, #1 << (55 - 32) | 1 << (58 - 32) + orrne rl, #PTE_AP2 + biceq rl, #PTE_AP2 + +1: strd r2, r3, [r0] + ALT_SMP(W(nop)) + ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte +#endif + ret lr +ENDPROC(cpu_v7_set_pte_ext) + + /* + * Memory region attributes for LPAE (defined in pgtable-3level.h): + * + * n = AttrIndx[2:0] + * + * n MAIR + * UNCACHED 000 00000000 + * BUFFERABLE 001 01000100 + * DEV_WC 001 01000100 + * WRITETHROUGH 010 10101010 + * WRITEBACK 011 11101110 + * DEV_CACHED 011 11101110 + * DEV_SHARED 100 00000100 + * DEV_NONSHARED 100 00000100 + * unused 101 + * unused 110 + * WRITEALLOC 111 11111111 + */ +.equ PRRR, 0xeeaa4400 @ MAIR0 +.equ NMRR, 0xff000004 @ MAIR1 + + /* + * Macro for setting up the TTBRx and TTBCR registers. + * - \ttbr1 updated. + */ + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp + ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address + cmp \ttbr1, \tmp, lsr #12 @ PHYS_OFFSET > PAGE_OFFSET? + mov \tmp, #TTB_EAE @ for TTB control egister + ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) + ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) + ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP << 16) + ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16) + /* + * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above), + * otherwise booting secondary CPUs would end up using TTBR1 for the + * identity mapping set up in TTBR0. + */ + orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ + mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR + mov \tmp, \ttbr1, lsr #20 + mov \ttbr1, \ttbr1, lsl #12 + addls \ttbr1, \ttbr1, #TTBR1_OFFSET + mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1 + .endm + + /* + * AT + * TFR EV X F IHD LR S + * .EEE ..EE PUI. .TAT 4RVI ZWRS BLDP WCAM + * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced + * 11 0 110 0 0011 1100 .111 1101 < we want + */ + .align 2 + .type v7_crval, #object +v7_crval: + crval clear=0x0122c302, mmuset=0x30c03c7d, ucset=0x00c01c7c diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c new file mode 100644 index 0000000000..8bc7a2d6d6 --- /dev/null +++ b/arch/arm/mm/proc-v7-bugs.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/arm-smccc.h> +#include <linux/kernel.h> +#include <linux/smp.h> + +#include <asm/cp15.h> +#include <asm/cputype.h> +#include <asm/proc-fns.h> +#include <asm/spectre.h> +#include <asm/system_misc.h> + +#ifdef CONFIG_ARM_PSCI +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + + default: + return SPECTRE_VULNERABLE; + } +} +#else +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + return SPECTRE_VULNERABLE; +} +#endif + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR +DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); + +extern void cpu_v7_iciallu_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +extern void cpu_v7_bpiall_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +extern void cpu_v7_smc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); +extern void cpu_v7_hvc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm); + +static void harden_branch_predictor_bpiall(void) +{ + write_sysreg(0, BPIALL); +} + +static void harden_branch_predictor_iciallu(void) +{ + write_sysreg(0, ICIALLU); +} + +static void __maybe_unused call_smc_arch_workaround_1(void) +{ + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static void __maybe_unused call_hvc_arch_workaround_1(void) +{ + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); +} + +static unsigned int spectre_v2_install_workaround(unsigned int method) +{ + const char *spectre_v2_method = NULL; + int cpu = smp_processor_id(); + + if (per_cpu(harden_branch_predictor_fn, cpu)) + return SPECTRE_MITIGATED; + + switch (method) { + case SPECTRE_V2_METHOD_BPIALL: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_bpiall; + spectre_v2_method = "BPIALL"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_iciallu; + spectre_v2_method = "ICIALLU"; + break; + + case SPECTRE_V2_METHOD_HVC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_hvc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; + spectre_v2_method = "hypervisor"; + break; + + case SPECTRE_V2_METHOD_SMC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_smc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; + spectre_v2_method = "firmware"; + break; + } + + if (spectre_v2_method) + pr_info("CPU%u: Spectre v2: using %s workaround\n", + smp_processor_id(), spectre_v2_method); + + return SPECTRE_MITIGATED; +} +#else +static unsigned int spectre_v2_install_workaround(unsigned int method) +{ + pr_info_once("Spectre V2: workarounds disabled by configuration\n"); + + return SPECTRE_VULNERABLE; +} +#endif + +static void cpu_v7_spectre_v2_init(void) +{ + unsigned int state, method = 0; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A8: + case ARM_CPU_PART_CORTEX_A9: + case ARM_CPU_PART_CORTEX_A12: + case ARM_CPU_PART_CORTEX_A17: + case ARM_CPU_PART_CORTEX_A73: + case ARM_CPU_PART_CORTEX_A75: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; + break; + + case ARM_CPU_PART_CORTEX_A15: + case ARM_CPU_PART_BRAHMA_B15: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_ICIALLU; + break; + + case ARM_CPU_PART_BRAHMA_B53: + /* Requires no workaround */ + state = SPECTRE_UNAFFECTED; + break; + + default: + /* Other ARM CPUs require no workaround */ + if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) { + state = SPECTRE_UNAFFECTED; + break; + } + + fallthrough; + + /* Cortex A57/A72 require firmware workaround */ + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = spectre_v2_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) + break; + + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: + method = SPECTRE_V2_METHOD_HVC; + break; + + case SMCCC_CONDUIT_SMC: + method = SPECTRE_V2_METHOD_SMC; + break; + + default: + state = SPECTRE_VULNERABLE; + break; + } + } + + if (state == SPECTRE_MITIGATED) + state = spectre_v2_install_workaround(method); + + spectre_v2_update_state(state, method); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +static int spectre_bhb_method; + +static const char *spectre_bhb_method_name(int method) +{ + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + return "loop"; + + case SPECTRE_V2_METHOD_BPIALL: + return "BPIALL"; + + default: + return "unknown"; + } +} + +static int spectre_bhb_install_workaround(int method) +{ + if (spectre_bhb_method != method) { + if (spectre_bhb_method) { + pr_err("CPU%u: Spectre BHB: method disagreement, system vulnerable\n", + smp_processor_id()); + + return SPECTRE_VULNERABLE; + } + + if (spectre_bhb_update_vectors(method) == SPECTRE_VULNERABLE) + return SPECTRE_VULNERABLE; + + spectre_bhb_method = method; + + pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n", + smp_processor_id(), spectre_bhb_method_name(method)); + } + + return SPECTRE_MITIGATED; +} +#else +static int spectre_bhb_install_workaround(int method) +{ + return SPECTRE_VULNERABLE; +} +#endif + +static void cpu_v7_spectre_bhb_init(void) +{ + unsigned int state, method = 0; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A15: + case ARM_CPU_PART_BRAHMA_B15: + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_LOOP8; + break; + + case ARM_CPU_PART_CORTEX_A73: + case ARM_CPU_PART_CORTEX_A75: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; + break; + + default: + state = SPECTRE_UNAFFECTED; + break; + } + + if (state == SPECTRE_MITIGATED) + state = spectre_bhb_install_workaround(method); + + spectre_v2_update_state(state, method); +} + +static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned, + u32 mask, const char *msg) +{ + u32 aux_cr; + + asm("mrc p15, 0, %0, c1, c0, 1" : "=r" (aux_cr)); + + if ((aux_cr & mask) != mask) { + if (!*warned) + pr_err("CPU%u: %s", smp_processor_id(), msg); + *warned = true; + return false; + } + return true; +} + +static DEFINE_PER_CPU(bool, spectre_warned); + +static bool check_spectre_auxcr(bool *warned, u32 bit) +{ + return IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR) && + cpu_v7_check_auxcr_set(warned, bit, + "Spectre v2: firmware did not set auxiliary control register IBE bit, system vulnerable\n"); +} + +void cpu_v7_ca8_ibe(void) +{ + if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6))) + cpu_v7_spectre_v2_init(); +} + +void cpu_v7_ca15_ibe(void) +{ + if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) + cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); +} + +void cpu_v7_bugs_init(void) +{ + cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); +} diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S new file mode 100644 index 0000000000..193c7aeb67 --- /dev/null +++ b/arch/arm/mm/proc-v7.S @@ -0,0 +1,825 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/proc-v7.S + * + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This is the "shell" of the ARMv7 processor support. + */ +#include <linux/arm-smccc.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> + +#include "proc-macros.S" + +#ifdef CONFIG_ARM_LPAE +#include "proc-v7-3level.S" +#else +#include "proc-v7-2level.S" +#endif + +.arch armv7-a + +ENTRY(cpu_v7_proc_init) + ret lr +ENDPROC(cpu_v7_proc_init) + +ENTRY(cpu_v7_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1000 @ ...i............ + bic r0, r0, #0x0006 @ .............ca. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr +ENDPROC(cpu_v7_proc_fin) + +/* + * cpu_v7_reset(loc, hyp) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + * - hyp - indicate if restart occurs in HYP mode + * + * This code must be executed using a flat identity mapping with + * caches disabled. + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_v7_reset) + mrc p15, 0, r2, c1, c0, 0 @ ctrl register + bic r2, r2, #0x1 @ ...............m + THUMB( bic r2, r2, #1 << 30 ) @ SCTLR.TE (Thumb exceptions) + mcr p15, 0, r2, c1, c0, 0 @ disable MMU + isb +#ifdef CONFIG_ARM_VIRT_EXT + teq r1, #0 + bne __hyp_soft_restart +#endif + bx r0 +ENDPROC(cpu_v7_reset) + .popsection + +/* + * cpu_v7_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. + */ +ENTRY(cpu_v7_do_idle) + dsb @ WFI may enter a low-power mode + wfi + ret lr +ENDPROC(cpu_v7_do_idle) + +ENTRY(cpu_v7_dcache_clean_area) + ALT_SMP(W(nop)) @ MP extensions imply L1 PTW + ALT_UP_B(1f) + ret lr +1: dcache_line_size r2, r3 +2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, r2 + subs r1, r1, r2 + bhi 2b + dsb ishst + ret lr +ENDPROC(cpu_v7_dcache_clean_area) + +#ifdef CONFIG_ARM_PSCI + .arch_extension sec +ENTRY(cpu_v7_smc_switch_mm) + stmfd sp!, {r0 - r3} + movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 + movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldmfd sp!, {r0 - r3} + b cpu_v7_switch_mm +ENDPROC(cpu_v7_smc_switch_mm) + .arch_extension virt +ENTRY(cpu_v7_hvc_switch_mm) + stmfd sp!, {r0 - r3} + movw r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1 + movt r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1 + hvc #0 + ldmfd sp!, {r0 - r3} + b cpu_v7_switch_mm +ENDPROC(cpu_v7_hvc_switch_mm) +#endif +ENTRY(cpu_v7_iciallu_switch_mm) + mov r3, #0 + mcr p15, 0, r3, c7, c5, 0 @ ICIALLU + b cpu_v7_switch_mm +ENDPROC(cpu_v7_iciallu_switch_mm) +ENTRY(cpu_v7_bpiall_switch_mm) + mov r3, #0 + mcr p15, 0, r3, c7, c5, 6 @ flush BTAC/BTB + b cpu_v7_switch_mm +ENDPROC(cpu_v7_bpiall_switch_mm) + + string cpu_v7_name, "ARMv7 Processor" + .align + +/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */ +.globl cpu_v7_suspend_size +.equ cpu_v7_suspend_size, 4 * 9 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v7_do_suspend) + stmfd sp!, {r4 - r11, lr} + mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID + mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID + stmia r0!, {r4 - r5} +#ifdef CONFIG_MMU + mrc p15, 0, r6, c3, c0, 0 @ Domain ID +#ifdef CONFIG_ARM_LPAE + mrrc p15, 1, r5, r7, c2 @ TTB 1 +#else + mrc p15, 0, r7, c2, c0, 1 @ TTB 1 +#endif + mrc p15, 0, r11, c2, c0, 2 @ TTB control register +#endif + mrc p15, 0, r8, c1, c0, 0 @ Control register + mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register + mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control + stmia r0, {r5 - r11} + ldmfd sp!, {r4 - r11, pc} +ENDPROC(cpu_v7_do_suspend) + +ENTRY(cpu_v7_do_resume) + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache + mcr p15, 0, ip, c13, c0, 1 @ set reserved context ID + ldmia r0!, {r4 - r5} + mcr p15, 0, r4, c13, c0, 0 @ FCSE/PID + mcr p15, 0, r5, c13, c0, 3 @ User r/o thread ID + ldmia r0, {r5 - r11} +#ifdef CONFIG_MMU + mcr p15, 0, ip, c8, c7, 0 @ invalidate TLBs + mcr p15, 0, r6, c3, c0, 0 @ Domain ID +#ifdef CONFIG_ARM_LPAE + mcrr p15, 0, r1, ip, c2 @ TTB 0 + mcrr p15, 1, r5, r7, c2 @ TTB 1 +#else + ALT_SMP(orr r1, r1, #TTB_FLAGS_SMP) + ALT_UP(orr r1, r1, #TTB_FLAGS_UP) + mcr p15, 0, r1, c2, c0, 0 @ TTB 0 + mcr p15, 0, r7, c2, c0, 1 @ TTB 1 +#endif + mcr p15, 0, r11, c2, c0, 2 @ TTB control register + ldr r4, =PRRR @ PRRR + ldr r5, =NMRR @ NMRR + mcr p15, 0, r4, c10, c2, 0 @ write PRRR + mcr p15, 0, r5, c10, c2, 1 @ write NMRR +#endif /* CONFIG_MMU */ + mrc p15, 0, r4, c1, c0, 1 @ Read Auxiliary control register + teq r4, r9 @ Is it already set? + mcrne p15, 0, r9, c1, c0, 1 @ No, so write it + mcr p15, 0, r10, c1, c0, 2 @ Co-processor access control + isb + dsb + mov r0, r8 @ control register + b cpu_resume_mmu +ENDPROC(cpu_v7_do_resume) +#endif + +.globl cpu_ca9mp_suspend_size +.equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_ca9mp_do_suspend) + stmfd sp!, {r4 - r5} + mrc p15, 0, r4, c15, c0, 1 @ Diagnostic register + mrc p15, 0, r5, c15, c0, 0 @ Power register + stmia r0!, {r4 - r5} + ldmfd sp!, {r4 - r5} + b cpu_v7_do_suspend +ENDPROC(cpu_ca9mp_do_suspend) + +ENTRY(cpu_ca9mp_do_resume) + ldmia r0!, {r4 - r5} + mrc p15, 0, r10, c15, c0, 1 @ Read Diagnostic register + teq r4, r10 @ Already restored? + mcrne p15, 0, r4, c15, c0, 1 @ No, so restore it + mrc p15, 0, r10, c15, c0, 0 @ Read Power register + teq r5, r10 @ Already restored? + mcrne p15, 0, r5, c15, c0, 0 @ No, so restore it + b cpu_v7_do_resume +ENDPROC(cpu_ca9mp_do_resume) +#endif + +#ifdef CONFIG_CPU_PJ4B + globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm + globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_pj4b_proc_init, cpu_v7_proc_init + globl_equ cpu_pj4b_proc_fin, cpu_v7_proc_fin + globl_equ cpu_pj4b_reset, cpu_v7_reset +#ifdef CONFIG_PJ4B_ERRATA_4742 +ENTRY(cpu_pj4b_do_idle) + dsb @ WFI may enter a low-power mode + wfi + dsb @barrier + ret lr +ENDPROC(cpu_pj4b_do_idle) +#else + globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle +#endif + globl_equ cpu_pj4b_dcache_clean_area, cpu_v7_dcache_clean_area +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_pj4b_do_suspend) + stmfd sp!, {r6 - r10} + mrc p15, 1, r6, c15, c1, 0 @ save CP15 - extra features + mrc p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 + mrc p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2 + mrc p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1 + mrc p15, 0, r10, c9, c14, 0 @ save CP15 - PMC + stmia r0!, {r6 - r10} + ldmfd sp!, {r6 - r10} + b cpu_v7_do_suspend +ENDPROC(cpu_pj4b_do_suspend) + +ENTRY(cpu_pj4b_do_resume) + ldmia r0!, {r6 - r10} + mcr p15, 1, r6, c15, c1, 0 @ restore CP15 - extra features + mcr p15, 1, r7, c15, c2, 0 @ restore CP15 - Aux Func Modes Ctrl 0 + mcr p15, 1, r8, c15, c1, 2 @ restore CP15 - Aux Debug Modes Ctrl 2 + mcr p15, 1, r9, c15, c1, 1 @ restore CP15 - Aux Debug Modes Ctrl 1 + mcr p15, 0, r10, c9, c14, 0 @ restore CP15 - PMC + b cpu_v7_do_resume +ENDPROC(cpu_pj4b_do_resume) +#endif +.globl cpu_pj4b_suspend_size +.equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5 + +#endif + + @ + @ Invoke the v7_invalidate_l1() function, which adheres to the AAPCS + @ rules, and so it may corrupt registers that we need to preserve. + @ + .macro do_invalidate_l1 + mov r6, r1 + mov r7, r2 + mov r10, lr + bl v7_invalidate_l1 @ corrupts {r0-r3, ip, lr} + mov r1, r6 + mov r2, r7 + mov lr, r10 + .endm + +/* + * __v7_setup + * + * Initialise TLB, Caches, and MMU state ready to switch the MMU + * on. Return in r0 the new CP15 C1 control register setting. + * + * r1, r2, r4, r5, r9, r13 must be preserved - r13 is not a stack + * r4: TTBR0 (low word) + * r5: TTBR0 (high word if LPAE) + * r8: TTBR1 + * r9: Main ID register + * + * This should be able to cover all ARMv7 cores. + * + * It is assumed that: + * - cache type register is implemented + */ +__v7_ca5mp_setup: +__v7_ca9mp_setup: +__v7_cr7mp_setup: +__v7_cr8mp_setup: + do_invalidate_l1 + mov r10, #(1 << 0) @ Cache/TLB ops broadcasting + b 1f +__v7_ca7mp_setup: +__v7_ca12mp_setup: +__v7_ca15mp_setup: +__v7_b15mp_setup: +__v7_ca17mp_setup: + do_invalidate_l1 + mov r10, #0 +1: +#ifdef CONFIG_SMP + orr r10, r10, #(1 << 6) @ Enable SMP/nAMP mode + ALT_SMP(mrc p15, 0, r0, c1, c0, 1) + ALT_UP(mov r0, r10) @ fake it for UP + orr r10, r10, r0 @ Set required bits + teq r10, r0 @ Were they already set? + mcrne p15, 0, r10, c1, c0, 1 @ No, update register +#endif + b __v7_setup_cont + +/* + * Errata: + * r0, r10 available for use + * r1, r2, r4, r5, r9, r13: must be preserved + * r3: contains MIDR rX number in bits 23-20 + * r6: contains MIDR rXpY as 8-bit XY number + * r9: MIDR + */ +__ca8_errata: +#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) + teq r3, #0x00100000 @ only present in r1p* + mrceq p15, 0, r0, c1, c0, 1 @ read aux control register + orreq r0, r0, #(1 << 6) @ set IBE to 1 + mcreq p15, 0, r0, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_458693 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 0, r0, c1, c0, 1 @ read aux control register + orreq r0, r0, #(1 << 5) @ set L1NEON to 1 + orreq r0, r0, #(1 << 9) @ set PLDNOP to 1 + mcreq p15, 0, r0, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_460075 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 1, r0, c9, c0, 2 @ read L2 cache aux ctrl register + tsteq r0, #1 << 22 + orreq r0, r0, #(1 << 22) @ set the Write Allocate disable bit + mcreq p15, 1, r0, c9, c0, 2 @ write the L2 cache aux ctrl register +#endif + b __errata_finish + +__ca9_errata: +#ifdef CONFIG_ARM_ERRATA_742230 + cmp r6, #0x22 @ only present up to r2p2 + mrcle p15, 0, r0, c15, c0, 1 @ read diagnostic register + orrle r0, r0, #1 << 4 @ set bit #4 + mcrle p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_742231 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register + orreq r0, r0, #1 << 12 @ set bit #12 + orreq r0, r0, #1 << 22 @ set bit #22 + mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_743622 + teq r3, #0x00200000 @ only present in r2p* + mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register + orreq r0, r0, #1 << 6 @ set bit #6 + mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) + ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 + ALT_UP_B(1f) + mrclt p15, 0, r0, c15, c0, 1 @ read diagnostic register + orrlt r0, r0, #1 << 11 @ set bit #11 + mcrlt p15, 0, r0, c15, c0, 1 @ write diagnostic register +1: +#endif + b __errata_finish + +__ca15_errata: +#ifdef CONFIG_ARM_ERRATA_773022 + cmp r6, #0x4 @ only present up to r0p4 + mrcle p15, 0, r0, c1, c0, 1 @ read aux control register + orrle r0, r0, #1 << 1 @ disable loop buffer + mcrle p15, 0, r0, c1, c0, 1 @ write aux control register +#endif + b __errata_finish + +__ca12_errata: +#ifdef CONFIG_ARM_ERRATA_818325_852422 + mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register + orr r10, r10, #1 << 12 @ set bit #12 + mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_821420 + mrc p15, 0, r10, c15, c0, 2 @ read internal feature reg + orr r10, r10, #1 << 1 @ set bit #1 + mcr p15, 0, r10, c15, c0, 2 @ write internal feature reg +#endif +#ifdef CONFIG_ARM_ERRATA_825619 + mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register + orr r10, r10, #1 << 24 @ set bit #24 + mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_857271 + mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register + orr r10, r10, #3 << 10 @ set bits #10 and #11 + mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif + b __errata_finish + +__ca17_errata: +#ifdef CONFIG_ARM_ERRATA_852421 + cmp r6, #0x12 @ only present up to r1p2 + mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrle r10, r10, #1 << 24 @ set bit #24 + mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_852423 + cmp r6, #0x12 @ only present up to r1p2 + mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrle r10, r10, #1 << 12 @ set bit #12 + mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_857272 + mrc p15, 0, r10, c15, c0, 1 @ read diagnostic register + orr r10, r10, #3 << 10 @ set bits #10 and #11 + mcr p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif + b __errata_finish + +__v7_pj4b_setup: +#ifdef CONFIG_CPU_PJ4B + +/* Auxiliary Debug Modes Control 1 Register */ +#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */ +#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */ +#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */ + +/* Auxiliary Debug Modes Control 2 Register */ +#define PJ4B_FAST_LDR (1 << 23) /* Disable fast LDR */ +#define PJ4B_SNOOP_DATA (1 << 25) /* Do not interleave write and snoop data */ +#define PJ4B_CWF (1 << 27) /* Disable Critical Word First feature */ +#define PJ4B_OUTSDNG_NC (1 << 29) /* Disable outstanding non cacheable rqst */ +#define PJ4B_L1_REP_RR (1 << 30) /* L1 replacement - Strict round robin */ +#define PJ4B_AUX_DBG_CTRL2 (PJ4B_SNOOP_DATA | PJ4B_CWF |\ + PJ4B_OUTSDNG_NC | PJ4B_L1_REP_RR) + +/* Auxiliary Functional Modes Control Register 0 */ +#define PJ4B_SMP_CFB (1 << 1) /* Set SMP mode. Join the coherency fabric */ +#define PJ4B_L1_PAR_CHK (1 << 2) /* Support L1 parity checking */ +#define PJ4B_BROADCAST_CACHE (1 << 8) /* Broadcast Cache and TLB maintenance */ + +/* Auxiliary Debug Modes Control 0 Register */ +#define PJ4B_WFI_WFE (1 << 22) /* WFI/WFE - serve the DVM and back to idle */ + + /* Auxiliary Debug Modes Control 1 Register */ + mrc p15, 1, r0, c15, c1, 1 + orr r0, r0, #PJ4B_CLEAN_LINE + orr r0, r0, #PJ4B_INTER_PARITY + bic r0, r0, #PJ4B_STATIC_BP + mcr p15, 1, r0, c15, c1, 1 + + /* Auxiliary Debug Modes Control 2 Register */ + mrc p15, 1, r0, c15, c1, 2 + bic r0, r0, #PJ4B_FAST_LDR + orr r0, r0, #PJ4B_AUX_DBG_CTRL2 + mcr p15, 1, r0, c15, c1, 2 + + /* Auxiliary Functional Modes Control Register 0 */ + mrc p15, 1, r0, c15, c2, 0 +#ifdef CONFIG_SMP + orr r0, r0, #PJ4B_SMP_CFB +#endif + orr r0, r0, #PJ4B_L1_PAR_CHK + orr r0, r0, #PJ4B_BROADCAST_CACHE + mcr p15, 1, r0, c15, c2, 0 + + /* Auxiliary Debug Modes Control 0 Register */ + mrc p15, 1, r0, c15, c1, 0 + orr r0, r0, #PJ4B_WFI_WFE + mcr p15, 1, r0, c15, c1, 0 + +#endif /* CONFIG_CPU_PJ4B */ + +__v7_setup: + do_invalidate_l1 + +__v7_setup_cont: + and r0, r9, #0xff000000 @ ARM? + teq r0, #0x41000000 + bne __errata_finish + and r3, r9, #0x00f00000 @ variant + and r6, r9, #0x0000000f @ revision + orr r6, r6, r3, lsr #20-4 @ combine variant and revision + ubfx r0, r9, #4, #12 @ primary part number + + /* Cortex-A8 Errata */ + ldr r10, =0x00000c08 @ Cortex-A8 primary part number + teq r0, r10 + beq __ca8_errata + + /* Cortex-A9 Errata */ + ldr r10, =0x00000c09 @ Cortex-A9 primary part number + teq r0, r10 + beq __ca9_errata + + /* Cortex-A12 Errata */ + ldr r10, =0x00000c0d @ Cortex-A12 primary part number + teq r0, r10 + beq __ca12_errata + + /* Cortex-A17 Errata */ + ldr r10, =0x00000c0e @ Cortex-A17 primary part number + teq r0, r10 + beq __ca17_errata + + /* Cortex-A15 Errata */ + ldr r10, =0x00000c0f @ Cortex-A15 primary part number + teq r0, r10 + beq __ca15_errata + +__errata_finish: + mov r10, #0 + mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate +#ifdef CONFIG_MMU + mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs + v7_ttb_setup r10, r4, r5, r8, r3 @ TTBCR, TTBRx setup + ldr r3, =PRRR @ PRRR + ldr r6, =NMRR @ NMRR + mcr p15, 0, r3, c10, c2, 0 @ write PRRR + mcr p15, 0, r6, c10, c2, 1 @ write NMRR +#endif + dsb @ Complete invalidations +#ifndef CONFIG_ARM_THUMBEE + mrc p15, 0, r0, c0, c1, 0 @ read ID_PFR0 for ThumbEE + and r0, r0, #(0xf << 12) @ ThumbEE enabled field + teq r0, #(1 << 12) @ check if ThumbEE is present + bne 1f + mov r3, #0 + mcr p14, 6, r3, c1, c0, 0 @ Initialize TEEHBR to 0 + mrc p14, 6, r0, c0, c0, 0 @ load TEECR + orr r0, r0, #1 @ set the 1st bit in order to + mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access +1: +#endif + adr r3, v7_crval + ldmia r3, {r3, r6} + ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables +#ifdef CONFIG_SWP_EMULATE + orr r3, r3, #(1 << 10) @ set SW bit in "clear" + bic r6, r6, #(1 << 10) @ clear it in "mmuset" +#endif + mrc p15, 0, r0, c1, c0, 0 @ read control register + bic r0, r0, r3 @ clear bits them + orr r0, r0, r6 @ set them + THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions + ret lr @ return to head.S:__ret +ENDPROC(__v7_setup) + + __INITDATA + + .weak cpu_v7_bugs_init + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init + +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + @ generic v7 bpiall on context switch + globl_equ cpu_v7_bpiall_proc_init, cpu_v7_proc_init + globl_equ cpu_v7_bpiall_proc_fin, cpu_v7_proc_fin + globl_equ cpu_v7_bpiall_reset, cpu_v7_reset + globl_equ cpu_v7_bpiall_do_idle, cpu_v7_do_idle + globl_equ cpu_v7_bpiall_dcache_clean_area, cpu_v7_dcache_clean_area + globl_equ cpu_v7_bpiall_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_v7_bpiall_suspend_size, cpu_v7_suspend_size +#ifdef CONFIG_ARM_CPU_SUSPEND + globl_equ cpu_v7_bpiall_do_suspend, cpu_v7_do_suspend + globl_equ cpu_v7_bpiall_do_resume, cpu_v7_do_resume +#endif + define_processor_functions v7_bpiall, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init + +#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_bpiall_processor_functions +#else +#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_processor_functions +#endif + +#ifndef CONFIG_ARM_LPAE + @ Cortex-A8 - always needs bpiall switch_mm implementation + globl_equ cpu_ca8_proc_init, cpu_v7_proc_init + globl_equ cpu_ca8_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca8_reset, cpu_v7_reset + globl_equ cpu_ca8_do_idle, cpu_v7_do_idle + globl_equ cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area + globl_equ cpu_ca8_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_ca8_switch_mm, cpu_v7_bpiall_switch_mm + globl_equ cpu_ca8_suspend_size, cpu_v7_suspend_size +#ifdef CONFIG_ARM_CPU_SUSPEND + globl_equ cpu_ca8_do_suspend, cpu_v7_do_suspend + globl_equ cpu_ca8_do_resume, cpu_v7_do_resume +#endif + define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca8_ibe + + @ Cortex-A9 - needs more registers preserved across suspend/resume + @ and bpiall switch_mm for hardening + globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init + globl_equ cpu_ca9mp_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca9mp_reset, cpu_v7_reset + globl_equ cpu_ca9mp_do_idle, cpu_v7_do_idle + globl_equ cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + globl_equ cpu_ca9mp_switch_mm, cpu_v7_bpiall_switch_mm +#else + globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm +#endif + globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext + define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init +#endif + + @ Cortex-A15 - needs iciallu switch_mm for hardening + globl_equ cpu_ca15_proc_init, cpu_v7_proc_init + globl_equ cpu_ca15_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca15_reset, cpu_v7_reset + globl_equ cpu_ca15_do_idle, cpu_v7_do_idle + globl_equ cpu_ca15_dcache_clean_area, cpu_v7_dcache_clean_area +#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR + globl_equ cpu_ca15_switch_mm, cpu_v7_iciallu_switch_mm +#else + globl_equ cpu_ca15_switch_mm, cpu_v7_switch_mm +#endif + globl_equ cpu_ca15_set_pte_ext, cpu_v7_set_pte_ext + globl_equ cpu_ca15_suspend_size, cpu_v7_suspend_size + globl_equ cpu_ca15_do_suspend, cpu_v7_do_suspend + globl_equ cpu_ca15_do_resume, cpu_v7_do_resume + define_processor_functions ca15, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca15_ibe +#ifdef CONFIG_CPU_PJ4B + define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 +#endif + + .section ".rodata" + + string cpu_arch_name, "armv7" + string cpu_elf_name, "v7" + .align + + .section ".proc.info.init", "a" + + /* + * Standard v7 proc info content + */ +.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions, cache_fns = v7_cache_fns + ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ + PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) + ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ + PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags) + .long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags + initfn \initfunc, \name + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ + HWCAP_EDSP | HWCAP_TLS | \hwcaps + .long cpu_v7_name + .long \proc_fns + .long v7wbi_tlb_fns + .long v6_user_fns + .long \cache_fns +.endm + +#ifndef CONFIG_ARM_LPAE + /* + * ARM Ltd. Cortex A5 processor. + */ + .type __v7_ca5mp_proc_info, #object +__v7_ca5mp_proc_info: + .long 0x410fc050 + .long 0xff0ffff0 + __v7_proc __v7_ca5mp_proc_info, __v7_ca5mp_setup + .size __v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info + + /* + * ARM Ltd. Cortex A9 processor. + */ + .type __v7_ca9mp_proc_info, #object +__v7_ca9mp_proc_info: + .long 0x410fc090 + .long 0xff0ffff0 + __v7_proc __v7_ca9mp_proc_info, __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions + .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info + + /* + * ARM Ltd. Cortex A8 processor. + */ + .type __v7_ca8_proc_info, #object +__v7_ca8_proc_info: + .long 0x410fc080 + .long 0xff0ffff0 + __v7_proc __v7_ca8_proc_info, __v7_setup, proc_fns = ca8_processor_functions + .size __v7_ca8_proc_info, . - __v7_ca8_proc_info + +#endif /* CONFIG_ARM_LPAE */ + + /* + * Marvell PJ4B processor. + */ +#ifdef CONFIG_CPU_PJ4B + .type __v7_pj4b_proc_info, #object +__v7_pj4b_proc_info: + .long 0x560f5800 + .long 0xff0fff00 + __v7_proc __v7_pj4b_proc_info, __v7_pj4b_setup, proc_fns = pj4b_processor_functions + .size __v7_pj4b_proc_info, . - __v7_pj4b_proc_info +#endif + + /* + * ARM Ltd. Cortex R7 processor. + */ + .type __v7_cr7mp_proc_info, #object +__v7_cr7mp_proc_info: + .long 0x410fc170 + .long 0xff0ffff0 + __v7_proc __v7_cr7mp_proc_info, __v7_cr7mp_setup + .size __v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info + + /* + * ARM Ltd. Cortex R8 processor. + */ + .type __v7_cr8mp_proc_info, #object +__v7_cr8mp_proc_info: + .long 0x410fc180 + .long 0xff0ffff0 + __v7_proc __v7_cr8mp_proc_info, __v7_cr8mp_setup + .size __v7_cr8mp_proc_info, . - __v7_cr8mp_proc_info + + /* + * ARM Ltd. Cortex A7 processor. + */ + .type __v7_ca7mp_proc_info, #object +__v7_ca7mp_proc_info: + .long 0x410fc070 + .long 0xff0ffff0 + __v7_proc __v7_ca7mp_proc_info, __v7_ca7mp_setup + .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info + + /* + * ARM Ltd. Cortex A12 processor. + */ + .type __v7_ca12mp_proc_info, #object +__v7_ca12mp_proc_info: + .long 0x410fc0d0 + .long 0xff0ffff0 + __v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS + .size __v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info + + /* + * ARM Ltd. Cortex A15 processor. + */ + .type __v7_ca15mp_proc_info, #object +__v7_ca15mp_proc_info: + .long 0x410fc0f0 + .long 0xff0ffff0 + __v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup, proc_fns = ca15_processor_functions + .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info + + /* + * Broadcom Corporation Brahma-B15 processor. + */ + .type __v7_b15mp_proc_info, #object +__v7_b15mp_proc_info: + .long 0x420f00f0 + .long 0xff0ffff0 + __v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, proc_fns = ca15_processor_functions, cache_fns = b15_cache_fns + .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info + + /* + * ARM Ltd. Cortex A17 processor. + */ + .type __v7_ca17mp_proc_info, #object +__v7_ca17mp_proc_info: + .long 0x410fc0e0 + .long 0xff0ffff0 + __v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS + .size __v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info + + /* ARM Ltd. Cortex A73 processor */ + .type __v7_ca73_proc_info, #object +__v7_ca73_proc_info: + .long 0x410fd090 + .long 0xff0ffff0 + __v7_proc __v7_ca73_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS + .size __v7_ca73_proc_info, . - __v7_ca73_proc_info + + /* ARM Ltd. Cortex A75 processor */ + .type __v7_ca75_proc_info, #object +__v7_ca75_proc_info: + .long 0x410fd0a0 + .long 0xff0ffff0 + __v7_proc __v7_ca75_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS + .size __v7_ca75_proc_info, . - __v7_ca75_proc_info + + /* + * Qualcomm Inc. Krait processors. + */ + .type __krait_proc_info, #object +__krait_proc_info: + .long 0x510f0400 @ Required ID value + .long 0xff0ffc00 @ Mask for ID + /* + * Some Krait processors don't indicate support for SDIV and UDIV + * instructions in the ARM instruction set, even though they actually + * do support them. They also don't indicate support for fused multiply + * instructions even though they actually do support them. + */ + __v7_proc __krait_proc_info, __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4 + .size __krait_proc_info, . - __krait_proc_info + + /* + * Match any ARMv7 processor core. + */ + .type __v7_proc_info, #object +__v7_proc_info: + .long 0x000f0000 @ Required ID value + .long 0x000f0000 @ Mask for ID + __v7_proc __v7_proc_info, __v7_setup + .size __v7_proc_info, . - __v7_proc_info diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S new file mode 100644 index 0000000000..d65a12f851 --- /dev/null +++ b/arch/arm/mm/proc-v7m.S @@ -0,0 +1,255 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/proc-v7m.S + * + * Copyright (C) 2008 ARM Ltd. + * Copyright (C) 2001 Deep Blue Solutions Ltd. + * + * This is the "shell" of the ARMv7-M processor support. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/page.h> +#include <asm/v7m.h> +#include "proc-macros.S" + +ENTRY(cpu_v7m_proc_init) + ret lr +ENDPROC(cpu_v7m_proc_init) + +ENTRY(cpu_v7m_proc_fin) + ret lr +ENDPROC(cpu_v7m_proc_fin) + +/* + * cpu_v7m_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * - loc - location to jump to for soft reset + */ + .align 5 +ENTRY(cpu_v7m_reset) + ret r0 +ENDPROC(cpu_v7m_reset) + +/* + * cpu_v7m_do_idle() + * + * Idle the processor (eg, wait for interrupt). + * + * IRQs are already disabled. + */ +ENTRY(cpu_v7m_do_idle) + wfi + ret lr +ENDPROC(cpu_v7m_do_idle) + +ENTRY(cpu_v7m_dcache_clean_area) + ret lr +ENDPROC(cpu_v7m_dcache_clean_area) + +/* + * There is no MMU, so here is nothing to do. + */ +ENTRY(cpu_v7m_switch_mm) + ret lr +ENDPROC(cpu_v7m_switch_mm) + +.globl cpu_v7m_suspend_size +.equ cpu_v7m_suspend_size, 0 + +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_v7m_do_suspend) + ret lr +ENDPROC(cpu_v7m_do_suspend) + +ENTRY(cpu_v7m_do_resume) + ret lr +ENDPROC(cpu_v7m_do_resume) +#endif + +ENTRY(cpu_cm7_dcache_clean_area) + dcache_line_size r2, r3 + movw r3, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC + movt r3, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC + +1: str r0, [r3] @ clean D entry + add r0, r0, r2 + subs r1, r1, r2 + bhi 1b + dsb + ret lr +ENDPROC(cpu_cm7_dcache_clean_area) + +ENTRY(cpu_cm7_proc_fin) + movw r2, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) + movt r2, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR) + ldr r0, [r2] + bic r0, r0, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC) + str r0, [r2] + ret lr +ENDPROC(cpu_cm7_proc_fin) + + .section ".init.text", "ax" + +__v7m_cm7_setup: + mov r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC| V7M_SCB_CCR_BP) + b __v7m_setup_cont +/* + * __v7m_setup + * + * This should be able to cover all ARMv7-M cores. + */ +__v7m_setup: + mov r8, 0 + +__v7m_setup_cont: + @ Configure the vector table base address + ldr r0, =BASEADDR_V7M_SCB + ldr r12, =vector_table + str r12, [r0, V7M_SCB_VTOR] + + @ enable UsageFault, BusFault and MemManage fault. + ldr r5, [r0, #V7M_SCB_SHCSR] + orr r5, #(V7M_SCB_SHCSR_USGFAULTENA | V7M_SCB_SHCSR_BUSFAULTENA | V7M_SCB_SHCSR_MEMFAULTENA) + str r5, [r0, #V7M_SCB_SHCSR] + + @ Lower the priority of the SVC and PendSV exceptions + mov r5, #0x80000000 + str r5, [r0, V7M_SCB_SHPR2] @ set SVC priority + mov r5, #0x00800000 + str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority + + @ SVC to switch to handler mode. Notice that this requires sp to + @ point to writeable memory because the processor saves + @ some registers to the stack. + badr r1, 1f + ldr r5, [r12, #11 * 4] @ read the SVC vector entry + str r1, [r12, #11 * 4] @ write the temporary SVC vector entry + dsb + mov r6, lr @ save LR + ldr sp, =init_thread_union + THREAD_START_SP + cpsie i + svc #0 +1: cpsid i + /* Calculate exc_ret */ + orr r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK + ldmia sp, {r0-r3, r12} + str r5, [r12, #11 * 4] @ restore the original SVC vector entry + mov lr, r6 @ restore LR + + @ Special-purpose control register + mov r1, #1 + msr control, r1 @ Thread mode has unpriviledged access + + @ Configure caches (if implemented) + teq r8, #0 + stmiane sp, {r0-r6, lr} @ v7m_invalidate_l1 touches r0-r6 + blne v7m_invalidate_l1 + teq r8, #0 @ re-evalutae condition + ldmiane sp, {r0-r6, lr} + + @ Configure the System Control Register to ensure 8-byte stack alignment + @ Note the STKALIGN bit is either RW or RAO. + ldr r0, [r0, V7M_SCB_CCR] @ system control register + orr r0, #V7M_SCB_CCR_STKALIGN + orr r0, r0, r8 + + ret lr +ENDPROC(__v7m_setup) + +/* + * Cortex-M7 processor functions + */ + globl_equ cpu_cm7_proc_init, cpu_v7m_proc_init + globl_equ cpu_cm7_reset, cpu_v7m_reset + globl_equ cpu_cm7_do_idle, cpu_v7m_do_idle + globl_equ cpu_cm7_switch_mm, cpu_v7m_switch_mm + + define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + define_processor_functions cm7, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 + + .section ".rodata" + string cpu_arch_name, "armv7m" + string cpu_elf_name "v7m" + string cpu_v7m_name "ARMv7-M" + + .section ".proc.info.init", "a" + +.macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0, proc_fns = v7m_processor_functions + .long 0 /* proc_info_list.__cpu_mm_mmu_flags */ + .long 0 /* proc_info_list.__cpu_io_mmu_flags */ + initfn \initfunc, \name + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \hwcaps + .long cpu_v7m_name + .long \proc_fns + .long 0 /* proc_info_list.tlb */ + .long 0 /* proc_info_list.user */ + .long \cache_fns +.endm + + /* + * Match ARM Cortex-M55 processor. + */ + .type __v7m_cm55_proc_info, #object +__v7m_cm55_proc_info: + .long 0x410fd220 /* ARM Cortex-M55 0xD22 */ + .long 0xff0ffff0 /* Mask off revision, patch release */ + __v7m_proc __v7m_cm55_proc_info, __v7m_cm7_setup, hwcaps = HWCAP_EDSP, cache_fns = v7m_cache_fns, proc_fns = cm7_processor_functions + .size __v7m_cm55_proc_info, . - __v7m_cm55_proc_info + + /* + * Match ARM Cortex-M33 processor. + */ + .type __v7m_cm33_proc_info, #object +__v7m_cm33_proc_info: + .long 0x410fd210 /* ARM Cortex-M33 0xD21 */ + .long 0xff0ffff0 /* Mask off revision, patch release */ + __v7m_proc __v7m_cm33_proc_info, __v7m_setup, hwcaps = HWCAP_EDSP + .size __v7m_cm33_proc_info, . - __v7m_cm33_proc_info + + /* + * Match ARM Cortex-M7 processor. + */ + .type __v7m_cm7_proc_info, #object +__v7m_cm7_proc_info: + .long 0x410fc270 /* ARM Cortex-M7 0xC27 */ + .long 0xff0ffff0 /* Mask off revision, patch release */ + __v7m_proc __v7m_cm7_proc_info, __v7m_cm7_setup, hwcaps = HWCAP_EDSP, cache_fns = v7m_cache_fns, proc_fns = cm7_processor_functions + .size __v7m_cm7_proc_info, . - __v7m_cm7_proc_info + + /* + * Match ARM Cortex-M4 processor. + */ + .type __v7m_cm4_proc_info, #object +__v7m_cm4_proc_info: + .long 0x410fc240 /* ARM Cortex-M4 0xC24 */ + .long 0xff0ffff0 /* Mask off revision, patch release */ + __v7m_proc __v7m_cm4_proc_info, __v7m_setup, hwcaps = HWCAP_EDSP + .size __v7m_cm4_proc_info, . - __v7m_cm4_proc_info + + /* + * Match ARM Cortex-M3 processor. + */ + .type __v7m_cm3_proc_info, #object +__v7m_cm3_proc_info: + .long 0x410fc230 /* ARM Cortex-M3 0xC23 */ + .long 0xff0ffff0 /* Mask off revision, patch release */ + __v7m_proc __v7m_cm3_proc_info, __v7m_setup + .size __v7m_cm3_proc_info, . - __v7m_cm3_proc_info + + /* + * Match any ARMv7-M processor core. + */ + .type __v7m_proc_info, #object +__v7m_proc_info: + .long 0x000f0000 @ Required ID value + .long 0x000f0000 @ Mask for ID + __v7m_proc __v7m_proc_info, __v7m_setup + .size __v7m_proc_info, . - __v7m_proc_info + diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S new file mode 100644 index 0000000000..a17afe7e19 --- /dev/null +++ b/arch/arm/mm/proc-xsc3.S @@ -0,0 +1,529 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/proc-xsc3.S + * + * Original Author: Matthew Gilbert + * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org> + * + * Copyright 2004 (C) Intel Corp. + * Copyright 2005 (C) MontaVista Software, Inc. + * + * MMU functions for the Intel XScale3 Core (XSC3). The XSC3 core is + * an extension to Intel's original XScale core that adds the following + * features: + * + * - ARMv6 Supersections + * - Low Locality Reference pages (replaces mini-cache) + * - 36-bit addressing + * - L2 cache + * - Cache coherency if chipset supports it + * + * Based on original XScale code by Nicolas Pitre. + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. If the + * area is larger than this, then we flush the whole cache. + */ +#define MAX_AREA_SIZE 32768 + +/* + * The cache line size of the L1 I, L1 D and unified L2 cache. + */ +#define CACHELINESIZE 32 + +/* + * The size of the L1 D cache. + */ +#define CACHESIZE 32768 + +/* + * This macro is used to wait for a CP15 write and is needed when we + * have to ensure that the last operation to the coprocessor was + * completed before continuing with operation. + */ + .macro cpwait_ret, lr, rd + mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 + sub pc, \lr, \rd, LSR #32 @ wait for completion and + @ flush instruction pipeline + .endm + +/* + * This macro cleans and invalidates the entire L1 D cache. + */ + + .macro clean_d_cache rd, rs + mov \rd, #0x1f00 + orr \rd, \rd, #0x00e0 +1: mcr p15, 0, \rd, c7, c14, 2 @ clean/invalidate L1 D line + adds \rd, \rd, #0x40000000 + bcc 1b + subs \rd, \rd, #0x20 + bpl 1b + .endm + + .text + +/* + * cpu_xsc3_proc_init() + * + * Nothing too exciting at the moment + */ +ENTRY(cpu_xsc3_proc_init) + ret lr + +/* + * cpu_xsc3_proc_fin() + */ +ENTRY(cpu_xsc3_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...IZ........... + bic r0, r0, #0x0006 @ .............CA. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_xsc3_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_xsc3_reset) + mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r1 @ reset CPSR + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x3900 @ ..VIZ..S........ + bic r1, r1, #0x0086 @ ........B....CA. + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB + bic r1, r1, #0x0001 @ ...............M + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + @ CAUTION: MMU turned off from this point. We count on the pipeline + @ already containing those two last instructions to survive. + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs + ret r0 +ENDPROC(cpu_xsc3_reset) + .popsection + +/* + * cpu_xsc3_do_idle() + * + * Cause the processor to idle + * + * For now we do nothing but go to idle mode for every case + * + * XScale supports clock switching, but using idle mode support + * allows external hardware to react to system state changes. + */ + .align 5 + +ENTRY(cpu_xsc3_do_idle) + mov r0, #1 + mcr p14, 0, r0, c7, c0, 0 @ go to idle + ret lr + +/* ================================= CACHE ================================ */ + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(xsc3_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(xsc3_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(xsc3_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(xsc3_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + clean_d_cache r0, r1 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + ret lr + +/* + * flush_user_cache_range(start, end, vm_flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vma_area_struct describing address space + */ + .align 5 +ENTRY(xsc3_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #MAX_AREA_SIZE + bhs __flush_whole_cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ invalidate L1 I line + mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ data write barrier + mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the I cache and the D cache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * Note: single I-cache line invalidation isn't used here since + * it also trashes the mini I-cache used by JTAG debuggers. + */ +ENTRY(xsc3_coherent_kern_range) +/* FALLTHROUGH */ +ENTRY(xsc3_coherent_user_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache. + * + * - addr - kernel address + * - size - region size + */ +ENTRY(xsc3_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + mcr p15, 0, r0, c7, c5, 4 @ prefetch flush + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +xsc3_dma_inv_range: + tst r0, #CACHELINESIZE - 1 + bic r0, r0, #CACHELINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean L1 D line + tst r1, #CACHELINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean L1 D line +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +xsc3_dma_clean_range: + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xsc3_dma_flush_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c14, 1 @ clean/invalidate L1 D line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ data write barrier + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xsc3_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq xsc3_dma_clean_range + bcs xsc3_dma_inv_range + b xsc3_dma_flush_range +ENDPROC(xsc3_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xsc3_dma_unmap_area) + ret lr +ENDPROC(xsc3_dma_unmap_area) + + .globl xsc3_flush_kern_cache_louis + .equ xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xsc3 + +ENTRY(cpu_xsc3_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean L1 D line + add r0, r0, #CACHELINESIZE + subs r1, r1, #CACHELINESIZE + bhi 1b + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_xsc3_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_xsc3_switch_mm) + clean_d_cache r1, r2 + mcr p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + orr r0, r0, #0x18 @ cache the page table in L2 + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs + cpwait_ret lr, ip + +/* + * cpu_xsc3_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + */ +cpu_xsc3_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_EXT_TEX(1) @ L_PTE_MT_BUFFERABLE + .long PTE_EXT_TEX(5) | PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long 0x00 @ L_PTE_MT_MINICACHE (not present) + .long PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC (not present?) + .long 0x00 @ unused + .long PTE_EXT_TEX(1) @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long PTE_EXT_TEX(2) @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused + + .align 5 +ENTRY(cpu_xsc3_set_pte_ext) + xscale_set_pte_ext_prologue + + tst r1, #L_PTE_SHARED @ shared? + and r1, r1, #L_PTE_MT_MASK + adr ip, cpu_xsc3_mt_table + ldr ip, [ip, r1] + orrne r2, r2, #PTE_EXT_COHERENT @ interlock: mask in coherent bit + bic r2, r2, #0x0c @ clear old C,B bits + orr r2, r2, ip + + xscale_set_pte_ext_epilogue + ret lr + + .ltorg + .align + +.globl cpu_xsc3_suspend_size +.equ cpu_xsc3_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_xsc3_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmia sp!, {r4 - r9, pc} +ENDPROC(cpu_xsc3_do_suspend) + +ENTRY(cpu_xsc3_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p15, 0, ip, c7, c10, 4 @ drain write (&fill) buffer + mcr p15, 0, ip, c7, c5, 4 @ flush prefetch buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. + mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + orr r1, r1, #0x18 @ cache the page table in L2 + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_xsc3_do_resume) +#endif + + .type __xsc3_setup, #function +__xsc3_setup: + mov r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r0 + mcr p15, 0, ip, c7, c7, 0 @ invalidate L1 caches and BTB + mcr p15, 0, ip, c7, c10, 4 @ data write barrier + mcr p15, 0, ip, c7, c5, 4 @ prefetch flush + mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs + orr r4, r4, #0x18 @ cache the page table in L2 + mcr p15, 0, r4, c2, c0, 0 @ load page table pointer + + mov r0, #1 << 6 @ cp6 access for early sched_clock + mcr p15, 0, r0, c15, c1, 0 @ write CP access register + + mrc p15, 0, r0, c1, c0, 1 @ get auxiliary control reg + and r0, r0, #2 @ preserve bit P bit setting + orr r0, r0, #(1 << 10) @ enable L2 for LLR cache + mcr p15, 0, r0, c1, c0, 1 @ set auxiliary control reg + + adr r5, xsc3_crval + ldmia r5, {r5, r6} + +#ifdef CONFIG_CACHE_XSC3L2 + mrc p15, 1, r0, c0, c0, 1 @ get L2 present information + ands r0, r0, #0xf8 + orrne r6, r6, #(1 << 26) @ enable L2 if present +#endif + + mrc p15, 0, r0, c1, c0, 0 @ get control register + bic r0, r0, r5 @ ..V. ..R. .... ..A. + orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu) + @ ...I Z..S .... .... (uc) + ret lr + + .size __xsc3_setup, . - __xsc3_setup + + .type xsc3_crval, #object +xsc3_crval: + crval clear=0x04002202, mmuset=0x00003905, ucset=0x00001900 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + string cpu_xsc3_name, "XScale-V3 based processor" + + .align + + .section ".proc.info.init", "a" + +.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __xsc3_setup, __\name\()_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_xsc3_name + .long xsc3_processor_functions + .long v4wbi_tlb_fns + .long xsc3_mc_user_fns + .long xsc3_cache_fns + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + xsc3_proc_info xsc3, 0x69056000, 0xffffe000 + +/* Note: PXA935 changed its implementor ID from Intel to Marvell */ + xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000 diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S new file mode 100644 index 0000000000..d82590aa71 --- /dev/null +++ b/arch/arm/mm/proc-xscale.S @@ -0,0 +1,658 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/proc-xscale.S + * + * Author: Nicolas Pitre + * Created: November 2000 + * Copyright: (C) 2000, 2001 MontaVista Software Inc. + * + * MMU functions for the Intel XScale CPUs + * + * 2001 Aug 21: + * some contributions by Brett Gaines <brett.w.gaines@intel.com> + * Copyright 2001 by Intel Corp. + * + * 2001 Sep 08: + * Completely revisited, many important fixes + * Nicolas Pitre <nico@fluxnic.net> + */ + +#include <linux/linkage.h> +#include <linux/init.h> +#include <linux/pgtable.h> +#include <asm/assembler.h> +#include <asm/hwcap.h> +#include <asm/pgtable-hwdef.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include "proc-macros.S" + +/* + * This is the maximum size of an area which will be flushed. If the area + * is larger than this, then we flush the whole cache + */ +#define MAX_AREA_SIZE 32768 + +/* + * the cache line size of the I and D cache + */ +#define CACHELINESIZE 32 + +/* + * the size of the data cache + */ +#define CACHESIZE 32768 + +/* + * Virtual address used to allocate the cache when flushed + * + * This must be an address range which is _never_ used. It should + * apparently have a mapping in the corresponding page table for + * compatibility with future CPUs that _could_ require it. For instance we + * don't care. + * + * This must be aligned on a 2*CACHESIZE boundary. The code selects one of + * the 2 areas in alternance each time the clean_d_cache macro is used. + * Without this the XScale core exhibits cache eviction problems and no one + * knows why. + * + * Reminder: the vector table is located at 0xffff0000-0xffff0fff. + */ +#define CLEAN_ADDR 0xfffe0000 + +/* + * This macro is used to wait for a CP15 write and is needed + * when we have to ensure that the last operation to the co-pro + * was completed before continuing with operation. + */ + .macro cpwait, rd + mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 + mov \rd, \rd @ wait for completion + sub pc, pc, #4 @ flush instruction pipeline + .endm + + .macro cpwait_ret, lr, rd + mrc p15, 0, \rd, c2, c0, 0 @ arbitrary read of cp15 + sub pc, \lr, \rd, LSR #32 @ wait for completion and + @ flush instruction pipeline + .endm + +/* + * This macro cleans the entire dcache using line allocate. + * The main loop has been unrolled to reduce loop overhead. + * rd and rs are two scratch registers. + */ + .macro clean_d_cache, rd, rs + ldr \rs, =clean_addr + ldr \rd, [\rs] + eor \rd, \rd, #CACHESIZE + str \rd, [\rs] + add \rs, \rd, #CACHESIZE +1: mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + mcr p15, 0, \rd, c7, c2, 5 @ allocate D cache line + add \rd, \rd, #CACHELINESIZE + teq \rd, \rs + bne 1b + .endm + + .data + .align 2 +clean_addr: .word CLEAN_ADDR + + .text + +/* + * cpu_xscale_proc_init() + * + * Nothing too exciting at the moment + */ +ENTRY(cpu_xscale_proc_init) + @ enable write buffer coalescing. Some bootloader disable it + mrc p15, 0, r1, c1, c0, 1 + bic r1, r1, #1 + mcr p15, 0, r1, c1, c0, 1 + ret lr + +/* + * cpu_xscale_proc_fin() + */ +ENTRY(cpu_xscale_proc_fin) + mrc p15, 0, r0, c1, c0, 0 @ ctrl register + bic r0, r0, #0x1800 @ ...IZ........... + bic r0, r0, #0x0006 @ .............CA. + mcr p15, 0, r0, c1, c0, 0 @ disable caches + ret lr + +/* + * cpu_xscale_reset(loc) + * + * Perform a soft reset of the system. Put the CPU into the + * same state as it would be if it had been reset, and branch + * to what would be the reset vector. + * + * loc: location to jump to for soft reset + * + * Beware PXA270 erratum E7. + */ + .align 5 + .pushsection .idmap.text, "ax" +ENTRY(cpu_xscale_reset) + mov r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE + msr cpsr_c, r1 @ reset CPSR + mcr p15, 0, r1, c10, c4, 1 @ unlock I-TLB + mcr p15, 0, r1, c8, c5, 0 @ invalidate I-TLB + mrc p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x0086 @ ........B....CA. + bic r1, r1, #0x3900 @ ..VIZ..S........ + sub pc, pc, #4 @ flush pipeline + @ *** cache line aligned *** + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + bic r1, r1, #0x0001 @ ...............M + mcr p15, 0, ip, c7, c7, 0 @ invalidate I,D caches & BTB + mcr p15, 0, r1, c1, c0, 0 @ ctrl register + @ CAUTION: MMU turned off from this point. We count on the pipeline + @ already containing those two last instructions to survive. + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + ret r0 +ENDPROC(cpu_xscale_reset) + .popsection + +/* + * cpu_xscale_do_idle() + * + * Cause the processor to idle + * + * For now we do nothing but go to idle mode for every case + * + * XScale supports clock switching, but using idle mode support + * allows external hardware to react to system state changes. + */ + .align 5 + +ENTRY(cpu_xscale_do_idle) + mov r0, #1 + mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE + ret lr + +/* ================================= CACHE ================================ */ + +/* + * flush_icache_all() + * + * Unconditionally clean and invalidate the entire icache. + */ +ENTRY(xscale_flush_icache_all) + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache + ret lr +ENDPROC(xscale_flush_icache_all) + +/* + * flush_user_cache_all() + * + * Invalidate all cache entries in a particular address + * space. + */ +ENTRY(xscale_flush_user_cache_all) + /* FALLTHROUGH */ + +/* + * flush_kern_cache_all() + * + * Clean and invalidate the entire cache. + */ +ENTRY(xscale_flush_kern_cache_all) + mov r2, #VM_EXEC + mov ip, #0 +__flush_whole_cache: + clean_d_cache r0, r1 + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB + mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + ret lr + +/* + * flush_user_cache_range(start, end, vm_flags) + * + * Invalidate a range of cache entries in the specified + * address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vma_area_struct describing address space + */ + .align 5 +ENTRY(xscale_flush_user_cache_range) + mov ip, #0 + sub r3, r1, r0 @ calculate total size + cmp r3, #MAX_AREA_SIZE + bhs __flush_whole_cache + +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c7, c5, 1 @ Invalidate I cache line + mcr p15, 0, r0, c7, c10, 1 @ Clean D cache line + mcr p15, 0, r0, c7, c6, 1 @ Invalidate D cache line + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + tst r2, #VM_EXEC + mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB + mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + ret lr + +/* + * coherent_kern_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + * + * Note: single I-cache line invalidation isn't used here since + * it also trashes the mini I-cache used by JTAG debuggers. + */ +ENTRY(xscale_coherent_kern_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + ret lr + +/* + * coherent_user_range(start, end) + * + * Ensure coherency between the Icache and the Dcache in the + * region described by start. If you have non-snooping + * Harvard caches, you need to implement this function. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xscale_coherent_user_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c5, 1 @ Invalidate I cache entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + ret lr + +/* + * flush_kern_dcache_area(void *addr, size_t size) + * + * Ensure no D cache aliasing occurs, either with itself or + * the I cache + * + * - addr - kernel address + * - size - region size + */ +ENTRY(xscale_flush_kern_dcache_area) + add r1, r0, r1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mov r0, #0 + mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + ret lr + +/* + * dma_inv_range(start, end) + * + * Invalidate (discard) the specified virtual address range. + * May not write back any entries. If 'start' or 'end' + * are not cache line aligned, those lines must be written + * back. + * + * - start - virtual start address + * - end - virtual end address + */ +xscale_dma_inv_range: + tst r0, #CACHELINESIZE - 1 + bic r0, r0, #CACHELINESIZE - 1 + mcrne p15, 0, r0, c7, c10, 1 @ clean D entry + tst r1, #CACHELINESIZE - 1 + mcrne p15, 0, r1, c7, c10, 1 @ clean D entry +1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + ret lr + +/* + * dma_clean_range(start, end) + * + * Clean the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +xscale_dma_clean_range: + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + ret lr + +/* + * dma_flush_range(start, end) + * + * Clean and invalidate the specified virtual address range. + * + * - start - virtual start address + * - end - virtual end address + */ +ENTRY(xscale_dma_flush_range) + bic r0, r0, #CACHELINESIZE - 1 +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + mcr p15, 0, r0, c7, c6, 1 @ invalidate D entry + add r0, r0, #CACHELINESIZE + cmp r0, r1 + blo 1b + mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer + ret lr + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_dma_map_area) + add r1, r1, r0 + cmp r2, #DMA_TO_DEVICE + beq xscale_dma_clean_range + bcs xscale_dma_inv_range + b xscale_dma_flush_range +ENDPROC(xscale_dma_map_area) + +/* + * dma_map_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_80200_A0_A1_dma_map_area) + add r1, r1, r0 + teq r2, #DMA_TO_DEVICE + beq xscale_dma_clean_range + b xscale_dma_flush_range +ENDPROC(xscale_80200_A0_A1_dma_map_area) + +/* + * dma_unmap_area(start, size, dir) + * - start - kernel virtual start address + * - size - size of region + * - dir - DMA direction + */ +ENTRY(xscale_dma_unmap_area) + ret lr +ENDPROC(xscale_dma_unmap_area) + + .globl xscale_flush_kern_cache_louis + .equ xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xscale + +/* + * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't + * clear the dirty bits, which means that if we invalidate a dirty line, + * the dirty data can still be written back to external memory later on. + * + * The recommended workaround is to always do a clean D-cache line before + * doing an invalidate D-cache line, so on the affected processors, + * dma_inv_range() is implemented as dma_flush_range(). + * + * See erratum #25 of "Intel 80200 Processor Specification Update", + * revision January 22, 2003, available at: + * http://www.intel.com/design/iio/specupdt/273415.htm + */ +.macro a0_alias basename + .globl xscale_80200_A0_A1_\basename + .type xscale_80200_A0_A1_\basename , %function + .equ xscale_80200_A0_A1_\basename , xscale_\basename +.endm + +/* + * Most of the cache functions are unchanged for these processor revisions. + * Export suitable alias symbols for the unchanged functions: + */ + a0_alias flush_icache_all + a0_alias flush_user_cache_all + a0_alias flush_kern_cache_all + a0_alias flush_kern_cache_louis + a0_alias flush_user_cache_range + a0_alias coherent_kern_range + a0_alias coherent_user_range + a0_alias flush_kern_dcache_area + a0_alias dma_flush_range + a0_alias dma_unmap_area + + @ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S) + define_cache_functions xscale_80200_A0_A1 + +ENTRY(cpu_xscale_dcache_clean_area) +1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry + add r0, r0, #CACHELINESIZE + subs r1, r1, #CACHELINESIZE + bhi 1b + ret lr + +/* =============================== PageTable ============================== */ + +/* + * cpu_xscale_switch_mm(pgd) + * + * Set the translation base pointer to be as described by pgd. + * + * pgd: new page tables + */ + .align 5 +ENTRY(cpu_xscale_switch_mm) + clean_d_cache r1, r2 + mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mcr p15, 0, r0, c2, c0, 0 @ load page table pointer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + cpwait_ret lr, ip + +/* + * cpu_xscale_set_pte_ext(ptep, pte, ext) + * + * Set a PTE and flush it out + * + * Errata 40: must set memory to write-through for user read-only pages. + */ +cpu_xscale_mt_table: + .long 0x00 @ L_PTE_MT_UNCACHED + .long PTE_BUFFERABLE @ L_PTE_MT_BUFFERABLE + .long PTE_CACHEABLE @ L_PTE_MT_WRITETHROUGH + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEBACK + .long PTE_EXT_TEX(1) | PTE_BUFFERABLE @ L_PTE_MT_DEV_SHARED + .long 0x00 @ unused + .long PTE_EXT_TEX(1) | PTE_CACHEABLE @ L_PTE_MT_MINICACHE + .long PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_WRITEALLOC + .long 0x00 @ unused + .long PTE_BUFFERABLE @ L_PTE_MT_DEV_WC + .long 0x00 @ unused + .long PTE_CACHEABLE | PTE_BUFFERABLE @ L_PTE_MT_DEV_CACHED + .long 0x00 @ L_PTE_MT_DEV_NONSHARED + .long 0x00 @ unused + .long 0x00 @ unused + .long 0x00 @ unused + + .align 5 +ENTRY(cpu_xscale_set_pte_ext) + xscale_set_pte_ext_prologue + + @ + @ Erratum 40: must set memory to write-through for user read-only pages + @ + and ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2) + teq ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY + + moveq r1, #L_PTE_MT_WRITETHROUGH + and r1, r1, #L_PTE_MT_MASK + adr ip, cpu_xscale_mt_table + ldr ip, [ip, r1] + bic r2, r2, #0x0c + orr r2, r2, ip + + xscale_set_pte_ext_epilogue + ret lr + + .ltorg + .align + +.globl cpu_xscale_suspend_size +.equ cpu_xscale_suspend_size, 4 * 6 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_xscale_do_suspend) + stmfd sp!, {r4 - r9, lr} + mrc p14, 0, r4, c6, c0, 0 @ clock configuration, for turbo mode + mrc p15, 0, r5, c15, c1, 0 @ CP access reg + mrc p15, 0, r6, c13, c0, 0 @ PID + mrc p15, 0, r7, c3, c0, 0 @ domain ID + mrc p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mrc p15, 0, r9, c1, c0, 0 @ control reg + bic r4, r4, #2 @ clear frequency change bit + stmia r0, {r4 - r9} @ store cp regs + ldmfd sp!, {r4 - r9, pc} +ENDPROC(cpu_xscale_do_suspend) + +ENTRY(cpu_xscale_do_resume) + ldmia r0, {r4 - r9} @ load cp regs + mov ip, #0 + mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs + mcr p15, 0, ip, c7, c7, 0 @ invalidate I & D caches, BTB + mcr p14, 0, r4, c6, c0, 0 @ clock configuration, turbo mode. + mcr p15, 0, r5, c15, c1, 0 @ CP access reg + mcr p15, 0, r6, c13, c0, 0 @ PID + mcr p15, 0, r7, c3, c0, 0 @ domain ID + mcr p15, 0, r1, c2, c0, 0 @ translation table base addr + mcr p15, 0, r8, c1, c0, 1 @ auxiliary control reg + mov r0, r9 @ control register + b cpu_resume_mmu +ENDPROC(cpu_xscale_do_resume) +#endif + + .type __xscale_setup, #function +__xscale_setup: + mcr p15, 0, ip, c7, c7, 0 @ invalidate I, D caches & BTB + mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer + mcr p15, 0, ip, c8, c7, 0 @ invalidate I, D TLBs + mov r0, #1 << 6 @ cp6 for IOP3xx and Bulverde + orr r0, r0, #1 << 13 @ Its undefined whether this + mcr p15, 0, r0, c15, c1, 0 @ affects USR or SVC modes + + adr r5, xscale_crval + ldmia r5, {r5, r6} + mrc p15, 0, r0, c1, c0, 0 @ get control register + bic r0, r0, r5 + orr r0, r0, r6 + ret lr + .size __xscale_setup, . - __xscale_setup + + /* + * R + * .RVI ZFRS BLDP WCAM + * ..11 1.01 .... .101 + * + */ + .type xscale_crval, #object +xscale_crval: + crval clear=0x00003b07, mmuset=0x00003905, ucset=0x00001900 + + __INITDATA + + @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) + define_processor_functions xscale, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1 + + .section ".rodata" + + string cpu_arch_name, "armv5te" + string cpu_elf_name, "v5" + + string cpu_80200_A0_A1_name, "XScale-80200 A0/A1" + string cpu_80200_name, "XScale-80200" + string cpu_80219_name, "XScale-80219" + string cpu_8032x_name, "XScale-IOP8032x Family" + string cpu_8033x_name, "XScale-IOP8033x Family" + string cpu_pxa250_name, "XScale-PXA250" + string cpu_pxa210_name, "XScale-PXA210" + string cpu_ixp42x_name, "XScale-IXP42x Family" + string cpu_ixp43x_name, "XScale-IXP43x Family" + string cpu_ixp46x_name, "XScale-IXP46x Family" + string cpu_ixp2400_name, "XScale-IXP2400" + string cpu_ixp2800_name, "XScale-IXP2800" + string cpu_pxa255_name, "XScale-PXA255" + string cpu_pxa270_name, "XScale-PXA270" + + .align + + .section ".proc.info.init", "a" + +.macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache + .type __\name\()_proc_info,#object +__\name\()_proc_info: + .long \cpu_val + .long \cpu_mask + .long PMD_TYPE_SECT | \ + PMD_SECT_BUFFERABLE | \ + PMD_SECT_CACHEABLE | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + initfn __xscale_setup, __\name\()_proc_info + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long \cpu_name + .long xscale_processor_functions + .long v4wbi_tlb_fns + .long xscale_mc_user_fns + .ifb \cache + .long xscale_cache_fns + .else + .long \cache + .endif + .size __\name\()_proc_info, . - __\name\()_proc_info +.endm + + xscale_proc_info 80200_A0_A1, 0x69052000, 0xfffffffe, cpu_80200_name, \ + cache=xscale_80200_A0_A1_cache_fns + xscale_proc_info 80200, 0x69052000, 0xfffffff0, cpu_80200_name + xscale_proc_info 80219, 0x69052e20, 0xffffffe0, cpu_80219_name + xscale_proc_info 8032x, 0x69052420, 0xfffff7e0, cpu_8032x_name + xscale_proc_info 8033x, 0x69054010, 0xfffffd30, cpu_8033x_name + xscale_proc_info pxa250, 0x69052100, 0xfffff7f0, cpu_pxa250_name + xscale_proc_info pxa210, 0x69052120, 0xfffff3f0, cpu_pxa210_name + xscale_proc_info ixp2400, 0x69054190, 0xfffffff0, cpu_ixp2400_name + xscale_proc_info ixp2800, 0x690541a0, 0xfffffff0, cpu_ixp2800_name + xscale_proc_info ixp42x, 0x690541c0, 0xffffffc0, cpu_ixp42x_name + xscale_proc_info ixp43x, 0x69054040, 0xfffffff0, cpu_ixp43x_name + xscale_proc_info ixp46x, 0x69054200, 0xffffff00, cpu_ixp46x_name + xscale_proc_info pxa255, 0x69052d00, 0xfffffff0, cpu_pxa255_name + xscale_proc_info pxa270, 0x69054110, 0xfffffff0, cpu_pxa270_name diff --git a/arch/arm/mm/ptdump_debugfs.c b/arch/arm/mm/ptdump_debugfs.c new file mode 100644 index 0000000000..318de969ae --- /dev/null +++ b/arch/arm/mm/ptdump_debugfs.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +#include <asm/ptdump.h> + +static int ptdump_show(struct seq_file *m, void *v) +{ + struct ptdump_info *info = m->private; + + ptdump_walk_pgd(m, info); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ptdump); + +void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name) +{ + debugfs_create_file(name, 0400, NULL, info, &ptdump_fops); +} diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S new file mode 100644 index 0000000000..1d9f52c71a --- /dev/null +++ b/arch/arm/mm/pv-fixup-asm.S @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015 Russell King + * + * This assembly is required to safely remap the physical address space + * for Keystone 2 + */ +#include <linux/linkage.h> +#include <linux/pgtable.h> +#include <asm/asm-offsets.h> +#include <asm/cp15.h> +#include <asm/page.h> + + .section ".idmap.text", "ax" + +#define L1_ORDER 3 +#define L2_ORDER 3 + +ENTRY(lpae_pgtables_remap_asm) + stmfd sp!, {r4-r8, lr} + + mrc p15, 0, r8, c1, c0, 0 @ read control reg + bic ip, r8, #CR_M @ disable caches and MMU + mcr p15, 0, ip, c1, c0, 0 + dsb + isb + + /* Update level 2 entries covering the kernel */ + ldr r6, =(_end - 1) + add r7, r2, #0x1000 + add r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER + add r7, r7, #KERNEL_OFFSET >> (SECTION_SHIFT - L2_ORDER) +1: ldrd r4, r5, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, r5, [r7], #1 << L2_ORDER + cmp r7, r6 + bls 1b + + /* Update level 2 entries for the boot data */ + add r7, r2, #0x1000 + movw r3, #FDT_FIXED_BASE >> (SECTION_SHIFT - L2_ORDER) + add r7, r7, r3 + ldrd r4, r5, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, r5, [r7], #1 << L2_ORDER + ldrd r4, r5, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, r5, [r7] + + /* Update level 1 entries */ + mov r6, #4 + mov r7, r2 +2: ldrd r4, r5, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, r5, [r7], #1 << L1_ORDER + subs r6, r6, #1 + bne 2b + + mrrc p15, 0, r4, r5, c2 @ read TTBR0 + adds r4, r4, r0 @ update physical address + adc r5, r5, r1 + mcrr p15, 0, r4, r5, c2 @ write back TTBR0 + mrrc p15, 1, r4, r5, c2 @ read TTBR1 + adds r4, r4, r0 @ update physical address + adc r5, r5, r1 + mcrr p15, 1, r4, r5, c2 @ write back TTBR1 + + dsb + + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ I+BTB cache invalidate + mcr p15, 0, ip, c8, c7, 0 @ local_flush_tlb_all() + dsb + isb + + mcr p15, 0, r8, c1, c0, 0 @ re-enable MMU + dsb + isb + + ldmfd sp!, {r4-r8, pc} +ENDPROC(lpae_pgtables_remap_asm) diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S new file mode 100644 index 0000000000..def6161ec4 --- /dev/null +++ b/arch/arm/mm/tlb-fa.S @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/tlb-fa.S + * + * Copyright (C) 2005 Faraday Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt> + * + * Based on tlb-v4wbi.S: + * Copyright (C) 1997-2002 Russell King + * + * ARM architecture version 4, Faraday variation. + * This assume an unified TLBs, with a write buffer, and branch target buffer (BTB) + * + * Processors: FA520 FA526 FA626 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + +/* + * flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 4 +ENTRY(fa_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + retne lr @ no, we dont do anything + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + ret lr + + +ENTRY(fa_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate UTLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r3, c7, c10, 4 @ data write barrier + mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb) + ret lr + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions fa, fa_tlb_flags diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S new file mode 100644 index 0000000000..b962b4e751 --- /dev/null +++ b/arch/arm/mm/tlb-v4.S @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/tlbv4.S + * + * Copyright (C) 1997-2002 Russell King + * + * ARM architecture version 4 TLB handling functions. + * These assume a split I/D TLBs, and no write buffer. + * + * Processors: ARM720T + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + .align 5 +/* + * v4_flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified user address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 5 +ENTRY(v4_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + retne lr @ no, we dont do anything +.v4_flush_kern_tlb_range: + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c7, 1 @ invalidate TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + ret lr + +/* + * v4_flush_kern_tlb_range(start, end) + * + * Invalidate a range of TLB entries in the specified kernel + * address range. + * + * - start - virtual address (may not be aligned) + * - end - virtual address (may not be aligned) + */ +.globl v4_flush_kern_tlb_range +.equ v4_flush_kern_tlb_range, .v4_flush_kern_tlb_range + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4, v4_tlb_flags diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S new file mode 100644 index 0000000000..9348bba758 --- /dev/null +++ b/arch/arm/mm/tlb-v4wb.S @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/tlbv4wb.S + * + * Copyright (C) 1997-2002 Russell King + * + * ARM architecture version 4 TLB handling functions. + * These assume a split I/D TLBs w/o I TLB entry, with a write buffer. + * + * Processors: SA110 SA1100 SA1110 + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + + .align 5 +/* + * v4wb_flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 5 +ENTRY(v4wb_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + retne lr @ no, we dont do anything + vma_vm_flags r2, r2 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + tst r2, #VM_EXEC + mcrne p15, 0, r3, c8, c5, 0 @ invalidate I TLB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + ret lr + +/* + * v4_flush_kern_tlb_range(start, end) + * + * Invalidate a range of TLB entries in the specified kernel + * address range. + * + * - start - virtual address (may not be aligned) + * - end - virtual address (may not be aligned) + */ +ENTRY(v4wb_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 + mcr p15, 0, r3, c8, c5, 0 @ invalidate I TLB +1: mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + ret lr + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4wb, v4wb_tlb_flags diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S new file mode 100644 index 0000000000..d4f9040a41 --- /dev/null +++ b/arch/arm/mm/tlb-v4wbi.S @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/tlbv4wbi.S + * + * Copyright (C) 1997-2002 Russell King + * + * ARM architecture version 4 and version 5 TLB handling functions. + * These assume a split I/D TLBs, with a write buffer. + * + * Processors: ARM920 ARM922 ARM925 ARM926 XScale + */ +#include <linux/linkage.h> +#include <linux/init.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + +/* + * v4wb_flush_user_tlb_range(start, end, mm) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - range start address + * - end - range end address + * - mm - mm_struct describing address space + */ + .align 5 +ENTRY(v4wbi_flush_user_tlb_range) + vma_vm_mm ip, r2 + act_mm r3 @ get current->active_mm + eors r3, ip, r3 @ == mm ? + retne lr @ no, we dont do anything + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + vma_vm_flags r2, r2 + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: tst r2, #VM_EXEC + mcrne p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry + mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + ret lr + +ENTRY(v4wbi_flush_kern_tlb_range) + mov r3, #0 + mcr p15, 0, r3, c7, c10, 4 @ drain WB + bic r0, r0, #0x0ff + bic r0, r0, #0xf00 +1: mcr p15, 0, r0, c8, c5, 1 @ invalidate I TLB entry + mcr p15, 0, r0, c8, c6, 1 @ invalidate D TLB entry + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + ret lr + + __INITDATA + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v4wbi, v4wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S new file mode 100644 index 0000000000..1d91e49b2c --- /dev/null +++ b/arch/arm/mm/tlb-v6.S @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/tlb-v6.S + * + * Copyright (C) 1997-2002 Russell King + * + * ARM architecture version 6 TLB handling functions. + * These assume a split I/D TLB. + */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/page.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + +#define HARVARD_TLB + +.arch armv6 + +/* + * v6wbi_flush_user_tlb_range(start, end, vma) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vm_area_struct describing address range + * + * It is assumed that: + * - the "Invalidate single entry" instruction will invalidate + * both the I and the D TLBs on Harvard-style TLBs + */ +ENTRY(v6wbi_flush_user_tlb_range) + vma_vm_mm r3, r2 @ get vma->vm_mm + mov ip, #0 + mmid r3, r3 @ get vm_mm->context.id + mcr p15, 0, ip, c7, c10, 4 @ drain write buffer + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + asid r3, r3 @ mask ASID + orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA + mov r1, r1, lsl #PAGE_SHIFT + vma_vm_flags r2, r2 @ get vma->vm_flags +1: +#ifdef HARVARD_TLB + mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA (was 1) + tst r2, #VM_EXEC @ Executable area ? + mcrne p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA (was 1) +#else + mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA (was 1) +#endif + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier + ret lr + +/* + * v6wbi_flush_kern_tlb_range(start,end) + * + * Invalidate a range of kernel TLB entries + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + */ +ENTRY(v6wbi_flush_kern_tlb_range) + mov r2, #0 + mcr p15, 0, r2, c7, c10, 4 @ drain write buffer + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + mov r0, r0, lsl #PAGE_SHIFT + mov r1, r1, lsl #PAGE_SHIFT +1: +#ifdef HARVARD_TLB + mcr p15, 0, r0, c8, c6, 1 @ TLB invalidate D MVA + mcr p15, 0, r0, c8, c5, 1 @ TLB invalidate I MVA +#else + mcr p15, 0, r0, c8, c7, 1 @ TLB invalidate MVA +#endif + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier + mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb) + ret lr + + __INIT + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v6wbi, v6wbi_tlb_flags diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S new file mode 100644 index 0000000000..35fd6d4f0d --- /dev/null +++ b/arch/arm/mm/tlb-v7.S @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/mm/tlb-v7.S + * + * Copyright (C) 1997-2002 Russell King + * Modified for ARMv7 by Catalin Marinas + * + * ARM architecture version 6 TLB handling functions. + * These assume a split I/D TLB. + */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/tlbflush.h> +#include "proc-macros.S" + +.arch armv7-a + +/* + * v7wbi_flush_user_tlb_range(start, end, vma) + * + * Invalidate a range of TLB entries in the specified address space. + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + * - vma - vm_area_struct describing address range + * + * It is assumed that: + * - the "Invalidate single entry" instruction will invalidate + * both the I and the D TLBs on Harvard-style TLBs + */ +ENTRY(v7wbi_flush_user_tlb_range) + vma_vm_mm r3, r2 @ get vma->vm_mm + mmid r3, r3 @ get vm_mm->context.id + dsb ish + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + asid r3, r3 @ mask ASID +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(W(mov) r3, #0 ) + ALT_UP(W(nop) ) +#endif + orr r0, r3, r0, lsl #PAGE_SHIFT @ Create initial MVA + mov r1, r1, lsl #PAGE_SHIFT +1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA + + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + dsb ish + ret lr +ENDPROC(v7wbi_flush_user_tlb_range) + +/* + * v7wbi_flush_kern_tlb_range(start,end) + * + * Invalidate a range of kernel TLB entries + * + * - start - start address (may not be aligned) + * - end - end address (exclusive, may not be aligned) + */ +ENTRY(v7wbi_flush_kern_tlb_range) + dsb ish + mov r0, r0, lsr #PAGE_SHIFT @ align address + mov r1, r1, lsr #PAGE_SHIFT + mov r0, r0, lsl #PAGE_SHIFT + mov r1, r1, lsl #PAGE_SHIFT +1: +#ifdef CONFIG_ARM_ERRATA_720789 + ALT_SMP(mcr p15, 0, r0, c8, c3, 3) @ TLB invalidate U MVA all ASID (shareable) +#else + ALT_SMP(mcr p15, 0, r0, c8, c3, 1) @ TLB invalidate U MVA (shareable) +#endif + ALT_UP(mcr p15, 0, r0, c8, c7, 1) @ TLB invalidate U MVA + add r0, r0, #PAGE_SZ + cmp r0, r1 + blo 1b + dsb ish + isb + ret lr +ENDPROC(v7wbi_flush_kern_tlb_range) + + __INIT + + /* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */ + define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp |