summaryrefslogtreecommitdiffstats
path: root/arch/arm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm/lib')
-rw-r--r--arch/arm/lib/Makefile50
-rw-r--r--arch/arm/lib/ashldi3.S54
-rw-r--r--arch/arm/lib/ashrdi3.S54
-rw-r--r--arch/arm/lib/backtrace-clang.S222
-rw-r--r--arch/arm/lib/backtrace.S123
-rw-r--r--arch/arm/lib/bitops.h107
-rw-r--r--arch/arm/lib/bswapsdi2.S38
-rw-r--r--arch/arm/lib/call_with_stack.S32
-rw-r--r--arch/arm/lib/changebit.S12
-rw-r--r--arch/arm/lib/clear_user.S55
-rw-r--r--arch/arm/lib/clearbit.S12
-rw-r--r--arch/arm/lib/copy_from_user.S129
-rw-r--r--arch/arm/lib/copy_page.S44
-rw-r--r--arch/arm/lib/copy_template.S294
-rw-r--r--arch/arm/lib/copy_to_user.S129
-rw-r--r--arch/arm/lib/csumipv6.S30
-rw-r--r--arch/arm/lib/csumpartial.S139
-rw-r--r--arch/arm/lib/csumpartialcopy.S50
-rw-r--r--arch/arm/lib/csumpartialcopygeneric.S331
-rw-r--r--arch/arm/lib/csumpartialcopyuser.S83
-rw-r--r--arch/arm/lib/delay-loop.S59
-rw-r--r--arch/arm/lib/delay.c105
-rw-r--r--arch/arm/lib/div64.S209
-rw-r--r--arch/arm/lib/findbit.S193
-rw-r--r--arch/arm/lib/getuser.S167
-rw-r--r--arch/arm/lib/io-readsb.S120
-rw-r--r--arch/arm/lib/io-readsl.S76
-rw-r--r--arch/arm/lib/io-readsw-armv3.S103
-rw-r--r--arch/arm/lib/io-readsw-armv4.S128
-rw-r--r--arch/arm/lib/io-writesb.S91
-rw-r--r--arch/arm/lib/io-writesl.S64
-rw-r--r--arch/arm/lib/io-writesw-armv3.S123
-rw-r--r--arch/arm/lib/io-writesw-armv4.S97
-rw-r--r--arch/arm/lib/lib1funcs.S371
-rw-r--r--arch/arm/lib/lshrdi3.S54
-rw-r--r--arch/arm/lib/memchr.S23
-rw-r--r--arch/arm/lib/memcpy.S67
-rw-r--r--arch/arm/lib/memmove.S224
-rw-r--r--arch/arm/lib/memset.S147
-rw-r--r--arch/arm/lib/muldi3.S45
-rw-r--r--arch/arm/lib/putuser.S95
-rw-r--r--arch/arm/lib/setbit.S12
-rw-r--r--arch/arm/lib/strchr.S24
-rw-r--r--arch/arm/lib/strrchr.S23
-rw-r--r--arch/arm/lib/testchangebit.S12
-rw-r--r--arch/arm/lib/testclearbit.S12
-rw-r--r--arch/arm/lib/testsetbit.S12
-rw-r--r--arch/arm/lib/uaccess_with_memcpy.c289
-rw-r--r--arch/arm/lib/ucmpdi2.S50
-rw-r--r--arch/arm/lib/xor-neon.c44
50 files changed, 5027 insertions, 0 deletions
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
new file mode 100644
index 000000000..6d2ba454f
--- /dev/null
+++ b/arch/arm/lib/Makefile
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# linux/arch/arm/lib/Makefile
+#
+# Copyright (C) 1995-2000 Russell King
+#
+
+lib-y := changebit.o csumipv6.o csumpartial.o \
+ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \
+ delay.o delay-loop.o findbit.o memchr.o memcpy.o \
+ memmove.o memset.o setbit.o \
+ strchr.o strrchr.o \
+ testchangebit.o testclearbit.o testsetbit.o \
+ ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
+ ucmpdi2.o lib1funcs.o div64.o \
+ io-readsb.o io-writesb.o io-readsl.o io-writesl.o \
+ call_with_stack.o bswapsdi2.o
+
+mmu-y := clear_user.o copy_page.o getuser.o putuser.o \
+ copy_from_user.o copy_to_user.o
+
+ifdef CONFIG_CC_IS_CLANG
+ lib-y += backtrace-clang.o
+else
+ lib-y += backtrace.o
+endif
+
+# using lib_ here won't override already available weak symbols
+obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o
+
+lib-$(CONFIG_MMU) += $(mmu-y)
+
+ifeq ($(CONFIG_CPU_32v3),y)
+ lib-y += io-readsw-armv3.o io-writesw-armv3.o
+else
+ lib-y += io-readsw-armv4.o io-writesw-armv4.o
+endif
+
+ifeq ($(CONFIG_ARCH_RPC),y)
+ AFLAGS_delay-loop.o += -march=armv4
+endif
+
+$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S
+$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S
+
+ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
+ NEON_FLAGS := -march=armv7-a -mfloat-abi=softfp -mfpu=neon
+ CFLAGS_xor-neon.o += $(NEON_FLAGS)
+ obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
+endif
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
new file mode 100644
index 000000000..b05e95840
--- /dev/null
+++ b/arch/arm/lib/ashldi3.S
@@ -0,0 +1,54 @@
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+ Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+ENTRY(__ashldi3)
+ENTRY(__aeabi_llsl)
+
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi ah, ah, lsl r2
+ movpl ah, al, lsl r3
+ ARM( orrmi ah, ah, al, lsr ip )
+ THUMB( lsrmi r3, al, ip )
+ THUMB( orrmi ah, ah, r3 )
+ mov al, al, lsl r2
+ ret lr
+
+ENDPROC(__ashldi3)
+ENDPROC(__aeabi_llsl)
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
new file mode 100644
index 000000000..275d7d234
--- /dev/null
+++ b/arch/arm/lib/ashrdi3.S
@@ -0,0 +1,54 @@
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+ Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+ENTRY(__ashrdi3)
+ENTRY(__aeabi_lasr)
+
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi al, al, lsr r2
+ movpl al, ah, asr r3
+ ARM( orrmi al, al, ah, lsl ip )
+ THUMB( lslmi r3, ah, ip )
+ THUMB( orrmi al, al, r3 )
+ mov ah, ah, asr r2
+ ret lr
+
+ENDPROC(__ashrdi3)
+ENDPROC(__aeabi_lasr)
diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S
new file mode 100644
index 000000000..6174c45f5
--- /dev/null
+++ b/arch/arm/lib/backtrace-clang.S
@@ -0,0 +1,222 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/backtrace-clang.S
+ *
+ * Copyright (C) 2019 Nathan Huckleberry
+ *
+ */
+#include <linux/kern_levels.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+ .text
+
+/* fp is 0 or stack frame */
+
+#define frame r4
+#define sv_fp r5
+#define sv_pc r6
+#define mask r7
+#define sv_lr r8
+#define loglvl r9
+
+ENTRY(c_backtrace)
+
+#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
+ ret lr
+ENDPROC(c_backtrace)
+#else
+
+
+/*
+ * Clang does not store pc or sp in function prologues so we don't know exactly
+ * where the function starts.
+ *
+ * We can treat the current frame's lr as the saved pc and the preceding
+ * frame's lr as the current frame's lr, but we can't trace the most recent
+ * call. Inserting a false stack frame allows us to reference the function
+ * called last in the stacktrace.
+ *
+ * If the call instruction was a bl we can look at the callers branch
+ * instruction to calculate the saved pc. We can recover the pc in most cases,
+ * but in cases such as calling function pointers we cannot. In this case,
+ * default to using the lr. This will be some address in the function, but will
+ * not be the function start.
+ *
+ * Unfortunately due to the stack frame layout we can't dump r0 - r3, but these
+ * are less frequently saved.
+ *
+ * Stack frame layout:
+ * <larger addresses>
+ * saved lr
+ * frame=> saved fp
+ * optionally saved caller registers (r4 - r10)
+ * optionally saved arguments (r0 - r3)
+ * <top of stack frame>
+ * <smaller addresses>
+ *
+ * Functions start with the following code sequence:
+ * corrected pc => stmfd sp!, {..., fp, lr}
+ * add fp, sp, #x
+ * stmfd sp!, {r0 - r3} (optional)
+ *
+ *
+ *
+ *
+ *
+ *
+ * The diagram below shows an example stack setup for dump_stack.
+ *
+ * The frame for c_backtrace has pointers to the code of dump_stack. This is
+ * why the frame of c_backtrace is used to for the pc calculation of
+ * dump_stack. This is why we must move back a frame to print dump_stack.
+ *
+ * The stored locals for dump_stack are in dump_stack's frame. This means that
+ * to fully print dump_stack's frame we need both the frame for dump_stack (for
+ * locals) and the frame that was called by dump_stack (for pc).
+ *
+ * To print locals we must know where the function start is. If we read the
+ * function prologue opcodes we can determine which variables are stored in the
+ * stack frame.
+ *
+ * To find the function start of dump_stack we can look at the stored LR of
+ * show_stack. It points at the instruction directly after the bl dump_stack.
+ * We can then read the offset from the bl opcode to determine where the branch
+ * takes us. The address calculated must be the start of dump_stack.
+ *
+ * c_backtrace frame dump_stack:
+ * {[LR] } ============| ...
+ * {[FP] } =======| | bl c_backtrace
+ * | |=> ...
+ * {[R4-R10]} |
+ * {[R0-R3] } | show_stack:
+ * dump_stack frame | ...
+ * {[LR] } =============| bl dump_stack
+ * {[FP] } <=======| |=> ...
+ * {[R4-R10]}
+ * {[R0-R3] }
+ */
+
+ stmfd sp!, {r4 - r9, fp, lr} @ Save an extra register
+ @ to ensure 8 byte alignment
+ movs frame, r0 @ if frame pointer is zero
+ beq no_frame @ we have no stack frames
+ mov loglvl, r2
+ tst r1, #0x10 @ 26 or 32-bit mode?
+ moveq mask, #0xfc000003
+ movne mask, #0 @ mask for 32-bit
+
+/*
+ * Switches the current frame to be the frame for dump_stack.
+ */
+ add frame, sp, #24 @ switch to false frame
+for_each_frame: tst frame, mask @ Check for address exceptions
+ bne no_frame
+
+/*
+ * sv_fp is the stack frame with the locals for the current considered
+ * function.
+ *
+ * sv_pc is the saved lr frame the frame above. This is a pointer to a code
+ * address within the current considered function, but it is not the function
+ * start. This value gets updated to be the function start later if it is
+ * possible.
+ */
+1001: ldr sv_pc, [frame, #4] @ get saved 'pc'
+1002: ldr sv_fp, [frame, #0] @ get saved fp
+
+ teq sv_fp, mask @ make sure next frame exists
+ beq no_frame
+
+/*
+ * sv_lr is the lr from the function that called the current function. This is
+ * a pointer to a code address in the current function's caller. sv_lr-4 is
+ * the instruction used to call the current function.
+ *
+ * This sv_lr can be used to calculate the function start if the function was
+ * called using a bl instruction. If the function start can be recovered sv_pc
+ * is overwritten with the function start.
+ *
+ * If the current function was called using a function pointer we cannot
+ * recover the function start and instead continue with sv_pc as an arbitrary
+ * value within the current function. If this is the case we cannot print
+ * registers for the current function, but the stacktrace is still printed
+ * properly.
+ */
+1003: ldr sv_lr, [sv_fp, #4] @ get saved lr from next frame
+
+ ldr r0, [sv_lr, #-4] @ get call instruction
+ ldr r3, .Lopcode+4
+ and r2, r3, r0 @ is this a bl call
+ teq r2, r3
+ bne finished_setup @ give up if it's not
+ and r0, #0xffffff @ get call offset 24-bit int
+ lsl r0, r0, #8 @ sign extend offset
+ asr r0, r0, #8
+ ldr sv_pc, [sv_fp, #4] @ get lr address
+ add sv_pc, sv_pc, #-4 @ get call instruction address
+ add sv_pc, sv_pc, #8 @ take care of prefetch
+ add sv_pc, sv_pc, r0, lsl #2@ find function start
+
+finished_setup:
+
+ bic sv_pc, sv_pc, mask @ mask PC/LR for the mode
+
+/*
+ * Print the function (sv_pc) and where it was called from (sv_lr).
+ */
+1004: mov r0, sv_pc
+
+ mov r1, sv_lr
+ mov r2, frame
+ bic r1, r1, mask @ mask PC/LR for the mode
+ mov r3, loglvl
+ bl dump_backtrace_entry
+
+/*
+ * Test if the function start is a stmfd instruction to determine which
+ * registers were stored in the function prologue.
+ *
+ * If we could not recover the sv_pc because we were called through a function
+ * pointer the comparison will fail and no registers will print. Unwinding will
+ * continue as if there had been no registers stored in this frame.
+ */
+1005: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, lr}
+ ldr r3, .Lopcode @ instruction exists,
+ teq r3, r1, lsr #11
+ ldr r0, [frame] @ locals are stored in
+ @ the preceding frame
+ subeq r0, r0, #4
+ mov r2, loglvl
+ bleq dump_backtrace_stm @ dump saved registers
+
+/*
+ * If we are out of frames or if the next frame is invalid.
+ */
+ teq sv_fp, #0 @ zero saved fp means
+ beq no_frame @ no further frames
+
+ cmp sv_fp, frame @ next frame must be
+ mov frame, sv_fp @ above the current frame
+ bhi for_each_frame
+
+1006: adr r0, .Lbad
+ mov r1, loglvl
+ mov r2, frame
+ bl printk
+no_frame: ldmfd sp!, {r4 - r9, fp, pc}
+ENDPROC(c_backtrace)
+ .pushsection __ex_table,"a"
+ .align 3
+ .long 1001b, 1006b
+ .long 1002b, 1006b
+ .long 1003b, 1006b
+ .long 1004b, 1006b
+ .long 1005b, 1006b
+ .popsection
+
+.Lbad: .asciz "%sBacktrace aborted due to bad frame pointer <%p>\n"
+ .align
+.Lopcode: .word 0xe92d4800 >> 11 @ stmfd sp!, {... fp, lr}
+ .word 0x0b000000 @ bl if these bits are set
+
+#endif
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
new file mode 100644
index 000000000..872f65863
--- /dev/null
+++ b/arch/arm/lib/backtrace.S
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/backtrace.S
+ *
+ * Copyright (C) 1995, 1996 Russell King
+ *
+ * 27/03/03 Ian Molton Clean up CONFIG_CPU
+ */
+#include <linux/kern_levels.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+ .text
+
+@ fp is 0 or stack frame
+
+#define frame r4
+#define sv_fp r5
+#define sv_pc r6
+#define mask r7
+#define offset r8
+#define loglvl r9
+
+ENTRY(c_backtrace)
+
+#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
+ ret lr
+ENDPROC(c_backtrace)
+#else
+ stmfd sp!, {r4 - r9, lr} @ Save an extra register so we have a location...
+ movs frame, r0 @ if frame pointer is zero
+ beq no_frame @ we have no stack frames
+ mov loglvl, r2
+
+ tst r1, #0x10 @ 26 or 32-bit mode?
+ ARM( moveq mask, #0xfc000003 )
+ THUMB( moveq mask, #0xfc000000 )
+ THUMB( orreq mask, #0x03 )
+ movne mask, #0 @ mask for 32-bit
+
+1: stmfd sp!, {pc} @ calculate offset of PC stored
+ ldr r0, [sp], #4 @ by stmfd for this CPU
+ adr r1, 1b
+ sub offset, r0, r1
+
+/*
+ * Stack frame layout:
+ * optionally saved caller registers (r4 - r10)
+ * saved fp
+ * saved sp
+ * saved lr
+ * frame => saved pc
+ * optionally saved arguments (r0 - r3)
+ * saved sp => <next word>
+ *
+ * Functions start with the following code sequence:
+ * mov ip, sp
+ * stmfd sp!, {r0 - r3} (optional)
+ * corrected pc => stmfd sp!, {..., fp, ip, lr, pc}
+ */
+for_each_frame: tst frame, mask @ Check for address exceptions
+ bne no_frame
+
+1001: ldr sv_pc, [frame, #0] @ get saved pc
+1002: ldr sv_fp, [frame, #-12] @ get saved fp
+
+ sub sv_pc, sv_pc, offset @ Correct PC for prefetching
+ bic sv_pc, sv_pc, mask @ mask PC/LR for the mode
+
+1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists,
+ ldr r3, .Ldsi+4 @ adjust saved 'pc' back one
+ teq r3, r2, lsr #11 @ instruction
+ subne r0, sv_pc, #4 @ allow for mov
+ subeq r0, sv_pc, #8 @ allow for mov + stmia
+
+ ldr r1, [frame, #-4] @ get saved lr
+ mov r2, frame
+ bic r1, r1, mask @ mask PC/LR for the mode
+ mov r3, loglvl
+ bl dump_backtrace_entry
+
+ ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists,
+ ldr r3, .Ldsi+4
+ teq r3, r1, lsr #11
+ ldreq r0, [frame, #-8] @ get sp
+ subeq r0, r0, #4 @ point at the last arg
+ mov r2, loglvl
+ bleq dump_backtrace_stm @ dump saved registers
+
+1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc}
+ ldr r3, .Ldsi @ instruction exists,
+ teq r3, r1, lsr #11
+ subeq r0, frame, #16
+ mov r2, loglvl
+ bleq dump_backtrace_stm @ dump saved registers
+
+ teq sv_fp, #0 @ zero saved fp means
+ beq no_frame @ no further frames
+
+ cmp sv_fp, frame @ next frame must be
+ mov frame, sv_fp @ above the current frame
+ bhi for_each_frame
+
+1006: adr r0, .Lbad
+ mov r1, loglvl
+ mov r2, frame
+ bl printk
+no_frame: ldmfd sp!, {r4 - r9, pc}
+ENDPROC(c_backtrace)
+
+ .pushsection __ex_table,"a"
+ .align 3
+ .long 1001b, 1006b
+ .long 1002b, 1006b
+ .long 1003b, 1006b
+ .long 1004b, 1006b
+ .popsection
+
+.Lbad: .asciz "%sBacktrace aborted due to bad frame pointer <%p>\n"
+ .align
+.Ldsi: .word 0xe92dd800 >> 11 @ stmfd sp!, {... fp, ip, lr, pc}
+ .word 0xe92d0000 >> 11 @ stmfd sp!, {}
+
+#endif
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
new file mode 100644
index 000000000..95bd35991
--- /dev/null
+++ b/arch/arm/lib/bitops.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/assembler.h>
+#include <asm/unwind.h>
+
+#if __LINUX_ARM_ARCH__ >= 6
+ .macro bitop, name, instr
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strbne r1, [ip] @ assert word-aligned
+ mov r2, #1
+ and r3, r0, #31 @ Get bit offset
+ mov r0, r0, lsr #5
+ add r1, r1, r0, lsl #2 @ Get word offset
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+ .arch_extension mp
+ ALT_SMP(W(pldw) [r1])
+ ALT_UP(W(nop))
+#endif
+ mov r3, r2, lsl r3
+1: ldrex r2, [r1]
+ \instr r2, r2, r3
+ strex r0, r2, [r1]
+ cmp r0, #0
+ bne 1b
+ bx lr
+UNWIND( .fnend )
+ENDPROC(\name )
+ .endm
+
+ .macro testop, name, instr, store
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strbne r1, [ip] @ assert word-aligned
+ mov r2, #1
+ and r3, r0, #31 @ Get bit offset
+ mov r0, r0, lsr #5
+ add r1, r1, r0, lsl #2 @ Get word offset
+ mov r3, r2, lsl r3 @ create mask
+ smp_dmb
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
+ .arch_extension mp
+ ALT_SMP(W(pldw) [r1])
+ ALT_UP(W(nop))
+#endif
+1: ldrex r2, [r1]
+ ands r0, r2, r3 @ save old value of bit
+ \instr r2, r2, r3 @ toggle bit
+ strex ip, r2, [r1]
+ cmp ip, #0
+ bne 1b
+ smp_dmb
+ cmp r0, #0
+ movne r0, #1
+2: bx lr
+UNWIND( .fnend )
+ENDPROC(\name )
+ .endm
+#else
+ .macro bitop, name, instr
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strbne r1, [ip] @ assert word-aligned
+ and r2, r0, #31
+ mov r0, r0, lsr #5
+ mov r3, #1
+ mov r3, r3, lsl r2
+ save_and_disable_irqs ip
+ ldr r2, [r1, r0, lsl #2]
+ \instr r2, r2, r3
+ str r2, [r1, r0, lsl #2]
+ restore_irqs ip
+ ret lr
+UNWIND( .fnend )
+ENDPROC(\name )
+ .endm
+
+/**
+ * testop - implement a test_and_xxx_bit operation.
+ * @instr: operational instruction
+ * @store: store instruction
+ *
+ * Note: we can trivially conditionalise the store instruction
+ * to avoid dirtying the data cache.
+ */
+ .macro testop, name, instr, store
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strbne r1, [ip] @ assert word-aligned
+ and r3, r0, #31
+ mov r0, r0, lsr #5
+ save_and_disable_irqs ip
+ ldr r2, [r1, r0, lsl #2]!
+ mov r0, #1
+ tst r2, r0, lsl r3
+ \instr r2, r2, r0, lsl r3
+ \store r2, [r1]
+ moveq r0, #0
+ restore_irqs ip
+ ret lr
+UNWIND( .fnend )
+ENDPROC(\name )
+ .endm
+#endif
diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S
new file mode 100644
index 000000000..591ba077e
--- /dev/null
+++ b/arch/arm/lib/bswapsdi2.S
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#if __LINUX_ARM_ARCH__ >= 6
+ENTRY(__bswapsi2)
+ rev r0, r0
+ bx lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+ rev r3, r0
+ rev r0, r1
+ mov r1, r3
+ bx lr
+ENDPROC(__bswapdi2)
+#else
+ENTRY(__bswapsi2)
+ eor r3, r0, r0, ror #16
+ mov r3, r3, lsr #8
+ bic r3, r3, #0xff00
+ eor r0, r3, r0, ror #8
+ ret lr
+ENDPROC(__bswapsi2)
+
+ENTRY(__bswapdi2)
+ mov ip, r1
+ eor r3, ip, ip, ror #16
+ eor r1, r0, r0, ror #16
+ mov r1, r1, lsr #8
+ mov r3, r3, lsr #8
+ bic r3, r3, #0xff00
+ bic r1, r1, #0xff00
+ eor r1, r1, r0, ror #8
+ eor r0, r3, ip, ror #8
+ ret lr
+ENDPROC(__bswapdi2)
+#endif
diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S
new file mode 100644
index 000000000..28b0341ae
--- /dev/null
+++ b/arch/arm/lib/call_with_stack.S
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * arch/arm/lib/call_with_stack.S
+ *
+ * Copyright (C) 2011 ARM Ltd.
+ * Written by Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * void call_with_stack(void (*fn)(void *), void *arg, void *sp)
+ *
+ * Change the stack to that pointed at by sp, then invoke fn(arg) with
+ * the new stack.
+ */
+ENTRY(call_with_stack)
+ str sp, [r2, #-4]!
+ str lr, [r2, #-4]!
+
+ mov sp, r2
+ mov r2, r0
+ mov r0, r1
+
+ badr lr, 1f
+ ret r2
+
+1: ldr lr, [sp]
+ ldr sp, [sp, #4]
+ ret lr
+ENDPROC(call_with_stack)
diff --git a/arch/arm/lib/changebit.S b/arch/arm/lib/changebit.S
new file mode 100644
index 000000000..02424765e
--- /dev/null
+++ b/arch/arm/lib/changebit.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/changebit.S
+ *
+ * Copyright (C) 1995-1996 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "bitops.h"
+ .text
+
+bitop _change_bit, eor
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
new file mode 100644
index 000000000..8f2c4dbfc
--- /dev/null
+++ b/arch/arm/lib/clear_user.S
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/clear_user.S
+ *
+ * Copyright (C) 1995, 1996,1997,1998 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/unwind.h>
+
+ .text
+
+/* Prototype: unsigned long arm_clear_user(void *addr, size_t sz)
+ * Purpose : clear some user memory
+ * Params : addr - user memory address to clear
+ * : sz - number of bytes to clear
+ * Returns : number of bytes NOT cleared
+ */
+ENTRY(__clear_user_std)
+WEAK(arm_clear_user)
+UNWIND(.fnstart)
+UNWIND(.save {r1, lr})
+ stmfd sp!, {r1, lr}
+ mov r2, #0
+ cmp r1, #4
+ blt 2f
+ ands ip, r0, #3
+ beq 1f
+ cmp ip, #2
+ strusr r2, r0, 1
+ strusr r2, r0, 1, le
+ strusr r2, r0, 1, lt
+ rsb ip, ip, #4
+ sub r1, r1, ip @ 7 6 5 4 3 2 1
+1: subs r1, r1, #8 @ -1 -2 -3 -4 -5 -6 -7
+ strusr r2, r0, 4, pl, rept=2
+ bpl 1b
+ adds r1, r1, #4 @ 3 2 1 0 -1 -2 -3
+ strusr r2, r0, 4, pl
+2: tst r1, #2 @ 1x 1x 0x 0x 1x 1x 0x
+ strusr r2, r0, 1, ne, rept=2
+ tst r1, #1 @ x1 x0 x1 x0 x1 x0 x1
+ it ne @ explicit IT needed for the label
+USER( strbtne r2, [r0])
+ mov r0, #0
+ ldmfd sp!, {r1, pc}
+UNWIND(.fnend)
+ENDPROC(arm_clear_user)
+ENDPROC(__clear_user_std)
+
+ .pushsection .text.fixup,"ax"
+ .align 0
+9001: ldmfd sp!, {r0, pc}
+ .popsection
+
diff --git a/arch/arm/lib/clearbit.S b/arch/arm/lib/clearbit.S
new file mode 100644
index 000000000..4646dee8a
--- /dev/null
+++ b/arch/arm/lib/clearbit.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/clearbit.S
+ *
+ * Copyright (C) 1995-1996 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "bitops.h"
+ .text
+
+bitop _clear_bit, bic
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
new file mode 100644
index 000000000..f8016e3db
--- /dev/null
+++ b/arch/arm/lib/copy_from_user.S
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/copy_from_user.S
+ *
+ * Author: Nicolas Pitre
+ * Created: Sep 29, 2005
+ * Copyright: MontaVista Software, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/unwind.h>
+
+/*
+ * Prototype:
+ *
+ * size_t arm_copy_from_user(void *to, const void *from, size_t n)
+ *
+ * Purpose:
+ *
+ * copy a block to kernel memory from user memory
+ *
+ * Params:
+ *
+ * to = kernel memory
+ * from = user memory
+ * n = number of bytes to copy
+ *
+ * Return value:
+ *
+ * Number of bytes NOT copied.
+ */
+
+#ifdef CONFIG_CPU_USE_DOMAINS
+
+#ifndef CONFIG_THUMB2_KERNEL
+#define LDR1W_SHIFT 0
+#else
+#define LDR1W_SHIFT 1
+#endif
+
+ .macro ldr1w ptr reg abort
+ ldrusr \reg, \ptr, 4, abort=\abort
+ .endm
+
+ .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+ ldr1w \ptr, \reg1, \abort
+ ldr1w \ptr, \reg2, \abort
+ ldr1w \ptr, \reg3, \abort
+ ldr1w \ptr, \reg4, \abort
+ .endm
+
+ .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ ldr4w \ptr, \reg1, \reg2, \reg3, \reg4, \abort
+ ldr4w \ptr, \reg5, \reg6, \reg7, \reg8, \abort
+ .endm
+
+#else
+
+#define LDR1W_SHIFT 0
+
+ .macro ldr1w ptr reg abort
+ USERL(\abort, W(ldr) \reg, [\ptr], #4)
+ .endm
+
+ .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+ USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4})
+ .endm
+
+ .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ USERL(\abort, ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
+ .endm
+
+#endif /* CONFIG_CPU_USE_DOMAINS */
+
+ .macro ldr1b ptr reg cond=al abort
+ ldrusr \reg, \ptr, 1, \cond, abort=\abort
+ .endm
+
+#define STR1W_SHIFT 0
+
+ .macro str1w ptr reg abort
+ W(str) \reg, [\ptr], #4
+ .endm
+
+ .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+ .endm
+
+ .macro str1b ptr reg cond=al abort
+ strb\cond \reg, [\ptr], #1
+ .endm
+
+ .macro enter reg1 reg2
+ mov r3, #0
+ stmdb sp!, {r0, r2, r3, \reg1, \reg2}
+ .endm
+
+ .macro usave reg1 reg2
+ UNWIND( .save {r0, r2, r3, \reg1, \reg2} )
+ .endm
+
+ .macro exit reg1 reg2
+ add sp, sp, #8
+ ldmfd sp!, {r0, \reg1, \reg2}
+ .endm
+
+ .text
+
+ENTRY(arm_copy_from_user)
+#ifdef CONFIG_CPU_SPECTRE
+ get_thread_info r3
+ ldr r3, [r3, #TI_ADDR_LIMIT]
+ uaccess_mask_range_ptr r1, r2, r3, ip
+#endif
+
+#include "copy_template.S"
+
+ENDPROC(arm_copy_from_user)
+
+ .pushsection .text.fixup,"ax"
+ .align 0
+ copy_abort_preamble
+ ldmfd sp!, {r1, r2, r3}
+ sub r0, r0, r1
+ rsb r0, r0, r2
+ copy_abort_end
+ .popsection
+
diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
new file mode 100644
index 000000000..5db1a8ee3
--- /dev/null
+++ b/arch/arm/lib/copy_page.S
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/copypage.S
+ *
+ * Copyright (C) 1995-1999 Russell King
+ *
+ * ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+
+#define COPY_COUNT (PAGE_SZ / (2 * L1_CACHE_BYTES) PLD( -1 ))
+
+ .text
+ .align 5
+/*
+ * StrongARM optimised copy_page routine
+ * now 1.78bytes/cycle, was 1.60 bytes/cycle (50MHz bus -> 89MB/s)
+ * Note that we probably achieve closer to the 100MB/s target with
+ * the core clock switching.
+ */
+ENTRY(copy_page)
+ stmfd sp!, {r4, lr} @ 2
+ PLD( pld [r1, #0] )
+ PLD( pld [r1, #L1_CACHE_BYTES] )
+ mov r2, #COPY_COUNT @ 1
+ ldmia r1!, {r3, r4, ip, lr} @ 4+1
+1: PLD( pld [r1, #2 * L1_CACHE_BYTES])
+ PLD( pld [r1, #3 * L1_CACHE_BYTES])
+2:
+ .rept (2 * L1_CACHE_BYTES / 16 - 1)
+ stmia r0!, {r3, r4, ip, lr} @ 4
+ ldmia r1!, {r3, r4, ip, lr} @ 4
+ .endr
+ subs r2, r2, #1 @ 1
+ stmia r0!, {r3, r4, ip, lr} @ 4
+ ldmiagt r1!, {r3, r4, ip, lr} @ 4
+ bgt 1b @ 1
+ PLD( ldmiaeq r1!, {r3, r4, ip, lr} )
+ PLD( beq 2b )
+ ldmfd sp!, {r4, pc} @ 3
+ENDPROC(copy_page)
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
new file mode 100644
index 000000000..810a805d3
--- /dev/null
+++ b/arch/arm/lib/copy_template.S
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/copy_template.s
+ *
+ * Code template for optimized memory copy functions
+ *
+ * Author: Nicolas Pitre
+ * Created: Sep 28, 2005
+ * Copyright: MontaVista Software, Inc.
+ */
+
+/*
+ * Theory of operation
+ * -------------------
+ *
+ * This file provides the core code for a forward memory copy used in
+ * the implementation of memcopy(), copy_to_user() and copy_from_user().
+ *
+ * The including file must define the following accessor macros
+ * according to the need of the given function:
+ *
+ * ldr1w ptr reg abort
+ *
+ * This loads one word from 'ptr', stores it in 'reg' and increments
+ * 'ptr' to the next word. The 'abort' argument is used for fixup tables.
+ *
+ * ldr4w ptr reg1 reg2 reg3 reg4 abort
+ * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ *
+ * This loads four or eight words starting from 'ptr', stores them
+ * in provided registers and increments 'ptr' past those words.
+ * The'abort' argument is used for fixup tables.
+ *
+ * ldr1b ptr reg cond abort
+ *
+ * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
+ * It also must apply the condition code if provided, otherwise the
+ * "al" condition is assumed by default.
+ *
+ * str1w ptr reg abort
+ * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ * str1b ptr reg cond abort
+ *
+ * Same as their ldr* counterparts, but data is stored to 'ptr' location
+ * rather than being loaded.
+ *
+ * enter reg1 reg2
+ *
+ * Preserve the provided registers on the stack plus any additional
+ * data as needed by the implementation including this code. Called
+ * upon code entry.
+ *
+ * usave reg1 reg2
+ *
+ * Unwind annotation macro is corresponding for 'enter' macro.
+ * It tell unwinder that preserved some provided registers on the stack
+ * and additional data by a prior 'enter' macro.
+ *
+ * exit reg1 reg2
+ *
+ * Restore registers with the values previously saved with the
+ * 'preserv' macro. Called upon code termination.
+ *
+ * LDR1W_SHIFT
+ * STR1W_SHIFT
+ *
+ * Correction to be applied to the "ip" register when branching into
+ * the ldr1w or str1w instructions (some of these macros may expand to
+ * than one 32bit instruction in Thumb-2)
+ */
+
+
+ UNWIND( .fnstart )
+ enter r4, lr
+ UNWIND( .fnend )
+
+ UNWIND( .fnstart )
+ usave r4, lr @ in first stmdb block
+
+ subs r2, r2, #4
+ blt 8f
+ ands ip, r0, #3
+ PLD( pld [r1, #0] )
+ bne 9f
+ ands ip, r1, #3
+ bne 10f
+
+1: subs r2, r2, #(28)
+ stmfd sp!, {r5 - r8}
+ UNWIND( .fnend )
+
+ UNWIND( .fnstart )
+ usave r4, lr
+ UNWIND( .save {r5 - r8} ) @ in second stmfd block
+ blt 5f
+
+ CALGN( ands ip, r0, #31 )
+ CALGN( rsb r3, ip, #32 )
+ CALGN( sbcsne r4, r3, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+ CALGN( subs r2, r2, r3 ) @ C gets set
+ CALGN( add pc, r4, ip )
+
+ PLD( pld [r1, #0] )
+2: PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #28] )
+ PLD( blt 4f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
+
+3: PLD( pld [r1, #124] )
+4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+ subs r2, r2, #32
+ str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+ bge 3b
+ PLD( cmn r2, #96 )
+ PLD( bge 4b )
+
+5: ands ip, r2, #28
+ rsb ip, ip, #32
+#if LDR1W_SHIFT > 0
+ lsl ip, ip, #LDR1W_SHIFT
+#endif
+ addne pc, pc, ip @ C is always clear here
+ b 7f
+6:
+ .rept (1 << LDR1W_SHIFT)
+ W(nop)
+ .endr
+ ldr1w r1, r3, abort=20f
+ ldr1w r1, r4, abort=20f
+ ldr1w r1, r5, abort=20f
+ ldr1w r1, r6, abort=20f
+ ldr1w r1, r7, abort=20f
+ ldr1w r1, r8, abort=20f
+ ldr1w r1, lr, abort=20f
+
+#if LDR1W_SHIFT < STR1W_SHIFT
+ lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
+#elif LDR1W_SHIFT > STR1W_SHIFT
+ lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
+#endif
+ add pc, pc, ip
+ nop
+ .rept (1 << STR1W_SHIFT)
+ W(nop)
+ .endr
+ str1w r0, r3, abort=20f
+ str1w r0, r4, abort=20f
+ str1w r0, r5, abort=20f
+ str1w r0, r6, abort=20f
+ str1w r0, r7, abort=20f
+ str1w r0, r8, abort=20f
+ str1w r0, lr, abort=20f
+
+ CALGN( bcs 2b )
+
+7: ldmfd sp!, {r5 - r8}
+ UNWIND( .fnend ) @ end of second stmfd block
+
+ UNWIND( .fnstart )
+ usave r4, lr @ still in first stmdb block
+8: movs r2, r2, lsl #31
+ ldr1b r1, r3, ne, abort=21f
+ ldr1b r1, r4, cs, abort=21f
+ ldr1b r1, ip, cs, abort=21f
+ str1b r0, r3, ne, abort=21f
+ str1b r0, r4, cs, abort=21f
+ str1b r0, ip, cs, abort=21f
+
+ exit r4, pc
+
+9: rsb ip, ip, #4
+ cmp ip, #2
+ ldr1b r1, r3, gt, abort=21f
+ ldr1b r1, r4, ge, abort=21f
+ ldr1b r1, lr, abort=21f
+ str1b r0, r3, gt, abort=21f
+ str1b r0, r4, ge, abort=21f
+ subs r2, r2, ip
+ str1b r0, lr, abort=21f
+ blt 8b
+ ands ip, r1, #3
+ beq 1b
+
+10: bic r1, r1, #3
+ cmp ip, #2
+ ldr1w r1, lr, abort=21f
+ beq 17f
+ bgt 18f
+ UNWIND( .fnend )
+
+
+ .macro forward_copy_shift pull push
+
+ UNWIND( .fnstart )
+ usave r4, lr @ still in first stmdb block
+ subs r2, r2, #28
+ blt 14f
+
+ CALGN( ands ip, r0, #31 )
+ CALGN( rsb ip, ip, #32 )
+ CALGN( sbcsne r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
+
+11: stmfd sp!, {r5 - r9}
+ UNWIND( .fnend )
+
+ UNWIND( .fnstart )
+ usave r4, lr
+ UNWIND( .save {r5 - r9} ) @ in new second stmfd block
+ PLD( pld [r1, #0] )
+ PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #28] )
+ PLD( blt 13f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
+
+12: PLD( pld [r1, #124] )
+13: ldr4w r1, r4, r5, r6, r7, abort=19f
+ mov r3, lr, lspull #\pull
+ subs r2, r2, #32
+ ldr4w r1, r8, r9, ip, lr, abort=19f
+ orr r3, r3, r4, lspush #\push
+ mov r4, r4, lspull #\pull
+ orr r4, r4, r5, lspush #\push
+ mov r5, r5, lspull #\pull
+ orr r5, r5, r6, lspush #\push
+ mov r6, r6, lspull #\pull
+ orr r6, r6, r7, lspush #\push
+ mov r7, r7, lspull #\pull
+ orr r7, r7, r8, lspush #\push
+ mov r8, r8, lspull #\pull
+ orr r8, r8, r9, lspush #\push
+ mov r9, r9, lspull #\pull
+ orr r9, r9, ip, lspush #\push
+ mov ip, ip, lspull #\pull
+ orr ip, ip, lr, lspush #\push
+ str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, abort=19f
+ bge 12b
+ PLD( cmn r2, #96 )
+ PLD( bge 13b )
+
+ ldmfd sp!, {r5 - r9}
+ UNWIND( .fnend ) @ end of the second stmfd block
+
+ UNWIND( .fnstart )
+ usave r4, lr @ still in first stmdb block
+14: ands ip, r2, #28
+ beq 16f
+
+15: mov r3, lr, lspull #\pull
+ ldr1w r1, lr, abort=21f
+ subs ip, ip, #4
+ orr r3, r3, lr, lspush #\push
+ str1w r0, r3, abort=21f
+ bgt 15b
+ CALGN( cmp r2, #0 )
+ CALGN( bge 11b )
+
+16: sub r1, r1, #(\push / 8)
+ b 8b
+ UNWIND( .fnend )
+
+ .endm
+
+
+ forward_copy_shift pull=8 push=24
+
+17: forward_copy_shift pull=16 push=16
+
+18: forward_copy_shift pull=24 push=8
+
+
+/*
+ * Abort preamble and completion macros.
+ * If a fixup handler is required then those macros must surround it.
+ * It is assumed that the fixup code will handle the private part of
+ * the exit macro.
+ */
+
+ .macro copy_abort_preamble
+19: ldmfd sp!, {r5 - r9}
+ b 21f
+20: ldmfd sp!, {r5 - r8}
+21:
+ .endm
+
+ .macro copy_abort_end
+ ldmfd sp!, {r4, pc}
+ .endm
+
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
new file mode 100644
index 000000000..ebfe4cb3d
--- /dev/null
+++ b/arch/arm/lib/copy_to_user.S
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/copy_to_user.S
+ *
+ * Author: Nicolas Pitre
+ * Created: Sep 29, 2005
+ * Copyright: MontaVista Software, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/unwind.h>
+
+/*
+ * Prototype:
+ *
+ * size_t arm_copy_to_user(void *to, const void *from, size_t n)
+ *
+ * Purpose:
+ *
+ * copy a block to user memory from kernel memory
+ *
+ * Params:
+ *
+ * to = user memory
+ * from = kernel memory
+ * n = number of bytes to copy
+ *
+ * Return value:
+ *
+ * Number of bytes NOT copied.
+ */
+
+#define LDR1W_SHIFT 0
+
+ .macro ldr1w ptr reg abort
+ W(ldr) \reg, [\ptr], #4
+ .endm
+
+ .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+ ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
+ .endm
+
+ .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+ .endm
+
+ .macro ldr1b ptr reg cond=al abort
+ ldrb\cond \reg, [\ptr], #1
+ .endm
+
+#ifdef CONFIG_CPU_USE_DOMAINS
+
+#ifndef CONFIG_THUMB2_KERNEL
+#define STR1W_SHIFT 0
+#else
+#define STR1W_SHIFT 1
+#endif
+
+ .macro str1w ptr reg abort
+ strusr \reg, \ptr, 4, abort=\abort
+ .endm
+
+ .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ str1w \ptr, \reg1, \abort
+ str1w \ptr, \reg2, \abort
+ str1w \ptr, \reg3, \abort
+ str1w \ptr, \reg4, \abort
+ str1w \ptr, \reg5, \abort
+ str1w \ptr, \reg6, \abort
+ str1w \ptr, \reg7, \abort
+ str1w \ptr, \reg8, \abort
+ .endm
+
+#else
+
+#define STR1W_SHIFT 0
+
+ .macro str1w ptr reg abort
+ USERL(\abort, W(str) \reg, [\ptr], #4)
+ .endm
+
+ .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ USERL(\abort, stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8})
+ .endm
+
+#endif /* CONFIG_CPU_USE_DOMAINS */
+
+ .macro str1b ptr reg cond=al abort
+ strusr \reg, \ptr, 1, \cond, abort=\abort
+ .endm
+
+ .macro enter reg1 reg2
+ mov r3, #0
+ stmdb sp!, {r0, r2, r3, \reg1, \reg2}
+ .endm
+
+ .macro usave reg1 reg2
+ UNWIND( .save {r0, r2, r3, \reg1, \reg2} )
+ .endm
+
+ .macro exit reg1 reg2
+ add sp, sp, #8
+ ldmfd sp!, {r0, \reg1, \reg2}
+ .endm
+
+ .text
+
+ENTRY(__copy_to_user_std)
+WEAK(arm_copy_to_user)
+#ifdef CONFIG_CPU_SPECTRE
+ get_thread_info r3
+ ldr r3, [r3, #TI_ADDR_LIMIT]
+ uaccess_mask_range_ptr r0, r2, r3, ip
+#endif
+
+#include "copy_template.S"
+
+ENDPROC(arm_copy_to_user)
+ENDPROC(__copy_to_user_std)
+
+ .pushsection .text.fixup,"ax"
+ .align 0
+ copy_abort_preamble
+ ldmfd sp!, {r1, r2, r3}
+ sub r0, r0, r1
+ rsb r0, r0, r2
+ copy_abort_end
+ .popsection
diff --git a/arch/arm/lib/csumipv6.S b/arch/arm/lib/csumipv6.S
new file mode 100644
index 000000000..3559d5151
--- /dev/null
+++ b/arch/arm/lib/csumipv6.S
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/csumipv6.S
+ *
+ * Copyright (C) 1995-1998 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+
+ENTRY(__csum_ipv6_magic)
+ str lr, [sp, #-4]!
+ adds ip, r2, r3
+ ldmia r1, {r1 - r3, lr}
+ adcs ip, ip, r1
+ adcs ip, ip, r2
+ adcs ip, ip, r3
+ adcs ip, ip, lr
+ ldmia r0, {r0 - r3}
+ adcs r0, ip, r0
+ adcs r0, r0, r1
+ adcs r0, r0, r2
+ ldr r2, [sp, #4]
+ adcs r0, r0, r3
+ adcs r0, r0, r2
+ adcs r0, r0, #0
+ ldmfd sp!, {pc}
+ENDPROC(__csum_ipv6_magic)
+
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
new file mode 100644
index 000000000..87c9471be
--- /dev/null
+++ b/arch/arm/lib/csumpartial.S
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/csumpartial.S
+ *
+ * Copyright (C) 1995-1998 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+
+/*
+ * Function: __u32 csum_partial(const char *src, int len, __u32 sum)
+ * Params : r0 = buffer, r1 = len, r2 = checksum
+ * Returns : r0 = new checksum
+ */
+
+buf .req r0
+len .req r1
+sum .req r2
+td0 .req r3
+td1 .req r4 @ save before use
+td2 .req r5 @ save before use
+td3 .req lr
+
+.Lzero: mov r0, sum
+ add sp, sp, #4
+ ldr pc, [sp], #4
+
+ /*
+ * Handle 0 to 7 bytes, with any alignment of source and
+ * destination pointers. Note that when we get here, C = 0
+ */
+.Lless8: teq len, #0 @ check for zero count
+ beq .Lzero
+
+ /* we must have at least one byte. */
+ tst buf, #1 @ odd address?
+ movne sum, sum, ror #8
+ ldrbne td0, [buf], #1
+ subne len, len, #1
+ adcsne sum, sum, td0, put_byte_1
+
+.Lless4: tst len, #6
+ beq .Lless8_byte
+
+ /* we are now half-word aligned */
+
+.Lless8_wordlp:
+#if __LINUX_ARM_ARCH__ >= 4
+ ldrh td0, [buf], #2
+ sub len, len, #2
+#else
+ ldrb td0, [buf], #1
+ ldrb td3, [buf], #1
+ sub len, len, #2
+#ifndef __ARMEB__
+ orr td0, td0, td3, lsl #8
+#else
+ orr td0, td3, td0, lsl #8
+#endif
+#endif
+ adcs sum, sum, td0
+ tst len, #6
+ bne .Lless8_wordlp
+
+.Lless8_byte: tst len, #1 @ odd number of bytes
+ ldrbne td0, [buf], #1 @ include last byte
+ adcsne sum, sum, td0, put_byte_0 @ update checksum
+
+.Ldone: adc r0, sum, #0 @ collect up the last carry
+ ldr td0, [sp], #4
+ tst td0, #1 @ check buffer alignment
+ movne r0, r0, ror #8 @ rotate checksum by 8 bits
+ ldr pc, [sp], #4 @ return
+
+.Lnot_aligned: tst buf, #1 @ odd address
+ ldrbne td0, [buf], #1 @ make even
+ subne len, len, #1
+ adcsne sum, sum, td0, put_byte_1 @ update checksum
+
+ tst buf, #2 @ 32-bit aligned?
+#if __LINUX_ARM_ARCH__ >= 4
+ ldrhne td0, [buf], #2 @ make 32-bit aligned
+ subne len, len, #2
+#else
+ ldrbne td0, [buf], #1
+ ldrbne ip, [buf], #1
+ subne len, len, #2
+#ifndef __ARMEB__
+ orrne td0, td0, ip, lsl #8
+#else
+ orrne td0, ip, td0, lsl #8
+#endif
+#endif
+ adcsne sum, sum, td0 @ update checksum
+ ret lr
+
+ENTRY(csum_partial)
+ stmfd sp!, {buf, lr}
+ cmp len, #8 @ Ensure that we have at least
+ blo .Lless8 @ 8 bytes to copy.
+
+ tst buf, #1
+ movne sum, sum, ror #8
+
+ adds sum, sum, #0 @ C = 0
+ tst buf, #3 @ Test destination alignment
+ blne .Lnot_aligned @ align destination, return here
+
+1: bics ip, len, #31
+ beq 3f
+
+ stmfd sp!, {r4 - r5}
+2: ldmia buf!, {td0, td1, td2, td3}
+ adcs sum, sum, td0
+ adcs sum, sum, td1
+ adcs sum, sum, td2
+ adcs sum, sum, td3
+ ldmia buf!, {td0, td1, td2, td3}
+ adcs sum, sum, td0
+ adcs sum, sum, td1
+ adcs sum, sum, td2
+ adcs sum, sum, td3
+ sub ip, ip, #32
+ teq ip, #0
+ bne 2b
+ ldmfd sp!, {r4 - r5}
+
+3: tst len, #0x1c @ should not change C
+ beq .Lless4
+
+4: ldr td0, [buf], #4
+ sub len, len, #4
+ adcs sum, sum, td0
+ tst len, #0x1c
+ bne 4b
+ b .Lless4
+ENDPROC(csum_partial)
diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S
new file mode 100644
index 000000000..1ca6aadd6
--- /dev/null
+++ b/arch/arm/lib/csumpartialcopy.S
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/csumpartialcopy.S
+ *
+ * Copyright (C) 1995-1998 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+
+/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len)
+ * Params : r0 = src, r1 = dst, r2 = len
+ * Returns : r0 = new checksum
+ */
+
+ .macro save_regs
+ stmfd sp!, {r1, r4 - r8, lr}
+ .endm
+
+ .macro load_regs
+ ldmfd sp!, {r1, r4 - r8, pc}
+ .endm
+
+ .macro load1b, reg1
+ ldrb \reg1, [r0], #1
+ .endm
+
+ .macro load2b, reg1, reg2
+ ldrb \reg1, [r0], #1
+ ldrb \reg2, [r0], #1
+ .endm
+
+ .macro load1l, reg1
+ ldr \reg1, [r0], #4
+ .endm
+
+ .macro load2l, reg1, reg2
+ ldr \reg1, [r0], #4
+ ldr \reg2, [r0], #4
+ .endm
+
+ .macro load4l, reg1, reg2, reg3, reg4
+ ldmia r0!, {\reg1, \reg2, \reg3, \reg4}
+ .endm
+
+#define FN_ENTRY ENTRY(csum_partial_copy_nocheck)
+#define FN_EXIT ENDPROC(csum_partial_copy_nocheck)
+
+#include "csumpartialcopygeneric.S"
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
new file mode 100644
index 000000000..0fd5c10e9
--- /dev/null
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/csumpartialcopygeneric.S
+ *
+ * Copyright (C) 1995-2001 Russell King
+ */
+#include <asm/assembler.h>
+
+/*
+ * unsigned int
+ * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
+ * r0 = src, r1 = dst, r2 = len, r3 = sum
+ * Returns : r0 = checksum
+ *
+ * Note that 'tst' and 'teq' preserve the carry flag.
+ */
+
+src .req r0
+dst .req r1
+len .req r2
+sum .req r3
+
+.Lzero: mov r0, sum
+ load_regs
+
+ /*
+ * Align an unaligned destination pointer. We know that
+ * we have >= 8 bytes here, so we don't need to check
+ * the length. Note that the source pointer hasn't been
+ * aligned yet.
+ */
+.Ldst_unaligned:
+ tst dst, #1
+ beq .Ldst_16bit
+
+ load1b ip
+ sub len, len, #1
+ adcs sum, sum, ip, put_byte_1 @ update checksum
+ strb ip, [dst], #1
+ tst dst, #2
+ reteq lr @ dst is now 32bit aligned
+
+.Ldst_16bit: load2b r8, ip
+ sub len, len, #2
+ adcs sum, sum, r8, put_byte_0
+ strb r8, [dst], #1
+ adcs sum, sum, ip, put_byte_1
+ strb ip, [dst], #1
+ ret lr @ dst is now 32bit aligned
+
+ /*
+ * Handle 0 to 7 bytes, with any alignment of source and
+ * destination pointers. Note that when we get here, C = 0
+ */
+.Lless8: teq len, #0 @ check for zero count
+ beq .Lzero
+
+ /* we must have at least one byte. */
+ tst dst, #1 @ dst 16-bit aligned
+ beq .Lless8_aligned
+
+ /* Align dst */
+ load1b ip
+ sub len, len, #1
+ adcs sum, sum, ip, put_byte_1 @ update checksum
+ strb ip, [dst], #1
+ tst len, #6
+ beq .Lless8_byteonly
+
+1: load2b r8, ip
+ sub len, len, #2
+ adcs sum, sum, r8, put_byte_0
+ strb r8, [dst], #1
+ adcs sum, sum, ip, put_byte_1
+ strb ip, [dst], #1
+.Lless8_aligned:
+ tst len, #6
+ bne 1b
+.Lless8_byteonly:
+ tst len, #1
+ beq .Ldone
+ load1b r8
+ adcs sum, sum, r8, put_byte_0 @ update checksum
+ strb r8, [dst], #1
+ b .Ldone
+
+FN_ENTRY
+ save_regs
+ mov sum, #-1
+
+ cmp len, #8 @ Ensure that we have at least
+ blo .Lless8 @ 8 bytes to copy.
+
+ adds sum, sum, #0 @ C = 0
+ tst dst, #3 @ Test destination alignment
+ blne .Ldst_unaligned @ align destination, return here
+
+ /*
+ * Ok, the dst pointer is now 32bit aligned, and we know
+ * that we must have more than 4 bytes to copy. Note
+ * that C contains the carry from the dst alignment above.
+ */
+
+ tst src, #3 @ Test source alignment
+ bne .Lsrc_not_aligned
+
+ /* Routine for src & dst aligned */
+
+ bics ip, len, #15
+ beq 2f
+
+1: load4l r4, r5, r6, r7
+ stmia dst!, {r4, r5, r6, r7}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ adcs sum, sum, r6
+ adcs sum, sum, r7
+ sub ip, ip, #16
+ teq ip, #0
+ bne 1b
+
+2: ands ip, len, #12
+ beq 4f
+ tst ip, #8
+ beq 3f
+ load2l r4, r5
+ stmia dst!, {r4, r5}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ tst ip, #4
+ beq 4f
+
+3: load1l r4
+ str r4, [dst], #4
+ adcs sum, sum, r4
+
+4: ands len, len, #3
+ beq .Ldone
+ load1l r4
+ tst len, #2
+ mov r5, r4, get_byte_0
+ beq .Lexit
+ adcs sum, sum, r4, lspush #16
+ strb r5, [dst], #1
+ mov r5, r4, get_byte_1
+ strb r5, [dst], #1
+ mov r5, r4, get_byte_2
+.Lexit: tst len, #1
+ strbne r5, [dst], #1
+ andne r5, r5, #255
+ adcsne sum, sum, r5, put_byte_0
+
+ /*
+ * If the dst pointer was not 16-bit aligned, we
+ * need to rotate the checksum here to get around
+ * the inefficient byte manipulations in the
+ * architecture independent code.
+ */
+.Ldone: adc r0, sum, #0
+ ldr sum, [sp, #0] @ dst
+ tst sum, #1
+ movne r0, r0, ror #8
+ load_regs
+
+.Lsrc_not_aligned:
+ adc sum, sum, #0 @ include C from dst alignment
+ and ip, src, #3
+ bic src, src, #3
+ load1l r5
+ cmp ip, #2
+ beq .Lsrc2_aligned
+ bhi .Lsrc3_aligned
+ mov r4, r5, lspull #8 @ C = 0
+ bics ip, len, #15
+ beq 2f
+1: load4l r5, r6, r7, r8
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ mov r6, r6, lspull #8
+ orr r6, r6, r7, lspush #24
+ mov r7, r7, lspull #8
+ orr r7, r7, r8, lspush #24
+ stmia dst!, {r4, r5, r6, r7}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ adcs sum, sum, r6
+ adcs sum, sum, r7
+ mov r4, r8, lspull #8
+ sub ip, ip, #16
+ teq ip, #0
+ bne 1b
+2: ands ip, len, #12
+ beq 4f
+ tst ip, #8
+ beq 3f
+ load2l r5, r6
+ orr r4, r4, r5, lspush #24
+ mov r5, r5, lspull #8
+ orr r5, r5, r6, lspush #24
+ stmia dst!, {r4, r5}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ mov r4, r6, lspull #8
+ tst ip, #4
+ beq 4f
+3: load1l r5
+ orr r4, r4, r5, lspush #24
+ str r4, [dst], #4
+ adcs sum, sum, r4
+ mov r4, r5, lspull #8
+4: ands len, len, #3
+ beq .Ldone
+ mov r5, r4, get_byte_0
+ tst len, #2
+ beq .Lexit
+ adcs sum, sum, r4, lspush #16
+ strb r5, [dst], #1
+ mov r5, r4, get_byte_1
+ strb r5, [dst], #1
+ mov r5, r4, get_byte_2
+ b .Lexit
+
+.Lsrc2_aligned: mov r4, r5, lspull #16
+ adds sum, sum, #0
+ bics ip, len, #15
+ beq 2f
+1: load4l r5, r6, r7, r8
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ mov r6, r6, lspull #16
+ orr r6, r6, r7, lspush #16
+ mov r7, r7, lspull #16
+ orr r7, r7, r8, lspush #16
+ stmia dst!, {r4, r5, r6, r7}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ adcs sum, sum, r6
+ adcs sum, sum, r7
+ mov r4, r8, lspull #16
+ sub ip, ip, #16
+ teq ip, #0
+ bne 1b
+2: ands ip, len, #12
+ beq 4f
+ tst ip, #8
+ beq 3f
+ load2l r5, r6
+ orr r4, r4, r5, lspush #16
+ mov r5, r5, lspull #16
+ orr r5, r5, r6, lspush #16
+ stmia dst!, {r4, r5}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ mov r4, r6, lspull #16
+ tst ip, #4
+ beq 4f
+3: load1l r5
+ orr r4, r4, r5, lspush #16
+ str r4, [dst], #4
+ adcs sum, sum, r4
+ mov r4, r5, lspull #16
+4: ands len, len, #3
+ beq .Ldone
+ mov r5, r4, get_byte_0
+ tst len, #2
+ beq .Lexit
+ adcs sum, sum, r4
+ strb r5, [dst], #1
+ mov r5, r4, get_byte_1
+ strb r5, [dst], #1
+ tst len, #1
+ beq .Ldone
+ load1b r5
+ b .Lexit
+
+.Lsrc3_aligned: mov r4, r5, lspull #24
+ adds sum, sum, #0
+ bics ip, len, #15
+ beq 2f
+1: load4l r5, r6, r7, r8
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ mov r6, r6, lspull #24
+ orr r6, r6, r7, lspush #8
+ mov r7, r7, lspull #24
+ orr r7, r7, r8, lspush #8
+ stmia dst!, {r4, r5, r6, r7}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ adcs sum, sum, r6
+ adcs sum, sum, r7
+ mov r4, r8, lspull #24
+ sub ip, ip, #16
+ teq ip, #0
+ bne 1b
+2: ands ip, len, #12
+ beq 4f
+ tst ip, #8
+ beq 3f
+ load2l r5, r6
+ orr r4, r4, r5, lspush #8
+ mov r5, r5, lspull #24
+ orr r5, r5, r6, lspush #8
+ stmia dst!, {r4, r5}
+ adcs sum, sum, r4
+ adcs sum, sum, r5
+ mov r4, r6, lspull #24
+ tst ip, #4
+ beq 4f
+3: load1l r5
+ orr r4, r4, r5, lspush #8
+ str r4, [dst], #4
+ adcs sum, sum, r4
+ mov r4, r5, lspull #24
+4: ands len, len, #3
+ beq .Ldone
+ mov r5, r4, get_byte_0
+ tst len, #2
+ beq .Lexit
+ strb r5, [dst], #1
+ adcs sum, sum, r4
+ load1l r4
+ mov r5, r4, get_byte_0
+ strb r5, [dst], #1
+ adcs sum, sum, r4, lspush #24
+ mov r5, r4, get_byte_1
+ b .Lexit
+FN_EXIT
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
new file mode 100644
index 000000000..6928781e6
--- /dev/null
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/csumpartialcopyuser.S
+ *
+ * Copyright (C) 1995-1998 Russell King
+ *
+ * 27/03/03 Ian Molton Clean up CONFIG_CPU
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+
+ .text
+
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+ .macro save_regs
+ mrc p15, 0, ip, c3, c0, 0
+ stmfd sp!, {r1, r2, r4 - r8, ip, lr}
+ uaccess_enable ip
+ .endm
+
+ .macro load_regs
+ ldmfd sp!, {r1, r2, r4 - r8, ip, lr}
+ mcr p15, 0, ip, c3, c0, 0
+ ret lr
+ .endm
+#else
+ .macro save_regs
+ stmfd sp!, {r1, r2, r4 - r8, lr}
+ .endm
+
+ .macro load_regs
+ ldmfd sp!, {r1, r2, r4 - r8, pc}
+ .endm
+#endif
+
+ .macro load1b, reg1
+ ldrusr \reg1, r0, 1
+ .endm
+
+ .macro load2b, reg1, reg2
+ ldrusr \reg1, r0, 1
+ ldrusr \reg2, r0, 1
+ .endm
+
+ .macro load1l, reg1
+ ldrusr \reg1, r0, 4
+ .endm
+
+ .macro load2l, reg1, reg2
+ ldrusr \reg1, r0, 4
+ ldrusr \reg2, r0, 4
+ .endm
+
+ .macro load4l, reg1, reg2, reg3, reg4
+ ldrusr \reg1, r0, 4
+ ldrusr \reg2, r0, 4
+ ldrusr \reg3, r0, 4
+ ldrusr \reg4, r0, 4
+ .endm
+
+/*
+ * unsigned int
+ * csum_partial_copy_from_user(const char *src, char *dst, int len)
+ * r0 = src, r1 = dst, r2 = len
+ * Returns : r0 = checksum or 0
+ */
+
+#define FN_ENTRY ENTRY(csum_partial_copy_from_user)
+#define FN_EXIT ENDPROC(csum_partial_copy_from_user)
+
+#include "csumpartialcopygeneric.S"
+
+/*
+ * We report fault by returning 0 csum - impossible in normal case, since
+ * we start with 0xffffffff for initial sum.
+ */
+ .pushsection .text.fixup,"ax"
+ .align 4
+9001: mov r0, #0
+ load_regs
+ .popsection
diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S
new file mode 100644
index 000000000..3ccade0f8
--- /dev/null
+++ b/arch/arm/lib/delay-loop.S
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/delay.S
+ *
+ * Copyright (C) 1995, 1996 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/delay.h>
+
+ .text
+
+.LC0: .word loops_per_jiffy
+.LC1: .word UDELAY_MULT
+
+/*
+ * loops = r0 * HZ * loops_per_jiffy / 1000000
+ *
+ * r0 <= 2000
+ * HZ <= 1000
+ */
+
+ENTRY(__loop_udelay)
+ ldr r2, .LC1
+ mul r0, r2, r0 @ r0 = delay_us * UDELAY_MULT
+ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0xfffffaf0
+ ldr r2, .LC0
+ ldr r2, [r2]
+ umull r1, r0, r2, r0 @ r0-r1 = r0 * loops_per_jiffy
+ adds r1, r1, #0xffffffff @ rounding up ...
+ adcs r0, r0, r0 @ and right shift by 31
+ reteq lr
+
+ .align 3
+
+@ Delay routine
+ENTRY(__loop_delay)
+ subs r0, r0, #1
+#if 0
+ retls lr
+ subs r0, r0, #1
+ retls lr
+ subs r0, r0, #1
+ retls lr
+ subs r0, r0, #1
+ retls lr
+ subs r0, r0, #1
+ retls lr
+ subs r0, r0, #1
+ retls lr
+ subs r0, r0, #1
+ retls lr
+ subs r0, r0, #1
+#endif
+ bhi __loop_delay
+ ret lr
+ENDPROC(__loop_udelay)
+ENDPROC(__loop_const_udelay)
+ENDPROC(__loop_delay)
diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c
new file mode 100644
index 000000000..b7fe84f68
--- /dev/null
+++ b/arch/arm/lib/delay.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Delay loops based on the OpenRISC implementation.
+ *
+ * Copyright (C) 2012 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ */
+
+#include <linux/clocksource.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/timex.h>
+
+/*
+ * Default to the loop-based delay implementation.
+ */
+struct arm_delay_ops arm_delay_ops __ro_after_init = {
+ .delay = __loop_delay,
+ .const_udelay = __loop_const_udelay,
+ .udelay = __loop_udelay,
+};
+
+static const struct delay_timer *delay_timer;
+static bool delay_calibrated;
+static u64 delay_res;
+
+int read_current_timer(unsigned long *timer_val)
+{
+ if (!delay_timer)
+ return -ENXIO;
+
+ *timer_val = delay_timer->read_current_timer();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(read_current_timer);
+
+static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
+{
+ return (cyc * mult) >> shift;
+}
+
+static void __timer_delay(unsigned long cycles)
+{
+ cycles_t start = get_cycles();
+
+ while ((get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static void __timer_const_udelay(unsigned long xloops)
+{
+ unsigned long long loops = xloops;
+ loops *= arm_delay_ops.ticks_per_jiffy;
+ __timer_delay(loops >> UDELAY_SHIFT);
+}
+
+static void __timer_udelay(unsigned long usecs)
+{
+ __timer_const_udelay(usecs * UDELAY_MULT);
+}
+
+void __init register_current_timer_delay(const struct delay_timer *timer)
+{
+ u32 new_mult, new_shift;
+ u64 res;
+
+ clocks_calc_mult_shift(&new_mult, &new_shift, timer->freq,
+ NSEC_PER_SEC, 3600);
+ res = cyc_to_ns(1ULL, new_mult, new_shift);
+
+ if (res > 1000) {
+ pr_err("Ignoring delay timer %ps, which has insufficient resolution of %lluns\n",
+ timer, res);
+ return;
+ }
+
+ if (!delay_calibrated && (!delay_res || (res < delay_res))) {
+ pr_info("Switching to timer-based delay loop, resolution %lluns\n", res);
+ delay_timer = timer;
+ lpj_fine = timer->freq / HZ;
+ delay_res = res;
+
+ /* cpufreq may scale loops_per_jiffy, so keep a private copy */
+ arm_delay_ops.ticks_per_jiffy = lpj_fine;
+ arm_delay_ops.delay = __timer_delay;
+ arm_delay_ops.const_udelay = __timer_const_udelay;
+ arm_delay_ops.udelay = __timer_udelay;
+ } else {
+ pr_info("Ignoring duplicate/late registration of read_current_timer delay\n");
+ }
+}
+
+unsigned long calibrate_delay_is_known(void)
+{
+ delay_calibrated = true;
+ return lpj_fine;
+}
+
+void calibration_delay_done(void)
+{
+ delay_calibrated = true;
+}
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
new file mode 100644
index 000000000..a87c02925
--- /dev/null
+++ b/arch/arm/lib/div64.S
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/div64.S
+ *
+ * Optimized computation of 64-bit dividend / 32-bit divisor
+ *
+ * Author: Nicolas Pitre
+ * Created: Oct 5, 2003
+ * Copyright: Monta Vista Software, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/unwind.h>
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+/*
+ * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
+ *
+ * Note: Calling convention is totally non standard for optimal code.
+ * This is meant to be used by do_div() from include/asm/div64.h only.
+ *
+ * Input parameters:
+ * xh-xl = dividend (clobbered)
+ * r4 = divisor (preserved)
+ *
+ * Output values:
+ * yh-yl = result
+ * xh = remainder
+ *
+ * Clobbered regs: xl, ip
+ */
+
+ENTRY(__do_div64)
+UNWIND(.fnstart)
+
+ @ Test for easy paths first.
+ subs ip, r4, #1
+ bls 9f @ divisor is 0 or 1
+ tst ip, r4
+ beq 8f @ divisor is power of 2
+
+ @ See if we need to handle upper 32-bit result.
+ cmp xh, r4
+ mov yh, #0
+ blo 3f
+
+ @ Align divisor with upper part of dividend.
+ @ The aligned divisor is stored in yl preserving the original.
+ @ The bit position is stored in ip.
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz yl, r4
+ clz ip, xh
+ sub yl, yl, ip
+ mov ip, #1
+ mov ip, ip, lsl yl
+ mov yl, r4, lsl yl
+
+#else
+
+ mov yl, r4
+ mov ip, #1
+1: cmp yl, #0x80000000
+ cmpcc yl, xh
+ movcc yl, yl, lsl #1
+ movcc ip, ip, lsl #1
+ bcc 1b
+
+#endif
+
+ @ The division loop for needed upper bit positions.
+ @ Break out early if dividend reaches 0.
+2: cmp xh, yl
+ orrcs yh, yh, ip
+ subscs xh, xh, yl
+ movsne ip, ip, lsr #1
+ mov yl, yl, lsr #1
+ bne 2b
+
+ @ See if we need to handle lower 32-bit result.
+3: cmp xh, #0
+ mov yl, #0
+ cmpeq xl, r4
+ movlo xh, xl
+ retlo lr
+
+ @ The division loop for lower bit positions.
+ @ Here we shift remainer bits leftwards rather than moving the
+ @ divisor for comparisons, considering the carry-out bit as well.
+ mov ip, #0x80000000
+4: movs xl, xl, lsl #1
+ adcs xh, xh, xh
+ beq 6f
+ cmpcc xh, r4
+5: orrcs yl, yl, ip
+ subcs xh, xh, r4
+ movs ip, ip, lsr #1
+ bne 4b
+ ret lr
+
+ @ The top part of remainder became zero. If carry is set
+ @ (the 33th bit) this is a false positive so resume the loop.
+ @ Otherwise, if lower part is also null then we are done.
+6: bcs 5b
+ cmp xl, #0
+ reteq lr
+
+ @ We still have remainer bits in the low part. Bring them up.
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz xh, xl @ we know xh is zero here so...
+ add xh, xh, #1
+ mov xl, xl, lsl xh
+ mov ip, ip, lsr xh
+
+#else
+
+7: movs xl, xl, lsl #1
+ mov ip, ip, lsr #1
+ bcc 7b
+
+#endif
+
+ @ Current remainder is now 1. It is worthless to compare with
+ @ divisor at this point since divisor can not be smaller than 3 here.
+ @ If possible, branch for another shift in the division loop.
+ @ If no bit position left then we are done.
+ movs ip, ip, lsr #1
+ mov xh, #1
+ bne 4b
+ ret lr
+
+8: @ Division by a power of 2: determine what that divisor order is
+ @ then simply shift values around
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz ip, r4
+ rsb ip, ip, #31
+
+#else
+
+ mov yl, r4
+ cmp r4, #(1 << 16)
+ mov ip, #0
+ movhs yl, yl, lsr #16
+ movhs ip, #16
+
+ cmp yl, #(1 << 8)
+ movhs yl, yl, lsr #8
+ addhs ip, ip, #8
+
+ cmp yl, #(1 << 4)
+ movhs yl, yl, lsr #4
+ addhs ip, ip, #4
+
+ cmp yl, #(1 << 2)
+ addhi ip, ip, #3
+ addls ip, ip, yl, lsr #1
+
+#endif
+
+ mov yh, xh, lsr ip
+ mov yl, xl, lsr ip
+ rsb ip, ip, #32
+ ARM( orr yl, yl, xh, lsl ip )
+ THUMB( lsl xh, xh, ip )
+ THUMB( orr yl, yl, xh )
+ mov xh, xl, lsl ip
+ mov xh, xh, lsr ip
+ ret lr
+
+ @ eq -> division by 1: obvious enough...
+9: moveq yl, xl
+ moveq yh, xh
+ moveq xh, #0
+ reteq lr
+UNWIND(.fnend)
+
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+Ldiv0_64:
+ @ Division by 0:
+ str lr, [sp, #-8]!
+ bl __div0
+
+ @ as wrong as it could be...
+ mov yl, #0
+ mov yh, #0
+ mov xh, #0
+ ldr pc, [sp], #8
+
+UNWIND(.fnend)
+ENDPROC(__do_div64)
diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
new file mode 100644
index 000000000..7fd3600db
--- /dev/null
+++ b/arch/arm/lib/findbit.S
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/findbit.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ *
+ * 16th March 2001 - John Ripley <jripley@sonicblue.com>
+ * Fixed so that "size" is an exclusive not an inclusive quantity.
+ * All users of these functions expect exclusive sizes, and may
+ * also call with zero size.
+ * Reworked by rmk.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+ .text
+
+/*
+ * Purpose : Find a 'zero' bit
+ * Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
+ */
+ENTRY(_find_first_zero_bit_le)
+ teq r1, #0
+ beq 3f
+ mov r2, #0
+1:
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ eors r3, r3, #0xff @ invert bits
+ bne .L_found @ any now set - found zero bit
+ add r2, r2, #8 @ next bit pointer
+2: cmp r2, r1 @ any more?
+ blo 1b
+3: mov r0, r1 @ no free bits
+ ret lr
+ENDPROC(_find_first_zero_bit_le)
+
+/*
+ * Purpose : Find next 'zero' bit
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ENTRY(_find_next_zero_bit_le)
+ cmp r2, r1
+ bhs 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ eor r3, r3, #0xff @ now looking for a 1 bit
+ movs r3, r3, lsr ip @ shift off unused bits
+ bne .L_found
+ orr r2, r2, #7 @ if zero, then no bits here
+ add r2, r2, #1 @ align bit pointer
+ b 2b @ loop for next bit
+ENDPROC(_find_next_zero_bit_le)
+
+/*
+ * Purpose : Find a 'one' bit
+ * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit);
+ */
+ENTRY(_find_first_bit_le)
+ teq r1, #0
+ beq 3f
+ mov r2, #0
+1:
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3
+ bne .L_found @ any now set - found zero bit
+ add r2, r2, #8 @ next bit pointer
+2: cmp r2, r1 @ any more?
+ blo 1b
+3: mov r0, r1 @ no free bits
+ ret lr
+ENDPROC(_find_first_bit_le)
+
+/*
+ * Purpose : Find next 'one' bit
+ * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
+ */
+ENTRY(_find_next_bit_le)
+ cmp r2, r1
+ bhs 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3, lsr ip @ shift off unused bits
+ bne .L_found
+ orr r2, r2, #7 @ if zero, then no bits here
+ add r2, r2, #1 @ align bit pointer
+ b 2b @ loop for next bit
+ENDPROC(_find_next_bit_le)
+
+#ifdef __ARMEB__
+
+ENTRY(_find_first_zero_bit_be)
+ teq r1, #0
+ beq 3f
+ mov r2, #0
+1: eor r3, r2, #0x18 @ big endian byte ordering
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ eors r3, r3, #0xff @ invert bits
+ bne .L_found @ any now set - found zero bit
+ add r2, r2, #8 @ next bit pointer
+2: cmp r2, r1 @ any more?
+ blo 1b
+3: mov r0, r1 @ no free bits
+ ret lr
+ENDPROC(_find_first_zero_bit_be)
+
+ENTRY(_find_next_zero_bit_be)
+ cmp r2, r1
+ bhs 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ eor r3, r2, #0x18 @ big endian byte ordering
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ eor r3, r3, #0xff @ now looking for a 1 bit
+ movs r3, r3, lsr ip @ shift off unused bits
+ bne .L_found
+ orr r2, r2, #7 @ if zero, then no bits here
+ add r2, r2, #1 @ align bit pointer
+ b 2b @ loop for next bit
+ENDPROC(_find_next_zero_bit_be)
+
+ENTRY(_find_first_bit_be)
+ teq r1, #0
+ beq 3f
+ mov r2, #0
+1: eor r3, r2, #0x18 @ big endian byte ordering
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3
+ bne .L_found @ any now set - found zero bit
+ add r2, r2, #8 @ next bit pointer
+2: cmp r2, r1 @ any more?
+ blo 1b
+3: mov r0, r1 @ no free bits
+ ret lr
+ENDPROC(_find_first_bit_be)
+
+ENTRY(_find_next_bit_be)
+ cmp r2, r1
+ bhs 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ eor r3, r2, #0x18 @ big endian byte ordering
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3, lsr ip @ shift off unused bits
+ bne .L_found
+ orr r2, r2, #7 @ if zero, then no bits here
+ add r2, r2, #1 @ align bit pointer
+ b 2b @ loop for next bit
+ENDPROC(_find_next_bit_be)
+
+#endif
+
+/*
+ * One or more bits in the LSB of r3 are assumed to be set.
+ */
+.L_found:
+#if __LINUX_ARM_ARCH__ >= 5
+ rsb r0, r3, #0
+ and r3, r3, r0
+ clz r3, r3
+ rsb r3, r3, #31
+ add r0, r2, r3
+#else
+ tst r3, #0x0f
+ addeq r2, r2, #4
+ movne r3, r3, lsl #4
+ tst r3, #0x30
+ addeq r2, r2, #2
+ movne r3, r3, lsl #2
+ tst r3, #0x40
+ addeq r2, r2, #1
+ mov r0, r2
+#endif
+ cmp r1, r0 @ Clamp to maxbit
+ movlo r0, r1
+ ret lr
+
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
new file mode 100644
index 000000000..c5e420750
--- /dev/null
+++ b/arch/arm/lib/getuser.S
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/getuser.S
+ *
+ * Copyright (C) 2001 Russell King
+ *
+ * Idea from x86 version, (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface to make them more
+ * efficient, especially as they return an error value in addition to
+ * the "real" return value.
+ *
+ * __get_user_X
+ *
+ * Inputs: r0 contains the address
+ * r1 contains the address limit, which must be preserved
+ * Outputs: r0 is the error code
+ * r2, r3 contains the zero-extended value
+ * lr corrupted
+ *
+ * No other registers must be altered. (see <asm/uaccess.h>
+ * for specific ASM register usage).
+ *
+ * Note that ADDR_LIMIT is either 0 or 0xc0000000.
+ * Note also that it is intended that __get_user_bad is not global.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/domain.h>
+
+ENTRY(__get_user_1)
+ check_uaccess r0, 1, r1, r2, __get_user_bad
+1: TUSER(ldrb) r2, [r0]
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_1)
+_ASM_NOKPROBE(__get_user_1)
+
+ENTRY(__get_user_2)
+ check_uaccess r0, 2, r1, r2, __get_user_bad
+#if __LINUX_ARM_ARCH__ >= 6
+
+2: TUSER(ldrh) r2, [r0]
+
+#else
+
+#ifdef CONFIG_CPU_USE_DOMAINS
+rb .req ip
+2: ldrbt r2, [r0], #1
+3: ldrbt rb, [r0], #0
+#else
+rb .req r0
+2: ldrb r2, [r0]
+3: ldrb rb, [r0, #1]
+#endif
+#ifndef __ARMEB__
+ orr r2, r2, rb, lsl #8
+#else
+ orr r2, rb, r2, lsl #8
+#endif
+
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_2)
+_ASM_NOKPROBE(__get_user_2)
+
+ENTRY(__get_user_4)
+ check_uaccess r0, 4, r1, r2, __get_user_bad
+4: TUSER(ldr) r2, [r0]
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_4)
+_ASM_NOKPROBE(__get_user_4)
+
+ENTRY(__get_user_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad8
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(ldr) r2, [r0]
+6: TUSER(ldr) r3, [r0, #4]
+#else
+5: TUSER(ldr) r2, [r0], #4
+6: TUSER(ldr) r3, [r0]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_8)
+_ASM_NOKPROBE(__get_user_8)
+
+#ifdef __ARMEB__
+ENTRY(__get_user_32t_8)
+ check_uaccess r0, 8, r1, r2, __get_user_bad
+#ifdef CONFIG_CPU_USE_DOMAINS
+ add r0, r0, #4
+7: ldrt r2, [r0]
+#else
+7: ldr r2, [r0, #4]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_32t_8)
+_ASM_NOKPROBE(__get_user_32t_8)
+
+ENTRY(__get_user_64t_1)
+ check_uaccess r0, 1, r1, r2, __get_user_bad8
+8: TUSER(ldrb) r3, [r0]
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_64t_1)
+_ASM_NOKPROBE(__get_user_64t_1)
+
+ENTRY(__get_user_64t_2)
+ check_uaccess r0, 2, r1, r2, __get_user_bad8
+#ifdef CONFIG_CPU_USE_DOMAINS
+rb .req ip
+9: ldrbt r3, [r0], #1
+10: ldrbt rb, [r0], #0
+#else
+rb .req r0
+9: ldrb r3, [r0]
+10: ldrb rb, [r0, #1]
+#endif
+ orr r3, rb, r3, lsl #8
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_64t_2)
+_ASM_NOKPROBE(__get_user_64t_2)
+
+ENTRY(__get_user_64t_4)
+ check_uaccess r0, 4, r1, r2, __get_user_bad8
+11: TUSER(ldr) r3, [r0]
+ mov r0, #0
+ ret lr
+ENDPROC(__get_user_64t_4)
+_ASM_NOKPROBE(__get_user_64t_4)
+#endif
+
+__get_user_bad8:
+ mov r3, #0
+__get_user_bad:
+ mov r2, #0
+ mov r0, #-EFAULT
+ ret lr
+ENDPROC(__get_user_bad)
+ENDPROC(__get_user_bad8)
+_ASM_NOKPROBE(__get_user_bad)
+_ASM_NOKPROBE(__get_user_bad8)
+
+.pushsection __ex_table, "a"
+ .long 1b, __get_user_bad
+ .long 2b, __get_user_bad
+#if __LINUX_ARM_ARCH__ < 6
+ .long 3b, __get_user_bad
+#endif
+ .long 4b, __get_user_bad
+ .long 5b, __get_user_bad8
+ .long 6b, __get_user_bad8
+#ifdef __ARMEB__
+ .long 7b, __get_user_bad
+ .long 8b, __get_user_bad8
+ .long 9b, __get_user_bad8
+ .long 10b, __get_user_bad8
+ .long 11b, __get_user_bad8
+#endif
+.popsection
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
new file mode 100644
index 000000000..0def9388f
--- /dev/null
+++ b/arch/arm/lib/io-readsb.S
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/io-readsb.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+.Linsb_align: rsb ip, ip, #4
+ cmp ip, r2
+ movgt ip, r2
+ cmp ip, #2
+ ldrb r3, [r0]
+ strb r3, [r1], #1
+ ldrbge r3, [r0]
+ strbge r3, [r1], #1
+ ldrbgt r3, [r0]
+ strbgt r3, [r1], #1
+ subs r2, r2, ip
+ bne .Linsb_aligned
+
+ENTRY(__raw_readsb)
+ teq r2, #0 @ do we have to check for the zero len?
+ reteq lr
+ ands ip, r1, #3
+ bne .Linsb_align
+
+.Linsb_aligned: stmfd sp!, {r4 - r6, lr}
+
+ subs r2, r2, #16
+ bmi .Linsb_no_16
+
+.Linsb_16_lp: ldrb r3, [r0]
+ ldrb r4, [r0]
+ ldrb r5, [r0]
+ mov r3, r3, put_byte_0
+ ldrb r6, [r0]
+ orr r3, r3, r4, put_byte_1
+ ldrb r4, [r0]
+ orr r3, r3, r5, put_byte_2
+ ldrb r5, [r0]
+ orr r3, r3, r6, put_byte_3
+ ldrb r6, [r0]
+ mov r4, r4, put_byte_0
+ ldrb ip, [r0]
+ orr r4, r4, r5, put_byte_1
+ ldrb r5, [r0]
+ orr r4, r4, r6, put_byte_2
+ ldrb r6, [r0]
+ orr r4, r4, ip, put_byte_3
+ ldrb ip, [r0]
+ mov r5, r5, put_byte_0
+ ldrb lr, [r0]
+ orr r5, r5, r6, put_byte_1
+ ldrb r6, [r0]
+ orr r5, r5, ip, put_byte_2
+ ldrb ip, [r0]
+ orr r5, r5, lr, put_byte_3
+ ldrb lr, [r0]
+ mov r6, r6, put_byte_0
+ orr r6, r6, ip, put_byte_1
+ ldrb ip, [r0]
+ orr r6, r6, lr, put_byte_2
+ orr r6, r6, ip, put_byte_3
+ stmia r1!, {r3 - r6}
+
+ subs r2, r2, #16
+ bpl .Linsb_16_lp
+
+ tst r2, #15
+ ldmfdeq sp!, {r4 - r6, pc}
+
+.Linsb_no_16: tst r2, #8
+ beq .Linsb_no_8
+
+ ldrb r3, [r0]
+ ldrb r4, [r0]
+ ldrb r5, [r0]
+ mov r3, r3, put_byte_0
+ ldrb r6, [r0]
+ orr r3, r3, r4, put_byte_1
+ ldrb r4, [r0]
+ orr r3, r3, r5, put_byte_2
+ ldrb r5, [r0]
+ orr r3, r3, r6, put_byte_3
+ ldrb r6, [r0]
+ mov r4, r4, put_byte_0
+ ldrb ip, [r0]
+ orr r4, r4, r5, put_byte_1
+ orr r4, r4, r6, put_byte_2
+ orr r4, r4, ip, put_byte_3
+ stmia r1!, {r3, r4}
+
+.Linsb_no_8: tst r2, #4
+ beq .Linsb_no_4
+
+ ldrb r3, [r0]
+ ldrb r4, [r0]
+ ldrb r5, [r0]
+ ldrb r6, [r0]
+ mov r3, r3, put_byte_0
+ orr r3, r3, r4, put_byte_1
+ orr r3, r3, r5, put_byte_2
+ orr r3, r3, r6, put_byte_3
+ str r3, [r1], #4
+
+.Linsb_no_4: ands r2, r2, #3
+ ldmfdeq sp!, {r4 - r6, pc}
+
+ cmp r2, #2
+ ldrb r3, [r0]
+ strb r3, [r1], #1
+ ldrbge r3, [r0]
+ strbge r3, [r1], #1
+ ldrbgt r3, [r0]
+ strbgt r3, [r1]
+
+ ldmfd sp!, {r4 - r6, pc}
+ENDPROC(__raw_readsb)
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
new file mode 100644
index 000000000..d9f6b372b
--- /dev/null
+++ b/arch/arm/lib/io-readsl.S
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/io-readsl.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ENTRY(__raw_readsl)
+ teq r2, #0 @ do we have to check for the zero len?
+ reteq lr
+ ands ip, r1, #3
+ bne 3f
+
+ subs r2, r2, #4
+ bmi 2f
+ stmfd sp!, {r4, lr}
+1: ldr r3, [r0, #0]
+ ldr r4, [r0, #0]
+ ldr ip, [r0, #0]
+ ldr lr, [r0, #0]
+ subs r2, r2, #4
+ stmia r1!, {r3, r4, ip, lr}
+ bpl 1b
+ ldmfd sp!, {r4, lr}
+2: movs r2, r2, lsl #31
+ ldrcs r3, [r0, #0]
+ ldrcs ip, [r0, #0]
+ stmiacs r1!, {r3, ip}
+ ldrne r3, [r0, #0]
+ strne r3, [r1, #0]
+ ret lr
+
+3: ldr r3, [r0]
+ cmp ip, #2
+ mov ip, r3, get_byte_0
+ strb ip, [r1], #1
+ bgt 6f
+ mov ip, r3, get_byte_1
+ strb ip, [r1], #1
+ beq 5f
+ mov ip, r3, get_byte_2
+ strb ip, [r1], #1
+
+4: subs r2, r2, #1
+ mov ip, r3, lspull #24
+ ldrne r3, [r0]
+ orrne ip, ip, r3, lspush #8
+ strne ip, [r1], #4
+ bne 4b
+ b 8f
+
+5: subs r2, r2, #1
+ mov ip, r3, lspull #16
+ ldrne r3, [r0]
+ orrne ip, ip, r3, lspush #16
+ strne ip, [r1], #4
+ bne 5b
+ b 7f
+
+6: subs r2, r2, #1
+ mov ip, r3, lspull #8
+ ldrne r3, [r0]
+ orrne ip, ip, r3, lspush #24
+ strne ip, [r1], #4
+ bne 6b
+
+ mov r3, ip, get_byte_2
+ strb r3, [r1, #2]
+7: mov r3, ip, get_byte_1
+ strb r3, [r1, #1]
+8: mov r3, ip, get_byte_0
+ strb r3, [r1, #0]
+ ret lr
+ENDPROC(__raw_readsl)
diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S
new file mode 100644
index 000000000..266043610
--- /dev/null
+++ b/arch/arm/lib/io-readsw-armv3.S
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/io-readsw-armv3.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+.Linsw_bad_alignment:
+ adr r0, .Linsw_bad_align_msg
+ mov r2, lr
+ b panic
+.Linsw_bad_align_msg:
+ .asciz "insw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
+ .align
+
+.Linsw_align: tst r1, #1
+ bne .Linsw_bad_alignment
+
+ ldr r3, [r0]
+ strb r3, [r1], #1
+ mov r3, r3, lsr #8
+ strb r3, [r1], #1
+
+ subs r2, r2, #1
+ reteq lr
+
+ENTRY(__raw_readsw)
+ teq r2, #0 @ do we have to check for the zero len?
+ reteq lr
+ tst r1, #3
+ bne .Linsw_align
+
+.Linsw_aligned: mov ip, #0xff
+ orr ip, ip, ip, lsl #8
+ stmfd sp!, {r4, r5, r6, lr}
+
+ subs r2, r2, #8
+ bmi .Lno_insw_8
+
+.Linsw_8_lp: ldr r3, [r0]
+ and r3, r3, ip
+ ldr r4, [r0]
+ orr r3, r3, r4, lsl #16
+
+ ldr r4, [r0]
+ and r4, r4, ip
+ ldr r5, [r0]
+ orr r4, r4, r5, lsl #16
+
+ ldr r5, [r0]
+ and r5, r5, ip
+ ldr r6, [r0]
+ orr r5, r5, r6, lsl #16
+
+ ldr r6, [r0]
+ and r6, r6, ip
+ ldr lr, [r0]
+ orr r6, r6, lr, lsl #16
+
+ stmia r1!, {r3 - r6}
+
+ subs r2, r2, #8
+ bpl .Linsw_8_lp
+
+ tst r2, #7
+ ldmfdeq sp!, {r4, r5, r6, pc}
+
+.Lno_insw_8: tst r2, #4
+ beq .Lno_insw_4
+
+ ldr r3, [r0]
+ and r3, r3, ip
+ ldr r4, [r0]
+ orr r3, r3, r4, lsl #16
+
+ ldr r4, [r0]
+ and r4, r4, ip
+ ldr r5, [r0]
+ orr r4, r4, r5, lsl #16
+
+ stmia r1!, {r3, r4}
+
+.Lno_insw_4: tst r2, #2
+ beq .Lno_insw_2
+
+ ldr r3, [r0]
+ and r3, r3, ip
+ ldr r4, [r0]
+ orr r3, r3, r4, lsl #16
+
+ str r3, [r1], #4
+
+.Lno_insw_2: tst r2, #1
+ ldrne r3, [r0]
+ strbne r3, [r1], #1
+ movne r3, r3, lsr #8
+ strbne r3, [r1]
+
+ ldmfd sp!, {r4, r5, r6, pc}
+
+
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
new file mode 100644
index 000000000..228c176a9
--- /dev/null
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/io-readsw-armv4.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .macro pack, rd, hw1, hw2
+#ifndef __ARMEB__
+ orr \rd, \hw1, \hw2, lsl #16
+#else
+ orr \rd, \hw2, \hw1, lsl #16
+#endif
+ .endm
+
+.Linsw_align: movs ip, r1, lsl #31
+ bne .Linsw_noalign
+ ldrh ip, [r0]
+ sub r2, r2, #1
+ strh ip, [r1], #2
+
+ENTRY(__raw_readsw)
+ teq r2, #0
+ reteq lr
+ tst r1, #3
+ bne .Linsw_align
+
+ stmfd sp!, {r4, r5, lr}
+
+ subs r2, r2, #8
+ bmi .Lno_insw_8
+
+.Linsw_8_lp: ldrh r3, [r0]
+ ldrh r4, [r0]
+ pack r3, r3, r4
+
+ ldrh r4, [r0]
+ ldrh r5, [r0]
+ pack r4, r4, r5
+
+ ldrh r5, [r0]
+ ldrh ip, [r0]
+ pack r5, r5, ip
+
+ ldrh ip, [r0]
+ ldrh lr, [r0]
+ pack ip, ip, lr
+
+ subs r2, r2, #8
+ stmia r1!, {r3 - r5, ip}
+ bpl .Linsw_8_lp
+
+.Lno_insw_8: tst r2, #4
+ beq .Lno_insw_4
+
+ ldrh r3, [r0]
+ ldrh r4, [r0]
+ pack r3, r3, r4
+
+ ldrh r4, [r0]
+ ldrh ip, [r0]
+ pack r4, r4, ip
+
+ stmia r1!, {r3, r4}
+
+.Lno_insw_4: movs r2, r2, lsl #31
+ bcc .Lno_insw_2
+
+ ldrh r3, [r0]
+ ldrh ip, [r0]
+ pack r3, r3, ip
+ str r3, [r1], #4
+
+.Lno_insw_2: ldrhne r3, [r0]
+ strhne r3, [r1]
+
+ ldmfd sp!, {r4, r5, pc}
+
+#ifdef __ARMEB__
+#define _BE_ONLY_(code...) code
+#define _LE_ONLY_(code...)
+#define push_hbyte0 lsr #8
+#define pull_hbyte1 lsl #24
+#else
+#define _BE_ONLY_(code...)
+#define _LE_ONLY_(code...) code
+#define push_hbyte0 lsl #24
+#define pull_hbyte1 lsr #8
+#endif
+
+.Linsw_noalign: stmfd sp!, {r4, lr}
+ ldrbcc ip, [r1, #-1]!
+ bcc 1f
+
+ ldrh ip, [r0]
+ sub r2, r2, #1
+ _BE_ONLY_( mov ip, ip, ror #8 )
+ strb ip, [r1], #1
+ _LE_ONLY_( mov ip, ip, lsr #8 )
+ _BE_ONLY_( mov ip, ip, lsr #24 )
+
+1: subs r2, r2, #2
+ bmi 3f
+ _BE_ONLY_( mov ip, ip, lsl #24 )
+
+2: ldrh r3, [r0]
+ ldrh r4, [r0]
+ subs r2, r2, #2
+ orr ip, ip, r3, lsl #8
+ orr ip, ip, r4, push_hbyte0
+ str ip, [r1], #4
+ mov ip, r4, pull_hbyte1
+ bpl 2b
+
+ _BE_ONLY_( mov ip, ip, lsr #24 )
+
+3: tst r2, #1
+ strb ip, [r1], #1
+ ldrhne ip, [r0]
+ _BE_ONLY_( movne ip, ip, ror #8 )
+ strbne ip, [r1], #1
+ _LE_ONLY_( movne ip, ip, lsr #8 )
+ _BE_ONLY_( movne ip, ip, lsr #24 )
+ strbne ip, [r1]
+ ldmfd sp!, {r4, pc}
+ENDPROC(__raw_readsw)
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
new file mode 100644
index 000000000..e2ae312f0
--- /dev/null
+++ b/arch/arm/lib/io-writesb.S
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/io-writesb.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .macro outword, rd
+#ifndef __ARMEB__
+ strb \rd, [r0]
+ mov \rd, \rd, lsr #8
+ strb \rd, [r0]
+ mov \rd, \rd, lsr #8
+ strb \rd, [r0]
+ mov \rd, \rd, lsr #8
+ strb \rd, [r0]
+#else
+ mov lr, \rd, lsr #24
+ strb lr, [r0]
+ mov lr, \rd, lsr #16
+ strb lr, [r0]
+ mov lr, \rd, lsr #8
+ strb lr, [r0]
+ strb \rd, [r0]
+#endif
+ .endm
+
+.Loutsb_align: rsb ip, ip, #4
+ cmp ip, r2
+ movgt ip, r2
+ cmp ip, #2
+ ldrb r3, [r1], #1
+ strb r3, [r0]
+ ldrbge r3, [r1], #1
+ strbge r3, [r0]
+ ldrbgt r3, [r1], #1
+ strbgt r3, [r0]
+ subs r2, r2, ip
+ bne .Loutsb_aligned
+
+ENTRY(__raw_writesb)
+ teq r2, #0 @ do we have to check for the zero len?
+ reteq lr
+ ands ip, r1, #3
+ bne .Loutsb_align
+
+.Loutsb_aligned:
+ stmfd sp!, {r4, r5, lr}
+
+ subs r2, r2, #16
+ bmi .Loutsb_no_16
+
+.Loutsb_16_lp: ldmia r1!, {r3, r4, r5, ip}
+ outword r3
+ outword r4
+ outword r5
+ outword ip
+ subs r2, r2, #16
+ bpl .Loutsb_16_lp
+
+ tst r2, #15
+ ldmfdeq sp!, {r4, r5, pc}
+
+.Loutsb_no_16: tst r2, #8
+ beq .Loutsb_no_8
+
+ ldmia r1!, {r3, r4}
+ outword r3
+ outword r4
+
+.Loutsb_no_8: tst r2, #4
+ beq .Loutsb_no_4
+
+ ldr r3, [r1], #4
+ outword r3
+
+.Loutsb_no_4: ands r2, r2, #3
+ ldmfdeq sp!, {r4, r5, pc}
+
+ cmp r2, #2
+ ldrb r3, [r1], #1
+ strb r3, [r0]
+ ldrbge r3, [r1], #1
+ strbge r3, [r0]
+ ldrbgt r3, [r1]
+ strbgt r3, [r0]
+
+ ldmfd sp!, {r4, r5, pc}
+ENDPROC(__raw_writesb)
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
new file mode 100644
index 000000000..89ef7be61
--- /dev/null
+++ b/arch/arm/lib/io-writesl.S
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/io-writesl.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ENTRY(__raw_writesl)
+ teq r2, #0 @ do we have to check for the zero len?
+ reteq lr
+ ands ip, r1, #3
+ bne 3f
+
+ subs r2, r2, #4
+ bmi 2f
+ stmfd sp!, {r4, lr}
+1: ldmia r1!, {r3, r4, ip, lr}
+ subs r2, r2, #4
+ str r3, [r0, #0]
+ str r4, [r0, #0]
+ str ip, [r0, #0]
+ str lr, [r0, #0]
+ bpl 1b
+ ldmfd sp!, {r4, lr}
+2: movs r2, r2, lsl #31
+ ldmiacs r1!, {r3, ip}
+ strcs r3, [r0, #0]
+ ldrne r3, [r1, #0]
+ strcs ip, [r0, #0]
+ strne r3, [r0, #0]
+ ret lr
+
+3: bic r1, r1, #3
+ ldr r3, [r1], #4
+ cmp ip, #2
+ blt 5f
+ bgt 6f
+
+4: mov ip, r3, lspull #16
+ ldr r3, [r1], #4
+ subs r2, r2, #1
+ orr ip, ip, r3, lspush #16
+ str ip, [r0]
+ bne 4b
+ ret lr
+
+5: mov ip, r3, lspull #8
+ ldr r3, [r1], #4
+ subs r2, r2, #1
+ orr ip, ip, r3, lspush #24
+ str ip, [r0]
+ bne 5b
+ ret lr
+
+6: mov ip, r3, lspull #24
+ ldr r3, [r1], #4
+ subs r2, r2, #1
+ orr ip, ip, r3, lspush #8
+ str ip, [r0]
+ bne 6b
+ ret lr
+ENDPROC(__raw_writesl)
diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S
new file mode 100644
index 000000000..4cabbee7f
--- /dev/null
+++ b/arch/arm/lib/io-writesw-armv3.S
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/io-writesw-armv3.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+.Loutsw_bad_alignment:
+ adr r0, .Loutsw_bad_align_msg
+ mov r2, lr
+ b panic
+.Loutsw_bad_align_msg:
+ .asciz "outsw: bad buffer alignment (0x%p, lr=0x%08lX)\n"
+ .align
+
+.Loutsw_align: tst r1, #1
+ bne .Loutsw_bad_alignment
+
+ add r1, r1, #2
+
+ ldr r3, [r1, #-4]
+ mov r3, r3, lsr #16
+ orr r3, r3, r3, lsl #16
+ str r3, [r0]
+ subs r2, r2, #1
+ reteq lr
+
+ENTRY(__raw_writesw)
+ teq r2, #0 @ do we have to check for the zero len?
+ reteq lr
+ tst r1, #3
+ bne .Loutsw_align
+
+ stmfd sp!, {r4, r5, r6, lr}
+
+ subs r2, r2, #8
+ bmi .Lno_outsw_8
+
+.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, r6}
+
+ mov ip, r3, lsl #16
+ orr ip, ip, ip, lsr #16
+ str ip, [r0]
+
+ mov ip, r3, lsr #16
+ orr ip, ip, ip, lsl #16
+ str ip, [r0]
+
+ mov ip, r4, lsl #16
+ orr ip, ip, ip, lsr #16
+ str ip, [r0]
+
+ mov ip, r4, lsr #16
+ orr ip, ip, ip, lsl #16
+ str ip, [r0]
+
+ mov ip, r5, lsl #16
+ orr ip, ip, ip, lsr #16
+ str ip, [r0]
+
+ mov ip, r5, lsr #16
+ orr ip, ip, ip, lsl #16
+ str ip, [r0]
+
+ mov ip, r6, lsl #16
+ orr ip, ip, ip, lsr #16
+ str ip, [r0]
+
+ mov ip, r6, lsr #16
+ orr ip, ip, ip, lsl #16
+ str ip, [r0]
+
+ subs r2, r2, #8
+ bpl .Loutsw_8_lp
+
+ tst r2, #7
+ ldmfdeq sp!, {r4, r5, r6, pc}
+
+.Lno_outsw_8: tst r2, #4
+ beq .Lno_outsw_4
+
+ ldmia r1!, {r3, r4}
+
+ mov ip, r3, lsl #16
+ orr ip, ip, ip, lsr #16
+ str ip, [r0]
+
+ mov ip, r3, lsr #16
+ orr ip, ip, ip, lsl #16
+ str ip, [r0]
+
+ mov ip, r4, lsl #16
+ orr ip, ip, ip, lsr #16
+ str ip, [r0]
+
+ mov ip, r4, lsr #16
+ orr ip, ip, ip, lsl #16
+ str ip, [r0]
+
+.Lno_outsw_4: tst r2, #2
+ beq .Lno_outsw_2
+
+ ldr r3, [r1], #4
+
+ mov ip, r3, lsl #16
+ orr ip, ip, ip, lsr #16
+ str ip, [r0]
+
+ mov ip, r3, lsr #16
+ orr ip, ip, ip, lsl #16
+ str ip, [r0]
+
+.Lno_outsw_2: tst r2, #1
+
+ ldrne r3, [r1]
+
+ movne ip, r3, lsl #16
+ orrne ip, ip, ip, lsr #16
+ strne ip, [r0]
+
+ ldmfd sp!, {r4, r5, r6, pc}
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
new file mode 100644
index 000000000..12eec5326
--- /dev/null
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/io-writesw-armv4.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .macro outword, rd
+#ifndef __ARMEB__
+ strh \rd, [r0]
+ mov \rd, \rd, lsr #16
+ strh \rd, [r0]
+#else
+ mov lr, \rd, lsr #16
+ strh lr, [r0]
+ strh \rd, [r0]
+#endif
+ .endm
+
+.Loutsw_align: movs ip, r1, lsl #31
+ bne .Loutsw_noalign
+
+ ldrh r3, [r1], #2
+ sub r2, r2, #1
+ strh r3, [r0]
+
+ENTRY(__raw_writesw)
+ teq r2, #0
+ reteq lr
+ ands r3, r1, #3
+ bne .Loutsw_align
+
+ stmfd sp!, {r4, r5, lr}
+
+ subs r2, r2, #8
+ bmi .Lno_outsw_8
+
+.Loutsw_8_lp: ldmia r1!, {r3, r4, r5, ip}
+ subs r2, r2, #8
+ outword r3
+ outword r4
+ outword r5
+ outword ip
+ bpl .Loutsw_8_lp
+
+.Lno_outsw_8: tst r2, #4
+ beq .Lno_outsw_4
+
+ ldmia r1!, {r3, ip}
+ outword r3
+ outword ip
+
+.Lno_outsw_4: movs r2, r2, lsl #31
+ bcc .Lno_outsw_2
+
+ ldr r3, [r1], #4
+ outword r3
+
+.Lno_outsw_2: ldrhne r3, [r1]
+ strhne r3, [r0]
+
+ ldmfd sp!, {r4, r5, pc}
+
+#ifdef __ARMEB__
+#define pull_hbyte0 lsl #8
+#define push_hbyte1 lsr #24
+#else
+#define pull_hbyte0 lsr #24
+#define push_hbyte1 lsl #8
+#endif
+
+.Loutsw_noalign:
+ ARM( ldr r3, [r1, -r3]! )
+ THUMB( rsb r3, r3, #0 )
+ THUMB( ldr r3, [r1, r3] )
+ THUMB( sub r1, r3 )
+ subcs r2, r2, #1
+ bcs 2f
+ subs r2, r2, #2
+ bmi 3f
+
+1: mov ip, r3, lsr #8
+ strh ip, [r0]
+2: mov ip, r3, pull_hbyte0
+ ldr r3, [r1, #4]!
+ subs r2, r2, #2
+ orr ip, ip, r3, push_hbyte1
+ strh ip, [r0]
+ bpl 1b
+
+ tst r2, #1
+3: movne ip, r3, lsr #8
+ strhne ip, [r0]
+ ret lr
+ENDPROC(__raw_writesw)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
new file mode 100644
index 000000000..c23f9d9e2
--- /dev/null
+++ b/arch/arm/lib/lib1funcs.S
@@ -0,0 +1,371 @@
+/*
+ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
+ *
+ * Author: Nicolas Pitre <nico@fluxnic.net>
+ * - contributed to gcc-3.4 on Sep 30, 2003
+ * - adapted for the Linux kernel on Oct 2, 2003
+ */
+
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/unwind.h>
+
+.macro ARM_DIV_BODY dividend, divisor, result, curbit
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \curbit, \divisor
+ clz \result, \dividend
+ sub \result, \curbit, \result
+ mov \curbit, #1
+ mov \divisor, \divisor, lsl \result
+ mov \curbit, \curbit, lsl \result
+ mov \result, #0
+
+#else
+
+ @ Initially shift the divisor left 3 bits if possible,
+ @ set curbit accordingly. This allows for curbit to be located
+ @ at the left end of each 4 bit nibbles in the division loop
+ @ to save one loop in most cases.
+ tst \divisor, #0xe0000000
+ moveq \divisor, \divisor, lsl #3
+ moveq \curbit, #8
+ movne \curbit, #1
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ movlo \curbit, \curbit, lsl #4
+ blo 1b
+
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ movlo \curbit, \curbit, lsl #1
+ blo 1b
+
+ mov \result, #0
+
+#endif
+
+ @ Division loop
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ orrhs \result, \result, \curbit
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ orrhs \result, \result, \curbit, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ orrhs \result, \result, \curbit, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ orrhs \result, \result, \curbit, lsr #3
+ cmp \dividend, #0 @ Early termination?
+ movsne \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ movne \divisor, \divisor, lsr #4
+ bne 1b
+
+.endm
+
+
+.macro ARM_DIV2_ORDER divisor, order
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ rsb \order, \order, #31
+
+#else
+
+ cmp \divisor, #(1 << 16)
+ movhs \divisor, \divisor, lsr #16
+ movhs \order, #16
+ movlo \order, #0
+
+ cmp \divisor, #(1 << 8)
+ movhs \divisor, \divisor, lsr #8
+ addhs \order, \order, #8
+
+ cmp \divisor, #(1 << 4)
+ movhs \divisor, \divisor, lsr #4
+ addhs \order, \order, #4
+
+ cmp \divisor, #(1 << 2)
+ addhi \order, \order, #3
+ addls \order, \order, \divisor, lsr #1
+
+#endif
+
+.endm
+
+
+.macro ARM_MOD_BODY dividend, divisor, order, spare
+
+#if __LINUX_ARM_ARCH__ >= 5
+
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ mov \divisor, \divisor, lsl \order
+
+#else
+
+ mov \order, #0
+
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ addlo \order, \order, #4
+ blo 1b
+
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ addlo \order, \order, #1
+ blo 1b
+
+#endif
+
+ @ Perform all needed subtractions to keep only the reminder.
+ @ Do comparisons in batch of 4 first.
+ subs \order, \order, #3 @ yes, 3 is intended here
+ blt 2f
+
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ cmp \dividend, #1
+ mov \divisor, \divisor, lsr #4
+ subsge \order, \order, #4
+ bge 1b
+
+ tst \order, #3
+ teqne \dividend, #0
+ beq 5f
+
+ @ Either 1, 2 or 3 comparison/subtractions are left.
+2: cmn \order, #2
+ blt 4f
+ beq 3f
+ cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+3: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+4: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+5:
+.endm
+
+
+#ifdef CONFIG_ARM_PATCH_IDIV
+ .align 3
+#endif
+
+ENTRY(__udivsi3)
+ENTRY(__aeabi_uidiv)
+UNWIND(.fnstart)
+
+ subs r2, r1, #1
+ reteq lr
+ bcc Ldiv0
+ cmp r0, r1
+ bls 11f
+ tst r1, r2
+ beq 12f
+
+ ARM_DIV_BODY r0, r1, r2, r3
+
+ mov r0, r2
+ ret lr
+
+11: moveq r0, #1
+ movne r0, #0
+ ret lr
+
+12: ARM_DIV2_ORDER r1, r2
+
+ mov r0, r0, lsr r2
+ ret lr
+
+UNWIND(.fnend)
+ENDPROC(__udivsi3)
+ENDPROC(__aeabi_uidiv)
+
+ENTRY(__umodsi3)
+UNWIND(.fnstart)
+
+ subs r2, r1, #1 @ compare divisor with 1
+ bcc Ldiv0
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ retls lr
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+ ret lr
+
+UNWIND(.fnend)
+ENDPROC(__umodsi3)
+
+#ifdef CONFIG_ARM_PATCH_IDIV
+ .align 3
+#endif
+
+ENTRY(__divsi3)
+ENTRY(__aeabi_idiv)
+UNWIND(.fnstart)
+
+ cmp r1, #0
+ eor ip, r0, r1 @ save the sign of the result.
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ subs r2, r1, #1 @ division by 1 or -1 ?
+ beq 10f
+ movs r3, r0
+ rsbmi r3, r0, #0 @ positive dividend value
+ cmp r3, r1
+ bls 11f
+ tst r1, r2 @ divisor is power of 2 ?
+ beq 12f
+
+ ARM_DIV_BODY r3, r1, r0, r2
+
+ cmp ip, #0
+ rsbmi r0, r0, #0
+ ret lr
+
+10: teq ip, r0 @ same sign ?
+ rsbmi r0, r0, #0
+ ret lr
+
+11: movlo r0, #0
+ moveq r0, ip, asr #31
+ orreq r0, r0, #1
+ ret lr
+
+12: ARM_DIV2_ORDER r1, r2
+
+ cmp ip, #0
+ mov r0, r3, lsr r2
+ rsbmi r0, r0, #0
+ ret lr
+
+UNWIND(.fnend)
+ENDPROC(__divsi3)
+ENDPROC(__aeabi_idiv)
+
+ENTRY(__modsi3)
+UNWIND(.fnstart)
+
+ cmp r1, #0
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ movs ip, r0 @ preserve sign of dividend
+ rsbmi r0, r0, #0 @ if negative make positive
+ subs r2, r1, #1 @ compare divisor with 1
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ bls 10f
+
+ ARM_MOD_BODY r0, r1, r2, r3
+
+10: cmp ip, #0
+ rsbmi r0, r0, #0
+ ret lr
+
+UNWIND(.fnend)
+ENDPROC(__modsi3)
+
+#ifdef CONFIG_AEABI
+
+ENTRY(__aeabi_uidivmod)
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr} )
+
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_uidiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ ret lr
+
+UNWIND(.fnend)
+ENDPROC(__aeabi_uidivmod)
+
+ENTRY(__aeabi_idivmod)
+UNWIND(.fnstart)
+UNWIND(.save {r0, r1, ip, lr} )
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_idiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ ret lr
+
+UNWIND(.fnend)
+ENDPROC(__aeabi_idivmod)
+
+#endif
+
+Ldiv0:
+UNWIND(.fnstart)
+UNWIND(.pad #4)
+UNWIND(.save {lr})
+ str lr, [sp, #-8]!
+ bl __div0
+ mov r0, #0 @ About as wrong as it could be.
+ ldr pc, [sp], #8
+UNWIND(.fnend)
+ENDPROC(Ldiv0)
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
new file mode 100644
index 000000000..922dcd88b
--- /dev/null
+++ b/arch/arm/lib/lshrdi3.S
@@ -0,0 +1,54 @@
+/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
+ Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
+
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifdef __ARMEB__
+#define al r1
+#define ah r0
+#else
+#define al r0
+#define ah r1
+#endif
+
+ENTRY(__lshrdi3)
+ENTRY(__aeabi_llsr)
+
+ subs r3, r2, #32
+ rsb ip, r2, #32
+ movmi al, al, lsr r2
+ movpl al, ah, lsr r3
+ ARM( orrmi al, al, ah, lsl ip )
+ THUMB( lslmi r3, ah, ip )
+ THUMB( orrmi al, al, r3 )
+ mov ah, ah, lsr r2
+ ret lr
+
+ENDPROC(__lshrdi3)
+ENDPROC(__aeabi_llsr)
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
new file mode 100644
index 000000000..95bedafd0
--- /dev/null
+++ b/arch/arm/lib/memchr.S
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/memchr.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ *
+ * ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .align 5
+ENTRY(memchr)
+1: subs r2, r2, #1
+ bmi 2f
+ ldrb r3, [r0], #1
+ teq r3, r1
+ bne 1b
+ sub r0, r0, #1
+2: movne r0, #0
+ ret lr
+ENDPROC(memchr)
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
new file mode 100644
index 000000000..09a333153
--- /dev/null
+++ b/arch/arm/lib/memcpy.S
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/memcpy.S
+ *
+ * Author: Nicolas Pitre
+ * Created: Sep 28, 2005
+ * Copyright: MontaVista Software, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/unwind.h>
+
+#define LDR1W_SHIFT 0
+#define STR1W_SHIFT 0
+
+ .macro ldr1w ptr reg abort
+ W(ldr) \reg, [\ptr], #4
+ .endm
+
+ .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+ ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
+ .endm
+
+ .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+ .endm
+
+ .macro ldr1b ptr reg cond=al abort
+ ldrb\cond \reg, [\ptr], #1
+ .endm
+
+ .macro str1w ptr reg abort
+ W(str) \reg, [\ptr], #4
+ .endm
+
+ .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+ .endm
+
+ .macro str1b ptr reg cond=al abort
+ strb\cond \reg, [\ptr], #1
+ .endm
+
+ .macro enter reg1 reg2
+ stmdb sp!, {r0, \reg1, \reg2}
+ .endm
+
+ .macro usave reg1 reg2
+ UNWIND( .save {r0, \reg1, \reg2} )
+ .endm
+
+ .macro exit reg1 reg2
+ ldmfd sp!, {r0, \reg1, \reg2}
+ .endm
+
+ .text
+
+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
+
+ENTRY(mmiocpy)
+ENTRY(memcpy)
+
+#include "copy_template.S"
+
+ENDPROC(memcpy)
+ENDPROC(mmiocpy)
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
new file mode 100644
index 000000000..b50e5770f
--- /dev/null
+++ b/arch/arm/lib/memmove.S
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/memmove.S
+ *
+ * Author: Nicolas Pitre
+ * Created: Sep 28, 2005
+ * Copyright: (C) MontaVista Software Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/unwind.h>
+
+ .text
+
+/*
+ * Prototype: void *memmove(void *dest, const void *src, size_t n);
+ *
+ * Note:
+ *
+ * If the memory regions don't overlap, we simply branch to memcpy which is
+ * normally a bit faster. Otherwise the copy is done going downwards. This
+ * is a transposition of the code from copy_template.S but with the copy
+ * occurring in the opposite direction.
+ */
+
+ENTRY(memmove)
+ UNWIND( .fnstart )
+
+ subs ip, r0, r1
+ cmphi r2, ip
+ bls memcpy
+
+ stmfd sp!, {r0, r4, lr}
+ UNWIND( .fnend )
+
+ UNWIND( .fnstart )
+ UNWIND( .save {r0, r4, lr} ) @ in first stmfd block
+ add r1, r1, r2
+ add r0, r0, r2
+ subs r2, r2, #4
+ blt 8f
+ ands ip, r0, #3
+ PLD( pld [r1, #-4] )
+ bne 9f
+ ands ip, r1, #3
+ bne 10f
+
+1: subs r2, r2, #(28)
+ stmfd sp!, {r5 - r8}
+ UNWIND( .fnend )
+
+ UNWIND( .fnstart )
+ UNWIND( .save {r0, r4, lr} )
+ UNWIND( .save {r5 - r8} ) @ in second stmfd block
+ blt 5f
+
+ CALGN( ands ip, r0, #31 )
+ CALGN( sbcsne r4, ip, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+ CALGN( subs r2, r2, ip ) @ C is set here
+ CALGN( rsb ip, ip, #32 )
+ CALGN( add pc, r4, ip )
+
+ PLD( pld [r1, #-4] )
+2: PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #-32] )
+ PLD( blt 4f )
+ PLD( pld [r1, #-64] )
+ PLD( pld [r1, #-96] )
+
+3: PLD( pld [r1, #-128] )
+4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ subs r2, r2, #32
+ stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ bge 3b
+ PLD( cmn r2, #96 )
+ PLD( bge 4b )
+
+5: ands ip, r2, #28
+ rsb ip, ip, #32
+ addne pc, pc, ip @ C is always clear here
+ b 7f
+6: W(nop)
+ W(ldr) r3, [r1, #-4]!
+ W(ldr) r4, [r1, #-4]!
+ W(ldr) r5, [r1, #-4]!
+ W(ldr) r6, [r1, #-4]!
+ W(ldr) r7, [r1, #-4]!
+ W(ldr) r8, [r1, #-4]!
+ W(ldr) lr, [r1, #-4]!
+
+ add pc, pc, ip
+ nop
+ W(nop)
+ W(str) r3, [r0, #-4]!
+ W(str) r4, [r0, #-4]!
+ W(str) r5, [r0, #-4]!
+ W(str) r6, [r0, #-4]!
+ W(str) r7, [r0, #-4]!
+ W(str) r8, [r0, #-4]!
+ W(str) lr, [r0, #-4]!
+
+ CALGN( bcs 2b )
+
+7: ldmfd sp!, {r5 - r8}
+ UNWIND( .fnend ) @ end of second stmfd block
+
+ UNWIND( .fnstart )
+ UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block
+
+8: movs r2, r2, lsl #31
+ ldrbne r3, [r1, #-1]!
+ ldrbcs r4, [r1, #-1]!
+ ldrbcs ip, [r1, #-1]
+ strbne r3, [r0, #-1]!
+ strbcs r4, [r0, #-1]!
+ strbcs ip, [r0, #-1]
+ ldmfd sp!, {r0, r4, pc}
+
+9: cmp ip, #2
+ ldrbgt r3, [r1, #-1]!
+ ldrbge r4, [r1, #-1]!
+ ldrb lr, [r1, #-1]!
+ strbgt r3, [r0, #-1]!
+ strbge r4, [r0, #-1]!
+ subs r2, r2, ip
+ strb lr, [r0, #-1]!
+ blt 8b
+ ands ip, r1, #3
+ beq 1b
+
+10: bic r1, r1, #3
+ cmp ip, #2
+ ldr r3, [r1, #0]
+ beq 17f
+ blt 18f
+ UNWIND( .fnend )
+
+
+ .macro backward_copy_shift push pull
+
+ UNWIND( .fnstart )
+ UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block
+ subs r2, r2, #28
+ blt 14f
+
+ CALGN( ands ip, r0, #31 )
+ CALGN( sbcsne r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
+
+11: stmfd sp!, {r5 - r9}
+ UNWIND( .fnend )
+
+ UNWIND( .fnstart )
+ UNWIND( .save {r0, r4, lr} )
+ UNWIND( .save {r5 - r9} ) @ in new second stmfd block
+
+ PLD( pld [r1, #-4] )
+ PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #-32] )
+ PLD( blt 13f )
+ PLD( pld [r1, #-64] )
+ PLD( pld [r1, #-96] )
+
+12: PLD( pld [r1, #-128] )
+13: ldmdb r1!, {r7, r8, r9, ip}
+ mov lr, r3, lspush #\push
+ subs r2, r2, #32
+ ldmdb r1!, {r3, r4, r5, r6}
+ orr lr, lr, ip, lspull #\pull
+ mov ip, ip, lspush #\push
+ orr ip, ip, r9, lspull #\pull
+ mov r9, r9, lspush #\push
+ orr r9, r9, r8, lspull #\pull
+ mov r8, r8, lspush #\push
+ orr r8, r8, r7, lspull #\pull
+ mov r7, r7, lspush #\push
+ orr r7, r7, r6, lspull #\pull
+ mov r6, r6, lspush #\push
+ orr r6, r6, r5, lspull #\pull
+ mov r5, r5, lspush #\push
+ orr r5, r5, r4, lspull #\pull
+ mov r4, r4, lspush #\push
+ orr r4, r4, r3, lspull #\pull
+ stmdb r0!, {r4 - r9, ip, lr}
+ bge 12b
+ PLD( cmn r2, #96 )
+ PLD( bge 13b )
+
+ ldmfd sp!, {r5 - r9}
+ UNWIND( .fnend ) @ end of the second stmfd block
+
+ UNWIND( .fnstart )
+ UNWIND( .save {r0, r4, lr} ) @ still in first stmfd block
+
+14: ands ip, r2, #28
+ beq 16f
+
+15: mov lr, r3, lspush #\push
+ ldr r3, [r1, #-4]!
+ subs ip, ip, #4
+ orr lr, lr, r3, lspull #\pull
+ str lr, [r0, #-4]!
+ bgt 15b
+ CALGN( cmp r2, #0 )
+ CALGN( bge 11b )
+
+16: add r1, r1, #(\pull / 8)
+ b 8b
+ UNWIND( .fnend )
+
+ .endm
+
+
+ backward_copy_shift push=8 pull=24
+
+17: backward_copy_shift push=16 pull=16
+
+18: backward_copy_shift push=24 pull=8
+
+ENDPROC(memmove)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
new file mode 100644
index 000000000..e36d053a8
--- /dev/null
+++ b/arch/arm/lib/memset.S
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/memset.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ *
+ * ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/unwind.h>
+
+ .text
+ .align 5
+
+ENTRY(mmioset)
+ENTRY(memset)
+UNWIND( .fnstart )
+ and r1, r1, #255 @ cast to unsigned char
+ ands r3, r0, #3 @ 1 unaligned?
+ mov ip, r0 @ preserve r0 as return value
+ bne 6f @ 1
+/*
+ * we know that the pointer in ip is aligned to a word boundary.
+ */
+1: orr r1, r1, r1, lsl #8
+ orr r1, r1, r1, lsl #16
+ mov r3, r1
+7: cmp r2, #16
+ blt 4f
+
+#if ! CALGN(1)+0
+
+/*
+ * We need 2 extra registers for this loop - use r8 and the LR
+ */
+ stmfd sp!, {r8, lr}
+UNWIND( .fnend )
+UNWIND( .fnstart )
+UNWIND( .save {r8, lr} )
+ mov r8, r1
+ mov lr, r3
+
+2: subs r2, r2, #64
+ stmiage ip!, {r1, r3, r8, lr} @ 64 bytes at a time.
+ stmiage ip!, {r1, r3, r8, lr}
+ stmiage ip!, {r1, r3, r8, lr}
+ stmiage ip!, {r1, r3, r8, lr}
+ bgt 2b
+ ldmfdeq sp!, {r8, pc} @ Now <64 bytes to go.
+/*
+ * No need to correct the count; we're only testing bits from now on
+ */
+ tst r2, #32
+ stmiane ip!, {r1, r3, r8, lr}
+ stmiane ip!, {r1, r3, r8, lr}
+ tst r2, #16
+ stmiane ip!, {r1, r3, r8, lr}
+ ldmfd sp!, {r8, lr}
+UNWIND( .fnend )
+
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+ stmfd sp!, {r4-r8, lr}
+UNWIND( .fnend )
+UNWIND( .fnstart )
+UNWIND( .save {r4-r8, lr} )
+ mov r4, r1
+ mov r5, r3
+ mov r6, r1
+ mov r7, r3
+ mov r8, r1
+ mov lr, r3
+
+ cmp r2, #96
+ tstgt ip, #31
+ ble 3f
+
+ and r8, ip, #31
+ rsb r8, r8, #32
+ sub r2, r2, r8
+ movs r8, r8, lsl #(32 - 4)
+ stmiacs ip!, {r4, r5, r6, r7}
+ stmiami ip!, {r4, r5}
+ tst r8, #(1 << 30)
+ mov r8, r1
+ strne r1, [ip], #4
+
+3: subs r2, r2, #64
+ stmiage ip!, {r1, r3-r8, lr}
+ stmiage ip!, {r1, r3-r8, lr}
+ bgt 3b
+ ldmfdeq sp!, {r4-r8, pc}
+
+ tst r2, #32
+ stmiane ip!, {r1, r3-r8, lr}
+ tst r2, #16
+ stmiane ip!, {r4-r7}
+ ldmfd sp!, {r4-r8, lr}
+UNWIND( .fnend )
+
+#endif
+
+UNWIND( .fnstart )
+4: tst r2, #8
+ stmiane ip!, {r1, r3}
+ tst r2, #4
+ strne r1, [ip], #4
+/*
+ * When we get here, we've got less than 4 bytes to set. We
+ * may have an unaligned pointer as well.
+ */
+5: tst r2, #2
+ strbne r1, [ip], #1
+ strbne r1, [ip], #1
+ tst r2, #1
+ strbne r1, [ip], #1
+ ret lr
+
+6: subs r2, r2, #4 @ 1 do we have enough
+ blt 5b @ 1 bytes to align with?
+ cmp r3, #2 @ 1
+ strblt r1, [ip], #1 @ 1
+ strble r1, [ip], #1 @ 1
+ strb r1, [ip], #1 @ 1
+ add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
+ b 1b
+UNWIND( .fnend )
+ENDPROC(memset)
+ENDPROC(mmioset)
+
+ENTRY(__memset32)
+UNWIND( .fnstart )
+ mov r3, r1 @ copy r1 to r3 and fall into memset64
+UNWIND( .fnend )
+ENDPROC(__memset32)
+ENTRY(__memset64)
+UNWIND( .fnstart )
+ mov ip, r0 @ preserve r0 as return value
+ b 7b @ jump into the middle of memset
+UNWIND( .fnend )
+ENDPROC(__memset64)
diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S
new file mode 100644
index 000000000..8362fe6c0
--- /dev/null
+++ b/arch/arm/lib/muldi3.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/muldi3.S
+ *
+ * Author: Nicolas Pitre
+ * Created: Oct 19, 2005
+ * Copyright: Monta Vista Software, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+ENTRY(__muldi3)
+ENTRY(__aeabi_lmul)
+
+ mul xh, yl, xh
+ mla xh, xl, yh, xh
+ mov ip, xl, lsr #16
+ mov yh, yl, lsr #16
+ bic xl, xl, ip, lsl #16
+ bic yl, yl, yh, lsl #16
+ mla xh, yh, ip, xh
+ mul yh, xl, yh
+ mul xl, yl, xl
+ mul ip, yl, ip
+ adds xl, xl, yh, lsl #16
+ adc xh, xh, yh, lsr #16
+ adds xl, xl, ip, lsl #16
+ adc xh, xh, ip, lsr #16
+ ret lr
+
+ENDPROC(__muldi3)
+ENDPROC(__aeabi_lmul)
diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
new file mode 100644
index 000000000..bdd8836dc
--- /dev/null
+++ b/arch/arm/lib/putuser.S
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/putuser.S
+ *
+ * Copyright (C) 2001 Russell King
+ *
+ * Idea from x86 version, (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface to make
+ * them more efficient, especially as they return an error
+ * value in addition to the "real" return value.
+ *
+ * __put_user_X
+ *
+ * Inputs: r0 contains the address
+ * r1 contains the address limit, which must be preserved
+ * r2, r3 contains the value
+ * Outputs: r0 is the error code
+ * lr corrupted
+ *
+ * No other registers must be altered. (see <asm/uaccess.h>
+ * for specific ASM register usage).
+ *
+ * Note that ADDR_LIMIT is either 0 or 0xc0000000
+ * Note also that it is intended that __put_user_bad is not global.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/domain.h>
+
+ENTRY(__put_user_1)
+ check_uaccess r0, 1, r1, ip, __put_user_bad
+1: TUSER(strb) r2, [r0]
+ mov r0, #0
+ ret lr
+ENDPROC(__put_user_1)
+
+ENTRY(__put_user_2)
+ check_uaccess r0, 2, r1, ip, __put_user_bad
+#if __LINUX_ARM_ARCH__ >= 6
+
+2: TUSER(strh) r2, [r0]
+
+#else
+
+ mov ip, r2, lsr #8
+#ifndef __ARMEB__
+2: TUSER(strb) r2, [r0], #1
+3: TUSER(strb) ip, [r0]
+#else
+2: TUSER(strb) ip, [r0], #1
+3: TUSER(strb) r2, [r0]
+#endif
+
+#endif /* __LINUX_ARM_ARCH__ >= 6 */
+ mov r0, #0
+ ret lr
+ENDPROC(__put_user_2)
+
+ENTRY(__put_user_4)
+ check_uaccess r0, 4, r1, ip, __put_user_bad
+4: TUSER(str) r2, [r0]
+ mov r0, #0
+ ret lr
+ENDPROC(__put_user_4)
+
+ENTRY(__put_user_8)
+ check_uaccess r0, 8, r1, ip, __put_user_bad
+#ifdef CONFIG_THUMB2_KERNEL
+5: TUSER(str) r2, [r0]
+6: TUSER(str) r3, [r0, #4]
+#else
+5: TUSER(str) r2, [r0], #4
+6: TUSER(str) r3, [r0]
+#endif
+ mov r0, #0
+ ret lr
+ENDPROC(__put_user_8)
+
+__put_user_bad:
+ mov r0, #-EFAULT
+ ret lr
+ENDPROC(__put_user_bad)
+
+.pushsection __ex_table, "a"
+ .long 1b, __put_user_bad
+ .long 2b, __put_user_bad
+#if __LINUX_ARM_ARCH__ < 6
+ .long 3b, __put_user_bad
+#endif
+ .long 4b, __put_user_bad
+ .long 5b, __put_user_bad
+ .long 6b, __put_user_bad
+.popsection
diff --git a/arch/arm/lib/setbit.S b/arch/arm/lib/setbit.S
new file mode 100644
index 000000000..19a96f43f
--- /dev/null
+++ b/arch/arm/lib/setbit.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/setbit.S
+ *
+ * Copyright (C) 1995-1996 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "bitops.h"
+ .text
+
+bitop _set_bit, orr
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
new file mode 100644
index 000000000..09e2cc8a8
--- /dev/null
+++ b/arch/arm/lib/strchr.S
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/strchr.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ *
+ * ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .align 5
+ENTRY(strchr)
+ and r1, r1, #0xff
+1: ldrb r2, [r0], #1
+ teq r2, r1
+ teqne r2, #0
+ bne 1b
+ teq r2, r1
+ movne r0, #0
+ subeq r0, r0, #1
+ ret lr
+ENDPROC(strchr)
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
new file mode 100644
index 000000000..5e87247d1
--- /dev/null
+++ b/arch/arm/lib/strrchr.S
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/strrchr.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ *
+ * ASM optimised string functions
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .align 5
+ENTRY(strrchr)
+ mov r3, #0
+1: ldrb r2, [r0], #1
+ teq r2, r1
+ subeq r3, r0, #1
+ teq r2, #0
+ bne 1b
+ mov r0, r3
+ ret lr
+ENDPROC(strrchr)
diff --git a/arch/arm/lib/testchangebit.S b/arch/arm/lib/testchangebit.S
new file mode 100644
index 000000000..4ebecc67e
--- /dev/null
+++ b/arch/arm/lib/testchangebit.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/testchangebit.S
+ *
+ * Copyright (C) 1995-1996 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "bitops.h"
+ .text
+
+testop _test_and_change_bit, eor, str
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
new file mode 100644
index 000000000..009afa0f5
--- /dev/null
+++ b/arch/arm/lib/testclearbit.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/testclearbit.S
+ *
+ * Copyright (C) 1995-1996 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "bitops.h"
+ .text
+
+testop _test_and_clear_bit, bicne, strne
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
new file mode 100644
index 000000000..f3192e55a
--- /dev/null
+++ b/arch/arm/lib/testsetbit.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/testsetbit.S
+ *
+ * Copyright (C) 1995-1996 Russell King
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "bitops.h"
+ .text
+
+testop _test_and_set_bit, orreq, streq
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
new file mode 100644
index 000000000..106f83a5e
--- /dev/null
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/arch/arm/lib/uaccess_with_memcpy.c
+ *
+ * Written by: Lennert Buytenhek and Nicolas Pitre
+ * Copyright (C) 2009 Marvell Semiconductor
+ */
+
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/uaccess.h>
+#include <linux/rwsem.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/hardirq.h> /* for in_atomic() */
+#include <linux/gfp.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <asm/current.h>
+#include <asm/page.h>
+
+static int
+pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
+{
+ unsigned long addr = (unsigned long)_addr;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pmd_t *pmd;
+ pte_t *pte;
+ pud_t *pud;
+ spinlock_t *ptl;
+
+ pgd = pgd_offset(current->mm, addr);
+ if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
+ return 0;
+
+ p4d = p4d_offset(pgd, addr);
+ if (unlikely(p4d_none(*p4d) || p4d_bad(*p4d)))
+ return 0;
+
+ pud = pud_offset(p4d, addr);
+ if (unlikely(pud_none(*pud) || pud_bad(*pud)))
+ return 0;
+
+ pmd = pmd_offset(pud, addr);
+ if (unlikely(pmd_none(*pmd)))
+ return 0;
+
+ /*
+ * A pmd can be bad if it refers to a HugeTLB or THP page.
+ *
+ * Both THP and HugeTLB pages have the same pmd layout
+ * and should not be manipulated by the pte functions.
+ *
+ * Lock the page table for the destination and check
+ * to see that it's still huge and whether or not we will
+ * need to fault on write.
+ */
+ if (unlikely(pmd_thp_or_huge(*pmd))) {
+ ptl = &current->mm->page_table_lock;
+ spin_lock(ptl);
+ if (unlikely(!pmd_thp_or_huge(*pmd)
+ || pmd_hugewillfault(*pmd))) {
+ spin_unlock(ptl);
+ return 0;
+ }
+
+ *ptep = NULL;
+ *ptlp = ptl;
+ return 1;
+ }
+
+ if (unlikely(pmd_bad(*pmd)))
+ return 0;
+
+ pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
+ if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
+ !pte_write(*pte) || !pte_dirty(*pte))) {
+ pte_unmap_unlock(pte, ptl);
+ return 0;
+ }
+
+ *ptep = pte;
+ *ptlp = ptl;
+
+ return 1;
+}
+
+static unsigned long noinline
+__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
+{
+ unsigned long ua_flags;
+ int atomic;
+
+ if (uaccess_kernel()) {
+ memcpy((void *)to, from, n);
+ return 0;
+ }
+
+ /* the mmap semaphore is taken only if not in an atomic context */
+ atomic = faulthandler_disabled();
+
+ if (!atomic)
+ mmap_read_lock(current->mm);
+ while (n) {
+ pte_t *pte;
+ spinlock_t *ptl;
+ int tocopy;
+
+ while (!pin_page_for_write(to, &pte, &ptl)) {
+ if (!atomic)
+ mmap_read_unlock(current->mm);
+ if (__put_user(0, (char __user *)to))
+ goto out;
+ if (!atomic)
+ mmap_read_lock(current->mm);
+ }
+
+ tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1;
+ if (tocopy > n)
+ tocopy = n;
+
+ ua_flags = uaccess_save_and_enable();
+ memcpy((void *)to, from, tocopy);
+ uaccess_restore(ua_flags);
+ to += tocopy;
+ from += tocopy;
+ n -= tocopy;
+
+ if (pte)
+ pte_unmap_unlock(pte, ptl);
+ else
+ spin_unlock(ptl);
+ }
+ if (!atomic)
+ mmap_read_unlock(current->mm);
+
+out:
+ return n;
+}
+
+unsigned long
+arm_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ /*
+ * This test is stubbed out of the main function above to keep
+ * the overhead for small copies low by avoiding a large
+ * register dump on the stack just to reload them right away.
+ * With frame pointer disabled, tail call optimization kicks in
+ * as well making this test almost invisible.
+ */
+ if (n < 64) {
+ unsigned long ua_flags = uaccess_save_and_enable();
+ n = __copy_to_user_std(to, from, n);
+ uaccess_restore(ua_flags);
+ } else {
+ n = __copy_to_user_memcpy(uaccess_mask_range_ptr(to, n),
+ from, n);
+ }
+ return n;
+}
+
+static unsigned long noinline
+__clear_user_memset(void __user *addr, unsigned long n)
+{
+ unsigned long ua_flags;
+
+ if (uaccess_kernel()) {
+ memset((void *)addr, 0, n);
+ return 0;
+ }
+
+ mmap_read_lock(current->mm);
+ while (n) {
+ pte_t *pte;
+ spinlock_t *ptl;
+ int tocopy;
+
+ while (!pin_page_for_write(addr, &pte, &ptl)) {
+ mmap_read_unlock(current->mm);
+ if (__put_user(0, (char __user *)addr))
+ goto out;
+ mmap_read_lock(current->mm);
+ }
+
+ tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1;
+ if (tocopy > n)
+ tocopy = n;
+
+ ua_flags = uaccess_save_and_enable();
+ memset((void *)addr, 0, tocopy);
+ uaccess_restore(ua_flags);
+ addr += tocopy;
+ n -= tocopy;
+
+ if (pte)
+ pte_unmap_unlock(pte, ptl);
+ else
+ spin_unlock(ptl);
+ }
+ mmap_read_unlock(current->mm);
+
+out:
+ return n;
+}
+
+unsigned long arm_clear_user(void __user *addr, unsigned long n)
+{
+ /* See rational for this in __copy_to_user() above. */
+ if (n < 64) {
+ unsigned long ua_flags = uaccess_save_and_enable();
+ n = __clear_user_std(addr, n);
+ uaccess_restore(ua_flags);
+ } else {
+ n = __clear_user_memset(addr, n);
+ }
+ return n;
+}
+
+#if 0
+
+/*
+ * This code is disabled by default, but kept around in case the chosen
+ * thresholds need to be revalidated. Some overhead (small but still)
+ * would be implied by a runtime determined variable threshold, and
+ * so far the measurement on concerned targets didn't show a worthwhile
+ * variation.
+ *
+ * Note that a fairly precise sched_clock() implementation is needed
+ * for results to make some sense.
+ */
+
+#include <linux/vmalloc.h>
+
+static int __init test_size_treshold(void)
+{
+ struct page *src_page, *dst_page;
+ void *user_ptr, *kernel_ptr;
+ unsigned long long t0, t1, t2;
+ int size, ret;
+
+ ret = -ENOMEM;
+ src_page = alloc_page(GFP_KERNEL);
+ if (!src_page)
+ goto no_src;
+ dst_page = alloc_page(GFP_KERNEL);
+ if (!dst_page)
+ goto no_dst;
+ kernel_ptr = page_address(src_page);
+ user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010));
+ if (!user_ptr)
+ goto no_vmap;
+
+ /* warm up the src page dcache */
+ ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE);
+
+ for (size = PAGE_SIZE; size >= 4; size /= 2) {
+ t0 = sched_clock();
+ ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size);
+ t1 = sched_clock();
+ ret |= __copy_to_user_std(user_ptr, kernel_ptr, size);
+ t2 = sched_clock();
+ printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
+ }
+
+ for (size = PAGE_SIZE; size >= 4; size /= 2) {
+ t0 = sched_clock();
+ ret |= __clear_user_memset(user_ptr, size);
+ t1 = sched_clock();
+ ret |= __clear_user_std(user_ptr, size);
+ t2 = sched_clock();
+ printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
+ }
+
+ if (ret)
+ ret = -EFAULT;
+
+ vunmap(user_ptr);
+no_vmap:
+ put_page(dst_page);
+no_dst:
+ put_page(src_page);
+no_src:
+ return ret;
+}
+
+subsys_initcall(test_size_treshold);
+
+#endif
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
new file mode 100644
index 000000000..679e16a21
--- /dev/null
+++ b/arch/arm/lib/ucmpdi2.S
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/arch/arm/lib/ucmpdi2.S
+ *
+ * Author: Nicolas Pitre
+ * Created: Oct 19, 2005
+ * Copyright: Monta Vista Software, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifdef __ARMEB__
+#define xh r0
+#define xl r1
+#define yh r2
+#define yl r3
+#else
+#define xl r0
+#define xh r1
+#define yl r2
+#define yh r3
+#endif
+
+ENTRY(__ucmpdi2)
+
+ cmp xh, yh
+ cmpeq xl, yl
+ movlo r0, #0
+ moveq r0, #1
+ movhi r0, #2
+ ret lr
+
+ENDPROC(__ucmpdi2)
+
+#ifdef CONFIG_AEABI
+
+ENTRY(__aeabi_ulcmp)
+
+ cmp xh, yh
+ cmpeq xl, yl
+ movlo r0, #-1
+ moveq r0, #0
+ movhi r0, #1
+ ret lr
+
+ENDPROC(__aeabi_ulcmp)
+
+#endif
+
diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c
new file mode 100644
index 000000000..7ba6cf826
--- /dev/null
+++ b/arch/arm/lib/xor-neon.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/arch/arm/lib/xor-neon.c
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <linux/raid/xor.h>
+#include <linux/module.h>
+
+MODULE_LICENSE("GPL");
+
+#ifndef __ARM_NEON__
+#error You should compile this file with '-march=armv7-a -mfloat-abi=softfp -mfpu=neon'
+#endif
+
+/*
+ * Pull in the reference implementations while instructing GCC (through
+ * -ftree-vectorize) to attempt to exploit implicit parallelism and emit
+ * NEON instructions.
+ */
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#pragma GCC optimize "tree-vectorize"
+#else
+/*
+ * While older versions of GCC do not generate incorrect code, they fail to
+ * recognize the parallel nature of these functions, and emit plain ARM code,
+ * which is known to be slower than the optimized ARM code in asm-arm/xor.h.
+ *
+ * #warning This code requires at least version 4.6 of GCC
+ */
+#endif
+
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#include <asm-generic/xor.h>
+
+struct xor_block_template const xor_block_neon_inner = {
+ .name = "__inner_neon__",
+ .do_2 = xor_8regs_2,
+ .do_3 = xor_8regs_3,
+ .do_4 = xor_8regs_4,
+ .do_5 = xor_8regs_5,
+};
+EXPORT_SYMBOL(xor_block_neon_inner);