summaryrefslogtreecommitdiffstats
path: root/arch/arc/lib/strcpy-700.S
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
commitace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
treeb2d64bc10158fdd5497876388cd68142ca374ed3 /arch/arc/lib/strcpy-700.S
parentInitial commit. (diff)
downloadlinux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/arc/lib/strcpy-700.S')
-rw-r--r--arch/arc/lib/strcpy-700.S67
1 files changed, 67 insertions, 0 deletions
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
new file mode 100644
index 0000000000..6e2294d13e
--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ */
+
+/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
+ If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+ it 8 byte aligned. Thus, we can do a little read-ahead, without
+ dereferencing a cache line that we should not touch.
+ Note that short and long instructions have been scheduled to avoid
+ branch stalls.
+ The beq_s to r3z could be made unaligned & long to avoid a stall
+ there, but the it is not likely to be taken often, and it
+ would also be likey to cost an unaligned mispredict at the next call. */
+
+#include <linux/linkage.h>
+
+ENTRY_CFI(strcpy)
+ or r2,r0,r1
+ bmsk_s r2,r2,1
+ brne.d r2,0,charloop
+ mov_s r10,r0
+ ld_s r3,[r1,0]
+ mov r8,0x01010101
+ bbit0.d r1,2,loop_start
+ ror r12,r8
+ sub r2,r3,r8
+ bic_s r2,r2,r3
+ tst_s r2,r12
+ bne r3z
+ mov_s r4,r3
+ .balign 4
+loop:
+ ld.a r3,[r1,4]
+ st.ab r4,[r10,4]
+loop_start:
+ ld.a r4,[r1,4]
+ sub r2,r3,r8
+ bic_s r2,r2,r3
+ tst_s r2,r12
+ bne_s r3z
+ st.ab r3,[r10,4]
+ sub r2,r4,r8
+ bic r2,r2,r4
+ tst r2,r12
+ beq loop
+ mov_s r3,r4
+#ifdef __LITTLE_ENDIAN__
+r3z: bmsk.f r1,r3,7
+ lsr_s r3,r3,8
+#else
+r3z: lsr.f r1,r3,24
+ asl_s r3,r3,8
+#endif
+ bne.d r3z
+ stb.ab r1,[r10,1]
+ j_s [blink]
+
+ .balign 4
+charloop:
+ ldb.ab r3,[r1,1]
+
+
+ brne.d r3,0,charloop
+ stb.ab r3,[r10,1]
+ j [blink]
+END_CFI(strcpy)