summaryrefslogtreecommitdiffstats
path: root/arch/alpha/lib/strlen.S
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--arch/alpha/lib/strlen.S59
1 files changed, 59 insertions, 0 deletions
diff --git a/arch/alpha/lib/strlen.S b/arch/alpha/lib/strlen.S
new file mode 100644
index 000000000..dd882fe4d
--- /dev/null
+++ b/arch/alpha/lib/strlen.S
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu)
+ *
+ * Finds length of a 0-terminated string. Optimized for the
+ * Alpha architecture:
+ *
+ * - memory accessed as aligned quadwords only
+ * - uses bcmpge to compare 8 bytes in parallel
+ * - does binary search to find 0 byte in last
+ * quadword (HAKMEM needed 12 instructions to
+ * do this instead of the 9 instructions that
+ * binary search needs).
+ */
+#include <asm/export.h>
+ .set noreorder
+ .set noat
+
+ .align 3
+
+ .globl strlen
+ .ent strlen
+
+strlen:
+ ldq_u $1, 0($16) # load first quadword ($16 may be misaligned)
+ lda $2, -1($31)
+ insqh $2, $16, $2
+ andnot $16, 7, $0
+ or $2, $1, $1
+ cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0
+ bne $2, found
+
+loop: ldq $1, 8($0)
+ addq $0, 8, $0 # addr += 8
+ nop # helps dual issue last two insns
+ cmpbge $31, $1, $2
+ beq $2, loop
+
+found: blbs $2, done # make aligned case fast
+ negq $2, $3
+ and $2, $3, $2
+
+ and $2, 0x0f, $1
+ addq $0, 4, $3
+ cmoveq $1, $3, $0
+
+ and $2, 0x33, $1
+ addq $0, 2, $3
+ cmoveq $1, $3, $0
+
+ and $2, 0x55, $1
+ addq $0, 1, $3
+ cmoveq $1, $3, $0
+
+done: subq $0, $16, $0
+ ret $31, ($26)
+
+ .end strlen
+ EXPORT_SYMBOL(strlen)