Diffstat (limited to 'src/internal/bytealg/compare_riscv64.s')
-rw-r--r--  src/internal/bytealg/compare_riscv64.s | 222
1 file changed, 222 insertions(+), 0 deletions(-)
diff --git a/src/internal/bytealg/compare_riscv64.s b/src/internal/bytealg/compare_riscv64.s
new file mode 100644
index 0000000..a4164a2
--- /dev/null
+++ b/src/internal/bytealg/compare_riscv64.s
@@ -0,0 +1,222 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
+ // X10 = a_base
+ // X11 = a_len
+ // X12 = a_cap (unused)
+ // X13 = b_base (want in X12)
+ // X14 = b_len (want in X13)
+ // X15 = b_cap (unused)
+ MOV X13, X12
+ MOV X14, X13
+ JMP compare<>(SB)
+
+TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
+ // X10 = a_base
+ // X11 = a_len
+ // X12 = b_base
+ // X13 = b_len
+ JMP compare<>(SB)
+
+// On entry:
+// X10 points to start of a
+// X11 length of a
+// X12 points to start of b
+// X13 length of b
+// for non-regabi, X14 points to the address at which to store the return value (-1/0/1)
+// for regabi, the return value is stored in X10
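+//
+// The comparison proceeds in stages: 32 bytes at a time while both
+// inputs share 8 byte alignment, then 16 bytes at a time, then
+// unaligned 8 and 4 byte blocks, and finally single bytes.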
+TEXT compare<>(SB),NOSPLIT|NOFRAME,$0
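+	// If a and b share a base pointer, their common prefix is
+	// identical, so the result depends only on the lengths.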
+ BEQ X10, X12, cmp_len
+
+ MOV X11, X5
+ BGE X13, X5, use_a_len // X5 = min(len(a), len(b))
+ MOV X13, X5
+use_a_len:
+ BEQZ X5, cmp_len
+
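+	// Only attempt the word-compare path when at least 32 bytes remain.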
+ MOV $32, X6
+ BLT X5, X6, check8_unaligned
+
+	// Check alignment - if the alignments differ, compare one byte at a time.
+ AND $7, X10, X7
+ AND $7, X12, X8
+ BNE X7, X8, check8_unaligned
+ BEQZ X7, compare32
+
+	// Compare one byte at a time until we reach 8 byte alignment.
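+	// X7 = 8 - (a_base & 7), the number of bytes needed to reach
+	// alignment; deduct them from the remaining length in X5 up front.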
+ SUB X7, X0, X7
+ ADD $8, X7, X7
+ SUB X7, X5, X5
+align:
+ ADD $-1, X7
+ MOVBU 0(X10), X8
+ MOVBU 0(X12), X9
+ BNE X8, X9, cmp
+ ADD $1, X10
+ ADD $1, X12
+ BNEZ X7, align
+
+check32:
+ // X6 contains $32
+ BLT X5, X6, compare16
+compare32:
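+	// Compare 32 bytes per iteration using four 8 byte loads from each input.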
+ MOV 0(X10), X15
+ MOV 0(X12), X16
+ MOV 8(X10), X17
+ MOV 8(X12), X18
+ BNE X15, X16, cmp8a
+ BNE X17, X18, cmp8b
+ MOV 16(X10), X15
+ MOV 16(X12), X16
+ MOV 24(X10), X17
+ MOV 24(X12), X18
+ BNE X15, X16, cmp8a
+ BNE X17, X18, cmp8b
+ ADD $32, X10
+ ADD $32, X12
+ ADD $-32, X5
+ BGE X5, X6, compare32
+ BEQZ X5, cmp_len
+
+check16:
+ MOV $16, X6
+ BLT X5, X6, check8_unaligned
+compare16:
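+	// Compare the next 16 bytes using two 8 byte loads from each input.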
+ MOV 0(X10), X15
+ MOV 0(X12), X16
+ MOV 8(X10), X17
+ MOV 8(X12), X18
+ BNE X15, X16, cmp8a
+ BNE X17, X18, cmp8b
+ ADD $16, X10
+ ADD $16, X12
+ ADD $-16, X5
+ BEQZ X5, cmp_len
+
+check8_unaligned:
+ MOV $8, X6
+ BLT X5, X6, check4_unaligned
+compare8_unaligned:
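+	// Load 8 bytes from each input a byte at a time, since no alignment
+	// can be assumed, then compare them pairwise.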
+ MOVBU 0(X10), X8
+ MOVBU 1(X10), X15
+ MOVBU 2(X10), X17
+ MOVBU 3(X10), X19
+ MOVBU 4(X10), X21
+ MOVBU 5(X10), X23
+ MOVBU 6(X10), X25
+ MOVBU 7(X10), X29
+ MOVBU 0(X12), X9
+ MOVBU 1(X12), X16
+ MOVBU 2(X12), X18
+ MOVBU 3(X12), X20
+ MOVBU 4(X12), X22
+ MOVBU 5(X12), X24
+ MOVBU 6(X12), X28
+ MOVBU 7(X12), X30
+ BNE X8, X9, cmp1a
+ BNE X15, X16, cmp1b
+ BNE X17, X18, cmp1c
+ BNE X19, X20, cmp1d
+ BNE X21, X22, cmp1e
+ BNE X23, X24, cmp1f
+ BNE X25, X28, cmp1g
+ BNE X29, X30, cmp1h
+ ADD $8, X10
+ ADD $8, X12
+ ADD $-8, X5
+ BGE X5, X6, compare8_unaligned
+ BEQZ X5, cmp_len
+
+check4_unaligned:
+ MOV $4, X6
+ BLT X5, X6, compare1
+compare4_unaligned:
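+	// As above, but 4 bytes at a time.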
+ MOVBU 0(X10), X8
+ MOVBU 1(X10), X15
+ MOVBU 2(X10), X17
+ MOVBU 3(X10), X19
+ MOVBU 0(X12), X9
+ MOVBU 1(X12), X16
+ MOVBU 2(X12), X18
+ MOVBU 3(X12), X20
+ BNE X8, X9, cmp1a
+ BNE X15, X16, cmp1b
+ BNE X17, X18, cmp1c
+ BNE X19, X20, cmp1d
+ ADD $4, X10
+ ADD $4, X12
+ ADD $-4, X5
+ BGE X5, X6, compare4_unaligned
+
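+	// Compare the remaining (fewer than 4) bytes one at a time.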
+compare1:
+ BEQZ X5, cmp_len
+ MOVBU 0(X10), X8
+ MOVBU 0(X12), X9
+ BNE X8, X9, cmp
+ ADD $1, X10
+ ADD $1, X12
+ ADD $-1, X5
+ JMP compare1
+
+ // Compare 8 bytes of memory in X15/X16 that are known to differ.
+cmp8a:
+ MOV X15, X17
+ MOV X16, X18
+
+ // Compare 8 bytes of memory in X17/X18 that are known to differ.
+cmp8b:
+ MOV $0xff, X19
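+	// The words are known to differ, so scanning upwards from the least
+	// significant byte with a sliding 0xff mask must terminate. Loads are
+	// little-endian, so this finds the first differing byte in memory order.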
+cmp8_loop:
+ AND X17, X19, X8
+ AND X18, X19, X9
+ BNE X8, X9, cmp
+ SLLI $8, X19
+ JMP cmp8_loop
+
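+	// Each cmp1x block sets X5 = 1 if the byte from a is below the byte
+	// from b, and X6 = 1 if it is above, then jumps to cmp_ret.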
+cmp1a:
+ SLTU X9, X8, X5
+ SLTU X8, X9, X6
+ JMP cmp_ret
+cmp1b:
+ SLTU X16, X15, X5
+ SLTU X15, X16, X6
+ JMP cmp_ret
+cmp1c:
+ SLTU X18, X17, X5
+ SLTU X17, X18, X6
+ JMP cmp_ret
+cmp1d:
+ SLTU X20, X19, X5
+ SLTU X19, X20, X6
+ JMP cmp_ret
+cmp1e:
+ SLTU X22, X21, X5
+ SLTU X21, X22, X6
+ JMP cmp_ret
+cmp1f:
+ SLTU X24, X23, X5
+ SLTU X23, X24, X6
+ JMP cmp_ret
+cmp1g:
+ SLTU X28, X25, X5
+ SLTU X25, X28, X6
+ JMP cmp_ret
+cmp1h:
+ SLTU X30, X29, X5
+ SLTU X29, X30, X6
+ JMP cmp_ret
+
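+	// The compared bytes were all equal: order by length.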
+cmp_len:
+ MOV X11, X8
+ MOV X13, X9
+cmp:
+ SLTU X9, X8, X5
+ SLTU X8, X9, X6
+cmp_ret:
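+	// X10 = X6 - X5, i.e. -1 if a < b, 0 if a == b, +1 if a > b.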
+ SUB X5, X6, X10
+ RET