summaryrefslogtreecommitdiffstats
path: root/src/internal/bytealg/equal_ppc64x.s
diff options
context:
space:
mode:
Diffstat (limited to 'src/internal/bytealg/equal_ppc64x.s')
-rw-r--r--src/internal/bytealg/equal_ppc64x.s102
1 files changed, 102 insertions, 0 deletions
diff --git a/src/internal/bytealg/equal_ppc64x.s b/src/internal/bytealg/equal_ppc64x.s
new file mode 100644
index 0000000..18171ea
--- /dev/null
+++ b/src/internal/bytealg/equal_ppc64x.s
@@ -0,0 +1,102 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64 ppc64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// func memequal(a, b unsafe.Pointer, size uintptr) bool
+//
+// Argument-marshalling stub: loads the registers that memeqbody<> expects
+// (R3 = a, R4 = b, R5 = size, R10 = address of the result byte) and
+// branches to it. memeqbody<> stores the result and returns to our caller.
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+ MOVD $ret+24(FP), R10 // where memeqbody<> writes the bool
+ MOVD size+16(FP), R5 // byte count
+ MOVD b+8(FP), R4 // second operand
+ MOVD a+0(FP), R3 // first operand
+ BR memeqbody<>(SB) // tail branch, no return to this stub
+
+// func memequal_varlen(a, b unsafe.Pointer) bool
+//
+// Same comparison as memequal, but the byte count is not an argument: it is
+// read from the closure context (R11) at offset 8. Identical pointers are
+// answered immediately without touching memory.
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
+ MOVD a+0(FP), R3
+ MOVD b+8(FP), R4
+ CMP R3, R4
+ BNE ptrsdiffer // distinct addresses: do the real comparison
+ MOVD $1, R3 // same address => trivially equal
+ MOVB R3, ret+16(FP)
+ RET
+ptrsdiffer:
+ MOVD 8(R11), R5 // compiler stores size at offset 8 in the closure
+ MOVD $ret+16(FP), R10 // address of the result byte for memeqbody<>
+ BR memeqbody<>(SB)
+
+// Do an efficient memequal for ppc64
+//
+// Inputs:
+// R3 = s1
+// R4 = s2
+// R5 = len
+// R10 = addr of return value (byte)
+//
+// Output: the byte at (R10) is set to 1 if the len bytes at s1 and s2 are
+// identical (including len == 0), otherwise to 0.
+//
+// Scratch: R3, R4, R6, R7, R14, CTR, CR0, CR6, V0-V2. R5 is not modified.
+//
+// Strategy: lengths >= 32 are compared in 32 byte chunks using two 16 byte
+// VSX loads per operand; the remaining (or, for 8 <= len < 32, all)
+// doublewords are compared 8 bytes at a time; the final len&7 bytes, and
+// every length below 8, are compared byte by byte.
+TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
+ MOVD R5,CTR // byte count, used only by the len < 8 path
+ CMP R5,$8 // only optimize >=8
+ BLT simplecheck
+ DCBT (R3) // cache hint
+ DCBT (R4)
+ CMP R5,$32 // optimize >= 32
+ MOVD R5,R6 // needed if setup8a branch
+ BLT setup8a // 8 byte moves only
+setup32a: // >= 32 bytes
+ SRADCC $5,R5,R6 // number of 32 byte chunks to compare
+ MOVD R6,CTR
+ MOVD $16,R14 // index for the second VSX load of each chunk
+loop32a:
+ LXVD2X (R3+R0), VS32 // VS32 = V0
+ LXVD2X (R4+R0), VS33 // VS33 = V1
+ VCMPEQUBCC V0, V1, V2 // compare, setting CR6
+ BGE CR6, noteq // "all bytes equal" bit clear => mismatch
+ LXVD2X (R3+R14), VS32 // second 16 bytes of the chunk
+ LXVD2X (R4+R14), VS33
+ VCMPEQUBCC V0, V1, V2
+ BGE CR6, noteq
+ ADD $32,R3 // bump up to next 32
+ ADD $32,R4
+ BC 16, 0, loop32a // decrement CTR, loop while CTR != 0 (CR not tested)
+ ANDCC $24,R5,R6 // Any 8 byte chunks?
+ BEQ leftover // and result is 0
+setup8a:
+ SRADCC $3,R6,R6 // get the 8 byte count
+ BEQ leftover // shifted value is 0
+ MOVD R6,CTR
+loop8:
+ MOVD 0(R3),R6 // doublewords to compare
+ ADD $8,R3
+ MOVD 0(R4),R7
+ ADD $8,R4
+ CMP R6,R7 // match?
+ BC 8,2,loop8 // decrement CTR, loop while CTR != 0 && equal
+ BNE noteq // loop ended: fail if the last pair differed
+leftover:
+ ANDCC $7,R5,R6 // check for leftover bytes
+ BEQ equal
+ MOVD R6,CTR
+ BR simple
+simplecheck:
+ CMP R5,$0
+ BEQ equal
+simple:
+ MOVBZ 0(R3), R6
+ ADD $1,R3
+ MOVBZ 0(R4), R7
+ ADD $1,R4
+ CMP R6, R7
+ BNE noteq // mismatch leaves the loop here
+ BC 8,2,simple // EQ is set at this point, so loops while --CTR != 0
+ BR equal // all bytes matched
+noteq:
+ MOVB $0, (R10)
+ RET
+equal:
+ MOVD $1, R3
+ MOVB R3, (R10)
+ RET
+