diff options
Diffstat (limited to '')
-rw-r--r-- | src/internal/bytealg/equal_ppc64x.s | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/src/internal/bytealg/equal_ppc64x.s b/src/internal/bytealg/equal_ppc64x.s
new file mode 100644
index 0000000..18171ea
--- /dev/null
+++ b/src/internal/bytealg/equal_ppc64x.s
@@ -0,0 +1,102 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64 ppc64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool: loads the arguments and the result address, then branches to memeqbody, which stores the result byte and returns.
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25 // 25 = three 8-byte arguments plus the result byte at offset 24
+	MOVD a+0(FP), R3 // R3 = a
+	MOVD b+8(FP), R4 // R4 = b
+	MOVD size+16(FP), R5 // R5 = number of bytes to compare
+	MOVD $ret+24(FP), R10 // R10 = address of the result byte
+
+	BR memeqbody<>(SB) // branch (no link): memeqbody writes through R10 and executes RET itself
+
+// memequal_varlen(a, b unsafe.Pointer) bool: like memequal, but the length is read from the closure and identical pointers short-circuit to true.
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17 // 17 = two 8-byte arguments plus the result byte at offset 16
+	MOVD a+0(FP), R3 // R3 = a
+	MOVD b+8(FP), R4 // R4 = b
+	CMP R3, R4 // same pointer?
+	BEQ eq // yes: report equal without reading memory
+	MOVD 8(R11), R5 // compiler stores size at offset 8 in the closure (R11 presumably holds the closure pointer; confirm)
+	MOVD $ret+16(FP), R10 // R10 = address of the result byte
+	BR memeqbody<>(SB) // branch (no link): memeqbody writes through R10 and executes RET itself
+eq:
+	MOVD $1, R3 // result = true
+	MOVB R3, ret+16(FP) // store it directly into the result slot
+	RET
+
+// Do an efficient memequal for ppc64: 32 bytes per iteration with vector loads, then 8-byte words, then single bytes.
+// R3 = s1 (advanced as bytes are consumed)
+// R4 = s2 (advanced as bytes are consumed)
+// R5 = len (only read here; the 8-byte and 1-byte remainders are derived from it)
+// R10 = addr of return value (byte). Scratch used below: R6, R7, R14, CTR, VS32/VS33 (V0/V1), V2, CR0, CR6.
+TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
+	MOVD R5,CTR // CTR = len; this is the byte-loop count when len < 8 (simplecheck does not reload CTR)
+	CMP R5,$8 // only optimize >=8
+	BLT simplecheck // len < 8: byte loop only
+	DCBT (R3) // cache hint
+	DCBT (R4) // cache hint for the second buffer
+	CMP R5,$32 // optimize >= 32
+	MOVD R5,R6 // needed if setup8a branch (setup8a derives its count from R6)
+	BLT setup8a // 8 <= len < 32: 8 byte moves only
+setup32a: // >= 32 bytes (original note says "8 byte aligned"; no alignment test is visible here, confirm)
+	SRADCC $5,R5,R6 // R6 = len >> 5 = number of 32 byte chunks to compare
+	MOVD R6,CTR // loop32a iteration count
+	MOVD $16,R14 // offset of the second 16-byte half for the indexed VSX loads
+loop32a:
+	LXVD2X (R3+R0), VS32 // VS32 = V0; assumes R0 holds 0 (TODO confirm)
+	LXVD2X (R4+R0), VS33 // VS33 = V1
+	VCMPEQUBCC V0, V1, V2 // compare bytes 0-15, setting CR6
+	BGE CR6, noteq // presumably taken unless all 16 bytes matched (confirm CR6 encoding)
+	LXVD2X (R3+R14), VS32 // bytes 16-31 of s1
+	LXVD2X (R4+R14), VS33 // bytes 16-31 of s2
+	VCMPEQUBCC V0, V1, V2 // compare bytes 16-31, setting CR6
+	BGE CR6, noteq // same test as above for the second half
+	ADD $32,R3 // bump up to next 32
+	ADD $32,R4
+	BC 16, 0, loop32a // decrement CTR and loop while CTR != 0 (BO=16 presumably ignores the CR bit; confirm)
+	ANDCC $24,R5,R6 // R6 = len & 24: any 8 byte chunks left after the 32-byte loop?
+	BEQ leftover // none: go handle the final 0-7 bytes
+setup8a:
+	SRADCC $3,R6,R6 // get the 8 byte count
+	BEQ leftover // shifted value is 0
+	MOVD R6,CTR // loop8 iteration count
+loop8:
+	MOVD 0(R3),R6 // doublewords to compare
+	ADD $8,R3
+	MOVD 0(R4),R7
+	ADD $8,R4
+	CMP R6,R7 // match?
+	BC 8,2,loop8 // decrement CTR; loop while CTR != 0 and the doublewords matched
+	BNE noteq // left the loop because of a mismatch
+leftover:
+	ANDCC $7,R5,R6 // R6 = len & 7: check for leftover bytes
+	BEQ equal // none left: everything matched
+	MOVD R6,CTR // byte loop runs once per leftover byte
+	BR simple
+simplecheck:
+	CMP R5,$0 // zero-length inputs compare equal
+	BEQ equal
+simple: // byte-at-a-time loop; CTR holds the remaining byte count
+	MOVBZ 0(R3), R6 // next byte of s1
+	ADD $1,R3
+	MOVBZ 0(R4), R7 // next byte of s2
+	ADD $1,R4
+	CMP R6, R7
+	BNE noteq // bytes differ
+	BC 8,2,simple // decrement CTR; loop while CTR != 0 (bytes are known equal here)
+	BNE noteq // NOTE(review): looks unreachable, the BNE above already handled a mismatch (confirm)
+	BR equal
+noteq:
+	MOVB $0, (R10) // *ret = false
+	RET
+equal:
+	MOVD $1, R3
+	MOVB R3, (R10) // *ret = true
+	RET
+
|