diff options
Diffstat (limited to 'src/internal/bytealg/indexbyte_s390x.s')
-rw-r--r-- | src/internal/bytealg/indexbyte_s390x.s | 108 |
1 files changed, 108 insertions, 0 deletions
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "go_asm.h"
#include "textflag.h"

// ·IndexByte loads its arguments into the registers expected by
// indexbytebody<> and tail-branches to it.
//
// Frame layout used here: b_base at 0(FP), b_len at 8(FP), c at 24(FP),
// result at 32(FP). The word at 16(FP) is not read.
// NOTE(review): the Go declaration lives outside this file; the layout
// suggests func IndexByte(b []byte, c byte) int -- confirm.
TEXT ·IndexByte(SB),NOSPLIT|NOFRAME,$0-40
	MOVBZ	c+24(FP), R5		// R5 = c, zero-extended
	MOVD	b_base+0(FP), R3	// R3 = b_base
	MOVD	b_len+8(FP), R4		// R4 = b_len
	MOVD	$ret+32(FP), R2		// R2 = &ret
	BR	indexbytebody<>(SB)

// ·IndexByteString is the same stub for an argument frame with
// s_base at 0(FP), s_len at 8(FP), c at 16(FP) and the result at 24(FP).
// NOTE(review): presumably func IndexByteString(s string, c byte) int -- confirm.
TEXT ·IndexByteString(SB),NOSPLIT|NOFRAME,$0-32
	MOVBZ	c+16(FP), R5		// R5 = c, zero-extended
	MOVD	s_base+0(FP), R3	// R3 = s_base
	MOVD	s_len+8(FP), R4		// R4 = s_len
	MOVD	$ret+24(FP), R2		// R2 = &ret
	BR	indexbytebody<>(SB)

// indexbytebody<> searches R4 bytes starting at R3 for the byte in R5
// and stores the result through R2: the offset of the match from the
// original R3, or -1 if no byte matched.
//
// Registers on entry:
//	R2: address of the result slot
//	R3: address of the first byte to search
//	R4: number of bytes to search
//	R5: byte sought
//
// Scratch registers: R1, R6, R7, R8, V16-V19, and R0 on the SRST path
// (R0 is set back to zero before returning from that path).
//
// Strategy:
//	length 0            -> -1 immediately
//	length < 16         -> byte-at-a-time loop
//	length >= 16, no VX -> SEARCH STRING (SRST) instruction
//	length >= 16, VX    -> byte loop up to a 16-byte boundary, then
//	                       16 bytes per iteration with VFEEBS, then the
//	                       byte loop again for what is left
TEXT indexbytebody<>(SB),NOSPLIT|NOFRAME,$0
	CMPBEQ	R4, $0, nomatch
	ADD	R3, R4, R8		// R8 = address one past the last byte
	MOVD	R3, R6			// R6 = start address, used to turn addresses into offsets
	CMPBGE	R4, $16, longinput

bytescan:
	// Scan [R3, R8) one byte at a time. Also used by the vector path
	// to finish off the final partial chunk.
	CMPBEQ	R3, R8, nomatch
	MOVBZ	0(R3), R7
	LA	1(R3), R3
	CMPBNE	R7, R5, bytescan

matched:
	// R3 has already been stepped past the matching byte.
	SUB	$1, R3
	SUB	R6, R3
	MOVD	R3, 0(R2)
	RET

nomatch:
	MOVD	$-1, 0(R2)
	RET

longinput:
	// Pick the implementation based on the vector-facility flag.
	MOVBZ	internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1
	CMPBNE	R1, $0, vxsearch

srstsearch:
	// No vector facility. SRST takes the byte to find in R0, so R0 is
	// loaded as late as possible: the rest of the code expects R0 to be 0.
	MOVBZ	R5, R0
srstresume:
	WORD	$0xB25E0083		// srst %r8, %r3 -- search [R3, R8)
	BVS	srstresume		// search was interrupted before completing: resume it
	BGT	srstnomatch
srstmatched:
	XOR	R0, R0			// put R0 back to zero
	SUB	R6, R8			// R8 - start = offset of the match
	MOVD	R8, 0(R2)
	RET
srstnomatch:
	XOR	R0, R0			// put R0 back to zero
	MOVD	$-1, 0(R2)
	RET

vxsearch:
	// If R3 is not on a 16-byte boundary, handle the leading bytes with
	// a scalar loop first.
	MOVD	R3, R8
	AND	$15, R8
	CMPBGT	R8, $0, headalign

vxprepare:
	ADD	R6, R4, R8		// R8 = end address (recomputed; R8 was used as scratch)
	MOVD	R8, R7
	AND	$-16, R7		// R7 = end address rounded down to a multiple of 16
	// Broadcast c into every byte of V17.
	VLVGB	$0, R5, V19
	VREPB	$0, V19, V17

vxscan:
	CMPBGE	R3, R7, bytescan	// fewer than 16 bytes left: finish byte by byte
	VL	0(R3), V16		// V16 = next 16 bytes of input
	ADD	$16, R3			// advance now; the branch below consumes the condition code from VFEEBS
	VFEEBS	V16, V17, V18		// look for c in V16, setting the condition code
	BVS	vxscan			// no match in this chunk

	// Match: byte element 7 of V18 holds its position within the chunk.
	VLGVB	$7, V18, R7
	SUB	$16, R3			// back to the start of the chunk
	SUB	R6, R3			// offset of the chunk from the start
	ADD	R3, R7			// plus position within the chunk
	MOVD	R7, 0(R2)
	RET

headalign:
	MOVD	R3, R8
	AND	$-16, R8
	ADD	$16, R8			// R8 = next 16-byte boundary above R3
headscan:
	CMPBEQ	R3, R8, vxprepare	// reached the boundary: switch to vector search
	MOVBZ	0(R3), R7
	LA	1(R3), R3
	CMPBNE	R7, R5, headscan
	BR	matched