// Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // +build ppc64le ppc64 #include "go_asm.h" #include "textflag.h" TEXT ·Count(SB), NOSPLIT|NOFRAME, $0-40 MOVD b_base+0(FP), R3 // R3 = byte array pointer MOVD b_len+8(FP), R4 // R4 = length MOVBZ c+24(FP), R5 // R5 = byte MOVD $ret+32(FP), R14 // R14 = &ret BR countbytebody<>(SB) TEXT ·CountString(SB), NOSPLIT|NOFRAME, $0-32 MOVD s_base+0(FP), R3 // R3 = string MOVD s_len+8(FP), R4 // R4 = length MOVBZ c+16(FP), R5 // R5 = byte MOVD $ret+24(FP), R14 // R14 = &ret BR countbytebody<>(SB) // R3: addr of string // R4: len of string // R5: byte to count // R14: addr for return value // endianness shouldn't matter since we are just counting and order // is irrelevant TEXT countbytebody<>(SB), NOSPLIT|NOFRAME, $0-0 DCBT (R3) // Prepare cache line. MOVD R0, R18 // byte count MOVD R3, R19 // Save base address for calculating the index later. MOVD R4, R16 MOVD R5, R6 RLDIMI $8, R6, $48, R6 RLDIMI $16, R6, $32, R6 RLDIMI $32, R6, $0, R6 // fill reg with the byte to count VSPLTISW $3, V4 // used for shift MTVRD R6, V1 // move compare byte VSPLTB $7, V1, V1 // replicate byte across V1 CMPU R4, $32 // Check if it's a small string (<32 bytes) BLT tail // Jump to the small string case XXLXOR VS37, VS37, VS37 // clear V5 (aka VS37) to use as accumulator cmploop: LXVW4X (R3), VS32 // load bytes from string // when the bytes match, the corresponding byte contains all 1s VCMPEQUB V1, V0, V2 // compare bytes VPOPCNTD V2, V3 // each double word contains its count VADDUDM V3, V5, V5 // accumulate bit count in each double word ADD $16, R3, R3 // increment pointer SUB $16, R16, R16 // remaining bytes CMP R16, $16 // at least 16 remaining? BGE cmploop VSRD V5, V4, V5 // shift by 3 to convert bits to bytes VSLDOI $8, V5, V5, V6 // get the double word values from vector MFVSRD V5, R9 MFVSRD V6, R10 ADD R9, R10, R9 ADD R9, R18, R18 tail: CMP R16, $8 // 8 bytes left? BLT small MOVD (R3), R12 // load 8 bytes CMPB R12, R6, R17 // compare bytes POPCNTD R17, R15 // bit count SRD $3, R15, R15 // byte count ADD R15, R18, R18 // add to byte count next1: ADD $8, R3, R3 SUB $8, R16, R16 // remaining bytes BR tail small: CMP $0, R16 // any remaining BEQ done MOVBZ (R3), R12 // check each remaining byte CMP R12, R5 BNE next2 ADD $1, R18 next2: SUB $1, R16 ADD $1, R3 // inc address BR small done: MOVD R18, (R14) // return count RET