diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 13:16:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 13:16:40 +0000 |
commit | 47ab3d4a42e9ab51c465c4322d2ec233f6324e6b (patch) | |
tree | a61a0ffd83f4a3def4b36e5c8e99630c559aa723 /src/runtime/memmove_ppc64x.s | |
parent | Initial commit. (diff) | |
download | golang-1.18-upstream.tar.xz golang-1.18-upstream.zip |
Adding upstream version 1.18.10.upstream/1.18.10upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/runtime/memmove_ppc64x.s')
-rw-r--r-- | src/runtime/memmove_ppc64x.s | 174 |
1 files changed, 174 insertions, 0 deletions
diff --git a/src/runtime/memmove_ppc64x.s b/src/runtime/memmove_ppc64x.s new file mode 100644 index 0000000..e69e71a --- /dev/null +++ b/src/runtime/memmove_ppc64x.s @@ -0,0 +1,174 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ppc64 || ppc64le + +#include "textflag.h" + +// See memmove Go doc for important implementation constraints. + +// func memmove(to, from unsafe.Pointer, n uintptr) + +// target address +#define TGT R3 +// source address +#define SRC R4 +// length to move +#define LEN R5 +// number of doublewords +#define DWORDS R6 +// number of bytes < 8 +#define BYTES R7 +// const 16 used as index +#define IDX16 R8 +// temp used for copies, etc. +#define TMP R9 +// number of 32 byte chunks +#define QWORDS R10 + +TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24 +#ifndef GOEXPERIMENT_regabiargs + MOVD to+0(FP), TGT + MOVD from+8(FP), SRC + MOVD n+16(FP), LEN +#endif + + // Determine if there are doublewords to + // copy so a more efficient move can be done +check: + ANDCC $7, LEN, BYTES // R7: bytes to copy + SRD $3, LEN, DWORDS // R6: double words to copy + MOVFL CR0, CR3 // save CR from ANDCC + CMP DWORDS, $0, CR1 // CR1[EQ] set if no double words to copy + + // Determine overlap by subtracting dest - src and comparing against the + // length. This catches the cases where src and dest are in different types + // of storage such as stack and static to avoid doing backward move when not + // necessary. + + SUB SRC, TGT, TMP // dest - src + CMPU TMP, LEN, CR2 // < len? + BC 12, 8, backward // BLT CR2 backward + + // Copying forward if no overlap. + + BC 12, 6, checkbytes // BEQ CR1, checkbytes + SRDCC $2, DWORDS, QWORDS // 32 byte chunks? + BEQ lt32gt8 // < 32 bytes + + // Prepare for moves of 32 bytes at a time. + +forward32setup: + DCBTST (TGT) // prepare data cache + DCBT (SRC) + MOVD QWORDS, CTR // Number of 32 byte chunks + MOVD $16, IDX16 // 16 for index + +forward32: + LXVD2X (R0)(SRC), VS32 // load 16 bytes + LXVD2X (IDX16)(SRC), VS33 // load 16 bytes + ADD $32, SRC + STXVD2X VS32, (R0)(TGT) // store 16 bytes + STXVD2X VS33, (IDX16)(TGT) + ADD $32,TGT // bump up for next set + BC 16, 0, forward32 // continue + ANDCC $3, DWORDS // remaining doublewords + BEQ checkbytes // only bytes remain + +lt32gt8: + // At this point >= 8 and < 32 + // Move 16 bytes if possible + CMP DWORDS, $2 + BLT lt16 + LXVD2X (R0)(SRC), VS32 + ADD $-2, DWORDS + STXVD2X VS32, (R0)(TGT) + ADD $16, SRC + ADD $16, TGT + +lt16: // Move 8 bytes if possible + CMP DWORDS, $1 + BLT checkbytes + MOVD 0(SRC), TMP + ADD $8, SRC + MOVD TMP, 0(TGT) + ADD $8, TGT +checkbytes: + BC 12, 14, LR // BEQ lr +lt8: // Move word if possible + CMP BYTES, $4 + BLT lt4 + MOVWZ 0(SRC), TMP + ADD $-4, BYTES + MOVW TMP, 0(TGT) + ADD $4, SRC + ADD $4, TGT +lt4: // Move halfword if possible + CMP BYTES, $2 + BLT lt2 + MOVHZ 0(SRC), TMP + ADD $-2, BYTES + MOVH TMP, 0(TGT) + ADD $2, SRC + ADD $2, TGT +lt2: // Move last byte if 1 left + CMP BYTES, $1 + BC 12, 0, LR // ble lr + MOVBZ 0(SRC), TMP + MOVBZ TMP, 0(TGT) + RET + +backward: + // Copying backwards proceeds by copying R7 bytes then copying R6 double words. + // R3 and R4 are advanced to the end of the destination/source buffers + // respectively and moved back as we copy. + + ADD LEN, SRC, SRC // end of source + ADD TGT, LEN, TGT // end of dest + + BEQ nobackwardtail // earlier condition + + MOVD BYTES, CTR // bytes to move + +backwardtailloop: + MOVBZ -1(SRC), TMP // point to last byte + SUB $1,SRC + MOVBZ TMP, -1(TGT) + SUB $1,TGT + BC 16, 0, backwardtailloop // bndz + +nobackwardtail: + BC 4, 5, LR // ble CR1 lr + +backwardlarge: + MOVD DWORDS, CTR + SUB TGT, SRC, TMP // Use vsx if moving + CMP TMP, $32 // at least 32 byte chunks + BLT backwardlargeloop // and distance >= 32 + SRDCC $2,DWORDS,QWORDS // 32 byte chunks + BNE backward32setup + +backwardlargeloop: + MOVD -8(SRC), TMP + SUB $8,SRC + MOVD TMP, -8(TGT) + SUB $8,TGT + BC 16, 0, backwardlargeloop // bndz + RET + +backward32setup: + MOVD QWORDS, CTR // set up loop ctr + MOVD $16, IDX16 // 32 bytes at a time + +backward32loop: + SUB $32, TGT + SUB $32, SRC + LXVD2X (R0)(TGT), VS32 // load 16 bytes + LXVD2X (IDX16)(TGT), VS33 + STXVD2X VS32, (R0)(SRC) // store 16 bytes + STXVD2X VS33, (IDX16)(SRC) + BC 16, 0, backward32loop // bndz + BC 4, 5, LR // ble CR1 lr + MOVD DWORDS, CTR + BR backwardlargeloop |