diff options
Diffstat (limited to 'src/crypto/subtle/xor_ppc64x.s')
-rw-r--r-- | src/crypto/subtle/xor_ppc64x.s | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/src/crypto/subtle/xor_ppc64x.s b/src/crypto/subtle/xor_ppc64x.s new file mode 100644 index 0000000..72bb80d --- /dev/null +++ b/src/crypto/subtle/xor_ppc64x.s @@ -0,0 +1,87 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build (ppc64 || ppc64le) && !purego + +#include "textflag.h" + +// func xorBytes(dst, a, b *byte, n int) +TEXT ·xorBytes(SB), NOSPLIT, $0 + MOVD dst+0(FP), R3 // R3 = dst + MOVD a+8(FP), R4 // R4 = a + MOVD b+16(FP), R5 // R5 = b + MOVD n+24(FP), R6 // R6 = n + + CMPU R6, $32, CR7 // Check if n ≥ 32 bytes + MOVD R0, R8 // R8 = index + CMPU R6, $8, CR6 // Check if 8 ≤ n < 32 bytes + BLT CR6, small // Smaller than 8 + BLT CR7, xor16 // Case for 16 ≤ n < 32 bytes + + // Case for n ≥ 32 bytes +preloop32: + SRD $5, R6, R7 // Setup loop counter + MOVD R7, CTR + MOVD $16, R10 + ANDCC $31, R6, R9 // Check for tailing bytes for later +loop32: + LXVD2X (R4)(R8), VS32 // VS32 = a[i,...,i+15] + LXVD2X (R4)(R10), VS34 + LXVD2X (R5)(R8), VS33 // VS33 = b[i,...,i+15] + LXVD2X (R5)(R10), VS35 + XXLXOR VS32, VS33, VS32 // VS34 = a[] ^ b[] + XXLXOR VS34, VS35, VS34 + STXVD2X VS32, (R3)(R8) // Store to dst + STXVD2X VS34, (R3)(R10) + ADD $32, R8 // Update index + ADD $32, R10 + BC 16, 0, loop32 // bdnz loop16 + + BEQ CR0, done + + MOVD R9, R6 + CMP R6, $8 + BLT small +xor16: + CMP R6, $16 + BLT xor8 + LXVD2X (R4)(R8), VS32 + LXVD2X (R5)(R8), VS33 + XXLXOR VS32, VS33, VS32 + STXVD2X VS32, (R3)(R8) + ADD $16, R8 + ADD $-16, R6 + CMP R6, $8 + BLT small +xor8: + // Case for 8 ≤ n < 16 bytes + MOVD (R4)(R8), R14 // R14 = a[i,...,i+7] + MOVD (R5)(R8), R15 // R15 = b[i,...,i+7] + XOR R14, R15, R16 // R16 = a[] ^ b[] + SUB $8, R6 // n = n - 8 + MOVD R16, (R3)(R8) // Store to dst + ADD $8, R8 + + // Check if we're finished + CMP R6, R0 + BGT small + RET + + // Case for n < 8 bytes and tailing bytes from the + // previous cases. +small: + CMP R6, R0 + BEQ done + MOVD R6, CTR // Setup loop counter + +loop: + MOVBZ (R4)(R8), R14 // R14 = a[i] + MOVBZ (R5)(R8), R15 // R15 = b[i] + XOR R14, R15, R16 // R16 = a[i] ^ b[i] + MOVB R16, (R3)(R8) // Store to dst + ADD $1, R8 + BC 16, 0, loop // bdnz loop + +done: + RET |