diff options
Diffstat (limited to 'src/crypto/aes/asm_ppc64le.s')
-rw-r--r-- | src/crypto/aes/asm_ppc64le.s | 501 |
1 files changed, 501 insertions, 0 deletions
diff --git a/src/crypto/aes/asm_ppc64le.s b/src/crypto/aes/asm_ppc64le.s new file mode 100644 index 0000000..a69cb78 --- /dev/null +++ b/src/crypto/aes/asm_ppc64le.s @@ -0,0 +1,501 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Based on CRYPTOGAMS code with the following comment: +// # ==================================================================== +// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL +// # project. The module is, however, dual licensed under OpenSSL and +// # CRYPTOGAMS licenses depending on where you obtain it. For further +// # details see http://www.openssl.org/~appro/cryptogams/. +// # ==================================================================== + +// Original code can be found at the link below: +// https://github.com/dot-asm/cryptogams/blob/master/ppc/aesp8-ppc.pl + +// I changed some function names in order to be more likely to go standards. +// For instance, function aes_p8_set_{en,de}crypt_key become +// set{En,De}cryptKeyAsm. I also split setEncryptKeyAsm in two parts +// and a new session was created (doEncryptKeyAsm). This was necessary to +// avoid arguments overwriting when setDecryptKeyAsm calls setEncryptKeyAsm. +// There were other modifications as well but kept the same functionality. + +#include "textflag.h" + +// For set{En,De}cryptKeyAsm +#define INP R3 +#define BITS R4 +#define OUT R5 +#define PTR R6 +#define CNT R7 +#define ROUNDS R8 +#define TEMP R19 +#define ZERO V0 +#define IN0 V1 +#define IN1 V2 +#define KEY V3 +#define RCON V4 +#define MASK V5 +#define TMP V6 +#define STAGE V7 +#define OUTPERM V8 +#define OUTMASK V9 +#define OUTHEAD V10 +#define OUTTAIL V11 + +// For {en,de}cryptBlockAsm +#define BLK_INP R3 +#define BLK_OUT R4 +#define BLK_KEY R5 +#define BLK_ROUNDS R6 +#define BLK_IDX R7 + +DATA ·rcon+0x00(SB)/8, $0x0100000001000000 // RCON +DATA ·rcon+0x08(SB)/8, $0x0100000001000000 // RCON +DATA ·rcon+0x10(SB)/8, $0x1b0000001b000000 +DATA ·rcon+0x18(SB)/8, $0x1b0000001b000000 +DATA ·rcon+0x20(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK +DATA ·rcon+0x28(SB)/8, $0x0d0e0f0c0d0e0f0c // MASK +DATA ·rcon+0x30(SB)/8, $0x0000000000000000 +DATA ·rcon+0x38(SB)/8, $0x0000000000000000 +GLOBL ·rcon(SB), RODATA, $64 + +// func setEncryptKeyAsm(key *byte, keylen int, enc *uint32) int +TEXT ·setEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0 + // Load the arguments inside the registers + MOVD key+0(FP), INP + MOVD keylen+8(FP), BITS + MOVD enc+16(FP), OUT + JMP ·doEncryptKeyAsm(SB) + +// This text is used both setEncryptKeyAsm and setDecryptKeyAsm +TEXT ·doEncryptKeyAsm(SB),NOSPLIT|NOFRAME,$0 + // Do not change R10 since it's storing the LR value in setDecryptKeyAsm + + // Check arguments + MOVD $-1, PTR // li 6,-1 exit code to -1 (255) + CMPU INP, $0 // cmpldi r3,0 input key pointer set? + BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort + CMPU OUT, $0 // cmpldi r5,0 output key pointer set? + BC 0x0E, 2, enc_key_abort // beq- .Lenc_key_abort + MOVD $-2, PTR // li 6,-2 exit code to -2 (254) + CMPW BITS, $128 // cmpwi 4,128 greater or equal to 128 + BC 0x0E, 0, enc_key_abort // blt- .Lenc_key_abort + CMPW BITS, $256 // cmpwi 4,256 lesser or equal to 256 + BC 0x0E, 1, enc_key_abort // bgt- .Lenc_key_abort + ANDCC $0x3f, BITS, TEMP // andi. 0,4,0x3f multiple of 64 + BC 0x06, 2, enc_key_abort // bne- .Lenc_key_abort + + MOVD $·rcon(SB), PTR // PTR point to rcon addr + + // Get key from memory and write aligned into VR + NEG INP, R9 // neg 9,3 R9 is ~INP + 1 + LVX (INP)(R0), IN0 // lvx 1,0,3 Load key inside IN0 + ADD $15, INP, INP // addi 3,3,15 Add 15B to INP addr + LVSR (R9)(R0), KEY // lvsr 3,0,9 + MOVD $0x20, R8 // li 8,0x20 R8 = 32 + CMPW BITS, $192 // cmpwi 4,192 Key size == 192? + LVX (INP)(R0), IN1 // lvx 2,0,3 + VSPLTISB $0x0f, MASK // vspltisb 5,0x0f 0x0f0f0f0f... mask + LVX (PTR)(R0), RCON // lvx 4,0,6 Load first 16 bytes into RCON + VXOR KEY, MASK, KEY // vxor 3,3,5 Adjust for byte swap + LVX (PTR)(R8), MASK // lvx 5,8,6 + ADD $0x10, PTR, PTR // addi 6,6,0x10 PTR to next 16 bytes of RCON + VPERM IN0, IN1, KEY, IN0 // vperm 1,1,2,3 Align + MOVD $8, CNT // li 7,8 CNT = 8 + VXOR ZERO, ZERO, ZERO // vxor 0,0,0 Zero to be zero :) + MOVD CNT, CTR // mtctr 7 Set the counter to 8 (rounds) + + LVSL (OUT)(R0), OUTPERM // lvsl 8,0,5 + VSPLTISB $-1, OUTMASK // vspltisb 9,-1 + LVX (OUT)(R0), OUTHEAD // lvx 10,0,5 + VPERM OUTMASK, ZERO, OUTPERM, OUTMASK // vperm 9,9,0,8 + + BLT loop128 // blt .Loop128 + ADD $8, INP, INP // addi 3,3,8 + BEQ l192 // beq .L192 + ADD $8, INP, INP // addi 3,3,8 + JMP l256 // b .L256 + +loop128: + // Key schedule (Round 1 to 8) + VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat + VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 + STVX STAGE, (OUT+R0) // stvx 7,0,5 Write to output + ADD $16, OUT, OUT // addi 5,5,16 Point to the next round + + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 + VXOR IN0, KEY, IN0 // vxor 1,1,3 + BC 0x10, 0, loop128 // bdnz .Loop128 + + LVX (PTR)(R0), RCON // lvx 4,0,6 Last two round keys + + // Key schedule (Round 9) + VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-spat + VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 + STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 9 + ADD $16, OUT, OUT // addi 5,5,16 + + // Key schedule (Round 10) + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 + VXOR IN0, KEY, IN0 // vxor 1,1,3 + + VPERM IN0, IN0, MASK, KEY // vperm 3,1,1,5 Rotate-n-splat + VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 Rotate + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 + STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 10 + ADD $16, OUT, OUT // addi 5,5,16 + + // Key schedule (Round 11) + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VXOR IN0, KEY, IN0 // vxor 1,1,3 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 Round 11 + + ADD $15, OUT, INP // addi 3,5,15 + ADD $0x50, OUT, OUT // addi 5,5,0x50 + + MOVD $10, ROUNDS // li 8,10 + JMP done // b .Ldone + +l192: + LVX (INP)(R0), TMP // lvx 6,0,3 + MOVD $4, CNT // li 7,4 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 + VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3 + VSPLTISB $8, KEY // vspltisb 3,8 + MOVD CNT, CTR // mtctr 7 + VSUBUBM MASK, KEY, MASK // vsububm 5,5,3 + +loop192: + VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 + VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 + VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 + + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + + VSLDOI $8, ZERO, IN1, STAGE // vsldoi 7,0,2,8 + VSPLTW $3, IN0, TMP // vspltw 6,1,3 + VXOR TMP, IN1, TMP // vxor 6,6,2 + VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 + VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 + VXOR IN1, TMP, IN1 // vxor 2,2,6 + VXOR IN0, KEY, IN0 // vxor 1,1,3 + VXOR IN1, KEY, IN1 // vxor 2,2,3 + VSLDOI $8, STAGE, IN0, STAGE // vsldoi 7,7,1,8 + + VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 + VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 + VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 + + VSLDOI $8, IN0, IN1, STAGE // vsldoi 7,1,2,8 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VPERM STAGE, STAGE, OUTPERM, OUTTAIL // vperm 11,7,7,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 + + VSPLTW $3, IN0, TMP // vspltw 6,1,3 + VXOR TMP, IN1, TMP // vxor 6,6,2 + VSLDOI $12, ZERO, IN1, IN1 // vsldoi 2,0,2,12 + VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 + VXOR IN1, TMP, IN1 // vxor 2,2,6 + VXOR IN0, KEY, IN0 // vxor 1,1,3 + VXOR IN1, KEY, IN1 // vxor 2,2,3 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $15, OUT, INP // addi 3,5,15 + ADD $16, OUT, OUT // addi 5,5,16 + BC 0x10, 0, loop192 // bdnz .Loop192 + + MOVD $12, ROUNDS // li 8,12 + ADD $0x20, OUT, OUT // addi 5,5,0x20 + JMP done // b .Ldone + +l256: + LVX (INP)(R0), TMP // lvx 6,0,3 + MOVD $7, CNT // li 7,7 + MOVD $14, ROUNDS // li 8,14 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 + VPERM IN1, TMP, KEY, IN1 // vperm 2,2,6,3 + MOVD CNT, CTR // mtctr 7 + +loop256: + VPERM IN1, IN1, MASK, KEY // vperm 3,2,2,5 + VSLDOI $12, ZERO, IN0, TMP // vsldoi 6,0,1,12 + VPERM IN1, IN1, OUTPERM, OUTTAIL // vperm 11,2,2,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + VCIPHERLAST KEY, RCON, KEY // vcipherlast 3,3,4 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $16, OUT, OUT // addi 5,5,16 + + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN0, TMP, IN0 // vxor 1,1,6 + VADDUWM RCON, RCON, RCON // vadduwm 4,4,4 + VXOR IN0, KEY, IN0 // vxor 1,1,3 + VPERM IN0, IN0, OUTPERM, OUTTAIL // vperm 11,1,1,8 + VSEL OUTHEAD, OUTTAIL, OUTMASK, STAGE // vsel 7,10,11,9 + VOR OUTTAIL, OUTTAIL, OUTHEAD // vor 10,11,11 + STVX STAGE, (OUT+R0) // stvx 7,0,5 + ADD $15, OUT, INP // addi 3,5,15 + ADD $16, OUT, OUT // addi 5,5,16 + BC 0x12, 0, done // bdz .Ldone + + VSPLTW $3, IN0, KEY // vspltw 3,1,3 + VSLDOI $12, ZERO, IN1, TMP // vsldoi 6,0,2,12 + VSBOX KEY, KEY // vsbox 3,3 + + VXOR IN1, TMP, IN1 // vxor 2,2,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN1, TMP, IN1 // vxor 2,2,6 + VSLDOI $12, ZERO, TMP, TMP // vsldoi 6,0,6,12 + VXOR IN1, TMP, IN1 // vxor 2,2,6 + + VXOR IN1, KEY, IN1 // vxor 2,2,3 + JMP loop256 // b .Loop256 + +done: + LVX (INP)(R0), IN1 // lvx 2,0,3 + VSEL OUTHEAD, IN1, OUTMASK, IN1 // vsel 2,10,2,9 + STVX IN1, (INP+R0) // stvx 2,0,3 + MOVD $0, PTR // li 6,0 set PTR to 0 (exit code 0) + MOVW ROUNDS, 0(OUT) // stw 8,0(5) + +enc_key_abort: + MOVD PTR, INP // mr 3,6 set exit code with PTR value + MOVD INP, ret+24(FP) // Put return value into the FP + RET // blr + +// func setDecryptKeyAsm(key *byte, keylen int, dec *uint32) int +TEXT ·setDecryptKeyAsm(SB),NOSPLIT|NOFRAME,$0 + // Load the arguments inside the registers + MOVD key+0(FP), INP + MOVD keylen+8(FP), BITS + MOVD dec+16(FP), OUT + + MOVD LR, R10 // mflr 10 + CALL ·doEncryptKeyAsm(SB) + MOVD R10, LR // mtlr 10 + + CMPW INP, $0 // cmpwi 3,0 exit 0 = ok + BC 0x06, 2, dec_key_abort // bne- .Ldec_key_abort + + // doEncryptKeyAsm set ROUNDS (R8) with the proper value for each mode + SLW $4, ROUNDS, CNT // slwi 7,8,4 + SUB $240, OUT, INP // subi 3,5,240 + SRW $1, ROUNDS, ROUNDS // srwi 8,8,1 + ADD R7, INP, OUT // add 5,3,7 + MOVD ROUNDS, CTR // mtctr 8 + +// dec_key will invert the key sequence in order to be used for decrypt +dec_key: + MOVWZ 0(INP), TEMP // lwz 0, 0(3) + MOVWZ 4(INP), R6 // lwz 6, 4(3) + MOVWZ 8(INP), R7 // lwz 7, 8(3) + MOVWZ 12(INP), R8 // lwz 8, 12(3) + ADD $16, INP, INP // addi 3,3,16 + MOVWZ 0(OUT), R9 // lwz 9, 0(5) + MOVWZ 4(OUT), R10 // lwz 10,4(5) + MOVWZ 8(OUT), R11 // lwz 11,8(5) + MOVWZ 12(OUT), R12 // lwz 12,12(5) + MOVW TEMP, 0(OUT) // stw 0, 0(5) + MOVW R6, 4(OUT) // stw 6, 4(5) + MOVW R7, 8(OUT) // stw 7, 8(5) + MOVW R8, 12(OUT) // stw 8, 12(5) + SUB $16, OUT, OUT // subi 5,5,16 + MOVW R9, -16(INP) // stw 9, -16(3) + MOVW R10, -12(INP) // stw 10,-12(3) + MOVW R11, -8(INP) // stw 11,-8(3) + MOVW R12, -4(INP) // stw 12,-4(3) + BC 0x10, 0, dec_key // bdnz .Ldeckey + + XOR R3, R3, R3 // xor 3,3,3 Clean R3 + +dec_key_abort: + MOVD R3, ret+24(FP) // Put return value into the FP + RET // blr + + +// func encryptBlockAsm(dst, src *byte, enc *uint32) +TEXT ·encryptBlockAsm(SB),NOSPLIT|NOFRAME,$0 + // Load the arguments inside the registers + MOVD dst+0(FP), BLK_OUT + MOVD src+8(FP), BLK_INP + MOVD enc+16(FP), BLK_KEY + + MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5) + MOVD $15, BLK_IDX // li 7,15 + + LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 + NEG BLK_OUT, R11 // neg 11,4 + LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 + LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3 + VSPLTISB $0x0f, RCON // vspltisb 4,0x0f + LVSR (R11)(R0), KEY // lvsr 3,0,11 + VXOR IN1, RCON, IN1 // vxor 2,2,4 + MOVD $16, BLK_IDX // li 7,16 + VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 + LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5 + LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 + SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + + VXOR ZERO, IN0, ZERO // vxor 0,0,1 + LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + MOVD BLK_ROUNDS, CTR // mtctr 6 + +loop_enc: + VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 + VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + VCIPHER ZERO, IN0, ZERO // vcipher 0,0,1 + LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + BC 0x10, 0, loop_enc // bdnz .Loop_enc + + VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 + VCIPHER ZERO, IN1, ZERO // vcipher 0,0,2 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + VCIPHERLAST ZERO, IN0, ZERO // vcipherlast 0,0,1 + + VSPLTISB $-1, IN1 // vspltisb 2,-1 + VXOR IN0, IN0, IN0 // vxor 1,1,1 + MOVD $15, BLK_IDX // li 7,15 + VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 + VXOR KEY, RCON, KEY // vxor 3,3,4 + LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 + VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3 + VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 + LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4 + STVX IN0, (BLK_OUT+R0) // stvx 1,0,4 + VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2 + STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4 + + RET // blr + + +// func decryptBlockAsm(dst, src *byte, dec *uint32) +TEXT ·decryptBlockAsm(SB),NOSPLIT|NOFRAME,$0 + // Load the arguments inside the registers + MOVD dst+0(FP), BLK_OUT + MOVD src+8(FP), BLK_INP + MOVD dec+16(FP), BLK_KEY + + MOVWZ 240(BLK_KEY), BLK_ROUNDS // lwz 6,240(5) + MOVD $15, BLK_IDX // li 7,15 + + LVX (BLK_INP)(R0), ZERO // lvx 0,0,3 + NEG BLK_OUT, R11 // neg 11,4 + LVX (BLK_INP)(BLK_IDX), IN0 // lvx 1,7,3 + LVSL (BLK_INP)(R0), IN1 // lvsl 2,0,3 + VSPLTISB $0x0f, RCON // vspltisb 4,0x0f + LVSR (R11)(R0), KEY // lvsr 3,0,11 + VXOR IN1, RCON, IN1 // vxor 2,2,4 + MOVD $16, BLK_IDX // li 7,16 + VPERM ZERO, IN0, IN1, ZERO // vperm 0,0,1,2 + LVX (BLK_KEY)(R0), IN0 // lvx 1,0,5 + LVSR (BLK_KEY)(R0), MASK // lvsr 5,0,5 + SRW $1, BLK_ROUNDS, BLK_ROUNDS // srwi 6,6,1 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + SUB $1, BLK_ROUNDS, BLK_ROUNDS // subi 6,6,1 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + + VXOR ZERO, IN0, ZERO // vxor 0,0,1 + LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + MOVD BLK_ROUNDS, CTR // mtctr 6 + +loop_dec: + VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 + VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + VNCIPHER ZERO, IN0, ZERO // vncipher 0,0,1 + LVX (BLK_KEY)(BLK_IDX), IN0 // lvx 1,7,5 + ADD $16, BLK_IDX, BLK_IDX // addi 7,7,16 + BC 0x10, 0, loop_dec // bdnz .Loop_dec + + VPERM IN0, IN1, MASK, IN1 // vperm 2,1,2,5 + VNCIPHER ZERO, IN1, ZERO // vncipher 0,0,2 + LVX (BLK_KEY)(BLK_IDX), IN1 // lvx 2,7,5 + VPERM IN1, IN0, MASK, IN0 // vperm 1,2,1,5 + VNCIPHERLAST ZERO, IN0, ZERO // vncipherlast 0,0,1 + + VSPLTISB $-1, IN1 // vspltisb 2,-1 + VXOR IN0, IN0, IN0 // vxor 1,1,1 + MOVD $15, BLK_IDX // li 7,15 + VPERM IN1, IN0, KEY, IN1 // vperm 2,2,1,3 + VXOR KEY, RCON, KEY // vxor 3,3,4 + LVX (BLK_OUT)(R0), IN0 // lvx 1,0,4 + VPERM ZERO, ZERO, KEY, ZERO // vperm 0,0,0,3 + VSEL IN0, ZERO, IN1, IN0 // vsel 1,1,0,2 + LVX (BLK_OUT)(BLK_IDX), RCON // lvx 4,7,4 + STVX IN0, (BLK_OUT+R0) // stvx 1,0,4 + VSEL ZERO, RCON, IN1, ZERO // vsel 0,0,4,2 + STVX ZERO, (BLK_OUT+BLK_IDX) // stvx 0,7,4 + + RET // blr |