/* SPDX-License-Identifier: GPL-2.0-or-later */
#
# Accelerated AES-GCM stitched implementation for ppc64le.
#
# Copyright 2022- IBM Inc. All rights reserved
#
#===================================================================================
# Written by Danny Tsen
#
# GHASH is based on the Karatsuba multiplication method.
#
#    Xi xor X1
#
#    X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
#      (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
#      (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
#      (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
#      (X4.h * H.h + X4.l * H.l + X4 * H)
#
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
#     ( H.l, H, H.h)
#     ( H^2.l, H^2, H^2.h)
#     ( H^3.l, H^3, H^3.h)
#     ( H^4.l, H^4, H^4.h)
#
# v30 is IV
# v31 - counter 1
#
# AES used,
#     vs0 - vs14 for round keys
#     v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
# This implementation uses stitched AES-GCM approach to improve overall performance.
# AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
#
# ===================================================================================
#
# Register numbering used throughout: a bare number N in a VMX instruction
# (vcipher, vxor, vpmsumd, ...) names vector register vN, which is the same
# register as VSX register N+32.  A bare number N in a VSX instruction
# (xxlor, lxv, lxvb16x, ...) names VSX register vsN, so "xxlor 19+32, 1, 1"
# copies vs1 into v19.
#

# _GLOBAL comes from asm/ppc_asm.h, SYM_FUNC_START_LOCAL and SYM_FUNC_END
# come from linux/linkage.h.
#include <asm/ppc_asm.h>
#include <linux/linkage.h>

.machine	"any"
.text

#
# Loop_aes_middle4x - AES rounds 1 to 9 on four blocks.
#   v15 - v18 : block states, updated in place
#   vs1 - vs9 : round keys (read only)
#   v19 - v23 : scratch copies of the round keys (clobbered)
# Round 0 (add round key) and the final rounds are done by the user
# of the macro.
#
.macro Loop_aes_middle4x
	xxlor	19+32, 1, 1
	xxlor	20+32, 2, 2
	xxlor	21+32, 3, 3
	xxlor	22+32, 4, 4

	vcipher	15, 15, 19
	vcipher	16, 16, 19
	vcipher	17, 17, 19
	vcipher	18, 18, 19

	vcipher	15, 15, 20
	vcipher	16, 16, 20
	vcipher	17, 17, 20
	vcipher	18, 18, 20

	vcipher	15, 15, 21
	vcipher	16, 16, 21
	vcipher	17, 17, 21
	vcipher	18, 18, 21

	vcipher	15, 15, 22
	vcipher	16, 16, 22
	vcipher	17, 17, 22
	vcipher	18, 18, 22

	xxlor	19+32, 5, 5
	xxlor	20+32, 6, 6
	xxlor	21+32, 7, 7
	xxlor	22+32, 8, 8

	vcipher	15, 15, 19
	vcipher	16, 16, 19
	vcipher	17, 17, 19
	vcipher	18, 18, 19

	vcipher	15, 15, 20
	vcipher	16, 16, 20
	vcipher	17, 17, 20
	vcipher	18, 18, 20

	vcipher	15, 15, 21
	vcipher	16, 16, 21
	vcipher	17, 17, 21
	vcipher	18, 18, 21

	vcipher	15, 15, 22
	vcipher	16, 16, 22
	vcipher	17, 17, 22
	vcipher	18, 18, 22

	xxlor	23+32, 9, 9
	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
.endm

#
# Loop_aes_middle8x - AES rounds 1 to 9 on eight blocks.
#   v15 - v22 : block states, updated in place
#   vs1 - vs9 : round keys (read only)
#   v23 - v26 : scratch copies of the round keys (clobbered)
#
.macro Loop_aes_middle8x
	xxlor	23+32, 1, 1
	xxlor	24+32, 2, 2
	xxlor	25+32, 3, 3
	xxlor	26+32, 4, 4

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	vcipher	15, 15, 25
	vcipher	16, 16, 25
	vcipher	17, 17, 25
	vcipher	18, 18, 25
	vcipher	19, 19, 25
	vcipher	20, 20, 25
	vcipher	21, 21, 25
	vcipher	22, 22, 25

	vcipher	15, 15, 26
	vcipher	16, 16, 26
	vcipher	17, 17, 26
	vcipher	18, 18, 26
	vcipher	19, 19, 26
	vcipher	20, 20, 26
	vcipher	21, 21, 26
	vcipher	22, 22, 26

	xxlor	23+32, 5, 5
	xxlor	24+32, 6, 6
	xxlor	25+32, 7, 7
	xxlor	26+32, 8, 8

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	vcipher	15, 15, 25
	vcipher	16, 16, 25
	vcipher	17, 17, 25
	vcipher	18, 18, 25
	vcipher	19, 19, 25
	vcipher	20, 20, 25
	vcipher	21, 21, 25
	vcipher	22, 22, 25

	vcipher	15, 15, 26
	vcipher	16, 16, 26
	vcipher	17, 17, 26
	vcipher	18, 18, 26
	vcipher	19, 19, 26
	vcipher	20, 20, 26
	vcipher	21, 21, 26
	vcipher	22, 22, 26

	xxlor	23+32, 9, 9
	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23
.endm

#
# Loop_aes_middle_1x - AES rounds 1 to 9 on a single block.
#   v15       : block state, updated in place
#   vs1 - vs9 : round keys (read only)
#   v19 - v22 : scratch copies of the round keys (clobbered)
#
.macro Loop_aes_middle_1x
	xxlor	19+32, 1, 1
	xxlor	20+32, 2, 2
	xxlor	21+32, 3, 3
	xxlor	22+32, 4, 4

	vcipher	15, 15, 19
	vcipher	15, 15, 20
	vcipher	15, 15, 21
	vcipher	15, 15, 22

	xxlor	19+32, 5, 5
	xxlor	20+32, 6, 6
	xxlor	21+32, 7, 7
	xxlor	22+32, 8, 8

	vcipher	15, 15, 19
	vcipher	15, 15, 20
	vcipher	15, 15, 21
	vcipher	15, 15, 22

	xxlor	19+32, 9, 9
	vcipher	15, 15, 19
.endm

#
# Compute 4x hash values based on Karatsuba method.
#   v15 - v18 : four input blocks (v15 is xored with Xi, so it is clobbered)
#   v0        : Xi on entry, updated hash on exit
#   v2        : H Poly, v3 - v14 : hash keys (read only)
#   v23 - v29 : scratch (clobbered)
#
.macro ppc_aes_gcm_ghash
	vxor		15, 15, 0

	vpmsumd		23, 12, 15		# H4.L * X.L
	vpmsumd		24, 9, 16
	vpmsumd		25, 6, 17
	vpmsumd		26, 3, 18

	vxor		23, 23, 24
	vxor		23, 23, 25
	vxor		23, 23, 26		# L

	vpmsumd		24, 13, 15		# H4.L * X.H + H4.H * X.L
	vpmsumd		25, 10, 16		# H3.L * X1.H + H3.H * X1.L
	vpmsumd		26, 7, 17
	vpmsumd		27, 4, 18

	vxor		24, 24, 25
	vxor		24, 24, 26
	vxor		24, 24, 27		# M

	# sum hash and reduction with H Poly
	vpmsumd		28, 23, 2		# reduction

	vxor		29, 29, 29
	vsldoi		26, 24, 29, 8		# mL
	vsldoi		29, 29, 24, 8		# mH
	vxor		23, 23, 26		# mL + L

	vsldoi		23, 23, 23, 8		# swap
	vxor		23, 23, 28

	vpmsumd		24, 14, 15		# H4.H * X.H
	vpmsumd		25, 11, 16
	vpmsumd		26, 8, 17
	vpmsumd		27, 5, 18

	vxor		24, 24, 25
	vxor		24, 24, 26
	vxor		24, 24, 27

	vxor		24, 24, 29

	# sum hash and reduction with H Poly
	vsldoi		27, 23, 23, 8		# swap
	vpmsumd		23, 23, 2
	vxor		27, 27, 24
	vxor		23, 23, 27

	xxlor		32, 23+32, 23+32	# update hash

.endm

#
# Combine two 4x ghash
#   v15 - v22 : input blocks (v15 and v19 are xored with the running
#               hash, so they are clobbered)
#   v0        : Xi on entry, updated hash on exit
#   v2        : H Poly, v3 - v14 : hash keys (read only)
#   v23 - v29 : scratch (clobbered)
# The first three multiplies of the second group are issued before the
# first group result is folded into v19; they only read v20 - v22.
#
.macro ppc_aes_gcm_ghash2_4x
	# first 4x hash
	vxor		15, 15, 0		# Xi + X

	vpmsumd		23, 12, 15		# H4.L * X.L
	vpmsumd		24, 9, 16
	vpmsumd		25, 6, 17
	vpmsumd		26, 3, 18

	vxor		23, 23, 24
	vxor		23, 23, 25
	vxor		23, 23, 26		# L

	vpmsumd		24, 13, 15		# H4.L * X.H + H4.H * X.L
	vpmsumd		25, 10, 16		# H3.L * X1.H + H3.H * X1.L
	vpmsumd		26, 7, 17
	vpmsumd		27, 4, 18

	vxor		24, 24, 25
	vxor		24, 24, 26

	# sum hash and reduction with H Poly
	vpmsumd		28, 23, 2		# reduction

	vxor		29, 29, 29

	vxor		24, 24, 27		# M
	vsldoi		26, 24, 29, 8		# mL
	vsldoi		29, 29, 24, 8		# mH
	vxor		23, 23, 26		# mL + L

	vsldoi		23, 23, 23, 8		# swap
	vxor		23, 23, 28

	vpmsumd		24, 14, 15		# H4.H * X.H
	vpmsumd		25, 11, 16
	vpmsumd		26, 8, 17
	vpmsumd		27, 5, 18

	vxor		24, 24, 25
	vxor		24, 24, 26
	vxor		24, 24, 27		# H

	vxor		24, 24, 29		# H + mH

	# sum hash and reduction with H Poly
	vsldoi		27, 23, 23, 8		# swap
	vpmsumd		23, 23, 2
	vxor		27, 27, 24
	vxor		27, 23, 27		# 1st Xi

	# 2nd 4x hash
	vpmsumd		24, 9, 20
	vpmsumd		25, 6, 21
	vpmsumd		26, 3, 22
	vxor		19, 19, 27		# Xi + X
	vpmsumd		23, 12, 19		# H4.L * X.L

	vxor		23, 23, 24
	vxor		23, 23, 25
	vxor		23, 23, 26		# L

	vpmsumd		24, 13, 19		# H4.L * X.H + H4.H * X.L
	vpmsumd		25, 10, 20		# H3.L * X1.H + H3.H * X1.L
	vpmsumd		26, 7, 21
	vpmsumd		27, 4, 22

	vxor		24, 24, 25
	vxor		24, 24, 26

	# sum hash and reduction with H Poly
	vpmsumd		28, 23, 2		# reduction

	vxor		29, 29, 29

	vxor		24, 24, 27		# M
	vsldoi		26, 24, 29, 8		# mL
	vsldoi		29, 29, 24, 8		# mH
	vxor		23, 23, 26		# mL + L

	vsldoi		23, 23, 23, 8		# swap
	vxor		23, 23, 28

	vpmsumd		24, 14, 19		# H4.H * X.H
	vpmsumd		25, 11, 20
	vpmsumd		26, 8, 21
	vpmsumd		27, 5, 22

	vxor		24, 24, 25
	vxor		24, 24, 26
	vxor		24, 24, 27		# H

	vxor		24, 24, 29		# H + mH

	# sum hash and reduction with H Poly
	vsldoi		27, 23, 23, 8		# swap
	vpmsumd		23, 23, 2
	vxor		27, 27, 24
	vxor		23, 23, 27

	xxlor		32, 23+32, 23+32	# update hash

.endm

#
# Compute update single hash
#   v28       : input block (xored with Xi, so it is clobbered)
#   v0        : Xi on entry, updated hash on exit
#   v2        : H Poly, v3 - v5 : H.l, H, H.h (read only)
#   v19, v20, v22 - v27 : scratch (clobbered)
#
.macro ppc_update_hash_1x
	vxor		28, 28, 0

	vxor		19, 19, 19		# zero, used as shift-in for vsldoi

	vpmsumd		22, 3, 28		# L
	vpmsumd		23, 4, 28		# M
	vpmsumd		24, 5, 28		# H

	vpmsumd		27, 22, 2		# reduction

	vsldoi		25, 23, 19, 8		# mL
	vsldoi		26, 19, 23, 8		# mH
	vxor		22, 22, 25		# LL + LL
	vxor		24, 24, 26		# HH + HH

	vsldoi		22, 22, 22, 8		# swap
	vxor		22, 22, 27

	vsldoi		20, 22, 22, 8		# swap
	vpmsumd		22, 22, 2		# reduction
	vxor		20, 20, 24
	vxor		22, 22, 20

	vmr		0, 22			# update hash

.endm

#
# SAVE_REGS - allocate a 640 byte stack frame and save the registers
# listed below.
#
# Frame layout, offsets from the new r1:
#   112 - 175 : r14 - r21
#   256 - 447 : v20 - v31
#   464 - 607 : vs14 - vs22
#   656       : LR, which is 16 bytes above the previous r1
# The area starting at offset 192 is used as scratch space by the
# partial block code later in this file.
# Clobbers r0 and r9.
#
.macro SAVE_REGS
	stdu 1,-640(1)
	mflr 0

	std	14,112(1)
	std	15,120(1)
	std	16,128(1)
	std	17,136(1)
	std	18,144(1)
	std	19,152(1)
	std	20,160(1)
	std	21,168(1)
	li	9, 256
	stvx	20, 9, 1
	addi	9, 9, 16
	stvx	21, 9, 1
	addi	9, 9, 16
	stvx	22, 9, 1
	addi	9, 9, 16
	stvx	23, 9, 1
	addi	9, 9, 16
	stvx	24, 9, 1
	addi	9, 9, 16
	stvx	25, 9, 1
	addi	9, 9, 16
	stvx	26, 9, 1
	addi	9, 9, 16
	stvx	27, 9, 1
	addi	9, 9, 16
	stvx	28, 9, 1
	addi	9, 9, 16
	stvx	29, 9, 1
	addi	9, 9, 16
	stvx	30, 9, 1
	addi	9, 9, 16
	stvx	31, 9, 1
	stxv	14, 464(1)
	stxv	15, 480(1)
	stxv	16, 496(1)
	stxv	17, 512(1)
	stxv	18, 528(1)
	stxv	19, 544(1)
	stxv	20, 560(1)
	stxv	21, 576(1)
	stxv	22, 592(1)
	std	0, 656(1)
.endm

#
# RESTORE_REGS - reload everything stored by SAVE_REGS, restore LR and
# release the 640 byte frame.  Clobbers r0 and r9.
#
.macro RESTORE_REGS
	lxv	14, 464(1)
	lxv	15, 480(1)
	lxv	16, 496(1)
	lxv	17, 512(1)
	lxv	18, 528(1)
	lxv	19, 544(1)
	lxv	20, 560(1)
	lxv	21, 576(1)
	lxv	22, 592(1)
	li	9, 256
	lvx	20, 9, 1
	addi	9, 9, 16
	lvx	21, 9, 1
	addi	9, 9, 16
	lvx	22, 9, 1
	addi	9, 9, 16
	lvx	23, 9, 1
	addi	9, 9, 16
	lvx	24, 9, 1
	addi	9, 9, 16
	lvx	25, 9, 1
	addi	9, 9, 16
	lvx	26, 9, 1
	addi	9, 9, 16
	lvx	27, 9, 1
	addi	9, 9, 16
	lvx	28, 9, 1
	addi	9, 9, 16
	lvx	29, 9, 1
	addi	9, 9, 16
	lvx	30, 9, 1
	addi	9, 9, 16
	lvx	31, 9, 1

	ld	0, 656(1)
	ld	14,112(1)
	ld	15,120(1)
	ld	16,128(1)
	ld	17,136(1)
	ld	18,144(1)
	ld	19,152(1)
	ld	20,160(1)
	ld	21,168(1)

	mtlr	0
	addi	1, 1, 640
.endm

#
# LOAD_HASH_TABLE - load Xi, H Poly and the hash keys from the table
# addressed by r8.
#   offset 0         -> v0  (Xi)
#   offset 32        -> v2  (H Poly)
#   offsets 48 - 224 -> v3 - v14 (H, H^2, H^3, H^4 as l / m / h triples)
# Clobbers r10.
#
.macro LOAD_HASH_TABLE
	# Load Xi
	lxvb16x	32, 0, 8	# load Xi

	# load Hash - h^4, h^3, h^2, h
	li	10, 32
	lxvd2x	2+32, 10, 8	# H Poly
	li	10, 48
	lxvd2x	3+32, 10, 8	# Hl
	li	10, 64
	lxvd2x	4+32, 10, 8	# H
	li	10, 80
	lxvd2x	5+32, 10, 8	# Hh

	li	10, 96
	lxvd2x	6+32, 10, 8	# H^2l
	li	10, 112
	lxvd2x	7+32, 10, 8	# H^2
	li	10, 128
	lxvd2x	8+32, 10, 8	# H^2h

	li	10, 144
	lxvd2x	9+32, 10, 8	# H^3l
	li	10, 160
	lxvd2x	10+32, 10, 8	# H^3
	li	10, 176
	lxvd2x	11+32, 10, 8	# H^3h

	li	10, 192
	lxvd2x	12+32, 10, 8	# H^4l
	li	10, 208
	lxvd2x	13+32, 10, 8	# H^4
	li	10, 224
	lxvd2x	14+32, 10, 8	# H^4h
.endm

#
# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
#               const char *rk, unsigned char iv[16], void *Xip);
#
#    r3 - inp
#    r4 - out
#    r5 - len
#    r6 - AES round keys
#    r7 - iv and other data
#    r8 - Xi, HPoli, hash keys
#
#    rounds is at offset 240 in rk
#    Xi is at 0 in gcm_table (Xip).
#
# Register roles inside the function body:
#    r9  - current output pointer      r14 - current input pointer
#    r11 - bytes processed so far      r12 - bytes remaining
#    r10 - number of AES rounds (reloaded from 240(r6) before each use)
#    r15 - r21 - byte offsets 16 .. 112 inside the 8 block loop
#    v30 - counter block (IV), v31 - constant 1, v0 - Xi
#    v15 - counter block xor round key 0, ready for the next block
# The doubleword at 56(r7) holds the byte count of a pending partial
# block; 0 means no partial block is pending.
# All exits go through aes_gcm_out, defined later in this file.
#
# NOTE(review): the counter is advanced with vaddudm, a 64-bit
# doubleword add, so it does not wrap at 32 bits.  Confirm that callers
# never get near 2^32 blocks for one IV.
#
_GLOBAL(aes_p10_gcm_encrypt)
.align 5

	SAVE_REGS

	LOAD_HASH_TABLE

	# initialize ICB: GHASH( IV ), IV - r7
	lxvb16x	30+32, 0, 7	# load IV  - v30

	mr	12, 5		# length
	li	11, 0		# block index

	# counter 1
	vxor	31, 31, 31
	vspltisb 22, 1
	vsldoi	31, 31, 22,1	# counter 1

	# load round key to VSR
	lxv	0, 0(6)
	lxv	1, 0x10(6)
	lxv	2, 0x20(6)
	lxv	3, 0x30(6)
	lxv	4, 0x40(6)
	lxv	5, 0x50(6)
	lxv	6, 0x60(6)
	lxv	7, 0x70(6)
	lxv	8, 0x80(6)
	lxv	9, 0x90(6)
	lxv	10, 0xa0(6)

	# load rounds - 10 (128), 12 (192), 14 (256)
	lwz	9,240(6)

	#
	# vxor	state, state, w # addroundkey
	xxlor	32+29, 0, 0
	vxor	15, 30, 29	# IV + round key - add round key 0

	cmpdi	9, 10
	beq	Loop_aes_gcm_8x

	# load 2 more round keys (v11, v12)
	lxv	11, 0xb0(6)
	lxv	12, 0xc0(6)

	cmpdi	9, 12
	beq	Loop_aes_gcm_8x

	# load 2 more round keys (v11, v12, v13, v14)
	lxv	13, 0xd0(6)
	lxv	14, 0xe0(6)
	cmpdi	9, 14
	beq	Loop_aes_gcm_8x

	# unsupported round count: nothing is processed
	b	aes_gcm_out

.align 5
Loop_aes_gcm_8x:
	mr	14, 3
	mr	9, 4

	#
	# check partial block
	#
	# Any non-zero count at 56(r7) ends up at Final_block: positive
	# values take the bgt, negative values take the blt.
Continue_partial_check:
	ld	15, 56(7)
	cmpdi	15, 0
	beq	Continue
	bgt	Final_block
	cmpdi	15, 16
	blt	Final_block

Continue:
	# n blocks
	li	10, 128
	divdu	10, 12, 10	# n 128 bytes-blocks
	cmpdi	10, 0
	beq	Loop_last_block

	# v15 already holds counter xor rk0; prepare the next seven
	vaddudm	30, 30, 31	# IV + counter
	vxor	16, 30, 29
	vaddudm	30, 30, 31
	vxor	17, 30, 29
	vaddudm	30, 30, 31
	vxor	18, 30, 29
	vaddudm	30, 30, 31
	vxor	19, 30, 29
	vaddudm	30, 30, 31
	vxor	20, 30, 29
	vaddudm	30, 30, 31
	vxor	21, 30, 29
	vaddudm	30, 30, 31
	vxor	22, 30, 29

	mtctr	10

	li	15, 16
	li	16, 32
	li	17, 48
	li	18, 64
	li	19, 80
	li	20, 96
	li	21, 112

	lwz	10, 240(6)

Loop_8x_block:

	# input goes to vs15 - vs22, the states stay in v15 - v22
	lxvb16x		15, 0, 14	# load block
	lxvb16x		16, 15, 14	# load block
	lxvb16x		17, 16, 14	# load block
	lxvb16x		18, 17, 14	# load block
	lxvb16x		19, 18, 14	# load block
	lxvb16x		20, 19, 14	# load block
	lxvb16x		21, 20, 14	# load block
	lxvb16x		22, 21, 14	# load block
	addi		14, 14, 128

	Loop_aes_middle8x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_ghash

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_ghash

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_ghash
	b	aes_gcm_out

Do_next_ghash:

	#
	# last round
	# VSR 47 - 54 are v15 - v22: keystream xor input, result stays
	# in v15 - v22 so it can be hashed below.
	vcipherlast     15, 15, 23
	vcipherlast     16, 16, 23

	xxlxor		47, 47, 15
	stxvb16x        47, 0, 9	# store output
	xxlxor		48, 48, 16
	stxvb16x        48, 15, 9	# store output

	vcipherlast     17, 17, 23
	vcipherlast     18, 18, 23

	xxlxor		49, 49, 17
	stxvb16x        49, 16, 9	# store output
	xxlxor		50, 50, 18
	stxvb16x        50, 17, 9	# store output

	vcipherlast     19, 19, 23
	vcipherlast     20, 20, 23

	xxlxor		51, 51, 19
	stxvb16x        51, 18, 9	# store output
	xxlxor		52, 52, 20
	stxvb16x        52, 19, 9	# store output

	vcipherlast     21, 21, 23
	vcipherlast     22, 22, 23

	xxlxor		53, 53, 21
	stxvb16x        53, 20, 9	# store output
	xxlxor		54, 54, 22
	stxvb16x        54, 21, 9	# store output

	addi		9, 9, 128

	# ghash here
	ppc_aes_gcm_ghash2_4x

	# Prepare the next eight counter blocks.  v29 remembers the
	# counter of the first one so the IV can be rewound on loop exit.
	xxlor	27+32, 0, 0
	vaddudm 30, 30, 31		# IV + counter
	vmr	29, 30
	vxor	15, 30, 27		# add round key
	vaddudm 30, 30, 31
	vxor	16, 30, 27
	vaddudm 30, 30, 31
	vxor	17, 30, 27
	vaddudm 30, 30, 31
	vxor	18, 30, 27
	vaddudm 30, 30, 31
	vxor	19, 30, 27
	vaddudm 30, 30, 31
	vxor	20, 30, 27
	vaddudm 30, 30, 31
	vxor	21, 30, 27
	vaddudm 30, 30, 31
	vxor	22, 30, 27

	addi	12, 12, -128
	addi	11, 11, 128

	bdnz	Loop_8x_block

	vmr	30, 29			# drop the seven unused counters
	stxvb16x 30+32, 0, 7		# update IV

Loop_last_block:
	cmpdi   12, 0
	beq     aes_gcm_out

	# loop last few blocks
	li      10, 16
	divdu   10, 12, 10

	mtctr   10

	lwz	10, 240(6)

	cmpdi   12, 16
	blt     Final_block

Next_rem_block:
	lxvb16x 15, 0, 14		# load block

	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_1x

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_1x

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_1x

Do_next_1x:
	vcipherlast     15, 15, 23

	xxlxor		47, 47, 15
	stxvb16x	47, 0, 9	# store output
	addi		14, 14, 16
	addi		9, 9, 16

	vmr		28, 15		# hash the ciphertext just produced
	ppc_update_hash_1x

	addi		12, 12, -16
	addi		11, 11, 16
	xxlor		19+32, 0, 0
	vaddudm		30, 30, 31		# IV + counter
	vxor		15, 30, 19		# add round key

	bdnz	Next_rem_block

	li	15, 0
	std	15, 56(7)		# clear partial?
	stxvb16x 30+32, 0, 7		# update IV
	cmpdi	12, 0
	beq	aes_gcm_out

Final_block:
	# Finish encrypting the counter block in v15 to get the keystream
	# for a block that is shorter than 16 bytes or already started.
	lwz	10, 240(6)
	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_final_1x

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_final_1x

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_final_1x

Do_final_1x:
	vcipherlast     15, 15, 23

	# check partial block
	li	21, 0			# encrypt
	ld	15, 56(7)		# partial?
	cmpdi	15, 0
	beq	Normal_block
	bl	Do_partial_block

	cmpdi	12, 0
	ble aes_gcm_out

	b Continue_partial_check

Normal_block:
	# NOTE(review): this reads a full 16 bytes at r14 although only
	# r12 (less than 16) bytes remain; confirm the caller guarantees
	# that the over-read is safe.
	lxvb16x	15, 0, 14		# load last block
	xxlxor	47, 47, 15

	# create partial block mask
	li	15, 16
	sub	15, 15, 12		# index to the mask

	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
	vspltisb	17, 0		# second 16 bytes - 0x0000...00
	li	10, 192
	stvx	16, 10, 1
	addi	10, 10, 16
	stvx	17, 10, 1

	# reading 16 bytes at offset 16 - r12 yields r12 bytes of 0xff
	# followed by zeros (mask lands in vs16)
	addi	10, 1, 192
	lxvb16x	16, 15, 10		# load partial block mask
	xxland	47, 47, 16

	vmr	28, 15
	ppc_update_hash_1x

	# * should store only the remaining bytes.
	bl	Write_partial_block

	stxvb16x 30+32, 0, 7		# update IV
	std	12, 56(7)		# update partial?
	li	16, 16

	stxvb16x	32, 0, 8		# write out Xi
	stxvb16x	32, 16, 8		# write out Xi
	b aes_gcm_out

#
# Compute data mask
#
# Builds a byte mask in the stack scratch area at 192(r1) and loads it
# into VSX register _mask.  _start and _end are GPR numbers: the mask
# has (_start) zero bytes, then 0xff bytes, with zeros again from byte
# (_start + _end) onwards.
# Clobbers r10, v16 and v17.
#
.macro GEN_MASK _mask _start _end
	vspltisb	16, -1				# first 16 bytes - 0xffff...ff
	vspltisb	17, 0				# second 16 bytes - 0x0000...00
	li	10, 192
	stxvb16x	17+32, 10, 1
	add	10, 10, \_start
	stxvb16x	16+32, 10, 1
	add	10, 10, \_end
	stxvb16x	17+32, 10, 1

	addi	10, 1, 192
	lxvb16x	\_mask, 0, 10		# load partial block mask
.endm

#
# Handle multiple partial blocks for encrypt and decrypt
#   operations.
#
# On entry:
#    r15 - bytes already consumed in the pending block (from 56(r7))
#    r5  - length argument of the current call
#    r21 - 0 for encrypt, non-zero for decrypt
#    v15 - keystream block for the pending counter
#    r9, r14, r11, r12 - output, input, processed and remaining counts
# On exit r9, r14, r11 and r12 are advanced by the number of bytes
# written and 56(r7) holds the new partial count (0 once the block is
# complete, in which case the IV is advanced and v15 / v29 are set up
# for the next block).
# Clobbers r10, r15 - r18, ctr, vs18, v16, v17, v19, v20, v22 - v29.
#
# NOTE(review): the byte copy loop is entered with ctr = r17, which is
# r5 when r15 + r5 <= 16.  A zero r5 with a pending partial block
# would load ctr with 0; confirm callers never do that.
#
SYM_FUNC_START_LOCAL(Do_partial_block)
	add	17, 15, 5
	cmpdi	17, 16
	bgt	Big_block
	GEN_MASK 18, 15, 5
	b	_Partial
Big_block:
	li	16, 16
	GEN_MASK 18, 15, 16

_Partial:
	lxvb16x	17+32, 0, 14		# load last block
	sldi	16, 15, 3		# partial count in bits
	mtvsrdd	32+16, 0, 16
	vsro	17, 17, 16		# align input with the keystream offset
	xxlxor	47, 47, 17+32
	xxland	47, 47, 18

	vxor	0, 0, 0			# clear Xi
	vmr	28, 15

	cmpdi	21, 0			# encrypt/decrypt ops?
	beq	Skip_decrypt
	xxland	32+28, 32+17, 18	# decrypt hashes the masked input

Skip_decrypt:

	ppc_update_hash_1x

	li	16, 16
	lxvb16x 32+29, 16, 8
	vxor	0, 0, 29
	stxvb16x 32, 0, 8		# save Xi
	stxvb16x 32, 16, 8		# save Xi

	# store partial block
	# loop the rest of the stream if any
	sldi	16, 15, 3
	mtvsrdd	32+16, 0, 16
	vslo	15, 15, 16		# move the new bytes to the front
	#stxvb16x 15+32, 0, 9		# last block

	li	16, 16
	sub	17, 16, 15		# 16 - partial

	add	16, 15, 5
	cmpdi	16, 16
	bgt	Larger_16
	mr	17, 5
Larger_16:

	# write partial
	li		10, 192
	stxvb16x	15+32, 10, 1	# save current block

	addi		10, 9, -1
	addi		16, 1, 191
	mtctr		17		# move partial byte count

Write_last_partial:
	lbzu		18, 1(16)
	stbu		18, 1(10)
	bdnz		Write_last_partial
	# Complete loop partial

	add	14, 14, 17
	add	9, 9, 17
	sub	12, 12, 17
	add	11, 11, 17

	add	15, 15, 5
	cmpdi	15, 16
	blt	Save_partial

	vaddudm	30, 30, 31
	stxvb16x 30+32, 0, 7		# update IV
	xxlor	32+29, 0, 0
	vxor	15, 30, 29		# IV + round key - add round key 0
	li	15, 0
	std	15, 56(7)		# partial done - clear
	b	Partial_done
Save_partial:
	std	15, 56(7)		# partial

Partial_done:
	blr
	# The end marker belongs here, after the last instruction, so the
	# symbol size covers the whole function.
SYM_FUNC_END(Do_partial_block)

#
# Write partial block
# r9 - output
# r12 - remaining bytes
# v15 - partial input data
#
SYM_FUNC_START_LOCAL(Write_partial_block)
	# Spill v15 to the stack scratch area at 192(r1), then copy r12
	# bytes from there to the output at r9, one byte at a time.
	# Clobbers r10, r14, r15, r16 and ctr.
	# NOTE(review): ctr is loaded from r12 without a zero check; both
	# callers in this file appear to reach here only with r12 > 0.
	li		10, 192
	stxvb16x	15+32, 10, 1		# last block

	addi		10, 9, -1		# pre-decrement for stbu
	addi		16, 1, 191		# pre-decrement for lbzu

	mtctr		12			# remaining bytes
	li		15, 0

Write_last_byte:
	lbzu		14, 1(16)
	stbu		14, 1(10)
	bdnz		Write_last_byte
	blr
SYM_FUNC_END(Write_partial_block)

	# Common exit for encrypt and decrypt: store Xi back to the table
	# at r8, return r11 + r12 in r3, restore registers and return.
aes_gcm_out:
	# out = state
	stxvb16x	32, 0, 8		# write out Xi
	add	3, 11, 12		# return count

	RESTORE_REGS
	blr

#
# 8x Decrypt
#
# Same arguments and register roles as the encrypt entry point:
#    r3 - inp, r4 - out, r5 - len, r6 - AES round keys,
#    r7 - iv and other data, r8 - Xi, H Poly, hash keys
#    r9 / r14 - output / input pointers, r11 / r12 - processed /
#    remaining byte counts, r10 - number of rounds.
# The difference from encrypt is that GHASH is computed over the input
# (ciphertext) blocks, which are loaded into vs15 - vs22, instead of
# over the output of the xor.
#
_GLOBAL(aes_p10_gcm_decrypt)
.align 5

	SAVE_REGS

	LOAD_HASH_TABLE

	# initialize ICB: GHASH( IV ), IV - r7
	lxvb16x	30+32, 0, 7	# load IV  - v30

	mr	12, 5		# length
	li	11, 0		# block index

	# counter 1
	vxor	31, 31, 31
	vspltisb 22, 1
	vsldoi	31, 31, 22,1	# counter 1

	# load round key to VSR
	lxv	0, 0(6)
	lxv	1, 0x10(6)
	lxv	2, 0x20(6)
	lxv	3, 0x30(6)
	lxv	4, 0x40(6)
	lxv	5, 0x50(6)
	lxv	6, 0x60(6)
	lxv	7, 0x70(6)
	lxv	8, 0x80(6)
	lxv	9, 0x90(6)
	lxv	10, 0xa0(6)

	# load rounds - 10 (128), 12 (192), 14 (256)
	lwz	9,240(6)

	#
	# vxor	state, state, w # addroundkey
	xxlor	32+29, 0, 0
	vxor	15, 30, 29	# IV + round key - add round key 0

	cmpdi	9, 10
	beq	Loop_aes_gcm_8x_dec

	# load 2 more round keys (v11, v12)
	lxv	11, 0xb0(6)
	lxv	12, 0xc0(6)

	cmpdi	9, 12
	beq	Loop_aes_gcm_8x_dec

	# load 2 more round keys (v11, v12, v13, v14)
	lxv	13, 0xd0(6)
	lxv	14, 0xe0(6)
	cmpdi	9, 14
	beq	Loop_aes_gcm_8x_dec

	# unsupported round count: nothing is processed
	b	aes_gcm_out

.align 5
Loop_aes_gcm_8x_dec:
	mr	14, 3
	mr	9, 4

	#
	# check partial block
	#
	# Any non-zero count at 56(r7) ends up at Final_block_dec.
Continue_partial_check_dec:
	ld	15, 56(7)
	cmpdi	15, 0
	beq	Continue_dec
	bgt	Final_block_dec
	cmpdi	15, 16
	blt	Final_block_dec

Continue_dec:
	# n blocks
	li	10, 128
	divdu	10, 12, 10	# n 128 bytes-blocks
	cmpdi	10, 0
	beq	Loop_last_block_dec

	# v15 already holds counter xor rk0; prepare the next seven
	vaddudm	30, 30, 31	# IV + counter
	vxor	16, 30, 29
	vaddudm	30, 30, 31
	vxor	17, 30, 29
	vaddudm	30, 30, 31
	vxor	18, 30, 29
	vaddudm	30, 30, 31
	vxor	19, 30, 29
	vaddudm	30, 30, 31
	vxor	20, 30, 29
	vaddudm	30, 30, 31
	vxor	21, 30, 29
	vaddudm	30, 30, 31
	vxor	22, 30, 29

	mtctr	10

	li	15, 16
	li	16, 32
	li	17, 48
	li	18, 64
	li	19, 80
	li	20, 96
	li	21, 112

	lwz	10, 240(6)

Loop_8x_block_dec:

	# ciphertext goes to vs15 - vs22, the states stay in v15 - v22
	lxvb16x		15, 0, 14	# load block
	lxvb16x		16, 15, 14	# load block
	lxvb16x		17, 16, 14	# load block
	lxvb16x		18, 17, 14	# load block
	lxvb16x		19, 18, 14	# load block
	lxvb16x		20, 19, 14	# load block
	lxvb16x		21, 20, 14	# load block
	lxvb16x		22, 21, 14	# load block
	addi		14, 14, 128

	Loop_aes_middle8x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_ghash_dec

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_ghash_dec

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	16, 16, 23
	vcipher	17, 17, 23
	vcipher	18, 18, 23
	vcipher	19, 19, 23
	vcipher	20, 20, 23
	vcipher	21, 21, 23
	vcipher	22, 22, 23

	vcipher	15, 15, 24
	vcipher	16, 16, 24
	vcipher	17, 17, 24
	vcipher	18, 18, 24
	vcipher	19, 19, 24
	vcipher	20, 20, 24
	vcipher	21, 21, 24
	vcipher	22, 22, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_ghash_dec
	b	aes_gcm_out

Do_next_ghash_dec:

	#
	# last round
	# VSR 47 - 54 are v15 - v22: keystream xor ciphertext = plaintext
	vcipherlast     15, 15, 23
	vcipherlast     16, 16, 23

	xxlxor		47, 47, 15
	stxvb16x        47, 0, 9	# store output
	xxlxor		48, 48, 16
	stxvb16x        48, 15, 9	# store output

	vcipherlast     17, 17, 23
	vcipherlast     18, 18, 23

	xxlxor		49, 49, 17
	stxvb16x        49, 16, 9	# store output
	xxlxor		50, 50, 18
	stxvb16x        50, 17, 9	# store output

	vcipherlast     19, 19, 23
	vcipherlast     20, 20, 23

	xxlxor		51, 51, 19
	stxvb16x        51, 18, 9	# store output
	xxlxor		52, 52, 20
	stxvb16x        52, 19, 9	# store output

	vcipherlast     21, 21, 23
	vcipherlast     22, 22, 23

	xxlxor		53, 53, 21
	stxvb16x        53, 20, 9	# store output
	xxlxor		54, 54, 22
	stxvb16x        54, 21, 9	# store output

	addi	9, 9, 128

	# copy the ciphertext (vs15 - vs22) into v15 - v22 so that the
	# hash below is computed over the input, not the plaintext
	xxlor	15+32, 15, 15
	xxlor	16+32, 16, 16
	xxlor	17+32, 17, 17
	xxlor	18+32, 18, 18
	xxlor	19+32, 19, 19
	xxlor	20+32, 20, 20
	xxlor	21+32, 21, 21
	xxlor	22+32, 22, 22

	# ghash here
	ppc_aes_gcm_ghash2_4x

	# Prepare the next eight counter blocks.  v29 remembers the
	# counter of the first one so the IV can be rewound on loop exit.
	xxlor	27+32, 0, 0
	vaddudm 30, 30, 31		# IV + counter
	vmr	29, 30
	vxor	15, 30, 27		# add round key
	vaddudm 30, 30, 31
	vxor	16, 30, 27
	vaddudm 30, 30, 31
	vxor	17, 30, 27
	vaddudm 30, 30, 31
	vxor	18, 30, 27
	vaddudm 30, 30, 31
	vxor	19, 30, 27
	vaddudm 30, 30, 31
	vxor	20, 30, 27
	vaddudm 30, 30, 31
	vxor	21, 30, 27
	vaddudm 30, 30, 31
	vxor	22, 30, 27

	addi	12, 12, -128
	addi	11, 11, 128

	bdnz	Loop_8x_block_dec

	vmr	30, 29			# drop the seven unused counters
	stxvb16x 30+32, 0, 7		# update IV

Loop_last_block_dec:
	cmpdi   12, 0
	beq     aes_gcm_out

	# loop last few blocks
	li      10, 16
	divdu   10, 12, 10

	mtctr   10

	lwz	10, 240(6)

	cmpdi   12, 16
	blt     Final_block_dec

Next_rem_block_dec:
	lxvb16x 15, 0, 14		# load block

	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_next_1x_dec

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_next_1x_dec

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_next_1x_dec

Do_next_1x_dec:
	vcipherlast     15, 15, 23

	xxlxor		47, 47, 15
	stxvb16x	47, 0, 9	# store output
	addi		14, 14, 16
	addi		9, 9, 16

	xxlor		28+32, 15, 15	# hash the ciphertext held in vs15
	#vmr		28, 15
	ppc_update_hash_1x

	addi		12, 12, -16
	addi		11, 11, 16
	xxlor		19+32, 0, 0
	vaddudm		30, 30, 31		# IV + counter
	vxor		15, 30, 19		# add round key

	bdnz	Next_rem_block_dec

	li	15, 0
	std	15, 56(7)		# clear partial?
	stxvb16x 30+32, 0, 7		# update IV
	cmpdi	12, 0
	beq	aes_gcm_out

Final_block_dec:
	# Finish encrypting the counter block in v15 to get the keystream
	# for a block that is shorter than 16 bytes or already started.
	lwz	10, 240(6)
	Loop_aes_middle_1x

	xxlor	23+32, 10, 10

	cmpdi	10, 10
	beq	Do_final_1x_dec

	# 192 bits
	xxlor	24+32, 11, 11

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 12, 12

	cmpdi	10, 12
	beq	Do_final_1x_dec

	# 256 bits
	xxlor	24+32, 13, 13

	vcipher	15, 15, 23
	vcipher	15, 15, 24

	xxlor	23+32, 14, 14

	cmpdi	10, 14
	beq	Do_final_1x_dec

Do_final_1x_dec:
	vcipherlast     15, 15, 23

	# check partial block
	li	21, 1			# decrypt
	ld	15, 56(7)		# partial?
	cmpdi	15, 0
	beq	Normal_block_dec
	bl	Do_partial_block
	cmpdi	12, 0
	ble aes_gcm_out

	b Continue_partial_check_dec

Normal_block_dec:
	# NOTE(review): this reads a full 16 bytes at r14 although only
	# r12 (less than 16) bytes remain; confirm the caller guarantees
	# that the over-read is safe.
	lxvb16x	15, 0, 14		# load last block
	xxlxor	47, 47, 15

	# create partial block mask
	li	15, 16
	sub	15, 15, 12		# index to the mask

	vspltisb	16, -1		# first 16 bytes - 0xffff...ff
	vspltisb	17, 0		# second 16 bytes - 0x0000...00
	li	10, 192
	stvx	16, 10, 1
	addi	10, 10, 16
	stvx	17, 10, 1

	# reading 16 bytes at offset 16 - r12 yields r12 bytes of 0xff
	# followed by zeros (mask lands in vs16)
	addi	10, 1, 192
	lxvb16x	16, 15, 10		# load partial block mask
	xxland	47, 47, 16

	xxland	32+28, 15, 16		# hash the masked ciphertext
	#vmr	28, 15
	ppc_update_hash_1x

	# * should store only the remaining bytes.
	bl	Write_partial_block

	stxvb16x 30+32, 0, 7		# update IV
	std	12, 56(7)		# update partial?
	li	16, 16

	stxvb16x	32, 0, 8		# write out Xi
	stxvb16x	32, 16, 8		# write out Xi
	b aes_gcm_out