#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMs[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/

# Copyright (c) 2006-2017, CRYPTOGAMS by
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#       * Redistributions of source code must retain copyright notices,
#         this list of conditions and the following disclaimer.
#
#       * Redistributions in binary form must reproduce the above
#         copyright notice, this list of conditions and the following
#         disclaimer in the documentation and/or other materials
#         provided with the distribution.
#
#       * Neither the name of the CRYPTOGAMS nor the names of its
#         copyright holder and contributors may be used to endorse or
#         promote products derived from this software without specific
#         prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in pure AltiVec/VMX way [when data
# is aligned programmatically, which in turn guarantees exception-
# free execution], but it turned to hamper performance when vcipher
# instructions are interleaved. It's reckoned that eventual
# misalignment penalties at page boundaries are in average lower
# than additional overhead in pure AltiVec approach.
#
# May 2016
#
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
# systems were measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

use strict;
use warnings;

# Usage: <this script> <flavour> [<output>]
#
# <flavour> must contain "64" or "32" (selects the ABI word size and the
# matching load/store/compare mnemonics) and may end in "le" to request
# little-endian output.  The remaining argument, if any, is appended
# verbatim to the ppc-xlate.pl command line.
my $flavour = shift;
$flavour = "" unless defined $flavour;	# keeps the "nonsense" message below warning-free

my ($SIZE_T, $LRSAVE, $STU, $POP, $PUSH, $UCMP, $SHL);
if ($flavour =~ /64/) {
    $SIZE_T = 8;
    $LRSAVE = 2*$SIZE_T;
    $STU    = "stdu";
    $POP    = "ld";
    $PUSH   = "std";
    $UCMP   = "cmpld";
    $SHL    = "sldi";
} elsif ($flavour =~ /32/) {
    $SIZE_T = 4;
    $LRSAVE = $SIZE_T;
    $STU    = "stwu";
    $POP    = "lwz";
    $PUSH   = "stw";
    $UCMP   = "cmplw";
    $SHL    = "slwi";
} else {
    die "nonsense $flavour";
}

# True (non-zero) only for flavours ending in "le".
my $LITTLE_ENDIAN = ($flavour =~ /le$/) ? $SIZE_T : 0;

# Locate ppc-xlate.pl next to this script or in ../../perlasm relative to it.
# When $0 carries no directory component the search is relative to the cwd.
my $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
my $xlate;
for my $candidate ("${dir}ppc-xlate.pl", "${dir}../../perlasm/ppc-xlate.pl") {
    if (-f $candidate) {
        $xlate = $candidate;
        last;
    }
}
defined $xlate or die "can't locate ppc-xlate.pl";

# Everything printed from here on is piped through the translator.
# The original used "... .shift || die", where "||" bound to "shift" and
# the open itself was never checked; use low-precedence "or" instead.
my $output = shift;
$output = "" unless defined $output;
open(STDOUT, "| $^X $xlate $flavour $output")
    or die "can't call $xlate: $!";

my $FRAME  = 8*$SIZE_T;
my $prefix = "aes_p10";

my $sp     = "r1";
my $vrsave = "r12";

# Accumulates the untranslated assembly; post-processed at the bottom.
my $code = "";

#########################################################################
{
# Key setup procedures
#
# ${prefix}_set_encrypt_key takes its arguments in r3..r5 (input key, key
# length in bits, output key schedule).  As emitted below it returns -1
# when either pointer is zero, -2 when the bit count is below 128, above
# 256 or not a multiple of 64, and 0 on success.  The round count (10, 12
# or 14) is stored as a 32-bit word at the final value of the output
# pointer, which ${prefix}_set_decrypt_key treats as schedule start + 240.
#
# ${prefix}_set_decrypt_key builds the encryption schedule via
# Lset_encrypt_key and then swaps 16-byte round keys end-for-end in place.
#
# The "rcon" table precedes Lconsts; its ".long ... ?rev/?asis" lines are
# rewritten as ".byte" lines by the post-processing loop at the bottom.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
.long	0,0,0,0 ?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by "

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# roate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,$FRAME+$LRSAVE($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}

#########################################################################
{
# Single block en- and decrypt procedures
#
# gen_block($dir) appends the body of ${prefix}_${dir}crypt to $code.
#   $dir - "en" emits vcipher/vcipherlast, "de" emits vncipher/vncipherlast;
#          it is also spliced into the symbol and loop-label names.
# The emitted routine takes input block, output block and key schedule in
# r3..r5 and reads the round count from offset 240 of the key schedule.
#
# The original declared an empty prototype ("sub gen_block ()") although
# the sub takes an argument, and relied on "&gen_block(...)" to bypass it.
sub gen_block {
    my ($dir) = @_;
    my $n = $dir eq "de" ? "n" : "";
    my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

    $code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
    return;
}
gen_block("en");
gen_block("de");
}

# Post-process the accumulated assembly line by line and print it to the
# translator pipe.
my $in_consts = 1;	# true until the "Lconsts:" label has been seen
for my $line (split("\n", $code)) {
    # `expr` spans are replaced by the value of the Perl expression.
    $line =~ s/\`([^\`]*)\`/eval($1)/ge;

    # constants table endian-specific conversion
    if ($in_consts && $line =~ m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/) {
        # Copy the captures right away: later matches would clobber them.
        my ($kind, $values, $conv) = ($1, $2, $3);
        $values =~ s/\s+$//;	# padding before the '?' tag is not data

        # convert to endian-agnostic format (a flat list of bytes);
        # tokens starting with "0" go through oct(), which also parses 0x...
        my @bytes;
        if ($kind eq "long") {
            for my $token (split(/,\s*/, $values)) {
                my $word = ($token =~ /^0/) ? oct($token) : int($token);
                push @bytes, ($word>>24)&0xff, ($word>>16)&0xff,
                             ($word>>8)&0xff,  $word&0xff;
            }
        } else {
            @bytes = map { /^0/ ? oct($_) : int($_) } split(/,\s*/, $values);
        }

        # little-endian conversion; any other tag (e.g. ?asis) is left alone
        if ($LITTLE_ENDIAN) {
            if ($conv =~ /\?inv/) {
                @bytes = map { $_ ^ 0xf } @bytes;
            } elsif ($conv =~ /\?rev/) {
                @bytes = reverse(@bytes);
            }
        }

        # emit
        print ".byte\t", join(',', map { sprintf("0x%02x", $_) } @bytes), "\n";
        next;
    }
    $in_consts = 0 if $line =~ m/Lconsts:/;	# end of table

    # instructions prefixed with '?' are endian-specific and need
    # to be adjusted accordingly...  Only the first matching rule fires.
    if ($LITTLE_ENDIAN) {	# little-endian
        $line =~ s/le\?//          or
        $line =~ s/be\?/#be#/      or
        $line =~ s/\?lvsr/lvsl/    or
        $line =~ s/\?lvsl/lvsr/    or
        $line =~ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/ or
        $line =~ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/ or
        $line =~ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/;
    } else {			# big-endian
        $line =~ s/le\?/#le#/      or
        $line =~ s/be\?//          or
        $line =~ s/\?([a-z]+)/$1/;
    }

    print $line, "\n";
}

# STDOUT is a pipe: buffered write errors and a non-zero exit status of the
# translator only become visible when the handle is closed, so check it.
close(STDOUT)
    or die($! ? "error closing pipe to $xlate: $!"
              : "$xlate exited with status $?");