diff options
Diffstat (limited to 'lib/accelerated/x86/coff/aesni-x86.s')
-rw-r--r-- | lib/accelerated/x86/coff/aesni-x86.s | 3255 |
1 files changed, 3255 insertions, 0 deletions
diff --git a/lib/accelerated/x86/coff/aesni-x86.s b/lib/accelerated/x86/coff/aesni-x86.s new file mode 100644 index 0000000..92e5431 --- /dev/null +++ b/lib/accelerated/x86/coff/aesni-x86.s @@ -0,0 +1,3255 @@ +# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.text +.globl _aesni_encrypt +.def _aesni_encrypt; .scl 2; .type 32; .endef +.align 16 +_aesni_encrypt: +.L_aesni_encrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L000enc1_loop_1: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L000enc1_loop_1 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.globl _aesni_decrypt +.def _aesni_decrypt; .scl 2; .type 32; .endef +.align 16 +_aesni_decrypt: +.L_aesni_decrypt_begin: + movl 4(%esp),%eax + movl 12(%esp),%edx + movups (%eax),%xmm2 + movl 240(%edx),%ecx + movl 8(%esp),%eax + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L001dec1_loop_2: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L001dec1_loop_2 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movups %xmm2,(%eax) + pxor %xmm2,%xmm2 + ret +.def __aesni_encrypt2; .scl 3; .type 32; .endef +.align 16 +__aesni_encrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L002enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L002enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + ret +.def __aesni_decrypt2; .scl 3; .type 32; .endef +.align 16 +__aesni_decrypt2: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L003dec2_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L003dec2_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 + ret +.def __aesni_encrypt3; .scl 3; .type 32; .endef +.align 16 +__aesni_encrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L004enc3_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L004enc3_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 + ret +.def __aesni_decrypt3; .scl 3; .type 32; .endef +.align 16 +__aesni_decrypt3: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx + addl $16,%ecx +.L005dec3_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L005dec3_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 + ret +.def __aesni_encrypt4; .scl 3; .type 32; .endef +.align 16 +__aesni_encrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L006enc4_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L006enc4_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 + ret +.def __aesni_decrypt4; .scl 3; .type 32; .endef +.align 16 +__aesni_decrypt4: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + shll $4,%ecx + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + movups 32(%edx),%xmm0 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 15,31,64,0 + addl $16,%ecx +.L007dec4_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L007dec4_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 + ret +.def __aesni_encrypt6; .scl 3; .type 32; .endef +.align 16 +__aesni_encrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,220,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,220,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L008_aesni_encrypt6_inner +.align 16 +.L009enc6_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.L008_aesni_encrypt6_inner: +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.L_aesni_encrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 +.byte 102,15,56,220,224 +.byte 102,15,56,220,232 +.byte 102,15,56,220,240 +.byte 102,15,56,220,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L009enc6_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 +.byte 102,15,56,221,224 +.byte 102,15,56,221,232 +.byte 102,15,56,221,240 +.byte 102,15,56,221,248 + ret +.def __aesni_decrypt6; .scl 3; .type 32; .endef +.align 16 +__aesni_decrypt6: + movups (%edx),%xmm0 + shll $4,%ecx + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 +.byte 102,15,56,222,209 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 +.byte 102,15,56,222,217 + leal 32(%edx,%ecx,1),%edx + negl %ecx +.byte 102,15,56,222,225 + pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 + addl $16,%ecx + jmp .L010_aesni_decrypt6_inner +.align 16 +.L011dec6_loop: +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.L010_aesni_decrypt6_inner: +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.L_aesni_decrypt6_enter: + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,222,208 +.byte 102,15,56,222,216 +.byte 102,15,56,222,224 +.byte 102,15,56,222,232 +.byte 102,15,56,222,240 +.byte 102,15,56,222,248 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L011dec6_loop +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 +.byte 102,15,56,223,208 +.byte 102,15,56,223,216 +.byte 102,15,56,223,224 +.byte 102,15,56,223,232 +.byte 102,15,56,223,240 +.byte 102,15,56,223,248 + ret +.globl _aesni_ecb_encrypt +.def _aesni_ecb_encrypt; .scl 2; .type 32; .endef +.align 16 +_aesni_ecb_encrypt: +.L_aesni_ecb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + andl $-16,%eax + jz .L012ecb_ret + movl 240(%edx),%ecx + testl %ebx,%ebx + jz .L013ecb_decrypt + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L014ecb_enc_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L015ecb_enc_loop6_enter +.align 16 +.L016ecb_enc_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L015ecb_enc_loop6_enter: + call __aesni_encrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L016ecb_enc_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L012ecb_ret +.L014ecb_enc_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L017ecb_enc_one + movups 16(%esi),%xmm3 + je .L018ecb_enc_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L019ecb_enc_three + movups 48(%esi),%xmm5 + je .L020ecb_enc_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_encrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L012ecb_ret +.align 16 +.L017ecb_enc_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L021enc1_loop_3: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L021enc1_loop_3 +.byte 102,15,56,221,209 + movups %xmm2,(%edi) + jmp .L012ecb_ret +.align 16 +.L018ecb_enc_two: + call __aesni_encrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L012ecb_ret +.align 16 +.L019ecb_enc_three: + call __aesni_encrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L012ecb_ret +.align 16 +.L020ecb_enc_four: + call __aesni_encrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + jmp .L012ecb_ret +.align 16 +.L013ecb_decrypt: + movl %edx,%ebp + movl %ecx,%ebx + cmpl $96,%eax + jb .L022ecb_dec_tail + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + subl $96,%eax + jmp .L023ecb_dec_loop6_enter +.align 16 +.L024ecb_dec_loop6: + movups %xmm2,(%edi) + movdqu (%esi),%xmm2 + movups %xmm3,16(%edi) + movdqu 16(%esi),%xmm3 + movups %xmm4,32(%edi) + movdqu 32(%esi),%xmm4 + movups %xmm5,48(%edi) + movdqu 48(%esi),%xmm5 + movups %xmm6,64(%edi) + movdqu 64(%esi),%xmm6 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi +.L023ecb_dec_loop6_enter: + call __aesni_decrypt6 + movl %ebp,%edx + movl %ebx,%ecx + subl $96,%eax + jnc .L024ecb_dec_loop6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + movups %xmm7,80(%edi) + leal 96(%edi),%edi + addl $96,%eax + jz .L012ecb_ret +.L022ecb_dec_tail: + movups (%esi),%xmm2 + cmpl $32,%eax + jb .L025ecb_dec_one + movups 16(%esi),%xmm3 + je .L026ecb_dec_two + movups 32(%esi),%xmm4 + cmpl $64,%eax + jb .L027ecb_dec_three + movups 48(%esi),%xmm5 + je .L028ecb_dec_four + movups 64(%esi),%xmm6 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L012ecb_ret +.align 16 +.L025ecb_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L029dec1_loop_4: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L029dec1_loop_4 +.byte 102,15,56,223,209 + movups %xmm2,(%edi) + jmp .L012ecb_ret +.align 16 +.L026ecb_dec_two: + call __aesni_decrypt2 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L012ecb_ret +.align 16 +.L027ecb_dec_three: + call __aesni_decrypt3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L012ecb_ret +.align 16 +.L028ecb_dec_four: + call __aesni_decrypt4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L012ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ccm64_encrypt_blocks +.def _aesni_ccm64_encrypt_blocks; .scl 2; .type 32; .endef +.align 16 +_aesni_ccm64_encrypt_blocks: +.L_aesni_ccm64_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + shll $4,%ecx + movl $16,%ebx + leal (%edx),%ebp + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + leal 32(%edx,%ecx,1),%edx + subl %ecx,%ebx +.byte 102,15,56,0,253 +.L030ccm64_enc_outer: + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups (%esi),%xmm6 + xorps %xmm0,%xmm2 + movups 16(%ebp),%xmm1 + xorps %xmm6,%xmm0 + xorps %xmm0,%xmm3 + movups 32(%ebp),%xmm0 +.L031ccm64_enc2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L031ccm64_enc2_loop +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + paddq 16(%esp),%xmm7 + decl %eax +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) +.byte 102,15,56,0,213 + leal 16(%edi),%edi + jnz .L030ccm64_enc_outer + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ccm64_decrypt_blocks +.def _aesni_ccm64_decrypt_blocks; .scl 2; .type 32; .endef +.align 16 +_aesni_ccm64_decrypt_blocks: +.L_aesni_ccm64_decrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl 40(%esp),%ecx + movl %esp,%ebp + subl $60,%esp + andl $-16,%esp + movl %ebp,48(%esp) + movdqu (%ebx),%xmm7 + movdqu (%ecx),%xmm3 + movl 240(%edx),%ecx + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $1,%ebx + xorl %ebp,%ebp + movl %ebx,16(%esp) + movl %ebp,20(%esp) + movl %ebp,24(%esp) + movl %ebp,28(%esp) + movdqa (%esp),%xmm5 + movdqa %xmm7,%xmm2 + movl %edx,%ebp + movl %ecx,%ebx +.byte 102,15,56,0,253 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L032enc1_loop_5: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L032enc1_loop_5 +.byte 102,15,56,221,209 + shll $4,%ebx + movl $16,%ecx + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 + leal 16(%esi),%esi + subl %ebx,%ecx + leal 32(%ebp,%ebx,1),%edx + movl %ecx,%ebx + jmp .L033ccm64_dec_outer +.align 16 +.L033ccm64_dec_outer: + xorps %xmm2,%xmm6 + movdqa %xmm7,%xmm2 + movups %xmm6,(%edi) + leal 16(%edi),%edi +.byte 102,15,56,0,213 + subl $1,%eax + jz .L034ccm64_dec_break + movups (%ebp),%xmm0 + movl %ebx,%ecx + movups 16(%ebp),%xmm1 + xorps %xmm0,%xmm6 + xorps %xmm0,%xmm2 + xorps %xmm6,%xmm3 + movups 32(%ebp),%xmm0 +.L035ccm64_dec2_loop: +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 + movups (%edx,%ecx,1),%xmm1 + addl $32,%ecx +.byte 102,15,56,220,208 +.byte 102,15,56,220,216 + movups -16(%edx,%ecx,1),%xmm0 + jnz .L035ccm64_dec2_loop + movups (%esi),%xmm6 + paddq 16(%esp),%xmm7 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,221,208 +.byte 102,15,56,221,216 + leal 16(%esi),%esi + jmp .L033ccm64_dec_outer +.align 16 +.L034ccm64_dec_break: + movl 240(%ebp),%ecx + movl %ebp,%edx + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm6 + leal 32(%edx),%edx + xorps %xmm6,%xmm3 +.L036enc1_loop_6: +.byte 102,15,56,220,217 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L036enc1_loop_6 +.byte 102,15,56,221,217 + movl 48(%esp),%esp + movl 40(%esp),%edi + movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ctr32_encrypt_blocks +.def _aesni_ctr32_encrypt_blocks; .scl 2; .type 32; .endef +.align 16 +_aesni_ctr32_encrypt_blocks: +.L_aesni_ctr32_encrypt_blocks_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl 36(%esp),%ebx + movl %esp,%ebp + subl $88,%esp + andl $-16,%esp + movl %ebp,80(%esp) + cmpl $1,%eax + je .L037ctr32_one_shortcut + movdqu (%ebx),%xmm7 + movl $202182159,(%esp) + movl $134810123,4(%esp) + movl $67438087,8(%esp) + movl $66051,12(%esp) + movl $6,%ecx + xorl %ebp,%ebp + movl %ecx,16(%esp) + movl %ecx,20(%esp) + movl %ecx,24(%esp) + movl %ebp,28(%esp) +.byte 102,15,58,22,251,3 +.byte 102,15,58,34,253,3 + movl 240(%edx),%ecx + bswap %ebx + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqa (%esp),%xmm2 +.byte 102,15,58,34,195,0 + leal 3(%ebx),%ebp +.byte 102,15,58,34,205,0 + incl %ebx +.byte 102,15,58,34,195,1 + incl %ebp +.byte 102,15,58,34,205,1 + incl %ebx +.byte 102,15,58,34,195,2 + incl %ebp +.byte 102,15,58,34,205,2 + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + movdqu (%edx),%xmm6 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + pshufd $192,%xmm0,%xmm2 + pshufd $128,%xmm0,%xmm3 + cmpl $6,%eax + jb .L038ctr32_tail + pxor %xmm6,%xmm7 + shll $4,%ecx + movl $16,%ebx + movdqa %xmm7,32(%esp) + movl %edx,%ebp + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + subl $6,%eax + jmp .L039ctr32_loop6 +.align 16 +.L039ctr32_loop6: + pshufd $64,%xmm0,%xmm4 + movdqa 32(%esp),%xmm0 + pshufd $192,%xmm1,%xmm5 + pxor %xmm0,%xmm2 + pshufd $128,%xmm1,%xmm6 + pxor %xmm0,%xmm3 + pshufd $64,%xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 +.byte 102,15,56,220,209 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 +.byte 102,15,56,220,217 + movups 32(%ebp),%xmm0 + movl %ebx,%ecx +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups %xmm2,(%edi) + movdqa 16(%esp),%xmm0 + xorps %xmm1,%xmm4 + movdqa 64(%esp),%xmm1 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + paddd %xmm0,%xmm1 + paddd 48(%esp),%xmm0 + movdqa (%esp),%xmm2 + movups 48(%esi),%xmm3 + movups 64(%esi),%xmm4 + xorps %xmm3,%xmm5 + movups 80(%esi),%xmm3 + leal 96(%esi),%esi + movdqa %xmm0,48(%esp) +.byte 102,15,56,0,194 + xorps %xmm4,%xmm6 + movups %xmm5,48(%edi) + xorps %xmm3,%xmm7 + movdqa %xmm1,64(%esp) +.byte 102,15,56,0,202 + movups %xmm6,64(%edi) + pshufd $192,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + pshufd $128,%xmm0,%xmm3 + subl $6,%eax + jnc .L039ctr32_loop6 + addl $6,%eax + jz .L040ctr32_ret + movdqu (%ebp),%xmm7 + movl %ebp,%edx + pxor 32(%esp),%xmm7 + movl 240(%ebp),%ecx +.L038ctr32_tail: + por %xmm7,%xmm2 + cmpl $2,%eax + jb .L041ctr32_one + pshufd $64,%xmm0,%xmm4 + por %xmm7,%xmm3 + je .L042ctr32_two + pshufd $192,%xmm1,%xmm5 + por %xmm7,%xmm4 + cmpl $4,%eax + jb .L043ctr32_three + pshufd $128,%xmm1,%xmm6 + por %xmm7,%xmm5 + je .L044ctr32_four + por %xmm7,%xmm6 + call __aesni_encrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps %xmm1,%xmm2 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm3 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm4 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm5 + movups %xmm2,(%edi) + xorps %xmm1,%xmm6 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + jmp .L040ctr32_ret +.align 16 +.L037ctr32_one_shortcut: + movups (%ebx),%xmm2 + movl 240(%edx),%ecx +.L041ctr32_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L045enc1_loop_7: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L045enc1_loop_7 +.byte 102,15,56,221,209 + movups (%esi),%xmm6 + xorps %xmm2,%xmm6 + movups %xmm6,(%edi) + jmp .L040ctr32_ret +.align 16 +.L042ctr32_two: + call __aesni_encrypt2 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + jmp .L040ctr32_ret +.align 16 +.L043ctr32_three: + call __aesni_encrypt3 + movups (%esi),%xmm5 + movups 16(%esi),%xmm6 + xorps %xmm5,%xmm2 + movups 32(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm7,%xmm4 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + jmp .L040ctr32_ret +.align 16 +.L044ctr32_four: + call __aesni_encrypt4 + movups (%esi),%xmm6 + movups 16(%esi),%xmm7 + movups 32(%esi),%xmm1 + xorps %xmm6,%xmm2 + movups 48(%esi),%xmm0 + xorps %xmm7,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) +.L040ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movl 80(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_xts_encrypt +.def _aesni_xts_encrypt; .scl 2; .type 32; .endef +.align 16 +_aesni_xts_encrypt: +.L_aesni_xts_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L046enc1_loop_8: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L046enc1_loop_8 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + movl 240(%edx),%ecx + andl $-16,%esp + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + movl %edx,%ebp + movl %ecx,%ebx + subl $96,%eax + jc .L047xts_enc_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L048xts_enc_loop6 +.align 16 +.L048xts_enc_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,220,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,220,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L048xts_enc_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L047xts_enc_short: + addl $96,%eax + jz .L049xts_enc_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L050xts_enc_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L051xts_enc_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L052xts_enc_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L053xts_enc_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_encrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L054xts_enc_done +.align 16 +.L050xts_enc_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L055enc1_loop_9: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L055enc1_loop_9 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L051xts_enc_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call __aesni_encrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L052xts_enc_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L053xts_enc_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L054xts_enc_done +.align 16 +.L049xts_enc_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L056xts_enc_ret + movdqa %xmm1,%xmm5 + movl %eax,112(%esp) + jmp .L057xts_enc_steal +.align 16 +.L054xts_enc_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L056xts_enc_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm5 + paddq %xmm1,%xmm1 + pand 96(%esp),%xmm5 + pxor %xmm1,%xmm5 +.L057xts_enc_steal: + movzbl (%esi),%ecx + movzbl -16(%edi),%edx + leal 1(%esi),%esi + movb %cl,-16(%edi) + movb %dl,(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L057xts_enc_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups -16(%edi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L058enc1_loop_10: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L058enc1_loop_10 +.byte 102,15,56,221,209 + xorps %xmm5,%xmm2 + movups %xmm2,-16(%edi) +.L056xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_xts_decrypt +.def _aesni_xts_decrypt; .scl 2; .type 32; .endef +.align 16 +_aesni_xts_decrypt: +.L_aesni_xts_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 36(%esp),%edx + movl 40(%esp),%esi + movl 240(%edx),%ecx + movups (%esi),%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L059enc1_loop_11: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L059enc1_loop_11 +.byte 102,15,56,221,209 + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movl %esp,%ebp + subl $120,%esp + andl $-16,%esp + xorl %ebx,%ebx + testl $15,%eax + setnz %bl + shll $4,%ebx + subl %ebx,%eax + movl $135,96(%esp) + movl $0,100(%esp) + movl $1,104(%esp) + movl $0,108(%esp) + movl %eax,112(%esp) + movl %ebp,116(%esp) + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ecx,%ebx + movdqa %xmm2,%xmm1 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + pcmpgtd %xmm1,%xmm0 + andl $-16,%eax + subl $96,%eax + jc .L060xts_dec_short + shll $4,%ecx + movl $16,%ebx + subl %ecx,%ebx + leal 32(%edx,%ecx,1),%edx + jmp .L061xts_dec_loop6 +.align 16 +.L061xts_dec_loop6: + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,16(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,32(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,64(%esp) + paddq %xmm1,%xmm1 + movups (%ebp),%xmm0 + pand %xmm3,%xmm7 + movups (%esi),%xmm2 + pxor %xmm1,%xmm7 + movl %ebx,%ecx + movdqu 16(%esi),%xmm3 + xorps %xmm0,%xmm2 + movdqu 32(%esi),%xmm4 + pxor %xmm0,%xmm3 + movdqu 48(%esi),%xmm5 + pxor %xmm0,%xmm4 + movdqu 64(%esi),%xmm6 + pxor %xmm0,%xmm5 + movdqu 80(%esi),%xmm1 + pxor %xmm0,%xmm6 + leal 96(%esi),%esi + pxor (%esp),%xmm2 + movdqa %xmm7,80(%esp) + pxor %xmm1,%xmm7 + movups 16(%ebp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 +.byte 102,15,56,222,209 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 +.byte 102,15,56,222,217 + pxor %xmm0,%xmm7 + movups 32(%ebp),%xmm0 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + call .L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm1 + pxor %xmm0,%xmm0 + xorps (%esp),%xmm2 + pcmpgtd %xmm1,%xmm0 + xorps 16(%esp),%xmm3 + movups %xmm2,(%edi) + xorps 32(%esp),%xmm4 + movups %xmm3,16(%edi) + xorps 48(%esp),%xmm5 + movups %xmm4,32(%edi) + xorps 64(%esp),%xmm6 + movups %xmm5,48(%edi) + xorps %xmm1,%xmm7 + movups %xmm6,64(%edi) + pshufd $19,%xmm0,%xmm2 + movups %xmm7,80(%edi) + leal 96(%edi),%edi + movdqa 96(%esp),%xmm3 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + subl $96,%eax + jnc .L061xts_dec_loop6 + movl 240(%ebp),%ecx + movl %ebp,%edx + movl %ecx,%ebx +.L060xts_dec_short: + addl $96,%eax + jz .L062xts_dec_done6x + movdqa %xmm1,%xmm5 + cmpl $32,%eax + jb .L063xts_dec_one + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + je .L064xts_dec_two + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + cmpl $64,%eax + jb .L065xts_dec_three + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa %xmm1,%xmm7 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 + movdqa %xmm5,(%esp) + movdqa %xmm6,16(%esp) + je .L066xts_dec_four + movdqa %xmm7,32(%esp) + pshufd $19,%xmm0,%xmm7 + movdqa %xmm1,48(%esp) + paddq %xmm1,%xmm1 + pand %xmm3,%xmm7 + pxor %xmm1,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + pxor (%esp),%xmm2 + movdqu 48(%esi),%xmm5 + pxor 16(%esp),%xmm3 + movdqu 64(%esi),%xmm6 + pxor 32(%esp),%xmm4 + leal 80(%esi),%esi + pxor 48(%esp),%xmm5 + movdqa %xmm7,64(%esp) + pxor %xmm7,%xmm6 + call __aesni_decrypt6 + movaps 64(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps 32(%esp),%xmm4 + movups %xmm2,(%edi) + xorps 48(%esp),%xmm5 + movups %xmm3,16(%edi) + xorps %xmm1,%xmm6 + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + movups %xmm6,64(%edi) + leal 80(%edi),%edi + jmp .L067xts_dec_done +.align 16 +.L063xts_dec_one: + movups (%esi),%xmm2 + leal 16(%esi),%esi + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L068dec1_loop_12: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L068dec1_loop_12 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) + leal 16(%edi),%edi + movdqa %xmm5,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L064xts_dec_two: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + leal 32(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + call __aesni_decrypt2 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 32(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L065xts_dec_three: + movaps %xmm1,%xmm7 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + leal 48(%esi),%esi + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + call __aesni_decrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + leal 48(%edi),%edi + movdqa %xmm7,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L066xts_dec_four: + movaps %xmm1,%xmm6 + movups (%esi),%xmm2 + movups 16(%esi),%xmm3 + movups 32(%esi),%xmm4 + xorps (%esp),%xmm2 + movups 48(%esi),%xmm5 + leal 64(%esi),%esi + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + xorps %xmm6,%xmm5 + call __aesni_decrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi) + xorps %xmm6,%xmm5 + movups %xmm3,16(%edi) + movups %xmm4,32(%edi) + movups %xmm5,48(%edi) + leal 64(%edi),%edi + movdqa %xmm6,%xmm1 + jmp .L067xts_dec_done +.align 16 +.L062xts_dec_done6x: + movl 112(%esp),%eax + andl $15,%eax + jz .L069xts_dec_ret + movl %eax,112(%esp) + jmp .L070xts_dec_only_one_more +.align 16 +.L067xts_dec_done: + movl 112(%esp),%eax + pxor %xmm0,%xmm0 + andl $15,%eax + jz .L069xts_dec_ret + pcmpgtd %xmm1,%xmm0 + movl %eax,112(%esp) + pshufd $19,%xmm0,%xmm2 + pxor %xmm0,%xmm0 + movdqa 96(%esp),%xmm3 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm2 + pcmpgtd %xmm1,%xmm0 + pxor %xmm2,%xmm1 +.L070xts_dec_only_one_more: + pshufd $19,%xmm0,%xmm5 + movdqa %xmm1,%xmm6 + paddq %xmm1,%xmm1 + pand %xmm3,%xmm5 + pxor %xmm1,%xmm5 + movl %ebp,%edx + movl %ebx,%ecx + movups (%esi),%xmm2 + xorps %xmm5,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L071dec1_loop_13: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L071dec1_loop_13 +.byte 102,15,56,223,209 + xorps %xmm5,%xmm2 + movups %xmm2,(%edi) +.L072xts_dec_steal: + movzbl 16(%esi),%ecx + movzbl (%edi),%edx + leal 1(%esi),%esi + movb %cl,(%edi) + movb %dl,16(%edi) + leal 1(%edi),%edi + subl $1,%eax + jnz .L072xts_dec_steal + subl 112(%esp),%edi + movl %ebp,%edx + movl %ebx,%ecx + movups (%edi),%xmm2 + xorps %xmm6,%xmm2 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L073dec1_loop_14: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L073dec1_loop_14 +.byte 102,15,56,223,209 + xorps %xmm6,%xmm2 + movups %xmm2,(%edi) +.L069xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) + movl 116(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ocb_encrypt +.def _aesni_ocb_encrypt; .scl 2; .type 32; .endef +.align 16 +_aesni_ocb_encrypt: +.L_aesni_ocb_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 40(%esp),%ecx + movl 48(%esp),%ebx + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movdqu (%ecx),%xmm0 + movl 36(%esp),%ebp + movdqu (%ebx),%xmm1 + movl 44(%esp),%ebx + movl %esp,%ecx + subl $132,%esp + andl $-16,%esp + subl %esi,%edi + shll $4,%eax + leal -96(%esi,%eax,1),%eax + movl %edi,120(%esp) + movl %eax,124(%esp) + movl %ecx,128(%esp) + movl 240(%edx),%ecx + testl $1,%ebp + jnz .L074odd + bsfl %ebp,%eax + addl $1,%ebp + shll $4,%eax + movdqu (%ebx,%eax,1),%xmm7 + movl %edx,%eax + movdqu (%esi),%xmm2 + leal 16(%esi),%esi + pxor %xmm0,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm7,%xmm2 + movdqa %xmm1,%xmm6 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L075enc1_loop_15: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L075enc1_loop_15 +.byte 102,15,56,221,209 + xorps %xmm7,%xmm2 + movdqa %xmm7,%xmm0 + movdqa %xmm6,%xmm1 + movups %xmm2,-16(%edi,%esi,1) + movl 240(%eax),%ecx + movl %eax,%edx + movl 124(%esp),%eax +.L074odd: + shll $4,%ecx + movl $16,%edi + subl %ecx,%edi + movl %edx,112(%esp) + leal 32(%edx,%ecx,1),%edx + movl %edi,116(%esp) + cmpl %eax,%esi + ja .L076short + jmp .L077grandloop +.align 32 +.L077grandloop: + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + leal 5(%ebp),%edi + addl $6,%ebp + bsfl %ecx,%ecx + bsfl %eax,%eax + bsfl %edi,%edi + shll $4,%ecx + shll $4,%eax + shll $4,%edi + movdqu (%ebx),%xmm2 + movdqu (%ebx,%ecx,1),%xmm3 + movl 116(%esp),%ecx + movdqa %xmm2,%xmm4 + movdqu (%ebx,%eax,1),%xmm5 + movdqa %xmm2,%xmm6 + movdqu (%ebx,%edi,1),%xmm7 + pxor %xmm0,%xmm2 + pxor %xmm2,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm3,%xmm4 + movdqa %xmm3,16(%esp) + pxor %xmm4,%xmm5 + movdqa %xmm4,32(%esp) + pxor %xmm5,%xmm6 + movdqa %xmm5,48(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + movups -48(%edx,%ecx,1),%xmm0 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm3,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm6,%xmm1 + pxor %xmm0,%xmm6 + pxor %xmm7,%xmm1 + pxor %xmm0,%xmm7 + movdqa %xmm1,96(%esp) + movups -32(%edx,%ecx,1),%xmm1 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + pxor 80(%esp),%xmm7 + movups -16(%edx,%ecx,1),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movl 120(%esp),%edi + movl 124(%esp),%eax + call .L_aesni_encrypt6_enter + movdqa 80(%esp),%xmm0 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + pxor %xmm0,%xmm7 + movdqa 96(%esp),%xmm1 + movdqu %xmm2,-96(%edi,%esi,1) + movdqu %xmm3,-80(%edi,%esi,1) + movdqu %xmm4,-64(%edi,%esi,1) + movdqu %xmm5,-48(%edi,%esi,1) + movdqu %xmm6,-32(%edi,%esi,1) + movdqu %xmm7,-16(%edi,%esi,1) + cmpl %eax,%esi + jb .L077grandloop +.L076short: + addl $96,%eax + subl %esi,%eax + jz .L078done + cmpl $32,%eax + jb .L079one + je .L080two + cmpl $64,%eax + jb .L081three + je .L082four + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + bsfl %ecx,%ecx + bsfl %eax,%eax + shll $4,%ecx + shll $4,%eax + movdqu (%ebx),%xmm2 + movdqu (%ebx,%ecx,1),%xmm3 + movl 116(%esp),%ecx + movdqa %xmm2,%xmm4 + movdqu (%ebx,%eax,1),%xmm5 + movdqa %xmm2,%xmm6 + pxor %xmm0,%xmm2 + pxor %xmm2,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm3,%xmm4 + movdqa %xmm3,16(%esp) + pxor %xmm4,%xmm5 + movdqa %xmm4,32(%esp) + pxor %xmm5,%xmm6 + movdqa %xmm5,48(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm6,64(%esp) + movups -48(%edx,%ecx,1),%xmm0 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm0,%xmm2 + pxor %xmm3,%xmm1 + pxor %xmm0,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm0,%xmm4 + pxor %xmm5,%xmm1 + pxor %xmm0,%xmm5 + pxor %xmm6,%xmm1 + pxor %xmm0,%xmm6 + movdqa %xmm1,96(%esp) + movups -32(%edx,%ecx,1),%xmm1 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + movups -16(%edx,%ecx,1),%xmm0 +.byte 102,15,56,220,209 +.byte 102,15,56,220,217 +.byte 102,15,56,220,225 +.byte 102,15,56,220,233 +.byte 102,15,56,220,241 +.byte 102,15,56,220,249 + movl 120(%esp),%edi + call .L_aesni_encrypt6_enter + movdqa 64(%esp),%xmm0 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor %xmm0,%xmm6 + movdqa 96(%esp),%xmm1 + movdqu %xmm2,(%edi,%esi,1) + movdqu %xmm3,16(%edi,%esi,1) + movdqu %xmm4,32(%edi,%esi,1) + movdqu %xmm5,48(%edi,%esi,1) + movdqu %xmm6,64(%edi,%esi,1) + jmp .L078done +.align 16 +.L079one: + movdqu (%ebx),%xmm7 + movl 112(%esp),%edx + movdqu (%esi),%xmm2 + movl 240(%edx),%ecx + pxor %xmm0,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm7,%xmm2 + movdqa %xmm1,%xmm6 + movl 120(%esp),%edi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L083enc1_loop_16: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L083enc1_loop_16 +.byte 102,15,56,221,209 + xorps %xmm7,%xmm2 + movdqa %xmm7,%xmm0 + movdqa %xmm6,%xmm1 + movups %xmm2,(%edi,%esi,1) + jmp .L078done +.align 16 +.L080two: + leal 1(%ebp),%ecx + movl 112(%esp),%edx + bsfl %ecx,%ecx + shll $4,%ecx + movdqu (%ebx),%xmm6 + movdqu (%ebx,%ecx,1),%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movl 240(%edx),%ecx + pxor %xmm0,%xmm6 + pxor %xmm6,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm6,%xmm2 + pxor %xmm3,%xmm1 + pxor %xmm7,%xmm3 + movdqa %xmm1,%xmm5 + movl 120(%esp),%edi + call __aesni_encrypt2 + xorps %xmm6,%xmm2 + xorps %xmm7,%xmm3 + movdqa %xmm7,%xmm0 + movdqa %xmm5,%xmm1 + movups %xmm2,(%edi,%esi,1) + movups %xmm3,16(%edi,%esi,1) + jmp .L078done +.align 16 +.L081three: + leal 1(%ebp),%ecx + movl 112(%esp),%edx + bsfl %ecx,%ecx + shll $4,%ecx + movdqu (%ebx),%xmm5 + movdqu (%ebx,%ecx,1),%xmm6 + movdqa %xmm5,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movl 240(%edx),%ecx + pxor %xmm0,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm7 + pxor %xmm2,%xmm1 + pxor %xmm5,%xmm2 + pxor %xmm3,%xmm1 + pxor %xmm6,%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm7,%xmm4 + movdqa %xmm1,96(%esp) + movl 120(%esp),%edi + call __aesni_encrypt3 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movdqa %xmm7,%xmm0 + movdqa 96(%esp),%xmm1 + movups %xmm2,(%edi,%esi,1) + movups %xmm3,16(%edi,%esi,1) + movups %xmm4,32(%edi,%esi,1) + jmp .L078done +.align 16 +.L082four: + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + bsfl %ecx,%ecx + bsfl %eax,%eax + movl 112(%esp),%edx + shll $4,%ecx + shll $4,%eax + movdqu (%ebx),%xmm4 + movdqu (%ebx,%ecx,1),%xmm5 + movdqa %xmm4,%xmm6 + movdqu (%ebx,%eax,1),%xmm7 + pxor %xmm0,%xmm4 + movdqu (%esi),%xmm2 + pxor %xmm4,%xmm5 + movdqu 16(%esi),%xmm3 + pxor %xmm5,%xmm6 + movdqa %xmm4,(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm5,16(%esp) + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movl 240(%edx),%ecx + pxor %xmm2,%xmm1 + pxor (%esp),%xmm2 + pxor %xmm3,%xmm1 + pxor 16(%esp),%xmm3 + pxor %xmm4,%xmm1 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm1 + pxor %xmm7,%xmm5 + movdqa %xmm1,96(%esp) + movl 120(%esp),%edi + call __aesni_encrypt4 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm6,%xmm4 + movups %xmm2,(%edi,%esi,1) + xorps %xmm7,%xmm5 + movups %xmm3,16(%edi,%esi,1) + movdqa %xmm7,%xmm0 + movups %xmm4,32(%edi,%esi,1) + movdqa 96(%esp),%xmm1 + movups %xmm5,48(%edi,%esi,1) +.L078done: + movl 128(%esp),%edx + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm2,16(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm2,32(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm2,48(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm2,64(%esp) + movdqa %xmm2,80(%esp) + movdqa %xmm2,96(%esp) + leal (%edx),%esp + movl 40(%esp),%ecx + movl 48(%esp),%ebx + movdqu %xmm0,(%ecx) + pxor %xmm0,%xmm0 + movdqu %xmm1,(%ebx) + pxor %xmm1,%xmm1 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_ocb_decrypt +.def _aesni_ocb_decrypt; .scl 2; .type 32; .endef +.align 16 +_aesni_ocb_decrypt: +.L_aesni_ocb_decrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 40(%esp),%ecx + movl 48(%esp),%ebx + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl 32(%esp),%edx + movdqu (%ecx),%xmm0 + movl 36(%esp),%ebp + movdqu (%ebx),%xmm1 + movl 44(%esp),%ebx + movl %esp,%ecx + subl $132,%esp + andl $-16,%esp + subl %esi,%edi + shll $4,%eax + leal -96(%esi,%eax,1),%eax + movl %edi,120(%esp) + movl %eax,124(%esp) + movl %ecx,128(%esp) + movl 240(%edx),%ecx + testl $1,%ebp + jnz .L084odd + bsfl %ebp,%eax + addl $1,%ebp + shll $4,%eax + movdqu (%ebx,%eax,1),%xmm7 + movl %edx,%eax + movdqu (%esi),%xmm2 + leal 16(%esi),%esi + pxor %xmm0,%xmm7 + pxor %xmm7,%xmm2 + movdqa %xmm1,%xmm6 + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L085dec1_loop_17: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L085dec1_loop_17 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm1 + movdqa %xmm7,%xmm0 + xorps %xmm2,%xmm1 + movups %xmm2,-16(%edi,%esi,1) + movl 240(%eax),%ecx + movl %eax,%edx + movl 124(%esp),%eax +.L084odd: + shll $4,%ecx + movl $16,%edi + subl %ecx,%edi + movl %edx,112(%esp) + leal 32(%edx,%ecx,1),%edx + movl %edi,116(%esp) + cmpl %eax,%esi + ja .L086short + jmp .L087grandloop +.align 32 +.L087grandloop: + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + leal 5(%ebp),%edi + addl $6,%ebp + bsfl %ecx,%ecx + bsfl %eax,%eax + bsfl %edi,%edi + shll $4,%ecx + shll $4,%eax + shll $4,%edi + movdqu (%ebx),%xmm2 + movdqu (%ebx,%ecx,1),%xmm3 + movl 116(%esp),%ecx + movdqa %xmm2,%xmm4 + movdqu (%ebx,%eax,1),%xmm5 + movdqa %xmm2,%xmm6 + movdqu (%ebx,%edi,1),%xmm7 + pxor %xmm0,%xmm2 + pxor %xmm2,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm3,%xmm4 + movdqa %xmm3,16(%esp) + pxor %xmm4,%xmm5 + movdqa %xmm4,32(%esp) + pxor %xmm5,%xmm6 + movdqa %xmm5,48(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm6,64(%esp) + movdqa %xmm7,80(%esp) + movups -48(%edx,%ecx,1),%xmm0 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + leal 96(%esi),%esi + movdqa %xmm1,96(%esp) + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm0,%xmm7 + movups -32(%edx,%ecx,1),%xmm1 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + pxor 80(%esp),%xmm7 + movups -16(%edx,%ecx,1),%xmm0 +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movl 120(%esp),%edi + movl 124(%esp),%eax + call .L_aesni_decrypt6_enter + movdqa 80(%esp),%xmm0 + pxor (%esp),%xmm2 + movdqa 96(%esp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + pxor %xmm0,%xmm7 + pxor %xmm2,%xmm1 + movdqu %xmm2,-96(%edi,%esi,1) + pxor %xmm3,%xmm1 + movdqu %xmm3,-80(%edi,%esi,1) + pxor %xmm4,%xmm1 + movdqu %xmm4,-64(%edi,%esi,1) + pxor %xmm5,%xmm1 + movdqu %xmm5,-48(%edi,%esi,1) + pxor %xmm6,%xmm1 + movdqu %xmm6,-32(%edi,%esi,1) + pxor %xmm7,%xmm1 + movdqu %xmm7,-16(%edi,%esi,1) + cmpl %eax,%esi + jb .L087grandloop +.L086short: + addl $96,%eax + subl %esi,%eax + jz .L088done + cmpl $32,%eax + jb .L089one + je .L090two + cmpl $64,%eax + jb .L091three + je .L092four + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + bsfl %ecx,%ecx + bsfl %eax,%eax + shll $4,%ecx + shll $4,%eax + movdqu (%ebx),%xmm2 + movdqu (%ebx,%ecx,1),%xmm3 + movl 116(%esp),%ecx + movdqa %xmm2,%xmm4 + movdqu (%ebx,%eax,1),%xmm5 + movdqa %xmm2,%xmm6 + pxor %xmm0,%xmm2 + pxor %xmm2,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm3,%xmm4 + movdqa %xmm3,16(%esp) + pxor %xmm4,%xmm5 + movdqa %xmm4,32(%esp) + pxor %xmm5,%xmm6 + movdqa %xmm5,48(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm6,64(%esp) + movups -48(%edx,%ecx,1),%xmm0 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + pxor %xmm7,%xmm7 + movdqa %xmm1,96(%esp) + pxor %xmm0,%xmm2 + pxor %xmm0,%xmm3 + pxor %xmm0,%xmm4 + pxor %xmm0,%xmm5 + pxor %xmm0,%xmm6 + movups -32(%edx,%ecx,1),%xmm1 + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor 64(%esp),%xmm6 + movups -16(%edx,%ecx,1),%xmm0 +.byte 102,15,56,222,209 +.byte 102,15,56,222,217 +.byte 102,15,56,222,225 +.byte 102,15,56,222,233 +.byte 102,15,56,222,241 +.byte 102,15,56,222,249 + movl 120(%esp),%edi + call .L_aesni_decrypt6_enter + movdqa 64(%esp),%xmm0 + pxor (%esp),%xmm2 + movdqa 96(%esp),%xmm1 + pxor 16(%esp),%xmm3 + pxor 32(%esp),%xmm4 + pxor 48(%esp),%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm2,%xmm1 + movdqu %xmm2,(%edi,%esi,1) + pxor %xmm3,%xmm1 + movdqu %xmm3,16(%edi,%esi,1) + pxor %xmm4,%xmm1 + movdqu %xmm4,32(%edi,%esi,1) + pxor %xmm5,%xmm1 + movdqu %xmm5,48(%edi,%esi,1) + pxor %xmm6,%xmm1 + movdqu %xmm6,64(%edi,%esi,1) + jmp .L088done +.align 16 +.L089one: + movdqu (%ebx),%xmm7 + movl 112(%esp),%edx + movdqu (%esi),%xmm2 + movl 240(%edx),%ecx + pxor %xmm0,%xmm7 + pxor %xmm7,%xmm2 + movdqa %xmm1,%xmm6 + movl 120(%esp),%edi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L093dec1_loop_18: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L093dec1_loop_18 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm1 + movdqa %xmm7,%xmm0 + xorps %xmm2,%xmm1 + movups %xmm2,(%edi,%esi,1) + jmp .L088done +.align 16 +.L090two: + leal 1(%ebp),%ecx + movl 112(%esp),%edx + bsfl %ecx,%ecx + shll $4,%ecx + movdqu (%ebx),%xmm6 + movdqu (%ebx,%ecx,1),%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movl 240(%edx),%ecx + movdqa %xmm1,%xmm5 + pxor %xmm0,%xmm6 + pxor %xmm6,%xmm7 + pxor %xmm6,%xmm2 + pxor %xmm7,%xmm3 + movl 120(%esp),%edi + call __aesni_decrypt2 + xorps %xmm6,%xmm2 + xorps %xmm7,%xmm3 + movdqa %xmm7,%xmm0 + xorps %xmm2,%xmm5 + movups %xmm2,(%edi,%esi,1) + xorps %xmm3,%xmm5 + movups %xmm3,16(%edi,%esi,1) + movaps %xmm5,%xmm1 + jmp .L088done +.align 16 +.L091three: + leal 1(%ebp),%ecx + movl 112(%esp),%edx + bsfl %ecx,%ecx + shll $4,%ecx + movdqu (%ebx),%xmm5 + movdqu (%ebx,%ecx,1),%xmm6 + movdqa %xmm5,%xmm7 + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movl 240(%edx),%ecx + movdqa %xmm1,96(%esp) + pxor %xmm0,%xmm5 + pxor %xmm5,%xmm6 + pxor %xmm6,%xmm7 + pxor %xmm5,%xmm2 + pxor %xmm6,%xmm3 + pxor %xmm7,%xmm4 + movl 120(%esp),%edi + call __aesni_decrypt3 + movdqa 96(%esp),%xmm1 + xorps %xmm5,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm7,%xmm4 + movups %xmm2,(%edi,%esi,1) + pxor %xmm2,%xmm1 + movdqa %xmm7,%xmm0 + movups %xmm3,16(%edi,%esi,1) + pxor %xmm3,%xmm1 + movups %xmm4,32(%edi,%esi,1) + pxor %xmm4,%xmm1 + jmp .L088done +.align 16 +.L092four: + leal 1(%ebp),%ecx + leal 3(%ebp),%eax + bsfl %ecx,%ecx + bsfl %eax,%eax + movl 112(%esp),%edx + shll $4,%ecx + shll $4,%eax + movdqu (%ebx),%xmm4 + movdqu (%ebx,%ecx,1),%xmm5 + movdqa %xmm4,%xmm6 + movdqu (%ebx,%eax,1),%xmm7 + pxor %xmm0,%xmm4 + movdqu (%esi),%xmm2 + pxor %xmm4,%xmm5 + movdqu 16(%esi),%xmm3 + pxor %xmm5,%xmm6 + movdqa %xmm4,(%esp) + pxor %xmm6,%xmm7 + movdqa %xmm5,16(%esp) + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movl 240(%edx),%ecx + movdqa %xmm1,96(%esp) + pxor (%esp),%xmm2 + pxor 16(%esp),%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm7,%xmm5 + movl 120(%esp),%edi + call __aesni_decrypt4 + movdqa 96(%esp),%xmm1 + xorps (%esp),%xmm2 + xorps 16(%esp),%xmm3 + xorps %xmm6,%xmm4 + movups %xmm2,(%edi,%esi,1) + pxor %xmm2,%xmm1 + xorps %xmm7,%xmm5 + movups %xmm3,16(%edi,%esi,1) + pxor %xmm3,%xmm1 + movdqa %xmm7,%xmm0 + movups %xmm4,32(%edi,%esi,1) + pxor %xmm4,%xmm1 + movups %xmm5,48(%edi,%esi,1) + pxor %xmm5,%xmm1 +.L088done: + movl 128(%esp),%edx + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + movdqa %xmm2,(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm2,16(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm2,32(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm2,48(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm2,64(%esp) + movdqa %xmm2,80(%esp) + movdqa %xmm2,96(%esp) + leal (%edx),%esp + movl 40(%esp),%ecx + movl 48(%esp),%ebx + movdqu %xmm0,(%ecx) + pxor %xmm0,%xmm0 + movdqu %xmm1,(%ebx) + pxor %xmm1,%xmm1 + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _aesni_cbc_encrypt +.def _aesni_cbc_encrypt; .scl 2; .type 32; .endef +.align 16 +_aesni_cbc_encrypt: +.L_aesni_cbc_encrypt_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl %esp,%ebx + movl 24(%esp),%edi + subl $24,%ebx + movl 28(%esp),%eax + andl $-16,%ebx + movl 32(%esp),%edx + movl 36(%esp),%ebp + testl %eax,%eax + jz .L094cbc_abort + cmpl $0,40(%esp) + xchgl %esp,%ebx + movups (%ebp),%xmm7 + movl 240(%edx),%ecx + movl %edx,%ebp + movl %ebx,16(%esp) + movl %ecx,%ebx + je .L095cbc_decrypt + movaps %xmm7,%xmm2 + cmpl $16,%eax + jb .L096cbc_enc_tail + subl $16,%eax + jmp .L097cbc_enc_loop +.align 16 +.L097cbc_enc_loop: + movups (%esi),%xmm7 + leal 16(%esi),%esi + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + xorps %xmm0,%xmm7 + leal 32(%edx),%edx + xorps %xmm7,%xmm2 +.L098enc1_loop_19: +.byte 102,15,56,220,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L098enc1_loop_19 +.byte 102,15,56,221,209 + movl %ebx,%ecx + movl %ebp,%edx + movups %xmm2,(%edi) + leal 16(%edi),%edi + subl $16,%eax + jnc .L097cbc_enc_loop + addl $16,%eax + jnz .L096cbc_enc_tail + movaps %xmm2,%xmm7 + pxor %xmm2,%xmm2 + jmp .L099cbc_ret +.L096cbc_enc_tail: + movl %eax,%ecx +.long 2767451785 + movl $16,%ecx + subl %eax,%ecx + xorl %eax,%eax +.long 2868115081 + leal -16(%edi),%edi + movl %ebx,%ecx + movl %edi,%esi + movl %ebp,%edx + jmp .L097cbc_enc_loop +.align 16 +.L095cbc_decrypt: + cmpl $80,%eax + jbe .L100cbc_dec_tail + movaps %xmm7,(%esp) + subl $80,%eax + jmp .L101cbc_dec_loop6_enter +.align 16 +.L102cbc_dec_loop6: + movaps %xmm0,(%esp) + movups %xmm7,(%edi) + leal 16(%edi),%edi +.L101cbc_dec_loop6_enter: + movdqu (%esi),%xmm2 + movdqu 16(%esi),%xmm3 + movdqu 32(%esi),%xmm4 + movdqu 48(%esi),%xmm5 + movdqu 64(%esi),%xmm6 + movdqu 80(%esi),%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm1 + xorps %xmm0,%xmm6 + movups 80(%esi),%xmm0 + xorps %xmm1,%xmm7 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + leal 96(%esi),%esi + movups %xmm4,32(%edi) + movl %ebx,%ecx + movups %xmm5,48(%edi) + movl %ebp,%edx + movups %xmm6,64(%edi) + leal 80(%edi),%edi + subl $96,%eax + ja .L102cbc_dec_loop6 + movaps %xmm7,%xmm2 + movaps %xmm0,%xmm7 + addl $80,%eax + jle .L103cbc_dec_clear_tail_collected + movups %xmm2,(%edi) + leal 16(%edi),%edi +.L100cbc_dec_tail: + movups (%esi),%xmm2 + movaps %xmm2,%xmm6 + cmpl $16,%eax + jbe .L104cbc_dec_one + movups 16(%esi),%xmm3 + movaps %xmm3,%xmm5 + cmpl $32,%eax + jbe .L105cbc_dec_two + movups 32(%esi),%xmm4 + cmpl $48,%eax + jbe .L106cbc_dec_three + movups 48(%esi),%xmm5 + cmpl $64,%eax + jbe .L107cbc_dec_four + movups 64(%esi),%xmm6 + movaps %xmm7,(%esp) + movups (%esi),%xmm2 + xorps %xmm7,%xmm7 + call __aesni_decrypt6 + movups (%esi),%xmm1 + movups 16(%esi),%xmm0 + xorps (%esp),%xmm2 + xorps %xmm1,%xmm3 + movups 32(%esi),%xmm1 + xorps %xmm0,%xmm4 + movups 48(%esi),%xmm0 + xorps %xmm1,%xmm5 + movups 64(%esi),%xmm7 + xorps %xmm0,%xmm6 + movups %xmm2,(%edi) + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 + leal 64(%edi),%edi + movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 + subl $80,%eax + jmp .L108cbc_dec_tail_collected +.align 16 +.L104cbc_dec_one: + movups (%edx),%xmm0 + movups 16(%edx),%xmm1 + leal 32(%edx),%edx + xorps %xmm0,%xmm2 +.L109dec1_loop_20: +.byte 102,15,56,222,209 + decl %ecx + movups (%edx),%xmm1 + leal 16(%edx),%edx + jnz .L109dec1_loop_20 +.byte 102,15,56,223,209 + xorps %xmm7,%xmm2 + movaps %xmm6,%xmm7 + subl $16,%eax + jmp .L108cbc_dec_tail_collected +.align 16 +.L105cbc_dec_two: + call __aesni_decrypt2 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + leal 16(%edi),%edi + movaps %xmm5,%xmm7 + subl $32,%eax + jmp .L108cbc_dec_tail_collected +.align 16 +.L106cbc_dec_three: + call __aesni_decrypt3 + xorps %xmm7,%xmm2 + xorps %xmm6,%xmm3 + xorps %xmm5,%xmm4 + movups %xmm2,(%edi) + movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + leal 32(%edi),%edi + movups 32(%esi),%xmm7 + subl $48,%eax + jmp .L108cbc_dec_tail_collected +.align 16 +.L107cbc_dec_four: + call __aesni_decrypt4 + movups 16(%esi),%xmm1 + movups 32(%esi),%xmm0 + xorps %xmm7,%xmm2 + movups 48(%esi),%xmm7 + xorps %xmm6,%xmm3 + movups %xmm2,(%edi) + xorps %xmm1,%xmm4 + movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 + xorps %xmm0,%xmm5 + movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 + leal 48(%edi),%edi + movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 + subl $64,%eax + jmp .L108cbc_dec_tail_collected +.align 16 +.L103cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +.L108cbc_dec_tail_collected: + andl $15,%eax + jnz .L110cbc_dec_tail_partial + movups %xmm2,(%edi) + pxor %xmm0,%xmm0 + jmp .L099cbc_ret +.align 16 +.L110cbc_dec_tail_partial: + movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 + movl $16,%ecx + movl %esp,%esi + subl %eax,%ecx +.long 2767451785 + movdqa %xmm2,(%esp) +.L099cbc_ret: + movl 16(%esp),%esp + movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 + movups %xmm7,(%ebp) + pxor %xmm7,%xmm7 +.L094cbc_abort: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.def __aesni_set_encrypt_key; .scl 3; .type 32; .endef +.align 16 +__aesni_set_encrypt_key: + pushl %ebp + pushl %ebx + testl %eax,%eax + jz .L111bad_pointer + testl %edx,%edx + jz .L111bad_pointer + call .L112pic +.L112pic: + popl %ebx + leal .Lkey_const-.L112pic(%ebx),%ebx + leal _GNUTLS_x86_cpuid_s,%ebp + movups (%eax),%xmm0 + xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp + leal 16(%edx),%edx + andl $268437504,%ebp + cmpl $256,%ecx + je .L11314rounds + cmpl $192,%ecx + je .L11412rounds + cmpl $128,%ecx + jne .L115bad_keybits +.align 16 +.L11610rounds: + cmpl $268435456,%ebp + je .L11710rounds_alt + movl $9,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,200,1 + call .L118key_128_cold +.byte 102,15,58,223,200,2 + call .L119key_128 +.byte 102,15,58,223,200,4 + call .L119key_128 +.byte 102,15,58,223,200,8 + call .L119key_128 +.byte 102,15,58,223,200,16 + call .L119key_128 +.byte 102,15,58,223,200,32 + call .L119key_128 +.byte 102,15,58,223,200,64 + call .L119key_128 +.byte 102,15,58,223,200,128 + call .L119key_128 +.byte 102,15,58,223,200,27 + call .L119key_128 +.byte 102,15,58,223,200,54 + call .L119key_128 + movups %xmm0,(%edx) + movl %ecx,80(%edx) + jmp .L120good_key +.align 16 +.L119key_128: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.L118key_128_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L11710rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +.L121loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz .L121loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp .L120good_key +.align 16 +.L11412rounds: + movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je .L12212rounds_alt + movl $11,%ecx + movups %xmm0,-16(%edx) +.byte 102,15,58,223,202,1 + call .L123key_192a_cold +.byte 102,15,58,223,202,2 + call .L124key_192b +.byte 102,15,58,223,202,4 + call .L125key_192a +.byte 102,15,58,223,202,8 + call .L124key_192b +.byte 102,15,58,223,202,16 + call .L125key_192a +.byte 102,15,58,223,202,32 + call .L124key_192b +.byte 102,15,58,223,202,64 + call .L125key_192a +.byte 102,15,58,223,202,128 + call .L124key_192b + movups %xmm0,(%edx) + movl %ecx,48(%edx) + jmp .L120good_key +.align 16 +.L125key_192a: + movups %xmm0,(%edx) + leal 16(%edx),%edx +.align 16 +.L123key_192a_cold: + movaps %xmm2,%xmm5 +.L126key_192b_warm: + shufps $16,%xmm0,%xmm4 + movdqa %xmm2,%xmm3 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + pslldq $4,%xmm3 + xorps %xmm4,%xmm0 + pshufd $85,%xmm1,%xmm1 + pxor %xmm3,%xmm2 + pxor %xmm1,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm3,%xmm2 + ret +.align 16 +.L124key_192b: + movaps %xmm0,%xmm3 + shufps $68,%xmm0,%xmm5 + movups %xmm5,(%edx) + shufps $78,%xmm2,%xmm3 + movups %xmm3,16(%edx) + leal 32(%edx),%edx + jmp .L126key_192b_warm +.align 16 +.L12212rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +.L127loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz .L127loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp .L120good_key +.align 16 +.L11314rounds: + movups 16(%eax),%xmm2 + leal 16(%edx),%edx + cmpl $268435456,%ebp + je .L12814rounds_alt + movl $13,%ecx + movups %xmm0,-32(%edx) + movups %xmm2,-16(%edx) +.byte 102,15,58,223,202,1 + call .L129key_256a_cold +.byte 102,15,58,223,200,1 + call .L130key_256b +.byte 102,15,58,223,202,2 + call .L131key_256a +.byte 102,15,58,223,200,2 + call .L130key_256b +.byte 102,15,58,223,202,4 + call .L131key_256a +.byte 102,15,58,223,200,4 + call .L130key_256b +.byte 102,15,58,223,202,8 + call .L131key_256a +.byte 102,15,58,223,200,8 + call .L130key_256b +.byte 102,15,58,223,202,16 + call .L131key_256a +.byte 102,15,58,223,200,16 + call .L130key_256b +.byte 102,15,58,223,202,32 + call .L131key_256a +.byte 102,15,58,223,200,32 + call .L130key_256b +.byte 102,15,58,223,202,64 + call .L131key_256a + movups %xmm0,(%edx) + movl %ecx,16(%edx) + xorl %eax,%eax + jmp .L120good_key +.align 16 +.L131key_256a: + movups %xmm2,(%edx) + leal 16(%edx),%edx +.L129key_256a_cold: + shufps $16,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $140,%xmm0,%xmm4 + xorps %xmm4,%xmm0 + shufps $255,%xmm1,%xmm1 + xorps %xmm1,%xmm0 + ret +.align 16 +.L130key_256b: + movups %xmm0,(%edx) + leal 16(%edx),%edx + shufps $16,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $140,%xmm2,%xmm4 + xorps %xmm4,%xmm2 + shufps $170,%xmm1,%xmm1 + xorps %xmm1,%xmm2 + ret +.align 16 +.L12814rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +.L132loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz .L133done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp .L132loop_key256 +.L133done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +.L120good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret +.align 4 +.L111bad_pointer: + movl $-1,%eax + popl %ebx + popl %ebp + ret +.align 4 +.L115bad_keybits: + pxor %xmm0,%xmm0 + movl $-2,%eax + popl %ebx + popl %ebp + ret +.globl _aesni_set_encrypt_key +.def _aesni_set_encrypt_key; .scl 2; .type 32; .endef +.align 16 +_aesni_set_encrypt_key: +.L_aesni_set_encrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + ret +.globl _aesni_set_decrypt_key +.def _aesni_set_decrypt_key; .scl 2; .type 32; .endef +.align 16 +_aesni_set_decrypt_key: +.L_aesni_set_decrypt_key_begin: + movl 4(%esp),%eax + movl 8(%esp),%ecx + movl 12(%esp),%edx + call __aesni_set_encrypt_key + movl 12(%esp),%edx + shll $4,%ecx + testl %eax,%eax + jnz .L134dec_key_ret + leal 16(%edx,%ecx,1),%eax + movups (%edx),%xmm0 + movups (%eax),%xmm1 + movups %xmm0,(%eax) + movups %xmm1,(%edx) + leal 16(%edx),%edx + leal -16(%eax),%eax +.L135dec_key_inverse: + movups (%edx),%xmm0 + movups (%eax),%xmm1 +.byte 102,15,56,219,192 +.byte 102,15,56,219,201 + leal 16(%edx),%edx + leal -16(%eax),%eax + movups %xmm0,16(%eax) + movups %xmm1,-16(%edx) + cmpl %edx,%eax + ja .L135dec_key_inverse + movups (%edx),%xmm0 +.byte 102,15,56,219,192 + movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorl %eax,%eax +.L134dec_key_ret: + ret +.align 64 +.Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 +.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +.byte 115,108,46,111,114,103,62,0 +.comm _GNUTLS_x86_cpuid_s,16 + |