Diffstat (limited to 'lib/accelerated/x86/macosx/aesni-x86.s')
-rw-r--r--  lib/accelerated/x86/macosx/aesni-x86.s  3237
1 file changed, 3237 insertions, 0 deletions
diff --git a/lib/accelerated/x86/macosx/aesni-x86.s b/lib/accelerated/x86/macosx/aesni-x86.s
new file mode 100644
index 0000000..774d1f4
--- /dev/null
+++ b/lib/accelerated/x86/macosx/aesni-x86.s
@@ -0,0 +1,3237 @@
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+.globl _aesni_encrypt
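+# void aesni_encrypt(const void *in, void *out, const AES_KEY *key);
+# (presumed prototype, per the aesni-x86.pl perlasm source this file is
+# generated from; the .byte 102,15,56,22x sequences throughout are
+# hand-encoded aesenc/aesenclast/aesdec/aesdeclast instructions, emitted
+# as raw bytes for assemblers that lack AES-NI mnemonics)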
+.align 4
+_aesni_encrypt:
+L_aesni_encrypt_begin:
+ movl 4(%esp),%eax
+ movl 12(%esp),%edx
+ movups (%eax),%xmm2
+ movl 240(%edx),%ecx
+ movl 8(%esp),%eax
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L000enc1_loop_1:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L000enc1_loop_1
+.byte 102,15,56,221,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
+ ret
+.globl _aesni_decrypt
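+# void aesni_decrypt(const void *in, void *out, const AES_KEY *key);
+# (presumed prototype, per the generating perlasm source)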
+.align 4
+_aesni_decrypt:
+L_aesni_decrypt_begin:
+ movl 4(%esp),%eax
+ movl 12(%esp),%edx
+ movups (%eax),%xmm2
+ movl 240(%edx),%ecx
+ movl 8(%esp),%eax
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L001dec1_loop_2:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L001dec1_loop_2
+.byte 102,15,56,223,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
+ ret
+.align 4
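+# Internal helpers __aesni_{en,de}crypt{2,3,4,6} below process that many
+# blocks in parallel; register convention inferred from the code: key
+# schedule pointer in %edx, round count in %ecx, blocks in %xmm2..%xmm7.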
+__aesni_encrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+L002enc2_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L002enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ ret
+.align 4
+__aesni_decrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+L003dec2_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L003dec2_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+ ret
+.align 4
+__aesni_encrypt3:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+L004enc3_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L004enc3_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+ ret
+.align 4
+__aesni_decrypt3:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+L005dec3_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L005dec3_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+ ret
+.align 4
+__aesni_encrypt4:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ shll $4,%ecx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+L006enc4_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L006enc4_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+ ret
+.align 4
+__aesni_decrypt4:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ shll $4,%ecx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+L007dec4_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L007dec4_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+ ret
+.align 4
+__aesni_encrypt6:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,217
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm7
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp L008_aesni_encrypt6_inner
+.align 4,0x90
+L009enc6_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+L008_aesni_encrypt6_inner:
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+L_aesni_encrypt6_enter:
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L009enc6_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+.byte 102,15,56,221,240
+.byte 102,15,56,221,248
+ ret
+.align 4
+__aesni_decrypt6:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+.byte 102,15,56,222,209
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+.byte 102,15,56,222,217
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm7
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp L010_aesni_decrypt6_inner
+.align 4,0x90
+L011dec6_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+L010_aesni_decrypt6_inner:
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+L_aesni_decrypt6_enter:
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L011dec6_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+.byte 102,15,56,223,240
+.byte 102,15,56,223,248
+ ret
+.globl _aesni_ecb_encrypt
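+# void aesni_ecb_encrypt(const void *in, void *out, size_t len,
+#                        const AES_KEY *key, int enc);
+# (presumed prototype, per the generating perlasm source)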
+.align 4
+_aesni_ecb_encrypt:
+L_aesni_ecb_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ andl $-16,%eax
+ jz L012ecb_ret
+ movl 240(%edx),%ecx
+ testl %ebx,%ebx
+ jz L013ecb_decrypt
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ cmpl $96,%eax
+ jb L014ecb_enc_tail
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ subl $96,%eax
+ jmp L015ecb_enc_loop6_enter
+.align 4,0x90
+L016ecb_enc_loop6:
+ movups %xmm2,(%edi)
+ movdqu (%esi),%xmm2
+ movups %xmm3,16(%edi)
+ movdqu 16(%esi),%xmm3
+ movups %xmm4,32(%edi)
+ movdqu 32(%esi),%xmm4
+ movups %xmm5,48(%edi)
+ movdqu 48(%esi),%xmm5
+ movups %xmm6,64(%edi)
+ movdqu 64(%esi),%xmm6
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+L015ecb_enc_loop6_enter:
+ call __aesni_encrypt6
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ subl $96,%eax
+ jnc L016ecb_enc_loop6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ addl $96,%eax
+ jz L012ecb_ret
+L014ecb_enc_tail:
+ movups (%esi),%xmm2
+ cmpl $32,%eax
+ jb L017ecb_enc_one
+ movups 16(%esi),%xmm3
+ je L018ecb_enc_two
+ movups 32(%esi),%xmm4
+ cmpl $64,%eax
+ jb L019ecb_enc_three
+ movups 48(%esi),%xmm5
+ je L020ecb_enc_four
+ movups 64(%esi),%xmm6
+ xorps %xmm7,%xmm7
+ call __aesni_encrypt6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ jmp L012ecb_ret
+.align 4,0x90
+L017ecb_enc_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L021enc1_loop_3:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L021enc1_loop_3
+.byte 102,15,56,221,209
+ movups %xmm2,(%edi)
+ jmp L012ecb_ret
+.align 4,0x90
+L018ecb_enc_two:
+ call __aesni_encrypt2
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ jmp L012ecb_ret
+.align 4,0x90
+L019ecb_enc_three:
+ call __aesni_encrypt3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ jmp L012ecb_ret
+.align 4,0x90
+L020ecb_enc_four:
+ call __aesni_encrypt4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ jmp L012ecb_ret
+.align 4,0x90
+L013ecb_decrypt:
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ cmpl $96,%eax
+ jb L022ecb_dec_tail
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ subl $96,%eax
+ jmp L023ecb_dec_loop6_enter
+.align 4,0x90
+L024ecb_dec_loop6:
+ movups %xmm2,(%edi)
+ movdqu (%esi),%xmm2
+ movups %xmm3,16(%edi)
+ movdqu 16(%esi),%xmm3
+ movups %xmm4,32(%edi)
+ movdqu 32(%esi),%xmm4
+ movups %xmm5,48(%edi)
+ movdqu 48(%esi),%xmm5
+ movups %xmm6,64(%edi)
+ movdqu 64(%esi),%xmm6
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+L023ecb_dec_loop6_enter:
+ call __aesni_decrypt6
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ subl $96,%eax
+ jnc L024ecb_dec_loop6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ addl $96,%eax
+ jz L012ecb_ret
+L022ecb_dec_tail:
+ movups (%esi),%xmm2
+ cmpl $32,%eax
+ jb L025ecb_dec_one
+ movups 16(%esi),%xmm3
+ je L026ecb_dec_two
+ movups 32(%esi),%xmm4
+ cmpl $64,%eax
+ jb L027ecb_dec_three
+ movups 48(%esi),%xmm5
+ je L028ecb_dec_four
+ movups 64(%esi),%xmm6
+ xorps %xmm7,%xmm7
+ call __aesni_decrypt6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ jmp L012ecb_ret
+.align 4,0x90
+L025ecb_dec_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L029dec1_loop_4:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L029dec1_loop_4
+.byte 102,15,56,223,209
+ movups %xmm2,(%edi)
+ jmp L012ecb_ret
+.align 4,0x90
+L026ecb_dec_two:
+ call __aesni_decrypt2
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ jmp L012ecb_ret
+.align 4,0x90
+L027ecb_dec_three:
+ call __aesni_decrypt3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ jmp L012ecb_ret
+.align 4,0x90
+L028ecb_dec_four:
+ call __aesni_decrypt4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+L012ecb_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ccm64_encrypt_blocks
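+# void aesni_ccm64_encrypt_blocks(const void *in, void *out, size_t blocks,
+#                                 const AES_KEY *key, const char *ivec,
+#                                 char *cmac);
+# (presumed prototype, per the generating perlasm source)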
+.align 4
+_aesni_ccm64_encrypt_blocks:
+L_aesni_ccm64_encrypt_blocks_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl 40(%esp),%ecx
+ movl %esp,%ebp
+ subl $60,%esp
+ andl $-16,%esp
+ movl %ebp,48(%esp)
+ movdqu (%ebx),%xmm7
+ movdqu (%ecx),%xmm3
+ movl 240(%edx),%ecx
+ movl $202182159,(%esp)
+ movl $134810123,4(%esp)
+ movl $67438087,8(%esp)
+ movl $66051,12(%esp)
+ movl $1,%ebx
+ xorl %ebp,%ebp
+ movl %ebx,16(%esp)
+ movl %ebp,20(%esp)
+ movl %ebp,24(%esp)
+ movl %ebp,28(%esp)
+ shll $4,%ecx
+ movl $16,%ebx
+ leal (%edx),%ebp
+ movdqa (%esp),%xmm5
+ movdqa %xmm7,%xmm2
+ leal 32(%edx,%ecx,1),%edx
+ subl %ecx,%ebx
+.byte 102,15,56,0,253
+L030ccm64_enc_outer:
+ movups (%ebp),%xmm0
+ movl %ebx,%ecx
+ movups (%esi),%xmm6
+ xorps %xmm0,%xmm2
+ movups 16(%ebp),%xmm1
+ xorps %xmm6,%xmm0
+ xorps %xmm0,%xmm3
+ movups 32(%ebp),%xmm0
+L031ccm64_enc2_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L031ccm64_enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ paddq 16(%esp),%xmm7
+ decl %eax
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ leal 16(%esi),%esi
+ xorps %xmm2,%xmm6
+ movdqa %xmm7,%xmm2
+ movups %xmm6,(%edi)
+.byte 102,15,56,0,213
+ leal 16(%edi),%edi
+ jnz L030ccm64_enc_outer
+ movl 48(%esp),%esp
+ movl 40(%esp),%edi
+ movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ccm64_decrypt_blocks
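+# void aesni_ccm64_decrypt_blocks(const void *in, void *out, size_t blocks,
+#                                 const AES_KEY *key, const char *ivec,
+#                                 char *cmac);
+# (presumed prototype, per the generating perlasm source)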
+.align 4
+_aesni_ccm64_decrypt_blocks:
+L_aesni_ccm64_decrypt_blocks_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl 40(%esp),%ecx
+ movl %esp,%ebp
+ subl $60,%esp
+ andl $-16,%esp
+ movl %ebp,48(%esp)
+ movdqu (%ebx),%xmm7
+ movdqu (%ecx),%xmm3
+ movl 240(%edx),%ecx
+ movl $202182159,(%esp)
+ movl $134810123,4(%esp)
+ movl $67438087,8(%esp)
+ movl $66051,12(%esp)
+ movl $1,%ebx
+ xorl %ebp,%ebp
+ movl %ebx,16(%esp)
+ movl %ebp,20(%esp)
+ movl %ebp,24(%esp)
+ movl %ebp,28(%esp)
+ movdqa (%esp),%xmm5
+ movdqa %xmm7,%xmm2
+ movl %edx,%ebp
+ movl %ecx,%ebx
+.byte 102,15,56,0,253
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L032enc1_loop_5:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L032enc1_loop_5
+.byte 102,15,56,221,209
+ shll $4,%ebx
+ movl $16,%ecx
+ movups (%esi),%xmm6
+ paddq 16(%esp),%xmm7
+ leal 16(%esi),%esi
+ subl %ebx,%ecx
+ leal 32(%ebp,%ebx,1),%edx
+ movl %ecx,%ebx
+ jmp L033ccm64_dec_outer
+.align 4,0x90
+L033ccm64_dec_outer:
+ xorps %xmm2,%xmm6
+ movdqa %xmm7,%xmm2
+ movups %xmm6,(%edi)
+ leal 16(%edi),%edi
+.byte 102,15,56,0,213
+ subl $1,%eax
+ jz L034ccm64_dec_break
+ movups (%ebp),%xmm0
+ movl %ebx,%ecx
+ movups 16(%ebp),%xmm1
+ xorps %xmm0,%xmm6
+ xorps %xmm0,%xmm2
+ xorps %xmm6,%xmm3
+ movups 32(%ebp),%xmm0
+L035ccm64_dec2_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz L035ccm64_dec2_loop
+ movups (%esi),%xmm6
+ paddq 16(%esp),%xmm7
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ leal 16(%esi),%esi
+ jmp L033ccm64_dec_outer
+.align 4,0x90
+L034ccm64_dec_break:
+ movl 240(%ebp),%ecx
+ movl %ebp,%edx
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm6
+ leal 32(%edx),%edx
+ xorps %xmm6,%xmm3
+L036enc1_loop_6:
+.byte 102,15,56,220,217
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L036enc1_loop_6
+.byte 102,15,56,221,217
+ movl 48(%esp),%esp
+ movl 40(%esp),%edi
+ movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ctr32_encrypt_blocks
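+# void aesni_ctr32_encrypt_blocks(const void *in, void *out, size_t blocks,
+#                                 const AES_KEY *key, const char *ivec);
+# (presumed prototype, per the generating perlasm source)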
+.align 4
+_aesni_ctr32_encrypt_blocks:
+L_aesni_ctr32_encrypt_blocks_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl %esp,%ebp
+ subl $88,%esp
+ andl $-16,%esp
+ movl %ebp,80(%esp)
+ cmpl $1,%eax
+ je L037ctr32_one_shortcut
+ movdqu (%ebx),%xmm7
+ movl $202182159,(%esp)
+ movl $134810123,4(%esp)
+ movl $67438087,8(%esp)
+ movl $66051,12(%esp)
+ movl $6,%ecx
+ xorl %ebp,%ebp
+ movl %ecx,16(%esp)
+ movl %ecx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %ebp,28(%esp)
+.byte 102,15,58,22,251,3
+.byte 102,15,58,34,253,3
+ movl 240(%edx),%ecx
+ bswap %ebx
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ movdqa (%esp),%xmm2
+.byte 102,15,58,34,195,0
+ leal 3(%ebx),%ebp
+.byte 102,15,58,34,205,0
+ incl %ebx
+.byte 102,15,58,34,195,1
+ incl %ebp
+.byte 102,15,58,34,205,1
+ incl %ebx
+.byte 102,15,58,34,195,2
+ incl %ebp
+.byte 102,15,58,34,205,2
+ movdqa %xmm0,48(%esp)
+.byte 102,15,56,0,194
+ movdqu (%edx),%xmm6
+ movdqa %xmm1,64(%esp)
+.byte 102,15,56,0,202
+ pshufd $192,%xmm0,%xmm2
+ pshufd $128,%xmm0,%xmm3
+ cmpl $6,%eax
+ jb L038ctr32_tail
+ pxor %xmm6,%xmm7
+ shll $4,%ecx
+ movl $16,%ebx
+ movdqa %xmm7,32(%esp)
+ movl %edx,%ebp
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ subl $6,%eax
+ jmp L039ctr32_loop6
+.align 4,0x90
+L039ctr32_loop6:
+ pshufd $64,%xmm0,%xmm4
+ movdqa 32(%esp),%xmm0
+ pshufd $192,%xmm1,%xmm5
+ pxor %xmm0,%xmm2
+ pshufd $128,%xmm1,%xmm6
+ pxor %xmm0,%xmm3
+ pshufd $64,%xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm6
+ pxor %xmm0,%xmm7
+.byte 102,15,56,220,217
+ movups 32(%ebp),%xmm0
+ movl %ebx,%ecx
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ call L_aesni_encrypt6_enter
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps %xmm1,%xmm2
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm3
+ movups %xmm2,(%edi)
+ movdqa 16(%esp),%xmm0
+ xorps %xmm1,%xmm4
+ movdqa 64(%esp),%xmm1
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ paddd %xmm0,%xmm1
+ paddd 48(%esp),%xmm0
+ movdqa (%esp),%xmm2
+ movups 48(%esi),%xmm3
+ movups 64(%esi),%xmm4
+ xorps %xmm3,%xmm5
+ movups 80(%esi),%xmm3
+ leal 96(%esi),%esi
+ movdqa %xmm0,48(%esp)
+.byte 102,15,56,0,194
+ xorps %xmm4,%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm3,%xmm7
+ movdqa %xmm1,64(%esp)
+.byte 102,15,56,0,202
+ movups %xmm6,64(%edi)
+ pshufd $192,%xmm0,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ pshufd $128,%xmm0,%xmm3
+ subl $6,%eax
+ jnc L039ctr32_loop6
+ addl $6,%eax
+ jz L040ctr32_ret
+ movdqu (%ebp),%xmm7
+ movl %ebp,%edx
+ pxor 32(%esp),%xmm7
+ movl 240(%ebp),%ecx
+L038ctr32_tail:
+ por %xmm7,%xmm2
+ cmpl $2,%eax
+ jb L041ctr32_one
+ pshufd $64,%xmm0,%xmm4
+ por %xmm7,%xmm3
+ je L042ctr32_two
+ pshufd $192,%xmm1,%xmm5
+ por %xmm7,%xmm4
+ cmpl $4,%eax
+ jb L043ctr32_three
+ pshufd $128,%xmm1,%xmm6
+ por %xmm7,%xmm5
+ je L044ctr32_four
+ por %xmm7,%xmm6
+ call __aesni_encrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps %xmm1,%xmm2
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm3
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm4
+ movups 64(%esi),%xmm1
+ xorps %xmm0,%xmm5
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ jmp L040ctr32_ret
+.align 4,0x90
+L037ctr32_one_shortcut:
+ movups (%ebx),%xmm2
+ movl 240(%edx),%ecx
+L041ctr32_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L045enc1_loop_7:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L045enc1_loop_7
+.byte 102,15,56,221,209
+ movups (%esi),%xmm6
+ xorps %xmm2,%xmm6
+ movups %xmm6,(%edi)
+ jmp L040ctr32_ret
+.align 4,0x90
+L042ctr32_two:
+ call __aesni_encrypt2
+ movups (%esi),%xmm5
+ movups 16(%esi),%xmm6
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ jmp L040ctr32_ret
+.align 4,0x90
+L043ctr32_three:
+ call __aesni_encrypt3
+ movups (%esi),%xmm5
+ movups 16(%esi),%xmm6
+ xorps %xmm5,%xmm2
+ movups 32(%esi),%xmm7
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ jmp L040ctr32_ret
+.align 4,0x90
+L044ctr32_four:
+ call __aesni_encrypt4
+ movups (%esi),%xmm6
+ movups 16(%esi),%xmm7
+ movups 32(%esi),%xmm1
+ xorps %xmm6,%xmm2
+ movups 48(%esi),%xmm0
+ xorps %xmm7,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm4
+ movups %xmm3,16(%edi)
+ xorps %xmm0,%xmm5
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+L040ctr32_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movl 80(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_xts_encrypt
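+# void aesni_xts_encrypt(const unsigned char *in, unsigned char *out,
+#                        size_t len, const AES_KEY *key1,
+#                        const AES_KEY *key2, const unsigned char iv[16]);
+# (presumed prototype, per the generating perlasm source)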
+.align 4
+_aesni_xts_encrypt:
+L_aesni_xts_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 36(%esp),%edx
+ movl 40(%esp),%esi
+ movl 240(%edx),%ecx
+ movups (%esi),%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L046enc1_loop_8:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L046enc1_loop_8
+.byte 102,15,56,221,209
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl %esp,%ebp
+ subl $120,%esp
+ movl 240(%edx),%ecx
+ andl $-16,%esp
+ movl $135,96(%esp)
+ movl $0,100(%esp)
+ movl $1,104(%esp)
+ movl $0,108(%esp)
+ movl %eax,112(%esp)
+ movl %ebp,116(%esp)
+ movdqa %xmm2,%xmm1
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ pcmpgtd %xmm1,%xmm0
+ andl $-16,%eax
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ subl $96,%eax
+ jc L047xts_enc_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp L048xts_enc_loop6
+.align 4,0x90
+L048xts_enc_loop6:
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,16(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,32(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,64(%esp)
+ paddq %xmm1,%xmm1
+ movups (%ebp),%xmm0
+ pand %xmm3,%xmm7
+ movups (%esi),%xmm2
+ pxor %xmm1,%xmm7
+ movl %ebx,%ecx
+ movdqu 16(%esi),%xmm3
+ xorps %xmm0,%xmm2
+ movdqu 32(%esi),%xmm4
+ pxor %xmm0,%xmm3
+ movdqu 48(%esi),%xmm5
+ pxor %xmm0,%xmm4
+ movdqu 64(%esi),%xmm6
+ pxor %xmm0,%xmm5
+ movdqu 80(%esi),%xmm1
+ pxor %xmm0,%xmm6
+ leal 96(%esi),%esi
+ pxor (%esp),%xmm2
+ movdqa %xmm7,80(%esp)
+ pxor %xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+.byte 102,15,56,220,209
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ call L_aesni_encrypt6_enter
+ movdqa 80(%esp),%xmm1
+ pxor %xmm0,%xmm0
+ xorps (%esp),%xmm2
+ pcmpgtd %xmm1,%xmm0
+ xorps 16(%esp),%xmm3
+ movups %xmm2,(%edi)
+ xorps 32(%esp),%xmm4
+ movups %xmm3,16(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm4,32(%edi)
+ xorps 64(%esp),%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm1,%xmm7
+ movups %xmm6,64(%edi)
+ pshufd $19,%xmm0,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqa 96(%esp),%xmm3
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ subl $96,%eax
+ jnc L048xts_enc_loop6
+ movl 240(%ebp),%ecx
+ movl %ebp,%edx
+ movl %ecx,%ebx
+L047xts_enc_short:
+ addl $96,%eax
+ jz L049xts_enc_done6x
+ movdqa %xmm1,%xmm5
+ cmpl $32,%eax
+ jb L050xts_enc_one
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ je L051xts_enc_two
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ cmpl $64,%eax
+ jb L052xts_enc_three
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm7
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,(%esp)
+ movdqa %xmm6,16(%esp)
+ je L053xts_enc_four
+ movdqa %xmm7,32(%esp)
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm7
+ pxor %xmm1,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ pxor (%esp),%xmm2
+ movdqu 48(%esi),%xmm5
+ pxor 16(%esp),%xmm3
+ movdqu 64(%esi),%xmm6
+ pxor 32(%esp),%xmm4
+ leal 80(%esi),%esi
+ pxor 48(%esp),%xmm5
+ movdqa %xmm7,64(%esp)
+ pxor %xmm7,%xmm6
+ call __aesni_encrypt6
+ movaps 64(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps 32(%esp),%xmm4
+ movups %xmm2,(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm3,16(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ jmp L054xts_enc_done
+.align 4,0x90
+L050xts_enc_one:
+ movups (%esi),%xmm2
+ leal 16(%esi),%esi
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L055enc1_loop_9:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L055enc1_loop_9
+.byte 102,15,56,221,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ movdqa %xmm5,%xmm1
+ jmp L054xts_enc_done
+.align 4,0x90
+L051xts_enc_two:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ leal 32(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ call __aesni_encrypt2
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 32(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp L054xts_enc_done
+.align 4,0x90
+L052xts_enc_three:
+ movaps %xmm1,%xmm7
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ leal 48(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ call __aesni_encrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movdqa %xmm7,%xmm1
+ jmp L054xts_enc_done
+.align 4,0x90
+L053xts_enc_four:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ xorps (%esp),%xmm2
+ movups 48(%esi),%xmm5
+ leal 64(%esi),%esi
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ xorps %xmm6,%xmm5
+ call __aesni_encrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ xorps %xmm6,%xmm5
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp L054xts_enc_done
+.align 4,0x90
+L049xts_enc_done6x:
+ movl 112(%esp),%eax
+ andl $15,%eax
+ jz L056xts_enc_ret
+ movdqa %xmm1,%xmm5
+ movl %eax,112(%esp)
+ jmp L057xts_enc_steal
+.align 4,0x90
+L054xts_enc_done:
+ movl 112(%esp),%eax
+ pxor %xmm0,%xmm0
+ andl $15,%eax
+ jz L056xts_enc_ret
+ pcmpgtd %xmm1,%xmm0
+ movl %eax,112(%esp)
+ pshufd $19,%xmm0,%xmm5
+ paddq %xmm1,%xmm1
+ pand 96(%esp),%xmm5
+ pxor %xmm1,%xmm5
+L057xts_enc_steal:
+ movzbl (%esi),%ecx
+ movzbl -16(%edi),%edx
+ leal 1(%esi),%esi
+ movb %cl,-16(%edi)
+ movb %dl,(%edi)
+ leal 1(%edi),%edi
+ subl $1,%eax
+ jnz L057xts_enc_steal
+ subl 112(%esp),%edi
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups -16(%edi),%xmm2
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L058enc1_loop_10:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L058enc1_loop_10
+.byte 102,15,56,221,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,-16(%edi)
+L056xts_enc_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
+ movl 116(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_xts_decrypt
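+# void aesni_xts_decrypt(const unsigned char *in, unsigned char *out,
+#                        size_t len, const AES_KEY *key1,
+#                        const AES_KEY *key2, const unsigned char iv[16]);
+# (presumed prototype, per the generating perlasm source)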
+.align 4
+_aesni_xts_decrypt:
+L_aesni_xts_decrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 36(%esp),%edx
+ movl 40(%esp),%esi
+ movl 240(%edx),%ecx
+ movups (%esi),%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L059enc1_loop_11:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L059enc1_loop_11
+.byte 102,15,56,221,209
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl %esp,%ebp
+ subl $120,%esp
+ andl $-16,%esp
+ xorl %ebx,%ebx
+ testl $15,%eax
+ setnz %bl
+ shll $4,%ebx
+ subl %ebx,%eax
+ movl $135,96(%esp)
+ movl $0,100(%esp)
+ movl $1,104(%esp)
+ movl $0,108(%esp)
+ movl %eax,112(%esp)
+ movl %ebp,116(%esp)
+ movl 240(%edx),%ecx
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ movdqa %xmm2,%xmm1
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ pcmpgtd %xmm1,%xmm0
+ andl $-16,%eax
+ subl $96,%eax
+ jc L060xts_dec_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp L061xts_dec_loop6
+.align 4,0x90
+L061xts_dec_loop6:
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,16(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,32(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,64(%esp)
+ paddq %xmm1,%xmm1
+ movups (%ebp),%xmm0
+ pand %xmm3,%xmm7
+ movups (%esi),%xmm2
+ pxor %xmm1,%xmm7
+ movl %ebx,%ecx
+ movdqu 16(%esi),%xmm3
+ xorps %xmm0,%xmm2
+ movdqu 32(%esi),%xmm4
+ pxor %xmm0,%xmm3
+ movdqu 48(%esi),%xmm5
+ pxor %xmm0,%xmm4
+ movdqu 64(%esi),%xmm6
+ pxor %xmm0,%xmm5
+ movdqu 80(%esi),%xmm1
+ pxor %xmm0,%xmm6
+ leal 96(%esi),%esi
+ pxor (%esp),%xmm2
+ movdqa %xmm7,80(%esp)
+ pxor %xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+.byte 102,15,56,222,209
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ call L_aesni_decrypt6_enter
+ movdqa 80(%esp),%xmm1
+ pxor %xmm0,%xmm0
+ xorps (%esp),%xmm2
+ pcmpgtd %xmm1,%xmm0
+ xorps 16(%esp),%xmm3
+ movups %xmm2,(%edi)
+ xorps 32(%esp),%xmm4
+ movups %xmm3,16(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm4,32(%edi)
+ xorps 64(%esp),%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm1,%xmm7
+ movups %xmm6,64(%edi)
+ pshufd $19,%xmm0,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqa 96(%esp),%xmm3
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ subl $96,%eax
+ jnc L061xts_dec_loop6
+ movl 240(%ebp),%ecx
+ movl %ebp,%edx
+ movl %ecx,%ebx
+L060xts_dec_short:
+ addl $96,%eax
+ jz L062xts_dec_done6x
+ movdqa %xmm1,%xmm5
+ cmpl $32,%eax
+ jb L063xts_dec_one
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ je L064xts_dec_two
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ cmpl $64,%eax
+ jb L065xts_dec_three
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm7
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,(%esp)
+ movdqa %xmm6,16(%esp)
+ je L066xts_dec_four
+ movdqa %xmm7,32(%esp)
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm7
+ pxor %xmm1,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ pxor (%esp),%xmm2
+ movdqu 48(%esi),%xmm5
+ pxor 16(%esp),%xmm3
+ movdqu 64(%esi),%xmm6
+ pxor 32(%esp),%xmm4
+ leal 80(%esi),%esi
+ pxor 48(%esp),%xmm5
+ movdqa %xmm7,64(%esp)
+ pxor %xmm7,%xmm6
+ call __aesni_decrypt6
+ movaps 64(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps 32(%esp),%xmm4
+ movups %xmm2,(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm3,16(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ jmp L067xts_dec_done
+.align 4,0x90
+L063xts_dec_one:
+ movups (%esi),%xmm2
+ leal 16(%esi),%esi
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L068dec1_loop_12:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L068dec1_loop_12
+.byte 102,15,56,223,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ movdqa %xmm5,%xmm1
+ jmp L067xts_dec_done
+.align 4,0x90
+L064xts_dec_two:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ leal 32(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ call __aesni_decrypt2
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 32(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp L067xts_dec_done
+.align 4,0x90
+L065xts_dec_three:
+ movaps %xmm1,%xmm7
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ leal 48(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ call __aesni_decrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movdqa %xmm7,%xmm1
+ jmp L067xts_dec_done
+.align 4,0x90
+L066xts_dec_four:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ xorps (%esp),%xmm2
+ movups 48(%esi),%xmm5
+ leal 64(%esi),%esi
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ xorps %xmm6,%xmm5
+ call __aesni_decrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ xorps %xmm6,%xmm5
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp L067xts_dec_done
+.align 4,0x90
+L062xts_dec_done6x:
+ movl 112(%esp),%eax
+ andl $15,%eax
+ jz L069xts_dec_ret
+ movl %eax,112(%esp)
+ jmp L070xts_dec_only_one_more
+.align 4,0x90
+L067xts_dec_done:
+ movl 112(%esp),%eax
+ pxor %xmm0,%xmm0
+ andl $15,%eax
+ jz L069xts_dec_ret
+ pcmpgtd %xmm1,%xmm0
+ movl %eax,112(%esp)
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+L070xts_dec_only_one_more:
+ pshufd $19,%xmm0,%xmm5
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm5
+ pxor %xmm1,%xmm5
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups (%esi),%xmm2
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L071dec1_loop_13:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L071dec1_loop_13
+.byte 102,15,56,223,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+L072xts_dec_steal:
+ movzbl 16(%esi),%ecx
+ movzbl (%edi),%edx
+ leal 1(%esi),%esi
+ movb %cl,(%edi)
+ movb %dl,16(%edi)
+ leal 1(%edi),%edi
+ subl $1,%eax
+ jnz L072xts_dec_steal
+ subl 112(%esp),%edi
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups (%edi),%xmm2
+ xorps %xmm6,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L073dec1_loop_14:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L073dec1_loop_14
+.byte 102,15,56,223,209
+ xorps %xmm6,%xmm2
+ movups %xmm2,(%edi)
+L069xts_dec_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
+ movl 116(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ocb_encrypt
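+# Presumed arguments (inferred from the stack offsets used below):
+# in, out, blocks, key schedule, start block number, offset_i[16],
+# L_ table, checksum[16]; offset_i and checksum are written back on return.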
+.align 4
+_aesni_ocb_encrypt:
+L_aesni_ocb_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movdqu (%ecx),%xmm0
+ movl 36(%esp),%ebp
+ movdqu (%ebx),%xmm1
+ movl 44(%esp),%ebx
+ movl %esp,%ecx
+ subl $132,%esp
+ andl $-16,%esp
+ subl %esi,%edi
+ shll $4,%eax
+ leal -96(%esi,%eax,1),%eax
+ movl %edi,120(%esp)
+ movl %eax,124(%esp)
+ movl %ecx,128(%esp)
+ movl 240(%edx),%ecx
+ testl $1,%ebp
+ jnz L074odd
+ bsfl %ebp,%eax
+ addl $1,%ebp
+ shll $4,%eax
+ movdqu (%ebx,%eax,1),%xmm7
+ movl %edx,%eax
+ movdqu (%esi),%xmm2
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L075enc1_loop_15:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L075enc1_loop_15
+.byte 102,15,56,221,209
+ xorps %xmm7,%xmm2
+ movdqa %xmm7,%xmm0
+ movdqa %xmm6,%xmm1
+ movups %xmm2,-16(%edi,%esi,1)
+ movl 240(%eax),%ecx
+ movl %eax,%edx
+ movl 124(%esp),%eax
+L074odd:
+ shll $4,%ecx
+ movl $16,%edi
+ subl %ecx,%edi
+ movl %edx,112(%esp)
+ leal 32(%edx,%ecx,1),%edx
+ movl %edi,116(%esp)
+ cmpl %eax,%esi
+ ja L076short
+ jmp L077grandloop
+.align 5,0x90
+L077grandloop:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ leal 5(%ebp),%edi
+ addl $6,%ebp
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ bsfl %edi,%edi
+ shll $4,%ecx
+ shll $4,%eax
+ shll $4,%edi
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ movdqu (%ebx,%edi,1),%xmm7
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm5,%xmm1
+ pxor %xmm0,%xmm5
+ pxor %xmm6,%xmm1
+ pxor %xmm0,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm0,%xmm7
+ movdqa %xmm1,96(%esp)
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor 80(%esp),%xmm7
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ movl 120(%esp),%edi
+ movl 124(%esp),%eax
+ call L_aesni_encrypt6_enter
+ movdqa 80(%esp),%xmm0
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor %xmm0,%xmm7
+ movdqa 96(%esp),%xmm1
+ movdqu %xmm2,-96(%edi,%esi,1)
+ movdqu %xmm3,-80(%edi,%esi,1)
+ movdqu %xmm4,-64(%edi,%esi,1)
+ movdqu %xmm5,-48(%edi,%esi,1)
+ movdqu %xmm6,-32(%edi,%esi,1)
+ movdqu %xmm7,-16(%edi,%esi,1)
+ cmpl %eax,%esi
+ jb L077grandloop
+L076short:
+ addl $96,%eax
+ subl %esi,%eax
+ jz L078done
+ cmpl $32,%eax
+ jb L079one
+ je L080two
+ cmpl $64,%eax
+ jb L081three
+ je L082four
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ pxor %xmm7,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm5,%xmm1
+ pxor %xmm0,%xmm5
+ pxor %xmm6,%xmm1
+ pxor %xmm0,%xmm6
+ movdqa %xmm1,96(%esp)
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ movl 120(%esp),%edi
+ call L_aesni_encrypt6_enter
+ movdqa 64(%esp),%xmm0
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor %xmm0,%xmm6
+ movdqa 96(%esp),%xmm1
+ movdqu %xmm2,(%edi,%esi,1)
+ movdqu %xmm3,16(%edi,%esi,1)
+ movdqu %xmm4,32(%edi,%esi,1)
+ movdqu %xmm5,48(%edi,%esi,1)
+ movdqu %xmm6,64(%edi,%esi,1)
+ jmp L078done
+.align 4,0x90
+L079one:
+ movdqu (%ebx),%xmm7
+ movl 112(%esp),%edx
+ movdqu (%esi),%xmm2
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movl 120(%esp),%edi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L083enc1_loop_16:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L083enc1_loop_16
+.byte 102,15,56,221,209
+ xorps %xmm7,%xmm2
+ movdqa %xmm7,%xmm0
+ movdqa %xmm6,%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ jmp L078done
+.align 4,0x90
+L080two:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm6
+ movdqu (%ebx,%ecx,1),%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm7,%xmm3
+ movdqa %xmm1,%xmm5
+ movl 120(%esp),%edi
+ call __aesni_encrypt2
+ xorps %xmm6,%xmm2
+ xorps %xmm7,%xmm3
+ movdqa %xmm7,%xmm0
+ movdqa %xmm5,%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ movups %xmm3,16(%edi,%esi,1)
+ jmp L078done
+.align 4,0x90
+L081three:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm5
+ movdqu (%ebx,%ecx,1),%xmm6
+ movdqa %xmm5,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm5,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm6,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm7,%xmm4
+ movdqa %xmm1,96(%esp)
+ movl 120(%esp),%edi
+ call __aesni_encrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movdqa %xmm7,%xmm0
+ movdqa 96(%esp),%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ movups %xmm3,16(%edi,%esi,1)
+ movups %xmm4,32(%edi,%esi,1)
+ jmp L078done
+.align 4,0x90
+L082four:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ movl 112(%esp),%edx
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm4
+ movdqu (%ebx,%ecx,1),%xmm5
+ movdqa %xmm4,%xmm6
+ movdqu (%ebx,%eax,1),%xmm7
+ pxor %xmm0,%xmm4
+ movdqu (%esi),%xmm2
+ pxor %xmm4,%xmm5
+ movdqu 16(%esi),%xmm3
+ pxor %xmm5,%xmm6
+ movdqa %xmm4,(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm5,16(%esp)
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movl 240(%edx),%ecx
+ pxor %xmm2,%xmm1
+ pxor (%esp),%xmm2
+ pxor %xmm3,%xmm1
+ pxor 16(%esp),%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm1
+ pxor %xmm7,%xmm5
+ movdqa %xmm1,96(%esp)
+ movl 120(%esp),%edi
+ call __aesni_encrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm6,%xmm4
+ movups %xmm2,(%edi,%esi,1)
+ xorps %xmm7,%xmm5
+ movups %xmm3,16(%edi,%esi,1)
+ movdqa %xmm7,%xmm0
+ movups %xmm4,32(%edi,%esi,1)
+ movdqa 96(%esp),%xmm1
+ movups %xmm5,48(%edi,%esi,1)
+L078done:
+ movl 128(%esp),%edx
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm2,16(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm2,32(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm2,48(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm2,64(%esp)
+ movdqa %xmm2,80(%esp)
+ movdqa %xmm2,96(%esp)
+ leal (%edx),%esp
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movdqu %xmm0,(%ecx)
+ pxor %xmm0,%xmm0
+ movdqu %xmm1,(%ebx)
+ pxor %xmm1,%xmm1
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ocb_decrypt
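+# Presumed arguments (inferred from the stack offsets used below), matching
+# _aesni_ocb_encrypt: in, out, blocks, key schedule, start block number,
+# offset_i[16], L_ table, checksum[16]; offset_i and checksum are updated.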
+.align 4
+_aesni_ocb_decrypt:
+L_aesni_ocb_decrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movdqu (%ecx),%xmm0
+ movl 36(%esp),%ebp
+ movdqu (%ebx),%xmm1
+ movl 44(%esp),%ebx
+ movl %esp,%ecx
+ subl $132,%esp
+ andl $-16,%esp
+ subl %esi,%edi
+ shll $4,%eax
+ leal -96(%esi,%eax,1),%eax
+ movl %edi,120(%esp)
+ movl %eax,124(%esp)
+ movl %ecx,128(%esp)
+ movl 240(%edx),%ecx
+ testl $1,%ebp
+ jnz L084odd
+ bsfl %ebp,%eax
+ addl $1,%ebp
+ shll $4,%eax
+ movdqu (%ebx,%eax,1),%xmm7
+ movl %edx,%eax
+ movdqu (%esi),%xmm2
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm7
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L085dec1_loop_17:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L085dec1_loop_17
+.byte 102,15,56,223,209
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm1
+ movdqa %xmm7,%xmm0
+ xorps %xmm2,%xmm1
+ movups %xmm2,-16(%edi,%esi,1)
+ movl 240(%eax),%ecx
+ movl %eax,%edx
+ movl 124(%esp),%eax
+L084odd:
+ shll $4,%ecx
+ movl $16,%edi
+ subl %ecx,%edi
+ movl %edx,112(%esp)
+ leal 32(%edx,%ecx,1),%edx
+ movl %edi,116(%esp)
+ cmpl %eax,%esi
+ ja L086short
+ jmp L087grandloop
+.align 5,0x90
+L087grandloop:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ leal 5(%ebp),%edi
+ addl $6,%ebp
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ bsfl %edi,%edi
+ shll $4,%ecx
+ shll $4,%eax
+ shll $4,%edi
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ movdqu (%ebx,%edi,1),%xmm7
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ movdqa %xmm1,96(%esp)
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+ pxor %xmm0,%xmm7
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor 80(%esp),%xmm7
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ movl 120(%esp),%edi
+ movl 124(%esp),%eax
+ call L_aesni_decrypt6_enter
+ movdqa 80(%esp),%xmm0
+ pxor (%esp),%xmm2
+ movdqa 96(%esp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor %xmm0,%xmm7
+ pxor %xmm2,%xmm1
+ movdqu %xmm2,-96(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movdqu %xmm3,-80(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ movdqu %xmm4,-64(%edi,%esi,1)
+ pxor %xmm5,%xmm1
+ movdqu %xmm5,-48(%edi,%esi,1)
+ pxor %xmm6,%xmm1
+ movdqu %xmm6,-32(%edi,%esi,1)
+ pxor %xmm7,%xmm1
+ movdqu %xmm7,-16(%edi,%esi,1)
+ cmpl %eax,%esi
+ jb L087grandloop
+L086short:
+ addl $96,%eax
+ subl %esi,%eax
+ jz L088done
+ cmpl $32,%eax
+ jb L089one
+ je L090two
+ cmpl $64,%eax
+ jb L091three
+ je L092four
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ pxor %xmm7,%xmm7
+ movdqa %xmm1,96(%esp)
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ movl 120(%esp),%edi
+ call L_aesni_decrypt6_enter
+ movdqa 64(%esp),%xmm0
+ pxor (%esp),%xmm2
+ movdqa 96(%esp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor %xmm0,%xmm6
+ pxor %xmm2,%xmm1
+ movdqu %xmm2,(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movdqu %xmm3,16(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ movdqu %xmm4,32(%edi,%esi,1)
+ pxor %xmm5,%xmm1
+ movdqu %xmm5,48(%edi,%esi,1)
+ pxor %xmm6,%xmm1
+ movdqu %xmm6,64(%edi,%esi,1)
+ jmp L088done
+.align 4,0x90
+L089one:
+ movdqu (%ebx),%xmm7
+ movl 112(%esp),%edx
+ movdqu (%esi),%xmm2
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm7
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movl 120(%esp),%edi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L093dec1_loop_18:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L093dec1_loop_18
+.byte 102,15,56,223,209
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm1
+ movdqa %xmm7,%xmm0
+ xorps %xmm2,%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ jmp L088done
+.align 4,0x90
+L090two:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm6
+ movdqu (%ebx,%ecx,1),%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movl 240(%edx),%ecx
+ movdqa %xmm1,%xmm5
+ pxor %xmm0,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm6,%xmm2
+ pxor %xmm7,%xmm3
+ movl 120(%esp),%edi
+ call __aesni_decrypt2
+ xorps %xmm6,%xmm2
+ xorps %xmm7,%xmm3
+ movdqa %xmm7,%xmm0
+ xorps %xmm2,%xmm5
+ movups %xmm2,(%edi,%esi,1)
+ xorps %xmm3,%xmm5
+ movups %xmm3,16(%edi,%esi,1)
+ movaps %xmm5,%xmm1
+ jmp L088done
+.align 4,0x90
+L091three:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm5
+ movdqu (%ebx,%ecx,1),%xmm6
+ movdqa %xmm5,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movl 240(%edx),%ecx
+ movdqa %xmm1,96(%esp)
+ pxor %xmm0,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm5,%xmm2
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm4
+ movl 120(%esp),%edi
+ call __aesni_decrypt3
+ movdqa 96(%esp),%xmm1
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi,%esi,1)
+ pxor %xmm2,%xmm1
+ movdqa %xmm7,%xmm0
+ movups %xmm3,16(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movups %xmm4,32(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ jmp L088done
+.align 4,0x90
+L092four:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ movl 112(%esp),%edx
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm4
+ movdqu (%ebx,%ecx,1),%xmm5
+ movdqa %xmm4,%xmm6
+ movdqu (%ebx,%eax,1),%xmm7
+ pxor %xmm0,%xmm4
+ movdqu (%esi),%xmm2
+ pxor %xmm4,%xmm5
+ movdqu 16(%esi),%xmm3
+ pxor %xmm5,%xmm6
+ movdqa %xmm4,(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm5,16(%esp)
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movl 240(%edx),%ecx
+ movdqa %xmm1,96(%esp)
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm7,%xmm5
+ movl 120(%esp),%edi
+ call __aesni_decrypt4
+ movdqa 96(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm6,%xmm4
+ movups %xmm2,(%edi,%esi,1)
+ pxor %xmm2,%xmm1
+ xorps %xmm7,%xmm5
+ movups %xmm3,16(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movdqa %xmm7,%xmm0
+ movups %xmm4,32(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ movups %xmm5,48(%edi,%esi,1)
+ pxor %xmm5,%xmm1
+L088done:
+ movl 128(%esp),%edx
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm2,16(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm2,32(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm2,48(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm2,64(%esp)
+ movdqa %xmm2,80(%esp)
+ movdqa %xmm2,96(%esp)
+ leal (%edx),%esp
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movdqu %xmm0,(%ecx)
+ pxor %xmm0,%xmm0
+ movdqu %xmm1,(%ebx)
+ pxor %xmm1,%xmm1
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_cbc_encrypt
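+# void aesni_cbc_encrypt(const void *in, void *out, size_t len,
+#                        const AES_KEY *key, unsigned char *ivp, int enc);
+# (presumed prototype, per the generating perlasm source)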
+.align 4
+_aesni_cbc_encrypt:
+L_aesni_cbc_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl %esp,%ebx
+ movl 24(%esp),%edi
+ subl $24,%ebx
+ movl 28(%esp),%eax
+ andl $-16,%ebx
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebp
+ testl %eax,%eax
+ jz L094cbc_abort
+ cmpl $0,40(%esp)
+ xchgl %esp,%ebx
+ movups (%ebp),%xmm7
+ movl 240(%edx),%ecx
+ movl %edx,%ebp
+ movl %ebx,16(%esp)
+ movl %ecx,%ebx
+ je L095cbc_decrypt
+ movaps %xmm7,%xmm2
+ cmpl $16,%eax
+ jb L096cbc_enc_tail
+ subl $16,%eax
+ jmp L097cbc_enc_loop
+.align 4,0x90
+L097cbc_enc_loop:
+ movups (%esi),%xmm7
+ leal 16(%esi),%esi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm7
+ leal 32(%edx),%edx
+ xorps %xmm7,%xmm2
+L098enc1_loop_19:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L098enc1_loop_19
+.byte 102,15,56,221,209
+ movl %ebx,%ecx
+ movl %ebp,%edx
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ subl $16,%eax
+ jnc L097cbc_enc_loop
+ addl $16,%eax
+ jnz L096cbc_enc_tail
+ movaps %xmm2,%xmm7
+ pxor %xmm2,%xmm2
+ jmp L099cbc_ret
+L096cbc_enc_tail:
+ movl %eax,%ecx
+.long 2767451785 # = 0xA4F3F689: mov %esi,%esi; rep movsb (copy the input tail to the output buffer)
+ movl $16,%ecx
+ subl %eax,%ecx
+ xorl %eax,%eax
+.long 2868115081 # = 0xAAF3F689: mov %esi,%esi; rep stosb (zero-pad the block to 16 bytes)
+ leal -16(%edi),%edi
+ movl %ebx,%ecx
+ movl %edi,%esi
+ movl %ebp,%edx
+ jmp L097cbc_enc_loop
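+# CBC decryption: while more than 80 bytes remain, six blocks are decrypted
+# per iteration via __aesni_decrypt6; the 1-5 block tail is handled below.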
+.align 4,0x90
+L095cbc_decrypt:
+ cmpl $80,%eax
+ jbe L100cbc_dec_tail
+ movaps %xmm7,(%esp)
+ subl $80,%eax
+ jmp L101cbc_dec_loop6_enter
+.align 4,0x90
+L102cbc_dec_loop6:
+ movaps %xmm0,(%esp)
+ movups %xmm7,(%edi)
+ leal 16(%edi),%edi
+L101cbc_dec_loop6_enter:
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ call __aesni_decrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps (%esp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%esi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%esi),%xmm0
+ xorps %xmm1,%xmm7
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 96(%esi),%esi
+ movups %xmm4,32(%edi)
+ movl %ebx,%ecx
+ movups %xmm5,48(%edi)
+ movl %ebp,%edx
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ subl $96,%eax
+ ja L102cbc_dec_loop6
+ movaps %xmm7,%xmm2
+ movaps %xmm0,%xmm7
+ addl $80,%eax
+ jle L103cbc_dec_clear_tail_collected
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+L100cbc_dec_tail:
+ movups (%esi),%xmm2
+ movaps %xmm2,%xmm6
+ cmpl $16,%eax
+ jbe L104cbc_dec_one
+ movups 16(%esi),%xmm3
+ movaps %xmm3,%xmm5
+ cmpl $32,%eax
+ jbe L105cbc_dec_two
+ movups 32(%esi),%xmm4
+ cmpl $48,%eax
+ jbe L106cbc_dec_three
+ movups 48(%esi),%xmm5
+ cmpl $64,%eax
+ jbe L107cbc_dec_four
+ movups 64(%esi),%xmm6
+ movaps %xmm7,(%esp)
+ movups (%esi),%xmm2
+ xorps %xmm7,%xmm7
+ call __aesni_decrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps (%esp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%esi),%xmm7
+ xorps %xmm0,%xmm6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
+ movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
+ movups %xmm5,48(%edi)
+ pxor %xmm5,%xmm5
+ leal 64(%edi),%edi
+ movaps %xmm6,%xmm2
+ pxor %xmm6,%xmm6
+ subl $80,%eax
+ jmp L108cbc_dec_tail_collected
+.align 4,0x90
+L104cbc_dec_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+L109dec1_loop_20:
+.byte 102,15,56,222,209 # aesdec %xmm1,%xmm2
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz L109dec1_loop_20
+.byte 102,15,56,223,209 # aesdeclast %xmm1,%xmm2
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm7
+ subl $16,%eax
+ jmp L108cbc_dec_tail_collected
+.align 4,0x90
+L105cbc_dec_two:
+ call __aesni_decrypt2
+ xorps %xmm7,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movaps %xmm3,%xmm2
+ pxor %xmm3,%xmm3
+ leal 16(%edi),%edi
+ movaps %xmm5,%xmm7
+ subl $32,%eax
+ jmp L108cbc_dec_tail_collected
+.align 4,0x90
+L106cbc_dec_three:
+ call __aesni_decrypt3
+ xorps %xmm7,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm5,%xmm4
+ movups %xmm2,(%edi)
+ movaps %xmm4,%xmm2
+ pxor %xmm4,%xmm4
+ movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
+ leal 32(%edi),%edi
+ movups 32(%esi),%xmm7
+ subl $48,%eax
+ jmp L108cbc_dec_tail_collected
+.align 4,0x90
+L107cbc_dec_four:
+ call __aesni_decrypt4
+ movups 16(%esi),%xmm1
+ movups 32(%esi),%xmm0
+ xorps %xmm7,%xmm2
+ movups 48(%esi),%xmm7
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm4
+ movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
+ xorps %xmm0,%xmm5
+ movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
+ leal 48(%edi),%edi
+ movaps %xmm5,%xmm2
+ pxor %xmm5,%xmm5
+ subl $64,%eax
+ jmp L108cbc_dec_tail_collected
+.align 4,0x90
+L103cbc_dec_clear_tail_collected:
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+L108cbc_dec_tail_collected:
+ andl $15,%eax
+ jnz L110cbc_dec_tail_partial
+ movups %xmm2,(%edi)
+ pxor %xmm0,%xmm0
+ jmp L099cbc_ret
+.align 4,0x90
+L110cbc_dec_tail_partial:
+ movaps %xmm2,(%esp)
+ pxor %xmm0,%xmm0
+ movl $16,%ecx
+ movl %esp,%esi
+ subl %eax,%ecx
+.long 2767451785 # = 0xA4F3F689: mov %esi,%esi; rep movsb
+ movdqa %xmm2,(%esp)
+L099cbc_ret:
+ movl 16(%esp),%esp
+ movl 36(%esp),%ebp
+ pxor %xmm2,%xmm2
+ pxor %xmm1,%xmm1
+ movups %xmm7,(%ebp)
+ pxor %xmm7,%xmm7
+L094cbc_abort:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
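+# Internal key-expansion helper, register calling convention:
+#   %eax = user key, %ecx = key length in bits (128/192/256),
+#   %edx = output key schedule.
+# Returns 0 in %eax on success, -1 for a NULL pointer, -2 for bad key bits.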
+.align 4
+__aesni_set_encrypt_key:
+ pushl %ebp
+ pushl %ebx
+ testl %eax,%eax
+ jz L111bad_pointer
+ testl %edx,%edx
+ jz L111bad_pointer
+ call L112pic
+L112pic:
+ popl %ebx
+ leal Lkey_const-L112pic(%ebx),%ebx
+ movl L_GNUTLS_x86_cpuid_s$non_lazy_ptr-Lkey_const(%ebx),%ebp
+ movups (%eax),%xmm0
+ xorps %xmm4,%xmm4
+ movl 4(%ebp),%ebp
+ leal 16(%edx),%edx
+ andl $268437504,%ebp # isolate bits 28 (AVX) and 11 of the capability word
+ cmpl $256,%ecx
+ je L11314rounds
+ cmpl $192,%ecx
+ je L11412rounds
+ cmpl $128,%ecx
+ jne L115bad_keybits
+.align 4,0x90
+L11610rounds:
+ cmpl $268435456,%ebp # bit 28 set and bit 11 clear: use the alternative schedule
+ je L11710rounds_alt
+ movl $9,%ecx
+ movups %xmm0,-16(%edx)
+.byte 102,15,58,223,200,1 # aeskeygenassist $1,%xmm0,%xmm1; the following .byte lines step the round constant
+ call L118key_128_cold
+.byte 102,15,58,223,200,2
+ call L119key_128
+.byte 102,15,58,223,200,4
+ call L119key_128
+.byte 102,15,58,223,200,8
+ call L119key_128
+.byte 102,15,58,223,200,16
+ call L119key_128
+.byte 102,15,58,223,200,32
+ call L119key_128
+.byte 102,15,58,223,200,64
+ call L119key_128
+.byte 102,15,58,223,200,128
+ call L119key_128
+.byte 102,15,58,223,200,27
+ call L119key_128
+.byte 102,15,58,223,200,54
+ call L119key_128
+ movups %xmm0,(%edx)
+ movl %ecx,80(%edx)
+ jmp L120good_key
+.align 4,0x90
+L119key_128:
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+L118key_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ ret
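+# Alternative AES-128 schedule: round keys are derived with pshufb and
+# aesenclast (using the Lkey_const masks and round constants) instead of
+# aeskeygenassist.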
+.align 4,0x90
+L11710rounds_alt:
+ movdqa (%ebx),%xmm5
+ movl $8,%ecx
+ movdqa 32(%ebx),%xmm4
+ movdqa %xmm0,%xmm2
+ movdqu %xmm0,-16(%edx)
+L121loop_key128:
+.byte 102,15,56,0,197 # pshufb %xmm5,%xmm0
+.byte 102,15,56,221,196 # aesenclast %xmm4,%xmm0
+ pslld $1,%xmm4
+ leal 16(%edx),%edx
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,-16(%edx)
+ movdqa %xmm0,%xmm2
+ decl %ecx
+ jnz L121loop_key128
+ movdqa 48(%ebx),%xmm4
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ pslld $1,%xmm4
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ movdqa %xmm0,%xmm2
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,16(%edx)
+ movl $9,%ecx
+ movl %ecx,96(%edx)
+ jmp L120good_key
+.align 4,0x90
+L11412rounds:
+ movq 16(%eax),%xmm2
+ cmpl $268435456,%ebp
+ je L12212rounds_alt
+ movl $11,%ecx
+ movups %xmm0,-16(%edx)
+.byte 102,15,58,223,202,1 # aeskeygenassist $1,%xmm2,%xmm1
+ call L123key_192a_cold
+.byte 102,15,58,223,202,2
+ call L124key_192b
+.byte 102,15,58,223,202,4
+ call L125key_192a
+.byte 102,15,58,223,202,8
+ call L124key_192b
+.byte 102,15,58,223,202,16
+ call L125key_192a
+.byte 102,15,58,223,202,32
+ call L124key_192b
+.byte 102,15,58,223,202,64
+ call L125key_192a
+.byte 102,15,58,223,202,128
+ call L124key_192b
+ movups %xmm0,(%edx)
+ movl %ecx,48(%edx)
+ jmp L120good_key
+.align 4,0x90
+L125key_192a:
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+.align 4,0x90
+L123key_192a_cold:
+ movaps %xmm2,%xmm5
+L126key_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ ret
+.align 4,0x90
+L124key_192b:
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%edx)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%edx)
+ leal 32(%edx),%edx
+ jmp L126key_192b_warm
+.align 4,0x90
+L12212rounds_alt:
+ movdqa 16(%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $8,%ecx
+ movdqu %xmm0,-16(%edx)
+L127loop_key192:
+ movq %xmm2,(%edx)
+ movdqa %xmm2,%xmm1
+.byte 102,15,56,0,213 # pshufb %xmm5,%xmm2
+.byte 102,15,56,221,212 # aesenclast %xmm4,%xmm2
+ pslld $1,%xmm4
+ leal 24(%edx),%edx
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pxor %xmm2,%xmm0
+ pxor %xmm3,%xmm2
+ movdqu %xmm0,-16(%edx)
+ decl %ecx
+ jnz L127loop_key192
+ movl $11,%ecx
+ movl %ecx,32(%edx)
+ jmp L120good_key
+.align 4,0x90
+L11314rounds:
+ movups 16(%eax),%xmm2
+ leal 16(%edx),%edx
+ cmpl $268435456,%ebp
+ je L12814rounds_alt
+ movl $13,%ecx
+ movups %xmm0,-32(%edx)
+ movups %xmm2,-16(%edx)
+.byte 102,15,58,223,202,1 # aeskeygenassist $1,%xmm2,%xmm1
+ call L129key_256a_cold
+.byte 102,15,58,223,200,1 # aeskeygenassist $1,%xmm0,%xmm1
+ call L130key_256b
+.byte 102,15,58,223,202,2
+ call L131key_256a
+.byte 102,15,58,223,200,2
+ call L130key_256b
+.byte 102,15,58,223,202,4
+ call L131key_256a
+.byte 102,15,58,223,200,4
+ call L130key_256b
+.byte 102,15,58,223,202,8
+ call L131key_256a
+.byte 102,15,58,223,200,8
+ call L130key_256b
+.byte 102,15,58,223,202,16
+ call L131key_256a
+.byte 102,15,58,223,200,16
+ call L130key_256b
+.byte 102,15,58,223,202,32
+ call L131key_256a
+.byte 102,15,58,223,200,32
+ call L130key_256b
+.byte 102,15,58,223,202,64
+ call L131key_256a
+ movups %xmm0,(%edx)
+ movl %ecx,16(%edx)
+ xorl %eax,%eax
+ jmp L120good_key
+.align 4,0x90
+L131key_256a:
+ movups %xmm2,(%edx)
+ leal 16(%edx),%edx
+L129key_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ ret
+.align 4,0x90
+L130key_256b:
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1
+ xorps %xmm1,%xmm2
+ ret
+.align 4,0x90
+L12814rounds_alt:
+ movdqa (%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $7,%ecx
+ movdqu %xmm0,-32(%edx)
+ movdqa %xmm2,%xmm1
+ movdqu %xmm2,-16(%edx)
+L132loop_key256:
+.byte 102,15,56,0,213 # pshufb %xmm5,%xmm2
+.byte 102,15,56,221,212 # aesenclast %xmm4,%xmm2
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pslld $1,%xmm4
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ decl %ecx
+ jz L133done_key256
+ pshufd $255,%xmm0,%xmm2
+ pxor %xmm3,%xmm3
+.byte 102,15,56,221,211 # aesenclast %xmm3,%xmm2
+ movdqa %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm3,%xmm1
+ pxor %xmm1,%xmm2
+ movdqu %xmm2,16(%edx)
+ leal 32(%edx),%edx
+ movdqa %xmm2,%xmm1
+ jmp L132loop_key256
+L133done_key256:
+ movl $13,%ecx
+ movl %ecx,16(%edx)
+L120good_key:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ xorl %eax,%eax
+ popl %ebx
+ popl %ebp
+ ret
+.align 2,0x90
+L111bad_pointer:
+ movl $-1,%eax
+ popl %ebx
+ popl %ebp
+ ret
+.align 2,0x90
+L115bad_keybits:
+ pxor %xmm0,%xmm0
+ movl $-2,%eax
+ popl %ebx
+ popl %ebp
+ ret
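+# int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
+#                           AES_KEY *key);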
+.globl _aesni_set_encrypt_key
+.align 4
+_aesni_set_encrypt_key:
+L_aesni_set_encrypt_key_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ call __aesni_set_encrypt_key
+ ret
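+# int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
+#                           AES_KEY *key);
+# Builds the encryption schedule, then converts it in place for decryption by
+# reversing the round-key order and applying aesimc to the inner round keys.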
+.globl _aesni_set_decrypt_key
+.align 4
+_aesni_set_decrypt_key:
+L_aesni_set_decrypt_key_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ call __aesni_set_encrypt_key
+ movl 12(%esp),%edx
+ shll $4,%ecx
+ testl %eax,%eax
+ jnz L134dec_key_ret
+ leal 16(%edx,%ecx,1),%eax
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
+ movups %xmm0,(%eax)
+ movups %xmm1,(%edx)
+ leal 16(%edx),%edx
+ leal -16(%eax),%eax
+L135dec_key_inverse:
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
+.byte 102,15,56,219,192 # aesimc %xmm0,%xmm0
+.byte 102,15,56,219,201 # aesimc %xmm1,%xmm1
+ leal 16(%edx),%edx
+ leal -16(%eax),%eax
+ movups %xmm0,16(%eax)
+ movups %xmm1,-16(%edx)
+ cmpl %edx,%eax
+ ja L135dec_key_inverse
+ movups (%edx),%xmm0
+.byte 102,15,56,219,192 # aesimc %xmm0,%xmm0
+ movups %xmm0,(%edx)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ xorl %eax,%eax
+L134dec_key_ret:
+ ret
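+# Constants for the pshufb/aesenclast key-schedule paths: two byte-shuffle
+# masks (0x0c0f0e0d, 0x04070605) followed by the round-constant seeds 1 and
+# 0x1b.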
+.align 6,0x90
+Lkey_const:
+.long 202313229,202313229,202313229,202313229
+.long 67569157,67569157,67569157,67569157
+.long 1,1,1,1
+.long 27,27,27,27
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+.byte 115,108,46,111,114,103,62,0 # "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
+.section __IMPORT,__pointers,non_lazy_symbol_pointers
+L_GNUTLS_x86_cpuid_s$non_lazy_ptr:
+.indirect_symbol _GNUTLS_x86_cpuid_s
+.long 0
+.comm _GNUTLS_x86_cpuid_s,16,2
+