Diffstat (limited to 'lib/accelerated/x86/coff/aesni-x86.s')
-rw-r--r--   lib/accelerated/x86/coff/aesni-x86.s   3255
1 file changed, 3255 insertions, 0 deletions
diff --git a/lib/accelerated/x86/coff/aesni-x86.s b/lib/accelerated/x86/coff/aesni-x86.s
new file mode 100644
index 0000000..92e5431
--- /dev/null
+++ b/lib/accelerated/x86/coff/aesni-x86.s
@@ -0,0 +1,3255 @@
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+.globl _aesni_encrypt
+.def _aesni_encrypt; .scl 2; .type 32; .endef
+.align 16
+_aesni_encrypt:
+.L_aesni_encrypt_begin:
+ movl 4(%esp),%eax
+ movl 12(%esp),%edx
+ movups (%eax),%xmm2
+ movl 240(%edx),%ecx
+ movl 8(%esp),%eax
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L000enc1_loop_1:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L000enc1_loop_1
+.byte 102,15,56,221,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
+ ret
+.globl _aesni_decrypt
+.def _aesni_decrypt; .scl 2; .type 32; .endef
+.align 16
+_aesni_decrypt:
+.L_aesni_decrypt_begin:
+ movl 4(%esp),%eax
+ movl 12(%esp),%edx
+ movups (%eax),%xmm2
+ movl 240(%edx),%ecx
+ movl 8(%esp),%eax
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L001dec1_loop_2:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L001dec1_loop_2
+.byte 102,15,56,223,209
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ movups %xmm2,(%eax)
+ pxor %xmm2,%xmm2
+ ret
+.def __aesni_encrypt2; .scl 3; .type 32; .endef
+.align 16
+__aesni_encrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L002enc2_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L002enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ ret
+.def __aesni_decrypt2; .scl 3; .type 32; .endef
+.align 16
+__aesni_decrypt2:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L003dec2_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L003dec2_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+ ret
+.def __aesni_encrypt3; .scl 3; .type 32; .endef
+.align 16
+__aesni_encrypt3:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L004enc3_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L004enc3_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+ ret
+.def __aesni_decrypt3; .scl 3; .type 32; .endef
+.align 16
+__aesni_decrypt3:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+ addl $16,%ecx
+.L005dec3_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L005dec3_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+ ret
+.def __aesni_encrypt4; .scl 3; .type 32; .endef
+.align 16
+__aesni_encrypt4:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ shll $4,%ecx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+.L006enc4_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L006enc4_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+ ret
+.def __aesni_decrypt4; .scl 3; .type 32; .endef
+.align 16
+__aesni_decrypt4:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ shll $4,%ecx
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ movups 32(%edx),%xmm0
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 15,31,64,0
+ addl $16,%ecx
+.L007dec4_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L007dec4_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+ ret
+.def __aesni_encrypt6; .scl 3; .type 32; .endef
+.align 16
+__aesni_encrypt6:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+.byte 102,15,56,220,217
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 102,15,56,220,225
+ pxor %xmm0,%xmm7
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp .L008_aesni_encrypt6_inner
+.align 16
+.L009enc6_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.L008_aesni_encrypt6_inner:
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.L_aesni_encrypt6_enter:
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+.byte 102,15,56,220,224
+.byte 102,15,56,220,232
+.byte 102,15,56,220,240
+.byte 102,15,56,220,248
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L009enc6_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+.byte 102,15,56,221,224
+.byte 102,15,56,221,232
+.byte 102,15,56,221,240
+.byte 102,15,56,221,248
+ ret
+.def __aesni_decrypt6; .scl 3; .type 32; .endef
+.align 16
+__aesni_decrypt6:
+ movups (%edx),%xmm0
+ shll $4,%ecx
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+.byte 102,15,56,222,209
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+.byte 102,15,56,222,217
+ leal 32(%edx,%ecx,1),%edx
+ negl %ecx
+.byte 102,15,56,222,225
+ pxor %xmm0,%xmm7
+ movups (%edx,%ecx,1),%xmm0
+ addl $16,%ecx
+ jmp .L010_aesni_decrypt6_inner
+.align 16
+.L011dec6_loop:
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.L010_aesni_decrypt6_inner:
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.L_aesni_decrypt6_enter:
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,222,208
+.byte 102,15,56,222,216
+.byte 102,15,56,222,224
+.byte 102,15,56,222,232
+.byte 102,15,56,222,240
+.byte 102,15,56,222,248
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L011dec6_loop
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+.byte 102,15,56,223,208
+.byte 102,15,56,223,216
+.byte 102,15,56,223,224
+.byte 102,15,56,223,232
+.byte 102,15,56,223,240
+.byte 102,15,56,223,248
+ ret
+.globl _aesni_ecb_encrypt
+.def _aesni_ecb_encrypt; .scl 2; .type 32; .endef
+.align 16
+_aesni_ecb_encrypt:
+.L_aesni_ecb_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ andl $-16,%eax
+ jz .L012ecb_ret
+ movl 240(%edx),%ecx
+ testl %ebx,%ebx
+ jz .L013ecb_decrypt
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ cmpl $96,%eax
+ jb .L014ecb_enc_tail
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ subl $96,%eax
+ jmp .L015ecb_enc_loop6_enter
+.align 16
+.L016ecb_enc_loop6:
+ movups %xmm2,(%edi)
+ movdqu (%esi),%xmm2
+ movups %xmm3,16(%edi)
+ movdqu 16(%esi),%xmm3
+ movups %xmm4,32(%edi)
+ movdqu 32(%esi),%xmm4
+ movups %xmm5,48(%edi)
+ movdqu 48(%esi),%xmm5
+ movups %xmm6,64(%edi)
+ movdqu 64(%esi),%xmm6
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+.L015ecb_enc_loop6_enter:
+ call __aesni_encrypt6
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ subl $96,%eax
+ jnc .L016ecb_enc_loop6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ addl $96,%eax
+ jz .L012ecb_ret
+.L014ecb_enc_tail:
+ movups (%esi),%xmm2
+ cmpl $32,%eax
+ jb .L017ecb_enc_one
+ movups 16(%esi),%xmm3
+ je .L018ecb_enc_two
+ movups 32(%esi),%xmm4
+ cmpl $64,%eax
+ jb .L019ecb_enc_three
+ movups 48(%esi),%xmm5
+ je .L020ecb_enc_four
+ movups 64(%esi),%xmm6
+ xorps %xmm7,%xmm7
+ call __aesni_encrypt6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ jmp .L012ecb_ret
+.align 16
+.L017ecb_enc_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L021enc1_loop_3:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L021enc1_loop_3
+.byte 102,15,56,221,209
+ movups %xmm2,(%edi)
+ jmp .L012ecb_ret
+.align 16
+.L018ecb_enc_two:
+ call __aesni_encrypt2
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ jmp .L012ecb_ret
+.align 16
+.L019ecb_enc_three:
+ call __aesni_encrypt3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ jmp .L012ecb_ret
+.align 16
+.L020ecb_enc_four:
+ call __aesni_encrypt4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ jmp .L012ecb_ret
+.align 16
+.L013ecb_decrypt:
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ cmpl $96,%eax
+ jb .L022ecb_dec_tail
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ subl $96,%eax
+ jmp .L023ecb_dec_loop6_enter
+.align 16
+.L024ecb_dec_loop6:
+ movups %xmm2,(%edi)
+ movdqu (%esi),%xmm2
+ movups %xmm3,16(%edi)
+ movdqu 16(%esi),%xmm3
+ movups %xmm4,32(%edi)
+ movdqu 32(%esi),%xmm4
+ movups %xmm5,48(%edi)
+ movdqu 48(%esi),%xmm5
+ movups %xmm6,64(%edi)
+ movdqu 64(%esi),%xmm6
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+.L023ecb_dec_loop6_enter:
+ call __aesni_decrypt6
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ subl $96,%eax
+ jnc .L024ecb_dec_loop6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ addl $96,%eax
+ jz .L012ecb_ret
+.L022ecb_dec_tail:
+ movups (%esi),%xmm2
+ cmpl $32,%eax
+ jb .L025ecb_dec_one
+ movups 16(%esi),%xmm3
+ je .L026ecb_dec_two
+ movups 32(%esi),%xmm4
+ cmpl $64,%eax
+ jb .L027ecb_dec_three
+ movups 48(%esi),%xmm5
+ je .L028ecb_dec_four
+ movups 64(%esi),%xmm6
+ xorps %xmm7,%xmm7
+ call __aesni_decrypt6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ jmp .L012ecb_ret
+.align 16
+.L025ecb_dec_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L029dec1_loop_4:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L029dec1_loop_4
+.byte 102,15,56,223,209
+ movups %xmm2,(%edi)
+ jmp .L012ecb_ret
+.align 16
+.L026ecb_dec_two:
+ call __aesni_decrypt2
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ jmp .L012ecb_ret
+.align 16
+.L027ecb_dec_three:
+ call __aesni_decrypt3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ jmp .L012ecb_ret
+.align 16
+.L028ecb_dec_four:
+ call __aesni_decrypt4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+.L012ecb_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ccm64_encrypt_blocks
+.def _aesni_ccm64_encrypt_blocks; .scl 2; .type 32; .endef
+.align 16
+_aesni_ccm64_encrypt_blocks:
+.L_aesni_ccm64_encrypt_blocks_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl 40(%esp),%ecx
+ movl %esp,%ebp
+ subl $60,%esp
+ andl $-16,%esp
+ movl %ebp,48(%esp)
+ movdqu (%ebx),%xmm7
+ movdqu (%ecx),%xmm3
+ movl 240(%edx),%ecx
+ movl $202182159,(%esp)
+ movl $134810123,4(%esp)
+ movl $67438087,8(%esp)
+ movl $66051,12(%esp)
+ movl $1,%ebx
+ xorl %ebp,%ebp
+ movl %ebx,16(%esp)
+ movl %ebp,20(%esp)
+ movl %ebp,24(%esp)
+ movl %ebp,28(%esp)
+ shll $4,%ecx
+ movl $16,%ebx
+ leal (%edx),%ebp
+ movdqa (%esp),%xmm5
+ movdqa %xmm7,%xmm2
+ leal 32(%edx,%ecx,1),%edx
+ subl %ecx,%ebx
+.byte 102,15,56,0,253
+.L030ccm64_enc_outer:
+ movups (%ebp),%xmm0
+ movl %ebx,%ecx
+ movups (%esi),%xmm6
+ xorps %xmm0,%xmm2
+ movups 16(%ebp),%xmm1
+ xorps %xmm6,%xmm0
+ xorps %xmm0,%xmm3
+ movups 32(%ebp),%xmm0
+.L031ccm64_enc2_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L031ccm64_enc2_loop
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ paddq 16(%esp),%xmm7
+ decl %eax
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ leal 16(%esi),%esi
+ xorps %xmm2,%xmm6
+ movdqa %xmm7,%xmm2
+ movups %xmm6,(%edi)
+.byte 102,15,56,0,213
+ leal 16(%edi),%edi
+ jnz .L030ccm64_enc_outer
+ movl 48(%esp),%esp
+ movl 40(%esp),%edi
+ movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ccm64_decrypt_blocks
+.def _aesni_ccm64_decrypt_blocks; .scl 2; .type 32; .endef
+.align 16
+_aesni_ccm64_decrypt_blocks:
+.L_aesni_ccm64_decrypt_blocks_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl 40(%esp),%ecx
+ movl %esp,%ebp
+ subl $60,%esp
+ andl $-16,%esp
+ movl %ebp,48(%esp)
+ movdqu (%ebx),%xmm7
+ movdqu (%ecx),%xmm3
+ movl 240(%edx),%ecx
+ movl $202182159,(%esp)
+ movl $134810123,4(%esp)
+ movl $67438087,8(%esp)
+ movl $66051,12(%esp)
+ movl $1,%ebx
+ xorl %ebp,%ebp
+ movl %ebx,16(%esp)
+ movl %ebp,20(%esp)
+ movl %ebp,24(%esp)
+ movl %ebp,28(%esp)
+ movdqa (%esp),%xmm5
+ movdqa %xmm7,%xmm2
+ movl %edx,%ebp
+ movl %ecx,%ebx
+.byte 102,15,56,0,253
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L032enc1_loop_5:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L032enc1_loop_5
+.byte 102,15,56,221,209
+ shll $4,%ebx
+ movl $16,%ecx
+ movups (%esi),%xmm6
+ paddq 16(%esp),%xmm7
+ leal 16(%esi),%esi
+ subl %ebx,%ecx
+ leal 32(%ebp,%ebx,1),%edx
+ movl %ecx,%ebx
+ jmp .L033ccm64_dec_outer
+.align 16
+.L033ccm64_dec_outer:
+ xorps %xmm2,%xmm6
+ movdqa %xmm7,%xmm2
+ movups %xmm6,(%edi)
+ leal 16(%edi),%edi
+.byte 102,15,56,0,213
+ subl $1,%eax
+ jz .L034ccm64_dec_break
+ movups (%ebp),%xmm0
+ movl %ebx,%ecx
+ movups 16(%ebp),%xmm1
+ xorps %xmm0,%xmm6
+ xorps %xmm0,%xmm2
+ xorps %xmm6,%xmm3
+ movups 32(%ebp),%xmm0
+.L035ccm64_dec2_loop:
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+ movups (%edx,%ecx,1),%xmm1
+ addl $32,%ecx
+.byte 102,15,56,220,208
+.byte 102,15,56,220,216
+ movups -16(%edx,%ecx,1),%xmm0
+ jnz .L035ccm64_dec2_loop
+ movups (%esi),%xmm6
+ paddq 16(%esp),%xmm7
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,221,208
+.byte 102,15,56,221,216
+ leal 16(%esi),%esi
+ jmp .L033ccm64_dec_outer
+.align 16
+.L034ccm64_dec_break:
+ movl 240(%ebp),%ecx
+ movl %ebp,%edx
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm6
+ leal 32(%edx),%edx
+ xorps %xmm6,%xmm3
+.L036enc1_loop_6:
+.byte 102,15,56,220,217
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L036enc1_loop_6
+.byte 102,15,56,221,217
+ movl 48(%esp),%esp
+ movl 40(%esp),%edi
+ movups %xmm3,(%edi)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ctr32_encrypt_blocks
+.def _aesni_ctr32_encrypt_blocks; .scl 2; .type 32; .endef
+.align 16
+_aesni_ctr32_encrypt_blocks:
+.L_aesni_ctr32_encrypt_blocks_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebx
+ movl %esp,%ebp
+ subl $88,%esp
+ andl $-16,%esp
+ movl %ebp,80(%esp)
+ cmpl $1,%eax
+ je .L037ctr32_one_shortcut
+ movdqu (%ebx),%xmm7
+ movl $202182159,(%esp)
+ movl $134810123,4(%esp)
+ movl $67438087,8(%esp)
+ movl $66051,12(%esp)
+ movl $6,%ecx
+ xorl %ebp,%ebp
+ movl %ecx,16(%esp)
+ movl %ecx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %ebp,28(%esp)
+.byte 102,15,58,22,251,3
+.byte 102,15,58,34,253,3
+ movl 240(%edx),%ecx
+ bswap %ebx
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ movdqa (%esp),%xmm2
+.byte 102,15,58,34,195,0
+ leal 3(%ebx),%ebp
+.byte 102,15,58,34,205,0
+ incl %ebx
+.byte 102,15,58,34,195,1
+ incl %ebp
+.byte 102,15,58,34,205,1
+ incl %ebx
+.byte 102,15,58,34,195,2
+ incl %ebp
+.byte 102,15,58,34,205,2
+ movdqa %xmm0,48(%esp)
+.byte 102,15,56,0,194
+ movdqu (%edx),%xmm6
+ movdqa %xmm1,64(%esp)
+.byte 102,15,56,0,202
+ pshufd $192,%xmm0,%xmm2
+ pshufd $128,%xmm0,%xmm3
+ cmpl $6,%eax
+ jb .L038ctr32_tail
+ pxor %xmm6,%xmm7
+ shll $4,%ecx
+ movl $16,%ebx
+ movdqa %xmm7,32(%esp)
+ movl %edx,%ebp
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ subl $6,%eax
+ jmp .L039ctr32_loop6
+.align 16
+.L039ctr32_loop6:
+ pshufd $64,%xmm0,%xmm4
+ movdqa 32(%esp),%xmm0
+ pshufd $192,%xmm1,%xmm5
+ pxor %xmm0,%xmm2
+ pshufd $128,%xmm1,%xmm6
+ pxor %xmm0,%xmm3
+ pshufd $64,%xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+.byte 102,15,56,220,209
+ pxor %xmm0,%xmm6
+ pxor %xmm0,%xmm7
+.byte 102,15,56,220,217
+ movups 32(%ebp),%xmm0
+ movl %ebx,%ecx
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ call .L_aesni_encrypt6_enter
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps %xmm1,%xmm2
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm3
+ movups %xmm2,(%edi)
+ movdqa 16(%esp),%xmm0
+ xorps %xmm1,%xmm4
+ movdqa 64(%esp),%xmm1
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ paddd %xmm0,%xmm1
+ paddd 48(%esp),%xmm0
+ movdqa (%esp),%xmm2
+ movups 48(%esi),%xmm3
+ movups 64(%esi),%xmm4
+ xorps %xmm3,%xmm5
+ movups 80(%esi),%xmm3
+ leal 96(%esi),%esi
+ movdqa %xmm0,48(%esp)
+.byte 102,15,56,0,194
+ xorps %xmm4,%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm3,%xmm7
+ movdqa %xmm1,64(%esp)
+.byte 102,15,56,0,202
+ movups %xmm6,64(%edi)
+ pshufd $192,%xmm0,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ pshufd $128,%xmm0,%xmm3
+ subl $6,%eax
+ jnc .L039ctr32_loop6
+ addl $6,%eax
+ jz .L040ctr32_ret
+ movdqu (%ebp),%xmm7
+ movl %ebp,%edx
+ pxor 32(%esp),%xmm7
+ movl 240(%ebp),%ecx
+.L038ctr32_tail:
+ por %xmm7,%xmm2
+ cmpl $2,%eax
+ jb .L041ctr32_one
+ pshufd $64,%xmm0,%xmm4
+ por %xmm7,%xmm3
+ je .L042ctr32_two
+ pshufd $192,%xmm1,%xmm5
+ por %xmm7,%xmm4
+ cmpl $4,%eax
+ jb .L043ctr32_three
+ pshufd $128,%xmm1,%xmm6
+ por %xmm7,%xmm5
+ je .L044ctr32_four
+ por %xmm7,%xmm6
+ call __aesni_encrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps %xmm1,%xmm2
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm3
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm4
+ movups 64(%esi),%xmm1
+ xorps %xmm0,%xmm5
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ jmp .L040ctr32_ret
+.align 16
+.L037ctr32_one_shortcut:
+ movups (%ebx),%xmm2
+ movl 240(%edx),%ecx
+.L041ctr32_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L045enc1_loop_7:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L045enc1_loop_7
+.byte 102,15,56,221,209
+ movups (%esi),%xmm6
+ xorps %xmm2,%xmm6
+ movups %xmm6,(%edi)
+ jmp .L040ctr32_ret
+.align 16
+.L042ctr32_two:
+ call __aesni_encrypt2
+ movups (%esi),%xmm5
+ movups 16(%esi),%xmm6
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ jmp .L040ctr32_ret
+.align 16
+.L043ctr32_three:
+ call __aesni_encrypt3
+ movups (%esi),%xmm5
+ movups 16(%esi),%xmm6
+ xorps %xmm5,%xmm2
+ movups 32(%esi),%xmm7
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm7,%xmm4
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ jmp .L040ctr32_ret
+.align 16
+.L044ctr32_four:
+ call __aesni_encrypt4
+ movups (%esi),%xmm6
+ movups 16(%esi),%xmm7
+ movups 32(%esi),%xmm1
+ xorps %xmm6,%xmm2
+ movups 48(%esi),%xmm0
+ xorps %xmm7,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm4
+ movups %xmm3,16(%edi)
+ xorps %xmm0,%xmm5
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+.L040ctr32_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movl 80(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_xts_encrypt
+.def _aesni_xts_encrypt; .scl 2; .type 32; .endef
+.align 16
+_aesni_xts_encrypt:
+.L_aesni_xts_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 36(%esp),%edx
+ movl 40(%esp),%esi
+ movl 240(%edx),%ecx
+ movups (%esi),%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L046enc1_loop_8:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L046enc1_loop_8
+.byte 102,15,56,221,209
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl %esp,%ebp
+ subl $120,%esp
+ movl 240(%edx),%ecx
+ andl $-16,%esp
+ movl $135,96(%esp)
+ movl $0,100(%esp)
+ movl $1,104(%esp)
+ movl $0,108(%esp)
+ movl %eax,112(%esp)
+ movl %ebp,116(%esp)
+ movdqa %xmm2,%xmm1
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ pcmpgtd %xmm1,%xmm0
+ andl $-16,%eax
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ subl $96,%eax
+ jc .L047xts_enc_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp .L048xts_enc_loop6
+.align 16
+.L048xts_enc_loop6:
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,16(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,32(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,64(%esp)
+ paddq %xmm1,%xmm1
+ movups (%ebp),%xmm0
+ pand %xmm3,%xmm7
+ movups (%esi),%xmm2
+ pxor %xmm1,%xmm7
+ movl %ebx,%ecx
+ movdqu 16(%esi),%xmm3
+ xorps %xmm0,%xmm2
+ movdqu 32(%esi),%xmm4
+ pxor %xmm0,%xmm3
+ movdqu 48(%esi),%xmm5
+ pxor %xmm0,%xmm4
+ movdqu 64(%esi),%xmm6
+ pxor %xmm0,%xmm5
+ movdqu 80(%esi),%xmm1
+ pxor %xmm0,%xmm6
+ leal 96(%esi),%esi
+ pxor (%esp),%xmm2
+ movdqa %xmm7,80(%esp)
+ pxor %xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+.byte 102,15,56,220,209
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+.byte 102,15,56,220,217
+ pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ call .L_aesni_encrypt6_enter
+ movdqa 80(%esp),%xmm1
+ pxor %xmm0,%xmm0
+ xorps (%esp),%xmm2
+ pcmpgtd %xmm1,%xmm0
+ xorps 16(%esp),%xmm3
+ movups %xmm2,(%edi)
+ xorps 32(%esp),%xmm4
+ movups %xmm3,16(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm4,32(%edi)
+ xorps 64(%esp),%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm1,%xmm7
+ movups %xmm6,64(%edi)
+ pshufd $19,%xmm0,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqa 96(%esp),%xmm3
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ subl $96,%eax
+ jnc .L048xts_enc_loop6
+ movl 240(%ebp),%ecx
+ movl %ebp,%edx
+ movl %ecx,%ebx
+.L047xts_enc_short:
+ addl $96,%eax
+ jz .L049xts_enc_done6x
+ movdqa %xmm1,%xmm5
+ cmpl $32,%eax
+ jb .L050xts_enc_one
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ je .L051xts_enc_two
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ cmpl $64,%eax
+ jb .L052xts_enc_three
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm7
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,(%esp)
+ movdqa %xmm6,16(%esp)
+ je .L053xts_enc_four
+ movdqa %xmm7,32(%esp)
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm7
+ pxor %xmm1,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ pxor (%esp),%xmm2
+ movdqu 48(%esi),%xmm5
+ pxor 16(%esp),%xmm3
+ movdqu 64(%esi),%xmm6
+ pxor 32(%esp),%xmm4
+ leal 80(%esi),%esi
+ pxor 48(%esp),%xmm5
+ movdqa %xmm7,64(%esp)
+ pxor %xmm7,%xmm6
+ call __aesni_encrypt6
+ movaps 64(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps 32(%esp),%xmm4
+ movups %xmm2,(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm3,16(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ jmp .L054xts_enc_done
+.align 16
+.L050xts_enc_one:
+ movups (%esi),%xmm2
+ leal 16(%esi),%esi
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L055enc1_loop_9:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L055enc1_loop_9
+.byte 102,15,56,221,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ movdqa %xmm5,%xmm1
+ jmp .L054xts_enc_done
+.align 16
+.L051xts_enc_two:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ leal 32(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ call __aesni_encrypt2
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 32(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L054xts_enc_done
+.align 16
+.L052xts_enc_three:
+ movaps %xmm1,%xmm7
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ leal 48(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ call __aesni_encrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movdqa %xmm7,%xmm1
+ jmp .L054xts_enc_done
+.align 16
+.L053xts_enc_four:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ xorps (%esp),%xmm2
+ movups 48(%esi),%xmm5
+ leal 64(%esi),%esi
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ xorps %xmm6,%xmm5
+ call __aesni_encrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ xorps %xmm6,%xmm5
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L054xts_enc_done
+.align 16
+.L049xts_enc_done6x:
+ movl 112(%esp),%eax
+ andl $15,%eax
+ jz .L056xts_enc_ret
+ movdqa %xmm1,%xmm5
+ movl %eax,112(%esp)
+ jmp .L057xts_enc_steal
+.align 16
+.L054xts_enc_done:
+ movl 112(%esp),%eax
+ pxor %xmm0,%xmm0
+ andl $15,%eax
+ jz .L056xts_enc_ret
+ pcmpgtd %xmm1,%xmm0
+ movl %eax,112(%esp)
+ pshufd $19,%xmm0,%xmm5
+ paddq %xmm1,%xmm1
+ pand 96(%esp),%xmm5
+ pxor %xmm1,%xmm5
+.L057xts_enc_steal:
+ movzbl (%esi),%ecx
+ movzbl -16(%edi),%edx
+ leal 1(%esi),%esi
+ movb %cl,-16(%edi)
+ movb %dl,(%edi)
+ leal 1(%edi),%edi
+ subl $1,%eax
+ jnz .L057xts_enc_steal
+ subl 112(%esp),%edi
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups -16(%edi),%xmm2
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L058enc1_loop_10:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L058enc1_loop_10
+.byte 102,15,56,221,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,-16(%edi)
+.L056xts_enc_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
+ movl 116(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_xts_decrypt
+.def _aesni_xts_decrypt; .scl 2; .type 32; .endef
+.align 16
+_aesni_xts_decrypt:
+.L_aesni_xts_decrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 36(%esp),%edx
+ movl 40(%esp),%esi
+ movl 240(%edx),%ecx
+ movups (%esi),%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L059enc1_loop_11:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L059enc1_loop_11
+.byte 102,15,56,221,209
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movl %esp,%ebp
+ subl $120,%esp
+ andl $-16,%esp
+ xorl %ebx,%ebx
+ testl $15,%eax
+ setnz %bl
+ shll $4,%ebx
+ subl %ebx,%eax
+ movl $135,96(%esp)
+ movl $0,100(%esp)
+ movl $1,104(%esp)
+ movl $0,108(%esp)
+ movl %eax,112(%esp)
+ movl %ebp,116(%esp)
+ movl 240(%edx),%ecx
+ movl %edx,%ebp
+ movl %ecx,%ebx
+ movdqa %xmm2,%xmm1
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ pcmpgtd %xmm1,%xmm0
+ andl $-16,%eax
+ subl $96,%eax
+ jc .L060xts_dec_short
+ shll $4,%ecx
+ movl $16,%ebx
+ subl %ecx,%ebx
+ leal 32(%edx,%ecx,1),%edx
+ jmp .L061xts_dec_loop6
+.align 16
+.L061xts_dec_loop6:
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,16(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,32(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,64(%esp)
+ paddq %xmm1,%xmm1
+ movups (%ebp),%xmm0
+ pand %xmm3,%xmm7
+ movups (%esi),%xmm2
+ pxor %xmm1,%xmm7
+ movl %ebx,%ecx
+ movdqu 16(%esi),%xmm3
+ xorps %xmm0,%xmm2
+ movdqu 32(%esi),%xmm4
+ pxor %xmm0,%xmm3
+ movdqu 48(%esi),%xmm5
+ pxor %xmm0,%xmm4
+ movdqu 64(%esi),%xmm6
+ pxor %xmm0,%xmm5
+ movdqu 80(%esi),%xmm1
+ pxor %xmm0,%xmm6
+ leal 96(%esi),%esi
+ pxor (%esp),%xmm2
+ movdqa %xmm7,80(%esp)
+ pxor %xmm1,%xmm7
+ movups 16(%ebp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+.byte 102,15,56,222,209
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+.byte 102,15,56,222,217
+ pxor %xmm0,%xmm7
+ movups 32(%ebp),%xmm0
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ call .L_aesni_decrypt6_enter
+ movdqa 80(%esp),%xmm1
+ pxor %xmm0,%xmm0
+ xorps (%esp),%xmm2
+ pcmpgtd %xmm1,%xmm0
+ xorps 16(%esp),%xmm3
+ movups %xmm2,(%edi)
+ xorps 32(%esp),%xmm4
+ movups %xmm3,16(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm4,32(%edi)
+ xorps 64(%esp),%xmm6
+ movups %xmm5,48(%edi)
+ xorps %xmm1,%xmm7
+ movups %xmm6,64(%edi)
+ pshufd $19,%xmm0,%xmm2
+ movups %xmm7,80(%edi)
+ leal 96(%edi),%edi
+ movdqa 96(%esp),%xmm3
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ subl $96,%eax
+ jnc .L061xts_dec_loop6
+ movl 240(%ebp),%ecx
+ movl %ebp,%edx
+ movl %ecx,%ebx
+.L060xts_dec_short:
+ addl $96,%eax
+ jz .L062xts_dec_done6x
+ movdqa %xmm1,%xmm5
+ cmpl $32,%eax
+ jb .L063xts_dec_one
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ je .L064xts_dec_two
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ cmpl $64,%eax
+ jb .L065xts_dec_three
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa %xmm1,%xmm7
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+ movdqa %xmm5,(%esp)
+ movdqa %xmm6,16(%esp)
+ je .L066xts_dec_four
+ movdqa %xmm7,32(%esp)
+ pshufd $19,%xmm0,%xmm7
+ movdqa %xmm1,48(%esp)
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm7
+ pxor %xmm1,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ pxor (%esp),%xmm2
+ movdqu 48(%esi),%xmm5
+ pxor 16(%esp),%xmm3
+ movdqu 64(%esi),%xmm6
+ pxor 32(%esp),%xmm4
+ leal 80(%esi),%esi
+ pxor 48(%esp),%xmm5
+ movdqa %xmm7,64(%esp)
+ pxor %xmm7,%xmm6
+ call __aesni_decrypt6
+ movaps 64(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps 32(%esp),%xmm4
+ movups %xmm2,(%edi)
+ xorps 48(%esp),%xmm5
+ movups %xmm3,16(%edi)
+ xorps %xmm1,%xmm6
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ jmp .L067xts_dec_done
+.align 16
+.L063xts_dec_one:
+ movups (%esi),%xmm2
+ leal 16(%esi),%esi
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L068dec1_loop_12:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L068dec1_loop_12
+.byte 102,15,56,223,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ movdqa %xmm5,%xmm1
+ jmp .L067xts_dec_done
+.align 16
+.L064xts_dec_two:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ leal 32(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ call __aesni_decrypt2
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 32(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L067xts_dec_done
+.align 16
+.L065xts_dec_three:
+ movaps %xmm1,%xmm7
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ leal 48(%esi),%esi
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ call __aesni_decrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ leal 48(%edi),%edi
+ movdqa %xmm7,%xmm1
+ jmp .L067xts_dec_done
+.align 16
+.L066xts_dec_four:
+ movaps %xmm1,%xmm6
+ movups (%esi),%xmm2
+ movups 16(%esi),%xmm3
+ movups 32(%esi),%xmm4
+ xorps (%esp),%xmm2
+ movups 48(%esi),%xmm5
+ leal 64(%esi),%esi
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ xorps %xmm6,%xmm5
+ call __aesni_decrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi)
+ xorps %xmm6,%xmm5
+ movups %xmm3,16(%edi)
+ movups %xmm4,32(%edi)
+ movups %xmm5,48(%edi)
+ leal 64(%edi),%edi
+ movdqa %xmm6,%xmm1
+ jmp .L067xts_dec_done
+.align 16
+.L062xts_dec_done6x:
+ movl 112(%esp),%eax
+ andl $15,%eax
+ jz .L069xts_dec_ret
+ movl %eax,112(%esp)
+ jmp .L070xts_dec_only_one_more
+.align 16
+.L067xts_dec_done:
+ movl 112(%esp),%eax
+ pxor %xmm0,%xmm0
+ andl $15,%eax
+ jz .L069xts_dec_ret
+ pcmpgtd %xmm1,%xmm0
+ movl %eax,112(%esp)
+ pshufd $19,%xmm0,%xmm2
+ pxor %xmm0,%xmm0
+ movdqa 96(%esp),%xmm3
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm2
+ pcmpgtd %xmm1,%xmm0
+ pxor %xmm2,%xmm1
+.L070xts_dec_only_one_more:
+ pshufd $19,%xmm0,%xmm5
+ movdqa %xmm1,%xmm6
+ paddq %xmm1,%xmm1
+ pand %xmm3,%xmm5
+ pxor %xmm1,%xmm5
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups (%esi),%xmm2
+ xorps %xmm5,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L071dec1_loop_13:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L071dec1_loop_13
+.byte 102,15,56,223,209
+ xorps %xmm5,%xmm2
+ movups %xmm2,(%edi)
+.L072xts_dec_steal:
+ movzbl 16(%esi),%ecx
+ movzbl (%edi),%edx
+ leal 1(%esi),%esi
+ movb %cl,(%edi)
+ movb %dl,16(%edi)
+ leal 1(%edi),%edi
+ subl $1,%eax
+ jnz .L072xts_dec_steal
+ subl 112(%esp),%edi
+ movl %ebp,%edx
+ movl %ebx,%ecx
+ movups (%edi),%xmm2
+ xorps %xmm6,%xmm2
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L073dec1_loop_14:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L073dec1_loop_14
+.byte 102,15,56,223,209
+ xorps %xmm6,%xmm2
+ movups %xmm2,(%edi)
+.L069xts_dec_ret:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ movdqa %xmm0,(%esp)
+ pxor %xmm3,%xmm3
+ movdqa %xmm0,16(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm0,32(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm0,48(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm0,64(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm0,80(%esp)
+ movl 116(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ocb_encrypt
+.def _aesni_ocb_encrypt; .scl 2; .type 32; .endef
+.align 16
+_aesni_ocb_encrypt:
+.L_aesni_ocb_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movdqu (%ecx),%xmm0
+ movl 36(%esp),%ebp
+ movdqu (%ebx),%xmm1
+ movl 44(%esp),%ebx
+ movl %esp,%ecx
+ subl $132,%esp
+ andl $-16,%esp
+ subl %esi,%edi
+ shll $4,%eax
+ leal -96(%esi,%eax,1),%eax
+ movl %edi,120(%esp)
+ movl %eax,124(%esp)
+ movl %ecx,128(%esp)
+ movl 240(%edx),%ecx
+ testl $1,%ebp
+ jnz .L074odd
+ bsfl %ebp,%eax
+ addl $1,%ebp
+ shll $4,%eax
+ movdqu (%ebx,%eax,1),%xmm7
+ movl %edx,%eax
+ movdqu (%esi),%xmm2
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L075enc1_loop_15:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L075enc1_loop_15
+.byte 102,15,56,221,209
+ xorps %xmm7,%xmm2
+ movdqa %xmm7,%xmm0
+ movdqa %xmm6,%xmm1
+ movups %xmm2,-16(%edi,%esi,1)
+ movl 240(%eax),%ecx
+ movl %eax,%edx
+ movl 124(%esp),%eax
+.L074odd:
+ shll $4,%ecx
+ movl $16,%edi
+ subl %ecx,%edi
+ movl %edx,112(%esp)
+ leal 32(%edx,%ecx,1),%edx
+ movl %edi,116(%esp)
+ cmpl %eax,%esi
+ ja .L076short
+ jmp .L077grandloop
+.align 32
+.L077grandloop:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ leal 5(%ebp),%edi
+ addl $6,%ebp
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ bsfl %edi,%edi
+ shll $4,%ecx
+ shll $4,%eax
+ shll $4,%edi
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ movdqu (%ebx,%edi,1),%xmm7
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm5,%xmm1
+ pxor %xmm0,%xmm5
+ pxor %xmm6,%xmm1
+ pxor %xmm0,%xmm6
+ pxor %xmm7,%xmm1
+ pxor %xmm0,%xmm7
+ movdqa %xmm1,96(%esp)
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor 80(%esp),%xmm7
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ movl 120(%esp),%edi
+ movl 124(%esp),%eax
+ call .L_aesni_encrypt6_enter
+ movdqa 80(%esp),%xmm0
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor %xmm0,%xmm7
+ movdqa 96(%esp),%xmm1
+ movdqu %xmm2,-96(%edi,%esi,1)
+ movdqu %xmm3,-80(%edi,%esi,1)
+ movdqu %xmm4,-64(%edi,%esi,1)
+ movdqu %xmm5,-48(%edi,%esi,1)
+ movdqu %xmm6,-32(%edi,%esi,1)
+ movdqu %xmm7,-16(%edi,%esi,1)
+ cmpl %eax,%esi
+ jb .L077grandloop
+.L076short:
+ addl $96,%eax
+ subl %esi,%eax
+ jz .L078done
+ cmpl $32,%eax
+ jb .L079one
+ je .L080two
+ cmpl $64,%eax
+ jb .L081three
+ je .L082four
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ pxor %xmm7,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm0,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm0,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm0,%xmm4
+ pxor %xmm5,%xmm1
+ pxor %xmm0,%xmm5
+ pxor %xmm6,%xmm1
+ pxor %xmm0,%xmm6
+ movdqa %xmm1,96(%esp)
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,220,209
+.byte 102,15,56,220,217
+.byte 102,15,56,220,225
+.byte 102,15,56,220,233
+.byte 102,15,56,220,241
+.byte 102,15,56,220,249
+ movl 120(%esp),%edi
+ call .L_aesni_encrypt6_enter
+ movdqa 64(%esp),%xmm0
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor %xmm0,%xmm6
+ movdqa 96(%esp),%xmm1
+ movdqu %xmm2,(%edi,%esi,1)
+ movdqu %xmm3,16(%edi,%esi,1)
+ movdqu %xmm4,32(%edi,%esi,1)
+ movdqu %xmm5,48(%edi,%esi,1)
+ movdqu %xmm6,64(%edi,%esi,1)
+ jmp .L078done
+.align 16
+.L079one:
+ movdqu (%ebx),%xmm7
+ movl 112(%esp),%edx
+ movdqu (%esi),%xmm2
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movl 120(%esp),%edi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L083enc1_loop_16:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L083enc1_loop_16
+.byte 102,15,56,221,209
+ xorps %xmm7,%xmm2
+ movdqa %xmm7,%xmm0
+ movdqa %xmm6,%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ jmp .L078done
+.align 16
+.L080two:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm6
+ movdqu (%ebx,%ecx,1),%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm6,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm7,%xmm3
+ movdqa %xmm1,%xmm5
+ movl 120(%esp),%edi
+ call __aesni_encrypt2
+ xorps %xmm6,%xmm2
+ xorps %xmm7,%xmm3
+ movdqa %xmm7,%xmm0
+ movdqa %xmm5,%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ movups %xmm3,16(%edi,%esi,1)
+ jmp .L078done
+.align 16
+.L081three:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm5
+ movdqu (%ebx,%ecx,1),%xmm6
+ movdqa %xmm5,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm2,%xmm1
+ pxor %xmm5,%xmm2
+ pxor %xmm3,%xmm1
+ pxor %xmm6,%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm7,%xmm4
+ movdqa %xmm1,96(%esp)
+ movl 120(%esp),%edi
+ call __aesni_encrypt3
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movdqa %xmm7,%xmm0
+ movdqa 96(%esp),%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ movups %xmm3,16(%edi,%esi,1)
+ movups %xmm4,32(%edi,%esi,1)
+ jmp .L078done
+.align 16
+.L082four:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ movl 112(%esp),%edx
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm4
+ movdqu (%ebx,%ecx,1),%xmm5
+ movdqa %xmm4,%xmm6
+ movdqu (%ebx,%eax,1),%xmm7
+ pxor %xmm0,%xmm4
+ movdqu (%esi),%xmm2
+ pxor %xmm4,%xmm5
+ movdqu 16(%esi),%xmm3
+ pxor %xmm5,%xmm6
+ movdqa %xmm4,(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm5,16(%esp)
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movl 240(%edx),%ecx
+ pxor %xmm2,%xmm1
+ pxor (%esp),%xmm2
+ pxor %xmm3,%xmm1
+ pxor 16(%esp),%xmm3
+ pxor %xmm4,%xmm1
+ pxor %xmm6,%xmm4
+ pxor %xmm5,%xmm1
+ pxor %xmm7,%xmm5
+ movdqa %xmm1,96(%esp)
+ movl 120(%esp),%edi
+ call __aesni_encrypt4
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm6,%xmm4
+ movups %xmm2,(%edi,%esi,1)
+ xorps %xmm7,%xmm5
+ movups %xmm3,16(%edi,%esi,1)
+ movdqa %xmm7,%xmm0
+ movups %xmm4,32(%edi,%esi,1)
+ movdqa 96(%esp),%xmm1
+ movups %xmm5,48(%edi,%esi,1)
+.L078done:
+ movl 128(%esp),%edx
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm2,16(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm2,32(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm2,48(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm2,64(%esp)
+ movdqa %xmm2,80(%esp)
+ movdqa %xmm2,96(%esp)
+ leal (%edx),%esp
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movdqu %xmm0,(%ecx)
+ pxor %xmm0,%xmm0
+ movdqu %xmm1,(%ebx)
+ pxor %xmm1,%xmm1
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _aesni_ocb_decrypt
+.def _aesni_ocb_decrypt; .scl 2; .type 32; .endef
+.align 16
+_aesni_ocb_decrypt:
+.L_aesni_ocb_decrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl 32(%esp),%edx
+ movdqu (%ecx),%xmm0
+ movl 36(%esp),%ebp
+ movdqu (%ebx),%xmm1
+ movl 44(%esp),%ebx
+ movl %esp,%ecx
+ subl $132,%esp
+ andl $-16,%esp
+ subl %esi,%edi
+ shll $4,%eax
+ leal -96(%esi,%eax,1),%eax
+ movl %edi,120(%esp)
+ movl %eax,124(%esp)
+ movl %ecx,128(%esp)
+ movl 240(%edx),%ecx
+ testl $1,%ebp
+ jnz .L084odd
+ bsfl %ebp,%eax
+ addl $1,%ebp
+ shll $4,%eax
+ movdqu (%ebx,%eax,1),%xmm7
+ movl %edx,%eax
+ movdqu (%esi),%xmm2
+ leal 16(%esi),%esi
+ pxor %xmm0,%xmm7
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L085dec1_loop_17:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L085dec1_loop_17
+.byte 102,15,56,223,209
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm1
+ movdqa %xmm7,%xmm0
+ xorps %xmm2,%xmm1
+ movups %xmm2,-16(%edi,%esi,1)
+ movl 240(%eax),%ecx
+ movl %eax,%edx
+ movl 124(%esp),%eax
+.L084odd:
+ shll $4,%ecx
+ movl $16,%edi
+ subl %ecx,%edi
+ movl %edx,112(%esp)
+ leal 32(%edx,%ecx,1),%edx
+ movl %edi,116(%esp)
+ cmpl %eax,%esi
+ ja .L086short
+ jmp .L087grandloop
+.align 32
+.L087grandloop:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ leal 5(%ebp),%edi
+ addl $6,%ebp
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ bsfl %edi,%edi
+ shll $4,%ecx
+ shll $4,%eax
+ shll $4,%edi
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ movdqu (%ebx,%edi,1),%xmm7
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movdqa %xmm7,80(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ leal 96(%esi),%esi
+ movdqa %xmm1,96(%esp)
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+ pxor %xmm0,%xmm7
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor 80(%esp),%xmm7
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ movl 120(%esp),%edi
+ movl 124(%esp),%eax
+ call .L_aesni_decrypt6_enter
+ movdqa 80(%esp),%xmm0
+ pxor (%esp),%xmm2
+ movdqa 96(%esp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ pxor %xmm0,%xmm7
+ pxor %xmm2,%xmm1
+ movdqu %xmm2,-96(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movdqu %xmm3,-80(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ movdqu %xmm4,-64(%edi,%esi,1)
+ pxor %xmm5,%xmm1
+ movdqu %xmm5,-48(%edi,%esi,1)
+ pxor %xmm6,%xmm1
+ movdqu %xmm6,-32(%edi,%esi,1)
+ pxor %xmm7,%xmm1
+ movdqu %xmm7,-16(%edi,%esi,1)
+ cmpl %eax,%esi
+ jb .L087grandloop
+.L086short:
+ addl $96,%eax
+ subl %esi,%eax
+ jz .L088done
+ cmpl $32,%eax
+ jb .L089one
+ je .L090two
+ cmpl $64,%eax
+ jb .L091three
+ je .L092four
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm2
+ movdqu (%ebx,%ecx,1),%xmm3
+ movl 116(%esp),%ecx
+ movdqa %xmm2,%xmm4
+ movdqu (%ebx,%eax,1),%xmm5
+ movdqa %xmm2,%xmm6
+ pxor %xmm0,%xmm2
+ pxor %xmm2,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm3,%xmm4
+ movdqa %xmm3,16(%esp)
+ pxor %xmm4,%xmm5
+ movdqa %xmm4,32(%esp)
+ pxor %xmm5,%xmm6
+ movdqa %xmm5,48(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm6,64(%esp)
+ movups -48(%edx,%ecx,1),%xmm0
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ pxor %xmm7,%xmm7
+ movdqa %xmm1,96(%esp)
+ pxor %xmm0,%xmm2
+ pxor %xmm0,%xmm3
+ pxor %xmm0,%xmm4
+ pxor %xmm0,%xmm5
+ pxor %xmm0,%xmm6
+ movups -32(%edx,%ecx,1),%xmm1
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor 64(%esp),%xmm6
+ movups -16(%edx,%ecx,1),%xmm0
+.byte 102,15,56,222,209
+.byte 102,15,56,222,217
+.byte 102,15,56,222,225
+.byte 102,15,56,222,233
+.byte 102,15,56,222,241
+.byte 102,15,56,222,249
+ movl 120(%esp),%edi
+ call .L_aesni_decrypt6_enter
+ movdqa 64(%esp),%xmm0
+ pxor (%esp),%xmm2
+ movdqa 96(%esp),%xmm1
+ pxor 16(%esp),%xmm3
+ pxor 32(%esp),%xmm4
+ pxor 48(%esp),%xmm5
+ pxor %xmm0,%xmm6
+ pxor %xmm2,%xmm1
+ movdqu %xmm2,(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movdqu %xmm3,16(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ movdqu %xmm4,32(%edi,%esi,1)
+ pxor %xmm5,%xmm1
+ movdqu %xmm5,48(%edi,%esi,1)
+ pxor %xmm6,%xmm1
+ movdqu %xmm6,64(%edi,%esi,1)
+ jmp .L088done
+.align 16
+.L089one:
+ movdqu (%ebx),%xmm7
+ movl 112(%esp),%edx
+ movdqu (%esi),%xmm2
+ movl 240(%edx),%ecx
+ pxor %xmm0,%xmm7
+ pxor %xmm7,%xmm2
+ movdqa %xmm1,%xmm6
+ movl 120(%esp),%edi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L093dec1_loop_18:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L093dec1_loop_18
+.byte 102,15,56,223,209
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm1
+ movdqa %xmm7,%xmm0
+ xorps %xmm2,%xmm1
+ movups %xmm2,(%edi,%esi,1)
+ jmp .L088done
+.align 16
+.L090two:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm6
+ movdqu (%ebx,%ecx,1),%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movl 240(%edx),%ecx
+ movdqa %xmm1,%xmm5
+ pxor %xmm0,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm6,%xmm2
+ pxor %xmm7,%xmm3
+ movl 120(%esp),%edi
+ call __aesni_decrypt2
+ xorps %xmm6,%xmm2
+ xorps %xmm7,%xmm3
+ movdqa %xmm7,%xmm0
+ xorps %xmm2,%xmm5
+ movups %xmm2,(%edi,%esi,1)
+ xorps %xmm3,%xmm5
+ movups %xmm3,16(%edi,%esi,1)
+ movaps %xmm5,%xmm1
+ jmp .L088done
+.align 16
+.L091three:
+ leal 1(%ebp),%ecx
+ movl 112(%esp),%edx
+ bsfl %ecx,%ecx
+ shll $4,%ecx
+ movdqu (%ebx),%xmm5
+ movdqu (%ebx,%ecx,1),%xmm6
+ movdqa %xmm5,%xmm7
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movl 240(%edx),%ecx
+ movdqa %xmm1,96(%esp)
+ pxor %xmm0,%xmm5
+ pxor %xmm5,%xmm6
+ pxor %xmm6,%xmm7
+ pxor %xmm5,%xmm2
+ pxor %xmm6,%xmm3
+ pxor %xmm7,%xmm4
+ movl 120(%esp),%edi
+ call __aesni_decrypt3
+ movdqa 96(%esp),%xmm1
+ xorps %xmm5,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm7,%xmm4
+ movups %xmm2,(%edi,%esi,1)
+ pxor %xmm2,%xmm1
+ movdqa %xmm7,%xmm0
+ movups %xmm3,16(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movups %xmm4,32(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ jmp .L088done
+.align 16
+.L092four:
+ leal 1(%ebp),%ecx
+ leal 3(%ebp),%eax
+ bsfl %ecx,%ecx
+ bsfl %eax,%eax
+ movl 112(%esp),%edx
+ shll $4,%ecx
+ shll $4,%eax
+ movdqu (%ebx),%xmm4
+ movdqu (%ebx,%ecx,1),%xmm5
+ movdqa %xmm4,%xmm6
+ movdqu (%ebx,%eax,1),%xmm7
+ pxor %xmm0,%xmm4
+ movdqu (%esi),%xmm2
+ pxor %xmm4,%xmm5
+ movdqu 16(%esi),%xmm3
+ pxor %xmm5,%xmm6
+ movdqa %xmm4,(%esp)
+ pxor %xmm6,%xmm7
+ movdqa %xmm5,16(%esp)
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movl 240(%edx),%ecx
+ movdqa %xmm1,96(%esp)
+ pxor (%esp),%xmm2
+ pxor 16(%esp),%xmm3
+ pxor %xmm6,%xmm4
+ pxor %xmm7,%xmm5
+ movl 120(%esp),%edi
+ call __aesni_decrypt4
+ movdqa 96(%esp),%xmm1
+ xorps (%esp),%xmm2
+ xorps 16(%esp),%xmm3
+ xorps %xmm6,%xmm4
+ movups %xmm2,(%edi,%esi,1)
+ pxor %xmm2,%xmm1
+ xorps %xmm7,%xmm5
+ movups %xmm3,16(%edi,%esi,1)
+ pxor %xmm3,%xmm1
+ movdqa %xmm7,%xmm0
+ movups %xmm4,32(%edi,%esi,1)
+ pxor %xmm4,%xmm1
+ movups %xmm5,48(%edi,%esi,1)
+ pxor %xmm5,%xmm1
+.L088done:
+ movl 128(%esp),%edx
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ movdqa %xmm2,(%esp)
+ pxor %xmm4,%xmm4
+ movdqa %xmm2,16(%esp)
+ pxor %xmm5,%xmm5
+ movdqa %xmm2,32(%esp)
+ pxor %xmm6,%xmm6
+ movdqa %xmm2,48(%esp)
+ pxor %xmm7,%xmm7
+ movdqa %xmm2,64(%esp)
+ movdqa %xmm2,80(%esp)
+ movdqa %xmm2,96(%esp)
+ leal (%edx),%esp
+ movl 40(%esp),%ecx
+ movl 48(%esp),%ebx
+ movdqu %xmm0,(%ecx)
+ pxor %xmm0,%xmm0
+ movdqu %xmm1,(%ebx)
+ pxor %xmm1,%xmm1
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
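+#
+# void aesni_cbc_encrypt(const unsigned char *in, unsigned char *out,
+#                        size_t length, const AES_KEY *key,
+#                        unsigned char *ivec, int enc);
+#
+# Prototype shown for orientation; it mirrors the OpenSSL routine this file
+# was generated from.  After the four pushes the arguments sit at
+# 20..40(%esp); the last one selects encryption (non-zero) or decryption
+# (zero).  Throughout the file, .byte 102,15,56,220/221/222/223,... sequences
+# are hand-encoded aesenc/aesenclast/aesdec/aesdeclast instructions.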
+.globl _aesni_cbc_encrypt
+.def _aesni_cbc_encrypt; .scl 2; .type 32; .endef
+.align 16
+_aesni_cbc_encrypt:
+.L_aesni_cbc_encrypt_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl %esp,%ebx
+ movl 24(%esp),%edi
+ subl $24,%ebx
+ movl 28(%esp),%eax
+ andl $-16,%ebx
+ movl 32(%esp),%edx
+ movl 36(%esp),%ebp
+ testl %eax,%eax
+ jz .L094cbc_abort
+ cmpl $0,40(%esp)
+ xchgl %esp,%ebx
+ movups (%ebp),%xmm7
+ movl 240(%edx),%ecx
+ movl %edx,%ebp
+ movl %ebx,16(%esp)
+ movl %ecx,%ebx
+ je .L095cbc_decrypt
+ movaps %xmm7,%xmm2
+ cmpl $16,%eax
+ jb .L096cbc_enc_tail
+ subl $16,%eax
+ jmp .L097cbc_enc_loop
+.align 16
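+# Encrypt loop: one 16-byte block per iteration.  The plaintext block is
+# XORed into the CBC chaining value in %xmm2, run through the round loop
+# below, and the result is both stored and carried forward as the new IV.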
+.L097cbc_enc_loop:
+ movups (%esi),%xmm7
+ leal 16(%esi),%esi
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ xorps %xmm0,%xmm7
+ leal 32(%edx),%edx
+ xorps %xmm7,%xmm2
+.L098enc1_loop_19:
+.byte 102,15,56,220,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L098enc1_loop_19
+.byte 102,15,56,221,209
+ movl %ebx,%ecx
+ movl %ebp,%edx
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
+ subl $16,%eax
+ jnc .L097cbc_enc_loop
+ addl $16,%eax
+ jnz .L096cbc_enc_tail
+ movaps %xmm2,%xmm7
+ pxor %xmm2,%xmm2
+ jmp .L099cbc_ret
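+# Short final block: the two .long constants below are data-encoded
+# "mov %esi,%esi; rep movsb" (0xA4F3F689) and "mov %esi,%esi; rep stosb"
+# (0xAAF3F689).  The remaining input bytes are copied to the output buffer,
+# zero-padded to a full block there, and the encrypt loop is re-entered once
+# to process that padded block in place.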
+.L096cbc_enc_tail:
+ movl %eax,%ecx
+.long 2767451785
+ movl $16,%ecx
+ subl %eax,%ecx
+ xorl %eax,%eax
+.long 2868115081
+ leal -16(%edi),%edi
+ movl %ebx,%ecx
+ movl %edi,%esi
+ movl %ebp,%edx
+ jmp .L097cbc_enc_loop
+.align 16
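+# Decrypt path: the bulk loop handles six blocks per iteration through
+# __aesni_decrypt6, then XORs each result with the preceding ciphertext
+# block (the saved IV at (%esp) for the first) to recover the plaintext;
+# the last ciphertext block of the batch becomes the next IV.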
+.L095cbc_decrypt:
+ cmpl $80,%eax
+ jbe .L100cbc_dec_tail
+ movaps %xmm7,(%esp)
+ subl $80,%eax
+ jmp .L101cbc_dec_loop6_enter
+.align 16
+.L102cbc_dec_loop6:
+ movaps %xmm0,(%esp)
+ movups %xmm7,(%edi)
+ leal 16(%edi),%edi
+.L101cbc_dec_loop6_enter:
+ movdqu (%esi),%xmm2
+ movdqu 16(%esi),%xmm3
+ movdqu 32(%esi),%xmm4
+ movdqu 48(%esi),%xmm5
+ movdqu 64(%esi),%xmm6
+ movdqu 80(%esi),%xmm7
+ call __aesni_decrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps (%esp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%esi),%xmm1
+ xorps %xmm0,%xmm6
+ movups 80(%esi),%xmm0
+ xorps %xmm1,%xmm7
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ leal 96(%esi),%esi
+ movups %xmm4,32(%edi)
+ movl %ebx,%ecx
+ movups %xmm5,48(%edi)
+ movl %ebp,%edx
+ movups %xmm6,64(%edi)
+ leal 80(%edi),%edi
+ subl $96,%eax
+ ja .L102cbc_dec_loop6
+ movaps %xmm7,%xmm2
+ movaps %xmm0,%xmm7
+ addl $80,%eax
+ jle .L103cbc_dec_clear_tail_collected
+ movups %xmm2,(%edi)
+ leal 16(%edi),%edi
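+# At most five blocks (plus any partial bytes) remain: dispatch on the
+# residual length to the one/two/three/four-block cases below; the
+# five-block case falls through to __aesni_decrypt6 with a zeroed sixth
+# block.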
+.L100cbc_dec_tail:
+ movups (%esi),%xmm2
+ movaps %xmm2,%xmm6
+ cmpl $16,%eax
+ jbe .L104cbc_dec_one
+ movups 16(%esi),%xmm3
+ movaps %xmm3,%xmm5
+ cmpl $32,%eax
+ jbe .L105cbc_dec_two
+ movups 32(%esi),%xmm4
+ cmpl $48,%eax
+ jbe .L106cbc_dec_three
+ movups 48(%esi),%xmm5
+ cmpl $64,%eax
+ jbe .L107cbc_dec_four
+ movups 64(%esi),%xmm6
+ movaps %xmm7,(%esp)
+ movups (%esi),%xmm2
+ xorps %xmm7,%xmm7
+ call __aesni_decrypt6
+ movups (%esi),%xmm1
+ movups 16(%esi),%xmm0
+ xorps (%esp),%xmm2
+ xorps %xmm1,%xmm3
+ movups 32(%esi),%xmm1
+ xorps %xmm0,%xmm4
+ movups 48(%esi),%xmm0
+ xorps %xmm1,%xmm5
+ movups 64(%esi),%xmm7
+ xorps %xmm0,%xmm6
+ movups %xmm2,(%edi)
+ movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
+ movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
+ movups %xmm5,48(%edi)
+ pxor %xmm5,%xmm5
+ leal 64(%edi),%edi
+ movaps %xmm6,%xmm2
+ pxor %xmm6,%xmm6
+ subl $80,%eax
+ jmp .L108cbc_dec_tail_collected
+.align 16
+.L104cbc_dec_one:
+ movups (%edx),%xmm0
+ movups 16(%edx),%xmm1
+ leal 32(%edx),%edx
+ xorps %xmm0,%xmm2
+.L109dec1_loop_20:
+.byte 102,15,56,222,209
+ decl %ecx
+ movups (%edx),%xmm1
+ leal 16(%edx),%edx
+ jnz .L109dec1_loop_20
+.byte 102,15,56,223,209
+ xorps %xmm7,%xmm2
+ movaps %xmm6,%xmm7
+ subl $16,%eax
+ jmp .L108cbc_dec_tail_collected
+.align 16
+.L105cbc_dec_two:
+ call __aesni_decrypt2
+ xorps %xmm7,%xmm2
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ movaps %xmm3,%xmm2
+ pxor %xmm3,%xmm3
+ leal 16(%edi),%edi
+ movaps %xmm5,%xmm7
+ subl $32,%eax
+ jmp .L108cbc_dec_tail_collected
+.align 16
+.L106cbc_dec_three:
+ call __aesni_decrypt3
+ xorps %xmm7,%xmm2
+ xorps %xmm6,%xmm3
+ xorps %xmm5,%xmm4
+ movups %xmm2,(%edi)
+ movaps %xmm4,%xmm2
+ pxor %xmm4,%xmm4
+ movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
+ leal 32(%edi),%edi
+ movups 32(%esi),%xmm7
+ subl $48,%eax
+ jmp .L108cbc_dec_tail_collected
+.align 16
+.L107cbc_dec_four:
+ call __aesni_decrypt4
+ movups 16(%esi),%xmm1
+ movups 32(%esi),%xmm0
+ xorps %xmm7,%xmm2
+ movups 48(%esi),%xmm7
+ xorps %xmm6,%xmm3
+ movups %xmm2,(%edi)
+ xorps %xmm1,%xmm4
+ movups %xmm3,16(%edi)
+ pxor %xmm3,%xmm3
+ xorps %xmm0,%xmm5
+ movups %xmm4,32(%edi)
+ pxor %xmm4,%xmm4
+ leal 48(%edi),%edi
+ movaps %xmm5,%xmm2
+ pxor %xmm5,%xmm5
+ subl $64,%eax
+ jmp .L108cbc_dec_tail_collected
+.align 16
+.L103cbc_dec_clear_tail_collected:
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+.L108cbc_dec_tail_collected:
+ andl $15,%eax
+ jnz .L110cbc_dec_tail_partial
+ movups %xmm2,(%edi)
+ pxor %xmm0,%xmm0
+ jmp .L099cbc_ret
+.align 16
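+# Partial last block: the decrypted block is staged on the stack, the .long
+# below (data-encoded "mov %esi,%esi; rep movsb") copies the tail bytes out
+# to the caller's buffer, and the stack copy is then overwritten with zeros.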
+.L110cbc_dec_tail_partial:
+ movaps %xmm2,(%esp)
+ pxor %xmm0,%xmm0
+ movl $16,%ecx
+ movl %esp,%esi
+ subl %eax,%ecx
+.long 2767451785
+ movdqa %xmm2,(%esp)
+.L099cbc_ret:
+ movl 16(%esp),%esp
+ movl 36(%esp),%ebp
+ pxor %xmm2,%xmm2
+ pxor %xmm1,%xmm1
+ movups %xmm7,(%ebp)
+ pxor %xmm7,%xmm7
+.L094cbc_abort:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
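+# Internal key-expansion helper, called with the user key pointer in %eax,
+# the key size in bits in %ecx and the schedule pointer in %edx.  It checks
+# both pointers, reads word 1 of _GNUTLS_x86_cpuid_s and masks bits 28 and
+# 11 (the AVX and XOP bits in the OpenSSL-style cpuid vector): AVX without
+# XOP selects the pshufb/aesenclast-based "_alt" schedules below.  Returns
+# 0 on success, -1 for a NULL pointer, -2 for an unsupported key size, and
+# stores the round-loop count at offset 240 of the schedule.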
+.def __aesni_set_encrypt_key; .scl 3; .type 32; .endef
+.align 16
+__aesni_set_encrypt_key:
+ pushl %ebp
+ pushl %ebx
+ testl %eax,%eax
+ jz .L111bad_pointer
+ testl %edx,%edx
+ jz .L111bad_pointer
+ call .L112pic
+.L112pic:
+ popl %ebx
+ leal .Lkey_const-.L112pic(%ebx),%ebx
+ leal _GNUTLS_x86_cpuid_s,%ebp
+ movups (%eax),%xmm0
+ xorps %xmm4,%xmm4
+ movl 4(%ebp),%ebp
+ leal 16(%edx),%edx
+ andl $268437504,%ebp
+ cmpl $256,%ecx
+ je .L11314rounds
+ cmpl $192,%ecx
+ je .L11412rounds
+ cmpl $128,%ecx
+ jne .L115bad_keybits
+.align 16
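+# Classic AES-128 expansion: ten aeskeygenassist steps (the
+# .byte 102,15,58,223,200,RCON lines, RCON = 1,2,4,...,0x1b,0x36), each
+# followed by the shufps/xorps folding in .L119key_128/.L118key_128_cold.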
+.L11610rounds:
+ cmpl $268435456,%ebp
+ je .L11710rounds_alt
+ movl $9,%ecx
+ movups %xmm0,-16(%edx)
+.byte 102,15,58,223,200,1
+ call .L118key_128_cold
+.byte 102,15,58,223,200,2
+ call .L119key_128
+.byte 102,15,58,223,200,4
+ call .L119key_128
+.byte 102,15,58,223,200,8
+ call .L119key_128
+.byte 102,15,58,223,200,16
+ call .L119key_128
+.byte 102,15,58,223,200,32
+ call .L119key_128
+.byte 102,15,58,223,200,64
+ call .L119key_128
+.byte 102,15,58,223,200,128
+ call .L119key_128
+.byte 102,15,58,223,200,27
+ call .L119key_128
+.byte 102,15,58,223,200,54
+ call .L119key_128
+ movups %xmm0,(%edx)
+ movl %ecx,80(%edx)
+ jmp .L120good_key
+.align 16
+.L119key_128:
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+.L118key_128_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ ret
+.align 16
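+# Alternative AES-128 schedule selected above for AVX-without-XOP parts:
+# the .byte 102,15,56,0,197 / 102,15,56,221,196 pairs are
+# pshufb %xmm5,%xmm0 and aesenclast %xmm4,%xmm0, synthesizing
+# RotWord/SubWord from the masks and round constants at .Lkey_const
+# instead of using aeskeygenassist.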
+.L11710rounds_alt:
+ movdqa (%ebx),%xmm5
+ movl $8,%ecx
+ movdqa 32(%ebx),%xmm4
+ movdqa %xmm0,%xmm2
+ movdqu %xmm0,-16(%edx)
+.L121loop_key128:
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ pslld $1,%xmm4
+ leal 16(%edx),%edx
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,-16(%edx)
+ movdqa %xmm0,%xmm2
+ decl %ecx
+ jnz .L121loop_key128
+ movdqa 48(%ebx),%xmm4
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ pslld $1,%xmm4
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ movdqa %xmm0,%xmm2
+.byte 102,15,56,0,197
+.byte 102,15,56,221,196
+ movdqa %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm2,%xmm3
+ pslldq $4,%xmm2
+ pxor %xmm3,%xmm2
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,16(%edx)
+ movl $9,%ecx
+ movl %ecx,96(%edx)
+ jmp .L120good_key
+.align 16
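+# AES-192 key expansion: the upper 64 bits of the user key are loaded into
+# %xmm2 below, and .L125key_192a/.L124key_192b emit the round keys in
+# 1.5-block groups.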
+.L11412rounds:
+ movq 16(%eax),%xmm2
+ cmpl $268435456,%ebp
+ je .L12212rounds_alt
+ movl $11,%ecx
+ movups %xmm0,-16(%edx)
+.byte 102,15,58,223,202,1
+ call .L123key_192a_cold
+.byte 102,15,58,223,202,2
+ call .L124key_192b
+.byte 102,15,58,223,202,4
+ call .L125key_192a
+.byte 102,15,58,223,202,8
+ call .L124key_192b
+.byte 102,15,58,223,202,16
+ call .L125key_192a
+.byte 102,15,58,223,202,32
+ call .L124key_192b
+.byte 102,15,58,223,202,64
+ call .L125key_192a
+.byte 102,15,58,223,202,128
+ call .L124key_192b
+ movups %xmm0,(%edx)
+ movl %ecx,48(%edx)
+ jmp .L120good_key
+.align 16
+.L125key_192a:
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+.align 16
+.L123key_192a_cold:
+ movaps %xmm2,%xmm5
+.L126key_192b_warm:
+ shufps $16,%xmm0,%xmm4
+ movdqa %xmm2,%xmm3
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ pslldq $4,%xmm3
+ xorps %xmm4,%xmm0
+ pshufd $85,%xmm1,%xmm1
+ pxor %xmm3,%xmm2
+ pxor %xmm1,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm3,%xmm2
+ ret
+.align 16
+.L124key_192b:
+ movaps %xmm0,%xmm3
+ shufps $68,%xmm0,%xmm5
+ movups %xmm5,(%edx)
+ shufps $78,%xmm2,%xmm3
+ movups %xmm3,16(%edx)
+ leal 32(%edx),%edx
+ jmp .L126key_192b_warm
+.align 16
+.L12212rounds_alt:
+ movdqa 16(%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $8,%ecx
+ movdqu %xmm0,-16(%edx)
+.L127loop_key192:
+ movq %xmm2,(%edx)
+ movdqa %xmm2,%xmm1
+.byte 102,15,56,0,213
+.byte 102,15,56,221,212
+ pslld $1,%xmm4
+ leal 24(%edx),%edx
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pshufd $255,%xmm0,%xmm3
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pxor %xmm2,%xmm0
+ pxor %xmm3,%xmm2
+ movdqu %xmm0,-16(%edx)
+ decl %ecx
+ jnz .L127loop_key192
+ movl $11,%ecx
+ movl %ecx,32(%edx)
+ jmp .L120good_key
+.align 16
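+# AES-256 key expansion: the second half of the user key is loaded into
+# %xmm2 and the .L131key_256a/.L130key_256b helpers alternate between the
+# two key halves to derive the remaining round keys.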
+.L11314rounds:
+ movups 16(%eax),%xmm2
+ leal 16(%edx),%edx
+ cmpl $268435456,%ebp
+ je .L12814rounds_alt
+ movl $13,%ecx
+ movups %xmm0,-32(%edx)
+ movups %xmm2,-16(%edx)
+.byte 102,15,58,223,202,1
+ call .L129key_256a_cold
+.byte 102,15,58,223,200,1
+ call .L130key_256b
+.byte 102,15,58,223,202,2
+ call .L131key_256a
+.byte 102,15,58,223,200,2
+ call .L130key_256b
+.byte 102,15,58,223,202,4
+ call .L131key_256a
+.byte 102,15,58,223,200,4
+ call .L130key_256b
+.byte 102,15,58,223,202,8
+ call .L131key_256a
+.byte 102,15,58,223,200,8
+ call .L130key_256b
+.byte 102,15,58,223,202,16
+ call .L131key_256a
+.byte 102,15,58,223,200,16
+ call .L130key_256b
+.byte 102,15,58,223,202,32
+ call .L131key_256a
+.byte 102,15,58,223,200,32
+ call .L130key_256b
+.byte 102,15,58,223,202,64
+ call .L131key_256a
+ movups %xmm0,(%edx)
+ movl %ecx,16(%edx)
+ xorl %eax,%eax
+ jmp .L120good_key
+.align 16
+.L131key_256a:
+ movups %xmm2,(%edx)
+ leal 16(%edx),%edx
+.L129key_256a_cold:
+ shufps $16,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $140,%xmm0,%xmm4
+ xorps %xmm4,%xmm0
+ shufps $255,%xmm1,%xmm1
+ xorps %xmm1,%xmm0
+ ret
+.align 16
+.L130key_256b:
+ movups %xmm0,(%edx)
+ leal 16(%edx),%edx
+ shufps $16,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $140,%xmm2,%xmm4
+ xorps %xmm4,%xmm2
+ shufps $170,%xmm1,%xmm1
+ xorps %xmm1,%xmm2
+ ret
+.align 16
+.L12814rounds_alt:
+ movdqa (%ebx),%xmm5
+ movdqa 32(%ebx),%xmm4
+ movl $7,%ecx
+ movdqu %xmm0,-32(%edx)
+ movdqa %xmm2,%xmm1
+ movdqu %xmm2,-16(%edx)
+.L132loop_key256:
+.byte 102,15,56,0,213
+.byte 102,15,56,221,212
+ movdqa %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm0,%xmm3
+ pslldq $4,%xmm0
+ pxor %xmm3,%xmm0
+ pslld $1,%xmm4
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,(%edx)
+ decl %ecx
+ jz .L133done_key256
+ pshufd $255,%xmm0,%xmm2
+ pxor %xmm3,%xmm3
+.byte 102,15,56,221,211
+ movdqa %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm1,%xmm3
+ pslldq $4,%xmm1
+ pxor %xmm3,%xmm1
+ pxor %xmm1,%xmm2
+ movdqu %xmm2,16(%edx)
+ leal 32(%edx),%edx
+ movdqa %xmm2,%xmm1
+ jmp .L132loop_key256
+.L133done_key256:
+ movl $13,%ecx
+ movl %ecx,16(%edx)
+.L120good_key:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ xorl %eax,%eax
+ popl %ebx
+ popl %ebp
+ ret
+.align 4
+.L111bad_pointer:
+ movl $-1,%eax
+ popl %ebx
+ popl %ebp
+ ret
+.align 4
+.L115bad_keybits:
+ pxor %xmm0,%xmm0
+ movl $-2,%eax
+ popl %ebx
+ popl %ebp
+ ret
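+# Public wrapper mirroring
+#   int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
+#                             AES_KEY *key);
+# it loads the three stack arguments into %eax/%ecx/%edx and calls the
+# helper above, returning its 0/-1/-2 status.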
+.globl _aesni_set_encrypt_key
+.def _aesni_set_encrypt_key; .scl 2; .type 32; .endef
+.align 16
+_aesni_set_encrypt_key:
+.L_aesni_set_encrypt_key_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ call __aesni_set_encrypt_key
+ ret
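+# Decrypt-key setup: expands the encryption schedule first, then builds the
+# decryption schedule in place by swapping the round keys end-for-end and
+# applying aesimc (.byte 102,15,56,219,...) to all but the outermost two,
+# as the equivalent inverse cipher requires.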
+.globl _aesni_set_decrypt_key
+.def _aesni_set_decrypt_key; .scl 2; .type 32; .endef
+.align 16
+_aesni_set_decrypt_key:
+.L_aesni_set_decrypt_key_begin:
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ call __aesni_set_encrypt_key
+ movl 12(%esp),%edx
+ shll $4,%ecx
+ testl %eax,%eax
+ jnz .L134dec_key_ret
+ leal 16(%edx,%ecx,1),%eax
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
+ movups %xmm0,(%eax)
+ movups %xmm1,(%edx)
+ leal 16(%edx),%edx
+ leal -16(%eax),%eax
+.L135dec_key_inverse:
+ movups (%edx),%xmm0
+ movups (%eax),%xmm1
+.byte 102,15,56,219,192
+.byte 102,15,56,219,201
+ leal 16(%edx),%edx
+ leal -16(%eax),%eax
+ movups %xmm0,16(%eax)
+ movups %xmm1,-16(%edx)
+ cmpl %edx,%eax
+ ja .L135dec_key_inverse
+ movups (%edx),%xmm0
+.byte 102,15,56,219,192
+ movups %xmm0,(%edx)
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ xorl %eax,%eax
+.L134dec_key_ret:
+ ret
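+# Constants for the "_alt" key schedules: two pshufb masks used to
+# rotate/replicate the key words, the round-constant seed 1 and the
+# reduction constant 0x1b.  The trailing .byte string spells the CRYPTOGAMS
+# credit "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>".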
+.align 64
+.Lkey_const:
+.long 202313229,202313229,202313229,202313229
+.long 67569157,67569157,67569157,67569157
+.long 1,1,1,1
+.long 27,27,27,27
+.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+.byte 115,108,46,111,114,103,62,0
+.comm _GNUTLS_x86_cpuid_s,16
+