author    | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 07:33:12 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 07:33:12 +0000
commit    | 36082a2fe36ecd800d784ae44c14f1f18c66a7e9 (patch)
tree      | 6c68e0c0097987aff85a01dabddd34b862309a7c /lib/accelerated/x86/coff/sha1-ssse3-x86_64.s
parent    | Initial commit. (diff)
download  | gnutls28-36082a2fe36ecd800d784ae44c14f1f18c66a7e9.tar.xz, gnutls28-36082a2fe36ecd800d784ae44c14f1f18c66a7e9.zip
Adding upstream version 3.7.9. (refs: upstream/3.7.9, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/accelerated/x86/coff/sha1-ssse3-x86_64.s')
-rw-r--r-- | lib/accelerated/x86/coff/sha1-ssse3-x86_64.s | 5792
1 file changed, 5792 insertions, 0 deletions
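The added file exports a single routine, sha1_block_data_order, which dispatches at run time among scalar, SSSE3, AVX, AVX2 and SHA-extension code paths based on the GNUTLS_x86_cpuid_s feature words (see the tests and shortcut jumps at the top of the function in the diff below). Judging from the register usage (%rdi = 5-word state, %rsi = input, %rdx = number of 64-byte blocks) and the Win64-to-SysV argument shuffle at .LSEH_begin_sha1_block_data_order, a C caller would look roughly like the sketch below; the prototype and the pre-padded block are assumptions inferred from the listing, not part of the diff.

/* Hypothetical C-side view of the assembly entry point added by this diff.
 * The prototype is inferred from register usage in the listing
 * (%rdi = 5-word state, %rsi = data, %rdx = number of 64-byte blocks);
 * it is an illustrative assumption, not taken from the diff itself. */
#include <stddef.h>
#include <stdint.h>

extern void sha1_block_data_order(uint32_t state[5], const void *data,
                                  size_t num_blocks);

int main(void)
{
    /* Standard SHA-1 initial state (FIPS 180-4). */
    uint32_t state[5] = { 0x67452301, 0xEFCDAB89, 0x98BADCFE,
                          0x10325476, 0xC3D2E1F0 };

    /* One already-padded 64-byte block (empty message): 0x80 then zeros,
     * with the zero bit-length in the last 8 bytes. Real callers let the
     * hash layer do the padding before calling in. */
    uint8_t block[64] = { 0x80 };

    sha1_block_data_order(state, block, 1);   /* process one block */
    return 0;                                 /* state[] now holds the digest words */
}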
diff --git a/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s
new file mode 100644
index 0000000..eb7924d
--- /dev/null
+++ b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s
@@ -0,0 +1,5792 @@
+# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
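For reference while reading the unrolled rounds that follow: the immediates 1518500249, 1859775393, -1894007588 and -899497514 are the four SHA-1 stage constants 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC and 0xCA62C1D6; the roll $5, roll $30 and roll $1 operations are the rotates of the round function and message schedule; and the and/xor combinations implement the Ch, Parity and Maj boolean functions. A compact C reference of what one block of those rounds computes (an illustration, not code from this diff):

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

/* Reference SHA-1 compression of one 64-byte block, matching the constants
 * and rotate amounts used by the unrolled assembly; illustration only. */
static void sha1_compress_ref(uint32_t h[5], const uint8_t p[64])
{
    uint32_t w[80], a = h[0], b = h[1], c = h[2], d = h[3], e = h[4];

    for (int i = 0; i < 16; i++)              /* the bswapl of each input word */
        w[i] = (uint32_t)p[4*i] << 24 | (uint32_t)p[4*i+1] << 16 |
               (uint32_t)p[4*i+2] << 8 | p[4*i+3];
    for (int i = 16; i < 80; i++)             /* message schedule: the xorl/roll $1 chain */
        w[i] = rol32(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);

    for (int i = 0; i < 80; i++) {
        uint32_t f, k;
        if (i < 20)      { f = (b & c) | (~b & d);          k = 0x5A827999; } /* 1518500249 */
        else if (i < 40) { f = b ^ c ^ d;                   k = 0x6ED9EBA1; } /* 1859775393 */
        else if (i < 60) { f = (b & c) | (b & d) | (c & d); k = 0x8F1BBCDC; } /* -1894007588 */
        else             { f = b ^ c ^ d;                   k = 0xCA62C1D6; } /* -899497514 */
        uint32_t t = rol32(a, 5) + f + e + k + w[i];
        e = d; d = c; c = rol32(b, 30); b = a; a = t;
    }
    h[0] += a; h[1] += b; h[2] += c; h[3] += d; h[4] += e;
}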
+# +# *** This file is auto-generated *** +# +.text + + +.globl sha1_block_data_order +.def sha1_block_data_order; .scl 2; .type 32; .endef +.p2align 4 +sha1_block_data_order: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha1_block_data_order: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + + movl GNUTLS_x86_cpuid_s+0(%rip),%r9d + movl GNUTLS_x86_cpuid_s+4(%rip),%r8d + movl GNUTLS_x86_cpuid_s+8(%rip),%r10d + testl $512,%r8d + jz .Lialu + testl $536870912,%r10d + jnz _shaext_shortcut + andl $296,%r10d + cmpl $296,%r10d + je _avx2_shortcut + andl $268435456,%r8d + andl $1073741824,%r9d + orl %r9d,%r8d + cmpl $1342177280,%r8d + je _avx_shortcut + jmp _ssse3_shortcut + +.p2align 4 +.Lialu: + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %rax,64(%rsp) + +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.p2align 4 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl 4(%r9),%ebp + movl %r12d,%eax + movl %edx,0(%rsp) + movl %esi,%ecx + bswapl %ebp + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 8(%r9),%r14d + movl %r11d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ecx + bswapl %r14d + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 12(%r9),%edx + movl %edi,%eax + movl %r14d,8(%rsp) + movl %r12d,%ecx + bswapl %edx + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 16(%r9),%ebp + movl %esi,%eax + movl %edx,12(%rsp) + movl %r11d,%ecx + bswapl %ebp + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 20(%r9),%r14d + movl %r13d,%eax + movl %ebp,16(%rsp) + movl %edi,%ecx + bswapl %r14d + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 24(%r9),%edx + movl %r12d,%eax + movl %r14d,20(%rsp) + movl %esi,%ecx + bswapl %edx + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%r14,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 28(%r9),%ebp + movl %r11d,%eax + movl %edx,24(%rsp) + movl %r13d,%ecx + bswapl %ebp + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 32(%r9),%r14d + movl %edi,%eax + movl %ebp,28(%rsp) + movl %r12d,%ecx + bswapl %r14d + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 36(%r9),%edx + movl %esi,%eax + movl %r14d,32(%rsp) + movl %r11d,%ecx + bswapl %edx + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%r14,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 40(%r9),%ebp + movl %r13d,%eax + movl %edx,36(%rsp) + movl %edi,%ecx + bswapl %ebp + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%rdx,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll 
$30,%r11d + addl %eax,%esi + movl 44(%r9),%r14d + movl %r12d,%eax + movl %ebp,40(%rsp) + movl %esi,%ecx + bswapl %r14d + xorl %r11d,%eax + roll $5,%ecx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl 48(%r9),%edx + movl %r11d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ecx + bswapl %edx + xorl %edi,%eax + roll $5,%ecx + andl %esi,%eax + leal 1518500249(%r14,%r12,1),%r12d + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl 52(%r9),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %r12d,%ecx + bswapl %ebp + xorl %esi,%eax + roll $5,%ecx + andl %r13d,%eax + leal 1518500249(%rdx,%r11,1),%r11d + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl 56(%r9),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r11d,%ecx + bswapl %r14d + xorl %r13d,%eax + roll $5,%ecx + andl %r12d,%eax + leal 1518500249(%rbp,%rdi,1),%edi + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl 60(%r9),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %edi,%ecx + bswapl %edx + xorl %r12d,%eax + roll $5,%ecx + andl %r11d,%eax + leal 1518500249(%r14,%rsi,1),%esi + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %esi,%ecx + xorl 8(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + andl %edi,%eax + leal 1518500249(%rdx,%r13,1),%r13d + roll $30,%edi + xorl %r12d,%eax + addl %ecx,%r13d + roll $1,%ebp + addl %eax,%r13d + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %r13d,%ecx + xorl 12(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + andl %esi,%eax + leal 1518500249(%rbp,%r12,1),%r12d + roll $30,%esi + xorl %r11d,%eax + addl %ecx,%r12d + roll $1,%r14d + addl %eax,%r12d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%r14,%r11,1),%r11d + roll $30,%r13d + xorl %edi,%eax + addl %ecx,%r11d + roll $1,%edx + addl %eax,%r11d + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + roll $30,%r12d + xorl %esi,%eax + addl %ecx,%edi + roll $1,%ebp + addl %eax,%edi + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %edi,%ecx + xorl 24(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + roll $30,%r11d + xorl %r13d,%eax + addl %ecx,%esi + roll $1,%r14d + addl %eax,%esi + xorl 20(%rsp),%edx + movl %edi,%eax + movl %r14d,16(%rsp) + movl %esi,%ecx + xorl 28(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 24(%rsp),%ebp + movl %esi,%eax + movl %edx,20(%rsp) + movl %r13d,%ecx + xorl 32(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %r13d,%eax + movl %ebp,24(%rsp) + movl %r12d,%ecx + xorl 36(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 
32(%rsp),%edx + movl %r12d,%eax + movl %r14d,28(%rsp) + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r11d,%eax + movl %edx,32(%rsp) + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %edi,%eax + movl %ebp,36(%rsp) + movl %esi,%ecx + xorl 48(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal 1859775393(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%r14d + xorl 44(%rsp),%edx + movl %esi,%eax + movl %r14d,40(%rsp) + movl %r13d,%ecx + xorl 52(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal 1859775393(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 48(%rsp),%ebp + movl %r13d,%eax + movl %edx,44(%rsp) + movl %r12d,%ecx + xorl 56(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal 1859775393(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %r12d,%eax + movl %ebp,48(%rsp) + movl %r11d,%ecx + xorl 60(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal 1859775393(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r11d,%eax + movl %r14d,52(%rsp) + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal 1859775393(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 60(%rsp),%ebp + movl %edi,%eax + movl %edx,56(%rsp) + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal 1859775393(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 0(%rsp),%r14d + movl %esi,%eax + movl %ebp,60(%rsp) + movl %r13d,%ecx + xorl 8(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 32(%rsp),%r14d + leal 1859775393(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 4(%rsp),%edx + movl %r13d,%eax + movl %r14d,0(%rsp) + movl %r12d,%ecx + xorl 12(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 36(%rsp),%edx + leal 1859775393(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 8(%rsp),%ebp + movl %r12d,%eax + movl %edx,4(%rsp) + movl %r11d,%ecx + xorl 16(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 40(%rsp),%ebp + leal 1859775393(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 12(%rsp),%r14d + movl %r11d,%eax + movl %ebp,8(%rsp) + movl %edi,%ecx + xorl 20(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 44(%rsp),%r14d + leal 1859775393(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 16(%rsp),%edx + movl %edi,%eax + movl %r14d,12(%rsp) + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + leal 1859775393(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 
20(%rsp),%ebp + movl %esi,%eax + movl %edx,16(%rsp) + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 52(%rsp),%ebp + leal 1859775393(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 24(%rsp),%r14d + movl %r13d,%eax + movl %ebp,20(%rsp) + movl %r12d,%ecx + xorl 32(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 56(%rsp),%r14d + leal 1859775393(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 28(%rsp),%edx + movl %r12d,%eax + movl %r14d,24(%rsp) + movl %r11d,%ecx + xorl 36(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 60(%rsp),%edx + leal 1859775393(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 32(%rsp),%ebp + movl %r11d,%eax + movl %edx,28(%rsp) + movl %edi,%ecx + xorl 40(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 0(%rsp),%ebp + leal 1859775393(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 36(%rsp),%r14d + movl %r12d,%eax + movl %ebp,32(%rsp) + movl %r12d,%ebx + xorl 44(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 40(%rsp),%edx + movl %r11d,%eax + movl %r14d,36(%rsp) + movl %r11d,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 44(%rsp),%ebp + movl %edi,%eax + movl %edx,40(%rsp) + movl %edi,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 48(%rsp),%r14d + movl %esi,%eax + movl %ebp,44(%rsp) + movl %esi,%ebx + xorl 56(%rsp),%r14d + andl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 52(%rsp),%edx + movl %r13d,%eax + movl %r14d,48(%rsp) + movl %r13d,%ebx + xorl 60(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 56(%rsp),%ebp + movl %r12d,%eax + movl %edx,52(%rsp) + movl %r12d,%ebx + xorl 0(%rsp),%ebp + andl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%ebp + leal -1894007588(%rdx,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%ebp + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 60(%rsp),%r14d + movl %r11d,%eax + movl %ebp,56(%rsp) + movl %r11d,%ebx + xorl 4(%rsp),%r14d + andl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%r14d + leal -1894007588(%rbp,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%r14d + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 0(%rsp),%edx + movl %edi,%eax + movl %r14d,60(%rsp) + movl %edi,%ebx + xorl 8(%rsp),%edx + andl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + leal 
-1894007588(%r14,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%edx + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 4(%rsp),%ebp + movl %esi,%eax + movl %edx,0(%rsp) + movl %esi,%ebx + xorl 12(%rsp),%ebp + andl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + leal -1894007588(%rdx,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%ebp + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 8(%rsp),%r14d + movl %r13d,%eax + movl %ebp,4(%rsp) + movl %r13d,%ebx + xorl 16(%rsp),%r14d + andl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%r14d + leal -1894007588(%rbp,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%r14d + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 12(%rsp),%edx + movl %r12d,%eax + movl %r14d,8(%rsp) + movl %r12d,%ebx + xorl 20(%rsp),%edx + andl %r11d,%eax + movl %esi,%ecx + xorl 44(%rsp),%edx + leal -1894007588(%r14,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%edx + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 16(%rsp),%ebp + movl %r11d,%eax + movl %edx,12(%rsp) + movl %r11d,%ebx + xorl 24(%rsp),%ebp + andl %edi,%eax + movl %r13d,%ecx + xorl 48(%rsp),%ebp + leal -1894007588(%rdx,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%ebp + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 20(%rsp),%r14d + movl %edi,%eax + movl %ebp,16(%rsp) + movl %edi,%ebx + xorl 28(%rsp),%r14d + andl %esi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%r14d + leal -1894007588(%rbp,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%r14d + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 24(%rsp),%edx + movl %esi,%eax + movl %r14d,20(%rsp) + movl %esi,%ebx + xorl 32(%rsp),%edx + andl %r13d,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + leal -1894007588(%r14,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%edx + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 28(%rsp),%ebp + movl %r13d,%eax + movl %edx,24(%rsp) + movl %r13d,%ebx + xorl 36(%rsp),%ebp + andl %r12d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + leal -1894007588(%rdx,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%ebp + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 32(%rsp),%r14d + movl %r12d,%eax + movl %ebp,28(%rsp) + movl %r12d,%ebx + xorl 40(%rsp),%r14d + andl %r11d,%eax + movl %esi,%ecx + xorl 0(%rsp),%r14d + leal -1894007588(%rbp,%r13,1),%r13d + xorl %r11d,%ebx + roll $5,%ecx + addl %eax,%r13d + roll $1,%r14d + andl %edi,%ebx + addl %ecx,%r13d + roll $30,%edi + addl %ebx,%r13d + xorl 36(%rsp),%edx + movl %r11d,%eax + movl %r14d,32(%rsp) + movl %r11d,%ebx + xorl 44(%rsp),%edx + andl %edi,%eax + movl %r13d,%ecx + xorl 4(%rsp),%edx + leal -1894007588(%r14,%r12,1),%r12d + xorl %edi,%ebx + roll $5,%ecx + addl %eax,%r12d + roll $1,%edx + andl %esi,%ebx + addl %ecx,%r12d + roll $30,%esi + addl %ebx,%r12d + xorl 40(%rsp),%ebp + movl %edi,%eax + movl %edx,36(%rsp) + movl %edi,%ebx + xorl 48(%rsp),%ebp + andl %esi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%ebp + leal -1894007588(%rdx,%r11,1),%r11d + xorl %esi,%ebx + roll $5,%ecx + addl %eax,%r11d + roll $1,%ebp + andl %r13d,%ebx + addl %ecx,%r11d + roll $30,%r13d + addl %ebx,%r11d + xorl 44(%rsp),%r14d + movl %esi,%eax + movl %ebp,40(%rsp) + movl %esi,%ebx + xorl 52(%rsp),%r14d + andl 
%r13d,%eax + movl %r11d,%ecx + xorl 12(%rsp),%r14d + leal -1894007588(%rbp,%rdi,1),%edi + xorl %r13d,%ebx + roll $5,%ecx + addl %eax,%edi + roll $1,%r14d + andl %r12d,%ebx + addl %ecx,%edi + roll $30,%r12d + addl %ebx,%edi + xorl 48(%rsp),%edx + movl %r13d,%eax + movl %r14d,44(%rsp) + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %r12d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + leal -1894007588(%r14,%rsi,1),%esi + xorl %r12d,%ebx + roll $5,%ecx + addl %eax,%esi + roll $1,%edx + andl %r11d,%ebx + addl %ecx,%esi + roll $30,%r11d + addl %ebx,%esi + xorl 52(%rsp),%ebp + movl %edi,%eax + movl %edx,48(%rsp) + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 20(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 56(%rsp),%r14d + movl %esi,%eax + movl %ebp,52(%rsp) + movl %r13d,%ecx + xorl 0(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 24(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 60(%rsp),%edx + movl %r13d,%eax + movl %r14d,56(%rsp) + movl %r12d,%ecx + xorl 4(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 28(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 0(%rsp),%ebp + movl %r12d,%eax + movl %edx,60(%rsp) + movl %r11d,%ecx + xorl 8(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 32(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + xorl 4(%rsp),%r14d + movl %r11d,%eax + movl %ebp,0(%rsp) + movl %edi,%ecx + xorl 12(%rsp),%r14d + xorl %r13d,%eax + roll $5,%ecx + xorl 36(%rsp),%r14d + leal -899497514(%rbp,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%r14d + xorl 8(%rsp),%edx + movl %edi,%eax + movl %r14d,4(%rsp) + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + leal -899497514(%r14,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + xorl 12(%rsp),%ebp + movl %esi,%eax + movl %edx,8(%rsp) + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + leal -899497514(%rdx,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + xorl 16(%rsp),%r14d + movl %r13d,%eax + movl %ebp,12(%rsp) + movl %r12d,%ecx + xorl 24(%rsp),%r14d + xorl %edi,%eax + roll $5,%ecx + xorl 48(%rsp),%r14d + leal -899497514(%rbp,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%r14d + xorl 20(%rsp),%edx + movl %r12d,%eax + movl %r14d,16(%rsp) + movl %r11d,%ecx + xorl 28(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + xorl 52(%rsp),%edx + leal -899497514(%r14,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + xorl 24(%rsp),%ebp + movl %r11d,%eax + movl %edx,20(%rsp) + movl %edi,%ecx + xorl 32(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + xorl 56(%rsp),%ebp + leal -899497514(%rdx,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + xorl 28(%rsp),%r14d + movl %edi,%eax + movl %ebp,24(%rsp) + movl %esi,%ecx + xorl 36(%rsp),%r14d + xorl %r12d,%eax + roll $5,%ecx + xorl 60(%rsp),%r14d + leal -899497514(%rbp,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll 
$1,%r14d + xorl 32(%rsp),%edx + movl %esi,%eax + movl %r14d,28(%rsp) + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + xorl 0(%rsp),%edx + leal -899497514(%r14,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + xorl 36(%rsp),%ebp + movl %r13d,%eax + + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + xorl 4(%rsp),%ebp + leal -899497514(%rdx,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + xorl 40(%rsp),%r14d + movl %r12d,%eax + + movl %r11d,%ecx + xorl 48(%rsp),%r14d + xorl %esi,%eax + roll $5,%ecx + xorl 8(%rsp),%r14d + leal -899497514(%rbp,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%r14d + xorl 44(%rsp),%edx + movl %r11d,%eax + + movl %edi,%ecx + xorl 52(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 12(%rsp),%edx + leal -899497514(%r14,%rsi,1),%esi + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + xorl 48(%rsp),%ebp + movl %edi,%eax + + movl %esi,%ecx + xorl 56(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + xorl 16(%rsp),%ebp + leal -899497514(%rdx,%r13,1),%r13d + xorl %r11d,%eax + addl %ecx,%r13d + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + xorl 52(%rsp),%r14d + movl %esi,%eax + + movl %r13d,%ecx + xorl 60(%rsp),%r14d + xorl %r11d,%eax + roll $5,%ecx + xorl 20(%rsp),%r14d + leal -899497514(%rbp,%r12,1),%r12d + xorl %edi,%eax + addl %ecx,%r12d + roll $30,%esi + addl %eax,%r12d + roll $1,%r14d + xorl 56(%rsp),%edx + movl %r13d,%eax + + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 24(%rsp),%edx + leal -899497514(%r14,%r11,1),%r11d + xorl %esi,%eax + addl %ecx,%r11d + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + xorl 60(%rsp),%ebp + movl %r12d,%eax + + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 28(%rsp),%ebp + leal -899497514(%rdx,%rdi,1),%edi + xorl %r13d,%eax + addl %ecx,%edi + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r11d,%eax + movl %edi,%ecx + xorl %r13d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r12d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +.Lepilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 + +.LSEH_end_sha1_block_data_order: +.def sha1_block_data_order_shaext; .scl 3; .type 32; .endef +.p2align 5 +sha1_block_data_order_shaext: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha1_block_data_order_shaext: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + +_shaext_shortcut: + + leaq -72(%rsp),%rsp + movaps %xmm6,-8-64(%rax) + movaps %xmm7,-8-48(%rax) + movaps %xmm8,-8-32(%rax) + movaps %xmm9,-8-16(%rax) +.Lprologue_shaext: + movdqu (%rdi),%xmm0 + movd 16(%rdi),%xmm1 + movdqa K_XX_XX+160(%rip),%xmm3 + + movdqu (%rsi),%xmm4 + pshufd $27,%xmm0,%xmm0 + movdqu 16(%rsi),%xmm5 + pshufd $27,%xmm1,%xmm1 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,227 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,235 +.byte 102,15,56,0,243 + movdqa %xmm1,%xmm9 +.byte 102,15,56,0,251 + jmp .Loop_shaext + +.p2align 4 
+.Loop_shaext: + decq %rdx + leaq 64(%rsi),%r8 + paddd %xmm4,%xmm1 + cmovneq %r8,%rsi + movdqa %xmm0,%xmm8 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,0 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,0 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,1 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,1 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 +.byte 15,56,201,229 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,213 + pxor %xmm6,%xmm4 +.byte 15,56,201,238 +.byte 15,56,202,231 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,2 +.byte 15,56,200,206 + pxor %xmm7,%xmm5 +.byte 15,56,202,236 +.byte 15,56,201,247 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,2 +.byte 15,56,200,215 + pxor %xmm4,%xmm6 +.byte 15,56,201,252 +.byte 15,56,202,245 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,204 + pxor %xmm5,%xmm7 +.byte 15,56,202,254 + movdqu (%rsi),%xmm4 + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,213 + movdqu 16(%rsi),%xmm5 +.byte 102,15,56,0,227 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 15,56,200,206 + movdqu 32(%rsi),%xmm6 +.byte 102,15,56,0,235 + + movdqa %xmm0,%xmm2 +.byte 15,58,204,193,3 +.byte 15,56,200,215 + movdqu 48(%rsi),%xmm7 +.byte 102,15,56,0,243 + + movdqa %xmm0,%xmm1 +.byte 15,58,204,194,3 +.byte 65,15,56,200,201 +.byte 102,15,56,0,251 + + paddd %xmm8,%xmm0 + movdqa %xmm1,%xmm9 + + jnz .Loop_shaext + + pshufd $27,%xmm0,%xmm0 + pshufd $27,%xmm1,%xmm1 + movdqu %xmm0,(%rdi) + movd %xmm1,16(%rdi) + movaps -8-64(%rax),%xmm6 + movaps -8-48(%rax),%xmm7 + movaps -8-32(%rax),%xmm8 + movaps -8-16(%rax),%xmm9 + movq %rax,%rsp +.Lepilogue_shaext: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 + +.LSEH_end_sha1_block_data_order_shaext: +.def sha1_block_data_order_ssse3; .scl 3; .type 32; .endef +.p2align 4 +sha1_block_data_order_ssse3: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha1_block_data_order_ssse3: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + +_ssse3_shortcut: + + movq %rsp,%r11 + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + leaq -160(%rsp),%rsp + movaps %xmm6,-40-96(%r11) + movaps %xmm7,-40-80(%r11) + movaps %xmm8,-40-64(%r11) + movaps %xmm9,-40-48(%r11) + movaps %xmm10,-40-32(%r11) + movaps 
%xmm11,-40-16(%r11) +.Lprologue_ssse3: + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r14),%xmm6 + movdqa -64(%r14),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 + addq $64,%r9 + paddd %xmm9,%xmm0 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.p2align 4 +.Loop_ssse3: + rorl $2,%ebx + pshufd $238,%xmm0,%xmm4 + xorl %edx,%esi + movdqa %xmm3,%xmm8 + paddd %xmm3,%xmm9 + movl %eax,%edi + addl 0(%rsp),%ebp + punpcklqdq %xmm1,%xmm4 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + psrldq $4,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + pxor %xmm0,%xmm4 + addl %eax,%ebp + rorl $7,%eax + pxor %xmm2,%xmm8 + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + movdqa %xmm4,%xmm8 + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + psrld $31,%xmm8 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm9 + andl %ebp,%edi + xorl %eax,%ebp + psrld $30,%xmm10 + addl %edx,%ecx + rorl $7,%edx + por %xmm8,%xmm4 + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + movdqa -64(%r14),%xmm10 + roll $5,%ecx + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + pshufd $238,%xmm1,%xmm5 + xorl %ebp,%esi + movdqa %xmm4,%xmm9 + paddd %xmm4,%xmm10 + movl %ebx,%edi + addl 16(%rsp),%eax + punpcklqdq %xmm2,%xmm5 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + psrldq $4,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + pxor %xmm1,%xmm5 + addl %ebx,%eax + rorl $7,%ebx + pxor %xmm3,%xmm9 + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + movdqa %xmm5,%xmm9 + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + psrld $31,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm10 + andl %eax,%edi + xorl %ebx,%eax + psrld $30,%xmm8 + addl %ebp,%edx + rorl $7,%ebp + por %xmm9,%xmm5 + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + movdqa -32(%r14),%xmm8 + roll $5,%edx + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + pshufd $238,%xmm2,%xmm6 + xorl %eax,%esi + movdqa %xmm5,%xmm10 + paddd %xmm5,%xmm8 + movl %ecx,%edi + addl 32(%rsp),%ebx + punpcklqdq %xmm3,%xmm6 + xorl %ebp,%edx + roll $5,%ecx + addl %esi,%ebx + psrldq $4,%xmm10 + andl %edx,%edi + xorl %ebp,%edx + pxor %xmm2,%xmm6 + addl %ecx,%ebx + rorl $7,%ecx + pxor %xmm4,%xmm10 + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa 
%xmm6,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + movdqa %xmm6,%xmm10 + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + psrld $31,%xmm10 + xorl %ecx,%ebx + roll $5,%eax + addl %esi,%ebp + movdqa %xmm9,%xmm8 + andl %ebx,%edi + xorl %ecx,%ebx + psrld $30,%xmm9 + addl %eax,%ebp + rorl $7,%eax + por %xmm10,%xmm6 + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + movdqa -32(%r14),%xmm9 + roll $5,%ebp + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + pshufd $238,%xmm3,%xmm7 + xorl %ebx,%esi + movdqa %xmm6,%xmm8 + paddd %xmm6,%xmm9 + movl %edx,%edi + addl 48(%rsp),%ecx + punpcklqdq %xmm4,%xmm7 + xorl %eax,%ebp + roll $5,%edx + addl %esi,%ecx + psrldq $4,%xmm8 + andl %ebp,%edi + xorl %eax,%ebp + pxor %xmm3,%xmm7 + addl %edx,%ecx + rorl $7,%edx + pxor %xmm5,%xmm8 + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + movdqa %xmm7,%xmm8 + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + psrld $31,%xmm8 + xorl %edx,%ecx + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,%xmm9 + andl %ecx,%edi + xorl %edx,%ecx + psrld $30,%xmm10 + addl %ebx,%eax + rorl $7,%ebx + por %xmm8,%xmm7 + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + movdqa -32(%r14),%xmm10 + roll $5,%eax + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + pshufd $238,%xmm6,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + pxor %xmm4,%xmm0 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + punpcklqdq %xmm7,%xmm9 + xorl %ebx,%eax + roll $5,%ebp + pxor %xmm1,%xmm0 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm10,%xmm8 + xorl %ebx,%eax + paddd %xmm7,%xmm10 + addl %ebp,%edx + pxor %xmm9,%xmm0 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 4(%rsp),%ecx + movdqa %xmm0,%xmm9 + xorl %eax,%ebp + roll $5,%edx + movdqa %xmm10,48(%rsp) + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + pslld $2,%xmm0 + addl %edx,%ecx + rorl $7,%edx + psrld $30,%xmm9 + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + pshufd $238,%xmm7,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm5,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm0,%xmm10 + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebx + paddd %xmm0,%xmm8 + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm8,0(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 24(%rsp),%ecx + pslld $2,%xmm1 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm10 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm10,%xmm1 + addl %edx,%ecx + addl 28(%rsp),%ebx + pshufd $238,%xmm0,%xmm8 + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm6,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + punpcklqdq %xmm1,%xmm8 + movl %ebx,%edi + roll $5,%ebx + pxor 
%xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r14),%xmm10 + rorl $7,%ecx + paddd %xmm1,%xmm9 + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + addl %edi,%ebp + xorl %ecx,%esi + movdqa %xmm9,16(%rsp) + rorl $7,%ebx + addl %eax,%ebp + addl 40(%rsp),%edx + pslld $2,%xmm2 + xorl %ebx,%esi + movl %ebp,%edi + psrld $30,%xmm8 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + por %xmm8,%xmm2 + addl %ebp,%edx + addl 44(%rsp),%ecx + pshufd $238,%xmm1,%xmm9 + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + punpcklqdq %xmm2,%xmm9 + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + rorl $7,%edx + paddd %xmm2,%xmm10 + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + addl %edi,%eax + xorl %edx,%esi + movdqa %xmm10,32(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 56(%rsp),%ebp + pslld $2,%xmm3 + xorl %ecx,%esi + movl %eax,%edi + psrld $30,%xmm9 + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + por %xmm9,%xmm3 + addl %eax,%ebp + addl 60(%rsp),%edx + pshufd $238,%xmm2,%xmm10 + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pxor %xmm0,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + punpcklqdq %xmm3,%xmm10 + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + rorl $7,%ebp + paddd %xmm3,%xmm8 + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + addl %edi,%ebx + xorl %ebp,%esi + movdqa %xmm8,48(%rsp) + rorl $7,%edx + addl %ecx,%ebx + addl 8(%rsp),%eax + pslld $2,%xmm4 + xorl %edx,%esi + movl %ebx,%edi + psrld $30,%xmm10 + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + por %xmm10,%xmm4 + addl %ebx,%eax + addl 12(%rsp),%ebp + pshufd $238,%xmm3,%xmm8 + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm1,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + punpcklqdq %xmm4,%xmm8 + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%eax + paddd %xmm4,%xmm9 + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + addl %edi,%ecx + xorl %eax,%esi + movdqa %xmm9,0(%rsp) + rorl $7,%ebp + addl %edx,%ecx + addl 24(%rsp),%ebx + pslld $2,%xmm5 + xorl %ebp,%esi + movl %ecx,%edi + psrld $30,%xmm8 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + por %xmm8,%xmm5 + addl %ecx,%ebx + addl 28(%rsp),%eax + pshufd $238,%xmm4,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm2,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + punpcklqdq %xmm5,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + pxor %xmm7,%xmm6 + roll $5,%eax + addl %esi,%ebp + movdqa %xmm10,%xmm8 + xorl %ebx,%edi + paddd %xmm5,%xmm10 + xorl %ecx,%ebx + pxor %xmm9,%xmm6 + addl %eax,%ebp + addl 36(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm6,%xmm9 + movl %ebp,%esi + xorl %ebx,%edi + movdqa %xmm10,16(%rsp) + roll $5,%ebp + addl 
%edi,%edx + xorl %eax,%esi + pslld $2,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm9 + addl 40(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + por %xmm9,%xmm6 + rorl $7,%ebp + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + pshufd $238,%xmm5,%xmm10 + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm3,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + punpcklqdq %xmm6,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + pxor %xmm0,%xmm7 + roll $5,%ebx + addl %esi,%eax + movdqa 32(%r14),%xmm9 + xorl %ecx,%edi + paddd %xmm6,%xmm8 + xorl %edx,%ecx + pxor %xmm10,%xmm7 + addl %ebx,%eax + addl 52(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm7,%xmm10 + movl %eax,%esi + xorl %ecx,%edi + movdqa %xmm8,32(%rsp) + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + pslld $2,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm10 + addl 56(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + por %xmm10,%xmm7 + rorl $7,%eax + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + pshufd $238,%xmm6,%xmm8 + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm4,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + punpcklqdq %xmm7,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + pxor %xmm1,%xmm0 + roll $5,%ecx + addl %esi,%ebx + movdqa %xmm9,%xmm10 + xorl %edx,%edi + paddd %xmm7,%xmm9 + xorl %ebp,%edx + pxor %xmm8,%xmm0 + addl %ecx,%ebx + addl 4(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm0,%xmm8 + movl %ebx,%esi + xorl %edx,%edi + movdqa %xmm9,48(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + pslld $2,%xmm0 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm8 + addl 8(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + por %xmm8,%xmm0 + rorl $7,%ebx + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + pshufd $238,%xmm7,%xmm9 + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + pxor %xmm5,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + punpcklqdq %xmm0,%xmm9 + movl %edx,%edi + xorl %eax,%esi + pxor %xmm2,%xmm1 + roll $5,%edx + addl %esi,%ecx + movdqa %xmm10,%xmm8 + xorl %ebp,%edi + paddd %xmm0,%xmm10 + xorl %eax,%ebp + pxor %xmm9,%xmm1 + addl %edx,%ecx + addl 20(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm1,%xmm9 + movl %ecx,%esi + xorl %ebp,%edi + movdqa %xmm10,0(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + pslld $2,%xmm1 + xorl %ebp,%edx + addl %ecx,%ebx + psrld $30,%xmm9 + addl 24(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + por %xmm9,%xmm1 + rorl $7,%ecx + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + pshufd $238,%xmm0,%xmm10 + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm6,%xmm2 + addl 32(%rsp),%edx + 
andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + punpcklqdq %xmm1,%xmm10 + movl %ebp,%edi + xorl %ebx,%esi + pxor %xmm3,%xmm2 + roll $5,%ebp + addl %esi,%edx + movdqa %xmm8,%xmm9 + xorl %eax,%edi + paddd %xmm1,%xmm8 + xorl %ebx,%eax + pxor %xmm10,%xmm2 + addl %ebp,%edx + addl 36(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm2,%xmm10 + movl %edx,%esi + xorl %eax,%edi + movdqa %xmm8,16(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + pslld $2,%xmm2 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + addl 40(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + por %xmm10,%xmm2 + rorl $7,%edx + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + pshufd $238,%xmm1,%xmm8 + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + pxor %xmm7,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + punpcklqdq %xmm2,%xmm8 + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + rorl $7,%ebx + paddd %xmm2,%xmm9 + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + addl %edi,%edx + xorl %ebx,%esi + movdqa %xmm9,32(%rsp) + rorl $7,%eax + addl %ebp,%edx + addl 56(%rsp),%ecx + pslld $2,%xmm3 + xorl %eax,%esi + movl %edx,%edi + psrld $30,%xmm8 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + por %xmm8,%xmm3 + addl %edx,%ecx + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + paddd %xmm3,%xmm10 + addl %esi,%eax + xorl %edx,%edi + movdqa %xmm10,48(%rsp) + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r14),%xmm6 + movdqa -64(%r14),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi +.byte 102,15,56,0,206 + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + paddd %xmm9,%xmm0 + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + movdqa %xmm0,0(%rsp) + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + psubd %xmm9,%xmm0 + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi +.byte 102,15,56,0,214 + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + paddd %xmm9,%xmm1 + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + movdqa %xmm1,16(%rsp) + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + psubd %xmm9,%xmm1 + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + 
roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi +.byte 102,15,56,0,222 + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + paddd %xmm9,%xmm2 + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + movdqa %xmm2,32(%rsp) + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + psubd %xmm9,%xmm2 + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_ssse3 + +.p2align 4 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movaps -40-96(%r11),%xmm6 + movaps -40-80(%r11),%xmm7 + movaps -40-64(%r11),%xmm8 + movaps -40-48(%r11),%xmm9 + movaps -40-32(%r11),%xmm10 + movaps -40-16(%r11),%xmm11 + movq -40(%r11),%r14 + + movq -32(%r11),%r13 + + movq -24(%r11),%r12 + + movq -16(%r11),%rbp + + movq -8(%r11),%rbx + + leaq (%r11),%rsp + +.Lepilogue_ssse3: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 + +.LSEH_end_sha1_block_data_order_ssse3: +.def sha1_block_data_order_avx; .scl 3; .type 32; .endef +.p2align 4 +sha1_block_data_order_avx: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax 
+.LSEH_begin_sha1_block_data_order_avx: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + +_avx_shortcut: + + movq %rsp,%r11 + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + leaq -160(%rsp),%rsp + vzeroupper + vmovaps %xmm6,-40-96(%r11) + vmovaps %xmm7,-40-80(%r11) + vmovaps %xmm8,-40-64(%r11) + vmovaps %xmm9,-40-48(%r11) + vmovaps %xmm10,-40-32(%r11) + vmovaps %xmm11,-40-16(%r11) +.Lprologue_avx: + andq $-64,%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + vmovdqa 64(%r14),%xmm6 + vmovdqa -64(%r14),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + vpshufb %xmm6,%xmm1,%xmm1 + vpshufb %xmm6,%xmm2,%xmm2 + vpshufb %xmm6,%xmm3,%xmm3 + vpaddd %xmm11,%xmm0,%xmm4 + vpaddd %xmm11,%xmm1,%xmm5 + vpaddd %xmm11,%xmm2,%xmm6 + vmovdqa %xmm4,0(%rsp) + vmovdqa %xmm5,16(%rsp) + vmovdqa %xmm6,32(%rsp) + jmp .Loop_avx +.p2align 4 +.Loop_avx: + shrdl $2,%ebx,%ebx + xorl %edx,%esi + vpalignr $8,%xmm0,%xmm1,%xmm4 + movl %eax,%edi + addl 0(%rsp),%ebp + vpaddd %xmm3,%xmm11,%xmm9 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrldq $4,%xmm3,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + vpxor %xmm0,%xmm4,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + vpxor %xmm2,%xmm8,%xmm8 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + vpxor %xmm8,%xmm4,%xmm4 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vmovdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + vpsrld $31,%xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpslldq $12,%xmm4,%xmm10 + vpaddd %xmm4,%xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm4,%xmm4 + addl %esi,%ecx + andl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm4,%xmm4 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + vpxor %xmm10,%xmm4,%xmm4 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %edi,%ebx + andl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpalignr $8,%xmm1,%xmm2,%xmm5 + movl %ebx,%edi + addl 16(%rsp),%eax + vpaddd %xmm4,%xmm11,%xmm9 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrldq $4,%xmm4,%xmm8 + addl %esi,%eax + andl %ecx,%edi + vpxor %xmm1,%xmm5,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + vpxor %xmm3,%xmm8,%xmm8 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + vpxor %xmm8,%xmm5,%xmm5 + xorl %ecx,%ebx + shldl $5,%eax,%eax + vmovdqa %xmm9,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + vpsrld $31,%xmm5,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + shrdl $7,%eax,%eax + xorl %ecx,%esi + vpslldq $12,%xmm5,%xmm10 + vpaddd %xmm5,%xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm5,%xmm5 + addl %esi,%edx + andl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm5,%xmm5 + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + vpxor %xmm10,%xmm5,%xmm5 + xorl %eax,%ebp + shldl $5,%edx,%edx + vmovdqa -32(%r14),%xmm11 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + vpalignr $8,%xmm2,%xmm3,%xmm6 + movl 
%ecx,%edi + addl 32(%rsp),%ebx + vpaddd %xmm5,%xmm11,%xmm9 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vpsrldq $4,%xmm5,%xmm8 + addl %esi,%ebx + andl %edx,%edi + vpxor %xmm2,%xmm6,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + vpxor %xmm4,%xmm8,%xmm8 + shrdl $7,%ecx,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + vpxor %xmm8,%xmm6,%xmm6 + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vmovdqa %xmm9,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + vpsrld $31,%xmm6,%xmm8 + xorl %edx,%ecx + addl %ebx,%eax + shrdl $7,%ebx,%ebx + xorl %edx,%esi + vpslldq $12,%xmm6,%xmm10 + vpaddd %xmm6,%xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + shldl $5,%eax,%eax + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm6,%xmm6 + addl %esi,%ebp + andl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm6,%xmm6 + shrdl $7,%eax,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + vpxor %xmm10,%xmm6,%xmm6 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + addl %edi,%edx + andl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%esi + vpalignr $8,%xmm3,%xmm4,%xmm7 + movl %edx,%edi + addl 48(%rsp),%ecx + vpaddd %xmm6,%xmm11,%xmm9 + xorl %eax,%ebp + shldl $5,%edx,%edx + vpsrldq $4,%xmm6,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + vpxor %xmm3,%xmm7,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + vpxor %xmm5,%xmm8,%xmm8 + shrdl $7,%edx,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + vpxor %xmm8,%xmm7,%xmm7 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + vmovdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + vpsrld $31,%xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + shrdl $7,%ecx,%ecx + xorl %ebp,%esi + vpslldq $12,%xmm7,%xmm10 + vpaddd %xmm7,%xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + shldl $5,%ebx,%ebx + vpsrld $30,%xmm10,%xmm9 + vpor %xmm8,%xmm7,%xmm7 + addl %esi,%eax + andl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + vpslld $2,%xmm10,%xmm10 + vpxor %xmm9,%xmm7,%xmm7 + shrdl $7,%ebx,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + vpxor %xmm10,%xmm7,%xmm7 + xorl %ecx,%ebx + shldl $5,%eax,%eax + addl %edi,%ebp + andl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + shrdl $7,%eax,%eax + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + vpxor %xmm1,%xmm0,%xmm0 + xorl %ebx,%eax + shldl $5,%ebp,%ebp + vpaddd %xmm7,%xmm11,%xmm9 + addl %esi,%edx + andl %eax,%edi + vpxor %xmm8,%xmm0,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + shrdl $7,%ebp,%ebp + xorl %ebx,%edi + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + shldl $5,%edx,%edx + vpslld $2,%xmm0,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + shrdl $7,%edx,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + vpor %xmm8,%xmm0,%xmm0 + xorl %ebp,%edx + shldl $5,%ecx,%ecx + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm2,%xmm1,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm0,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm1,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl 
$7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm1,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm1,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + vpxor %xmm3,%xmm2,%xmm2 + addl %esi,%eax + xorl %edx,%edi + vpaddd %xmm1,%xmm11,%xmm9 + vmovdqa 0(%r14),%xmm11 + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpxor %xmm8,%xmm2,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpslld $2,%xmm2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpor %xmm8,%xmm2,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpalignr $8,%xmm2,%xmm3,%xmm8 + vpxor %xmm0,%xmm4,%xmm4 + addl 0(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + vpxor %xmm5,%xmm4,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + vpaddd %xmm3,%xmm11,%xmm9 + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpxor %xmm8,%xmm4,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + vpsrld $30,%xmm4,%xmm8 + vmovdqa %xmm9,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpslld $2,%xmm4,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vpor %xmm8,%xmm4,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm3,%xmm4,%xmm8 + vpxor %xmm1,%xmm5,%xmm5 + addl 16(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpxor %xmm6,%xmm5,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + vpaddd %xmm4,%xmm11,%xmm9 + shrdl $7,%eax,%eax + addl %ebp,%edx + vpxor %xmm8,%xmm5,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + vpsrld $30,%xmm5,%xmm8 + vmovdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpslld $2,%xmm5,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vpor 
%xmm8,%xmm5,%xmm5 + addl 28(%rsp),%eax + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + vpalignr $8,%xmm4,%xmm5,%xmm8 + vpxor %xmm2,%xmm6,%xmm6 + addl 32(%rsp),%ebp + andl %ecx,%esi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + vpxor %xmm7,%xmm6,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + vpaddd %xmm5,%xmm11,%xmm9 + shldl $5,%eax,%eax + addl %esi,%ebp + vpxor %xmm8,%xmm6,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + vpsrld $30,%xmm6,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + vpslld $2,%xmm6,%xmm6 + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + vpor %xmm8,%xmm6,%xmm6 + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%edi + xorl %eax,%esi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + vpalignr $8,%xmm5,%xmm6,%xmm8 + vpxor %xmm3,%xmm7,%xmm7 + addl 48(%rsp),%eax + andl %edx,%esi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + vpxor %xmm0,%xmm7,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + vpaddd %xmm6,%xmm11,%xmm9 + vmovdqa 32(%r14),%xmm11 + shldl $5,%ebx,%ebx + addl %esi,%eax + vpxor %xmm8,%xmm7,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + vpsrld $30,%xmm7,%xmm8 + vmovdqa %xmm9,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + vpslld $2,%xmm7,%xmm7 + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + vpor %xmm8,%xmm7,%xmm7 + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%edi + xorl %ebx,%esi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + vpalignr $8,%xmm6,%xmm7,%xmm8 + vpxor %xmm4,%xmm0,%xmm0 + addl 0(%rsp),%ebx + andl %ebp,%esi + xorl %eax,%ebp + shrdl $7,%edx,%edx + vpxor %xmm1,%xmm0,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + vpaddd %xmm7,%xmm11,%xmm9 + shldl $5,%ecx,%ecx + addl %esi,%ebx + vpxor %xmm8,%xmm0,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + vpsrld $30,%xmm0,%xmm8 + vmovdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + vpslld $2,%xmm0,%xmm0 + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + vpor %xmm8,%xmm0,%xmm0 + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%edi + xorl %ecx,%esi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + movl %ebp,%esi + xorl %ebx,%edi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + vpalignr $8,%xmm7,%xmm0,%xmm8 + vpxor %xmm5,%xmm1,%xmm1 + addl 16(%rsp),%ecx + andl %eax,%esi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + vpxor %xmm2,%xmm1,%xmm1 + movl %edx,%edi + xorl %eax,%esi + vpaddd %xmm0,%xmm11,%xmm9 + shldl $5,%edx,%edx + addl 
%esi,%ecx + vpxor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + vpsrld $30,%xmm1,%xmm8 + vmovdqa %xmm9,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%esi + vpslld $2,%xmm1,%xmm1 + xorl %ebp,%edi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + vpor %xmm8,%xmm1,%xmm1 + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%edi + xorl %edx,%esi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + shrdl $7,%ebx,%ebx + movl %eax,%esi + xorl %ecx,%edi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + vpalignr $8,%xmm0,%xmm1,%xmm8 + vpxor %xmm6,%xmm2,%xmm2 + addl 32(%rsp),%edx + andl %ebx,%esi + xorl %ecx,%ebx + shrdl $7,%eax,%eax + vpxor %xmm3,%xmm2,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + vpaddd %xmm1,%xmm11,%xmm9 + shldl $5,%ebp,%ebp + addl %esi,%edx + vpxor %xmm8,%xmm2,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + vpsrld $30,%xmm2,%xmm8 + vmovdqa %xmm9,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + shrdl $7,%ebp,%ebp + movl %edx,%esi + vpslld $2,%xmm2,%xmm2 + xorl %eax,%edi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + vpor %xmm8,%xmm2,%xmm2 + xorl %eax,%ebp + shrdl $7,%edx,%edx + movl %ecx,%edi + xorl %ebp,%esi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + shrdl $7,%ecx,%ecx + movl %ebx,%esi + xorl %edx,%edi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + vpalignr $8,%xmm1,%xmm2,%xmm8 + vpxor %xmm7,%xmm3,%xmm3 + addl 48(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + vpxor %xmm4,%xmm3,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + vpaddd %xmm2,%xmm11,%xmm9 + shrdl $7,%ebx,%ebx + addl %eax,%ebp + vpxor %xmm8,%xmm3,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + vpsrld $30,%xmm3,%xmm8 + vmovdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + vpslld $2,%xmm3,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vpor %xmm8,%xmm3,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + vpaddd %xmm3,%xmm11,%xmm9 + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + vmovdqa %xmm9,48(%rsp) + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_avx + vmovdqa 64(%r14),%xmm6 + vmovdqa -64(%r14),%xmm11 + vmovdqu 0(%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + vpshufb %xmm6,%xmm0,%xmm0 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi + vpshufb 
%xmm6,%xmm1,%xmm1 + movl %ecx,%edi + shldl $5,%ecx,%ecx + vpaddd %xmm11,%xmm0,%xmm4 + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + vmovdqa %xmm4,0(%rsp) + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + vpshufb %xmm6,%xmm2,%xmm2 + movl %edx,%edi + shldl $5,%edx,%edx + vpaddd %xmm11,%xmm1,%xmm5 + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + vmovdqa %xmm5,16(%rsp) + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + vpshufb %xmm6,%xmm3,%xmm3 + movl %ebp,%edi + shldl $5,%ebp,%ebp + vpaddd %xmm11,%xmm2,%xmm6 + addl %esi,%edx + xorl %ebx,%edi + shrdl $7,%eax,%eax + addl %ebp,%edx + vmovdqa %xmm6,32(%rsp) + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_avx + +.p2align 4 +.Ldone_avx: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + xorl %edx,%esi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + shldl $5,%eax,%eax + addl %esi,%ebp + xorl %ecx,%edi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + shldl $5,%ebp,%ebp + addl %edi,%edx + xorl %ebx,%esi + shrdl $7,%eax,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + shldl $5,%edx,%edx + addl %esi,%ecx + xorl %eax,%edi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + shldl $5,%ecx,%ecx + addl %edi,%ebx + xorl %ebp,%esi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + shldl $5,%ebx,%ebx + addl %esi,%eax + xorl %edx,%edi + shrdl $7,%ecx,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + shldl $5,%eax,%eax + addl %edi,%ebp + xorl %ecx,%esi + shrdl $7,%ebx,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + shldl $5,%ebp,%ebp + addl %esi,%edx + xorl %ebx,%edi + 
shrdl $7,%eax,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + shldl $5,%edx,%edx + addl %edi,%ecx + xorl %eax,%esi + shrdl $7,%ebp,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + shldl $5,%ecx,%ecx + addl %esi,%ebx + xorl %ebp,%edi + shrdl $7,%edx,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + shldl $5,%ebx,%ebx + addl %edi,%eax + shrdl $7,%ecx,%ecx + addl %ebx,%eax + vzeroupper + + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movaps -40-96(%r11),%xmm6 + movaps -40-80(%r11),%xmm7 + movaps -40-64(%r11),%xmm8 + movaps -40-48(%r11),%xmm9 + movaps -40-32(%r11),%xmm10 + movaps -40-16(%r11),%xmm11 + movq -40(%r11),%r14 + + movq -32(%r11),%r13 + + movq -24(%r11),%r12 + + movq -16(%r11),%rbp + + movq -8(%r11),%rbx + + leaq (%r11),%rsp + +.Lepilogue_avx: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 + +.LSEH_end_sha1_block_data_order_avx: +.def sha1_block_data_order_avx2; .scl 3; .type 32; .endef +.p2align 4 +sha1_block_data_order_avx2: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha1_block_data_order_avx2: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + +_avx2_shortcut: + + movq %rsp,%r11 + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + vzeroupper + leaq -96(%rsp),%rsp + vmovaps %xmm6,-40-96(%r11) + vmovaps %xmm7,-40-80(%r11) + vmovaps %xmm8,-40-64(%r11) + vmovaps %xmm9,-40-48(%r11) + vmovaps %xmm10,-40-32(%r11) + vmovaps %xmm11,-40-16(%r11) +.Lprologue_avx2: + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + leaq -640(%rsp),%rsp + shlq $6,%r10 + leaq 64(%r9),%r13 + andq $-128,%rsp + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r14 + + movl 0(%r8),%eax + cmpq %r10,%r13 + cmovaeq %r9,%r13 + movl 4(%r8),%ebp + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl 16(%r8),%esi + vmovdqu 64(%r14),%ymm6 + + vmovdqu (%r9),%xmm0 + vmovdqu 16(%r9),%xmm1 + vmovdqu 32(%r9),%xmm2 + vmovdqu 48(%r9),%xmm3 + leaq 64(%r9),%r9 + vinserti128 $1,(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vpshufb %ymm6,%ymm0,%ymm0 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vpshufb %ymm6,%ymm1,%ymm1 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + vpshufb %ymm6,%ymm2,%ymm2 + vmovdqu -64(%r14),%ymm11 + vpshufb %ymm6,%ymm3,%ymm3 + + vpaddd %ymm11,%ymm0,%ymm4 + vpaddd %ymm11,%ymm1,%ymm5 + vmovdqu %ymm4,0(%rsp) + vpaddd %ymm11,%ymm2,%ymm6 + vmovdqu %ymm5,32(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + vmovdqu %ymm6,64(%rsp) + vmovdqu %ymm7,96(%rsp) + vpalignr $8,%ymm0,%ymm1,%ymm4 + vpsrldq $4,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + vpxor %ymm8,%ymm4,%ymm4 + vpsrld $31,%ymm4,%ymm8 + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + vpxor %ymm10,%ymm4,%ymm4 + vpaddd %ymm11,%ymm4,%ymm9 + vmovdqu %ymm9,128(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm5 + vpsrldq $4,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + vpxor %ymm8,%ymm5,%ymm5 + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r14),%ymm11 + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + vpxor %ymm10,%ymm5,%ymm5 + vpaddd %ymm11,%ymm5,%ymm9 + vmovdqu %ymm9,160(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm6 + vpsrldq $4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + vpxor 
%ymm4,%ymm8,%ymm8 + vpxor %ymm8,%ymm6,%ymm6 + vpsrld $31,%ymm6,%ymm8 + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + vpxor %ymm10,%ymm6,%ymm6 + vpaddd %ymm11,%ymm6,%ymm9 + vmovdqu %ymm9,192(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm7 + vpsrldq $4,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + vpxor %ymm8,%ymm7,%ymm7 + vpsrld $31,%ymm7,%ymm8 + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + vpxor %ymm10,%ymm7,%ymm7 + vpaddd %ymm11,%ymm7,%ymm9 + vmovdqu %ymm9,224(%rsp) + leaq 128(%rsp),%r13 + jmp .Loop_avx2 +.p2align 5 +.Loop_avx2: + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + jmp .Lalign32_1 +.p2align 5 +.Lalign32_1: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + vpxor %ymm1,%ymm0,%ymm0 + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpxor %ymm8,%ymm0,%ymm0 + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vpor %ymm8,%ymm0,%ymm0 + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + vpaddd %ymm11,%ymm0,%ymm9 + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + vmovdqu %ymm9,256(%rsp) + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + vpxor %ymm2,%ymm1,%ymm1 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpxor %ymm8,%ymm1,%ymm1 + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vpor %ymm8,%ymm1,%ymm1 + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + vpaddd %ymm11,%ymm1,%ymm9 + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + vmovdqu %ymm9,288(%rsp) + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + vpxor %ymm3,%ymm2,%ymm2 + vmovdqu 0(%r14),%ymm11 + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpxor %ymm8,%ymm2,%ymm2 + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vpor %ymm8,%ymm2,%ymm2 + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + vpaddd %ymm11,%ymm2,%ymm9 + 
addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + vmovdqu %ymm9,320(%rsp) + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + vpxor %ymm4,%ymm3,%ymm3 + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpxor %ymm8,%ymm3,%ymm3 + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + vpor %ymm8,%ymm3,%ymm3 + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + vpaddd %ymm11,%ymm3,%ymm9 + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + vmovdqu %ymm9,352(%rsp) + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpalignr $8,%ymm2,%ymm3,%ymm8 + vpxor %ymm0,%ymm4,%ymm4 + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpxor %ymm5,%ymm4,%ymm4 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpxor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + vpsrld $30,%ymm4,%ymm8 + vpslld $2,%ymm4,%ymm4 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpor %ymm8,%ymm4,%ymm4 + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpaddd %ymm11,%ymm4,%ymm9 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + vmovdqu %ymm9,384(%rsp) + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpalignr $8,%ymm3,%ymm4,%ymm8 + vpxor %ymm1,%ymm5,%ymm5 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm6,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpxor %ymm8,%ymm5,%ymm5 + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + vpsrld $30,%ymm5,%ymm8 + vpslld $2,%ymm5,%ymm5 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vpor %ymm8,%ymm5,%ymm5 + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + vmovdqu %ymm9,416(%rsp) + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm4,%ymm5,%ymm8 + vpxor %ymm2,%ymm6,%ymm6 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm7,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl 
%ebx,%eax + vpxor %ymm8,%ymm6,%ymm6 + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + vpsrld $30,%ymm6,%ymm8 + vpslld $2,%ymm6,%ymm6 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vpor %ymm8,%ymm6,%ymm6 + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + vmovdqu %ymm9,448(%rsp) + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm5,%ymm6,%ymm8 + vpxor %ymm3,%ymm7,%ymm7 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm0,%ymm7,%ymm7 + vmovdqu 32(%r14),%ymm11 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpxor %ymm8,%ymm7,%ymm7 + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + vpsrld $30,%ymm7,%ymm8 + vpslld $2,%ymm7,%ymm7 + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vpor %ymm8,%ymm7,%ymm7 + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + vmovdqu %ymm9,480(%rsp) + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + jmp .Lalign32_2 +.p2align 5 +.Lalign32_2: + vpalignr $8,%ymm6,%ymm7,%ymm8 + vpxor %ymm4,%ymm0,%ymm0 + addl -64(%r13),%ebp + xorl %esi,%ecx + vpxor %ymm1,%ymm0,%ymm0 + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + vpxor %ymm8,%ymm0,%ymm0 + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpsrld $30,%ymm0,%ymm8 + vpslld $2,%ymm0,%ymm0 + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + vpor %ymm8,%ymm0,%ymm0 + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + vpaddd %ymm11,%ymm0,%ymm9 + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + vmovdqu %ymm9,512(%rsp) + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + vpalignr $8,%ymm7,%ymm0,%ymm8 + vpxor %ymm5,%ymm1,%ymm1 + addl -28(%r13),%ebx + xorl %eax,%edx + vpxor %ymm2,%ymm1,%ymm1 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpxor %ymm8,%ymm1,%ymm1 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpsrld $30,%ymm1,%ymm8 + vpslld $2,%ymm1,%ymm1 + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + vpor %ymm8,%ymm1,%ymm1 + leal (%rcx,%rbp,1),%ebp 
+ rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + vpaddd %ymm11,%ymm1,%ymm9 + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + vmovdqu %ymm9,544(%rsp) + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vpalignr $8,%ymm0,%ymm1,%ymm8 + vpxor %ymm6,%ymm2,%ymm2 + addl 8(%r13),%ecx + xorl %ebp,%esi + vpxor %ymm3,%ymm2,%ymm2 + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + vpxor %ymm8,%ymm2,%ymm2 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm2,%ymm8 + vpslld $2,%ymm2,%ymm2 + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + vpor %ymm8,%ymm2,%ymm2 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vpaddd %ymm11,%ymm2,%ymm9 + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + vmovdqu %ymm9,576(%rsp) + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm1,%ymm2,%ymm8 + vpxor %ymm7,%ymm3,%ymm3 + addl 44(%r13),%edx + xorl %ebx,%eax + vpxor %ymm4,%ymm3,%ymm3 + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm3,%ymm3 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + vpsrld $30,%ymm3,%ymm8 + vpslld $2,%ymm3,%ymm3 + addl %r12d,%edx + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + vpor %ymm8,%ymm3,%ymm3 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + vpaddd %ymm11,%ymm3,%ymm9 + addl %r12d,%ecx + andl %edi,%edx + addl 68(%r13),%ebx + xorl %eax,%edx + vmovdqu %ymm9,608(%rsp) + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 
108(%r13),%ebx + leaq 256(%r13),%r13 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -96(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -60(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%r9),%r13 + leaq 128(%r9),%rdi + cmpq %r10,%r13 + cmovaeq %r9,%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + je .Ldone_avx2 + vmovdqu 64(%r14),%ymm6 + cmpq %r10,%rdi + ja .Last_avx2 + + vmovdqu -64(%rdi),%xmm0 + vmovdqu -48(%rdi),%xmm1 + vmovdqu -32(%rdi),%xmm2 + vmovdqu -16(%rdi),%xmm3 + vinserti128 $1,0(%r13),%ymm0,%ymm0 + vinserti128 $1,16(%r13),%ymm1,%ymm1 + vinserti128 $1,32(%r13),%ymm2,%ymm2 + vinserti128 $1,48(%r13),%ymm3,%ymm3 + jmp .Last_avx2 + +.p2align 5 +.Last_avx2: + leaq 128+16(%rsp),%r13 + rorxl $2,%ebp,%ebx + andnl %edx,%ebp,%edi + andl %ecx,%ebp + xorl %edi,%ebp + subq $-128,%r9 + addl -128(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -124(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl 
$27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -120(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -116(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -96(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -92(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -88(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -84(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -64(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -60(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl -56(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl -52(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl -32(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl -28(%r13),%edx + andnl %ebx,%esi,%edi + addl %eax,%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + andl %ebp,%esi + addl %r12d,%edx + xorl %edi,%esi + addl -24(%r13),%ecx + andnl %ebp,%edx,%edi + addl %esi,%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + andl %eax,%edx + addl %r12d,%ecx + xorl %edi,%edx + addl -20(%r13),%ebx + andnl %eax,%ecx,%edi + addl %edx,%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + andl %esi,%ecx + addl %r12d,%ebx + xorl %edi,%ecx + addl 0(%r13),%ebp + andnl %esi,%ebx,%edi + addl %ecx,%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + andl %edx,%ebx + addl %r12d,%ebp + xorl %edi,%ebx + addl 4(%r13),%eax + andnl %edx,%ebp,%edi + addl %ebx,%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + andl %ecx,%ebp + addl %r12d,%eax + xorl %edi,%ebp + addl 8(%r13),%esi + andnl %ecx,%eax,%edi + addl %ebp,%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + andl %ebx,%eax + addl %r12d,%esi + xorl %edi,%eax + addl 12(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 32(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 36(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 40(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 44(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl 64(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d 
+ rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + vmovdqu -64(%r14),%ymm11 + vpshufb %ymm6,%ymm0,%ymm0 + addl 68(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl 72(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl 76(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl 96(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl 100(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpshufb %ymm6,%ymm1,%ymm1 + vpaddd %ymm11,%ymm0,%ymm8 + addl 104(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl 108(%r13),%edx + leaq 256(%r13),%r13 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -128(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -124(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -120(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vmovdqu %ymm8,0(%rsp) + vpshufb %ymm6,%ymm2,%ymm2 + vpaddd %ymm11,%ymm1,%ymm9 + addl -116(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -92(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + addl -88(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -84(%r13),%ebx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + vmovdqu %ymm9,32(%rsp) + vpshufb %ymm6,%ymm3,%ymm3 + vpaddd %ymm11,%ymm2,%ymm6 + addl -64(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -60(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl -56(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl -52(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + addl -32(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + jmp .Lalign32_3 
+.p2align 5 +.Lalign32_3: + vmovdqu %ymm6,64(%rsp) + vpaddd %ymm11,%ymm3,%ymm7 + addl -28(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl -24(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl -20(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 0(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + addl 4(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + andl %edi,%esi + vmovdqu %ymm7,96(%rsp) + addl 8(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + andl %edi,%edx + addl 12(%r13),%ebx + xorl %eax,%edx + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + andl %edi,%ecx + addl 32(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 36(%r13),%eax + xorl %edx,%ebx + movl %ecx,%edi + xorl %edx,%edi + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + andl %edi,%ebp + addl 40(%r13),%esi + xorl %ecx,%ebp + movl %ebx,%edi + xorl %ecx,%edi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + andl %edi,%eax + vpalignr $8,%ymm0,%ymm1,%ymm4 + addl 44(%r13),%edx + xorl %ebx,%eax + movl %ebp,%edi + xorl %ebx,%edi + vpsrldq $4,%ymm3,%ymm8 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpxor %ymm0,%ymm4,%ymm4 + vpxor %ymm2,%ymm8,%ymm8 + xorl %ebp,%esi + addl %r12d,%edx + vpxor %ymm8,%ymm4,%ymm4 + andl %edi,%esi + addl 64(%r13),%ecx + xorl %ebp,%esi + movl %eax,%edi + vpsrld $31,%ymm4,%ymm8 + xorl %ebp,%edi + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + vpslldq $12,%ymm4,%ymm10 + vpaddd %ymm4,%ymm4,%ymm4 + rorxl $2,%edx,%esi + xorl %eax,%edx + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm4,%ymm4 + addl %r12d,%ecx + andl %edi,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm4,%ymm4 + addl 68(%r13),%ebx + xorl %eax,%edx + vpxor %ymm10,%ymm4,%ymm4 + movl %esi,%edi + xorl %eax,%edi + leal (%rbx,%rdx,1),%ebx + vpaddd %ymm11,%ymm4,%ymm9 + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + vmovdqu %ymm9,128(%rsp) + addl %r12d,%ebx + andl %edi,%ecx + addl 72(%r13),%ebp + xorl %esi,%ecx + movl %edx,%edi + xorl %esi,%edi + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + andl %edi,%ebx + addl 76(%r13),%eax + xorl %edx,%ebx + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpalignr $8,%ymm1,%ymm2,%ymm5 + addl 96(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrldq $4,%ymm4,%ymm8 + xorl %ebx,%eax + addl 
%r12d,%esi + xorl %ecx,%eax + vpxor %ymm1,%ymm5,%ymm5 + vpxor %ymm3,%ymm8,%ymm8 + addl 100(%r13),%edx + leal (%rdx,%rax,1),%edx + vpxor %ymm8,%ymm5,%ymm5 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + xorl %ebp,%esi + addl %r12d,%edx + vpsrld $31,%ymm5,%ymm8 + vmovdqu -32(%r14),%ymm11 + xorl %ebx,%esi + addl 104(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + vpslldq $12,%ymm5,%ymm10 + vpaddd %ymm5,%ymm5,%ymm5 + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm5,%ymm5 + xorl %eax,%edx + addl %r12d,%ecx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm5,%ymm5 + xorl %ebp,%edx + addl 108(%r13),%ebx + leaq 256(%r13),%r13 + vpxor %ymm10,%ymm5,%ymm5 + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + vpaddd %ymm11,%ymm5,%ymm9 + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vmovdqu %ymm9,160(%rsp) + addl -128(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpalignr $8,%ymm2,%ymm3,%ymm6 + addl -124(%r13),%eax + leal (%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + vpsrldq $4,%ymm5,%ymm8 + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + vpxor %ymm2,%ymm6,%ymm6 + vpxor %ymm4,%ymm8,%ymm8 + addl -120(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpxor %ymm8,%ymm6,%ymm6 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + vpsrld $31,%ymm6,%ymm8 + xorl %ecx,%eax + addl -116(%r13),%edx + leal (%rdx,%rax,1),%edx + vpslldq $12,%ymm6,%ymm10 + vpaddd %ymm6,%ymm6,%ymm6 + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm6,%ymm6 + xorl %ebp,%esi + addl %r12d,%edx + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm6,%ymm6 + xorl %ebx,%esi + addl -96(%r13),%ecx + vpxor %ymm10,%ymm6,%ymm6 + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + vpaddd %ymm11,%ymm6,%ymm9 + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + vmovdqu %ymm9,192(%rsp) + addl -92(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + vpalignr $8,%ymm3,%ymm4,%ymm7 + addl -88(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + vpsrldq $4,%ymm6,%ymm8 + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + vpxor %ymm3,%ymm7,%ymm7 + vpxor %ymm5,%ymm8,%ymm8 + addl -84(%r13),%eax + leal (%rax,%rbx,1),%eax + vpxor %ymm8,%ymm7,%ymm7 + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + vpsrld $31,%ymm7,%ymm8 + xorl %edx,%ebp + addl -64(%r13),%esi + leal (%rsi,%rbp,1),%esi + vpslldq $12,%ymm7,%ymm10 + vpaddd %ymm7,%ymm7,%ymm7 + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + vpsrld $30,%ymm10,%ymm9 + vpor %ymm8,%ymm7,%ymm7 + xorl %ebx,%eax + addl %r12d,%esi + vpslld $2,%ymm10,%ymm10 + vpxor %ymm9,%ymm7,%ymm7 + xorl %ecx,%eax + addl -60(%r13),%edx + vpxor %ymm10,%ymm7,%ymm7 + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + rorxl $2,%esi,%eax + vpaddd %ymm11,%ymm7,%ymm9 + xorl %ebp,%esi + addl %r12d,%edx + xorl %ebx,%esi + vmovdqu %ymm9,224(%rsp) + addl -56(%r13),%ecx + leal (%rcx,%rsi,1),%ecx + rorxl $27,%edx,%r12d + rorxl $2,%edx,%esi + xorl %eax,%edx + addl %r12d,%ecx + xorl %ebp,%edx + addl -52(%r13),%ebx + leal (%rbx,%rdx,1),%ebx + rorxl $27,%ecx,%r12d + rorxl $2,%ecx,%edx + xorl %esi,%ecx + addl %r12d,%ebx + xorl %eax,%ecx + addl -32(%r13),%ebp + leal (%rcx,%rbp,1),%ebp + rorxl $27,%ebx,%r12d + rorxl $2,%ebx,%ecx + xorl %edx,%ebx + addl %r12d,%ebp + xorl %esi,%ebx + addl -28(%r13),%eax + leal 
(%rax,%rbx,1),%eax + rorxl $27,%ebp,%r12d + rorxl $2,%ebp,%ebx + xorl %ecx,%ebp + addl %r12d,%eax + xorl %edx,%ebp + addl -24(%r13),%esi + leal (%rsi,%rbp,1),%esi + rorxl $27,%eax,%r12d + rorxl $2,%eax,%ebp + xorl %ebx,%eax + addl %r12d,%esi + xorl %ecx,%eax + addl -20(%r13),%edx + leal (%rdx,%rax,1),%edx + rorxl $27,%esi,%r12d + addl %r12d,%edx + leaq 128(%rsp),%r13 + + + addl 0(%r8),%edx + addl 4(%r8),%esi + addl 8(%r8),%ebp + movl %edx,0(%r8) + addl 12(%r8),%ebx + movl %esi,4(%r8) + movl %edx,%eax + addl 16(%r8),%ecx + movl %ebp,%r12d + movl %ebp,8(%r8) + movl %ebx,%edx + + movl %ebx,12(%r8) + movl %esi,%ebp + movl %ecx,16(%r8) + + movl %ecx,%esi + movl %r12d,%ecx + + + cmpq %r10,%r9 + jbe .Loop_avx2 + +.Ldone_avx2: + vzeroupper + movaps -40-96(%r11),%xmm6 + movaps -40-80(%r11),%xmm7 + movaps -40-64(%r11),%xmm8 + movaps -40-48(%r11),%xmm9 + movaps -40-32(%r11),%xmm10 + movaps -40-16(%r11),%xmm11 + movq -40(%r11),%r14 + + movq -32(%r11),%r13 + + movq -24(%r11),%r12 + + movq -16(%r11),%rbp + + movq -8(%r11),%rbx + + leaq (%r11),%rsp + +.Lepilogue_avx2: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 + +.LSEH_end_sha1_block_data_order_avx2: +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 + +.def se_handler; .scl 3; .type 32; .endef +.p2align 4 +se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + leaq .Lprologue(%rip),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + leaq .Lepilogue(%rip),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + movq 64(%rax),%rax + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq -32(%rax),%r13 + movq -40(%rax),%r14 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + movq %r14,232(%r8) + + jmp .Lcommon_seh_tail + +.def shaext_handler; .scl 3; .type 32; .endef +.p2align 4 +shaext_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + leaq .Lprologue_shaext(%rip),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + leaq .Lepilogue_shaext(%rip),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq -8-64(%rax),%rsi + leaq 512(%r8),%rdi + movl $8,%ecx +.long 0xa548f3fc + + jmp .Lcommon_seh_tail + +.def ssse3_handler; .scl 3; .type 32; .endef +.p2align 4 +ssse3_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + 
cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 208(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq -40-96(%rax),%rsi + leaq 512(%r8),%rdi + movl $12,%ecx +.long 0xa548f3fc + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq -32(%rax),%r13 + movq -40(%rax),%r14 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + movq %r14,232(%r8) + +.Lcommon_seh_tail: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + movq 40(%r9),%rdi + movq %r8,%rsi + movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + .byte 0xf3,0xc3 + + +.section .pdata +.p2align 2 +.rva .LSEH_begin_sha1_block_data_order +.rva .LSEH_end_sha1_block_data_order +.rva .LSEH_info_sha1_block_data_order +.rva .LSEH_begin_sha1_block_data_order_shaext +.rva .LSEH_end_sha1_block_data_order_shaext +.rva .LSEH_info_sha1_block_data_order_shaext +.rva .LSEH_begin_sha1_block_data_order_ssse3 +.rva .LSEH_end_sha1_block_data_order_ssse3 +.rva .LSEH_info_sha1_block_data_order_ssse3 +.rva .LSEH_begin_sha1_block_data_order_avx +.rva .LSEH_end_sha1_block_data_order_avx +.rva .LSEH_info_sha1_block_data_order_avx +.rva .LSEH_begin_sha1_block_data_order_avx2 +.rva .LSEH_end_sha1_block_data_order_avx2 +.rva .LSEH_info_sha1_block_data_order_avx2 +.section .xdata +.p2align 3 +.LSEH_info_sha1_block_data_order: +.byte 9,0,0,0 +.rva se_handler +.LSEH_info_sha1_block_data_order_shaext: +.byte 9,0,0,0 +.rva shaext_handler +.LSEH_info_sha1_block_data_order_ssse3: +.byte 9,0,0,0 +.rva ssse3_handler +.rva .Lprologue_ssse3,.Lepilogue_ssse3 +.LSEH_info_sha1_block_data_order_avx: +.byte 9,0,0,0 +.rva ssse3_handler +.rva .Lprologue_avx,.Lepilogue_avx +.LSEH_info_sha1_block_data_order_avx2: +.byte 9,0,0,0 +.rva ssse3_handler +.rva .Lprologue_avx2,.Lepilogue_avx2 + |
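
The diff above only adds the generated assembly itself. As a hypothetical illustration (not part of the patch or of the generated file), the sketch below shows how the exported `sha1_block_data_order` entry point is conventionally driven from C. The prototype is inferred from the code: the first argument points at the five 32-bit chaining words, the second at the input data, and the third is a count of 64-byte blocks (the AVX2 path shifts it left by 6). It assumes the file has been assembled for the COFF/Win64 target and that `GNUTLS_x86_cpuid_s` has already been populated by the library's CPUID probe, since the dispatch code reads those flags to pick the SHAEXT/SSSE3/AVX/AVX2 paths.

```c
/*
 * Hypothetical driver for the generated sha1_block_data_order routine.
 * Prototype inferred from the assembly: state is 5 x 32-bit words,
 * num_blocks counts 64-byte blocks. GNUTLS_x86_cpuid_s must be filled
 * in by the library's CPUID detection before this is called.
 */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

void sha1_block_data_order(uint32_t state[5], const void *data, size_t num_blocks);

int main(void)
{
    /* SHA-1 initial hash values (FIPS 180-4). */
    uint32_t h[5] = { 0x67452301, 0xefcdab89, 0x98badcfe,
                      0x10325476, 0xc3d2e1f0 };

    /* One pre-padded 64-byte block for the empty message:
       0x80 pad byte followed by zeros, including a zero length field. */
    uint8_t block[64] = { 0x80 };

    sha1_block_data_order(h, block, 1);   /* compress a single block */

    for (int i = 0; i < 5; i++)
        printf("%08x", h[i]);             /* da39a3ee... for the empty input */
    printf("\n");
    return 0;
}
```

Higher-level callers normally never invoke this symbol directly; the hashing front end handles message buffering and length padding and only hands whole 64-byte blocks to the compression routine.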