author     Nikos Mavrogiannopoulos <nmav@gnutls.org>  2013-12-14 11:57:02 +0100
committer  Nikos Mavrogiannopoulos <nmav@gnutls.org>  2013-12-14 12:00:04 +0100
commit     cbb9b17ff9f9861f6b6db466186f2fcb766955a2 (patch)
tree       17fd7ed2a1e331aa4dde0a530dbc939365d33308 /lib/accelerated
parent     c1416e9865a498fa102987310f30c00dfecf524e (diff)
download   gnutls-cbb9b17ff9f9861f6b6db466186f2fcb766955a2.tar.gz
Added Appro's SSSE3 SHA implementations
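
This commit probes the CPU once with CPUID leaf 1, caches the four result
registers in _gnutls_x86_cpuid_s, and keys each registration on a feature
bit in ECX: bit 9 (0x200) for SSSE3, which gates the new SHA code, next to
the existing bit 25 (0x2000000) for AES-NI and bit 1 (0x2) for PCLMULQDQ.
A minimal standalone sketch of the same detection, using GCC's <cpuid.h>
instead of gnutls's internal gnutls_cpuid():

    #include <cpuid.h>
    #include <stdio.h>

    /* Cached CPUID leaf-1 result, mirroring _gnutls_x86_cpuid_s below:
     * [0]=EAX [1]=EBX [2]=ECX [3]=EDX. */
    static unsigned int cpuid_s[4];

    int main(void)
    {
            if (!__get_cpuid(1, &cpuid_s[0], &cpuid_s[1],
                             &cpuid_s[2], &cpuid_s[3]))
                    return 1;
            /* Same ECX masks as the check_*() helpers in aes-x86.c. */
            printf("PCLMULQDQ: %s\n", (cpuid_s[2] & 0x2) ? "yes" : "no");
            printf("SSSE3:     %s\n", (cpuid_s[2] & 0x200) ? "yes" : "no");
            printf("AES-NI:    %s\n", (cpuid_s[2] & 0x2000000) ? "yes" : "no");
            return 0;
    }

Caching the leaf once means the check_*() helpers below no longer have to
re-execute CPUID on every call.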
Diffstat (limited to 'lib/accelerated')
-rw-r--r--  lib/accelerated/x86/Makefile.am | 19
-rw-r--r--  lib/accelerated/x86/aes-padlock.h | 3
-rw-r--r--  lib/accelerated/x86/aes-x86.c | 101
-rw-r--r--  lib/accelerated/x86/coff/aesni-x86.s (renamed from lib/accelerated/x86/coff/appro-aes-x86-coff.s) | 2
-rw-r--r--  lib/accelerated/x86/coff/aesni-x86_64.s (renamed from lib/accelerated/x86/coff/appro-aes-x86-64-coff.s) | 2
-rw-r--r--  lib/accelerated/x86/coff/cpuid-x86.s (renamed from lib/accelerated/x86/coff/cpuid-x86-coff.s) | 2
-rw-r--r--  lib/accelerated/x86/coff/cpuid-x86_64.s (renamed from lib/accelerated/x86/coff/cpuid-x86-64-coff.s) | 2
-rw-r--r--  lib/accelerated/x86/coff/e_padlock-x86.s (renamed from lib/accelerated/x86/coff/padlock-x86-coff.s) | 2
-rw-r--r--  lib/accelerated/x86/coff/e_padlock-x86_64.s (renamed from lib/accelerated/x86/coff/padlock-x86-64-coff.s) | 2
-rw-r--r--  lib/accelerated/x86/coff/ghash-x86_64.s (renamed from lib/accelerated/x86/coff/appro-aes-gcm-x86-64-coff.s) | 2
-rw-r--r--  lib/accelerated/x86/coff/openssl-cpuid-x86.s | 396
-rw-r--r--  lib/accelerated/x86/coff/openssl-cpuid-x86_64.s | 361
-rw-r--r--  lib/accelerated/x86/coff/sha1-ssse3-x86.s | 1420
-rw-r--r--  lib/accelerated/x86/coff/sha1-ssse3-x86_64.s | 2693
-rw-r--r--  lib/accelerated/x86/coff/sha256-avx-x86_64.s | 2645
-rw-r--r--  lib/accelerated/x86/coff/sha256-ssse3-x86.s | 3402
-rw-r--r--  lib/accelerated/x86/coff/sha512-ssse3-x86.s | 605
-rw-r--r--  lib/accelerated/x86/coff/sha512-ssse3-x86_64.s | 3025
-rw-r--r--  lib/accelerated/x86/elf/aesni-x86.s (renamed from lib/accelerated/x86/elf/appro-aes-x86.s) | 0
-rw-r--r--  lib/accelerated/x86/elf/aesni-x86_64.s (renamed from lib/accelerated/x86/elf/appro-aes-x86-64.s) | 0
-rw-r--r--  lib/accelerated/x86/elf/cpuid-x86_64.s (renamed from lib/accelerated/x86/elf/cpuid-x86-64.s) | 0
-rw-r--r--  lib/accelerated/x86/elf/e_padlock-x86.s (renamed from lib/accelerated/x86/elf/padlock-x86.s) | 0
-rw-r--r--  lib/accelerated/x86/elf/e_padlock-x86_64.s (renamed from lib/accelerated/x86/elf/padlock-x86-64.s) | 0
-rw-r--r--  lib/accelerated/x86/elf/ghash-x86_64.s (renamed from lib/accelerated/x86/elf/appro-aes-gcm-x86-64.s) | 0
-rw-r--r--  lib/accelerated/x86/elf/sha1-ssse3-x86.s | 1421
-rw-r--r--  lib/accelerated/x86/elf/sha1-ssse3-x86_64.s | 2515
-rw-r--r--  lib/accelerated/x86/elf/sha256-avx-x86_64.s | 2614
-rw-r--r--  lib/accelerated/x86/elf/sha256-ssse3-x86.s | 3403
-rw-r--r--  lib/accelerated/x86/elf/sha512-ssse3-x86.s | 606
-rw-r--r--  lib/accelerated/x86/elf/sha512-ssse3-x86_64.s | 2881
-rw-r--r--  lib/accelerated/x86/files.mk | 6
-rw-r--r--  lib/accelerated/x86/hmac-x86.c | 300
-rw-r--r--  lib/accelerated/x86/macosx/aesni-x86.s (renamed from lib/accelerated/x86/macosx/appro-aes-x86-macosx.s) | 4
-rw-r--r--  lib/accelerated/x86/macosx/aesni-x86_64.s (renamed from lib/accelerated/x86/macosx/appro-aes-x86-64-macosx.s) | 4
-rw-r--r--  lib/accelerated/x86/macosx/cpuid-x86-64-macosx.s | 75
-rw-r--r--  lib/accelerated/x86/macosx/cpuid-x86-macosx.s | 87
-rw-r--r--  lib/accelerated/x86/macosx/cpuid-x86.s | 70
-rw-r--r--  lib/accelerated/x86/macosx/cpuid-x86_64.s | 58
-rw-r--r--  lib/accelerated/x86/macosx/e_padlock-x86.s (renamed from lib/accelerated/x86/macosx/padlock-x86-macosx.s) | 4
-rw-r--r--  lib/accelerated/x86/macosx/e_padlock-x86_64.s (renamed from lib/accelerated/x86/macosx/padlock-x86-64-macosx.s) | 4
-rw-r--r--  lib/accelerated/x86/macosx/ghash-x86_64.s (renamed from lib/accelerated/x86/macosx/appro-aes-gcm-x86-64-macosx.s) | 4
-rw-r--r--  lib/accelerated/x86/macosx/openssl-cpuid-x86.s | 399
-rw-r--r--  lib/accelerated/x86/macosx/openssl-cpuid-x86_64.s | 365
-rw-r--r--  lib/accelerated/x86/macosx/sha1-ssse3-x86.s | 1419
-rw-r--r--  lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s | 2515
-rw-r--r--  lib/accelerated/x86/macosx/sha256-avx-x86_64.s | 2614
-rw-r--r--  lib/accelerated/x86/macosx/sha256-ssse3-x86.s | 3405
-rw-r--r--  lib/accelerated/x86/macosx/sha512-ssse3-x86.s | 604
-rw-r--r--  lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s | 2881
-rw-r--r--  lib/accelerated/x86/sha-padlock.h | 3
-rw-r--r--  lib/accelerated/x86/sha-x86.c | 365
-rw-r--r--  lib/accelerated/x86/sha-x86.h | 22
52 files changed, 43142 insertions, 190 deletions
diff --git a/lib/accelerated/x86/Makefile.am b/lib/accelerated/x86/Makefile.am
index 8edcbbb5f0..e231103dad 100644
--- a/lib/accelerated/x86/Makefile.am
+++ b/lib/accelerated/x86/Makefile.am
@@ -31,41 +31,44 @@ if ENABLE_MINITASN1
AM_CPPFLAGS += -I$(srcdir)/../../minitasn1
endif
-EXTRA_DIST = README license.txt
+EXTRA_DIST = README license.txt files.mk
noinst_LTLIBRARIES = libx86.la
-libx86_la_SOURCES = sha-padlock.c hmac-padlock.c aes-x86.c aes-padlock.c aes-gcm-padlock.c aes-padlock.h aes-x86.h x86.h sha-padlock.h
+libx86_la_SOURCES = sha-padlock.c hmac-padlock.c aes-x86.c aes-padlock.c aes-gcm-padlock.c \
+ aes-padlock.h aes-x86.h x86.h sha-padlock.h sha-x86.c sha-x86.h hmac-x86.c
+
+include files.mk
if ASM_X86_64
AM_CFLAGS += -DASM_X86_64 -DASM_X86
libx86_la_SOURCES += aes-gcm-x86.c
if WINDOWS
-libx86_la_SOURCES += coff/appro-aes-x86-64-coff.s coff/padlock-x86-64-coff.s coff/cpuid-x86-64-coff.s coff/appro-aes-gcm-x86-64-coff.s
+libx86_la_SOURCES += $(X86_64_FILES_COFF)
endif
if MACOSX
-libx86_la_SOURCES += macosx/appro-aes-x86-64-macosx.s macosx/padlock-x86-64-macosx.s macosx/cpuid-x86-64-macosx.s macosx/appro-aes-gcm-x86-64-macosx.s
+libx86_la_SOURCES += $(X86_64_FILES_MACOSX)
endif
if ELF
-libx86_la_SOURCES += elf/appro-aes-x86-64.s elf/appro-aes-gcm-x86-64.s elf/padlock-x86-64.s elf/cpuid-x86-64.s
+libx86_la_SOURCES += $(X86_64_FILES_ELF)
endif
else #ASM_X86_64
AM_CFLAGS += -DASM_X86_32 -DASM_X86
if WINDOWS
-libx86_la_SOURCES += coff/appro-aes-x86-coff.s coff/padlock-x86-coff.s coff/cpuid-x86-coff.s
+libx86_la_SOURCES += $(X86_FILES_COFF)
endif
if MACOSX
-libx86_la_SOURCES += macosx/appro-aes-x86-macosx.s macosx/padlock-x86-macosx.s macosx/cpuid-x86-macosx.s
+libx86_la_SOURCES += $(X86_FILES_MACOSX)
endif
if ELF
-libx86_la_SOURCES += elf/appro-aes-x86.s elf/padlock-x86.s elf/cpuid-x86.s
+libx86_la_SOURCES += $(X86_FILES_ELF)
endif
endif #ASM_X86_64
diff --git a/lib/accelerated/x86/aes-padlock.h b/lib/accelerated/x86/aes-padlock.h
index cd5d437c8f..851b40b6f9 100644
--- a/lib/accelerated/x86/aes-padlock.h
+++ b/lib/accelerated/x86/aes-padlock.h
@@ -31,9 +31,6 @@ extern const gnutls_crypto_cipher_st aes_gcm_padlock_struct;
extern const gnutls_crypto_mac_st hmac_sha_padlock_struct;
extern const gnutls_crypto_digest_st sha_padlock_struct;
-extern const gnutls_crypto_mac_st hmac_sha_padlock_nano_struct;
-extern const gnutls_crypto_digest_st sha_padlock_nano_struct;
-
int padlock_aes_cipher_setkey(void *_ctx, const void *userkey,
size_t keysize);
diff --git a/lib/accelerated/x86/aes-x86.c b/lib/accelerated/x86/aes-x86.c
index 59e2b13280..c50836a168 100644
--- a/lib/accelerated/x86/aes-x86.c
+++ b/lib/accelerated/x86/aes-x86.c
@@ -30,6 +30,7 @@
#include <gnutls/crypto.h>
#include <gnutls_errors.h>
#include <aes-x86.h>
+#include <sha-x86.h>
#include <x86.h>
struct aes_ctx {
@@ -38,6 +39,8 @@ struct aes_ctx {
int enc;
};
+unsigned int _gnutls_x86_cpuid_s[4];
+
static int
aes_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx, int enc)
{
@@ -126,19 +129,18 @@ static const gnutls_crypto_cipher_st cipher_struct = {
static unsigned check_optimized_aes(void)
{
- unsigned int a, b, c, d;
- gnutls_cpuid(1, &a, &b, &c, &d);
+ return (_gnutls_x86_cpuid_s[2] & 0x2000000);
+}
- return (c & 0x2000000);
+static unsigned check_ssse3(void)
+{
+ return (_gnutls_x86_cpuid_s[2] & 0x0000200);
}
#ifdef ASM_X86_64
static unsigned check_pclmul(void)
{
- unsigned int a, b, c, d;
- gnutls_cpuid(1, &a, &b, &c, &d);
-
- return (c & 0x2);
+ return (_gnutls_x86_cpuid_s[2] & 0x2);
}
#endif
@@ -165,6 +167,88 @@ void register_x86_crypto(void)
if (check_intel_or_amd() == 0)
return;
+ gnutls_cpuid(1, &_gnutls_x86_cpuid_s[0], &_gnutls_x86_cpuid_s[1],
+ &_gnutls_x86_cpuid_s[2], &_gnutls_x86_cpuid_s[3]);
+
+ if (check_ssse3()) {
+ _gnutls_debug_log("Intel SSSE3 was detected\n");
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1,
+ 80,
+ &sha_x86_struct);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA224,
+ 80,
+ &sha_x86_struct);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA256,
+ 80,
+ &sha_x86_struct);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA1,
+ 80,
+ &hmac_sha_x86_struct);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA224,
+ 80,
+ &hmac_sha_x86_struct);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA256,
+ 80,
+ &hmac_sha_x86_struct);
+ if (ret < 0)
+ gnutls_assert();
+
+#ifdef ENABLE_SHA512
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA384,
+ 80,
+ &sha_x86_struct);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA512,
+ 80,
+ &sha_x86_struct);
+ if (ret < 0)
+ gnutls_assert();
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA384,
+ 80,
+ &hmac_sha_x86_struct);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA512,
+ 80,
+ &hmac_sha_x86_struct);
+ if (ret < 0)
+ gnutls_assert();
+#endif
+ }
+
if (check_optimized_aes()) {
_gnutls_debug_log("Intel AES accelerator was detected\n");
ret =
@@ -211,5 +295,8 @@ void register_x86_crypto(void)
#endif
}
+ /* convert _gnutls_x86_cpuid_s the way openssl asm expects it */
+ _gnutls_x86_cpuid_s[1] = _gnutls_x86_cpuid_s[2];
+
return;
}
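
The trailing _gnutls_x86_cpuid_s[1] = _gnutls_x86_cpuid_s[2] assignment is
not redundant: gnutls_cpuid() stored EAX/EBX/ECX/EDX in order, but the
imported OpenSSL assembly expects the ECX feature bits in word 1 of the
capability vector (sha1_block_data_order below loads _gnutls_x86_cpuid_s+4
and tests mask 512, i.e. the SSSE3 bit). A small illustrative sketch of the
resulting dispatch; the values are invented and this is not gnutls API:

    #include <stdio.h>

    /* Stand-in for _gnutls_x86_cpuid_s after register_x86_crypto() has
     * run; 0x200 in word 1 is the SSSE3 bit copied over from ECX. */
    static unsigned int cpuid_s[4] = { 0, 0x200, 0x200, 0 };

    int main(void)
    {
            /* Mirrors the branch at the top of sha1_block_data_order:
             * SSSE3 set -> vectorized path, else integer-ALU path. */
            printf("sha1 path: %s\n",
                   (cpuid_s[1] & 0x200) ? "ssse3" : "ialu");
            return 0;
    }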
diff --git a/lib/accelerated/x86/coff/appro-aes-x86-coff.s b/lib/accelerated/x86/coff/aesni-x86.s
index d00c02f285..1970712d30 100644
--- a/lib/accelerated/x86/coff/appro-aes-x86-coff.s
+++ b/lib/accelerated/x86/coff/aesni-x86.s
@@ -2162,3 +2162,5 @@ _aesni_set_decrypt_key:
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/appro-aes-x86-64-coff.s b/lib/accelerated/x86/coff/aesni-x86_64.s
index 224a226b0d..85b51085a5 100644
--- a/lib/accelerated/x86/coff/appro-aes-x86-64-coff.s
+++ b/lib/accelerated/x86/coff/aesni-x86_64.s
@@ -3420,3 +3420,5 @@ cbc_se_handler:
.LSEH_info_key:
.byte 0x01,0x04,0x01,0x00
.byte 0x04,0x02,0x00,0x00
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/cpuid-x86-coff.s b/lib/accelerated/x86/coff/cpuid-x86.s
index d8074ed62f..f35cfba63a 100644
--- a/lib/accelerated/x86/coff/cpuid-x86-coff.s
+++ b/lib/accelerated/x86/coff/cpuid-x86.s
@@ -68,3 +68,5 @@ _gnutls_have_cpuid:
andl $2097152,%eax
ret
.byte 67,80,85,73,68,32,102,111,114,32,120,56,54,0
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/cpuid-x86-64-coff.s b/lib/accelerated/x86/coff/cpuid-x86_64.s
index 5662a4b586..033df92ebf 100644
--- a/lib/accelerated/x86/coff/cpuid-x86-64-coff.s
+++ b/lib/accelerated/x86/coff/cpuid-x86_64.s
@@ -67,3 +67,5 @@ gnutls_cpuid:
movq 16(%rsp),%rsi
.byte 0xf3,0xc3
.LSEH_end_gnutls_cpuid:
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/padlock-x86-coff.s b/lib/accelerated/x86/coff/e_padlock-x86.s
index d969f307b5..d51d62ff73 100644
--- a/lib/accelerated/x86/coff/padlock-x86-coff.s
+++ b/lib/accelerated/x86/coff/e_padlock-x86.s
@@ -1059,3 +1059,5 @@ _padlock_sha512_blocks:
.align 4
.Lpadlock_saved_context:
.long 0
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/padlock-x86-64-coff.s b/lib/accelerated/x86/coff/e_padlock-x86_64.s
index a3a0e301e7..14c62fd176 100644
--- a/lib/accelerated/x86/coff/padlock-x86-64-coff.s
+++ b/lib/accelerated/x86/coff/e_padlock-x86_64.s
@@ -1187,3 +1187,5 @@ padlock_ctr32_encrypt:
.p2align 3
.Lpadlock_saved_context:
.quad 0
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/appro-aes-gcm-x86-64-coff.s b/lib/accelerated/x86/coff/ghash-x86_64.s
index ceb9108c32..951ee891b9 100644
--- a/lib/accelerated/x86/coff/appro-aes-gcm-x86-64-coff.s
+++ b/lib/accelerated/x86/coff/ghash-x86_64.s
@@ -1525,3 +1525,5 @@ se_handler:
.byte 0x0c,0x78,0x01,0x00
.byte 0x08,0x68,0x00,0x00
.byte 0x04,0x01,0x15,0x00
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/openssl-cpuid-x86.s b/lib/accelerated/x86/coff/openssl-cpuid-x86.s
new file mode 100644
index 0000000000..2e1b08cd9a
--- /dev/null
+++ b/lib/accelerated/x86/coff/openssl-cpuid-x86.s
@@ -0,0 +1,396 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "x86cpuid.s"
+.text
+.globl _OPENSSL_ia32_cpuid
+.def _OPENSSL_ia32_cpuid; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_ia32_cpuid:
+.L_OPENSSL_ia32_cpuid_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ xorl %edx,%edx
+ pushfl
+ popl %eax
+ movl %eax,%ecx
+ xorl $2097152,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ xorl %eax,%ecx
+ xorl %eax,%eax
+ btl $21,%ecx
+ jnc .L000nocpuid
+ movl 20(%esp),%esi
+ movl %eax,8(%esi)
+ .byte 0x0f,0xa2
+ movl %eax,%edi
+ xorl %eax,%eax
+ cmpl $1970169159,%ebx
+ setne %al
+ movl %eax,%ebp
+ cmpl $1231384169,%edx
+ setne %al
+ orl %eax,%ebp
+ cmpl $1818588270,%ecx
+ setne %al
+ orl %eax,%ebp
+ jz .L001intel
+ cmpl $1752462657,%ebx
+ setne %al
+ movl %eax,%esi
+ cmpl $1769238117,%edx
+ setne %al
+ orl %eax,%esi
+ cmpl $1145913699,%ecx
+ setne %al
+ orl %eax,%esi
+ jnz .L001intel
+ movl $2147483648,%eax
+ .byte 0x0f,0xa2
+ cmpl $2147483649,%eax
+ jb .L001intel
+ movl %eax,%esi
+ movl $2147483649,%eax
+ .byte 0x0f,0xa2
+ orl %ecx,%ebp
+ andl $2049,%ebp
+ cmpl $2147483656,%esi
+ jb .L001intel
+ movl $2147483656,%eax
+ .byte 0x0f,0xa2
+ movzbl %cl,%esi
+ incl %esi
+ movl $1,%eax
+ xorl %ecx,%ecx
+ .byte 0x0f,0xa2
+ btl $28,%edx
+ jnc .L002generic
+ shrl $16,%ebx
+ andl $255,%ebx
+ cmpl %esi,%ebx
+ ja .L002generic
+ andl $4026531839,%edx
+ jmp .L002generic
+.L001intel:
+ cmpl $7,%edi
+ jb .L003cacheinfo
+ movl 20(%esp),%esi
+ movl $7,%eax
+ xorl %ecx,%ecx
+ .byte 0x0f,0xa2
+ movl %ebx,8(%esi)
+.L003cacheinfo:
+ cmpl $4,%edi
+ movl $-1,%edi
+ jb .L004nocacheinfo
+ movl $4,%eax
+ movl $0,%ecx
+ .byte 0x0f,0xa2
+ movl %eax,%edi
+ shrl $14,%edi
+ andl $4095,%edi
+.L004nocacheinfo:
+ movl $1,%eax
+ xorl %ecx,%ecx
+ .byte 0x0f,0xa2
+ andl $3220176895,%edx
+ cmpl $0,%ebp
+ jne .L005notintel
+ orl $1073741824,%edx
+ andb $15,%ah
+ cmpb $15,%ah
+ jne .L005notintel
+ orl $1048576,%edx
+.L005notintel:
+ btl $28,%edx
+ jnc .L002generic
+ andl $4026531839,%edx
+ cmpl $0,%edi
+ je .L002generic
+ orl $268435456,%edx
+ shrl $16,%ebx
+ cmpb $1,%bl
+ ja .L002generic
+ andl $4026531839,%edx
+.L002generic:
+ andl $2048,%ebp
+ andl $4294965247,%ecx
+ movl %edx,%esi
+ orl %ecx,%ebp
+ btl $27,%ecx
+ jnc .L006clear_avx
+ xorl %ecx,%ecx
+.byte 15,1,208
+ andl $6,%eax
+ cmpl $6,%eax
+ je .L007done
+ cmpl $2,%eax
+ je .L006clear_avx
+.L008clear_xmm:
+ andl $4261412861,%ebp
+ andl $4278190079,%esi
+.L006clear_avx:
+ andl $4026525695,%ebp
+ movl 20(%esp),%edi
+ andl $4294967263,8(%edi)
+.L007done:
+ movl %esi,%eax
+ movl %ebp,%edx
+.L000nocpuid:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _OPENSSL_rdtsc
+.def _OPENSSL_rdtsc; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_rdtsc:
+.L_OPENSSL_rdtsc_begin:
+ xorl %eax,%eax
+ xorl %edx,%edx
+ leal __gnutls_x86_cpuid_s,%ecx
+ btl $4,(%ecx)
+ jnc .L009notsc
+ .byte 0x0f,0x31
+.L009notsc:
+ ret
+.globl _OPENSSL_instrument_halt
+.def _OPENSSL_instrument_halt; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_instrument_halt:
+.L_OPENSSL_instrument_halt_begin:
+ leal __gnutls_x86_cpuid_s,%ecx
+ btl $4,(%ecx)
+ jnc .L010nohalt
+.long 2421723150
+ andl $3,%eax
+ jnz .L010nohalt
+ pushfl
+ popl %eax
+ btl $9,%eax
+ jnc .L010nohalt
+ .byte 0x0f,0x31
+ pushl %edx
+ pushl %eax
+ hlt
+ .byte 0x0f,0x31
+ subl (%esp),%eax
+ sbbl 4(%esp),%edx
+ addl $8,%esp
+ ret
+.L010nohalt:
+ xorl %eax,%eax
+ xorl %edx,%edx
+ ret
+.globl _OPENSSL_far_spin
+.def _OPENSSL_far_spin; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_far_spin:
+.L_OPENSSL_far_spin_begin:
+ pushfl
+ popl %eax
+ btl $9,%eax
+ jnc .L011nospin
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+.long 2430111262
+ xorl %eax,%eax
+ movl (%ecx),%edx
+ jmp .L012spin
+.align 16
+.L012spin:
+ incl %eax
+ cmpl (%ecx),%edx
+ je .L012spin
+.long 529567888
+ ret
+.L011nospin:
+ xorl %eax,%eax
+ xorl %edx,%edx
+ ret
+.globl _OPENSSL_wipe_cpu
+.def _OPENSSL_wipe_cpu; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_wipe_cpu:
+.L_OPENSSL_wipe_cpu_begin:
+ xorl %eax,%eax
+ xorl %edx,%edx
+ leal __gnutls_x86_cpuid_s,%ecx
+ movl (%ecx),%ecx
+ btl $1,(%ecx)
+ jnc .L013no_x87
+.long 4007259865,4007259865,4007259865,4007259865,2430851995
+.L013no_x87:
+ leal 4(%esp),%eax
+ ret
+.globl _OPENSSL_atomic_add
+.def _OPENSSL_atomic_add; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_atomic_add:
+.L_OPENSSL_atomic_add_begin:
+ movl 4(%esp),%edx
+ movl 8(%esp),%ecx
+ pushl %ebx
+ nop
+ movl (%edx),%eax
+.L014spin:
+ leal (%eax,%ecx,1),%ebx
+ nop
+.long 447811568
+ jne .L014spin
+ movl %ebx,%eax
+ popl %ebx
+ ret
+.globl _OPENSSL_indirect_call
+.def _OPENSSL_indirect_call; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_indirect_call:
+.L_OPENSSL_indirect_call_begin:
+ pushl %ebp
+ movl %esp,%ebp
+ subl $28,%esp
+ movl 12(%ebp),%ecx
+ movl %ecx,(%esp)
+ movl 16(%ebp),%edx
+ movl %edx,4(%esp)
+ movl 20(%ebp),%eax
+ movl %eax,8(%esp)
+ movl 24(%ebp),%eax
+ movl %eax,12(%esp)
+ movl 28(%ebp),%eax
+ movl %eax,16(%esp)
+ movl 32(%ebp),%eax
+ movl %eax,20(%esp)
+ movl 36(%ebp),%eax
+ movl %eax,24(%esp)
+ call *8(%ebp)
+ movl %ebp,%esp
+ popl %ebp
+ ret
+.globl _OPENSSL_cleanse
+.def _OPENSSL_cleanse; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_cleanse:
+.L_OPENSSL_cleanse_begin:
+ movl 4(%esp),%edx
+ movl 8(%esp),%ecx
+ xorl %eax,%eax
+ cmpl $7,%ecx
+ jae .L015lot
+ cmpl $0,%ecx
+ je .L016ret
+.L017little:
+ movb %al,(%edx)
+ subl $1,%ecx
+ leal 1(%edx),%edx
+ jnz .L017little
+.L016ret:
+ ret
+.align 16
+.L015lot:
+ testl $3,%edx
+ jz .L018aligned
+ movb %al,(%edx)
+ leal -1(%ecx),%ecx
+ leal 1(%edx),%edx
+ jmp .L015lot
+.L018aligned:
+ movl %eax,(%edx)
+ leal -4(%ecx),%ecx
+ testl $-4,%ecx
+ leal 4(%edx),%edx
+ jnz .L018aligned
+ cmpl $0,%ecx
+ jne .L017little
+ ret
+.globl _OPENSSL_instrument_bus
+.def _OPENSSL_instrument_bus; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_instrument_bus:
+.L_OPENSSL_instrument_bus_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl $0,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _OPENSSL_instrument_bus2
+.def _OPENSSL_instrument_bus2; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_instrument_bus2:
+.L_OPENSSL_instrument_bus2_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl $0,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _OPENSSL_ia32_rdrand
+.def _OPENSSL_ia32_rdrand; .scl 2; .type 32; .endef
+.align 16
+_OPENSSL_ia32_rdrand:
+.L_OPENSSL_ia32_rdrand_begin:
+ movl $8,%ecx
+.L019loop:
+.byte 15,199,240
+ jc .L020break
+ loop .L019loop
+.L020break:
+ cmpl $0,%eax
+ cmovel %ecx,%eax
+ ret
+.comm __gnutls_x86_cpuid_s,16
+.section .ctors
+.long _OPENSSL_cpuid_setup
+
+.section .note.GNU-stack,"",%progbits
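
A note on the opaque decimal immediates in the vendor check above: they are
the CPUID leaf-0 vendor strings, packed four bytes per register in
little-endian order. "Genu"/"ineI"/"ntel" in EBX/EDX/ECX give 1970169159,
1231384169 and 1818588270; "Auth"/"enti"/"cAMD" give 1752462657, 1769238117
and 1145913699. A quick C check of the packing:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            unsigned int genu;
            memcpy(&genu, "Genu", 4);  /* little-endian 32-bit pack */
            printf("%u\n", genu);      /* prints 1970169159 on x86 */
            return 0;
    }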
diff --git a/lib/accelerated/x86/coff/openssl-cpuid-x86_64.s b/lib/accelerated/x86/coff/openssl-cpuid-x86_64.s
new file mode 100644
index 0000000000..d1439e8c13
--- /dev/null
+++ b/lib/accelerated/x86/coff/openssl-cpuid-x86_64.s
@@ -0,0 +1,361 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+
+
+.section .ctors
+ .p2align 3
+ .quad OPENSSL_cpuid_setup
+
+
+.comm _gnutls_x86_cpuid_s,16,4
+
+.text
+
+.globl OPENSSL_atomic_add
+.def OPENSSL_atomic_add; .scl 2; .type 32; .endef
+.p2align 4
+OPENSSL_atomic_add:
+ movl (%rcx),%eax
+.Lspin: leaq (%rdx,%rax,1),%r8
+.byte 0xf0
+ cmpxchgl %r8d,(%rcx)
+ jne .Lspin
+ movl %r8d,%eax
+.byte 0x48,0x98
+ .byte 0xf3,0xc3
+
+
+.globl OPENSSL_rdtsc
+.def OPENSSL_rdtsc; .scl 2; .type 32; .endef
+.p2align 4
+OPENSSL_rdtsc:
+ rdtsc
+ shlq $32,%rdx
+ orq %rdx,%rax
+ .byte 0xf3,0xc3
+
+
+.globl OPENSSL_ia32_cpuid
+.def OPENSSL_ia32_cpuid; .scl 2; .type 32; .endef
+.p2align 4
+OPENSSL_ia32_cpuid:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_OPENSSL_ia32_cpuid:
+ movq %rcx,%rdi
+
+ movq %rbx,%r8
+
+ xorl %eax,%eax
+ movl %eax,8(%rdi)
+ cpuid
+ movl %eax,%r11d
+
+ xorl %eax,%eax
+ cmpl $1970169159,%ebx
+ setne %al
+ movl %eax,%r9d
+ cmpl $1231384169,%edx
+ setne %al
+ orl %eax,%r9d
+ cmpl $1818588270,%ecx
+ setne %al
+ orl %eax,%r9d
+ jz .Lintel
+
+ cmpl $1752462657,%ebx
+ setne %al
+ movl %eax,%r10d
+ cmpl $1769238117,%edx
+ setne %al
+ orl %eax,%r10d
+ cmpl $1145913699,%ecx
+ setne %al
+ orl %eax,%r10d
+ jnz .Lintel
+
+
+ movl $2147483648,%eax
+ cpuid
+ cmpl $2147483649,%eax
+ jb .Lintel
+ movl %eax,%r10d
+ movl $2147483649,%eax
+ cpuid
+ orl %ecx,%r9d
+ andl $2049,%r9d
+
+ cmpl $2147483656,%r10d
+ jb .Lintel
+
+ movl $2147483656,%eax
+ cpuid
+ movzbq %cl,%r10
+ incq %r10
+
+ movl $1,%eax
+ cpuid
+ btl $28,%edx
+ jnc .Lgeneric
+ shrl $16,%ebx
+ cmpb %r10b,%bl
+ ja .Lgeneric
+ andl $4026531839,%edx
+ jmp .Lgeneric
+
+.Lintel:
+ cmpl $4,%r11d
+ movl $-1,%r10d
+ jb .Lnocacheinfo
+
+ movl $4,%eax
+ movl $0,%ecx
+ cpuid
+ movl %eax,%r10d
+ shrl $14,%r10d
+ andl $4095,%r10d
+
+ cmpl $7,%r11d
+ jb .Lnocacheinfo
+
+ movl $7,%eax
+ xorl %ecx,%ecx
+ cpuid
+ movl %ebx,8(%rdi)
+
+.Lnocacheinfo:
+ movl $1,%eax
+ cpuid
+ andl $3220176895,%edx
+ cmpl $0,%r9d
+ jne .Lnotintel
+ orl $1073741824,%edx
+ andb $15,%ah
+ cmpb $15,%ah
+ jne .Lnotintel
+ orl $1048576,%edx
+.Lnotintel:
+ btl $28,%edx
+ jnc .Lgeneric
+ andl $4026531839,%edx
+ cmpl $0,%r10d
+ je .Lgeneric
+
+ orl $268435456,%edx
+ shrl $16,%ebx
+ cmpb $1,%bl
+ ja .Lgeneric
+ andl $4026531839,%edx
+.Lgeneric:
+ andl $2048,%r9d
+ andl $4294965247,%ecx
+ orl %ecx,%r9d
+
+ movl %edx,%r10d
+ btl $27,%r9d
+ jnc .Lclear_avx
+ xorl %ecx,%ecx
+.byte 0x0f,0x01,0xd0
+ andl $6,%eax
+ cmpl $6,%eax
+ je .Ldone
+.Lclear_avx:
+ movl $4026525695,%eax
+ andl %eax,%r9d
+ andl $4294967263,8(%rdi)
+.Ldone:
+ shlq $32,%r9
+ movl %r10d,%eax
+ movq %r8,%rbx
+ orq %r9,%rax
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ .byte 0xf3,0xc3
+.LSEH_end_OPENSSL_ia32_cpuid:
+
+.globl OPENSSL_cleanse
+.def OPENSSL_cleanse; .scl 2; .type 32; .endef
+.p2align 4
+OPENSSL_cleanse:
+ xorq %rax,%rax
+ cmpq $15,%rdx
+ jae .Lot
+ cmpq $0,%rdx
+ je .Lret
+.Little:
+ movb %al,(%rcx)
+ subq $1,%rdx
+ leaq 1(%rcx),%rcx
+ jnz .Little
+.Lret:
+ .byte 0xf3,0xc3
+.p2align 4
+.Lot:
+ testq $7,%rcx
+ jz .Laligned
+ movb %al,(%rcx)
+ leaq -1(%rdx),%rdx
+ leaq 1(%rcx),%rcx
+ jmp .Lot
+.Laligned:
+ movq %rax,(%rcx)
+ leaq -8(%rdx),%rdx
+ testq $-8,%rdx
+ leaq 8(%rcx),%rcx
+ jnz .Laligned
+ cmpq $0,%rdx
+ jne .Little
+ .byte 0xf3,0xc3
+
+.globl OPENSSL_wipe_cpu
+.def OPENSSL_wipe_cpu; .scl 2; .type 32; .endef
+.p2align 4
+OPENSSL_wipe_cpu:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ xorq %rcx,%rcx
+ xorq %rdx,%rdx
+ xorq %r8,%r8
+ xorq %r9,%r9
+ xorq %r10,%r10
+ xorq %r11,%r11
+ leaq 8(%rsp),%rax
+ .byte 0xf3,0xc3
+
+.globl OPENSSL_instrument_bus
+.def OPENSSL_instrument_bus; .scl 2; .type 32; .endef
+.p2align 4
+OPENSSL_instrument_bus:
+ movq %rcx,%r10
+ movq %rdx,%rcx
+ movq %rdx,%r11
+
+ rdtsc
+ movl %eax,%r8d
+ movl $0,%r9d
+ clflush (%r10)
+.byte 0xf0
+ addl %r9d,(%r10)
+ jmp .Loop
+.p2align 4
+.Loop: rdtsc
+ movl %eax,%edx
+ subl %r8d,%eax
+ movl %edx,%r8d
+ movl %eax,%r9d
+ clflush (%r10)
+.byte 0xf0
+ addl %eax,(%r10)
+ leaq 4(%r10),%r10
+ subq $1,%rcx
+ jnz .Loop
+
+ movq %r11,%rax
+ .byte 0xf3,0xc3
+
+
+.globl OPENSSL_instrument_bus2
+.def OPENSSL_instrument_bus2; .scl 2; .type 32; .endef
+.p2align 4
+OPENSSL_instrument_bus2:
+ movq %rcx,%r10
+ movq %rdx,%rcx
+ movq %r8,%r11
+ movq %rcx,8(%rsp)
+
+ rdtsc
+ movl %eax,%r8d
+ movl $0,%r9d
+
+ clflush (%r10)
+.byte 0xf0
+ addl %r9d,(%r10)
+
+ rdtsc
+ movl %eax,%edx
+ subl %r8d,%eax
+ movl %edx,%r8d
+ movl %eax,%r9d
+.Loop2:
+ clflush (%r10)
+.byte 0xf0
+ addl %eax,(%r10)
+
+ subq $1,%r11
+ jz .Ldone2
+
+ rdtsc
+ movl %eax,%edx
+ subl %r8d,%eax
+ movl %edx,%r8d
+ cmpl %r9d,%eax
+ movl %eax,%r9d
+ movl $0,%edx
+ setne %dl
+ subq %rdx,%rcx
+ leaq (%r10,%rdx,4),%r10
+ jnz .Loop2
+
+.Ldone2:
+ movq 8(%rsp),%rax
+ subq %rcx,%rax
+ .byte 0xf3,0xc3
+
+.globl OPENSSL_ia32_rdrand
+.def OPENSSL_ia32_rdrand; .scl 2; .type 32; .endef
+.p2align 4
+OPENSSL_ia32_rdrand:
+ movl $8,%ecx
+.Loop_rdrand:
+.byte 72,15,199,240
+ jc .Lbreak_rdrand
+ loop .Loop_rdrand
+.Lbreak_rdrand:
+ cmpq $0,%rax
+ cmoveq %rcx,%rax
+ .byte 0xf3,0xc3
+
+
+.section .note.GNU-stack,"",%progbits
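
OPENSSL_ia32_cpuid above returns both feature words in a single 64-bit
value: the closing shlq $32,%r9 / orq %r9,%rax sequence packs the massaged
ECX bits into the high half and the EDX bits into the low half. A hedged
sketch of how a C caller could split that return; the numeric value is
invented for illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t cap = 0x0000020009808009ULL;      /* hypothetical return */
            uint32_t edx_bits = (uint32_t)cap;         /* low:  EDX features */
            uint32_t ecx_bits = (uint32_t)(cap >> 32); /* high: ECX features */

            printf("SSSE3: %s\n", (ecx_bits & 0x200) ? "yes" : "no");
            printf("SSE2:  %s\n", (edx_bits & (1u << 26)) ? "yes" : "no");
            return 0;
    }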
diff --git a/lib/accelerated/x86/coff/sha1-ssse3-x86.s b/lib/accelerated/x86/coff/sha1-ssse3-x86.s
new file mode 100644
index 0000000000..9bd41a0de4
--- /dev/null
+++ b/lib/accelerated/x86/coff/sha1-ssse3-x86.s
@@ -0,0 +1,1420 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "sha1-586.s"
+.text
+.globl _sha1_block_data_order
+.def _sha1_block_data_order; .scl 2; .type 32; .endef
+.align 16
+_sha1_block_data_order:
+.L_sha1_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%ebp
+ movl 24(%esp),%esi
+ movl 28(%esp),%eax
+ subl $76,%esp
+ shll $6,%eax
+ addl %esi,%eax
+ movl %eax,104(%esp)
+ movl 16(%ebp),%edi
+ jmp .L000loop
+.align 16
+.L000loop:
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movl %ecx,8(%esp)
+ movl %edx,12(%esp)
+ movl 16(%esi),%eax
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,16(%esp)
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %edx,28(%esp)
+ movl 32(%esi),%eax
+ movl 36(%esi),%ebx
+ movl 40(%esi),%ecx
+ movl 44(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,32(%esp)
+ movl %ebx,36(%esp)
+ movl %ecx,40(%esp)
+ movl %edx,44(%esp)
+ movl 48(%esi),%eax
+ movl 52(%esi),%ebx
+ movl 56(%esi),%ecx
+ movl 60(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,48(%esp)
+ movl %ebx,52(%esp)
+ movl %ecx,56(%esp)
+ movl %edx,60(%esp)
+ movl %esi,100(%esp)
+ movl (%ebp),%eax
+ movl 4(%ebp),%ebx
+ movl 8(%ebp),%ecx
+ movl 12(%ebp),%edx
+ # 00_15 0
+ movl %ecx,%esi
+ movl %eax,%ebp
+ roll $5,%ebp
+ xorl %edx,%esi
+ addl %edi,%ebp
+ movl (%esp),%edi
+ andl %ebx,%esi
+ rorl $2,%ebx
+ xorl %edx,%esi
+ leal 1518500249(%ebp,%edi,1),%ebp
+ addl %esi,%ebp
+ # 00_15 1
+ movl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ecx,%edi
+ addl %edx,%ebp
+ movl 4(%esp),%edx
+ andl %eax,%edi
+ rorl $2,%eax
+ xorl %ecx,%edi
+ leal 1518500249(%ebp,%edx,1),%ebp
+ addl %edi,%ebp
+ # 00_15 2
+ movl %eax,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%edx
+ addl %ecx,%ebp
+ movl 8(%esp),%ecx
+ andl %esi,%edx
+ rorl $2,%esi
+ xorl %ebx,%edx
+ leal 1518500249(%ebp,%ecx,1),%ebp
+ addl %edx,%ebp
+ # 00_15 3
+ movl %esi,%ecx
+ movl %ebp,%edx
+ roll $5,%ebp
+ xorl %eax,%ecx
+ addl %ebx,%ebp
+ movl 12(%esp),%ebx
+ andl %edi,%ecx
+ rorl $2,%edi
+ xorl %eax,%ecx
+ leal 1518500249(%ebp,%ebx,1),%ebp
+ addl %ecx,%ebp
+ # 00_15 4
+ movl %edi,%ebx
+ movl %ebp,%ecx
+ roll $5,%ebp
+ xorl %esi,%ebx
+ addl %eax,%ebp
+ movl 16(%esp),%eax
+ andl %edx,%ebx
+ rorl $2,%edx
+ xorl %esi,%ebx
+ leal 1518500249(%ebp,%eax,1),%ebp
+ addl %ebx,%ebp
+ # 00_15 5
+ movl %edx,%eax
+ movl %ebp,%ebx
+ roll $5,%ebp
+ xorl %edi,%eax
+ addl %esi,%ebp
+ movl 20(%esp),%esi
+ andl %ecx,%eax
+ rorl $2,%ecx
+ xorl %edi,%eax
+ leal 1518500249(%ebp,%esi,1),%ebp
+ addl %eax,%ebp
+ # 00_15 6
+ movl %ecx,%esi
+ movl %ebp,%eax
+ roll $5,%ebp
+ xorl %edx,%esi
+ addl %edi,%ebp
+ movl 24(%esp),%edi
+ andl %ebx,%esi
+ rorl $2,%ebx
+ xorl %edx,%esi
+ leal 1518500249(%ebp,%edi,1),%ebp
+ addl %esi,%ebp
+ # 00_15 7
+ movl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ecx,%edi
+ addl %edx,%ebp
+ movl 28(%esp),%edx
+ andl %eax,%edi
+ rorl $2,%eax
+ xorl %ecx,%edi
+ leal 1518500249(%ebp,%edx,1),%ebp
+ addl %edi,%ebp
+ # 00_15 8
+ movl %eax,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%edx
+ addl %ecx,%ebp
+ movl 32(%esp),%ecx
+ andl %esi,%edx
+ rorl $2,%esi
+ xorl %ebx,%edx
+ leal 1518500249(%ebp,%ecx,1),%ebp
+ addl %edx,%ebp
+ # 00_15 9
+ movl %esi,%ecx
+ movl %ebp,%edx
+ roll $5,%ebp
+ xorl %eax,%ecx
+ addl %ebx,%ebp
+ movl 36(%esp),%ebx
+ andl %edi,%ecx
+ rorl $2,%edi
+ xorl %eax,%ecx
+ leal 1518500249(%ebp,%ebx,1),%ebp
+ addl %ecx,%ebp
+ # 00_15 10
+ movl %edi,%ebx
+ movl %ebp,%ecx
+ roll $5,%ebp
+ xorl %esi,%ebx
+ addl %eax,%ebp
+ movl 40(%esp),%eax
+ andl %edx,%ebx
+ rorl $2,%edx
+ xorl %esi,%ebx
+ leal 1518500249(%ebp,%eax,1),%ebp
+ addl %ebx,%ebp
+ # 00_15 11
+ movl %edx,%eax
+ movl %ebp,%ebx
+ roll $5,%ebp
+ xorl %edi,%eax
+ addl %esi,%ebp
+ movl 44(%esp),%esi
+ andl %ecx,%eax
+ rorl $2,%ecx
+ xorl %edi,%eax
+ leal 1518500249(%ebp,%esi,1),%ebp
+ addl %eax,%ebp
+ # 00_15 12
+ movl %ecx,%esi
+ movl %ebp,%eax
+ roll $5,%ebp
+ xorl %edx,%esi
+ addl %edi,%ebp
+ movl 48(%esp),%edi
+ andl %ebx,%esi
+ rorl $2,%ebx
+ xorl %edx,%esi
+ leal 1518500249(%ebp,%edi,1),%ebp
+ addl %esi,%ebp
+ # 00_15 13
+ movl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ecx,%edi
+ addl %edx,%ebp
+ movl 52(%esp),%edx
+ andl %eax,%edi
+ rorl $2,%eax
+ xorl %ecx,%edi
+ leal 1518500249(%ebp,%edx,1),%ebp
+ addl %edi,%ebp
+ # 00_15 14
+ movl %eax,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%edx
+ addl %ecx,%ebp
+ movl 56(%esp),%ecx
+ andl %esi,%edx
+ rorl $2,%esi
+ xorl %ebx,%edx
+ leal 1518500249(%ebp,%ecx,1),%ebp
+ addl %edx,%ebp
+ # 00_15 15
+ movl %esi,%ecx
+ movl %ebp,%edx
+ roll $5,%ebp
+ xorl %eax,%ecx
+ addl %ebx,%ebp
+ movl 60(%esp),%ebx
+ andl %edi,%ecx
+ rorl $2,%edi
+ xorl %eax,%ecx
+ leal 1518500249(%ebp,%ebx,1),%ebp
+ movl (%esp),%ebx
+ addl %ebp,%ecx
+ # 16_19 16
+ movl %edi,%ebp
+ xorl 8(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 32(%esp),%ebx
+ andl %edx,%ebp
+ xorl 52(%esp),%ebx
+ roll $1,%ebx
+ xorl %esi,%ebp
+ addl %ebp,%eax
+ movl %ecx,%ebp
+ rorl $2,%edx
+ movl %ebx,(%esp)
+ roll $5,%ebp
+ leal 1518500249(%ebx,%eax,1),%ebx
+ movl 4(%esp),%eax
+ addl %ebp,%ebx
+ # 16_19 17
+ movl %edx,%ebp
+ xorl 12(%esp),%eax
+ xorl %edi,%ebp
+ xorl 36(%esp),%eax
+ andl %ecx,%ebp
+ xorl 56(%esp),%eax
+ roll $1,%eax
+ xorl %edi,%ebp
+ addl %ebp,%esi
+ movl %ebx,%ebp
+ rorl $2,%ecx
+ movl %eax,4(%esp)
+ roll $5,%ebp
+ leal 1518500249(%eax,%esi,1),%eax
+ movl 8(%esp),%esi
+ addl %ebp,%eax
+ # 16_19 18
+ movl %ecx,%ebp
+ xorl 16(%esp),%esi
+ xorl %edx,%ebp
+ xorl 40(%esp),%esi
+ andl %ebx,%ebp
+ xorl 60(%esp),%esi
+ roll $1,%esi
+ xorl %edx,%ebp
+ addl %ebp,%edi
+ movl %eax,%ebp
+ rorl $2,%ebx
+ movl %esi,8(%esp)
+ roll $5,%ebp
+ leal 1518500249(%esi,%edi,1),%esi
+ movl 12(%esp),%edi
+ addl %ebp,%esi
+ # 16_19 19
+ movl %ebx,%ebp
+ xorl 20(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 44(%esp),%edi
+ andl %eax,%ebp
+ xorl (%esp),%edi
+ roll $1,%edi
+ xorl %ecx,%ebp
+ addl %ebp,%edx
+ movl %esi,%ebp
+ rorl $2,%eax
+ movl %edi,12(%esp)
+ roll $5,%ebp
+ leal 1518500249(%edi,%edx,1),%edi
+ movl 16(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 20
+ movl %esi,%ebp
+ xorl 24(%esp),%edx
+ xorl %eax,%ebp
+ xorl 48(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,16(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 20(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 21
+ movl %edi,%ebp
+ xorl 28(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 8(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,20(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 24(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 22
+ movl %edx,%ebp
+ xorl 32(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 56(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,24(%esp)
+ leal 1859775393(%ebx,%eax,1),%ebx
+ movl 28(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 23
+ movl %ecx,%ebp
+ xorl 36(%esp),%eax
+ xorl %edx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edi,%ebp
+ xorl 16(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,28(%esp)
+ leal 1859775393(%eax,%esi,1),%eax
+ movl 32(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 24
+ movl %ebx,%ebp
+ xorl 40(%esp),%esi
+ xorl %ecx,%ebp
+ xorl (%esp),%esi
+ xorl %edx,%ebp
+ xorl 20(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,32(%esp)
+ leal 1859775393(%esi,%edi,1),%esi
+ movl 36(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 25
+ movl %eax,%ebp
+ xorl 44(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,36(%esp)
+ leal 1859775393(%edi,%edx,1),%edi
+ movl 40(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 26
+ movl %esi,%ebp
+ xorl 48(%esp),%edx
+ xorl %eax,%ebp
+ xorl 8(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,40(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 44(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 27
+ movl %edi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 32(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,44(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 48(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 28
+ movl %edx,%ebp
+ xorl 56(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 16(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 36(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,48(%esp)
+ leal 1859775393(%ebx,%eax,1),%ebx
+ movl 52(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 29
+ movl %ecx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edx,%ebp
+ xorl 20(%esp),%eax
+ xorl %edi,%ebp
+ xorl 40(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,52(%esp)
+ leal 1859775393(%eax,%esi,1),%eax
+ movl 56(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 30
+ movl %ebx,%ebp
+ xorl (%esp),%esi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%esi
+ xorl %edx,%ebp
+ xorl 44(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,56(%esp)
+ leal 1859775393(%esi,%edi,1),%esi
+ movl 60(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 31
+ movl %eax,%ebp
+ xorl 4(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 48(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,60(%esp)
+ leal 1859775393(%edi,%edx,1),%edi
+ movl (%esp),%edx
+ addl %ebp,%edi
+ # 20_39 32
+ movl %esi,%ebp
+ xorl 8(%esp),%edx
+ xorl %eax,%ebp
+ xorl 32(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 52(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 4(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 33
+ movl %edi,%ebp
+ xorl 12(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 36(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 56(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,4(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 8(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 34
+ movl %edx,%ebp
+ xorl 16(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 40(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 60(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,8(%esp)
+ leal 1859775393(%ebx,%eax,1),%ebx
+ movl 12(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 35
+ movl %ecx,%ebp
+ xorl 20(%esp),%eax
+ xorl %edx,%ebp
+ xorl 44(%esp),%eax
+ xorl %edi,%ebp
+ xorl (%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,12(%esp)
+ leal 1859775393(%eax,%esi,1),%eax
+ movl 16(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 36
+ movl %ebx,%ebp
+ xorl 24(%esp),%esi
+ xorl %ecx,%ebp
+ xorl 48(%esp),%esi
+ xorl %edx,%ebp
+ xorl 4(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,16(%esp)
+ leal 1859775393(%esi,%edi,1),%esi
+ movl 20(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 37
+ movl %eax,%ebp
+ xorl 28(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 52(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 8(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,20(%esp)
+ leal 1859775393(%edi,%edx,1),%edi
+ movl 24(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 38
+ movl %esi,%ebp
+ xorl 32(%esp),%edx
+ xorl %eax,%ebp
+ xorl 56(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 12(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,24(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 28(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 39
+ movl %edi,%ebp
+ xorl 36(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 60(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 16(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,28(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 32(%esp),%ebx
+ addl %ebp,%ecx
+ # 40_59 40
+ movl %edi,%ebp
+ xorl 40(%esp),%ebx
+ xorl %esi,%ebp
+ xorl (%esp),%ebx
+ andl %edx,%ebp
+ xorl 20(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,32(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 36(%esp),%eax
+ addl %ebp,%ebx
+ # 40_59 41
+ movl %edx,%ebp
+ xorl 44(%esp),%eax
+ xorl %edi,%ebp
+ xorl 4(%esp),%eax
+ andl %ecx,%ebp
+ xorl 24(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,36(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl 40(%esp),%esi
+ addl %ebp,%eax
+ # 40_59 42
+ movl %ecx,%ebp
+ xorl 48(%esp),%esi
+ xorl %edx,%ebp
+ xorl 8(%esp),%esi
+ andl %ebx,%ebp
+ xorl 28(%esp),%esi
+ roll $1,%esi
+ addl %edi,%ebp
+ rorl $2,%ebx
+ movl %eax,%edi
+ roll $5,%edi
+ movl %esi,40(%esp)
+ leal 2400959708(%esi,%ebp,1),%esi
+ movl %ecx,%ebp
+ addl %edi,%esi
+ andl %edx,%ebp
+ movl 44(%esp),%edi
+ addl %ebp,%esi
+ # 40_59 43
+ movl %ebx,%ebp
+ xorl 52(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 12(%esp),%edi
+ andl %eax,%ebp
+ xorl 32(%esp),%edi
+ roll $1,%edi
+ addl %edx,%ebp
+ rorl $2,%eax
+ movl %esi,%edx
+ roll $5,%edx
+ movl %edi,44(%esp)
+ leal 2400959708(%edi,%ebp,1),%edi
+ movl %ebx,%ebp
+ addl %edx,%edi
+ andl %ecx,%ebp
+ movl 48(%esp),%edx
+ addl %ebp,%edi
+ # 40_59 44
+ movl %eax,%ebp
+ xorl 56(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 16(%esp),%edx
+ andl %esi,%ebp
+ xorl 36(%esp),%edx
+ roll $1,%edx
+ addl %ecx,%ebp
+ rorl $2,%esi
+ movl %edi,%ecx
+ roll $5,%ecx
+ movl %edx,48(%esp)
+ leal 2400959708(%edx,%ebp,1),%edx
+ movl %eax,%ebp
+ addl %ecx,%edx
+ andl %ebx,%ebp
+ movl 52(%esp),%ecx
+ addl %ebp,%edx
+ # 40_59 45
+ movl %esi,%ebp
+ xorl 60(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 20(%esp),%ecx
+ andl %edi,%ebp
+ xorl 40(%esp),%ecx
+ roll $1,%ecx
+ addl %ebx,%ebp
+ rorl $2,%edi
+ movl %edx,%ebx
+ roll $5,%ebx
+ movl %ecx,52(%esp)
+ leal 2400959708(%ecx,%ebp,1),%ecx
+ movl %esi,%ebp
+ addl %ebx,%ecx
+ andl %eax,%ebp
+ movl 56(%esp),%ebx
+ addl %ebp,%ecx
+ # 40_59 46
+ movl %edi,%ebp
+ xorl (%esp),%ebx
+ xorl %esi,%ebp
+ xorl 24(%esp),%ebx
+ andl %edx,%ebp
+ xorl 44(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,56(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 60(%esp),%eax
+ addl %ebp,%ebx
+ # 40_59 47
+ movl %edx,%ebp
+ xorl 4(%esp),%eax
+ xorl %edi,%ebp
+ xorl 28(%esp),%eax
+ andl %ecx,%ebp
+ xorl 48(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,60(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl (%esp),%esi
+ addl %ebp,%eax
+ # 40_59 48
+ movl %ecx,%ebp
+ xorl 8(%esp),%esi
+ xorl %edx,%ebp
+ xorl 32(%esp),%esi
+ andl %ebx,%ebp
+ xorl 52(%esp),%esi
+ roll $1,%esi
+ addl %edi,%ebp
+ rorl $2,%ebx
+ movl %eax,%edi
+ roll $5,%edi
+ movl %esi,(%esp)
+ leal 2400959708(%esi,%ebp,1),%esi
+ movl %ecx,%ebp
+ addl %edi,%esi
+ andl %edx,%ebp
+ movl 4(%esp),%edi
+ addl %ebp,%esi
+ # 40_59 49
+ movl %ebx,%ebp
+ xorl 12(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 36(%esp),%edi
+ andl %eax,%ebp
+ xorl 56(%esp),%edi
+ roll $1,%edi
+ addl %edx,%ebp
+ rorl $2,%eax
+ movl %esi,%edx
+ roll $5,%edx
+ movl %edi,4(%esp)
+ leal 2400959708(%edi,%ebp,1),%edi
+ movl %ebx,%ebp
+ addl %edx,%edi
+ andl %ecx,%ebp
+ movl 8(%esp),%edx
+ addl %ebp,%edi
+ # 40_59 50
+ movl %eax,%ebp
+ xorl 16(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 40(%esp),%edx
+ andl %esi,%ebp
+ xorl 60(%esp),%edx
+ roll $1,%edx
+ addl %ecx,%ebp
+ rorl $2,%esi
+ movl %edi,%ecx
+ roll $5,%ecx
+ movl %edx,8(%esp)
+ leal 2400959708(%edx,%ebp,1),%edx
+ movl %eax,%ebp
+ addl %ecx,%edx
+ andl %ebx,%ebp
+ movl 12(%esp),%ecx
+ addl %ebp,%edx
+ # 40_59 51
+ movl %esi,%ebp
+ xorl 20(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 44(%esp),%ecx
+ andl %edi,%ebp
+ xorl (%esp),%ecx
+ roll $1,%ecx
+ addl %ebx,%ebp
+ rorl $2,%edi
+ movl %edx,%ebx
+ roll $5,%ebx
+ movl %ecx,12(%esp)
+ leal 2400959708(%ecx,%ebp,1),%ecx
+ movl %esi,%ebp
+ addl %ebx,%ecx
+ andl %eax,%ebp
+ movl 16(%esp),%ebx
+ addl %ebp,%ecx
+ # 40_59 52
+ movl %edi,%ebp
+ xorl 24(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 48(%esp),%ebx
+ andl %edx,%ebp
+ xorl 4(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,16(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 20(%esp),%eax
+ addl %ebp,%ebx
+ # 40_59 53
+ movl %edx,%ebp
+ xorl 28(%esp),%eax
+ xorl %edi,%ebp
+ xorl 52(%esp),%eax
+ andl %ecx,%ebp
+ xorl 8(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,20(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl 24(%esp),%esi
+ addl %ebp,%eax
+ # 40_59 54
+ movl %ecx,%ebp
+ xorl 32(%esp),%esi
+ xorl %edx,%ebp
+ xorl 56(%esp),%esi
+ andl %ebx,%ebp
+ xorl 12(%esp),%esi
+ roll $1,%esi
+ addl %edi,%ebp
+ rorl $2,%ebx
+ movl %eax,%edi
+ roll $5,%edi
+ movl %esi,24(%esp)
+ leal 2400959708(%esi,%ebp,1),%esi
+ movl %ecx,%ebp
+ addl %edi,%esi
+ andl %edx,%ebp
+ movl 28(%esp),%edi
+ addl %ebp,%esi
+ # 40_59 55
+ movl %ebx,%ebp
+ xorl 36(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 60(%esp),%edi
+ andl %eax,%ebp
+ xorl 16(%esp),%edi
+ roll $1,%edi
+ addl %edx,%ebp
+ rorl $2,%eax
+ movl %esi,%edx
+ roll $5,%edx
+ movl %edi,28(%esp)
+ leal 2400959708(%edi,%ebp,1),%edi
+ movl %ebx,%ebp
+ addl %edx,%edi
+ andl %ecx,%ebp
+ movl 32(%esp),%edx
+ addl %ebp,%edi
+ # 40_59 56
+ movl %eax,%ebp
+ xorl 40(%esp),%edx
+ xorl %ebx,%ebp
+ xorl (%esp),%edx
+ andl %esi,%ebp
+ xorl 20(%esp),%edx
+ roll $1,%edx
+ addl %ecx,%ebp
+ rorl $2,%esi
+ movl %edi,%ecx
+ roll $5,%ecx
+ movl %edx,32(%esp)
+ leal 2400959708(%edx,%ebp,1),%edx
+ movl %eax,%ebp
+ addl %ecx,%edx
+ andl %ebx,%ebp
+ movl 36(%esp),%ecx
+ addl %ebp,%edx
+ # 40_59 57
+ movl %esi,%ebp
+ xorl 44(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 4(%esp),%ecx
+ andl %edi,%ebp
+ xorl 24(%esp),%ecx
+ roll $1,%ecx
+ addl %ebx,%ebp
+ rorl $2,%edi
+ movl %edx,%ebx
+ roll $5,%ebx
+ movl %ecx,36(%esp)
+ leal 2400959708(%ecx,%ebp,1),%ecx
+ movl %esi,%ebp
+ addl %ebx,%ecx
+ andl %eax,%ebp
+ movl 40(%esp),%ebx
+ addl %ebp,%ecx
+ # 40_59 58
+ movl %edi,%ebp
+ xorl 48(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 8(%esp),%ebx
+ andl %edx,%ebp
+ xorl 28(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,40(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 44(%esp),%eax
+ addl %ebp,%ebx
+ # 40_59 59
+ movl %edx,%ebp
+ xorl 52(%esp),%eax
+ xorl %edi,%ebp
+ xorl 12(%esp),%eax
+ andl %ecx,%ebp
+ xorl 32(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,44(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl 48(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 60
+ movl %ebx,%ebp
+ xorl 56(%esp),%esi
+ xorl %ecx,%ebp
+ xorl 16(%esp),%esi
+ xorl %edx,%ebp
+ xorl 36(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,48(%esp)
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 52(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 61
+ movl %eax,%ebp
+ xorl 60(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 20(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 40(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,52(%esp)
+ leal 3395469782(%edi,%edx,1),%edi
+ movl 56(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 62
+ movl %esi,%ebp
+ xorl (%esp),%edx
+ xorl %eax,%ebp
+ xorl 24(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 44(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,56(%esp)
+ leal 3395469782(%edx,%ecx,1),%edx
+ movl 60(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 63
+ movl %edi,%ebp
+ xorl 4(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 28(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 48(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,60(%esp)
+ leal 3395469782(%ecx,%ebx,1),%ecx
+ movl (%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 64
+ movl %edx,%ebp
+ xorl 8(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 32(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 52(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,(%esp)
+ leal 3395469782(%ebx,%eax,1),%ebx
+ movl 4(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 65
+ movl %ecx,%ebp
+ xorl 12(%esp),%eax
+ xorl %edx,%ebp
+ xorl 36(%esp),%eax
+ xorl %edi,%ebp
+ xorl 56(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,4(%esp)
+ leal 3395469782(%eax,%esi,1),%eax
+ movl 8(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 66
+ movl %ebx,%ebp
+ xorl 16(%esp),%esi
+ xorl %ecx,%ebp
+ xorl 40(%esp),%esi
+ xorl %edx,%ebp
+ xorl 60(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,8(%esp)
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 12(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 67
+ movl %eax,%ebp
+ xorl 20(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 44(%esp),%edi
+ xorl %ecx,%ebp
+ xorl (%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,12(%esp)
+ leal 3395469782(%edi,%edx,1),%edi
+ movl 16(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 68
+ movl %esi,%ebp
+ xorl 24(%esp),%edx
+ xorl %eax,%ebp
+ xorl 48(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,16(%esp)
+ leal 3395469782(%edx,%ecx,1),%edx
+ movl 20(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 69
+ movl %edi,%ebp
+ xorl 28(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 8(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,20(%esp)
+ leal 3395469782(%ecx,%ebx,1),%ecx
+ movl 24(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 70
+ movl %edx,%ebp
+ xorl 32(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 56(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,24(%esp)
+ leal 3395469782(%ebx,%eax,1),%ebx
+ movl 28(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 71
+ movl %ecx,%ebp
+ xorl 36(%esp),%eax
+ xorl %edx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edi,%ebp
+ xorl 16(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,28(%esp)
+ leal 3395469782(%eax,%esi,1),%eax
+ movl 32(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 72
+ movl %ebx,%ebp
+ xorl 40(%esp),%esi
+ xorl %ecx,%ebp
+ xorl (%esp),%esi
+ xorl %edx,%ebp
+ xorl 20(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,32(%esp)
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 36(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 73
+ movl %eax,%ebp
+ xorl 44(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,36(%esp)
+ leal 3395469782(%edi,%edx,1),%edi
+ movl 40(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 74
+ movl %esi,%ebp
+ xorl 48(%esp),%edx
+ xorl %eax,%ebp
+ xorl 8(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,40(%esp)
+ leal 3395469782(%edx,%ecx,1),%edx
+ movl 44(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 75
+ movl %edi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 32(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,44(%esp)
+ leal 3395469782(%ecx,%ebx,1),%ecx
+ movl 48(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 76
+ movl %edx,%ebp
+ xorl 56(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 16(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 36(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,48(%esp)
+ leal 3395469782(%ebx,%eax,1),%ebx
+ movl 52(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 77
+ movl %ecx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edx,%ebp
+ xorl 20(%esp),%eax
+ xorl %edi,%ebp
+ xorl 40(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ leal 3395469782(%eax,%esi,1),%eax
+ movl 56(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 78
+ movl %ebx,%ebp
+ xorl (%esp),%esi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%esi
+ xorl %edx,%ebp
+ xorl 44(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 60(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 79
+ movl %eax,%ebp
+ xorl 4(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 48(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ leal 3395469782(%edi,%edx,1),%edi
+ addl %ebp,%edi
+ movl 96(%esp),%ebp
+ movl 100(%esp),%edx
+ addl (%ebp),%edi
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%eax
+ addl 12(%ebp),%ebx
+ addl 16(%ebp),%ecx
+ movl %edi,(%ebp)
+ addl $64,%edx
+ movl %esi,4(%ebp)
+ cmpl 104(%esp),%edx
+ movl %eax,8(%ebp)
+ movl %ecx,%edi
+ movl %ebx,12(%ebp)
+ movl %edx,%esi
+ movl %ecx,16(%ebp)
+ jb .L000loop
+ addl $76,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
+.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+
+.section .note.GNU-stack,"",%progbits
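
For readers following the unrolled rounds above: the # 00_15, 16_19, 20_39
and 40_59 markers name SHA-1's round groups, and the decimal constants are
the standard K values (1518500249 = 0x5A827999, 1859775393 = 0x6ED9EBA1,
2400959708 = 0x8F1BBCDC, and 3395469782 = 0xCA62C1D6 for rounds 60-79,
which the generator emits under the 20_39 label because they share the
parity body). The corresponding round functions, as a C reference:

    #include <stdint.h>

    /* SHA-1 round functions per FIPS 180-4, matching the and/xor
     * patterns visible in the generated code above. */
    static uint32_t f_ch(uint32_t b, uint32_t c, uint32_t d)
    {
            return (b & c) | (~b & d);          /* rounds 0-19 */
    }
    static uint32_t f_parity(uint32_t b, uint32_t c, uint32_t d)
    {
            return b ^ c ^ d;                   /* rounds 20-39, 60-79 */
    }
    static uint32_t f_maj(uint32_t b, uint32_t c, uint32_t d)
    {
            return (b & c) | (b & d) | (c & d); /* rounds 40-59 */
    }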
diff --git a/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s
new file mode 100644
index 0000000000..75868a42c6
--- /dev/null
+++ b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s
@@ -0,0 +1,2693 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+
+
+.globl sha1_block_data_order
+.def sha1_block_data_order; .scl 2; .type 32; .endef
+.p2align 4
+sha1_block_data_order:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_sha1_block_data_order:
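+	# Win64 entry: the Microsoft-ABI arguments arrive in %rcx,%rdx,%r8 and
+	# are moved into the SysV registers %rdi,%rsi,%rdx that the body below
+	# uses; %rdi/%rsi themselves were spilled to the shadow area at
+	# 8(%rsp)/16(%rsp) above.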
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
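+	# Runtime dispatch: gnutls caches the CPUID feature words in
+	# _gnutls_x86_cpuid_s; $512 is bit 9 of the second (ECX) word, the
+	# SSSE3 feature flag, which selects the SSSE3 path below.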
+ movl _gnutls_x86_cpuid_s+0(%rip),%r9d
+ movl _gnutls_x86_cpuid_s+4(%rip),%r8d
+ movl _gnutls_x86_cpuid_s+8(%rip),%r10d
+ testl $512,%r8d
+ jz .Lialu
+ jmp _ssse3_shortcut
+
+.p2align 4
+.Lialu:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ movq %rsp,%r11
+ movq %rdi,%r8
+ subq $72,%rsp
+ movq %rsi,%r9
+ andq $-64,%rsp
+ movq %rdx,%r10
+ movq %r11,64(%rsp)
+.Lprologue:
+
+ movl 0(%r8),%esi
+ movl 4(%r8),%edi
+ movl 8(%r8),%r11d
+ movl 12(%r8),%r12d
+ movl 16(%r8),%r13d
+ jmp .Lloop
+
+.p2align 4
+.Lloop:
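+	# Fully unrolled rounds 0..79: a..e start in %esi,%edi,%r11d,%r12d,%r13d
+	# and rotate through those registers; the 16-word message schedule lives
+	# at 0..60(%rsp). The leal constants are the SHA-1 round K values
+	# 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc and 0xca62c1d6 (the last two
+	# appearing as the negative decimals -1894007588 and -899497514).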
+ movl 0(%r9),%edx
+ bswapl %edx
+ movl %edx,0(%rsp)
+ movl %r11d,%eax
+ movl 4(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,4(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 8(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,8(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 12(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,12(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 16(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,16(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 20(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,20(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 24(%r9),%edx
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r13,1),%r13d
+ andl %edi,%eax
+ movl %edx,24(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 28(%r9),%ebp
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r12,1),%r12d
+ andl %esi,%eax
+ movl %ebp,28(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 32(%r9),%edx
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %edx,32(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 36(%r9),%ebp
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %ebp,36(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 40(%r9),%edx
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %edx,40(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 44(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,44(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 48(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,48(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 52(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,52(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 56(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,56(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 60(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,60(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl 0(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ xorl 32(%rsp),%edx
+ andl %edi,%eax
+ leal 1518500249(%rbp,%r13,1),%r13d
+ xorl 52(%rsp),%edx
+ xorl %r12d,%eax
+ roll $1,%edx
+ addl %ecx,%r13d
+ roll $30,%edi
+ movl %edx,0(%rsp)
+ addl %eax,%r13d
+ movl 4(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ xorl 36(%rsp),%ebp
+ andl %esi,%eax
+ leal 1518500249(%rdx,%r12,1),%r12d
+ xorl 56(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $1,%ebp
+ addl %ecx,%r12d
+ roll $30,%esi
+ movl %ebp,4(%rsp)
+ addl %eax,%r12d
+ movl 8(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ xorl 40(%rsp),%edx
+ andl %r13d,%eax
+ leal 1518500249(%rbp,%r11,1),%r11d
+ xorl 60(%rsp),%edx
+ xorl %edi,%eax
+ roll $1,%edx
+ addl %ecx,%r11d
+ roll $30,%r13d
+ movl %edx,8(%rsp)
+ addl %eax,%r11d
+ movl 12(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ leal 1518500249(%rdx,%rdi,1),%edi
+ xorl 0(%rsp),%ebp
+ xorl %esi,%eax
+ roll $1,%ebp
+ addl %ecx,%edi
+ roll $30,%r12d
+ movl %ebp,12(%rsp)
+ addl %eax,%edi
+ movl 16(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ leal 1518500249(%rbp,%rsi,1),%esi
+ xorl 4(%rsp),%edx
+ xorl %r13d,%eax
+ roll $1,%edx
+ addl %ecx,%esi
+ roll $30,%r11d
+ movl %edx,16(%rsp)
+ addl %eax,%esi
+ movl 20(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 52(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 8(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 56(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 12(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 16(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 20(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 24(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 28(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 32(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 36(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 40(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 44(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 48(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 52(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 56(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 60(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 0(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 4(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 8(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 12(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 16(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 20(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r13d
+ movl 40(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,40(%rsp)
+ addl %ecx,%r12d
+ movl 44(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 52(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,44(%rsp)
+ addl %ecx,%r11d
+ movl 48(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 56(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,48(%rsp)
+ addl %ecx,%edi
+ movl 52(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 60(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 40(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,52(%rsp)
+ addl %ecx,%esi
+ movl 56(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 0(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 44(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,56(%rsp)
+ addl %ecx,%r13d
+ movl 60(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 4(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 48(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,60(%rsp)
+ addl %ecx,%r12d
+ movl 0(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 8(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 52(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,0(%rsp)
+ addl %ecx,%r11d
+ movl 4(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 12(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 56(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,4(%rsp)
+ addl %ecx,%edi
+ movl 8(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 16(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 60(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,8(%rsp)
+ addl %ecx,%esi
+ movl 12(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 20(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 0(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,12(%rsp)
+ addl %ecx,%r13d
+ movl 16(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 24(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 4(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,16(%rsp)
+ addl %ecx,%r12d
+ movl 20(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 28(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 8(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,20(%rsp)
+ addl %ecx,%r11d
+ movl 24(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 32(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 12(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,24(%rsp)
+ addl %ecx,%edi
+ movl 28(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 36(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 16(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,28(%rsp)
+ addl %ecx,%esi
+ movl 32(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 40(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 20(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,32(%rsp)
+ addl %ecx,%r13d
+ movl 36(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r12d
+ movl 40(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 48(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,40(%rsp)
+ addl %ecx,%r11d
+ movl 44(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 52(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,44(%rsp)
+ addl %ecx,%edi
+ movl 48(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 56(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,48(%rsp)
+ addl %ecx,%esi
+ movl 52(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 20(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 40(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 24(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 44(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 48(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 52(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 56(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 60(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 0(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 4(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 8(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rsi,1),%esi
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 12(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 16(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 20(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 24(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 28(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 32(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 36(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 40(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl 56(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 44(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl 60(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 48(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl %r11d,%eax
+ leal -899497514(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ roll $30,%r11d
+ addl %eax,%esi
+ addl 0(%r8),%esi
+ addl 4(%r8),%edi
+ addl 8(%r8),%r11d
+ addl 12(%r8),%r12d
+ addl 16(%r8),%r13d
+ movl %esi,0(%r8)
+ movl %edi,4(%r8)
+ movl %r11d,8(%r8)
+ movl %r12d,12(%r8)
+ movl %r13d,16(%r8)
+
+ subq $1,%r10
+ leaq 64(%r9),%r9
+ jnz .Lloop
+
+ movq 64(%rsp),%rsi
+ movq (%rsi),%r13
+ movq 8(%rsi),%r12
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rbx
+ leaq 32(%rsi),%rsp
+.Lepilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ .byte 0xf3,0xc3
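+	# 0xf3,0xc3 encodes "rep ret", the two-byte return that avoids a
+	# branch-predictor penalty on some AMD processors.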
+.LSEH_end_sha1_block_data_order:
+.def sha1_block_data_order_ssse3; .scl 3; .type 32; .endef
+.p2align 4
+sha1_block_data_order_ssse3:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_sha1_block_data_order_ssse3:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+_ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -160(%rsp),%rsp
+ movaps %xmm6,64+0(%rsp)
+ movaps %xmm7,64+16(%rsp)
+ movaps %xmm8,64+32(%rsp)
+ movaps %xmm9,64+48(%rsp)
+ movaps %xmm10,64+64(%rsp)
+ movaps %xmm11,64+80(%rsp)
+.Lprologue_ssse3:
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX+64(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+ movl %ecx,%edi
+ xorl %edx,%edi
+ andl %edi,%esi
+
+ movdqa 64(%r11),%xmm6
+ movdqa -64(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
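+	# The four .byte runs above encode pshufb %xmm6,%xmm0..%xmm3, byte-
+	# swapping the big-endian input words (spelled out in hex for
+	# assemblers that lack SSSE3 mnemonics).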
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ jmp .Loop_ssse3
+.p2align 4
+.Loop_ssse3:
+ movdqa %xmm1,%xmm4
+ rorl $2,%ebx
+ xorl %edx,%esi
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
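+	# 102,15,58,15,224,8 encodes palignr $8,%xmm0,%xmm4, splicing the
+	# sliding four-word window w[2..5] into %xmm4 for the schedule update.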
+ movl %eax,%edi
+ addl 0(%rsp),%ebp
+ paddd %xmm3,%xmm9
+ xorl %ecx,%ebx
+ roll $5,%eax
+ psrldq $4,%xmm8
+ addl %esi,%ebp
+ andl %ebx,%edi
+ pxor %xmm0,%xmm4
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ pxor %xmm2,%xmm8
+ rorl $7,%eax
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ addl 4(%rsp),%edx
+ pxor %xmm8,%xmm4
+ xorl %ebx,%eax
+ roll $5,%ebp
+ movdqa %xmm9,48(%rsp)
+ addl %edi,%edx
+ andl %eax,%esi
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ rorl $7,%ebp
+ xorl %ebx,%esi
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ addl 8(%rsp),%ecx
+ xorl %eax,%ebp
+ roll $5,%edx
+ psrld $31,%xmm8
+ addl %esi,%ecx
+ andl %ebp,%edi
+ movdqa %xmm10,%xmm9
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ rorl $7,%edx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ addl 12(%rsp),%ebx
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ xorl %ebp,%edx
+ roll $5,%ecx
+ movdqa -64(%r11),%xmm10
+ addl %edi,%ebx
+ andl %edx,%esi
+ pxor %xmm9,%xmm4
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ movdqa %xmm2,%xmm5
+ rorl $7,%ecx
+ xorl %ebp,%esi
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ addl 16(%rsp),%eax
+ paddd %xmm4,%xmm10
+ xorl %edx,%ecx
+ roll $5,%ebx
+ psrldq $4,%xmm9
+ addl %esi,%eax
+ andl %ecx,%edi
+ pxor %xmm1,%xmm5
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ pxor %xmm3,%xmm9
+ rorl $7,%ebx
+ xorl %edx,%edi
+ movl %eax,%esi
+ addl 20(%rsp),%ebp
+ pxor %xmm9,%xmm5
+ xorl %ecx,%ebx
+ roll $5,%eax
+ movdqa %xmm10,0(%rsp)
+ addl %edi,%ebp
+ andl %ebx,%esi
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ rorl $7,%eax
+ xorl %ecx,%esi
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ addl 24(%rsp),%edx
+ xorl %ebx,%eax
+ roll $5,%ebp
+ psrld $31,%xmm9
+ addl %esi,%edx
+ andl %eax,%edi
+ movdqa %xmm8,%xmm10
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ rorl $7,%ebp
+ xorl %ebx,%edi
+ movl %edx,%esi
+ addl 28(%rsp),%ecx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ xorl %eax,%ebp
+ roll $5,%edx
+ movdqa -32(%r11),%xmm8
+ addl %edi,%ecx
+ andl %ebp,%esi
+ pxor %xmm10,%xmm5
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ movdqa %xmm3,%xmm6
+ rorl $7,%edx
+ xorl %eax,%esi
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ addl 32(%rsp),%ebx
+ paddd %xmm5,%xmm8
+ xorl %ebp,%edx
+ roll $5,%ecx
+ psrldq $4,%xmm10
+ addl %esi,%ebx
+ andl %edx,%edi
+ pxor %xmm2,%xmm6
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ pxor %xmm4,%xmm10
+ rorl $7,%ecx
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ addl 36(%rsp),%eax
+ pxor %xmm10,%xmm6
+ xorl %edx,%ecx
+ roll $5,%ebx
+ movdqa %xmm8,16(%rsp)
+ addl %edi,%eax
+ andl %ecx,%esi
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ rorl $7,%ebx
+ xorl %edx,%esi
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ addl 40(%rsp),%ebp
+ xorl %ecx,%ebx
+ roll $5,%eax
+ psrld $31,%xmm10
+ addl %esi,%ebp
+ andl %ebx,%edi
+ movdqa %xmm9,%xmm8
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ rorl $7,%eax
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ addl 44(%rsp),%edx
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ xorl %ebx,%eax
+ roll $5,%ebp
+ movdqa -32(%r11),%xmm9
+ addl %edi,%edx
+ andl %eax,%esi
+ pxor %xmm8,%xmm6
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm7
+ rorl $7,%ebp
+ xorl %ebx,%esi
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ addl 48(%rsp),%ecx
+ paddd %xmm6,%xmm9
+ xorl %eax,%ebp
+ roll $5,%edx
+ psrldq $4,%xmm8
+ addl %esi,%ecx
+ andl %ebp,%edi
+ pxor %xmm3,%xmm7
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ pxor %xmm5,%xmm8
+ rorl $7,%edx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ addl 52(%rsp),%ebx
+ pxor %xmm8,%xmm7
+ xorl %ebp,%edx
+ roll $5,%ecx
+ movdqa %xmm9,32(%rsp)
+ addl %edi,%ebx
+ andl %edx,%esi
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ xorl %ebp,%esi
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ addl 56(%rsp),%eax
+ xorl %edx,%ecx
+ roll $5,%ebx
+ psrld $31,%xmm8
+ addl %esi,%eax
+ andl %ecx,%edi
+ movdqa %xmm10,%xmm9
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ rorl $7,%ebx
+ xorl %edx,%edi
+ movl %eax,%esi
+ addl 60(%rsp),%ebp
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ xorl %ecx,%ebx
+ roll $5,%eax
+ movdqa -32(%r11),%xmm10
+ addl %edi,%ebp
+ andl %ebx,%esi
+ pxor %xmm9,%xmm7
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ movdqa %xmm7,%xmm9
+ rorl $7,%eax
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ addl 0(%rsp),%edx
+ pxor %xmm1,%xmm0
+ xorl %ebx,%eax
+ roll $5,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ addl %esi,%edx
+ andl %eax,%edi
+ pxor %xmm9,%xmm0
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ rorl $7,%ebp
+ xorl %ebx,%edi
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ addl 4(%rsp),%ecx
+ xorl %eax,%ebp
+ roll $5,%edx
+ pslld $2,%xmm0
+ addl %edi,%ecx
+ andl %ebp,%esi
+ psrld $30,%xmm9
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ addl 8(%rsp),%ebx
+ por %xmm9,%xmm0
+ xorl %ebp,%edx
+ roll $5,%ecx
+ movdqa %xmm0,%xmm10
+ addl %esi,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 16(%rsp),%ebp
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %eax,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %eax,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %ebp,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ addl %esi,%eax
+ xorl %edx,%edi
+ movdqa 0(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %ebx,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %eax,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %ecx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ebx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %eax,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ addl %esi,%ecx
+ xorl %eax,%edi
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %edx,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %edx,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %ecx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ebx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ addl %esi,%edx
+ xorl %ebx,%edi
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %ebp,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %eax,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %ebp,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ movdqa %xmm5,%xmm9
+ rorl $7,%ecx
+ movl %ebx,%esi
+ xorl %edx,%edi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 32(%rsp),%ebp
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ pxor %xmm7,%xmm6
+ movl %eax,%edi
+ xorl %ecx,%esi
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ roll $5,%eax
+ addl %esi,%ebp
+ pxor %xmm9,%xmm6
+ xorl %ebx,%edi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 36(%rsp),%edx
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movl %ebp,%esi
+ pslld $2,%xmm6
+ xorl %ebx,%edi
+ roll $5,%ebp
+ psrld $30,%xmm9
+ addl %edi,%edx
+ xorl %eax,%esi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 40(%rsp),%ecx
+ andl %eax,%esi
+ por %xmm9,%xmm6
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ movdqa %xmm6,%xmm10
+ movl %edx,%edi
+ xorl %eax,%esi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebp,%edi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 44(%rsp),%ebx
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ rorl $7,%edx
+ movl %ecx,%esi
+ xorl %ebp,%edi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %edx,%esi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 48(%rsp),%eax
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ andl %edx,%esi
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ pxor %xmm0,%xmm7
+ movl %ebx,%edi
+ xorl %edx,%esi
+ movdqa 32(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ roll $5,%ebx
+ addl %esi,%eax
+ pxor %xmm10,%xmm7
+ xorl %ecx,%edi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 52(%rsp),%ebp
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movl %eax,%esi
+ pslld $2,%xmm7
+ xorl %ecx,%edi
+ roll $5,%eax
+ psrld $30,%xmm10
+ addl %edi,%ebp
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 56(%rsp),%edx
+ andl %ebx,%esi
+ por %xmm10,%xmm7
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movdqa %xmm7,%xmm8
+ movl %ebp,%edi
+ xorl %ebx,%esi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %eax,%edi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 60(%rsp),%ecx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ movl %edx,%esi
+ xorl %eax,%edi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebp,%esi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 0(%rsp),%ebx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ rorl $7,%edx
+ pxor %xmm1,%xmm0
+ movl %ecx,%edi
+ xorl %ebp,%esi
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ roll $5,%ecx
+ addl %esi,%ebx
+ pxor %xmm8,%xmm0
+ xorl %edx,%edi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 4(%rsp),%eax
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ andl %edx,%edi
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ movl %ebx,%esi
+ pslld $2,%xmm0
+ xorl %edx,%edi
+ roll $5,%ebx
+ psrld $30,%xmm8
+ addl %edi,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 8(%rsp),%ebp
+ andl %ecx,%esi
+ por %xmm8,%xmm0
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movdqa %xmm0,%xmm9
+ movl %eax,%edi
+ xorl %ecx,%esi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ebx,%edi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 12(%rsp),%edx
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movl %ebp,%esi
+ xorl %ebx,%edi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %eax,%esi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 16(%rsp),%ecx
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ andl %eax,%esi
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ pxor %xmm2,%xmm1
+ movl %edx,%edi
+ xorl %eax,%esi
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ roll $5,%edx
+ addl %esi,%ecx
+ pxor %xmm9,%xmm1
+ xorl %ebp,%edi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 20(%rsp),%ebx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ rorl $7,%edx
+ movl %ecx,%esi
+ pslld $2,%xmm1
+ xorl %ebp,%edi
+ roll $5,%ecx
+ psrld $30,%xmm9
+ addl %edi,%ebx
+ xorl %edx,%esi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 24(%rsp),%eax
+ andl %edx,%esi
+ por %xmm9,%xmm1
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ movdqa %xmm1,%xmm10
+ movl %ebx,%edi
+ xorl %edx,%esi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ecx,%edi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 28(%rsp),%ebp
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movl %eax,%esi
+ xorl %ecx,%edi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 32(%rsp),%edx
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ pxor %xmm3,%xmm2
+ movl %ebp,%edi
+ xorl %ebx,%esi
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ roll $5,%ebp
+ addl %esi,%edx
+ pxor %xmm10,%xmm2
+ xorl %eax,%edi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 36(%rsp),%ecx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ andl %eax,%edi
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ movl %edx,%esi
+ pslld $2,%xmm2
+ xorl %eax,%edi
+ roll $5,%edx
+ psrld $30,%xmm10
+ addl %edi,%ecx
+ xorl %ebp,%esi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 40(%rsp),%ebx
+ andl %ebp,%esi
+ por %xmm10,%xmm2
+ xorl %eax,%ebp
+ rorl $7,%edx
+ movdqa %xmm2,%xmm8
+ movl %ecx,%edi
+ xorl %ebp,%esi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %edx,%edi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 44(%rsp),%eax
+ andl %edx,%edi
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ movl %ebx,%esi
+ xorl %edx,%edi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %eax,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ movdqa %xmm10,48(%rsp)
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 4(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 8(%rsp),%edx
+ xorl %ebx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 12(%rsp),%ecx
+ xorl %eax,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ cmpq %r10,%r9
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa -64(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %ebp,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 20(%rsp),%eax
+ movdqa %xmm0,0(%rsp)
+ xorl %edx,%edi
+ movl %ebx,%esi
+ psubd %xmm9,%xmm0
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 24(%rsp),%ebp
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 28(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%rsp),%ecx
+ xorl %eax,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ paddd %xmm9,%xmm1
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 36(%rsp),%ebx
+ movdqa %xmm1,16(%rsp)
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ psubd %xmm9,%xmm1
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 40(%rsp),%eax
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 48(%rsp),%edx
+ xorl %ebx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ paddd %xmm9,%xmm2
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 52(%rsp),%ecx
+ movdqa %xmm2,32(%rsp)
+ xorl %eax,%edi
+ movl %edx,%esi
+ psubd %xmm9,%xmm2
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 56(%rsp),%ebx
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %ecx,%edi
+ movl %edx,12(%r8)
+ xorl %edx,%edi
+ movl %ebp,16(%r8)
+ andl %edi,%esi
+ jmp .Loop_ssse3
+
+.p2align 4
+.Ldone_ssse3:
+ addl 16(%rsp),%ebx
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 20(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 24(%rsp),%ebp
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 28(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%rsp),%ecx
+ xorl %eax,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 36(%rsp),%ebx
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 40(%rsp),%eax
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 48(%rsp),%edx
+ xorl %ebx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 52(%rsp),%ecx
+ xorl %eax,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 56(%rsp),%ebx
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ movaps 64+0(%rsp),%xmm6
+ movaps 64+16(%rsp),%xmm7
+ movaps 64+32(%rsp),%xmm8
+ movaps 64+48(%rsp),%xmm9
+ movaps 64+64(%rsp),%xmm10
+ movaps 64+80(%rsp),%xmm11
+ leaq 160(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+.Lepilogue_ssse3:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ .byte 0xf3,0xc3
+.LSEH_end_sha1_block_data_order_ssse3:
+.p2align 6
+K_XX_XX:
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
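+# K_XX_XX: the four SHA-1 round constants broadcast across all lanes,
+# followed by the pshufb byte-swap mask; the trailing .byte string is the
+# ASCII credit "SHA1 block transform for x86_64, CRYPTOGAMS by
+# <appro@openssl.org>".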
+.p2align 6
+
+.def se_handler; .scl 3; .type 32; .endef
+.p2align 4
+se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ leaq .Lprologue(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail
+
+ movq 152(%r8),%rax
+
+ leaq .Lepilogue(%rip),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ movq 64(%rax),%rax
+ leaq 32(%rax),%rax
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq -32(%rax),%r13
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+
+ jmp .Lcommon_seh_tail
+
+
+.def ssse3_handler; .scl 3; .type 32; .endef
+.p2align 4
+ssse3_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lcommon_seh_tail
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lcommon_seh_tail
+
+ leaq 64(%rax),%rsi
+ leaq 512(%r8),%rdi
+ movl $12,%ecx
+.long 0xa548f3fc
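+	# 0xa548f3fc encodes "cld; rep movsq": with %ecx=12 it copies the six
+	# saved %xmm6-%xmm11 slots from the frame into the CONTEXT record.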
+ leaq 184(%rax),%rax
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+
+.Lcommon_seh_tail:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
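+	# "cld; rep movsq" again: 154 quadwords (1232 bytes), the size of a
+	# full Win64 CONTEXT record, copied into the buffer at 40(%r9).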
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ .byte 0xf3,0xc3
+
+
+.section .pdata
+.p2align 2
+.rva .LSEH_begin_sha1_block_data_order
+.rva .LSEH_end_sha1_block_data_order
+.rva .LSEH_info_sha1_block_data_order
+.rva .LSEH_begin_sha1_block_data_order_ssse3
+.rva .LSEH_end_sha1_block_data_order_ssse3
+.rva .LSEH_info_sha1_block_data_order_ssse3
+.section .xdata
+.p2align 3
+.LSEH_info_sha1_block_data_order:
+.byte 9,0,0,0
+.rva se_handler
+.LSEH_info_sha1_block_data_order_ssse3:
+.byte 9,0,0,0
+.rva ssse3_handler
+.rva .Lprologue_ssse3,.Lepilogue_ssse3
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/sha256-avx-x86_64.s b/lib/accelerated/x86/coff/sha256-avx-x86_64.s
new file mode 100644
index 0000000000..2198b8e187
--- /dev/null
+++ b/lib/accelerated/x86/coff/sha256-avx-x86_64.s
@@ -0,0 +1,2645 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+
+
+
+.globl sha256_multi_block
+.def sha256_multi_block; .scl 2; .type 32; .endef
+.p2align 5
+sha256_multi_block:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_sha256_multi_block:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ leaq -168(%rsp),%rsp
+ movaps %xmm6,(%rsp)
+ movaps %xmm7,16(%rsp)
+ movaps %xmm8,32(%rsp)
+ movaps %xmm9,48(%rsp)
+ movaps %xmm10,-120(%rax)
+ movaps %xmm11,-104(%rax)
+ movaps %xmm12,-88(%rax)
+ movaps %xmm13,-72(%rax)
+ movaps %xmm14,-56(%rax)
+ movaps %xmm15,-40(%rax)
+ subq $288,%rsp
+ andq $-256,%rsp
+ movq %rax,272(%rsp)
+ leaq K256+128(%rip),%rbp
+ leaq 256(%rsp),%rbx
+ leaq 128(%rdi),%rdi
+
+.Loop_grande:
+ movl %edx,280(%rsp)
+ xorl %edx,%edx
+ movq 0(%rsi),%r8
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rbp,%r8
+ movq 16(%rsi),%r9
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rbp,%r9
+ movq 32(%rsi),%r10
+ movl 40(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,8(%rbx)
+ cmovleq %rbp,%r10
+ movq 48(%rsi),%r11
+ movl 56(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,12(%rbx)
+ cmovleq %rbp,%r11
+ testl %edx,%edx
+ jz .Ldone
+
+ movdqu 0-128(%rdi),%xmm8
+ leaq 128(%rsp),%rax
+ movdqu 32-128(%rdi),%xmm9
+ movdqu 64-128(%rdi),%xmm10
+ movdqu 96-128(%rdi),%xmm11
+ movdqu 128-128(%rdi),%xmm12
+ movdqu 160-128(%rdi),%xmm13
+ movdqu 192-128(%rdi),%xmm14
+ movdqu 224-128(%rdi),%xmm15
+ movdqu .Lpbswap(%rip),%xmm6
+ jmp .Loop
+
+.p2align 5
+.Loop:
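+	# Four-way multi-block SHA-256: %xmm8..%xmm15 hold the a..h working
+	# variables for four independent streams, one stream per 32-bit lane.
+	# .Loop_grande gathered up to four (pointer, block-count) pairs from
+	# %rsi; lanes with no work left were pointed (cmovleq) at dummy data
+	# so the movd gathers below stay valid.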
+ movdqa %xmm10,%xmm4
+ pxor %xmm9,%xmm4
+ movd 0(%r8),%xmm5
+ movd 0(%r9),%xmm0
+ movd 0(%r10),%xmm1
+ movd 0(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
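+	# 102,15,56,0,238 encodes pshufb %xmm6,%xmm5: byte-swap the four
+	# freshly gathered big-endian message words.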
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,0-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movd 4(%r8),%xmm5
+ movd 4(%r9),%xmm0
+ movd 4(%r10),%xmm1
+ movd 4(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,16-128(%rax)
+ paddd %xmm14,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm5,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm14
+ paddd %xmm7,%xmm14
+ movd 8(%r8),%xmm5
+ movd 8(%r9),%xmm0
+ movd 8(%r10),%xmm1
+ movd 8(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,32-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movd 12(%r8),%xmm5
+ movd 12(%r9),%xmm0
+ movd 12(%r10),%xmm1
+ movd 12(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,48-128(%rax)
+ paddd %xmm12,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm5,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm12
+ paddd %xmm7,%xmm12
+ movd 16(%r8),%xmm5
+ movd 16(%r9),%xmm0
+ movd 16(%r10),%xmm1
+ movd 16(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,64-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movd 20(%r8),%xmm5
+ movd 20(%r9),%xmm0
+ movd 20(%r10),%xmm1
+ movd 20(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,80-128(%rax)
+ paddd %xmm10,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm5,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm10
+ paddd %xmm7,%xmm10
+ movd 24(%r8),%xmm5
+ movd 24(%r9),%xmm0
+ movd 24(%r10),%xmm1
+ movd 24(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,96-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movd 28(%r8),%xmm5
+ movd 28(%r9),%xmm0
+ movd 28(%r10),%xmm1
+ movd 28(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,112-128(%rax)
+ paddd %xmm8,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm5,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movd 32(%r8),%xmm5
+ movd 32(%r9),%xmm0
+ movd 32(%r10),%xmm1
+ movd 32(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,128-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movd 36(%r8),%xmm5
+ movd 36(%r9),%xmm0
+ movd 36(%r10),%xmm1
+ movd 36(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,144-128(%rax)
+ paddd %xmm14,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm5,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm14
+ paddd %xmm7,%xmm14
+ movd 40(%r8),%xmm5
+ movd 40(%r9),%xmm0
+ movd 40(%r10),%xmm1
+ movd 40(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,160-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movd 44(%r8),%xmm5
+ movd 44(%r9),%xmm0
+ movd 44(%r10),%xmm1
+ movd 44(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,176-128(%rax)
+ paddd %xmm12,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm5,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm12
+ paddd %xmm7,%xmm12
+ movd 48(%r8),%xmm5
+ movd 48(%r9),%xmm0
+ movd 48(%r10),%xmm1
+ movd 48(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,192-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movd 52(%r8),%xmm5
+ movd 52(%r9),%xmm0
+ movd 52(%r10),%xmm1
+ movd 52(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,208-128(%rax)
+ paddd %xmm10,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm5,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm10
+ paddd %xmm7,%xmm10
+ movd 56(%r8),%xmm5
+ movd 56(%r9),%xmm0
+ movd 56(%r10),%xmm1
+ movd 56(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,224-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movd 60(%r8),%xmm5
+ leaq 64(%r8),%r8
+ movd 60(%r9),%xmm0
+ leaq 64(%r9),%r9
+ movd 60(%r10),%xmm1
+ leaq 64(%r10),%r10
+ movd 60(%r11),%xmm2
+ leaq 64(%r11),%r11
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,240-128(%rax)
+ paddd %xmm8,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm5,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movdqu 0-128(%rax),%xmm5
+ movl $3,%ecx
+ jmp .Loop_16_xx
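+# Rounds 16..63: three passes (%ecx = 3) of a 16-round loop.  Each
+# round first extends the message schedule in place -- sigma0(W[t-15]) +
+# sigma1(W[t-2]) + W[t-7] + W[t-16] -- then runs the usual round body.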
+.p2align 5
+.Loop_16_xx:
+ movdqa 16-128(%rax),%xmm6
+ paddd 144-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 224-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,0-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movdqa 32-128(%rax),%xmm5
+ paddd 160-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 240-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,16-128(%rax)
+ paddd %xmm14,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm6,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm14
+ paddd %xmm7,%xmm14
+ movdqa 48-128(%rax),%xmm6
+ paddd 176-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 0-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,32-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movdqa 64-128(%rax),%xmm5
+ paddd 192-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 16-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,48-128(%rax)
+ paddd %xmm12,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm6,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm12
+ paddd %xmm7,%xmm12
+ movdqa 80-128(%rax),%xmm6
+ paddd 208-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 32-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,64-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movdqa 96-128(%rax),%xmm5
+ paddd 224-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 48-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,80-128(%rax)
+ paddd %xmm10,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm6,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm10
+ paddd %xmm7,%xmm10
+ movdqa 112-128(%rax),%xmm6
+ paddd 240-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 64-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,96-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movdqa 128-128(%rax),%xmm5
+ paddd 0-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 80-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,112-128(%rax)
+ paddd %xmm8,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm6,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movdqa 144-128(%rax),%xmm6
+ paddd 16-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 96-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,128-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movdqa 160-128(%rax),%xmm5
+ paddd 32-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 112-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,144-128(%rax)
+ paddd %xmm14,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm6,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm14
+ paddd %xmm7,%xmm14
+ movdqa 176-128(%rax),%xmm6
+ paddd 48-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 128-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,160-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movdqa 192-128(%rax),%xmm5
+ paddd 64-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 144-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,176-128(%rax)
+ paddd %xmm12,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm6,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm12
+ paddd %xmm7,%xmm12
+ movdqa 208-128(%rax),%xmm6
+ paddd 80-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 160-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,192-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movdqa 224-128(%rax),%xmm5
+ paddd 96-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 176-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,208-128(%rax)
+ paddd %xmm10,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm6,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm10
+ paddd %xmm7,%xmm10
+ movdqa 240-128(%rax),%xmm6
+ paddd 112-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 192-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,224-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movdqa 0-128(%rax),%xmm5
+ paddd 128-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 208-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,240-128(%rax)
+ paddd %xmm8,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm6,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ decl %ecx
+ jnz .Loop_16_xx
+
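+# One 64-byte block finished on every lane.  pcmpgtd builds an all-ones
+# mask for lanes whose block count is still positive and paddd adds it,
+# i.e. decrements those counts; finished lanes have their data pointer
+# parked at K256, and the same mask ensures that only active lanes fold
+# the new working state into the saved context below.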
+ movl $1,%ecx
+ leaq K256+128(%rip),%rbp
+
+ movdqa (%rbx),%xmm7
+ cmpl 0(%rbx),%ecx
+ pxor %xmm0,%xmm0
+ cmovgeq %rbp,%r8
+ cmpl 4(%rbx),%ecx
+ movdqa %xmm7,%xmm6
+ cmovgeq %rbp,%r9
+ cmpl 8(%rbx),%ecx
+ pcmpgtd %xmm0,%xmm6
+ cmovgeq %rbp,%r10
+ cmpl 12(%rbx),%ecx
+ paddd %xmm6,%xmm7
+ cmovgeq %rbp,%r11
+
+ movdqu 0-128(%rdi),%xmm0
+ pand %xmm6,%xmm8
+ movdqu 32-128(%rdi),%xmm1
+ pand %xmm6,%xmm9
+ movdqu 64-128(%rdi),%xmm2
+ pand %xmm6,%xmm10
+ movdqu 96-128(%rdi),%xmm5
+ pand %xmm6,%xmm11
+ paddd %xmm0,%xmm8
+ movdqu 128-128(%rdi),%xmm0
+ pand %xmm6,%xmm12
+ paddd %xmm1,%xmm9
+ movdqu 160-128(%rdi),%xmm1
+ pand %xmm6,%xmm13
+ paddd %xmm2,%xmm10
+ movdqu 192-128(%rdi),%xmm2
+ pand %xmm6,%xmm14
+ paddd %xmm5,%xmm11
+ movdqu 224-128(%rdi),%xmm5
+ pand %xmm6,%xmm15
+ paddd %xmm0,%xmm12
+ paddd %xmm1,%xmm13
+ movdqu %xmm8,0-128(%rdi)
+ paddd %xmm2,%xmm14
+ movdqu %xmm9,32-128(%rdi)
+ paddd %xmm5,%xmm15
+ movdqu %xmm10,64-128(%rdi)
+ movdqu %xmm11,96-128(%rdi)
+ movdqu %xmm12,128-128(%rdi)
+ movdqu %xmm13,160-128(%rdi)
+ movdqu %xmm14,192-128(%rdi)
+ movdqu %xmm15,224-128(%rdi)
+
+ movdqa %xmm7,(%rbx)
+ movdqa .Lpbswap(%rip),%xmm6
+ decl %edx
+ jnz .Loop
+
+ movl 280(%rsp),%edx
+ leaq 16(%rdi),%rdi
+ leaq 64(%rsi),%rsi
+ decl %edx
+ jnz .Loop_grande
+
+.Ldone:
+ movq 272(%rsp),%rax
+ movaps -184(%rax),%xmm6
+ movaps -168(%rax),%xmm7
+ movaps -152(%rax),%xmm8
+ movaps -136(%rax),%xmm9
+ movaps -120(%rax),%xmm10
+ movaps -104(%rax),%xmm11
+ movaps -88(%rax),%xmm12
+ movaps -72(%rax),%xmm13
+ movaps -56(%rax),%xmm14
+ movaps -40(%rax),%xmm15
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
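+# 0xf3,0xc3 is "rep ret", a two-byte return that avoids a branch
+# prediction penalty on older AMD processors.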
+ .byte 0xf3,0xc3
+.LSEH_end_sha256_multi_block:
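+# Round-constant table: every SHA-256 constant is replicated eight
+# times (two 16-byte rows), presumably so that wider multi-block
+# variants can share the table; this 4-lane code consumes one row per
+# round.  All references are biased by K256+128 so that each
+# displacement fits in a signed byte.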
+.p2align 8
+K256:
+.long 1116352408,1116352408,1116352408,1116352408
+.long 1116352408,1116352408,1116352408,1116352408
+.long 1899447441,1899447441,1899447441,1899447441
+.long 1899447441,1899447441,1899447441,1899447441
+.long 3049323471,3049323471,3049323471,3049323471
+.long 3049323471,3049323471,3049323471,3049323471
+.long 3921009573,3921009573,3921009573,3921009573
+.long 3921009573,3921009573,3921009573,3921009573
+.long 961987163,961987163,961987163,961987163
+.long 961987163,961987163,961987163,961987163
+.long 1508970993,1508970993,1508970993,1508970993
+.long 1508970993,1508970993,1508970993,1508970993
+.long 2453635748,2453635748,2453635748,2453635748
+.long 2453635748,2453635748,2453635748,2453635748
+.long 2870763221,2870763221,2870763221,2870763221
+.long 2870763221,2870763221,2870763221,2870763221
+.long 3624381080,3624381080,3624381080,3624381080
+.long 3624381080,3624381080,3624381080,3624381080
+.long 310598401,310598401,310598401,310598401
+.long 310598401,310598401,310598401,310598401
+.long 607225278,607225278,607225278,607225278
+.long 607225278,607225278,607225278,607225278
+.long 1426881987,1426881987,1426881987,1426881987
+.long 1426881987,1426881987,1426881987,1426881987
+.long 1925078388,1925078388,1925078388,1925078388
+.long 1925078388,1925078388,1925078388,1925078388
+.long 2162078206,2162078206,2162078206,2162078206
+.long 2162078206,2162078206,2162078206,2162078206
+.long 2614888103,2614888103,2614888103,2614888103
+.long 2614888103,2614888103,2614888103,2614888103
+.long 3248222580,3248222580,3248222580,3248222580
+.long 3248222580,3248222580,3248222580,3248222580
+.long 3835390401,3835390401,3835390401,3835390401
+.long 3835390401,3835390401,3835390401,3835390401
+.long 4022224774,4022224774,4022224774,4022224774
+.long 4022224774,4022224774,4022224774,4022224774
+.long 264347078,264347078,264347078,264347078
+.long 264347078,264347078,264347078,264347078
+.long 604807628,604807628,604807628,604807628
+.long 604807628,604807628,604807628,604807628
+.long 770255983,770255983,770255983,770255983
+.long 770255983,770255983,770255983,770255983
+.long 1249150122,1249150122,1249150122,1249150122
+.long 1249150122,1249150122,1249150122,1249150122
+.long 1555081692,1555081692,1555081692,1555081692
+.long 1555081692,1555081692,1555081692,1555081692
+.long 1996064986,1996064986,1996064986,1996064986
+.long 1996064986,1996064986,1996064986,1996064986
+.long 2554220882,2554220882,2554220882,2554220882
+.long 2554220882,2554220882,2554220882,2554220882
+.long 2821834349,2821834349,2821834349,2821834349
+.long 2821834349,2821834349,2821834349,2821834349
+.long 2952996808,2952996808,2952996808,2952996808
+.long 2952996808,2952996808,2952996808,2952996808
+.long 3210313671,3210313671,3210313671,3210313671
+.long 3210313671,3210313671,3210313671,3210313671
+.long 3336571891,3336571891,3336571891,3336571891
+.long 3336571891,3336571891,3336571891,3336571891
+.long 3584528711,3584528711,3584528711,3584528711
+.long 3584528711,3584528711,3584528711,3584528711
+.long 113926993,113926993,113926993,113926993
+.long 113926993,113926993,113926993,113926993
+.long 338241895,338241895,338241895,338241895
+.long 338241895,338241895,338241895,338241895
+.long 666307205,666307205,666307205,666307205
+.long 666307205,666307205,666307205,666307205
+.long 773529912,773529912,773529912,773529912
+.long 773529912,773529912,773529912,773529912
+.long 1294757372,1294757372,1294757372,1294757372
+.long 1294757372,1294757372,1294757372,1294757372
+.long 1396182291,1396182291,1396182291,1396182291
+.long 1396182291,1396182291,1396182291,1396182291
+.long 1695183700,1695183700,1695183700,1695183700
+.long 1695183700,1695183700,1695183700,1695183700
+.long 1986661051,1986661051,1986661051,1986661051
+.long 1986661051,1986661051,1986661051,1986661051
+.long 2177026350,2177026350,2177026350,2177026350
+.long 2177026350,2177026350,2177026350,2177026350
+.long 2456956037,2456956037,2456956037,2456956037
+.long 2456956037,2456956037,2456956037,2456956037
+.long 2730485921,2730485921,2730485921,2730485921
+.long 2730485921,2730485921,2730485921,2730485921
+.long 2820302411,2820302411,2820302411,2820302411
+.long 2820302411,2820302411,2820302411,2820302411
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3600352804,3600352804,3600352804,3600352804
+.long 3600352804,3600352804,3600352804,3600352804
+.long 4094571909,4094571909,4094571909,4094571909
+.long 4094571909,4094571909,4094571909,4094571909
+.long 275423344,275423344,275423344,275423344
+.long 275423344,275423344,275423344,275423344
+.long 430227734,430227734,430227734,430227734
+.long 430227734,430227734,430227734,430227734
+.long 506948616,506948616,506948616,506948616
+.long 506948616,506948616,506948616,506948616
+.long 659060556,659060556,659060556,659060556
+.long 659060556,659060556,659060556,659060556
+.long 883997877,883997877,883997877,883997877
+.long 883997877,883997877,883997877,883997877
+.long 958139571,958139571,958139571,958139571
+.long 958139571,958139571,958139571,958139571
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1955562222,1955562222,1955562222,1955562222
+.long 1955562222,1955562222,1955562222,1955562222
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2756734187,2756734187,2756734187,2756734187
+.long 2756734187,2756734187,2756734187,2756734187
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3329325298,3329325298,3329325298,3329325298
+.long 3329325298,3329325298,3329325298,3329325298
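+# pshufb control that reverses the bytes of each 32-bit word, i.e. a
+# big-endian load mask for the message words.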
+.Lpbswap:
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/sha256-ssse3-x86.s b/lib/accelerated/x86/coff/sha256-ssse3-x86.s
new file mode 100644
index 0000000000..6fe27746ce
--- /dev/null
+++ b/lib/accelerated/x86/coff/sha256-ssse3-x86.s
@@ -0,0 +1,3402 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "sha512-586.s"
+.text
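+# 32-bit SHA-256 block function.  The entry code below selects one of
+# several bodies at run time -- a compact ror-based loop, an shrd-based
+# variant, an unrolled version for inputs of 256 bytes or more, and
+# vector paths further down the file -- based on the capability words
+# in __gnutls_x86_cpuid_s.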
+.globl _sha256_block_data_order
+.def _sha256_block_data_order; .scl 2; .type 32; .endef
+.align 16
+_sha256_block_data_order:
+.L_sha256_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl %esp,%ebx
+ call .L000pic_point
+.L000pic_point:
+ popl %ebp
+ leal .L001K256-.L000pic_point(%ebp),%ebp
+ subl $16,%esp
+ andl $-64,%esp
+ shll $6,%eax
+ addl %edi,%eax
+ movl %esi,(%esp)
+ movl %edi,4(%esp)
+ movl %eax,8(%esp)
+ movl %ebx,12(%esp)
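+# Run-time dispatch: two feature words are read from
+# __gnutls_x86_cpuid_s (gnutls' analogue of OPENSSL_ia32cap_P) and
+# masked; the 0x50000000 comparison appears to steer recent Intel
+# parts to the shrd loop, while longer inputs fall through to the
+# unrolled body.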
+ leal __gnutls_x86_cpuid_s-.L001K256(%ebp),%edx
+ movl (%edx),%ecx
+ movl 4(%edx),%ebx
+ testl $1048576,%ecx
+ jnz .L002loop
+ andl $1073741824,%ecx
+ andl $268435968,%ebx
+ orl %ebx,%ecx
+ andl $1342177280,%ecx
+ cmpl $1342177280,%ecx
+ je .L003loop_shrd
+ subl %edi,%eax
+ cmpl $256,%eax
+ jae .L004unrolled
+ jmp .L002loop
+.align 16
+.L002loop:
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ bswap %eax
+ movl 12(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 16(%edi),%eax
+ movl 20(%edi),%ebx
+ movl 24(%edi),%ecx
+ bswap %eax
+ movl 28(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 32(%edi),%eax
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %eax
+ movl 44(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 48(%edi),%eax
+ movl 52(%edi),%ebx
+ movl 56(%edi),%ecx
+ bswap %eax
+ movl 60(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ addl $64,%edi
+ leal -36(%esp),%esp
+ movl %edi,104(%esp)
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,8(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,12(%esp)
+ movl %edi,16(%esp)
+ movl %ebx,(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ movl %edi,32(%esp)
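+# Rounds 0..15, one per iteration.  Instead of renaming registers, the
+# frame slides down four bytes per round (leal -4(%esp),%esp), which
+# rotates the a..h slots; the loop exits once the round constant just
+# loaded reaches K[15] = 0xc19bf174 (3248222580).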
+.align 16
+.L00500_15:
+ movl %edx,%ecx
+ movl 24(%esp),%esi
+ rorl $14,%ecx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl 96(%esp),%ebx
+ rorl $5,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3248222580,%esi
+ jne .L00500_15
+ movl 156(%esp),%ecx
+ jmp .L00616_63
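+# Rounds 16..63: the same round body preceded by message-schedule
+# expansion (sigma0/sigma1 via shr/ror); terminates when the constant
+# reaches K[63] = 0xc67178f2 (3329325298).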
+.align 16
+.L00616_63:
+ movl %ecx,%ebx
+ movl 104(%esp),%esi
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 160(%esp),%ebx
+ shrl $10,%edi
+ addl 124(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 24(%esp),%esi
+ rorl $14,%ecx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl %ebx,96(%esp)
+ rorl $5,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ movl 156(%esp),%ecx
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3329325298,%esi
+ jne .L00616_63
+ movl 356(%esp),%esi
+ movl 8(%esp),%ebx
+ movl 16(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl 24(%esp),%eax
+ movl 28(%esp),%ebx
+ movl 32(%esp),%ecx
+ movl 360(%esp),%edi
+ addl 16(%esi),%edx
+ addl 20(%esi),%eax
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %eax,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ leal 356(%esp),%esp
+ subl $256,%ebp
+ cmpl 8(%esp),%edi
+ jb .L002loop
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
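+# Alternate body chosen by the dispatch above: identical round logic,
+# but rotations use shrdl (a double shift of a register into itself is
+# a right rotate), which some processors execute faster than rorl.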
+.align 32
+.L003loop_shrd:
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ bswap %eax
+ movl 12(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 16(%edi),%eax
+ movl 20(%edi),%ebx
+ movl 24(%edi),%ecx
+ bswap %eax
+ movl 28(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 32(%edi),%eax
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %eax
+ movl 44(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 48(%edi),%eax
+ movl 52(%edi),%ebx
+ movl 56(%edi),%ecx
+ bswap %eax
+ movl 60(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ addl $64,%edi
+ leal -36(%esp),%esp
+ movl %edi,104(%esp)
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,8(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,12(%esp)
+ movl %edi,16(%esp)
+ movl %ebx,(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ movl %edi,32(%esp)
+.align 16
+.L00700_15_shrd:
+ movl %edx,%ecx
+ movl 24(%esp),%esi
+ shrdl $14,%ecx,%ecx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl 96(%esp),%ebx
+ shrdl $5,%ecx,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ shrdl $9,%ecx,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ shrdl $11,%ecx,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3248222580,%esi
+ jne .L00700_15_shrd
+ movl 156(%esp),%ecx
+ jmp .L00816_63_shrd
+.align 16
+.L00816_63_shrd:
+ movl %ecx,%ebx
+ movl 104(%esp),%esi
+ shrdl $11,%ecx,%ecx
+ movl %esi,%edi
+ shrdl $2,%esi,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ shrdl $17,%esi,%esi
+ addl 160(%esp),%ebx
+ shrl $10,%edi
+ addl 124(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 24(%esp),%esi
+ shrdl $14,%ecx,%ecx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl %ebx,96(%esp)
+ shrdl $5,%ecx,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ shrdl $9,%ecx,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ shrdl $11,%ecx,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ movl 156(%esp),%ecx
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3329325298,%esi
+ jne .L00816_63_shrd
+ movl 356(%esp),%esi
+ movl 8(%esp),%ebx
+ movl 16(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl 24(%esp),%eax
+ movl 28(%esp),%ebx
+ movl 32(%esp),%ecx
+ movl 360(%esp),%edi
+ addl 16(%esi),%edx
+ addl 20(%esi),%eax
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %eax,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ leal 356(%esp),%esp
+ subl $256,%ebp
+ cmpl 8(%esp),%edi
+ jb .L003loop_shrd
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
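+# K256 table: the 64 SHA-256 round constants (0x428a2f98 .. 0xc67178f2),
+# followed by a 0x00010203..0x0c0d0e0f byte-swap mask used by the SIMD
+# loads, and the CRYPTOGAMS credits string in ASCII.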
+.align 64
+.L001K256:
+.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
+.long 66051,67438087,134810123,202182159
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte 62,0
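+# Fully unrolled scalar path: all 64 rounds are emitted inline with the
+# round constants as immediates (leal K(%ebx,%edx,1)), avoiding the
+# per-round table walk of the compact loops above.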
+.align 16
+.L004unrolled:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebp
+ movl 8(%esi),%ecx
+ movl 12(%esi),%ebx
+ movl %ebp,4(%esp)
+ xorl %ecx,%ebp
+ movl %ecx,8(%esp)
+ movl %ebx,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ jmp .L009grand_loop
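+# One iteration per 64-byte block: byte-swap the block into 32..92(%esp),
+# run rounds 0-15 reading W[i] directly, then rounds 16-63 expanding the
+# schedule in place, and finally fold the working variables back into the
+# hash state.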
+.align 16
+.L009grand_loop:
+ movl (%edi),%ebx
+ movl 4(%edi),%ecx
+ bswap %ebx
+ movl 8(%edi),%esi
+ bswap %ecx
+ movl %ebx,32(%esp)
+ bswap %esi
+ movl %ecx,36(%esp)
+ movl %esi,40(%esp)
+ movl 12(%edi),%ebx
+ movl 16(%edi),%ecx
+ bswap %ebx
+ movl 20(%edi),%esi
+ bswap %ecx
+ movl %ebx,44(%esp)
+ bswap %esi
+ movl %ecx,48(%esp)
+ movl %esi,52(%esp)
+ movl 24(%edi),%ebx
+ movl 28(%edi),%ecx
+ bswap %ebx
+ movl 32(%edi),%esi
+ bswap %ecx
+ movl %ebx,56(%esp)
+ bswap %esi
+ movl %ecx,60(%esp)
+ movl %esi,64(%esp)
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %ebx
+ movl 44(%edi),%esi
+ bswap %ecx
+ movl %ebx,68(%esp)
+ bswap %esi
+ movl %ecx,72(%esp)
+ movl %esi,76(%esp)
+ movl 48(%edi),%ebx
+ movl 52(%edi),%ecx
+ bswap %ebx
+ movl 56(%edi),%esi
+ bswap %ecx
+ movl %ebx,80(%esp)
+ bswap %esi
+ movl %ecx,84(%esp)
+ movl %esi,88(%esp)
+ movl 60(%edi),%ebx
+ addl $64,%edi
+ bswap %ebx
+ movl %edi,100(%esp)
+ movl %ebx,92(%esp)
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1116352408(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 36(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1899447441(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 40(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3049323471(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 44(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3921009573(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 48(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 961987163(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 52(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1508970993(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 56(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2453635748(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 60(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2870763221(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 64(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3624381080(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 68(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 310598401(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 72(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 607225278(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 76(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1426881987(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 80(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1925078388(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 84(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2162078206(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 88(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2614888103(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 92(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3248222580(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3835390401(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 4022224774(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 264347078(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 604807628(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 770255983(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1249150122(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1555081692(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1996064986(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2554220882(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2821834349(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2952996808(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3210313671(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3336571891(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3584528711(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 113926993(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,92(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 338241895(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 666307205(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 773529912(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1294757372(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1396182291(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1695183700(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1986661051(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2177026350(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2456956037(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2730485921(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2820302411(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3259730800(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3345764771(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3516065817(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3600352804(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 4094571909(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,92(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 275423344(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 430227734(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 506948616(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 659060556(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 883997877(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 958139571(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1322822218(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1537002063(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1747873779(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1955562222(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2024104815(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2227730452(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2361852424(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2428436474(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2756734187(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3204031479(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3329325298(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 96(%esp),%esi
+ xorl %edi,%ebp
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebp
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebp,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ebx
+ movl 28(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ cmpl 104(%esp),%edi
+ jb .L009grand_loop
+ movl 108(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.comm __gnutls_x86_cpuid_s,16
+
+.section .note.GNU-stack,"",%progbits
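
For orientation (not part of the patch): all three scalar variants in the
generated SHA-256 code above unroll the standard FIPS 180-4 round function.
A minimal C sketch follows; the names (sha256_block, rotr) are illustrative
rather than gnutls symbols, and K[] stands for the table emitted at
.L001K256. The rorl and shrdl round bodies above compute exactly these
rotate/xor combinations.

#include <stdint.h>

static uint32_t rotr(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

void sha256_block(uint32_t h[8], const uint8_t p[64], const uint32_t K[64])
{
    uint32_t W[64], v[8], T1, T2;
    int i;

    /* big-endian load: the bswap/movl sequences at the top of each loop */
    for (i = 0; i < 16; i++)
        W[i] = (uint32_t)p[4*i] << 24 | (uint32_t)p[4*i+1] << 16 |
               (uint32_t)p[4*i+2] << 8 | p[4*i+3];
    /* schedule expansion: the .L00616_63 / .L00816_63_shrd rounds */
    for (; i < 64; i++)
        W[i] = (rotr(W[i-2], 17) ^ rotr(W[i-2], 19) ^ (W[i-2] >> 10)) +
               W[i-7] +
               (rotr(W[i-15], 7) ^ rotr(W[i-15], 18) ^ (W[i-15] >> 3)) +
               W[i-16];

    for (i = 0; i < 8; i++)
        v[i] = h[i];

    for (i = 0; i < 64; i++) {  /* one iteration == one unrolled round */
        T1 = v[7] + (rotr(v[4], 6) ^ rotr(v[4], 11) ^ rotr(v[4], 25)) +
             ((v[4] & v[5]) ^ (~v[4] & v[6])) + K[i] + W[i];
        T2 = (rotr(v[0], 2) ^ rotr(v[0], 13) ^ rotr(v[0], 22)) +
             ((v[0] & v[1]) ^ (v[0] & v[2]) ^ (v[1] & v[2]));
        v[7] = v[6]; v[6] = v[5]; v[5] = v[4]; v[4] = v[3] + T1;
        v[3] = v[2]; v[2] = v[1]; v[1] = v[0]; v[0] = T1 + T2;
    }

    for (i = 0; i < 8; i++)  /* the trailing addl (%esi) block */
        h[i] += v[i];
}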
diff --git a/lib/accelerated/x86/coff/sha512-ssse3-x86.s b/lib/accelerated/x86/coff/sha512-ssse3-x86.s
new file mode 100644
index 0000000000..79098da5c2
--- /dev/null
+++ b/lib/accelerated/x86/coff/sha512-ssse3-x86.s
@@ -0,0 +1,605 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "sha512-586.s"
+.text
+.globl _sha512_block_data_order
+.def _sha512_block_data_order; .scl 2; .type 32; .endef
+.align 16
+_sha512_block_data_order:
+.L_sha512_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl %esp,%ebx
+ call .L000pic_point
+.L000pic_point:
+ popl %ebp
+ leal .L001K512-.L000pic_point(%ebp),%ebp
+ subl $16,%esp
+ andl $-64,%esp
+ shll $7,%eax
+ addl %edi,%eax
+ movl %esi,(%esp)
+ movl %edi,4(%esp)
+ movl %eax,8(%esp)
+ movl %ebx,12(%esp)
+.align 16
+.L002loop_x86:
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 16(%edi),%eax
+ movl 20(%edi),%ebx
+ movl 24(%edi),%ecx
+ movl 28(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 32(%edi),%eax
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ movl 44(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 48(%edi),%eax
+ movl 52(%edi),%ebx
+ movl 56(%edi),%ecx
+ movl 60(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 64(%edi),%eax
+ movl 68(%edi),%ebx
+ movl 72(%edi),%ecx
+ movl 76(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 80(%edi),%eax
+ movl 84(%edi),%ebx
+ movl 88(%edi),%ecx
+ movl 92(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 96(%edi),%eax
+ movl 100(%edi),%ebx
+ movl 104(%edi),%ecx
+ movl 108(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 112(%edi),%eax
+ movl 116(%edi),%ebx
+ movl 120(%edi),%ecx
+ movl 124(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ addl $128,%edi
+ subl $72,%esp
+ movl %edi,204(%esp)
+ leal 8(%esp),%edi
+ movl $16,%ecx
+.long 2784229001
+.align 16
+.L00300_15_x86:
+ movl 40(%esp),%ecx
+ movl 44(%esp),%edx
+ movl %ecx,%esi
+ shrl $9,%ecx
+ movl %edx,%edi
+ shrl $9,%edx
+ movl %ecx,%ebx
+ shll $14,%esi
+ movl %edx,%eax
+ shll $14,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%eax
+ shll $4,%esi
+ xorl %edx,%ebx
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $4,%ecx
+ xorl %edi,%eax
+ shrl $4,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 48(%esp),%ecx
+ movl 52(%esp),%edx
+ movl 56(%esp),%esi
+ movl 60(%esp),%edi
+ addl 64(%esp),%eax
+ adcl 68(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ andl 40(%esp),%ecx
+ andl 44(%esp),%edx
+ addl 192(%esp),%eax
+ adcl 196(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ movl (%ebp),%esi
+ movl 4(%ebp),%edi
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 32(%esp),%ecx
+ movl 36(%esp),%edx
+ addl %esi,%eax
+ adcl %edi,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl %eax,32(%esp)
+ movl %ebx,36(%esp)
+ movl %ecx,%esi
+ shrl $2,%ecx
+ movl %edx,%edi
+ shrl $2,%edx
+ movl %ecx,%ebx
+ shll $4,%esi
+ movl %edx,%eax
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%ebx
+ shll $21,%esi
+ xorl %edx,%eax
+ shll $21,%edi
+ xorl %esi,%eax
+ shrl $21,%ecx
+ xorl %edi,%ebx
+ shrl $21,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl 16(%esp),%esi
+ movl 20(%esp),%edi
+ addl (%esp),%eax
+ adcl 4(%esp),%ebx
+ orl %esi,%ecx
+ orl %edi,%edx
+ andl 24(%esp),%ecx
+ andl 28(%esp),%edx
+ andl 8(%esp),%esi
+ andl 12(%esp),%edi
+ orl %esi,%ecx
+ orl %edi,%edx
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movb (%ebp),%dl
+ subl $8,%esp
+ leal 8(%ebp),%ebp
+ cmpb $148,%dl
+ jne .L00300_15_x86
+.align 16
+.L00416_79_x86:
+ movl 312(%esp),%ecx
+ movl 316(%esp),%edx
+ movl %ecx,%esi
+ shrl $1,%ecx
+ movl %edx,%edi
+ shrl $1,%edx
+ movl %ecx,%eax
+ shll $24,%esi
+ movl %edx,%ebx
+ shll $24,%edi
+ xorl %esi,%ebx
+ shrl $6,%ecx
+ xorl %edi,%eax
+ shrl $6,%edx
+ xorl %ecx,%eax
+ shll $7,%esi
+ xorl %edx,%ebx
+ shll $1,%edi
+ xorl %esi,%ebx
+ shrl $1,%ecx
+ xorl %edi,%eax
+ shrl $1,%edx
+ xorl %ecx,%eax
+ shll $6,%edi
+ xorl %edx,%ebx
+ xorl %edi,%eax
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movl 208(%esp),%ecx
+ movl 212(%esp),%edx
+ movl %ecx,%esi
+ shrl $6,%ecx
+ movl %edx,%edi
+ shrl $6,%edx
+ movl %ecx,%eax
+ shll $3,%esi
+ movl %edx,%ebx
+ shll $3,%edi
+ xorl %esi,%eax
+ shrl $13,%ecx
+ xorl %edi,%ebx
+ shrl $13,%edx
+ xorl %ecx,%eax
+ shll $10,%esi
+ xorl %edx,%ebx
+ shll $10,%edi
+ xorl %esi,%ebx
+ shrl $10,%ecx
+ xorl %edi,%eax
+ shrl $10,%edx
+ xorl %ecx,%ebx
+ shll $13,%edi
+ xorl %edx,%eax
+ xorl %edi,%eax
+ movl 320(%esp),%ecx
+ movl 324(%esp),%edx
+ addl (%esp),%eax
+ adcl 4(%esp),%ebx
+ movl 248(%esp),%esi
+ movl 252(%esp),%edi
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ addl %esi,%eax
+ adcl %edi,%ebx
+ movl %eax,192(%esp)
+ movl %ebx,196(%esp)
+ movl 40(%esp),%ecx
+ movl 44(%esp),%edx
+ movl %ecx,%esi
+ shrl $9,%ecx
+ movl %edx,%edi
+ shrl $9,%edx
+ movl %ecx,%ebx
+ shll $14,%esi
+ movl %edx,%eax
+ shll $14,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%eax
+ shll $4,%esi
+ xorl %edx,%ebx
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $4,%ecx
+ xorl %edi,%eax
+ shrl $4,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 48(%esp),%ecx
+ movl 52(%esp),%edx
+ movl 56(%esp),%esi
+ movl 60(%esp),%edi
+ addl 64(%esp),%eax
+ adcl 68(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ andl 40(%esp),%ecx
+ andl 44(%esp),%edx
+ addl 192(%esp),%eax
+ adcl 196(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ movl (%ebp),%esi
+ movl 4(%ebp),%edi
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 32(%esp),%ecx
+ movl 36(%esp),%edx
+ addl %esi,%eax
+ adcl %edi,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl %eax,32(%esp)
+ movl %ebx,36(%esp)
+ movl %ecx,%esi
+ shrl $2,%ecx
+ movl %edx,%edi
+ shrl $2,%edx
+ movl %ecx,%ebx
+ shll $4,%esi
+ movl %edx,%eax
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%ebx
+ shll $21,%esi
+ xorl %edx,%eax
+ shll $21,%edi
+ xorl %esi,%eax
+ shrl $21,%ecx
+ xorl %edi,%ebx
+ shrl $21,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl 16(%esp),%esi
+ movl 20(%esp),%edi
+ addl (%esp),%eax
+ adcl 4(%esp),%ebx
+ orl %esi,%ecx
+ orl %edi,%edx
+ andl 24(%esp),%ecx
+ andl 28(%esp),%edx
+ andl 8(%esp),%esi
+ andl 12(%esp),%edi
+ orl %esi,%ecx
+ orl %edi,%edx
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movb (%ebp),%dl
+ subl $8,%esp
+ leal 8(%ebp),%ebp
+ cmpb $23,%dl
+ jne .L00416_79_x86
+ movl 840(%esp),%esi
+ movl 844(%esp),%edi
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ addl 8(%esp),%eax
+ adcl 12(%esp),%ebx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ addl 16(%esp),%ecx
+ adcl 20(%esp),%edx
+ movl %ecx,8(%esi)
+ movl %edx,12(%esi)
+ movl 16(%esi),%eax
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edx
+ addl 24(%esp),%eax
+ adcl 28(%esp),%ebx
+ movl %eax,16(%esi)
+ movl %ebx,20(%esi)
+ addl 32(%esp),%ecx
+ adcl 36(%esp),%edx
+ movl %ecx,24(%esi)
+ movl %edx,28(%esi)
+ movl 32(%esi),%eax
+ movl 36(%esi),%ebx
+ movl 40(%esi),%ecx
+ movl 44(%esi),%edx
+ addl 40(%esp),%eax
+ adcl 44(%esp),%ebx
+ movl %eax,32(%esi)
+ movl %ebx,36(%esi)
+ addl 48(%esp),%ecx
+ adcl 52(%esp),%edx
+ movl %ecx,40(%esi)
+ movl %edx,44(%esi)
+ movl 48(%esi),%eax
+ movl 52(%esi),%ebx
+ movl 56(%esi),%ecx
+ movl 60(%esi),%edx
+ addl 56(%esp),%eax
+ adcl 60(%esp),%ebx
+ movl %eax,48(%esi)
+ movl %ebx,52(%esi)
+ addl 64(%esp),%ecx
+ adcl 68(%esp),%edx
+ movl %ecx,56(%esi)
+ movl %edx,60(%esi)
+ addl $840,%esp
+ subl $640,%ebp
+ cmpl 8(%esp),%edi
+ jb .L002loop_x86
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 64
+.L001K512:
+.long 3609767458,1116352408
+.long 602891725,1899447441
+.long 3964484399,3049323471
+.long 2173295548,3921009573
+.long 4081628472,961987163
+.long 3053834265,1508970993
+.long 2937671579,2453635748
+.long 3664609560,2870763221
+.long 2734883394,3624381080
+.long 1164996542,310598401
+.long 1323610764,607225278
+.long 3590304994,1426881987
+.long 4068182383,1925078388
+.long 991336113,2162078206
+.long 633803317,2614888103
+.long 3479774868,3248222580
+.long 2666613458,3835390401
+.long 944711139,4022224774
+.long 2341262773,264347078
+.long 2007800933,604807628
+.long 1495990901,770255983
+.long 1856431235,1249150122
+.long 3175218132,1555081692
+.long 2198950837,1996064986
+.long 3999719339,2554220882
+.long 766784016,2821834349
+.long 2566594879,2952996808
+.long 3203337956,3210313671
+.long 1034457026,3336571891
+.long 2466948901,3584528711
+.long 3758326383,113926993
+.long 168717936,338241895
+.long 1188179964,666307205
+.long 1546045734,773529912
+.long 1522805485,1294757372
+.long 2643833823,1396182291
+.long 2343527390,1695183700
+.long 1014477480,1986661051
+.long 1206759142,2177026350
+.long 344077627,2456956037
+.long 1290863460,2730485921
+.long 3158454273,2820302411
+.long 3505952657,3259730800
+.long 106217008,3345764771
+.long 3606008344,3516065817
+.long 1432725776,3600352804
+.long 1467031594,4094571909
+.long 851169720,275423344
+.long 3100823752,430227734
+.long 1363258195,506948616
+.long 3750685593,659060556
+.long 3785050280,883997877
+.long 3318307427,958139571
+.long 3812723403,1322822218
+.long 2003034995,1537002063
+.long 3602036899,1747873779
+.long 1575990012,1955562222
+.long 1125592928,2024104815
+.long 2716904306,2227730452
+.long 442776044,2361852424
+.long 593698344,2428436474
+.long 3733110249,2756734187
+.long 2999351573,3204031479
+.long 3815920427,3329325298
+.long 3928383900,3391569614
+.long 566280711,3515267271
+.long 3454069534,3940187606
+.long 4000239992,4118630271
+.long 1914138554,116418474
+.long 2731055270,174292421
+.long 3203993006,289380356
+.long 320620315,460393269
+.long 587496836,685471733
+.long 1086792851,852142971
+.long 365543100,1017036298
+.long 2618297676,1126000580
+.long 3409855158,1288033470
+.long 4234509866,1501505948
+.long 987167468,1607167915
+.long 1246189591,1816402316
+.long 67438087,66051
+.long 202182159,134810123
+.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte 62,0
+
+.section .note.GNU-stack,"",%progbits
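
(Reading aid, not part of the patch.) The 32-bit file above exports _sha512_block_data_order and keeps the whole 64-bit state in 32-bit register pairs; its .L001K512 table stores each 64-bit round constant as a little-endian pair of .long values, low word first (so 3609767458,1116352408 is K[0] = 0x428a2f98d728ae22). The prologue shifts the third stack argument left by 7, so that argument is a count of 128-byte blocks. A hedged sketch of the interface this implies, following the usual OpenSSL naming; the prototype is inferred from the stack usage, not taken from gnutls sources:

#include <stddef.h>
#include <stdint.h>

/* Assumed prototype for the entry point exported above: state is the
 * eight 64-bit chaining values H0..H7, in is the message data, and
 * num is the number of 128-byte blocks to compress (the assembly
 * computes the end pointer as in + (num << 7)). */
void sha512_block_data_order(uint64_t state[8],
                             const uint8_t *in,
                             size_t num);
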
diff --git a/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s b/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s
new file mode 100644
index 0000000000..bbb2661f26
--- /dev/null
+++ b/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s
@@ -0,0 +1,3025 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+
+
+.globl sha256_block_data_order
+.def sha256_block_data_order; .scl 2; .type 32; .endef
+.p2align 4
+sha256_block_data_order:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_sha256_block_data_order:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+ leaq _gnutls_x86_cpuid_s(%rip),%r11
+ movl 0(%r11),%r9d
+ movl 4(%r11),%r10d
+ movl 8(%r11),%r11d
+ testl $512,%r10d
+ jnz .Lssse3_shortcut
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+.Lprologue:
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp .Lloop
+
+.p2align 4
+.Lloop:
+ movl %ebx,%edi
+ leaq K256(%rip),%rbp
+ xorl %ecx,%edi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,0(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r11d
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,4(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r10d
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,8(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r9d
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,12(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ addl %r14d,%r8d
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,16(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%edx
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,20(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ecx
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,24(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ebx
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,28(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ addl %r14d,%eax
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,32(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r11d
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,36(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r10d
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,40(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r9d
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,44(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ addl %r14d,%r8d
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,48(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%edx
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,52(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ecx
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,56(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ebx
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,60(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ jmp .Lrounds_16_xx
+.p2align 4
+.Lrounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%eax
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 36(%rsp),%r12d
+
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,0(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r11d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 40(%rsp),%r12d
+
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,4(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r10d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 44(%rsp),%r12d
+
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,8(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r9d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 48(%rsp),%r12d
+
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,12(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r8d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 52(%rsp),%r12d
+
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,16(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%edx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 56(%rsp),%r12d
+
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,20(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ecx
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 60(%rsp),%r12d
+
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,24(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ebx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 0(%rsp),%r12d
+
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,28(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%eax
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 4(%rsp),%r12d
+
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,32(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r11d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 8(%rsp),%r12d
+
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,36(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r10d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 12(%rsp),%r12d
+
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,40(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r9d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 16(%rsp),%r12d
+
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,44(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r8d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 20(%rsp),%r12d
+
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,48(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%edx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 24(%rsp),%r12d
+
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,52(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ecx
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 28(%rsp),%r12d
+
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,56(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ebx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 32(%rsp),%r12d
+
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,60(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ cmpb $0,3(%rbp)
+ jnz .Lrounds_16_xx
+
+ movq 64+0(%rsp),%rdi
+ addl %r14d,%eax
+ leaq 64(%rsi),%rsi
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ .byte 0xf3,0xc3
+.LSEH_end_sha256_block_data_order:
+.p2align 6
+
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.def sha256_block_data_order_ssse3; .scl 3; .type 32; .endef
+.p2align 6
+sha256_block_data_order_ssse3:
+ movq %rdi,8(%rsp)
+ movq %rsi,16(%rsp)
+ movq %rsp,%rax
+.LSEH_begin_sha256_block_data_order_ssse3:
+ movq %rcx,%rdi
+ movq %rdx,%rsi
+ movq %r8,%rdx
+
+.Lssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $160,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+ movaps %xmm6,64+32(%rsp)
+ movaps %xmm7,64+48(%rsp)
+ movaps %xmm8,64+64(%rsp)
+ movaps %xmm9,64+80(%rsp)
+.Lprologue_ssse3:
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+
+
+ jmp .Lloop_ssse3
+.p2align 4
+.Lloop_ssse3:
+ movdqa K256+512(%rip),%xmm7
+ movdqu 0(%rsi),%xmm0
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+.byte 102,15,56,0,199
+ leaq K256(%rip),%rbp
+.byte 102,15,56,0,207
+ movdqa 0(%rbp),%xmm4
+.byte 102,15,56,0,215
+ movdqa 32(%rbp),%xmm5
+ paddd %xmm0,%xmm4
+ movdqa 64(%rbp),%xmm6
+.byte 102,15,56,0,223
+ movdqa 96(%rbp),%xmm7
+ paddd %xmm1,%xmm5
+ paddd %xmm2,%xmm6
+ paddd %xmm3,%xmm7
+ movdqa %xmm4,0(%rsp)
+ movl %eax,%r14d
+ movdqa %xmm5,16(%rsp)
+ movl %ebx,%edi
+ movdqa %xmm6,32(%rsp)
+ xorl %ecx,%edi
+ movdqa %xmm7,48(%rsp)
+ movl %r8d,%r13d
+ jmp .Lssse3_00_47
+
+.p2align 4
+.Lssse3_00_47:
+ subq $-32*4,%rbp
+ rorl $14,%r13d
+ movdqa %xmm1,%xmm4
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ movdqa %xmm3,%xmm7
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+.byte 102,15,58,15,224,4
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+.byte 102,15,58,15,250,4
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ paddd %xmm7,%xmm0
+ rorl $2,%r14d
+ addl %r11d,%edx
+ psrld $7,%xmm6
+ addl %edi,%r11d
+ movl %edx,%r13d
+ pshufd $250,%xmm3,%xmm7
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %r11d,%r14d
+ pxor %xmm5,%xmm4
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ pslld $11,%xmm5
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ pxor %xmm6,%xmm4
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ paddd %xmm4,%xmm0
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ psrlq $17,%xmm6
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ psrldq $8,%xmm7
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ paddd %xmm7,%xmm0
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ pshufd $80,%xmm0,%xmm7
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ movdqa %xmm7,%xmm6
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ psrld $10,%xmm7
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ psrlq $2,%xmm6
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ pxor %xmm6,%xmm7
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ movdqa 0(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ paddd %xmm7,%xmm0
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ paddd %xmm0,%xmm6
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ movdqa %xmm6,0(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm2,%xmm4
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ movdqa %xmm0,%xmm7
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+.byte 102,15,58,15,225,4
+ andl %eax,%r12d
+ xorl %eax,%r13d
+.byte 102,15,58,15,251,4
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ paddd %xmm7,%xmm1
+ rorl $2,%r14d
+ addl %edx,%r11d
+ psrld $7,%xmm6
+ addl %edi,%edx
+ movl %r11d,%r13d
+ pshufd $250,%xmm0,%xmm7
+ addl %edx,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%edx
+ movl %eax,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %edx,%r14d
+ pxor %xmm5,%xmm4
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ pslld $11,%xmm5
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ paddd %xmm4,%xmm1
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ psrlq $17,%xmm6
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ psrldq $8,%xmm7
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ paddd %xmm7,%xmm1
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ pshufd $80,%xmm1,%xmm7
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ movdqa %xmm7,%xmm6
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ psrld $10,%xmm7
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ psrlq $2,%xmm6
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ pxor %xmm6,%xmm7
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ movdqa 32(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ paddd %xmm7,%xmm1
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ paddd %xmm1,%xmm6
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movdqa %xmm6,16(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm3,%xmm4
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ movdqa %xmm1,%xmm7
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+.byte 102,15,58,15,226,4
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+.byte 102,15,58,15,248,4
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ paddd %xmm7,%xmm2
+ rorl $2,%r14d
+ addl %r11d,%edx
+ psrld $7,%xmm6
+ addl %edi,%r11d
+ movl %edx,%r13d
+ pshufd $250,%xmm1,%xmm7
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %r11d,%r14d
+ pxor %xmm5,%xmm4
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ pslld $11,%xmm5
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ pxor %xmm6,%xmm4
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ paddd %xmm4,%xmm2
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ psrlq $17,%xmm6
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ psrldq $8,%xmm7
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ paddd %xmm7,%xmm2
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ pshufd $80,%xmm2,%xmm7
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ movdqa %xmm7,%xmm6
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ psrld $10,%xmm7
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ psrlq $2,%xmm6
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ pxor %xmm6,%xmm7
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ movdqa 64(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ paddd %xmm7,%xmm2
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ paddd %xmm2,%xmm6
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ movdqa %xmm6,32(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm0,%xmm4
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ movdqa %xmm2,%xmm7
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+.byte 102,15,58,15,227,4
+ andl %eax,%r12d
+ xorl %eax,%r13d
+.byte 102,15,58,15,249,4
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ paddd %xmm7,%xmm3
+ rorl $2,%r14d
+ addl %edx,%r11d
+ psrld $7,%xmm6
+ addl %edi,%edx
+ movl %r11d,%r13d
+ pshufd $250,%xmm2,%xmm7
+ addl %edx,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%edx
+ movl %eax,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %edx,%r14d
+ pxor %xmm5,%xmm4
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ pslld $11,%xmm5
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ paddd %xmm4,%xmm3
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ psrlq $17,%xmm6
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ psrldq $8,%xmm7
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ paddd %xmm7,%xmm3
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ pshufd $80,%xmm3,%xmm7
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ movdqa %xmm7,%xmm6
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ psrld $10,%xmm7
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ psrlq $2,%xmm6
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ pxor %xmm6,%xmm7
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ movdqa 96(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ paddd %xmm7,%xmm3
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ paddd %xmm3,%xmm6
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movdqa %xmm6,48(%rsp)
+ cmpb $0,131(%rbp)
+ jne .Lssse3_00_47
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ rorl $2,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ rorl $2,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ rorl $2,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ rorl $2,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movq 64+0(%rsp),%rdi
+ movl %r14d,%eax
+
+ addl 0(%rdi),%eax
+ leaq 64(%rsi),%rsi
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop_ssse3
+
+ movq 64+24(%rsp),%rsi
+ movaps 64+32(%rsp),%xmm6
+ movaps 64+48(%rsp),%xmm7
+ movaps 64+64(%rsp),%xmm8
+ movaps 64+80(%rsp),%xmm9
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_ssse3:
+ movq 8(%rsp),%rdi
+ movq 16(%rsp),%rsi
+ .byte 0xf3,0xc3
+.LSEH_end_sha256_block_data_order_ssse3:
+
+.def se_handler; .scl 3; .type 32; .endef
+.p2align 4
+se_handler:
+ pushq %rsi
+ pushq %rdi
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+ subq $64,%rsp
+
+ movq 120(%r8),%rax
+ movq 248(%r8),%rbx
+
+ movq 8(%r9),%rsi
+ movq 56(%r9),%r11
+
+ movl 0(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jb .Lin_prologue
+
+ movq 152(%r8),%rax
+
+ movl 4(%r11),%r10d
+ leaq (%rsi,%r10,1),%r10
+ cmpq %r10,%rbx
+ jae .Lin_prologue
+ movq %rax,%rsi
+ movq 64+24(%rax),%rax
+ leaq 48(%rax),%rax
+
+ movq -8(%rax),%rbx
+ movq -16(%rax),%rbp
+ movq -24(%rax),%r12
+ movq -32(%rax),%r13
+ movq -40(%rax),%r14
+ movq -48(%rax),%r15
+ movq %rbx,144(%r8)
+ movq %rbp,160(%r8)
+ movq %r12,216(%r8)
+ movq %r13,224(%r8)
+ movq %r14,232(%r8)
+ movq %r15,240(%r8)
+
+ leaq .Lepilogue(%rip),%r10
+ cmpq %r10,%rbx
+ jb .Lin_prologue
+
+ leaq 64+32(%rsi),%rsi
+ leaq 512(%r8),%rdi
+ movl $8,%ecx
+.long 0xa548f3fc
+
+.Lin_prologue:
+ movq 8(%rax),%rdi
+ movq 16(%rax),%rsi
+ movq %rax,152(%r8)
+ movq %rsi,168(%r8)
+ movq %rdi,176(%r8)
+
+ movq 40(%r9),%rdi
+ movq %r8,%rsi
+ movl $154,%ecx
+.long 0xa548f3fc
+
+ movq %r9,%rsi
+ xorq %rcx,%rcx
+ movq 8(%rsi),%rdx
+ movq 0(%rsi),%r8
+ movq 16(%rsi),%r9
+ movq 40(%rsi),%r10
+ leaq 56(%rsi),%r11
+ leaq 24(%rsi),%r12
+ movq %r10,32(%rsp)
+ movq %r11,40(%rsp)
+ movq %r12,48(%rsp)
+ movq %rcx,56(%rsp)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ movl $1,%eax
+ addq $64,%rsp
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ popq %rdi
+ popq %rsi
+ .byte 0xf3,0xc3
+
+
+.section .pdata
+.p2align 2
+.rva .LSEH_begin_sha256_block_data_order
+.rva .LSEH_end_sha256_block_data_order
+.rva .LSEH_info_sha256_block_data_order
+.rva .LSEH_begin_sha256_block_data_order_ssse3
+.rva .LSEH_end_sha256_block_data_order_ssse3
+.rva .LSEH_info_sha256_block_data_order_ssse3
+.section .xdata
+.p2align 3
+.LSEH_info_sha256_block_data_order:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lprologue,.Lepilogue
+.LSEH_info_sha256_block_data_order_ssse3:
+.byte 9,0,0,0
+.rva se_handler
+.rva .Lprologue_ssse3,.Lepilogue_ssse3
+
+.section .note.GNU-stack,"",%progbits
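
For orientation: in the sha256_block_data_order_ssse3 rounds above, the staged rotates factor the SHA-256 Sigma functions into three rotates of a running xor (the rorl $14/$5/$6 chain on %r13d yields Sigma1(e) = ROTR6 ^ ROTR11 ^ ROTR25, and the rorl $9/$11/$2 chain on %r14d yields Sigma0(a) = ROTR2 ^ ROTR13 ^ ROTR22), while the interleaved pshufd/psrld/psrlq/pxor/paddd work evaluates the message-schedule sigma functions on four words at a time in %xmm registers. Below is a minimal C sketch of the same per-round math, for reference only; the helper names (ror32, s0, s1, sha256_round) are illustrative, not identifiers from the patched files.

    #include <stdint.h>

    /* Illustrative reference for the listing above; not part of the patch. */
    static uint32_t ror32(uint32_t x, unsigned n)
    {
            return (x >> n) | (x << (32 - n));
    }

    /* Schedule: W[t] = s1(W[t-2]) + W[t-7] + s0(W[t-15]) + W[t-16];
     * the SSSE3 path above computes four such W values per iteration
     * and parks them, with the round constants added, at (%rsp). */
    static uint32_t s0(uint32_t x) { return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); }
    static uint32_t s1(uint32_t x) { return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); }

    /* One round: st[] holds the working variables a..h, k is the round
     * constant, w the schedule word. */
    static void sha256_round(uint32_t st[8], uint32_t k, uint32_t w)
    {
            uint32_t a = st[0], e = st[4];
            uint32_t S1 = ror32(e, 6) ^ ror32(e, 11) ^ ror32(e, 25);
            uint32_t ch = (e & st[5]) ^ (~e & st[6]);
            uint32_t t1 = st[7] + S1 + ch + k + w;
            uint32_t S0 = ror32(a, 2) ^ ror32(a, 13) ^ ror32(a, 22);
            uint32_t maj = (a & st[1]) ^ (a & st[2]) ^ (st[1] & st[2]);

            /* Rotate a..h; the assembly renames registers each round
             * instead of moving values. */
            st[7] = st[6]; st[6] = st[5]; st[5] = e; st[4] = st[3] + t1;
            st[3] = st[2]; st[2] = st[1]; st[1] = a; st[0] = t1 + S0 + maj;
    }

The cmpb $0,131(%rbp) / jne .Lssse3_00_47 pair marks the switchover: while it loops, each 16-round block also expands the next 16 schedule words in %xmm registers; once it falls through, the remaining rounds above run with the schedule already on the stack and no vector work.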
diff --git a/lib/accelerated/x86/elf/appro-aes-x86.s b/lib/accelerated/x86/elf/aesni-x86.s
index 5f0779756c..5f0779756c 100644
--- a/lib/accelerated/x86/elf/appro-aes-x86.s
+++ b/lib/accelerated/x86/elf/aesni-x86.s
diff --git a/lib/accelerated/x86/elf/appro-aes-x86-64.s b/lib/accelerated/x86/elf/aesni-x86_64.s
index d3734a6edd..d3734a6edd 100644
--- a/lib/accelerated/x86/elf/appro-aes-x86-64.s
+++ b/lib/accelerated/x86/elf/aesni-x86_64.s
diff --git a/lib/accelerated/x86/elf/cpuid-x86-64.s b/lib/accelerated/x86/elf/cpuid-x86_64.s
index 41a0061f71..41a0061f71 100644
--- a/lib/accelerated/x86/elf/cpuid-x86-64.s
+++ b/lib/accelerated/x86/elf/cpuid-x86_64.s
diff --git a/lib/accelerated/x86/elf/padlock-x86.s b/lib/accelerated/x86/elf/e_padlock-x86.s
index 2199255efe..2199255efe 100644
--- a/lib/accelerated/x86/elf/padlock-x86.s
+++ b/lib/accelerated/x86/elf/e_padlock-x86.s
diff --git a/lib/accelerated/x86/elf/padlock-x86-64.s b/lib/accelerated/x86/elf/e_padlock-x86_64.s
index 2ac113d72c..2ac113d72c 100644
--- a/lib/accelerated/x86/elf/padlock-x86-64.s
+++ b/lib/accelerated/x86/elf/e_padlock-x86_64.s
diff --git a/lib/accelerated/x86/elf/appro-aes-gcm-x86-64.s b/lib/accelerated/x86/elf/ghash-x86_64.s
index 9755951f7b..9755951f7b 100644
--- a/lib/accelerated/x86/elf/appro-aes-gcm-x86-64.s
+++ b/lib/accelerated/x86/elf/ghash-x86_64.s
diff --git a/lib/accelerated/x86/elf/sha1-ssse3-x86.s b/lib/accelerated/x86/elf/sha1-ssse3-x86.s
new file mode 100644
index 0000000000..e2f22e7c7e
--- /dev/null
+++ b/lib/accelerated/x86/elf/sha1-ssse3-x86.s
@@ -0,0 +1,1421 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "sha1-586.s"
+.text
+.globl sha1_block_data_order
+.type sha1_block_data_order,@function
+.align 16
+sha1_block_data_order:
+.L_sha1_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%ebp
+ movl 24(%esp),%esi
+ movl 28(%esp),%eax
+ subl $76,%esp
+ shll $6,%eax
+ addl %esi,%eax
+ movl %eax,104(%esp)
+ movl 16(%ebp),%edi
+ jmp .L000loop
+.align 16
+.L000loop:
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movl %ecx,8(%esp)
+ movl %edx,12(%esp)
+ movl 16(%esi),%eax
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,16(%esp)
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %edx,28(%esp)
+ movl 32(%esi),%eax
+ movl 36(%esi),%ebx
+ movl 40(%esi),%ecx
+ movl 44(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,32(%esp)
+ movl %ebx,36(%esp)
+ movl %ecx,40(%esp)
+ movl %edx,44(%esp)
+ movl 48(%esi),%eax
+ movl 52(%esi),%ebx
+ movl 56(%esi),%ecx
+ movl 60(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,48(%esp)
+ movl %ebx,52(%esp)
+ movl %ecx,56(%esp)
+ movl %edx,60(%esp)
+ movl %esi,100(%esp)
+ movl (%ebp),%eax
+ movl 4(%ebp),%ebx
+ movl 8(%ebp),%ecx
+ movl 12(%ebp),%edx
+
+ movl %ecx,%esi
+ movl %eax,%ebp
+ roll $5,%ebp
+ xorl %edx,%esi
+ addl %edi,%ebp
+ movl (%esp),%edi
+ andl %ebx,%esi
+ rorl $2,%ebx
+ xorl %edx,%esi
+ leal 1518500249(%ebp,%edi,1),%ebp
+ addl %esi,%ebp
+
+ movl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ecx,%edi
+ addl %edx,%ebp
+ movl 4(%esp),%edx
+ andl %eax,%edi
+ rorl $2,%eax
+ xorl %ecx,%edi
+ leal 1518500249(%ebp,%edx,1),%ebp
+ addl %edi,%ebp
+
+ movl %eax,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%edx
+ addl %ecx,%ebp
+ movl 8(%esp),%ecx
+ andl %esi,%edx
+ rorl $2,%esi
+ xorl %ebx,%edx
+ leal 1518500249(%ebp,%ecx,1),%ebp
+ addl %edx,%ebp
+
+ movl %esi,%ecx
+ movl %ebp,%edx
+ roll $5,%ebp
+ xorl %eax,%ecx
+ addl %ebx,%ebp
+ movl 12(%esp),%ebx
+ andl %edi,%ecx
+ rorl $2,%edi
+ xorl %eax,%ecx
+ leal 1518500249(%ebp,%ebx,1),%ebp
+ addl %ecx,%ebp
+
+ movl %edi,%ebx
+ movl %ebp,%ecx
+ roll $5,%ebp
+ xorl %esi,%ebx
+ addl %eax,%ebp
+ movl 16(%esp),%eax
+ andl %edx,%ebx
+ rorl $2,%edx
+ xorl %esi,%ebx
+ leal 1518500249(%ebp,%eax,1),%ebp
+ addl %ebx,%ebp
+
+ movl %edx,%eax
+ movl %ebp,%ebx
+ roll $5,%ebp
+ xorl %edi,%eax
+ addl %esi,%ebp
+ movl 20(%esp),%esi
+ andl %ecx,%eax
+ rorl $2,%ecx
+ xorl %edi,%eax
+ leal 1518500249(%ebp,%esi,1),%ebp
+ addl %eax,%ebp
+
+ movl %ecx,%esi
+ movl %ebp,%eax
+ roll $5,%ebp
+ xorl %edx,%esi
+ addl %edi,%ebp
+ movl 24(%esp),%edi
+ andl %ebx,%esi
+ rorl $2,%ebx
+ xorl %edx,%esi
+ leal 1518500249(%ebp,%edi,1),%ebp
+ addl %esi,%ebp
+
+ movl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ecx,%edi
+ addl %edx,%ebp
+ movl 28(%esp),%edx
+ andl %eax,%edi
+ rorl $2,%eax
+ xorl %ecx,%edi
+ leal 1518500249(%ebp,%edx,1),%ebp
+ addl %edi,%ebp
+
+ movl %eax,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%edx
+ addl %ecx,%ebp
+ movl 32(%esp),%ecx
+ andl %esi,%edx
+ rorl $2,%esi
+ xorl %ebx,%edx
+ leal 1518500249(%ebp,%ecx,1),%ebp
+ addl %edx,%ebp
+
+ movl %esi,%ecx
+ movl %ebp,%edx
+ roll $5,%ebp
+ xorl %eax,%ecx
+ addl %ebx,%ebp
+ movl 36(%esp),%ebx
+ andl %edi,%ecx
+ rorl $2,%edi
+ xorl %eax,%ecx
+ leal 1518500249(%ebp,%ebx,1),%ebp
+ addl %ecx,%ebp
+
+ movl %edi,%ebx
+ movl %ebp,%ecx
+ roll $5,%ebp
+ xorl %esi,%ebx
+ addl %eax,%ebp
+ movl 40(%esp),%eax
+ andl %edx,%ebx
+ rorl $2,%edx
+ xorl %esi,%ebx
+ leal 1518500249(%ebp,%eax,1),%ebp
+ addl %ebx,%ebp
+
+ movl %edx,%eax
+ movl %ebp,%ebx
+ roll $5,%ebp
+ xorl %edi,%eax
+ addl %esi,%ebp
+ movl 44(%esp),%esi
+ andl %ecx,%eax
+ rorl $2,%ecx
+ xorl %edi,%eax
+ leal 1518500249(%ebp,%esi,1),%ebp
+ addl %eax,%ebp
+
+ movl %ecx,%esi
+ movl %ebp,%eax
+ roll $5,%ebp
+ xorl %edx,%esi
+ addl %edi,%ebp
+ movl 48(%esp),%edi
+ andl %ebx,%esi
+ rorl $2,%ebx
+ xorl %edx,%esi
+ leal 1518500249(%ebp,%edi,1),%ebp
+ addl %esi,%ebp
+
+ movl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ecx,%edi
+ addl %edx,%ebp
+ movl 52(%esp),%edx
+ andl %eax,%edi
+ rorl $2,%eax
+ xorl %ecx,%edi
+ leal 1518500249(%ebp,%edx,1),%ebp
+ addl %edi,%ebp
+
+ movl %eax,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%edx
+ addl %ecx,%ebp
+ movl 56(%esp),%ecx
+ andl %esi,%edx
+ rorl $2,%esi
+ xorl %ebx,%edx
+ leal 1518500249(%ebp,%ecx,1),%ebp
+ addl %edx,%ebp
+
+ movl %esi,%ecx
+ movl %ebp,%edx
+ roll $5,%ebp
+ xorl %eax,%ecx
+ addl %ebx,%ebp
+ movl 60(%esp),%ebx
+ andl %edi,%ecx
+ rorl $2,%edi
+ xorl %eax,%ecx
+ leal 1518500249(%ebp,%ebx,1),%ebp
+ movl (%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edi,%ebp
+ xorl 8(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 32(%esp),%ebx
+ andl %edx,%ebp
+ xorl 52(%esp),%ebx
+ roll $1,%ebx
+ xorl %esi,%ebp
+ addl %ebp,%eax
+ movl %ecx,%ebp
+ rorl $2,%edx
+ movl %ebx,(%esp)
+ roll $5,%ebp
+ leal 1518500249(%ebx,%eax,1),%ebx
+ movl 4(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %edx,%ebp
+ xorl 12(%esp),%eax
+ xorl %edi,%ebp
+ xorl 36(%esp),%eax
+ andl %ecx,%ebp
+ xorl 56(%esp),%eax
+ roll $1,%eax
+ xorl %edi,%ebp
+ addl %ebp,%esi
+ movl %ebx,%ebp
+ rorl $2,%ecx
+ movl %eax,4(%esp)
+ roll $5,%ebp
+ leal 1518500249(%eax,%esi,1),%eax
+ movl 8(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ecx,%ebp
+ xorl 16(%esp),%esi
+ xorl %edx,%ebp
+ xorl 40(%esp),%esi
+ andl %ebx,%ebp
+ xorl 60(%esp),%esi
+ roll $1,%esi
+ xorl %edx,%ebp
+ addl %ebp,%edi
+ movl %eax,%ebp
+ rorl $2,%ebx
+ movl %esi,8(%esp)
+ roll $5,%ebp
+ leal 1518500249(%esi,%edi,1),%esi
+ movl 12(%esp),%edi
+ addl %ebp,%esi
+
+ movl %ebx,%ebp
+ xorl 20(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 44(%esp),%edi
+ andl %eax,%ebp
+ xorl (%esp),%edi
+ roll $1,%edi
+ xorl %ecx,%ebp
+ addl %ebp,%edx
+ movl %esi,%ebp
+ rorl $2,%eax
+ movl %edi,12(%esp)
+ roll $5,%ebp
+ leal 1518500249(%edi,%edx,1),%edi
+ movl 16(%esp),%edx
+ addl %ebp,%edi
+
+ movl %esi,%ebp
+ xorl 24(%esp),%edx
+ xorl %eax,%ebp
+ xorl 48(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,16(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 20(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %edi,%ebp
+ xorl 28(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 8(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,20(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 24(%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edx,%ebp
+ xorl 32(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 56(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,24(%esp)
+ leal 1859775393(%ebx,%eax,1),%ebx
+ movl 28(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %ecx,%ebp
+ xorl 36(%esp),%eax
+ xorl %edx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edi,%ebp
+ xorl 16(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,28(%esp)
+ leal 1859775393(%eax,%esi,1),%eax
+ movl 32(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ebx,%ebp
+ xorl 40(%esp),%esi
+ xorl %ecx,%ebp
+ xorl (%esp),%esi
+ xorl %edx,%ebp
+ xorl 20(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,32(%esp)
+ leal 1859775393(%esi,%edi,1),%esi
+ movl 36(%esp),%edi
+ addl %ebp,%esi
+
+ movl %eax,%ebp
+ xorl 44(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,36(%esp)
+ leal 1859775393(%edi,%edx,1),%edi
+ movl 40(%esp),%edx
+ addl %ebp,%edi
+
+ movl %esi,%ebp
+ xorl 48(%esp),%edx
+ xorl %eax,%ebp
+ xorl 8(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,40(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 44(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %edi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 32(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,44(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 48(%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edx,%ebp
+ xorl 56(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 16(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 36(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,48(%esp)
+ leal 1859775393(%ebx,%eax,1),%ebx
+ movl 52(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %ecx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edx,%ebp
+ xorl 20(%esp),%eax
+ xorl %edi,%ebp
+ xorl 40(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,52(%esp)
+ leal 1859775393(%eax,%esi,1),%eax
+ movl 56(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ebx,%ebp
+ xorl (%esp),%esi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%esi
+ xorl %edx,%ebp
+ xorl 44(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,56(%esp)
+ leal 1859775393(%esi,%edi,1),%esi
+ movl 60(%esp),%edi
+ addl %ebp,%esi
+
+ movl %eax,%ebp
+ xorl 4(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 48(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,60(%esp)
+ leal 1859775393(%edi,%edx,1),%edi
+ movl (%esp),%edx
+ addl %ebp,%edi
+
+ movl %esi,%ebp
+ xorl 8(%esp),%edx
+ xorl %eax,%ebp
+ xorl 32(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 52(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 4(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %edi,%ebp
+ xorl 12(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 36(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 56(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,4(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 8(%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edx,%ebp
+ xorl 16(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 40(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 60(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,8(%esp)
+ leal 1859775393(%ebx,%eax,1),%ebx
+ movl 12(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %ecx,%ebp
+ xorl 20(%esp),%eax
+ xorl %edx,%ebp
+ xorl 44(%esp),%eax
+ xorl %edi,%ebp
+ xorl (%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,12(%esp)
+ leal 1859775393(%eax,%esi,1),%eax
+ movl 16(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ebx,%ebp
+ xorl 24(%esp),%esi
+ xorl %ecx,%ebp
+ xorl 48(%esp),%esi
+ xorl %edx,%ebp
+ xorl 4(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,16(%esp)
+ leal 1859775393(%esi,%edi,1),%esi
+ movl 20(%esp),%edi
+ addl %ebp,%esi
+
+ movl %eax,%ebp
+ xorl 28(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 52(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 8(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,20(%esp)
+ leal 1859775393(%edi,%edx,1),%edi
+ movl 24(%esp),%edx
+ addl %ebp,%edi
+
+ movl %esi,%ebp
+ xorl 32(%esp),%edx
+ xorl %eax,%ebp
+ xorl 56(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 12(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,24(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 28(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %edi,%ebp
+ xorl 36(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 60(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 16(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,28(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 32(%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edi,%ebp
+ xorl 40(%esp),%ebx
+ xorl %esi,%ebp
+ xorl (%esp),%ebx
+ andl %edx,%ebp
+ xorl 20(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,32(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 36(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %edx,%ebp
+ xorl 44(%esp),%eax
+ xorl %edi,%ebp
+ xorl 4(%esp),%eax
+ andl %ecx,%ebp
+ xorl 24(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,36(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl 40(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ecx,%ebp
+ xorl 48(%esp),%esi
+ xorl %edx,%ebp
+ xorl 8(%esp),%esi
+ andl %ebx,%ebp
+ xorl 28(%esp),%esi
+ roll $1,%esi
+ addl %edi,%ebp
+ rorl $2,%ebx
+ movl %eax,%edi
+ roll $5,%edi
+ movl %esi,40(%esp)
+ leal 2400959708(%esi,%ebp,1),%esi
+ movl %ecx,%ebp
+ addl %edi,%esi
+ andl %edx,%ebp
+ movl 44(%esp),%edi
+ addl %ebp,%esi
+
+ movl %ebx,%ebp
+ xorl 52(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 12(%esp),%edi
+ andl %eax,%ebp
+ xorl 32(%esp),%edi
+ roll $1,%edi
+ addl %edx,%ebp
+ rorl $2,%eax
+ movl %esi,%edx
+ roll $5,%edx
+ movl %edi,44(%esp)
+ leal 2400959708(%edi,%ebp,1),%edi
+ movl %ebx,%ebp
+ addl %edx,%edi
+ andl %ecx,%ebp
+ movl 48(%esp),%edx
+ addl %ebp,%edi
+
+ movl %eax,%ebp
+ xorl 56(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 16(%esp),%edx
+ andl %esi,%ebp
+ xorl 36(%esp),%edx
+ roll $1,%edx
+ addl %ecx,%ebp
+ rorl $2,%esi
+ movl %edi,%ecx
+ roll $5,%ecx
+ movl %edx,48(%esp)
+ leal 2400959708(%edx,%ebp,1),%edx
+ movl %eax,%ebp
+ addl %ecx,%edx
+ andl %ebx,%ebp
+ movl 52(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %esi,%ebp
+ xorl 60(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 20(%esp),%ecx
+ andl %edi,%ebp
+ xorl 40(%esp),%ecx
+ roll $1,%ecx
+ addl %ebx,%ebp
+ rorl $2,%edi
+ movl %edx,%ebx
+ roll $5,%ebx
+ movl %ecx,52(%esp)
+ leal 2400959708(%ecx,%ebp,1),%ecx
+ movl %esi,%ebp
+ addl %ebx,%ecx
+ andl %eax,%ebp
+ movl 56(%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edi,%ebp
+ xorl (%esp),%ebx
+ xorl %esi,%ebp
+ xorl 24(%esp),%ebx
+ andl %edx,%ebp
+ xorl 44(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,56(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 60(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %edx,%ebp
+ xorl 4(%esp),%eax
+ xorl %edi,%ebp
+ xorl 28(%esp),%eax
+ andl %ecx,%ebp
+ xorl 48(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,60(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl (%esp),%esi
+ addl %ebp,%eax
+
+ movl %ecx,%ebp
+ xorl 8(%esp),%esi
+ xorl %edx,%ebp
+ xorl 32(%esp),%esi
+ andl %ebx,%ebp
+ xorl 52(%esp),%esi
+ roll $1,%esi
+ addl %edi,%ebp
+ rorl $2,%ebx
+ movl %eax,%edi
+ roll $5,%edi
+ movl %esi,(%esp)
+ leal 2400959708(%esi,%ebp,1),%esi
+ movl %ecx,%ebp
+ addl %edi,%esi
+ andl %edx,%ebp
+ movl 4(%esp),%edi
+ addl %ebp,%esi
+
+ movl %ebx,%ebp
+ xorl 12(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 36(%esp),%edi
+ andl %eax,%ebp
+ xorl 56(%esp),%edi
+ roll $1,%edi
+ addl %edx,%ebp
+ rorl $2,%eax
+ movl %esi,%edx
+ roll $5,%edx
+ movl %edi,4(%esp)
+ leal 2400959708(%edi,%ebp,1),%edi
+ movl %ebx,%ebp
+ addl %edx,%edi
+ andl %ecx,%ebp
+ movl 8(%esp),%edx
+ addl %ebp,%edi
+
+ movl %eax,%ebp
+ xorl 16(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 40(%esp),%edx
+ andl %esi,%ebp
+ xorl 60(%esp),%edx
+ roll $1,%edx
+ addl %ecx,%ebp
+ rorl $2,%esi
+ movl %edi,%ecx
+ roll $5,%ecx
+ movl %edx,8(%esp)
+ leal 2400959708(%edx,%ebp,1),%edx
+ movl %eax,%ebp
+ addl %ecx,%edx
+ andl %ebx,%ebp
+ movl 12(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %esi,%ebp
+ xorl 20(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 44(%esp),%ecx
+ andl %edi,%ebp
+ xorl (%esp),%ecx
+ roll $1,%ecx
+ addl %ebx,%ebp
+ rorl $2,%edi
+ movl %edx,%ebx
+ roll $5,%ebx
+ movl %ecx,12(%esp)
+ leal 2400959708(%ecx,%ebp,1),%ecx
+ movl %esi,%ebp
+ addl %ebx,%ecx
+ andl %eax,%ebp
+ movl 16(%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edi,%ebp
+ xorl 24(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 48(%esp),%ebx
+ andl %edx,%ebp
+ xorl 4(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,16(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 20(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %edx,%ebp
+ xorl 28(%esp),%eax
+ xorl %edi,%ebp
+ xorl 52(%esp),%eax
+ andl %ecx,%ebp
+ xorl 8(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,20(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl 24(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ecx,%ebp
+ xorl 32(%esp),%esi
+ xorl %edx,%ebp
+ xorl 56(%esp),%esi
+ andl %ebx,%ebp
+ xorl 12(%esp),%esi
+ roll $1,%esi
+ addl %edi,%ebp
+ rorl $2,%ebx
+ movl %eax,%edi
+ roll $5,%edi
+ movl %esi,24(%esp)
+ leal 2400959708(%esi,%ebp,1),%esi
+ movl %ecx,%ebp
+ addl %edi,%esi
+ andl %edx,%ebp
+ movl 28(%esp),%edi
+ addl %ebp,%esi
+
+ movl %ebx,%ebp
+ xorl 36(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 60(%esp),%edi
+ andl %eax,%ebp
+ xorl 16(%esp),%edi
+ roll $1,%edi
+ addl %edx,%ebp
+ rorl $2,%eax
+ movl %esi,%edx
+ roll $5,%edx
+ movl %edi,28(%esp)
+ leal 2400959708(%edi,%ebp,1),%edi
+ movl %ebx,%ebp
+ addl %edx,%edi
+ andl %ecx,%ebp
+ movl 32(%esp),%edx
+ addl %ebp,%edi
+
+ movl %eax,%ebp
+ xorl 40(%esp),%edx
+ xorl %ebx,%ebp
+ xorl (%esp),%edx
+ andl %esi,%ebp
+ xorl 20(%esp),%edx
+ roll $1,%edx
+ addl %ecx,%ebp
+ rorl $2,%esi
+ movl %edi,%ecx
+ roll $5,%ecx
+ movl %edx,32(%esp)
+ leal 2400959708(%edx,%ebp,1),%edx
+ movl %eax,%ebp
+ addl %ecx,%edx
+ andl %ebx,%ebp
+ movl 36(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %esi,%ebp
+ xorl 44(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 4(%esp),%ecx
+ andl %edi,%ebp
+ xorl 24(%esp),%ecx
+ roll $1,%ecx
+ addl %ebx,%ebp
+ rorl $2,%edi
+ movl %edx,%ebx
+ roll $5,%ebx
+ movl %ecx,36(%esp)
+ leal 2400959708(%ecx,%ebp,1),%ecx
+ movl %esi,%ebp
+ addl %ebx,%ecx
+ andl %eax,%ebp
+ movl 40(%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edi,%ebp
+ xorl 48(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 8(%esp),%ebx
+ andl %edx,%ebp
+ xorl 28(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,40(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 44(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %edx,%ebp
+ xorl 52(%esp),%eax
+ xorl %edi,%ebp
+ xorl 12(%esp),%eax
+ andl %ecx,%ebp
+ xorl 32(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,44(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl 48(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ebx,%ebp
+ xorl 56(%esp),%esi
+ xorl %ecx,%ebp
+ xorl 16(%esp),%esi
+ xorl %edx,%ebp
+ xorl 36(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,48(%esp)
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 52(%esp),%edi
+ addl %ebp,%esi
+
+ movl %eax,%ebp
+ xorl 60(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 20(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 40(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,52(%esp)
+ leal 3395469782(%edi,%edx,1),%edi
+ movl 56(%esp),%edx
+ addl %ebp,%edi
+
+ movl %esi,%ebp
+ xorl (%esp),%edx
+ xorl %eax,%ebp
+ xorl 24(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 44(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,56(%esp)
+ leal 3395469782(%edx,%ecx,1),%edx
+ movl 60(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %edi,%ebp
+ xorl 4(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 28(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 48(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,60(%esp)
+ leal 3395469782(%ecx,%ebx,1),%ecx
+ movl (%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edx,%ebp
+ xorl 8(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 32(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 52(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,(%esp)
+ leal 3395469782(%ebx,%eax,1),%ebx
+ movl 4(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %ecx,%ebp
+ xorl 12(%esp),%eax
+ xorl %edx,%ebp
+ xorl 36(%esp),%eax
+ xorl %edi,%ebp
+ xorl 56(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,4(%esp)
+ leal 3395469782(%eax,%esi,1),%eax
+ movl 8(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ebx,%ebp
+ xorl 16(%esp),%esi
+ xorl %ecx,%ebp
+ xorl 40(%esp),%esi
+ xorl %edx,%ebp
+ xorl 60(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,8(%esp)
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 12(%esp),%edi
+ addl %ebp,%esi
+
+ movl %eax,%ebp
+ xorl 20(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 44(%esp),%edi
+ xorl %ecx,%ebp
+ xorl (%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,12(%esp)
+ leal 3395469782(%edi,%edx,1),%edi
+ movl 16(%esp),%edx
+ addl %ebp,%edi
+
+ movl %esi,%ebp
+ xorl 24(%esp),%edx
+ xorl %eax,%ebp
+ xorl 48(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,16(%esp)
+ leal 3395469782(%edx,%ecx,1),%edx
+ movl 20(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %edi,%ebp
+ xorl 28(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 8(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,20(%esp)
+ leal 3395469782(%ecx,%ebx,1),%ecx
+ movl 24(%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edx,%ebp
+ xorl 32(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 56(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,24(%esp)
+ leal 3395469782(%ebx,%eax,1),%ebx
+ movl 28(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %ecx,%ebp
+ xorl 36(%esp),%eax
+ xorl %edx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edi,%ebp
+ xorl 16(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,28(%esp)
+ leal 3395469782(%eax,%esi,1),%eax
+ movl 32(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ebx,%ebp
+ xorl 40(%esp),%esi
+ xorl %ecx,%ebp
+ xorl (%esp),%esi
+ xorl %edx,%ebp
+ xorl 20(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,32(%esp)
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 36(%esp),%edi
+ addl %ebp,%esi
+
+ movl %eax,%ebp
+ xorl 44(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,36(%esp)
+ leal 3395469782(%edi,%edx,1),%edi
+ movl 40(%esp),%edx
+ addl %ebp,%edi
+
+ movl %esi,%ebp
+ xorl 48(%esp),%edx
+ xorl %eax,%ebp
+ xorl 8(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,40(%esp)
+ leal 3395469782(%edx,%ecx,1),%edx
+ movl 44(%esp),%ecx
+ addl %ebp,%edx
+
+ movl %edi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 32(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,44(%esp)
+ leal 3395469782(%ecx,%ebx,1),%ecx
+ movl 48(%esp),%ebx
+ addl %ebp,%ecx
+
+ movl %edx,%ebp
+ xorl 56(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 16(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 36(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,48(%esp)
+ leal 3395469782(%ebx,%eax,1),%ebx
+ movl 52(%esp),%eax
+ addl %ebp,%ebx
+
+ movl %ecx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edx,%ebp
+ xorl 20(%esp),%eax
+ xorl %edi,%ebp
+ xorl 40(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ leal 3395469782(%eax,%esi,1),%eax
+ movl 56(%esp),%esi
+ addl %ebp,%eax
+
+ movl %ebx,%ebp
+ xorl (%esp),%esi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%esi
+ xorl %edx,%ebp
+ xorl 44(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 60(%esp),%edi
+ addl %ebp,%esi
+
+ movl %eax,%ebp
+ xorl 4(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 48(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ leal 3395469782(%edi,%edx,1),%edi
+ addl %ebp,%edi
+ movl 96(%esp),%ebp
+ movl 100(%esp),%edx
+ addl (%ebp),%edi
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%eax
+ addl 12(%ebp),%ebx
+ addl 16(%ebp),%ecx
+ movl %edi,(%ebp)
+ addl $64,%edx
+ movl %esi,4(%ebp)
+ cmpl 104(%esp),%edx
+ movl %eax,8(%ebp)
+ movl %ecx,%edi
+ movl %ebx,12(%ebp)
+ movl %edx,%esi
+ movl %ecx,16(%ebp)
+ jb .L000loop
+ addl $76,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size sha1_block_data_order,.-.L_sha1_block_data_order_begin
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
+.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+
+.section .note.GNU-stack,"",%progbits
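
The eighty unrolled groups of sha1_block_data_order above follow the four SHA-1 round types. The decimal literals folded into the leal instructions are the round constants (1518500249 = 0x5A827999, 1859775393 = 0x6ED9EBA1, 2400959708 = 0x8F1BBCDC, 3395469782 = 0xCA62C1D6), and the xorl chains that combine four stack slots, closed by roll $1, implement the schedule recurrence W[t] = ROTL1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]) in the 16-word ring buffer the prologue builds on the stack. A minimal C sketch of one round follows, for reference only; the names rol32 and sha1_round are illustrative, not identifiers from the patched files.

    #include <stdint.h>

    /* Illustrative reference for the listing above; not part of the patch. */
    static uint32_t rol32(uint32_t x, unsigned n)
    {
            return (x << n) | (x >> (32 - n));
    }

    /* One SHA-1 round: st[] holds a..e, t is the round index (0..79),
     * w the already-expanded schedule word. */
    static void sha1_round(uint32_t st[5], unsigned t, uint32_t w)
    {
            uint32_t a = st[0], b = st[1], c = st[2], d = st[3], e = st[4];
            uint32_t f, k;

            if (t < 20) {           /* Ch; the asm computes d ^ (b & (c ^ d)) */
                    f = (b & c) | (~b & d);
                    k = 0x5A827999;
            } else if (t < 40) {    /* parity */
                    f = b ^ c ^ d;
                    k = 0x6ED9EBA1;
            } else if (t < 60) {    /* Maj; split above into two added halves */
                    f = (b & c) | (b & d) | (c & d);
                    k = 0x8F1BBCDC;
            } else {                /* parity again */
                    f = b ^ c ^ d;
                    k = 0xCA62C1D6;
            }

            st[4] = d;
            st[3] = c;
            st[2] = rol32(b, 30);                   /* the rorl $2 in the listing */
            st[1] = a;
            st[0] = rol32(a, 5) + f + e + k + w;    /* the roll $5 + leal pair */
    }

The alternative forms in the listing are bitwise-equal to the textbook ones here: d ^ (b & (c ^ d)) selects c where b is set and d elsewhere, and the Maj halves (b & c) and ((b ^ c) & d) never share set bits, so adding them equals the majority OR.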
diff --git a/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
new file mode 100644
index 0000000000..149edef796
--- /dev/null
+++ b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
@@ -0,0 +1,2515 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+
+
+.globl sha1_block_data_order
+.type sha1_block_data_order,@function
+.align 16
+sha1_block_data_order:
+ movl _gnutls_x86_cpuid_s+0(%rip),%r9d
+ movl _gnutls_x86_cpuid_s+4(%rip),%r8d
+ movl _gnutls_x86_cpuid_s+8(%rip),%r10d
+ testl $512,%r8d
+ jz .Lialu
+ jmp _ssse3_shortcut
+
+.align 16
+.Lialu:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ movq %rsp,%r11
+ movq %rdi,%r8
+ subq $72,%rsp
+ movq %rsi,%r9
+ andq $-64,%rsp
+ movq %rdx,%r10
+ movq %r11,64(%rsp)
+.Lprologue:
+
+ movl 0(%r8),%esi
+ movl 4(%r8),%edi
+ movl 8(%r8),%r11d
+ movl 12(%r8),%r12d
+ movl 16(%r8),%r13d
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ movl 0(%r9),%edx
+ bswapl %edx
+ movl %edx,0(%rsp)
+ movl %r11d,%eax
+ movl 4(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,4(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 8(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,8(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 12(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,12(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 16(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,16(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 20(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,20(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 24(%r9),%edx
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r13,1),%r13d
+ andl %edi,%eax
+ movl %edx,24(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 28(%r9),%ebp
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r12,1),%r12d
+ andl %esi,%eax
+ movl %ebp,28(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 32(%r9),%edx
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %edx,32(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 36(%r9),%ebp
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %ebp,36(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 40(%r9),%edx
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %edx,40(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 44(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,44(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 48(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,48(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 52(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,52(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 56(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,56(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 60(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,60(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl 0(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ xorl 32(%rsp),%edx
+ andl %edi,%eax
+ leal 1518500249(%rbp,%r13,1),%r13d
+ xorl 52(%rsp),%edx
+ xorl %r12d,%eax
+ roll $1,%edx
+ addl %ecx,%r13d
+ roll $30,%edi
+ movl %edx,0(%rsp)
+ addl %eax,%r13d
+ movl 4(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ xorl 36(%rsp),%ebp
+ andl %esi,%eax
+ leal 1518500249(%rdx,%r12,1),%r12d
+ xorl 56(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $1,%ebp
+ addl %ecx,%r12d
+ roll $30,%esi
+ movl %ebp,4(%rsp)
+ addl %eax,%r12d
+ movl 8(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ xorl 40(%rsp),%edx
+ andl %r13d,%eax
+ leal 1518500249(%rbp,%r11,1),%r11d
+ xorl 60(%rsp),%edx
+ xorl %edi,%eax
+ roll $1,%edx
+ addl %ecx,%r11d
+ roll $30,%r13d
+ movl %edx,8(%rsp)
+ addl %eax,%r11d
+ movl 12(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ leal 1518500249(%rdx,%rdi,1),%edi
+ xorl 0(%rsp),%ebp
+ xorl %esi,%eax
+ roll $1,%ebp
+ addl %ecx,%edi
+ roll $30,%r12d
+ movl %ebp,12(%rsp)
+ addl %eax,%edi
+ movl 16(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ leal 1518500249(%rbp,%rsi,1),%esi
+ xorl 4(%rsp),%edx
+ xorl %r13d,%eax
+ roll $1,%edx
+ addl %ecx,%esi
+ roll $30,%r11d
+ movl %edx,16(%rsp)
+ addl %eax,%esi
+ movl 20(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 52(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 8(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 56(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 12(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 16(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 20(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 24(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 28(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 32(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 36(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 40(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 44(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 48(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 52(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 56(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 60(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 0(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 4(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 8(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 12(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 16(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 20(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r13d
+ movl 40(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,40(%rsp)
+ addl %ecx,%r12d
+ movl 44(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 52(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,44(%rsp)
+ addl %ecx,%r11d
+ movl 48(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 56(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,48(%rsp)
+ addl %ecx,%edi
+ movl 52(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 60(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 40(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,52(%rsp)
+ addl %ecx,%esi
+ movl 56(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 0(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 44(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,56(%rsp)
+ addl %ecx,%r13d
+ movl 60(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 4(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 48(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,60(%rsp)
+ addl %ecx,%r12d
+ movl 0(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 8(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 52(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,0(%rsp)
+ addl %ecx,%r11d
+ movl 4(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 12(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 56(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,4(%rsp)
+ addl %ecx,%edi
+ movl 8(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 16(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 60(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,8(%rsp)
+ addl %ecx,%esi
+ movl 12(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 20(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 0(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,12(%rsp)
+ addl %ecx,%r13d
+ movl 16(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 24(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 4(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,16(%rsp)
+ addl %ecx,%r12d
+ movl 20(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 28(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 8(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,20(%rsp)
+ addl %ecx,%r11d
+ movl 24(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 32(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 12(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,24(%rsp)
+ addl %ecx,%edi
+ movl 28(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 36(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 16(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,28(%rsp)
+ addl %ecx,%esi
+ movl 32(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 40(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 20(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,32(%rsp)
+ addl %ecx,%r13d
+ movl 36(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r12d
+ movl 40(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 48(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,40(%rsp)
+ addl %ecx,%r11d
+ movl 44(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 52(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,44(%rsp)
+ addl %ecx,%edi
+ movl 48(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 56(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,48(%rsp)
+ addl %ecx,%esi
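+# Rounds 60-79: F(b,c,d) = b^c^d (parity), K = 0xca62c1d6
+# (written below as the signed constant -899497514).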
+ movl 52(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 20(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 40(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 24(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 44(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 48(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 52(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 56(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 60(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 0(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 4(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 8(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rsi,1),%esi
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 12(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 16(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 20(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 24(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 28(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 32(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 36(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 40(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl 56(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 44(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl 60(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 48(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl %r11d,%eax
+ leal -899497514(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ roll $30,%r11d
+ addl %eax,%esi
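+# End of the 80 rounds: fold this block's result back into the
+# five-word hash state at (%r8).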
+ addl 0(%r8),%esi
+ addl 4(%r8),%edi
+ addl 8(%r8),%r11d
+ addl 12(%r8),%r12d
+ addl 16(%r8),%r13d
+ movl %esi,0(%r8)
+ movl %edi,4(%r8)
+ movl %r11d,8(%r8)
+ movl %r12d,12(%r8)
+ movl %r13d,16(%r8)
+
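+# One block done; advance the data pointer and loop while blocks remain.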
+ subq $1,%r10
+ leaq 64(%r9),%r9
+ jnz .Lloop
+
+ movq 64(%rsp),%rsi
+ movq (%rsi),%r13
+ movq 8(%rsi),%r12
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rbx
+ leaq 32(%rsi),%rsp
+.Lepilogue:
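+# .byte 0xf3,0xc3 encodes "rep ret", historically used instead of a
+# bare ret to sidestep a branch-predictor penalty on some AMD cores.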
+ .byte 0xf3,0xc3
+.size sha1_block_data_order,.-sha1_block_data_order
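+# SSSE3 variant: the scalar SHA-1 rounds below are interleaved with
+# XMM code (palignr/pxor/shift sequences) that precomputes the message
+# schedule plus K for upcoming rounds, hiding the schedule latency.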
+.type sha1_block_data_order_ssse3,@function
+.align 16
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -64(%rsp),%rsp
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX+64(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+ movl %ecx,%edi
+ xorl %edx,%edi
+ andl %edi,%esi
+
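+# %r11 points 64 bytes into K_XX_XX, so 64(%r11) is the pshufb
+# byte-swap mask and -64(%r11) the K constant for rounds 0-19.
+# The .byte sequences hand-encode SSSE3 instructions (pshufb here,
+# palignr inside the loop) for assemblers lacking the mnemonics.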
+ movdqa 64(%r11),%xmm6
+ movdqa -64(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ jmp .Loop_ssse3
+.align 16
+.Loop_ssse3:
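+# One 64-byte block per iteration; the message schedule for later
+# rounds is expanded in XMM registers while the scalar rounds execute.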
+ movdqa %xmm1,%xmm4
+ rorl $2,%ebx
+ xorl %edx,%esi
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ addl 0(%rsp),%ebp
+ paddd %xmm3,%xmm9
+ xorl %ecx,%ebx
+ roll $5,%eax
+ psrldq $4,%xmm8
+ addl %esi,%ebp
+ andl %ebx,%edi
+ pxor %xmm0,%xmm4
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ pxor %xmm2,%xmm8
+ rorl $7,%eax
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ addl 4(%rsp),%edx
+ pxor %xmm8,%xmm4
+ xorl %ebx,%eax
+ roll $5,%ebp
+ movdqa %xmm9,48(%rsp)
+ addl %edi,%edx
+ andl %eax,%esi
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ rorl $7,%ebp
+ xorl %ebx,%esi
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ addl 8(%rsp),%ecx
+ xorl %eax,%ebp
+ roll $5,%edx
+ psrld $31,%xmm8
+ addl %esi,%ecx
+ andl %ebp,%edi
+ movdqa %xmm10,%xmm9
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ rorl $7,%edx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ addl 12(%rsp),%ebx
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ xorl %ebp,%edx
+ roll $5,%ecx
+ movdqa -64(%r11),%xmm10
+ addl %edi,%ebx
+ andl %edx,%esi
+ pxor %xmm9,%xmm4
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ movdqa %xmm2,%xmm5
+ rorl $7,%ecx
+ xorl %ebp,%esi
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ addl 16(%rsp),%eax
+ paddd %xmm4,%xmm10
+ xorl %edx,%ecx
+ roll $5,%ebx
+ psrldq $4,%xmm9
+ addl %esi,%eax
+ andl %ecx,%edi
+ pxor %xmm1,%xmm5
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ pxor %xmm3,%xmm9
+ rorl $7,%ebx
+ xorl %edx,%edi
+ movl %eax,%esi
+ addl 20(%rsp),%ebp
+ pxor %xmm9,%xmm5
+ xorl %ecx,%ebx
+ roll $5,%eax
+ movdqa %xmm10,0(%rsp)
+ addl %edi,%ebp
+ andl %ebx,%esi
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ rorl $7,%eax
+ xorl %ecx,%esi
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ addl 24(%rsp),%edx
+ xorl %ebx,%eax
+ roll $5,%ebp
+ psrld $31,%xmm9
+ addl %esi,%edx
+ andl %eax,%edi
+ movdqa %xmm8,%xmm10
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ rorl $7,%ebp
+ xorl %ebx,%edi
+ movl %edx,%esi
+ addl 28(%rsp),%ecx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ xorl %eax,%ebp
+ roll $5,%edx
+ movdqa -32(%r11),%xmm8
+ addl %edi,%ecx
+ andl %ebp,%esi
+ pxor %xmm10,%xmm5
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ movdqa %xmm3,%xmm6
+ rorl $7,%edx
+ xorl %eax,%esi
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ addl 32(%rsp),%ebx
+ paddd %xmm5,%xmm8
+ xorl %ebp,%edx
+ roll $5,%ecx
+ psrldq $4,%xmm10
+ addl %esi,%ebx
+ andl %edx,%edi
+ pxor %xmm2,%xmm6
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ pxor %xmm4,%xmm10
+ rorl $7,%ecx
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ addl 36(%rsp),%eax
+ pxor %xmm10,%xmm6
+ xorl %edx,%ecx
+ roll $5,%ebx
+ movdqa %xmm8,16(%rsp)
+ addl %edi,%eax
+ andl %ecx,%esi
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ rorl $7,%ebx
+ xorl %edx,%esi
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ addl 40(%rsp),%ebp
+ xorl %ecx,%ebx
+ roll $5,%eax
+ psrld $31,%xmm10
+ addl %esi,%ebp
+ andl %ebx,%edi
+ movdqa %xmm9,%xmm8
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ rorl $7,%eax
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ addl 44(%rsp),%edx
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ xorl %ebx,%eax
+ roll $5,%ebp
+ movdqa -32(%r11),%xmm9
+ addl %edi,%edx
+ andl %eax,%esi
+ pxor %xmm8,%xmm6
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm7
+ rorl $7,%ebp
+ xorl %ebx,%esi
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ addl 48(%rsp),%ecx
+ paddd %xmm6,%xmm9
+ xorl %eax,%ebp
+ roll $5,%edx
+ psrldq $4,%xmm8
+ addl %esi,%ecx
+ andl %ebp,%edi
+ pxor %xmm3,%xmm7
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ pxor %xmm5,%xmm8
+ rorl $7,%edx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ addl 52(%rsp),%ebx
+ pxor %xmm8,%xmm7
+ xorl %ebp,%edx
+ roll $5,%ecx
+ movdqa %xmm9,32(%rsp)
+ addl %edi,%ebx
+ andl %edx,%esi
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ xorl %ebp,%esi
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ addl 56(%rsp),%eax
+ xorl %edx,%ecx
+ roll $5,%ebx
+ psrld $31,%xmm8
+ addl %esi,%eax
+ andl %ecx,%edi
+ movdqa %xmm10,%xmm9
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ rorl $7,%ebx
+ xorl %edx,%edi
+ movl %eax,%esi
+ addl 60(%rsp),%ebp
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ xorl %ecx,%ebx
+ roll $5,%eax
+ movdqa -32(%r11),%xmm10
+ addl %edi,%ebp
+ andl %ebx,%esi
+ pxor %xmm9,%xmm7
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ movdqa %xmm7,%xmm9
+ rorl $7,%eax
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ addl 0(%rsp),%edx
+ pxor %xmm1,%xmm0
+ xorl %ebx,%eax
+ roll $5,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ addl %esi,%edx
+ andl %eax,%edi
+ pxor %xmm9,%xmm0
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ rorl $7,%ebp
+ xorl %ebx,%edi
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ addl 4(%rsp),%ecx
+ xorl %eax,%ebp
+ roll $5,%edx
+ pslld $2,%xmm0
+ addl %edi,%ecx
+ andl %ebp,%esi
+ psrld $30,%xmm9
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ addl 8(%rsp),%ebx
+ por %xmm9,%xmm0
+ xorl %ebp,%edx
+ roll $5,%ecx
+ movdqa %xmm0,%xmm10
+ addl %esi,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 16(%rsp),%ebp
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %eax,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %eax,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %ebp,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ addl %esi,%eax
+ xorl %edx,%edi
+ movdqa 0(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %ebx,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %eax,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %ecx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ebx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %eax,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ addl %esi,%ecx
+ xorl %eax,%edi
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %edx,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %edx,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %ecx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ebx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ addl %esi,%edx
+ xorl %ebx,%edi
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %ebp,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %eax,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %ebp,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ movdqa %xmm5,%xmm9
+ rorl $7,%ecx
+ movl %ebx,%esi
+ xorl %edx,%edi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 32(%rsp),%ebp
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ pxor %xmm7,%xmm6
+ movl %eax,%edi
+ xorl %ecx,%esi
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ roll $5,%eax
+ addl %esi,%ebp
+ pxor %xmm9,%xmm6
+ xorl %ebx,%edi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 36(%rsp),%edx
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movl %ebp,%esi
+ pslld $2,%xmm6
+ xorl %ebx,%edi
+ roll $5,%ebp
+ psrld $30,%xmm9
+ addl %edi,%edx
+ xorl %eax,%esi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 40(%rsp),%ecx
+ andl %eax,%esi
+ por %xmm9,%xmm6
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ movdqa %xmm6,%xmm10
+ movl %edx,%edi
+ xorl %eax,%esi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebp,%edi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 44(%rsp),%ebx
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ rorl $7,%edx
+ movl %ecx,%esi
+ xorl %ebp,%edi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %edx,%esi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 48(%rsp),%eax
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ andl %edx,%esi
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ pxor %xmm0,%xmm7
+ movl %ebx,%edi
+ xorl %edx,%esi
+ movdqa 32(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ roll $5,%ebx
+ addl %esi,%eax
+ pxor %xmm10,%xmm7
+ xorl %ecx,%edi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 52(%rsp),%ebp
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movl %eax,%esi
+ pslld $2,%xmm7
+ xorl %ecx,%edi
+ roll $5,%eax
+ psrld $30,%xmm10
+ addl %edi,%ebp
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 56(%rsp),%edx
+ andl %ebx,%esi
+ por %xmm10,%xmm7
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movdqa %xmm7,%xmm8
+ movl %ebp,%edi
+ xorl %ebx,%esi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %eax,%edi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 60(%rsp),%ecx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ movl %edx,%esi
+ xorl %eax,%edi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebp,%esi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 0(%rsp),%ebx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ rorl $7,%edx
+ pxor %xmm1,%xmm0
+ movl %ecx,%edi
+ xorl %ebp,%esi
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ roll $5,%ecx
+ addl %esi,%ebx
+ pxor %xmm8,%xmm0
+ xorl %edx,%edi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 4(%rsp),%eax
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ andl %edx,%edi
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ movl %ebx,%esi
+ pslld $2,%xmm0
+ xorl %edx,%edi
+ roll $5,%ebx
+ psrld $30,%xmm8
+ addl %edi,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 8(%rsp),%ebp
+ andl %ecx,%esi
+ por %xmm8,%xmm0
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movdqa %xmm0,%xmm9
+ movl %eax,%edi
+ xorl %ecx,%esi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ebx,%edi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 12(%rsp),%edx
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movl %ebp,%esi
+ xorl %ebx,%edi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %eax,%esi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 16(%rsp),%ecx
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ andl %eax,%esi
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ pxor %xmm2,%xmm1
+ movl %edx,%edi
+ xorl %eax,%esi
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ roll $5,%edx
+ addl %esi,%ecx
+ pxor %xmm9,%xmm1
+ xorl %ebp,%edi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 20(%rsp),%ebx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ rorl $7,%edx
+ movl %ecx,%esi
+ pslld $2,%xmm1
+ xorl %ebp,%edi
+ roll $5,%ecx
+ psrld $30,%xmm9
+ addl %edi,%ebx
+ xorl %edx,%esi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 24(%rsp),%eax
+ andl %edx,%esi
+ por %xmm9,%xmm1
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ movdqa %xmm1,%xmm10
+ movl %ebx,%edi
+ xorl %edx,%esi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ecx,%edi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 28(%rsp),%ebp
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movl %eax,%esi
+ xorl %ecx,%edi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 32(%rsp),%edx
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ pxor %xmm3,%xmm2
+ movl %ebp,%edi
+ xorl %ebx,%esi
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ roll $5,%ebp
+ addl %esi,%edx
+ pxor %xmm10,%xmm2
+ xorl %eax,%edi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 36(%rsp),%ecx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ andl %eax,%edi
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ movl %edx,%esi
+ pslld $2,%xmm2
+ xorl %eax,%edi
+ roll $5,%edx
+ psrld $30,%xmm10
+ addl %edi,%ecx
+ xorl %ebp,%esi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 40(%rsp),%ebx
+ andl %ebp,%esi
+ por %xmm10,%xmm2
+ xorl %eax,%ebp
+ rorl $7,%edx
+ movdqa %xmm2,%xmm8
+ movl %ecx,%edi
+ xorl %ebp,%esi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %edx,%edi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 44(%rsp),%eax
+ andl %edx,%edi
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ movl %ebx,%esi
+ xorl %edx,%edi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %eax,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ movdqa %xmm10,48(%rsp)
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 4(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 8(%rsp),%edx
+ xorl %ebx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 12(%rsp),%ecx
+ xorl %eax,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ cmpq %r10,%r9
+ je .Ldone_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa -64(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %ebp,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 20(%rsp),%eax
+ movdqa %xmm0,0(%rsp)
+ xorl %edx,%edi
+ movl %ebx,%esi
+ psubd %xmm9,%xmm0
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 24(%rsp),%ebp
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 28(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%rsp),%ecx
+ xorl %eax,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ paddd %xmm9,%xmm1
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 36(%rsp),%ebx
+ movdqa %xmm1,16(%rsp)
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ psubd %xmm9,%xmm1
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 40(%rsp),%eax
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 48(%rsp),%edx
+ xorl %ebx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ paddd %xmm9,%xmm2
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 52(%rsp),%ecx
+ movdqa %xmm2,32(%rsp)
+ xorl %eax,%edi
+ movl %edx,%esi
+ psubd %xmm9,%xmm2
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 56(%rsp),%ebx
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %ecx,%edi
+ movl %edx,12(%r8)
+ xorl %edx,%edi
+ movl %ebp,16(%r8)
+ andl %edi,%esi
+ jmp .Loop_ssse3
+
+.align 16
+.Ldone_ssse3:
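+# Last block: same final 16 rounds as .Loop_ssse3, but with no
+# next-block schedule to start; store the state and unwind the stack.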
+ addl 16(%rsp),%ebx
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 20(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 24(%rsp),%ebp
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 28(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%rsp),%ecx
+ xorl %eax,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 36(%rsp),%ebx
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 40(%rsp),%eax
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 48(%rsp),%edx
+ xorl %ebx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 52(%rsp),%ecx
+ xorl %eax,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 56(%rsp),%ebx
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ leaq 64(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+.Lepilogue_ssse3:
+ .byte 0xf3,0xc3
+.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
+.align 64
+K_XX_XX:
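+# SHA-1 round constants, each replicated across four 32-bit lanes:
+# 0x5a827999 (rounds 0-19), 0x6ed9eba1 (20-39), 0x8f1bbcdc (40-59),
+# 0xca62c1d6 (60-79); the 0x00010203... rows are the pshufb
+# byte-swap mask.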
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
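+# The .byte string below spells "SHA1 block transform for x86_64,
+# CRYPTOGAMS by <appro@openssl.org>".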
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 64
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/elf/sha256-avx-x86_64.s b/lib/accelerated/x86/elf/sha256-avx-x86_64.s
new file mode 100644
index 0000000000..bb9236ea84
--- /dev/null
+++ b/lib/accelerated/x86/elf/sha256-avx-x86_64.s
@@ -0,0 +1,2614 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+#     * Neither the name of Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+
+
+
+.globl sha256_multi_block
+.type sha256_multi_block,@function
+.align 32
+sha256_multi_block:
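+# Multi-buffer SHA-256: hashes up to four independent streams at once,
+# one stream per 32-bit lane of each XMM register. %rsi points to an
+# array of (data pointer, block count) descriptors, %rdi to the
+# interleaved state vectors, and %edx (saved at 280(%rsp)) appears to
+# carry the buffer count.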
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ subq $288,%rsp
+ andq $-256,%rsp
+ movq %rax,272(%rsp)
+ leaq K256+128(%rip),%rbp
+ leaq 256(%rsp),%rbx
+ leaq 128(%rdi),%rdi
+
+.Loop_grande:
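+# Load up to four (pointer, count) descriptors; %edx becomes the
+# maximum block count, and idle lanes get %rbp (the K256 table) as a
+# harmless dummy pointer via cmovleq.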
+ movl %edx,280(%rsp)
+ xorl %edx,%edx
+ movq 0(%rsi),%r8
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rbp,%r8
+ movq 16(%rsi),%r9
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rbp,%r9
+ movq 32(%rsi),%r10
+ movl 40(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,8(%rbx)
+ cmovleq %rbp,%r10
+ movq 48(%rsi),%r11
+ movl 56(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,12(%rbx)
+ cmovleq %rbp,%r11
+ testl %edx,%edx
+ jz .Ldone
+
+ movdqu 0-128(%rdi),%xmm8
+ leaq 128(%rsp),%rax
+ movdqu 32-128(%rdi),%xmm9
+ movdqu 64-128(%rdi),%xmm10
+ movdqu 96-128(%rdi),%xmm11
+ movdqu 128-128(%rdi),%xmm12
+ movdqu 160-128(%rdi),%xmm13
+ movdqu 192-128(%rdi),%xmm14
+ movdqu 224-128(%rdi),%xmm15
+ movdqu .Lpbswap(%rip),%xmm6
+ jmp .Loop
+
+.align 32
+.Loop:
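+# Rounds 0-15: movd/punpckldq gathers one 32-bit word from each of the
+# four streams into an XMM register, pshufb (.byte 102,15,56,0,238)
+# byte-swaps it, and Sigma1/Ch/Sigma0/Maj are built from
+# psrld/pslld/pxor/pand sequences.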
+ movdqa %xmm10,%xmm4
+ pxor %xmm9,%xmm4
+ movd 0(%r8),%xmm5
+ movd 0(%r9),%xmm0
+ movd 0(%r10),%xmm1
+ movd 0(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,0-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movd 4(%r8),%xmm5
+ movd 4(%r9),%xmm0
+ movd 4(%r10),%xmm1
+ movd 4(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,16-128(%rax)
+ paddd %xmm14,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm5,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm14
+ paddd %xmm7,%xmm14
+ movd 8(%r8),%xmm5
+ movd 8(%r9),%xmm0
+ movd 8(%r10),%xmm1
+ movd 8(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,32-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movd 12(%r8),%xmm5
+ movd 12(%r9),%xmm0
+ movd 12(%r10),%xmm1
+ movd 12(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,48-128(%rax)
+ paddd %xmm12,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm5,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm12
+ paddd %xmm7,%xmm12
+ movd 16(%r8),%xmm5
+ movd 16(%r9),%xmm0
+ movd 16(%r10),%xmm1
+ movd 16(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,64-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movd 20(%r8),%xmm5
+ movd 20(%r9),%xmm0
+ movd 20(%r10),%xmm1
+ movd 20(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,80-128(%rax)
+ paddd %xmm10,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm5,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm10
+ paddd %xmm7,%xmm10
+ movd 24(%r8),%xmm5
+ movd 24(%r9),%xmm0
+ movd 24(%r10),%xmm1
+ movd 24(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,96-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movd 28(%r8),%xmm5
+ movd 28(%r9),%xmm0
+ movd 28(%r10),%xmm1
+ movd 28(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,112-128(%rax)
+ paddd %xmm8,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm5,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movd 32(%r8),%xmm5
+ movd 32(%r9),%xmm0
+ movd 32(%r10),%xmm1
+ movd 32(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,128-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movd 36(%r8),%xmm5
+ movd 36(%r9),%xmm0
+ movd 36(%r10),%xmm1
+ movd 36(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,144-128(%rax)
+ paddd %xmm14,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm5,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm14
+ paddd %xmm7,%xmm14
+ movd 40(%r8),%xmm5
+ movd 40(%r9),%xmm0
+ movd 40(%r10),%xmm1
+ movd 40(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,160-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movd 44(%r8),%xmm5
+ movd 44(%r9),%xmm0
+ movd 44(%r10),%xmm1
+ movd 44(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,176-128(%rax)
+ paddd %xmm12,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm5,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm12
+ paddd %xmm7,%xmm12
+ movd 48(%r8),%xmm5
+ movd 48(%r9),%xmm0
+ movd 48(%r10),%xmm1
+ movd 48(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,192-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movd 52(%r8),%xmm5
+ movd 52(%r9),%xmm0
+ movd 52(%r10),%xmm1
+ movd 52(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,208-128(%rax)
+ paddd %xmm10,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm5,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm10
+ paddd %xmm7,%xmm10
+ movd 56(%r8),%xmm5
+ movd 56(%r9),%xmm0
+ movd 56(%r10),%xmm1
+ movd 56(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,224-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movd 60(%r8),%xmm5
+ leaq 64(%r8),%r8
+ movd 60(%r9),%xmm0
+ leaq 64(%r9),%r9
+ movd 60(%r10),%xmm1
+ leaq 64(%r10),%r10
+ movd 60(%r11),%xmm2
+ leaq 64(%r11),%r11
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,240-128(%rax)
+ paddd %xmm8,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm5,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movdqu 0-128(%rax),%xmm5
+ movl $3,%ecx
+ jmp .Loop_16_xx
+.align 32
+.Loop_16_xx:
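+# Rounds 16-63: three 16-round passes (%ecx = 3), expanding the
+# message schedule in place with sigma0/sigma1 (psrld/pslld/pxor)
+# as it goes.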
+ movdqa 16-128(%rax),%xmm6
+ paddd 144-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 224-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,0-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movdqa 32-128(%rax),%xmm5
+ paddd 160-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 240-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,16-128(%rax)
+ paddd %xmm14,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm6,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm14
+ paddd %xmm7,%xmm14
+ movdqa 48-128(%rax),%xmm6
+ paddd 176-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 0-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,32-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movdqa 64-128(%rax),%xmm5
+ paddd 192-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 16-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,48-128(%rax)
+ paddd %xmm12,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm6,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm12
+ paddd %xmm7,%xmm12
+ movdqa 80-128(%rax),%xmm6
+ paddd 208-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 32-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,64-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movdqa 96-128(%rax),%xmm5
+ paddd 224-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 48-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,80-128(%rax)
+ paddd %xmm10,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm6,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm10
+ paddd %xmm7,%xmm10
+ movdqa 112-128(%rax),%xmm6
+ paddd 240-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 64-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,96-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movdqa 128-128(%rax),%xmm5
+ paddd 0-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 80-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,112-128(%rax)
+ paddd %xmm8,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm6,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm8
+ paddd %xmm7,%xmm8
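+# Each constant in K256 is stored 32 bytes (8 dwords) wide, so the constant
+# pointer advances 8*32 = 256 bytes once every eight rounds.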
+ leaq 256(%rbp),%rbp
+ movdqa 144-128(%rax),%xmm6
+ paddd 16-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 96-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,128-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movdqa 160-128(%rax),%xmm5
+ paddd 32-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 112-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,144-128(%rax)
+ paddd %xmm14,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm6,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm14
+ paddd %xmm7,%xmm14
+ movdqa 176-128(%rax),%xmm6
+ paddd 48-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 128-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,160-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movdqa 192-128(%rax),%xmm5
+ paddd 64-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 144-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,176-128(%rax)
+ paddd %xmm12,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm6,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm12
+ paddd %xmm7,%xmm12
+ movdqa 208-128(%rax),%xmm6
+ paddd 80-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 160-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,192-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movdqa 224-128(%rax),%xmm5
+ paddd 96-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 176-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,208-128(%rax)
+ paddd %xmm10,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm6,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm10
+ paddd %xmm7,%xmm10
+ movdqa 240-128(%rax),%xmm6
+ paddd 112-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 192-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,224-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movdqa 0-128(%rax),%xmm5
+ paddd 128-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 208-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,240-128(%rax)
+ paddd %xmm8,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm6,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ decl %ecx
+ jnz .Loop_16_xx
+
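+# 64 rounds done for this block.  (%rbx) holds per-lane counts of remaining
+# blocks: pcmpgtd turns count>0 into an all-ones lane mask, adding that
+# mask (-1) decrements the live counters, and cmovgeq re-aims the input
+# pointer of any lane on (or past) its last block at the K256 table, a
+# safely mapped dummy buffer, so later loads cannot fault.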
+ movl $1,%ecx
+ leaq K256+128(%rip),%rbp
+
+ movdqa (%rbx),%xmm7
+ cmpl 0(%rbx),%ecx
+ pxor %xmm0,%xmm0
+ cmovgeq %rbp,%r8
+ cmpl 4(%rbx),%ecx
+ movdqa %xmm7,%xmm6
+ cmovgeq %rbp,%r9
+ cmpl 8(%rbx),%ecx
+ pcmpgtd %xmm0,%xmm6
+ cmovgeq %rbp,%r10
+ cmpl 12(%rbx),%ecx
+ paddd %xmm6,%xmm7
+ cmovgeq %rbp,%r11
+
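+# Merge into the saved contexts: the new working variables are pand-ed with
+# the live-lane mask in %xmm6 before the paddd, so a finished lane adds
+# zero and its stored digest stays unchanged.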
+ movdqu 0-128(%rdi),%xmm0
+ pand %xmm6,%xmm8
+ movdqu 32-128(%rdi),%xmm1
+ pand %xmm6,%xmm9
+ movdqu 64-128(%rdi),%xmm2
+ pand %xmm6,%xmm10
+ movdqu 96-128(%rdi),%xmm5
+ pand %xmm6,%xmm11
+ paddd %xmm0,%xmm8
+ movdqu 128-128(%rdi),%xmm0
+ pand %xmm6,%xmm12
+ paddd %xmm1,%xmm9
+ movdqu 160-128(%rdi),%xmm1
+ pand %xmm6,%xmm13
+ paddd %xmm2,%xmm10
+ movdqu 192-128(%rdi),%xmm2
+ pand %xmm6,%xmm14
+ paddd %xmm5,%xmm11
+ movdqu 224-128(%rdi),%xmm5
+ pand %xmm6,%xmm15
+ paddd %xmm0,%xmm12
+ paddd %xmm1,%xmm13
+ movdqu %xmm8,0-128(%rdi)
+ paddd %xmm2,%xmm14
+ movdqu %xmm9,32-128(%rdi)
+ paddd %xmm5,%xmm15
+ movdqu %xmm10,64-128(%rdi)
+ movdqu %xmm11,96-128(%rdi)
+ movdqu %xmm12,128-128(%rdi)
+ movdqu %xmm13,160-128(%rdi)
+ movdqu %xmm14,192-128(%rdi)
+ movdqu %xmm15,224-128(%rdi)
+
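+# Write back the per-lane counters and reload the .Lpbswap shuffle mask
+# into %xmm6 (clobbered during the rounds) before starting the next block.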
+ movdqa %xmm7,(%rbx)
+ movdqa .Lpbswap(%rip),%xmm6
+ decl %edx
+ jnz .Loop
+
+ movl 280(%rsp),%edx
+ leaq 16(%rdi),%rdi
+ leaq 64(%rsi),%rsi
+ decl %edx
+ jnz .Loop_grande
+
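+# Epilogue: restore %rbp/%rbx through the stack-pointer value saved at
+# entry; .byte 0xf3,0xc3 encodes "rep ret", the two-byte return that avoids
+# the single-byte-ret branch misprediction penalty on older AMD cores.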
+.Ldone:
+ movq 272(%rsp),%rax
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+ .byte 0xf3,0xc3
+.size sha256_multi_block,.-sha256_multi_block
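+# Round-constant table: each of the 64 constants is broadcast into eight
+# dword lanes (two .long rows of four), a layout presumably wide enough for
+# an eight-lane AVX2 sibling produced by the same generator; the SSE code
+# above reads 16-byte slices of it.  .Lpbswap is the pshufb mask that
+# byte-swaps each dword of the big-endian input.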
+.align 256
+K256:
+.long 1116352408,1116352408,1116352408,1116352408
+.long 1116352408,1116352408,1116352408,1116352408
+.long 1899447441,1899447441,1899447441,1899447441
+.long 1899447441,1899447441,1899447441,1899447441
+.long 3049323471,3049323471,3049323471,3049323471
+.long 3049323471,3049323471,3049323471,3049323471
+.long 3921009573,3921009573,3921009573,3921009573
+.long 3921009573,3921009573,3921009573,3921009573
+.long 961987163,961987163,961987163,961987163
+.long 961987163,961987163,961987163,961987163
+.long 1508970993,1508970993,1508970993,1508970993
+.long 1508970993,1508970993,1508970993,1508970993
+.long 2453635748,2453635748,2453635748,2453635748
+.long 2453635748,2453635748,2453635748,2453635748
+.long 2870763221,2870763221,2870763221,2870763221
+.long 2870763221,2870763221,2870763221,2870763221
+.long 3624381080,3624381080,3624381080,3624381080
+.long 3624381080,3624381080,3624381080,3624381080
+.long 310598401,310598401,310598401,310598401
+.long 310598401,310598401,310598401,310598401
+.long 607225278,607225278,607225278,607225278
+.long 607225278,607225278,607225278,607225278
+.long 1426881987,1426881987,1426881987,1426881987
+.long 1426881987,1426881987,1426881987,1426881987
+.long 1925078388,1925078388,1925078388,1925078388
+.long 1925078388,1925078388,1925078388,1925078388
+.long 2162078206,2162078206,2162078206,2162078206
+.long 2162078206,2162078206,2162078206,2162078206
+.long 2614888103,2614888103,2614888103,2614888103
+.long 2614888103,2614888103,2614888103,2614888103
+.long 3248222580,3248222580,3248222580,3248222580
+.long 3248222580,3248222580,3248222580,3248222580
+.long 3835390401,3835390401,3835390401,3835390401
+.long 3835390401,3835390401,3835390401,3835390401
+.long 4022224774,4022224774,4022224774,4022224774
+.long 4022224774,4022224774,4022224774,4022224774
+.long 264347078,264347078,264347078,264347078
+.long 264347078,264347078,264347078,264347078
+.long 604807628,604807628,604807628,604807628
+.long 604807628,604807628,604807628,604807628
+.long 770255983,770255983,770255983,770255983
+.long 770255983,770255983,770255983,770255983
+.long 1249150122,1249150122,1249150122,1249150122
+.long 1249150122,1249150122,1249150122,1249150122
+.long 1555081692,1555081692,1555081692,1555081692
+.long 1555081692,1555081692,1555081692,1555081692
+.long 1996064986,1996064986,1996064986,1996064986
+.long 1996064986,1996064986,1996064986,1996064986
+.long 2554220882,2554220882,2554220882,2554220882
+.long 2554220882,2554220882,2554220882,2554220882
+.long 2821834349,2821834349,2821834349,2821834349
+.long 2821834349,2821834349,2821834349,2821834349
+.long 2952996808,2952996808,2952996808,2952996808
+.long 2952996808,2952996808,2952996808,2952996808
+.long 3210313671,3210313671,3210313671,3210313671
+.long 3210313671,3210313671,3210313671,3210313671
+.long 3336571891,3336571891,3336571891,3336571891
+.long 3336571891,3336571891,3336571891,3336571891
+.long 3584528711,3584528711,3584528711,3584528711
+.long 3584528711,3584528711,3584528711,3584528711
+.long 113926993,113926993,113926993,113926993
+.long 113926993,113926993,113926993,113926993
+.long 338241895,338241895,338241895,338241895
+.long 338241895,338241895,338241895,338241895
+.long 666307205,666307205,666307205,666307205
+.long 666307205,666307205,666307205,666307205
+.long 773529912,773529912,773529912,773529912
+.long 773529912,773529912,773529912,773529912
+.long 1294757372,1294757372,1294757372,1294757372
+.long 1294757372,1294757372,1294757372,1294757372
+.long 1396182291,1396182291,1396182291,1396182291
+.long 1396182291,1396182291,1396182291,1396182291
+.long 1695183700,1695183700,1695183700,1695183700
+.long 1695183700,1695183700,1695183700,1695183700
+.long 1986661051,1986661051,1986661051,1986661051
+.long 1986661051,1986661051,1986661051,1986661051
+.long 2177026350,2177026350,2177026350,2177026350
+.long 2177026350,2177026350,2177026350,2177026350
+.long 2456956037,2456956037,2456956037,2456956037
+.long 2456956037,2456956037,2456956037,2456956037
+.long 2730485921,2730485921,2730485921,2730485921
+.long 2730485921,2730485921,2730485921,2730485921
+.long 2820302411,2820302411,2820302411,2820302411
+.long 2820302411,2820302411,2820302411,2820302411
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3600352804,3600352804,3600352804,3600352804
+.long 3600352804,3600352804,3600352804,3600352804
+.long 4094571909,4094571909,4094571909,4094571909
+.long 4094571909,4094571909,4094571909,4094571909
+.long 275423344,275423344,275423344,275423344
+.long 275423344,275423344,275423344,275423344
+.long 430227734,430227734,430227734,430227734
+.long 430227734,430227734,430227734,430227734
+.long 506948616,506948616,506948616,506948616
+.long 506948616,506948616,506948616,506948616
+.long 659060556,659060556,659060556,659060556
+.long 659060556,659060556,659060556,659060556
+.long 883997877,883997877,883997877,883997877
+.long 883997877,883997877,883997877,883997877
+.long 958139571,958139571,958139571,958139571
+.long 958139571,958139571,958139571,958139571
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1955562222,1955562222,1955562222,1955562222
+.long 1955562222,1955562222,1955562222,1955562222
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2756734187,2756734187,2756734187,2756734187
+.long 2756734187,2756734187,2756734187,2756734187
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3329325298,3329325298,3329325298,3329325298
+.long 3329325298,3329325298,3329325298,3329325298
+.Lpbswap:
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/elf/sha256-ssse3-x86.s b/lib/accelerated/x86/elf/sha256-ssse3-x86.s
new file mode 100644
index 0000000000..81470f510d
--- /dev/null
+++ b/lib/accelerated/x86/elf/sha256-ssse3-x86.s
@@ -0,0 +1,3403 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "sha512-586.s"
+.text
+.globl sha256_block_data_order
+.type sha256_block_data_order,@function
+.align 16
+sha256_block_data_order:
+.L_sha256_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl %esp,%ebx
+ call .L000pic_point
+.L000pic_point:
+ popl %ebp
+ leal .L001K256-.L000pic_point(%ebp),%ebp
+ subl $16,%esp
+ andl $-64,%esp
+ shll $6,%eax
+ addl %edi,%eax
+ movl %esi,(%esp)
+ movl %edi,4(%esp)
+ movl %eax,8(%esp)
+ movl %ebx,12(%esp)
+ leal _gnutls_x86_cpuid_s-.L001K256(%ebp),%edx
+ movl (%edx),%ecx
+ movl 4(%edx),%ebx
+ testl $1048576,%ecx
+ jnz .L002loop
+ andl $1073741824,%ecx
+ andl $268435968,%ebx
+ orl %ebx,%ecx
+ andl $1342177280,%ecx
+ cmpl $1342177280,%ecx
+ je .L003loop_shrd
+ subl %edi,%eax
+ cmpl $256,%eax
+ jae .L004unrolled
+ jmp .L002loop
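+# Dispatch on _gnutls_x86_cpuid_s, which mirrors OpenSSL's OPENSSL_ia32cap
+# layout (word 0 ~ CPUID.1 EDX with artificial bits, word 1 ~ CPUID.1 ECX).
+# Bit 20 of word 0 appears to mark NetBurst-class CPUs and forces the plain
+# rorl loop; 0x10000200 masks AVX (bit 28) and SSSE3 (bit 9) in word 1, and
+# the final 0x50000000 test selects .L003loop_shrd only on genuine-Intel
+# (bit 30) parts with AVX; inputs of at least 256 bytes (four blocks) take
+# the fully unrolled .L004unrolled path instead.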
+.align 16
+.L002loop:
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ bswap %eax
+ movl 12(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 16(%edi),%eax
+ movl 20(%edi),%ebx
+ movl 24(%edi),%ecx
+ bswap %eax
+ movl 28(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 32(%edi),%eax
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %eax
+ movl 44(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 48(%edi),%eax
+ movl 52(%edi),%ebx
+ movl 56(%edi),%ecx
+ bswap %eax
+ movl 60(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ addl $64,%edi
+ leal -36(%esp),%esp
+ movl %edi,104(%esp)
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,8(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,12(%esp)
+ movl %edi,16(%esp)
+ movl %ebx,(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ movl %edi,32(%esp)
+.align 16
+.L00500_15:
+ movl %edx,%ecx
+ movl 24(%esp),%esi
+ rorl $14,%ecx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl 96(%esp),%ebx
+ rorl $5,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3248222580,%esi
+ jne .L00500_15
+ movl 156(%esp),%ecx
+ jmp .L00616_63
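+# Neither round loop keeps a counter: %esi still holds the constant just
+# fetched from (%ebp), so .L00500_15 exits on K[15] = 0xc19bf174
+# (3248222580) and .L00616_63 on K[63] = 0xc67178f2 (3329325298).  The
+# 16..63 rounds also extend the message schedule,
+# W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16], addressing W
+# through a frame that slides down 4 bytes per round (leal -4(%esp),%esp),
+# so fixed displacements walk the schedule window.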
+.align 16
+.L00616_63:
+ movl %ecx,%ebx
+ movl 104(%esp),%esi
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 160(%esp),%ebx
+ shrl $10,%edi
+ addl 124(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 24(%esp),%esi
+ rorl $14,%ecx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl %ebx,96(%esp)
+ rorl $5,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ movl 156(%esp),%ecx
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3329325298,%esi
+ jne .L00616_63
+ movl 356(%esp),%esi
+ movl 8(%esp),%ebx
+ movl 16(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl 24(%esp),%eax
+ movl 28(%esp),%ebx
+ movl 32(%esp),%ecx
+ movl 360(%esp),%edi
+ addl 16(%esi),%edx
+ addl 20(%esi),%eax
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %eax,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ leal 356(%esp),%esp
+ subl $256,%ebp
+ cmpl 8(%esp),%edi
+ jb .L002loop
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
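+# .L003loop_shrd repeats the round code with every rorl replaced by shrdl
+# (shrd with both operands the same register is a rotate); on the
+# Intel-with-AVX parts that select this path the double shift reportedly
+# runs faster than ror.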
+.align 32
+.L003loop_shrd:
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ bswap %eax
+ movl 12(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 16(%edi),%eax
+ movl 20(%edi),%ebx
+ movl 24(%edi),%ecx
+ bswap %eax
+ movl 28(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 32(%edi),%eax
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %eax
+ movl 44(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 48(%edi),%eax
+ movl 52(%edi),%ebx
+ movl 56(%edi),%ecx
+ bswap %eax
+ movl 60(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ addl $64,%edi
+ leal -36(%esp),%esp
+ movl %edi,104(%esp)
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,8(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,12(%esp)
+ movl %edi,16(%esp)
+ movl %ebx,(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ movl %edi,32(%esp)
+.align 16
+.L00700_15_shrd:
+ movl %edx,%ecx
+ movl 24(%esp),%esi
+ shrdl $14,%ecx,%ecx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl 96(%esp),%ebx
+ shrdl $5,%ecx,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ shrdl $9,%ecx,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ shrdl $11,%ecx,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3248222580,%esi
+ jne .L00700_15_shrd
+ movl 156(%esp),%ecx
+ jmp .L00816_63_shrd
+.align 16
+.L00816_63_shrd:
+ movl %ecx,%ebx
+ movl 104(%esp),%esi
+ shrdl $11,%ecx,%ecx
+ movl %esi,%edi
+ shrdl $2,%esi,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ shrdl $17,%esi,%esi
+ addl 160(%esp),%ebx
+ shrl $10,%edi
+ addl 124(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 24(%esp),%esi
+ shrdl $14,%ecx,%ecx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl %ebx,96(%esp)
+ shrdl $5,%ecx,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ shrdl $9,%ecx,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ shrdl $11,%ecx,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ movl 156(%esp),%ecx
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3329325298,%esi
+ jne .L00816_63_shrd
+ movl 356(%esp),%esi
+ movl 8(%esp),%ebx
+ movl 16(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl 24(%esp),%eax
+ movl 28(%esp),%ebx
+ movl 32(%esp),%ecx
+ movl 360(%esp),%edi
+ addl 16(%esi),%edx
+ addl 20(%esi),%eax
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %eax,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ leal 356(%esp),%esp
+ subl $256,%ebp
+ cmpl 8(%esp),%edi
+ jb .L003loop_shrd
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 64
+.L001K256:
+.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
+.long 66051,67438087,134810123,202182159
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte 62,0
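+# Data: .L001K256 packs the 64 round constants in one row; the following
+# row (66051,... = 0x00010203..0x0c0d0e0f) is a byte-order shuffle mask;
+# the .byte string spells "SHA256 block transform for x86, CRYPTOGAMS by
+# <appro@openssl.org>".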
+.align 16
+.L004unrolled:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebp
+ movl 8(%esi),%ecx
+ movl 12(%esi),%ebx
+ movl %ebp,4(%esp)
+ xorl %ecx,%ebp
+ movl %ecx,8(%esp)
+ movl %ebx,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ jmp .L009grand_loop
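+# Fully unrolled variant: each .L009grand_loop iteration byte-swaps one
+# 64-byte block into 32(%esp)..92(%esp), then runs all 64 rounds inline
+# with the constant folded into a leal (1116352408 = K[0] = 0x428a2f98).
+# The working variables rotate through %eax/%ebp/%edx and fixed stack
+# slots, carrying a^b between rounds for the same Maj shortcut.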
+.align 16
+.L009grand_loop:
+ movl (%edi),%ebx
+ movl 4(%edi),%ecx
+ bswap %ebx
+ movl 8(%edi),%esi
+ bswap %ecx
+ movl %ebx,32(%esp)
+ bswap %esi
+ movl %ecx,36(%esp)
+ movl %esi,40(%esp)
+ movl 12(%edi),%ebx
+ movl 16(%edi),%ecx
+ bswap %ebx
+ movl 20(%edi),%esi
+ bswap %ecx
+ movl %ebx,44(%esp)
+ bswap %esi
+ movl %ecx,48(%esp)
+ movl %esi,52(%esp)
+ movl 24(%edi),%ebx
+ movl 28(%edi),%ecx
+ bswap %ebx
+ movl 32(%edi),%esi
+ bswap %ecx
+ movl %ebx,56(%esp)
+ bswap %esi
+ movl %ecx,60(%esp)
+ movl %esi,64(%esp)
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %ebx
+ movl 44(%edi),%esi
+ bswap %ecx
+ movl %ebx,68(%esp)
+ bswap %esi
+ movl %ecx,72(%esp)
+ movl %esi,76(%esp)
+ movl 48(%edi),%ebx
+ movl 52(%edi),%ecx
+ bswap %ebx
+ movl 56(%edi),%esi
+ bswap %ecx
+ movl %ebx,80(%esp)
+ bswap %esi
+ movl %ecx,84(%esp)
+ movl %esi,88(%esp)
+ movl 60(%edi),%ebx
+ addl $64,%edi
+ bswap %ebx
+ movl %edi,100(%esp)
+ movl %ebx,92(%esp)
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1116352408(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 36(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1899447441(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 40(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3049323471(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 44(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3921009573(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 48(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 961987163(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 52(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1508970993(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 56(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2453635748(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 60(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2870763221(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 64(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3624381080(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 68(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 310598401(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 72(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 607225278(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 76(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1426881987(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 80(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1925078388(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 84(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2162078206(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 88(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2614888103(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 92(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3248222580(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3835390401(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 4022224774(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 264347078(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 604807628(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 770255983(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1249150122(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1555081692(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1996064986(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2554220882(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2821834349(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2952996808(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3210313671(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3336571891(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3584528711(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 113926993(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,92(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 338241895(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 666307205(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 773529912(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1294757372(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1396182291(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1695183700(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1986661051(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2177026350(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2456956037(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2730485921(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2820302411(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3259730800(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3345764771(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3516065817(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3600352804(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 4094571909(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,92(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 275423344(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 430227734(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 506948616(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 659060556(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 883997877(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 958139571(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1322822218(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1537002063(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1747873779(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1955562222(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2024104815(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2227730452(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2361852424(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2428436474(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2756734187(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3204031479(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3329325298(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
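+# the 64 rounds for this block are done: 96(%esp) holds the context
+# pointer, so add the working variables back into the hash state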
+ movl 96(%esp),%esi
+ xorl %edi,%ebp
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebp
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebp,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ebx
+ movl 28(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
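+# 100(%esp) is the current input pointer and 104(%esp) the end-of-input
+# marker: loop while blocks remain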
+ cmpl 104(%esp),%edi
+ jb .L009grand_loop
+ movl 108(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.size sha256_block_data_order,.-.L_sha256_block_data_order_begin
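+# 16-byte common symbol: the CPUID feature cache shared with the C side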
+.comm _gnutls_x86_cpuid_s,16,4
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/elf/sha512-ssse3-x86.s b/lib/accelerated/x86/elf/sha512-ssse3-x86.s
new file mode 100644
index 0000000000..088a0fa54a
--- /dev/null
+++ b/lib/accelerated/x86/elf/sha512-ssse3-x86.s
@@ -0,0 +1,606 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "sha512-586.s"
+.text
+.globl sha512_block_data_order
+.type sha512_block_data_order,@function
+.align 16
+sha512_block_data_order:
+.L_sha512_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl %esp,%ebx
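+# call/pop trick to obtain the current address (PIC): %ebp ends up
+# pointing at the K512 constant table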
+ call .L000pic_point
+.L000pic_point:
+ popl %ebp
+ leal .L001K512-.L000pic_point(%ebp),%ebp
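+# 64-byte-align the frame; %eax = input + nblocks*128 marks the end of
+# the data (SHA-512 works on 128-byte blocks)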
+ subl $16,%esp
+ andl $-64,%esp
+ shll $7,%eax
+ addl %edi,%eax
+ movl %esi,(%esp)
+ movl %edi,4(%esp)
+ movl %eax,8(%esp)
+ movl %ebx,12(%esp)
+.align 16
+.L002loop_x86:
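+# per-block setup: byte-swap the 128-byte input block and push it onto
+# the stack as the initial message schedule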
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 16(%edi),%eax
+ movl 20(%edi),%ebx
+ movl 24(%edi),%ecx
+ movl 28(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 32(%edi),%eax
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ movl 44(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 48(%edi),%eax
+ movl 52(%edi),%ebx
+ movl 56(%edi),%ecx
+ movl 60(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 64(%edi),%eax
+ movl 68(%edi),%ebx
+ movl 72(%edi),%ecx
+ movl 76(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 80(%edi),%eax
+ movl 84(%edi),%ebx
+ movl 88(%edi),%ecx
+ movl 92(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 96(%edi),%eax
+ movl 100(%edi),%ebx
+ movl 104(%edi),%ecx
+ movl 108(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 112(%edi),%eax
+ movl 116(%edi),%ebx
+ movl 120(%edi),%ecx
+ movl 124(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ addl $128,%edi
+ subl $72,%esp
+ movl %edi,204(%esp)
+ leal 8(%esp),%edi
+ movl $16,%ecx
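+# 0xa5f3f689, raw-encoded "mov %esi,%esi; rep movsl": copies the eight
+# 64-bit hash words from the context into the stack frame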
+.long 2784229001
+.align 16
+.L00300_15_x86:
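+# rounds 0..15, pure-x86 path: every 64-bit quantity is kept as a
+# lo/hi dword pair, so the Sigma rotations become paired shifts and
+# xors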
+ movl 40(%esp),%ecx
+ movl 44(%esp),%edx
+ movl %ecx,%esi
+ shrl $9,%ecx
+ movl %edx,%edi
+ shrl $9,%edx
+ movl %ecx,%ebx
+ shll $14,%esi
+ movl %edx,%eax
+ shll $14,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%eax
+ shll $4,%esi
+ xorl %edx,%ebx
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $4,%ecx
+ xorl %edi,%eax
+ shrl $4,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 48(%esp),%ecx
+ movl 52(%esp),%edx
+ movl 56(%esp),%esi
+ movl 60(%esp),%edi
+ addl 64(%esp),%eax
+ adcl 68(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ andl 40(%esp),%ecx
+ andl 44(%esp),%edx
+ addl 192(%esp),%eax
+ adcl 196(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ movl (%ebp),%esi
+ movl 4(%ebp),%edi
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 32(%esp),%ecx
+ movl 36(%esp),%edx
+ addl %esi,%eax
+ adcl %edi,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl %eax,32(%esp)
+ movl %ebx,36(%esp)
+ movl %ecx,%esi
+ shrl $2,%ecx
+ movl %edx,%edi
+ shrl $2,%edx
+ movl %ecx,%ebx
+ shll $4,%esi
+ movl %edx,%eax
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%ebx
+ shll $21,%esi
+ xorl %edx,%eax
+ shll $21,%edi
+ xorl %esi,%eax
+ shrl $21,%ecx
+ xorl %edi,%ebx
+ shrl $21,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl 16(%esp),%esi
+ movl 20(%esp),%edi
+ addl (%esp),%eax
+ adcl 4(%esp),%ebx
+ orl %esi,%ecx
+ orl %edi,%edx
+ andl 24(%esp),%ecx
+ andl 28(%esp),%edx
+ andl 8(%esp),%esi
+ andl 12(%esp),%edi
+ orl %esi,%ecx
+ orl %edi,%edx
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movb (%ebp),%dl
+ subl $8,%esp
+ leal 8(%ebp),%ebp
+ cmpb $148,%dl
+ jne .L00300_15_x86
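+# 148 (0x94) is the low byte of K512[15], so this loop ends after the
+# 16 input rounds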
+.align 16
+.L00416_79_x86:
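+# rounds 16..79: first extend the schedule,
+# W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16],
+# computed on 32-bit halves, then run the round as above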
+ movl 312(%esp),%ecx
+ movl 316(%esp),%edx
+ movl %ecx,%esi
+ shrl $1,%ecx
+ movl %edx,%edi
+ shrl $1,%edx
+ movl %ecx,%eax
+ shll $24,%esi
+ movl %edx,%ebx
+ shll $24,%edi
+ xorl %esi,%ebx
+ shrl $6,%ecx
+ xorl %edi,%eax
+ shrl $6,%edx
+ xorl %ecx,%eax
+ shll $7,%esi
+ xorl %edx,%ebx
+ shll $1,%edi
+ xorl %esi,%ebx
+ shrl $1,%ecx
+ xorl %edi,%eax
+ shrl $1,%edx
+ xorl %ecx,%eax
+ shll $6,%edi
+ xorl %edx,%ebx
+ xorl %edi,%eax
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movl 208(%esp),%ecx
+ movl 212(%esp),%edx
+ movl %ecx,%esi
+ shrl $6,%ecx
+ movl %edx,%edi
+ shrl $6,%edx
+ movl %ecx,%eax
+ shll $3,%esi
+ movl %edx,%ebx
+ shll $3,%edi
+ xorl %esi,%eax
+ shrl $13,%ecx
+ xorl %edi,%ebx
+ shrl $13,%edx
+ xorl %ecx,%eax
+ shll $10,%esi
+ xorl %edx,%ebx
+ shll $10,%edi
+ xorl %esi,%ebx
+ shrl $10,%ecx
+ xorl %edi,%eax
+ shrl $10,%edx
+ xorl %ecx,%ebx
+ shll $13,%edi
+ xorl %edx,%eax
+ xorl %edi,%eax
+ movl 320(%esp),%ecx
+ movl 324(%esp),%edx
+ addl (%esp),%eax
+ adcl 4(%esp),%ebx
+ movl 248(%esp),%esi
+ movl 252(%esp),%edi
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ addl %esi,%eax
+ adcl %edi,%ebx
+ movl %eax,192(%esp)
+ movl %ebx,196(%esp)
+ movl 40(%esp),%ecx
+ movl 44(%esp),%edx
+ movl %ecx,%esi
+ shrl $9,%ecx
+ movl %edx,%edi
+ shrl $9,%edx
+ movl %ecx,%ebx
+ shll $14,%esi
+ movl %edx,%eax
+ shll $14,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%eax
+ shll $4,%esi
+ xorl %edx,%ebx
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $4,%ecx
+ xorl %edi,%eax
+ shrl $4,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 48(%esp),%ecx
+ movl 52(%esp),%edx
+ movl 56(%esp),%esi
+ movl 60(%esp),%edi
+ addl 64(%esp),%eax
+ adcl 68(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ andl 40(%esp),%ecx
+ andl 44(%esp),%edx
+ addl 192(%esp),%eax
+ adcl 196(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ movl (%ebp),%esi
+ movl 4(%ebp),%edi
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 32(%esp),%ecx
+ movl 36(%esp),%edx
+ addl %esi,%eax
+ adcl %edi,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl %eax,32(%esp)
+ movl %ebx,36(%esp)
+ movl %ecx,%esi
+ shrl $2,%ecx
+ movl %edx,%edi
+ shrl $2,%edx
+ movl %ecx,%ebx
+ shll $4,%esi
+ movl %edx,%eax
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%ebx
+ shll $21,%esi
+ xorl %edx,%eax
+ shll $21,%edi
+ xorl %esi,%eax
+ shrl $21,%ecx
+ xorl %edi,%ebx
+ shrl $21,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl 16(%esp),%esi
+ movl 20(%esp),%edi
+ addl (%esp),%eax
+ adcl 4(%esp),%ebx
+ orl %esi,%ecx
+ orl %edi,%edx
+ andl 24(%esp),%ecx
+ andl 28(%esp),%edx
+ andl 8(%esp),%esi
+ andl 12(%esp),%edi
+ orl %esi,%ecx
+ orl %edi,%edx
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movb (%ebp),%dl
+ subl $8,%esp
+ leal 8(%ebp),%ebp
+ cmpb $23,%dl
+ jne .L00416_79_x86
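+# 23 (0x17) matched the low byte of K512[79] above, ending the 80
+# rounds; now fold the state back into the context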
+ movl 840(%esp),%esi
+ movl 844(%esp),%edi
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ addl 8(%esp),%eax
+ adcl 12(%esp),%ebx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ addl 16(%esp),%ecx
+ adcl 20(%esp),%edx
+ movl %ecx,8(%esi)
+ movl %edx,12(%esi)
+ movl 16(%esi),%eax
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edx
+ addl 24(%esp),%eax
+ adcl 28(%esp),%ebx
+ movl %eax,16(%esi)
+ movl %ebx,20(%esi)
+ addl 32(%esp),%ecx
+ adcl 36(%esp),%edx
+ movl %ecx,24(%esi)
+ movl %edx,28(%esi)
+ movl 32(%esi),%eax
+ movl 36(%esi),%ebx
+ movl 40(%esi),%ecx
+ movl 44(%esi),%edx
+ addl 40(%esp),%eax
+ adcl 44(%esp),%ebx
+ movl %eax,32(%esi)
+ movl %ebx,36(%esi)
+ addl 48(%esp),%ecx
+ adcl 52(%esp),%edx
+ movl %ecx,40(%esi)
+ movl %edx,44(%esi)
+ movl 48(%esi),%eax
+ movl 52(%esi),%ebx
+ movl 56(%esi),%ecx
+ movl 60(%esi),%edx
+ addl 56(%esp),%eax
+ adcl 60(%esp),%ebx
+ movl %eax,48(%esi)
+ movl %ebx,52(%esi)
+ addl 64(%esp),%ecx
+ adcl 68(%esp),%edx
+ movl %ecx,56(%esi)
+ movl %edx,60(%esi)
+ addl $840,%esp
+ subl $640,%ebp
+ cmpl 8(%esp),%edi
+ jb .L002loop_x86
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 64
+.L001K512:
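+# the 80 K512 round constants as (low dword, high dword) pairs; the
+# final two lines look like a little-endian byte-swap mask for the
+# SSE2 path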
+.long 3609767458,1116352408
+.long 602891725,1899447441
+.long 3964484399,3049323471
+.long 2173295548,3921009573
+.long 4081628472,961987163
+.long 3053834265,1508970993
+.long 2937671579,2453635748
+.long 3664609560,2870763221
+.long 2734883394,3624381080
+.long 1164996542,310598401
+.long 1323610764,607225278
+.long 3590304994,1426881987
+.long 4068182383,1925078388
+.long 991336113,2162078206
+.long 633803317,2614888103
+.long 3479774868,3248222580
+.long 2666613458,3835390401
+.long 944711139,4022224774
+.long 2341262773,264347078
+.long 2007800933,604807628
+.long 1495990901,770255983
+.long 1856431235,1249150122
+.long 3175218132,1555081692
+.long 2198950837,1996064986
+.long 3999719339,2554220882
+.long 766784016,2821834349
+.long 2566594879,2952996808
+.long 3203337956,3210313671
+.long 1034457026,3336571891
+.long 2466948901,3584528711
+.long 3758326383,113926993
+.long 168717936,338241895
+.long 1188179964,666307205
+.long 1546045734,773529912
+.long 1522805485,1294757372
+.long 2643833823,1396182291
+.long 2343527390,1695183700
+.long 1014477480,1986661051
+.long 1206759142,2177026350
+.long 344077627,2456956037
+.long 1290863460,2730485921
+.long 3158454273,2820302411
+.long 3505952657,3259730800
+.long 106217008,3345764771
+.long 3606008344,3516065817
+.long 1432725776,3600352804
+.long 1467031594,4094571909
+.long 851169720,275423344
+.long 3100823752,430227734
+.long 1363258195,506948616
+.long 3750685593,659060556
+.long 3785050280,883997877
+.long 3318307427,958139571
+.long 3812723403,1322822218
+.long 2003034995,1537002063
+.long 3602036899,1747873779
+.long 1575990012,1955562222
+.long 1125592928,2024104815
+.long 2716904306,2227730452
+.long 442776044,2361852424
+.long 593698344,2428436474
+.long 3733110249,2756734187
+.long 2999351573,3204031479
+.long 3815920427,3329325298
+.long 3928383900,3391569614
+.long 566280711,3515267271
+.long 3454069534,3940187606
+.long 4000239992,4118630271
+.long 1914138554,116418474
+.long 2731055270,174292421
+.long 3203993006,289380356
+.long 320620315,460393269
+.long 587496836,685471733
+.long 1086792851,852142971
+.long 365543100,1017036298
+.long 2618297676,1126000580
+.long 3409855158,1288033470
+.long 4234509866,1501505948
+.long 987167468,1607167915
+.long 1246189591,1816402316
+.long 67438087,66051
+.long 202182159,134810123
+.size sha512_block_data_order,.-.L_sha512_block_data_order_begin
+.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte 62,0
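+# the .byte string above spells "SHA512 block transform for x86,
+# CRYPTOGAMS by <appro@openssl.org>"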
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
new file mode 100644
index 0000000000..ea1915ded0
--- /dev/null
+++ b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
@@ -0,0 +1,2881 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+
+
+.globl sha256_block_data_order
+.type sha256_block_data_order,@function
+.align 16
+sha256_block_data_order:
+ leaq _gnutls_x86_cpuid_s(%rip),%r11
+ movl 0(%r11),%r9d
+ movl 4(%r11),%r10d
+ movl 8(%r11),%r11d
+ testl $512,%r10d
+ jnz .Lssse3_shortcut
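+# bit 9 (SSSE3) of the cached CPUID word selects the SSSE3 code path;
+# otherwise fall through to the integer implementation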
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
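+# shlq $4 turns the block count into dwords and the leaq scales by 4,
+# so %rdx = input + nblocks*64, the end of the data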
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+.Lprologue:
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp .Lloop
+
+.align 16
+.Lloop:
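+# one 64-byte block per iteration; %rbp steps through the K256 table,
+# each round adding (%rbp)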
+ movl %ebx,%edi
+ leaq K256(%rip),%rbp
+ xorl %ecx,%edi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,0(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r11d
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,4(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r10d
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,8(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r9d
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,12(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ addl %r14d,%r8d
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,16(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%edx
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,20(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ecx
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,24(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ebx
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,28(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ addl %r14d,%eax
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,32(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r11d
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,36(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r10d
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,40(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r9d
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,44(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ addl %r14d,%r8d
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,48(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%edx
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,52(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ecx
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,56(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ebx
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,60(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ jmp .Lrounds_16_xx
+.align 16
+.Lrounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%eax
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 36(%rsp),%r12d
+
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,0(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r11d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 40(%rsp),%r12d
+
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,4(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r10d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 44(%rsp),%r12d
+
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,8(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r9d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 48(%rsp),%r12d
+
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,12(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r8d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 52(%rsp),%r12d
+
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,16(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%edx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 56(%rsp),%r12d
+
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,20(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ecx
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 60(%rsp),%r12d
+
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,24(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ebx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 0(%rsp),%r12d
+
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,28(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%eax
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 4(%rsp),%r12d
+
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,32(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r11d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 8(%rsp),%r12d
+
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,36(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r10d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 12(%rsp),%r12d
+
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,40(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r9d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 16(%rsp),%r12d
+
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,44(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r8d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 20(%rsp),%r12d
+
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,48(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%edx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 24(%rsp),%r12d
+
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,52(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ecx
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 28(%rsp),%r12d
+
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,56(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ebx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 32(%rsp),%r12d
+
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,60(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ cmpb $0,3(%rbp)
+ jnz .Lrounds_16_xx
+
+ movq 64+0(%rsp),%rdi
+ addl %r14d,%eax
+ leaq 64(%rsi),%rsi
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue:
+ .byte 0xf3,0xc3
+.size sha256_block_data_order,.-sha256_block_data_order
+.align 64
+.type K256,@object
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.type sha256_block_data_order_ssse3,@function
+.align 64
+sha256_block_data_order_ssse3:
+.Lssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $96,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+.Lprologue_ssse3:
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+
+
+ jmp .Lloop_ssse3
+.align 16
+.Lloop_ssse3:
+ movdqa K256+512(%rip),%xmm7
+ movdqu 0(%rsi),%xmm0
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+.byte 102,15,56,0,199
+ leaq K256(%rip),%rbp
+.byte 102,15,56,0,207
+ movdqa 0(%rbp),%xmm4
+.byte 102,15,56,0,215
+ movdqa 32(%rbp),%xmm5
+ paddd %xmm0,%xmm4
+ movdqa 64(%rbp),%xmm6
+.byte 102,15,56,0,223
+ movdqa 96(%rbp),%xmm7
+ paddd %xmm1,%xmm5
+ paddd %xmm2,%xmm6
+ paddd %xmm3,%xmm7
+ movdqa %xmm4,0(%rsp)
+ movl %eax,%r14d
+ movdqa %xmm5,16(%rsp)
+ movl %ebx,%edi
+ movdqa %xmm6,32(%rsp)
+ xorl %ecx,%edi
+ movdqa %xmm7,48(%rsp)
+ movl %r8d,%r13d
+ jmp .Lssse3_00_47
+
+.align 16
+.Lssse3_00_47:
+ subq $-32*4,%rbp
+ rorl $14,%r13d
+ movdqa %xmm1,%xmm4
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ movdqa %xmm3,%xmm7
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+.byte 102,15,58,15,224,4
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+.byte 102,15,58,15,250,4
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ paddd %xmm7,%xmm0
+ rorl $2,%r14d
+ addl %r11d,%edx
+ psrld $7,%xmm6
+ addl %edi,%r11d
+ movl %edx,%r13d
+ pshufd $250,%xmm3,%xmm7
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %r11d,%r14d
+ pxor %xmm5,%xmm4
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ pslld $11,%xmm5
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ pxor %xmm6,%xmm4
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ paddd %xmm4,%xmm0
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ psrlq $17,%xmm6
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ psrldq $8,%xmm7
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ paddd %xmm7,%xmm0
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ pshufd $80,%xmm0,%xmm7
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ movdqa %xmm7,%xmm6
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ psrld $10,%xmm7
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ psrlq $2,%xmm6
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ pxor %xmm6,%xmm7
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ movdqa 0(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ paddd %xmm7,%xmm0
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ paddd %xmm0,%xmm6
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ movdqa %xmm6,0(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm2,%xmm4
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ movdqa %xmm0,%xmm7
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+.byte 102,15,58,15,225,4
+ andl %eax,%r12d
+ xorl %eax,%r13d
+.byte 102,15,58,15,251,4
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ paddd %xmm7,%xmm1
+ rorl $2,%r14d
+ addl %edx,%r11d
+ psrld $7,%xmm6
+ addl %edi,%edx
+ movl %r11d,%r13d
+ pshufd $250,%xmm0,%xmm7
+ addl %edx,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%edx
+ movl %eax,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %edx,%r14d
+ pxor %xmm5,%xmm4
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ pslld $11,%xmm5
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ paddd %xmm4,%xmm1
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ psrlq $17,%xmm6
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ psrldq $8,%xmm7
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ paddd %xmm7,%xmm1
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ pshufd $80,%xmm1,%xmm7
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ movdqa %xmm7,%xmm6
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ psrld $10,%xmm7
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ psrlq $2,%xmm6
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ pxor %xmm6,%xmm7
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ movdqa 32(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ paddd %xmm7,%xmm1
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ paddd %xmm1,%xmm6
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movdqa %xmm6,16(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm3,%xmm4
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ movdqa %xmm1,%xmm7
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+.byte 102,15,58,15,226,4
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+.byte 102,15,58,15,248,4
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ paddd %xmm7,%xmm2
+ rorl $2,%r14d
+ addl %r11d,%edx
+ psrld $7,%xmm6
+ addl %edi,%r11d
+ movl %edx,%r13d
+ pshufd $250,%xmm1,%xmm7
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %r11d,%r14d
+ pxor %xmm5,%xmm4
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ pslld $11,%xmm5
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ pxor %xmm6,%xmm4
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ paddd %xmm4,%xmm2
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ psrlq $17,%xmm6
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ psrldq $8,%xmm7
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ paddd %xmm7,%xmm2
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ pshufd $80,%xmm2,%xmm7
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ movdqa %xmm7,%xmm6
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ psrld $10,%xmm7
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ psrlq $2,%xmm6
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ pxor %xmm6,%xmm7
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ movdqa 64(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ paddd %xmm7,%xmm2
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ paddd %xmm2,%xmm6
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ movdqa %xmm6,32(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm0,%xmm4
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ movdqa %xmm2,%xmm7
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+.byte 102,15,58,15,227,4
+ andl %eax,%r12d
+ xorl %eax,%r13d
+.byte 102,15,58,15,249,4
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ paddd %xmm7,%xmm3
+ rorl $2,%r14d
+ addl %edx,%r11d
+ psrld $7,%xmm6
+ addl %edi,%edx
+ movl %r11d,%r13d
+ pshufd $250,%xmm2,%xmm7
+ addl %edx,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%edx
+ movl %eax,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %edx,%r14d
+ pxor %xmm5,%xmm4
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ pslld $11,%xmm5
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ paddd %xmm4,%xmm3
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ psrlq $17,%xmm6
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ psrldq $8,%xmm7
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ paddd %xmm7,%xmm3
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ pshufd $80,%xmm3,%xmm7
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ movdqa %xmm7,%xmm6
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ psrld $10,%xmm7
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ psrlq $2,%xmm6
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ pxor %xmm6,%xmm7
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ movdqa 96(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ paddd %xmm7,%xmm3
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ paddd %xmm3,%xmm6
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movdqa %xmm6,48(%rsp)
+ cmpb $0,131(%rbp)
+ jne .Lssse3_00_47
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ rorl $2,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ rorl $2,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ rorl $2,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ rorl $2,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movq 64+0(%rsp),%rdi
+ movl %r14d,%eax
+
+ addl 0(%rdi),%eax
+ leaq 64(%rsi),%rsi
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb .Lloop_ssse3
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+.Lepilogue_ssse3:
+ .byte 0xf3,0xc3
+.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3
+
+.section .note.GNU-stack,"",%progbits
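As a reading aid for the generated SHA-256 assembly above: the Sigma functions in the scalar rounds are built from rorl/shrl/xorl chains; %rbp walks K256 four bytes per round (with a 20-byte leaq every fourth round to skip the duplicated row), and the cmpb $0,3(%rbp) exit test works because every K256 constant has a nonzero top byte while the first trailing shuffle-mask word (0x00010203) does not. The .byte 102,15,56,0,... and 102,15,58,15,... sequences in the SSSE3 path are hand-encoded pshufb and palignr for assemblers that predate SSSE3. Below is a minimal C sketch of the round and schedule step that the assembly unrolls, following FIPS 180-4; the helper names are illustrative and not part of this commit.

#include <stdint.h>

static inline uint32_t ror32(uint32_t x, int n)
{
	return (x >> n) | (x << (32 - n));
}

/* One compression round: s[0..7] hold a..h, k is the round constant
 * loaded from K256 via (%rbp), w the current schedule word. */
static void sha256_round(uint32_t s[8], uint32_t k, uint32_t w)
{
	/* Sigma1(e) = ror6 ^ ror11 ^ ror25: the rorl $14/$5/$6 chain */
	uint32_t S1 = ror32(s[4], 6) ^ ror32(s[4], 11) ^ ror32(s[4], 25);
	/* Ch(e,f,g) */
	uint32_t ch = (s[4] & s[5]) ^ (~s[4] & s[6]);
	uint32_t t1 = s[7] + S1 + ch + k + w;
	/* Sigma0(a) = ror2 ^ ror13 ^ ror22: the rorl $9/$11/$2 chain */
	uint32_t S0 = ror32(s[0], 2) ^ ror32(s[0], 13) ^ ror32(s[0], 22);
	uint32_t maj = (s[0] & s[1]) ^ (s[0] & s[2]) ^ (s[1] & s[2]);

	s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + t1;
	s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = t1 + S0 + maj;
}

/* One schedule step for rounds 16..63 over a 16-word circular buffer:
 * sigma0(x) = ror7 ^ ror18 ^ shr3 and sigma1(x) = ror17 ^ ror19 ^ shr10,
 * built in the assembly from the rorl $11/$7 + shrl $3 and
 * rorl $2/$17 + shrl $10 sequences. */
static uint32_t sha256_sched(uint32_t w[16], int i)
{
	uint32_t w15 = w[(i - 15) & 15], w2 = w[(i - 2) & 15];
	uint32_t s0 = ror32(w15, 7) ^ ror32(w15, 18) ^ (w15 >> 3);
	uint32_t s1 = ror32(w2, 17) ^ ror32(w2, 19) ^ (w2 >> 10);

	return w[i & 15] += s0 + s1 + w[(i - 7) & 15];
}

The SSSE3 variant computes the same schedule four words at a time in XMM registers and adds a full 128-bit row of K256 with paddd; since each row is stored twice, it steps %rbp by 128 bytes per 16 rounds (the subq $-32*4,%rbp).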
diff --git a/lib/accelerated/x86/files.mk b/lib/accelerated/x86/files.mk
new file mode 100644
index 0000000000..c22d758387
--- /dev/null
+++ b/lib/accelerated/x86/files.mk
@@ -0,0 +1,6 @@
+X86_FILES_ELF=elf/aesni-x86.s elf/cpuid-x86.s elf/e_padlock-x86.s elf/sha1-ssse3-x86.s elf/sha256-ssse3-x86.s elf/sha512-ssse3-x86.s
+X86_FILES_COFF=coff/aesni-x86.s coff/cpuid-x86.s coff/e_padlock-x86.s coff/sha1-ssse3-x86.s coff/sha256-ssse3-x86.s coff/sha512-ssse3-x86.s
+X86_FILES_MACOSX=macosx/aesni-x86.s macosx/cpuid-x86.s macosx/e_padlock-x86.s macosx/sha1-ssse3-x86.s macosx/sha256-ssse3-x86.s macosx/sha512-ssse3-x86.s
+X86_64_FILES_ELF=elf/aesni-x86_64.s elf/cpuid-x86_64.s elf/e_padlock-x86_64.s elf/ghash-x86_64.s elf/sha1-ssse3-x86_64.s elf/sha512-ssse3-x86_64.s
+X86_64_FILES_COFF=coff/aesni-x86_64.s coff/cpuid-x86_64.s coff/e_padlock-x86_64.s coff/ghash-x86_64.s coff/sha1-ssse3-x86_64.s coff/sha512-ssse3-x86_64.s
+X86_64_FILES_MACOSX=macosx/aesni-x86_64.s macosx/cpuid-x86_64.s macosx/e_padlock-x86_64.s macosx/ghash-x86_64.s macosx/sha1-ssse3-x86_64.s macosx/sha512-ssse3-x86_64.s
diff --git a/lib/accelerated/x86/hmac-x86.c b/lib/accelerated/x86/hmac-x86.c
new file mode 100644
index 0000000000..73b21cc25d
--- /dev/null
+++ b/lib/accelerated/x86/hmac-x86.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright (C) 2008, 2010-2012 Free Software Foundation, Inc.
+ *
+ * Author: Nikos Mavrogiannopoulos
+ *
+ * This file is part of GNUTLS.
+ *
+ * The GNUTLS library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>
+ *
+ */
+
+/* This file provides the backend hash/mac implementation for
+ * the SSSE3-accelerated x86 SHA code.
+ */
+
+#include <gnutls_int.h>
+#include <gnutls_hash_int.h>
+#include <gnutls_errors.h>
+#include <nettle/sha.h>
+#include <nettle/hmac.h>
+#include <nettle/macros.h>
+#include <aes-x86.h>
+#include <sha-x86.h>
+#include <algorithms.h>
+
+#ifdef HAVE_LIBNETTLE
+
+typedef void (*update_func) (void *, unsigned, const uint8_t *);
+typedef void (*digest_func) (void *, unsigned, uint8_t *);
+typedef void (*set_key_func) (void *, unsigned, const uint8_t *);
+
+struct x86_hmac_ctx {
+ union {
+ struct hmac_sha1_ctx sha1;
+ struct hmac_sha224_ctx sha224;
+ struct hmac_sha256_ctx sha256;
+#ifdef ENABLE_SHA512
+ struct hmac_sha384_ctx sha384;
+ struct hmac_sha512_ctx sha512;
+#endif
+ } ctx;
+
+ void *ctx_ptr;
+ gnutls_mac_algorithm_t algo;
+ size_t length;
+ update_func update;
+ digest_func digest;
+ set_key_func setkey;
+};
+
+static void
+x86_hmac_sha1_set_key(struct hmac_sha1_ctx *ctx,
+ unsigned key_length, const uint8_t * key)
+{
+ HMAC_SET_KEY(ctx, &x86_sha1, key_length, key);
+}
+
+static void
+x86_hmac_sha1_update(struct hmac_sha1_ctx *ctx,
+ unsigned length, const uint8_t * data)
+{
+ x86_sha1_update(&ctx->state, length, data);
+}
+
+static void
+x86_hmac_sha1_digest(struct hmac_sha1_ctx *ctx,
+ unsigned length, uint8_t * digest)
+{
+ HMAC_DIGEST(ctx, &x86_sha1, length, digest);
+}
+
+static void
+x86_hmac_sha256_set_key(struct hmac_sha256_ctx *ctx,
+ unsigned key_length, const uint8_t * key)
+{
+ HMAC_SET_KEY(ctx, &x86_sha256, key_length, key);
+}
+
+static void
+x86_hmac_sha256_update(struct hmac_sha256_ctx *ctx,
+ unsigned length, const uint8_t * data)
+{
+ x86_sha256_update(&ctx->state, length, data);
+}
+
+static void
+x86_hmac_sha256_digest(struct hmac_sha256_ctx *ctx,
+ unsigned length, uint8_t * digest)
+{
+ HMAC_DIGEST(ctx, &x86_sha256, length, digest);
+}
+
+static void
+x86_hmac_sha224_set_key(struct hmac_sha224_ctx *ctx,
+ unsigned key_length, const uint8_t * key)
+{
+ HMAC_SET_KEY(ctx, &x86_sha224, key_length, key);
+}
+
+static void
+x86_hmac_sha224_digest(struct hmac_sha224_ctx *ctx,
+ unsigned length, uint8_t * digest)
+{
+ HMAC_DIGEST(ctx, &x86_sha224, length, digest);
+}
+
+#ifdef ENABLE_SHA512
+static void
+x86_hmac_sha384_set_key(struct hmac_sha384_ctx *ctx,
+ unsigned key_length, const uint8_t * key)
+{
+ HMAC_SET_KEY(ctx, &x86_sha384, key_length, key);
+}
+
+static void
+x86_hmac_sha384_digest(struct hmac_sha384_ctx *ctx,
+ unsigned length, uint8_t * digest)
+{
+ HMAC_DIGEST(ctx, &x86_sha384, length, digest);
+}
+
+static void
+x86_hmac_sha512_set_key(struct hmac_sha512_ctx *ctx,
+ unsigned key_length, const uint8_t * key)
+{
+ HMAC_SET_KEY(ctx, &x86_sha512, key_length, key);
+}
+
+static void
+x86_hmac_sha512_update(struct hmac_sha512_ctx *ctx,
+ unsigned length, const uint8_t * data)
+{
+ x86_sha512_update(&ctx->state, length, data);
+}
+
+static void
+x86_hmac_sha512_digest(struct hmac_sha512_ctx *ctx,
+ unsigned length, uint8_t * digest)
+{
+ HMAC_DIGEST(ctx, &x86_sha512, length, digest);
+}
+#endif
+
+static int
+_hmac_ctx_init(gnutls_mac_algorithm_t algo, struct x86_hmac_ctx *ctx)
+{
+ switch (algo) {
+ case GNUTLS_MAC_SHA1:
+ ctx->update = (update_func) x86_hmac_sha1_update;
+ ctx->digest = (digest_func) x86_hmac_sha1_digest;
+ ctx->setkey = (set_key_func) x86_hmac_sha1_set_key;
+ ctx->ctx_ptr = &ctx->ctx.sha1;
+ ctx->length = SHA1_DIGEST_SIZE;
+ break;
+ case GNUTLS_MAC_SHA224:
+ ctx->update = (update_func) x86_hmac_sha256_update;
+ ctx->digest = (digest_func) x86_hmac_sha224_digest;
+ ctx->setkey = (set_key_func) x86_hmac_sha224_set_key;
+ ctx->ctx_ptr = &ctx->ctx.sha224;
+ ctx->length = SHA224_DIGEST_SIZE;
+ break;
+ case GNUTLS_MAC_SHA256:
+ ctx->update = (update_func) x86_hmac_sha256_update;
+ ctx->digest = (digest_func) x86_hmac_sha256_digest;
+ ctx->setkey = (set_key_func) x86_hmac_sha256_set_key;
+ ctx->ctx_ptr = &ctx->ctx.sha256;
+ ctx->length = SHA256_DIGEST_SIZE;
+ break;
+#ifdef ENABLE_SHA512
+ case GNUTLS_MAC_SHA384:
+ ctx->update = (update_func) x86_hmac_sha512_update;
+ ctx->digest = (digest_func) x86_hmac_sha384_digest;
+ ctx->setkey = (set_key_func) x86_hmac_sha384_set_key;
+ ctx->ctx_ptr = &ctx->ctx.sha384;
+ ctx->length = SHA384_DIGEST_SIZE;
+ break;
+ case GNUTLS_MAC_SHA512:
+ ctx->update = (update_func) x86_hmac_sha512_update;
+ ctx->digest = (digest_func) x86_hmac_sha512_digest;
+ ctx->setkey = (set_key_func) x86_hmac_sha512_set_key;
+ ctx->ctx_ptr = &ctx->ctx.sha512;
+ ctx->length = SHA512_DIGEST_SIZE;
+ break;
+#endif
+ default:
+ gnutls_assert();
+ return GNUTLS_E_INVALID_REQUEST;
+ }
+
+ return 0;
+}
+
+
+static int wrap_x86_hmac_init(gnutls_mac_algorithm_t algo, void **_ctx)
+{
+ struct x86_hmac_ctx *ctx;
+ int ret;
+
+ ctx = gnutls_calloc(1, sizeof(struct x86_hmac_ctx));
+ if (ctx == NULL) {
+ gnutls_assert();
+ return GNUTLS_E_MEMORY_ERROR;
+ }
+
+ ctx->algo = algo;
+
+	ret = _hmac_ctx_init(algo, ctx);
+	if (ret < 0) {
+		gnutls_free(ctx);
+		return gnutls_assert_val(ret);
+	}
+
+ *_ctx = ctx;
+
+ return 0;
+}
+
+static int
+wrap_x86_hmac_setkey(void *_ctx, const void *key, size_t keylen)
+{
+ struct x86_hmac_ctx *ctx = _ctx;
+
+ ctx->setkey(ctx->ctx_ptr, keylen, key);
+
+ return GNUTLS_E_SUCCESS;
+}
+
+static int
+wrap_x86_hmac_update(void *_ctx, const void *text, size_t textsize)
+{
+ struct x86_hmac_ctx *ctx = _ctx;
+
+ ctx->update(ctx->ctx_ptr, textsize, text);
+
+ return GNUTLS_E_SUCCESS;
+}
+
+static int
+wrap_x86_hmac_output(void *src_ctx, void *digest, size_t digestsize)
+{
+ struct x86_hmac_ctx *ctx;
+ ctx = src_ctx;
+
+ if (digestsize < ctx->length) {
+ gnutls_assert();
+ return GNUTLS_E_SHORT_MEMORY_BUFFER;
+ }
+
+ ctx->digest(ctx->ctx_ptr, digestsize, digest);
+
+ return 0;
+}
+
+static void wrap_x86_hmac_deinit(void *hd)
+{
+ gnutls_free(hd);
+}
+
+static int wrap_x86_hmac_fast(gnutls_mac_algorithm_t algo,
+ const void *nonce, size_t nonce_size,
+ const void *key, size_t key_size,
+ const void *text, size_t text_size,
+ void *digest)
+{
+ struct x86_hmac_ctx ctx;
+ int ret;
+
+ ret = _hmac_ctx_init(algo, &ctx);
+ if (ret < 0)
+ return gnutls_assert_val(ret);
+
+	ctx.setkey(ctx.ctx_ptr, key_size, key);
+	ctx.update(ctx.ctx_ptr, text_size, text);
+	ctx.digest(ctx.ctx_ptr, ctx.length, digest);
+
+ zeroize_temp_key(&ctx, sizeof(ctx));
+
+ return 0;
+}
+
+const gnutls_crypto_mac_st hmac_sha_x86_struct = {
+ .init = wrap_x86_hmac_init,
+ .setkey = wrap_x86_hmac_setkey,
+ .setnonce = NULL,
+ .hash = wrap_x86_hmac_update,
+ .output = wrap_x86_hmac_output,
+ .deinit = wrap_x86_hmac_deinit,
+ .fast = wrap_x86_hmac_fast,
+};
+
+#endif /* HAVE_LIBNETTLE */
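The hmac_sha_x86_struct vtable at the end of the file is what the library registers so that HMAC over these algorithms is routed through the SSSE3 SHA code; the .fast entry runs set_key/update/digest on a stack context and zeroizes it afterwards. Below is a hedged sketch of the intended call sequence, using only the wrapper signatures defined in this file; the example function itself is hypothetical.

/* Hypothetical caller: one-shot HMAC-SHA1 through the vtable above. */
static int hmac_sha1_example(const void *key, size_t keylen,
			     const void *data, size_t len,
			     uint8_t digest[SHA1_DIGEST_SIZE])
{
	void *ctx;
	int ret;

	ret = hmac_sha_x86_struct.init(GNUTLS_MAC_SHA1, &ctx);
	if (ret < 0)
		return ret;

	hmac_sha_x86_struct.setkey(ctx, key, keylen);
	hmac_sha_x86_struct.hash(ctx, data, len);
	ret = hmac_sha_x86_struct.output(ctx, digest, SHA1_DIGEST_SIZE);
	hmac_sha_x86_struct.deinit(ctx);

	return ret;
}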
diff --git a/lib/accelerated/x86/macosx/appro-aes-x86-macosx.s b/lib/accelerated/x86/macosx/aesni-x86.s
index 92313599bd..4cb2d98af9 100644
--- a/lib/accelerated/x86/macosx/appro-aes-x86-macosx.s
+++ b/lib/accelerated/x86/macosx/aesni-x86.s
@@ -1,4 +1,3 @@
-/*
# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
@@ -38,7 +37,6 @@
#
# *** This file is auto-generated ***
#
-*/
.file "devel/perlasm/aesni-x86.s"
.text
.globl _aesni_encrypt
@@ -2146,3 +2144,5 @@ L100dec_key_ret:
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/appro-aes-x86-64-macosx.s b/lib/accelerated/x86/macosx/aesni-x86_64.s
index e2cfa17951..85f26e05d4 100644
--- a/lib/accelerated/x86/macosx/appro-aes-x86-64-macosx.s
+++ b/lib/accelerated/x86/macosx/aesni-x86_64.s
@@ -1,4 +1,3 @@
-/*
# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
@@ -38,7 +37,6 @@
#
# *** This file is auto-generated ***
#
-*/
.text
.globl _aesni_encrypt
@@ -2973,3 +2971,5 @@ L$increment1:
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/cpuid-x86-64-macosx.s b/lib/accelerated/x86/macosx/cpuid-x86-64-macosx.s
deleted file mode 100644
index 26adc5b445..0000000000
--- a/lib/accelerated/x86/macosx/cpuid-x86-64-macosx.s
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
-# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain copyright notices,
-# this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials
-# provided with the distribution.
-#
-# * Neither the name of the Andy Polyakov nor the names of its
-# copyright holder and contributors may be used to endorse or
-# promote products derived from this software without specific
-# prior written permission.
-#
-# ALTERNATIVELY, provided that this notice is retained in full, this
-# product may be distributed under the terms of the GNU General Public
-# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
-# those given above.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *** This file is auto-generated ***
-#
-*/
-.text
-.globl _gnutls_cpuid
-
-.p2align 4
-_gnutls_cpuid:
- pushq %rbp
- movq %rsp,%rbp
- pushq %rbx
- movl %edi,-12(%rbp)
- movq %rsi,-24(%rbp)
- movq %rdx,-32(%rbp)
- movq %rcx,-40(%rbp)
- movq %r8,-48(%rbp)
- movl -12(%rbp),%eax
- movl %eax,-60(%rbp)
- movl -60(%rbp),%eax
- cpuid
- movl %edx,-56(%rbp)
- movl %ecx,%esi
- movl %eax,-52(%rbp)
- movq -24(%rbp),%rax
- movl -52(%rbp),%edx
- movl %edx,(%rax)
- movq -32(%rbp),%rax
- movl %ebx,(%rax)
- movq -40(%rbp),%rax
- movl %esi,(%rax)
- movq -48(%rbp),%rax
- movl -56(%rbp),%ecx
- movl %ecx,(%rax)
- popq %rbx
- leave
- .byte 0xf3,0xc3
-
diff --git a/lib/accelerated/x86/macosx/cpuid-x86-macosx.s b/lib/accelerated/x86/macosx/cpuid-x86-macosx.s
deleted file mode 100644
index 0078f3d06f..0000000000
--- a/lib/accelerated/x86/macosx/cpuid-x86-macosx.s
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
-# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain copyright notices,
-# this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials
-# provided with the distribution.
-#
-# * Neither the name of the Andy Polyakov nor the names of its
-# copyright holder and contributors may be used to endorse or
-# promote products derived from this software without specific
-# prior written permission.
-#
-# ALTERNATIVELY, provided that this notice is retained in full, this
-# product may be distributed under the terms of the GNU General Public
-# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
-# those given above.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-# *** This file is auto-generated ***
-#
-*/
-.file "devel/perlasm/cpuid-x86.s"
-.text
-.globl _gnutls_cpuid
-.align 4
-_gnutls_cpuid:
-L_gnutls_cpuid_begin:
- pushl %ebp
- movl %esp,%ebp
- subl $12,%esp
- movl %ebx,(%esp)
- movl 8(%ebp),%eax
- movl %esi,4(%esp)
- movl %edi,8(%esp)
- pushl %ebx
- .byte 0x0f,0xa2
- movl %ebx,%edi
- popl %ebx
- movl %edx,%esi
- movl 12(%ebp),%edx
- movl %eax,(%edx)
- movl 16(%ebp),%eax
- movl %edi,(%eax)
- movl 20(%ebp),%eax
- movl %ecx,(%eax)
- movl 24(%ebp),%eax
- movl %esi,(%eax)
- movl (%esp),%ebx
- movl 4(%esp),%esi
- movl 8(%esp),%edi
- movl %ebp,%esp
- popl %ebp
- ret
-.globl _gnutls_have_cpuid
-.align 4
-_gnutls_have_cpuid:
-L_gnutls_have_cpuid_begin:
- pushfl
- popl %eax
- orl $2097152,%eax
- pushl %eax
- popfl
- pushfl
- popl %eax
- andl $2097152,%eax
- ret
-.byte 67,80,85,73,68,32,102,111,114,32,120,56,54,0
diff --git a/lib/accelerated/x86/macosx/cpuid-x86.s b/lib/accelerated/x86/macosx/cpuid-x86.s
new file mode 100644
index 0000000000..978b232e74
--- /dev/null
+++ b/lib/accelerated/x86/macosx/cpuid-x86.s
@@ -0,0 +1,70 @@
+#
+# Copyright (C) 2011-2012 Free Software Foundation, Inc.
+#
+# Author: Nikos Mavrogiannopoulos
+#
+# This file is part of GnuTLS.
+#
+# The GnuTLS library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# *** This file is auto-generated ***
+#
+.file "devel/perlasm/cpuid-x86.s"
+.text
+.globl _gnutls_cpuid
+.align 4
+_gnutls_cpuid:
+L_gnutls_cpuid_begin:
+ pushl %ebp
+ movl %esp,%ebp
+ subl $12,%esp
+ movl %ebx,(%esp)
+ movl 8(%ebp),%eax
+ movl %esi,4(%esp)
+ movl %edi,8(%esp)
+ pushl %ebx
+ .byte 0x0f,0xa2
+ movl %ebx,%edi
+ popl %ebx
+ movl %edx,%esi
+ movl 12(%ebp),%edx
+ movl %eax,(%edx)
+ movl 16(%ebp),%eax
+ movl %edi,(%eax)
+ movl 20(%ebp),%eax
+ movl %ecx,(%eax)
+ movl 24(%ebp),%eax
+ movl %esi,(%eax)
+ movl (%esp),%ebx
+ movl 4(%esp),%esi
+ movl 8(%esp),%edi
+ movl %ebp,%esp
+ popl %ebp
+ ret
+.globl _gnutls_have_cpuid
+.align 4
+_gnutls_have_cpuid:
+L_gnutls_have_cpuid_begin:
+ pushfl
+ popl %eax
+ orl $2097152,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ andl $2097152,%eax
+ ret
+.byte 67,80,85,73,68,32,102,111,114,32,120,56,54,0
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/cpuid-x86_64.s b/lib/accelerated/x86/macosx/cpuid-x86_64.s
new file mode 100644
index 0000000000..cf8fea99a2
--- /dev/null
+++ b/lib/accelerated/x86/macosx/cpuid-x86_64.s
@@ -0,0 +1,58 @@
+#
+# Copyright (C) 2011-2012 Free Software Foundation, Inc.
+#
+# Author: Nikos Mavrogiannopoulos
+#
+# This file is part of GnuTLS.
+#
+# The GnuTLS library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# *** This file is auto-generated ***
+#
+.text
+.globl _gnutls_cpuid
+
+.p2align 4
+_gnutls_cpuid:
+ pushq %rbp
+ movq %rsp,%rbp
+ pushq %rbx
+ movl %edi,-12(%rbp)
+ movq %rsi,-24(%rbp)
+ movq %rdx,-32(%rbp)
+ movq %rcx,-40(%rbp)
+ movq %r8,-48(%rbp)
+ movl -12(%rbp),%eax
+ movl %eax,-60(%rbp)
+ movl -60(%rbp),%eax
+ cpuid
+ movl %edx,-56(%rbp)
+ movl %ecx,%esi
+ movl %eax,-52(%rbp)
+ movq -24(%rbp),%rax
+ movl -52(%rbp),%edx
+ movl %edx,(%rax)
+ movq -32(%rbp),%rax
+ movl %ebx,(%rax)
+ movq -40(%rbp),%rax
+ movl %esi,(%rax)
+ movq -48(%rbp),%rax
+ movl -56(%rbp),%ecx
+ movl %ecx,(%rax)
+ popq %rbx
+ leave
+ .byte 0xf3,0xc3
+
+
+.section .note.GNU-stack,"",%progbits
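
The 64-bit twin needs no such probe (CPUID is architectural on x86-64) and reads like unoptimized compiler output: the leaf arrives in %edi, the four result pointers in %rsi/%rdx/%rcx/%r8, and %rbx is saved around CPUID because the SysV ABI marks it callee-saved. A hand-written C equivalent might look like the sketch below; the inline-asm phrasing is an assumption, not the source this file was generated from. Note that, like the assembly, it leaves the subleaf register %ecx undefined on input:

    void gnutls_cpuid_sketch(unsigned int func, unsigned int *ax,
                             unsigned int *bx, unsigned int *cx,
                             unsigned int *dx)
    {
            unsigned int a, b, c, d;

            /* "=b" lets the compiler do the %rbx save/restore that the
               assembly above performs with an explicit pushq/popq pair. */
            __asm__ __volatile__("cpuid"
                                 : "=a"(a), "=b"(b), "=c"(c), "=d"(d)
                                 : "a"(func));
            *ax = a; *bx = b; *cx = c; *dx = d;
    }
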
diff --git a/lib/accelerated/x86/macosx/padlock-x86-macosx.s b/lib/accelerated/x86/macosx/e_padlock-x86.s
index 1a2fa9246b..4bdadb99a1 100644
--- a/lib/accelerated/x86/macosx/padlock-x86-macosx.s
+++ b/lib/accelerated/x86/macosx/e_padlock-x86.s
@@ -1,4 +1,3 @@
-/*
# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
@@ -38,7 +37,6 @@
#
# *** This file is auto-generated ***
#
-*/
.file "devel/perlasm/e_padlock-x86.s"
.text
.globl _padlock_capability
@@ -1034,3 +1032,5 @@ L_padlock_sha512_blocks_begin:
.align 2,0x90
Lpadlock_saved_context:
.long 0
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/padlock-x86-64-macosx.s b/lib/accelerated/x86/macosx/e_padlock-x86_64.s
index 1327e82172..29723b3714 100644
--- a/lib/accelerated/x86/macosx/padlock-x86-64-macosx.s
+++ b/lib/accelerated/x86/macosx/e_padlock-x86_64.s
@@ -1,4 +1,3 @@
-/*
# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
@@ -38,7 +37,6 @@
#
# *** This file is auto-generated ***
#
-*/
.text
.globl _padlock_capability
@@ -1065,3 +1063,5 @@ L$ctr32_abort:
.p2align 3
L$padlock_saved_context:
.quad 0
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/appro-aes-gcm-x86-64-macosx.s b/lib/accelerated/x86/macosx/ghash-x86_64.s
index eac88aeba1..2f5ac653ab 100644
--- a/lib/accelerated/x86/macosx/appro-aes-gcm-x86-64-macosx.s
+++ b/lib/accelerated/x86/macosx/ghash-x86_64.s
@@ -1,4 +1,3 @@
-/*
# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
@@ -38,7 +37,6 @@
#
# *** This file is auto-generated ***
#
-*/
.text
.globl _gcm_gmult_4bit
@@ -1348,3 +1346,5 @@ L$rem_8bit:
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/openssl-cpuid-x86.s b/lib/accelerated/x86/macosx/openssl-cpuid-x86.s
new file mode 100644
index 0000000000..ba4f09c67b
--- /dev/null
+++ b/lib/accelerated/x86/macosx/openssl-cpuid-x86.s
@@ -0,0 +1,399 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "x86cpuid.s"
+.text
+.globl _OPENSSL_ia32_cpuid
+.align 4
+_OPENSSL_ia32_cpuid:
+L_OPENSSL_ia32_cpuid_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ xorl %edx,%edx
+ pushfl
+ popl %eax
+ movl %eax,%ecx
+ xorl $2097152,%eax
+ pushl %eax
+ popfl
+ pushfl
+ popl %eax
+ xorl %eax,%ecx
+ xorl %eax,%eax
+ btl $21,%ecx
+ jnc L000nocpuid
+ movl 20(%esp),%esi
+ movl %eax,8(%esi)
+ .byte 0x0f,0xa2
+ movl %eax,%edi
+ xorl %eax,%eax
+ cmpl $1970169159,%ebx
+ setne %al
+ movl %eax,%ebp
+ cmpl $1231384169,%edx
+ setne %al
+ orl %eax,%ebp
+ cmpl $1818588270,%ecx
+ setne %al
+ orl %eax,%ebp
+ jz L001intel
+ cmpl $1752462657,%ebx
+ setne %al
+ movl %eax,%esi
+ cmpl $1769238117,%edx
+ setne %al
+ orl %eax,%esi
+ cmpl $1145913699,%ecx
+ setne %al
+ orl %eax,%esi
+ jnz L001intel
+ movl $2147483648,%eax
+ .byte 0x0f,0xa2
+ cmpl $2147483649,%eax
+ jb L001intel
+ movl %eax,%esi
+ movl $2147483649,%eax
+ .byte 0x0f,0xa2
+ orl %ecx,%ebp
+ andl $2049,%ebp
+ cmpl $2147483656,%esi
+ jb L001intel
+ movl $2147483656,%eax
+ .byte 0x0f,0xa2
+ movzbl %cl,%esi
+ incl %esi
+ movl $1,%eax
+ xorl %ecx,%ecx
+ .byte 0x0f,0xa2
+ btl $28,%edx
+ jnc L002generic
+ shrl $16,%ebx
+ andl $255,%ebx
+ cmpl %esi,%ebx
+ ja L002generic
+ andl $4026531839,%edx
+ jmp L002generic
+L001intel:
+ cmpl $7,%edi
+ jb L003cacheinfo
+ movl 20(%esp),%esi
+ movl $7,%eax
+ xorl %ecx,%ecx
+ .byte 0x0f,0xa2
+ movl %ebx,8(%esi)
+L003cacheinfo:
+ cmpl $4,%edi
+ movl $-1,%edi
+ jb L004nocacheinfo
+ movl $4,%eax
+ movl $0,%ecx
+ .byte 0x0f,0xa2
+ movl %eax,%edi
+ shrl $14,%edi
+ andl $4095,%edi
+L004nocacheinfo:
+ movl $1,%eax
+ xorl %ecx,%ecx
+ .byte 0x0f,0xa2
+ andl $3220176895,%edx
+ cmpl $0,%ebp
+ jne L005notintel
+ orl $1073741824,%edx
+ andb $15,%ah
+ cmpb $15,%ah
+ jne L005notintel
+ orl $1048576,%edx
+L005notintel:
+ btl $28,%edx
+ jnc L002generic
+ andl $4026531839,%edx
+ cmpl $0,%edi
+ je L002generic
+ orl $268435456,%edx
+ shrl $16,%ebx
+ cmpb $1,%bl
+ ja L002generic
+ andl $4026531839,%edx
+L002generic:
+ andl $2048,%ebp
+ andl $4294965247,%ecx
+ movl %edx,%esi
+ orl %ecx,%ebp
+ btl $27,%ecx
+ jnc L006clear_avx
+ xorl %ecx,%ecx
+.byte 15,1,208
+ andl $6,%eax
+ cmpl $6,%eax
+ je L007done
+ cmpl $2,%eax
+ je L006clear_avx
+L008clear_xmm:
+ andl $4261412861,%ebp
+ andl $4278190079,%esi
+L006clear_avx:
+ andl $4026525695,%ebp
+ movl 20(%esp),%edi
+ andl $4294967263,8(%edi)
+L007done:
+ movl %esi,%eax
+ movl %ebp,%edx
+L000nocpuid:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _OPENSSL_rdtsc
+.align 4
+_OPENSSL_rdtsc:
+L_OPENSSL_rdtsc_begin:
+ xorl %eax,%eax
+ xorl %edx,%edx
+ call L009PIC_me_up
+L009PIC_me_up:
+ popl %ecx
+ movl L__gnutls_x86_cpuid_s$non_lazy_ptr-L009PIC_me_up(%ecx),%ecx
+ btl $4,(%ecx)
+ jnc L010notsc
+ .byte 0x0f,0x31
+L010notsc:
+ ret
+.globl _OPENSSL_instrument_halt
+.align 4
+_OPENSSL_instrument_halt:
+L_OPENSSL_instrument_halt_begin:
+ call L011PIC_me_up
+L011PIC_me_up:
+ popl %ecx
+ movl L__gnutls_x86_cpuid_s$non_lazy_ptr-L011PIC_me_up(%ecx),%ecx
+ btl $4,(%ecx)
+ jnc L012nohalt
+.long 2421723150
+ andl $3,%eax
+ jnz L012nohalt
+ pushfl
+ popl %eax
+ btl $9,%eax
+ jnc L012nohalt
+ .byte 0x0f,0x31
+ pushl %edx
+ pushl %eax
+ hlt
+ .byte 0x0f,0x31
+ subl (%esp),%eax
+ sbbl 4(%esp),%edx
+ addl $8,%esp
+ ret
+L012nohalt:
+ xorl %eax,%eax
+ xorl %edx,%edx
+ ret
+.globl _OPENSSL_far_spin
+.align 4
+_OPENSSL_far_spin:
+L_OPENSSL_far_spin_begin:
+ pushfl
+ popl %eax
+ btl $9,%eax
+ jnc L013nospin
+ movl 4(%esp),%eax
+ movl 8(%esp),%ecx
+.long 2430111262
+ xorl %eax,%eax
+ movl (%ecx),%edx
+ jmp L014spin
+.align 4,0x90
+L014spin:
+ incl %eax
+ cmpl (%ecx),%edx
+ je L014spin
+.long 529567888
+ ret
+L013nospin:
+ xorl %eax,%eax
+ xorl %edx,%edx
+ ret
+.globl _OPENSSL_wipe_cpu
+.align 4
+_OPENSSL_wipe_cpu:
+L_OPENSSL_wipe_cpu_begin:
+ xorl %eax,%eax
+ xorl %edx,%edx
+ call L015PIC_me_up
+L015PIC_me_up:
+ popl %ecx
+ movl L__gnutls_x86_cpuid_s$non_lazy_ptr-L015PIC_me_up(%ecx),%ecx
+ movl (%ecx),%ecx
+ btl $1,(%ecx)
+ jnc L016no_x87
+.long 4007259865,4007259865,4007259865,4007259865,2430851995
+L016no_x87:
+ leal 4(%esp),%eax
+ ret
+.globl _OPENSSL_atomic_add
+.align 4
+_OPENSSL_atomic_add:
+L_OPENSSL_atomic_add_begin:
+ movl 4(%esp),%edx
+ movl 8(%esp),%ecx
+ pushl %ebx
+ nop
+ movl (%edx),%eax
+L017spin:
+ leal (%eax,%ecx,1),%ebx
+ nop
+.long 447811568
+ jne L017spin
+ movl %ebx,%eax
+ popl %ebx
+ ret
+.globl _OPENSSL_indirect_call
+.align 4
+_OPENSSL_indirect_call:
+L_OPENSSL_indirect_call_begin:
+ pushl %ebp
+ movl %esp,%ebp
+ subl $28,%esp
+ movl 12(%ebp),%ecx
+ movl %ecx,(%esp)
+ movl 16(%ebp),%edx
+ movl %edx,4(%esp)
+ movl 20(%ebp),%eax
+ movl %eax,8(%esp)
+ movl 24(%ebp),%eax
+ movl %eax,12(%esp)
+ movl 28(%ebp),%eax
+ movl %eax,16(%esp)
+ movl 32(%ebp),%eax
+ movl %eax,20(%esp)
+ movl 36(%ebp),%eax
+ movl %eax,24(%esp)
+ call *8(%ebp)
+ movl %ebp,%esp
+ popl %ebp
+ ret
+.globl _OPENSSL_cleanse
+.align 4
+_OPENSSL_cleanse:
+L_OPENSSL_cleanse_begin:
+ movl 4(%esp),%edx
+ movl 8(%esp),%ecx
+ xorl %eax,%eax
+ cmpl $7,%ecx
+ jae L018lot
+ cmpl $0,%ecx
+ je L019ret
+L020little:
+ movb %al,(%edx)
+ subl $1,%ecx
+ leal 1(%edx),%edx
+ jnz L020little
+L019ret:
+ ret
+.align 4,0x90
+L018lot:
+ testl $3,%edx
+ jz L021aligned
+ movb %al,(%edx)
+ leal -1(%ecx),%ecx
+ leal 1(%edx),%edx
+ jmp L018lot
+L021aligned:
+ movl %eax,(%edx)
+ leal -4(%ecx),%ecx
+ testl $-4,%ecx
+ leal 4(%edx),%edx
+ jnz L021aligned
+ cmpl $0,%ecx
+ jne L020little
+ ret
+.globl _OPENSSL_instrument_bus
+.align 4
+_OPENSSL_instrument_bus:
+L_OPENSSL_instrument_bus_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl $0,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _OPENSSL_instrument_bus2
+.align 4
+_OPENSSL_instrument_bus2:
+L_OPENSSL_instrument_bus2_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl $0,%eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.globl _OPENSSL_ia32_rdrand
+.align 4
+_OPENSSL_ia32_rdrand:
+L_OPENSSL_ia32_rdrand_begin:
+ movl $8,%ecx
+L022loop:
+.byte 15,199,240
+ jc L023break
+ loop L022loop
+L023break:
+ cmpl $0,%eax
+ cmovel %ecx,%eax
+ ret
+.section __IMPORT,__pointers,non_lazy_symbol_pointers
+L__gnutls_x86_cpuid_s$non_lazy_ptr:
+.indirect_symbol __gnutls_x86_cpuid_s
+.long 0
+.comm __gnutls_x86_cpuid_s,16,2
+.mod_init_func
+.align 2
+.long _OPENSSL_cpuid_setup
+
+.section .note.GNU-stack,"",%progbits
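
The large decimal immediates _OPENSSL_ia32_cpuid compares against %ebx/%edx/%ecx after leaf 0 are the CPUID vendor string in little-endian 32-bit chunks: 1970169159/1231384169/1818588270 spell "GenuineIntel" and 1752462657/1769238117/1145913699 spell "AuthenticAMD". A throwaway C decoder makes this visible (little-endian host assumed, which is a given on x86):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    int main(void)
    {
            /* EBX, EDX, ECX after CPUID leaf 0, as in the immediates above. */
            uint32_t intel[3] = { 1970169159u, 1231384169u, 1818588270u };
            uint32_t amd[3]   = { 1752462657u, 1769238117u, 1145913699u };
            char s[13] = { 0 };

            memcpy(s, intel, 12);
            printf("%s\n", s);      /* GenuineIntel */
            memcpy(s, amd, 12);
            printf("%s\n", s);      /* AuthenticAMD */
            return 0;
    }
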
diff --git a/lib/accelerated/x86/macosx/openssl-cpuid-x86_64.s b/lib/accelerated/x86/macosx/openssl-cpuid-x86_64.s
new file mode 100644
index 0000000000..8e12261242
--- /dev/null
+++ b/lib/accelerated/x86/macosx/openssl-cpuid-x86_64.s
@@ -0,0 +1,365 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+
+.private_extern _OPENSSL_cpuid_setup
+.mod_init_func
+ .p2align 3
+ .quad _OPENSSL_cpuid_setup
+
+.private_extern __gnutls_x86_cpuid_s
+.comm __gnutls_x86_cpuid_s,16,2
+
+.text
+
+.globl _OPENSSL_atomic_add
+
+.p2align 4
+_OPENSSL_atomic_add:
+ movl (%rdi),%eax
+L$spin: leaq (%rsi,%rax,1),%r8
+.byte 0xf0
+ cmpxchgl %r8d,(%rdi)
+ jne L$spin
+ movl %r8d,%eax
+.byte 0x48,0x98
+ .byte 0xf3,0xc3
+
+
+.globl _OPENSSL_rdtsc
+
+.p2align 4
+_OPENSSL_rdtsc:
+ rdtsc
+ shlq $32,%rdx
+ orq %rdx,%rax
+ .byte 0xf3,0xc3
+
+
+.globl _OPENSSL_ia32_cpuid
+
+.p2align 4
+_OPENSSL_ia32_cpuid:
+ movq %rbx,%r8
+
+ xorl %eax,%eax
+ movl %eax,8(%rdi)
+ cpuid
+ movl %eax,%r11d
+
+ xorl %eax,%eax
+ cmpl $1970169159,%ebx
+ setne %al
+ movl %eax,%r9d
+ cmpl $1231384169,%edx
+ setne %al
+ orl %eax,%r9d
+ cmpl $1818588270,%ecx
+ setne %al
+ orl %eax,%r9d
+ jz L$intel
+
+ cmpl $1752462657,%ebx
+ setne %al
+ movl %eax,%r10d
+ cmpl $1769238117,%edx
+ setne %al
+ orl %eax,%r10d
+ cmpl $1145913699,%ecx
+ setne %al
+ orl %eax,%r10d
+ jnz L$intel
+
+
+ movl $2147483648,%eax
+ cpuid
+ cmpl $2147483649,%eax
+ jb L$intel
+ movl %eax,%r10d
+ movl $2147483649,%eax
+ cpuid
+ orl %ecx,%r9d
+ andl $2049,%r9d
+
+ cmpl $2147483656,%r10d
+ jb L$intel
+
+ movl $2147483656,%eax
+ cpuid
+ movzbq %cl,%r10
+ incq %r10
+
+ movl $1,%eax
+ cpuid
+ btl $28,%edx
+ jnc L$generic
+ shrl $16,%ebx
+ cmpb %r10b,%bl
+ ja L$generic
+ andl $4026531839,%edx
+ jmp L$generic
+
+L$intel:
+ cmpl $4,%r11d
+ movl $-1,%r10d
+ jb L$nocacheinfo
+
+ movl $4,%eax
+ movl $0,%ecx
+ cpuid
+ movl %eax,%r10d
+ shrl $14,%r10d
+ andl $4095,%r10d
+
+ cmpl $7,%r11d
+ jb L$nocacheinfo
+
+ movl $7,%eax
+ xorl %ecx,%ecx
+ cpuid
+ movl %ebx,8(%rdi)
+
+L$nocacheinfo:
+ movl $1,%eax
+ cpuid
+ andl $3220176895,%edx
+ cmpl $0,%r9d
+ jne L$notintel
+ orl $1073741824,%edx
+ andb $15,%ah
+ cmpb $15,%ah
+ jne L$notintel
+ orl $1048576,%edx
+L$notintel:
+ btl $28,%edx
+ jnc L$generic
+ andl $4026531839,%edx
+ cmpl $0,%r10d
+ je L$generic
+
+ orl $268435456,%edx
+ shrl $16,%ebx
+ cmpb $1,%bl
+ ja L$generic
+ andl $4026531839,%edx
+L$generic:
+ andl $2048,%r9d
+ andl $4294965247,%ecx
+ orl %ecx,%r9d
+
+ movl %edx,%r10d
+ btl $27,%r9d
+ jnc L$clear_avx
+ xorl %ecx,%ecx
+.byte 0x0f,0x01,0xd0
+ andl $6,%eax
+ cmpl $6,%eax
+ je L$done
+L$clear_avx:
+ movl $4026525695,%eax
+ andl %eax,%r9d
+ andl $4294967263,8(%rdi)
+L$done:
+ shlq $32,%r9
+ movl %r10d,%eax
+ movq %r8,%rbx
+ orq %r9,%rax
+ .byte 0xf3,0xc3
+
+
+.globl _OPENSSL_cleanse
+
+.p2align 4
+_OPENSSL_cleanse:
+ xorq %rax,%rax
+ cmpq $15,%rsi
+ jae L$ot
+ cmpq $0,%rsi
+ je L$ret
+L$ittle:
+ movb %al,(%rdi)
+ subq $1,%rsi
+ leaq 1(%rdi),%rdi
+ jnz L$ittle
+L$ret:
+ .byte 0xf3,0xc3
+.p2align 4
+L$ot:
+ testq $7,%rdi
+ jz L$aligned
+ movb %al,(%rdi)
+ leaq -1(%rsi),%rsi
+ leaq 1(%rdi),%rdi
+ jmp L$ot
+L$aligned:
+ movq %rax,(%rdi)
+ leaq -8(%rsi),%rsi
+ testq $-8,%rsi
+ leaq 8(%rdi),%rdi
+ jnz L$aligned
+ cmpq $0,%rsi
+ jne L$ittle
+ .byte 0xf3,0xc3
+
+.globl _OPENSSL_wipe_cpu
+
+.p2align 4
+_OPENSSL_wipe_cpu:
+ pxor %xmm0,%xmm0
+ pxor %xmm1,%xmm1
+ pxor %xmm2,%xmm2
+ pxor %xmm3,%xmm3
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+ pxor %xmm6,%xmm6
+ pxor %xmm7,%xmm7
+ pxor %xmm8,%xmm8
+ pxor %xmm9,%xmm9
+ pxor %xmm10,%xmm10
+ pxor %xmm11,%xmm11
+ pxor %xmm12,%xmm12
+ pxor %xmm13,%xmm13
+ pxor %xmm14,%xmm14
+ pxor %xmm15,%xmm15
+ xorq %rcx,%rcx
+ xorq %rdx,%rdx
+ xorq %rsi,%rsi
+ xorq %rdi,%rdi
+ xorq %r8,%r8
+ xorq %r9,%r9
+ xorq %r10,%r10
+ xorq %r11,%r11
+ leaq 8(%rsp),%rax
+ .byte 0xf3,0xc3
+
+.globl _OPENSSL_instrument_bus
+
+.p2align 4
+_OPENSSL_instrument_bus:
+ movq %rdi,%r10
+ movq %rsi,%rcx
+ movq %rsi,%r11
+
+ rdtsc
+ movl %eax,%r8d
+ movl $0,%r9d
+ clflush (%r10)
+.byte 0xf0
+ addl %r9d,(%r10)
+ jmp L$oop
+.p2align 4
+L$oop: rdtsc
+ movl %eax,%edx
+ subl %r8d,%eax
+ movl %edx,%r8d
+ movl %eax,%r9d
+ clflush (%r10)
+.byte 0xf0
+ addl %eax,(%r10)
+ leaq 4(%r10),%r10
+ subq $1,%rcx
+ jnz L$oop
+
+ movq %r11,%rax
+ .byte 0xf3,0xc3
+
+
+.globl _OPENSSL_instrument_bus2
+
+.p2align 4
+_OPENSSL_instrument_bus2:
+ movq %rdi,%r10
+ movq %rsi,%rcx
+ movq %rdx,%r11
+ movq %rcx,8(%rsp)
+
+ rdtsc
+ movl %eax,%r8d
+ movl $0,%r9d
+
+ clflush (%r10)
+.byte 0xf0
+ addl %r9d,(%r10)
+
+ rdtsc
+ movl %eax,%edx
+ subl %r8d,%eax
+ movl %edx,%r8d
+ movl %eax,%r9d
+L$oop2:
+ clflush (%r10)
+.byte 0xf0
+ addl %eax,(%r10)
+
+ subq $1,%r11
+ jz L$done2
+
+ rdtsc
+ movl %eax,%edx
+ subl %r8d,%eax
+ movl %edx,%r8d
+ cmpl %r9d,%eax
+ movl %eax,%r9d
+ movl $0,%edx
+ setne %dl
+ subq %rdx,%rcx
+ leaq (%r10,%rdx,4),%r10
+ jnz L$oop2
+
+L$done2:
+ movq 8(%rsp),%rax
+ subq %rcx,%rax
+ .byte 0xf3,0xc3
+
+.globl _OPENSSL_ia32_rdrand
+
+.p2align 4
+_OPENSSL_ia32_rdrand:
+ movl $8,%ecx
+L$oop_rdrand:
+.byte 72,15,199,240
+ jc L$break_rdrand
+ loop L$oop_rdrand
+L$break_rdrand:
+ cmpq $0,%rax
+ cmoveq %rcx,%rax
+ .byte 0xf3,0xc3
+
+
+.section .note.GNU-stack,"",%progbits
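
_OPENSSL_ia32_rdrand above (.byte 72,15,199,240 encodes rdrand %rax) retries the instruction up to eight times, and the final cmoveq swaps an all-zero result for the loop counter so that a genuine zero from the hardware is not mistaken for failure; callers treat a zero return as "no entropy". A C sketch of the same retry policy, assuming the _rdrand64_step intrinsic (GCC/Clang, compiled with -mrdrnd):

    #include <immintrin.h>

    /* Sketch of the retry policy only; returns 0 when RDRAND reported
       failure (carry clear) on all eight attempts. */
    unsigned long long rdrand_retry(void)
    {
            unsigned long long v;
            int tries;

            for (tries = 8; tries > 0; tries--)
                    if (_rdrand64_step(&v))
                            return v ? v : (unsigned long long)tries;
            return 0;
    }
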
diff --git a/lib/accelerated/x86/macosx/sha1-ssse3-x86.s b/lib/accelerated/x86/macosx/sha1-ssse3-x86.s
new file mode 100644
index 0000000000..0e0c719af2
--- /dev/null
+++ b/lib/accelerated/x86/macosx/sha1-ssse3-x86.s
@@ -0,0 +1,1419 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "sha1-586.s"
+.text
+.globl _sha1_block_data_order
+.align 4
+_sha1_block_data_order:
+L_sha1_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%ebp
+ movl 24(%esp),%esi
+ movl 28(%esp),%eax
+ subl $76,%esp
+ shll $6,%eax
+ addl %esi,%eax
+ movl %eax,104(%esp)
+ movl 16(%ebp),%edi
+ jmp L000loop
+.align 4,0x90
+L000loop:
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movl %ecx,8(%esp)
+ movl %edx,12(%esp)
+ movl 16(%esi),%eax
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,16(%esp)
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %edx,28(%esp)
+ movl 32(%esi),%eax
+ movl 36(%esi),%ebx
+ movl 40(%esi),%ecx
+ movl 44(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,32(%esp)
+ movl %ebx,36(%esp)
+ movl %ecx,40(%esp)
+ movl %edx,44(%esp)
+ movl 48(%esi),%eax
+ movl 52(%esi),%ebx
+ movl 56(%esi),%ecx
+ movl 60(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax,48(%esp)
+ movl %ebx,52(%esp)
+ movl %ecx,56(%esp)
+ movl %edx,60(%esp)
+ movl %esi,100(%esp)
+ movl (%ebp),%eax
+ movl 4(%ebp),%ebx
+ movl 8(%ebp),%ecx
+ movl 12(%ebp),%edx
+ # 00_15 0
+ movl %ecx,%esi
+ movl %eax,%ebp
+ roll $5,%ebp
+ xorl %edx,%esi
+ addl %edi,%ebp
+ movl (%esp),%edi
+ andl %ebx,%esi
+ rorl $2,%ebx
+ xorl %edx,%esi
+ leal 1518500249(%ebp,%edi,1),%ebp
+ addl %esi,%ebp
+ # 00_15 1
+ movl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ecx,%edi
+ addl %edx,%ebp
+ movl 4(%esp),%edx
+ andl %eax,%edi
+ rorl $2,%eax
+ xorl %ecx,%edi
+ leal 1518500249(%ebp,%edx,1),%ebp
+ addl %edi,%ebp
+ # 00_15 2
+ movl %eax,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%edx
+ addl %ecx,%ebp
+ movl 8(%esp),%ecx
+ andl %esi,%edx
+ rorl $2,%esi
+ xorl %ebx,%edx
+ leal 1518500249(%ebp,%ecx,1),%ebp
+ addl %edx,%ebp
+ # 00_15 3
+ movl %esi,%ecx
+ movl %ebp,%edx
+ roll $5,%ebp
+ xorl %eax,%ecx
+ addl %ebx,%ebp
+ movl 12(%esp),%ebx
+ andl %edi,%ecx
+ rorl $2,%edi
+ xorl %eax,%ecx
+ leal 1518500249(%ebp,%ebx,1),%ebp
+ addl %ecx,%ebp
+ # 00_15 4
+ movl %edi,%ebx
+ movl %ebp,%ecx
+ roll $5,%ebp
+ xorl %esi,%ebx
+ addl %eax,%ebp
+ movl 16(%esp),%eax
+ andl %edx,%ebx
+ rorl $2,%edx
+ xorl %esi,%ebx
+ leal 1518500249(%ebp,%eax,1),%ebp
+ addl %ebx,%ebp
+ # 00_15 5
+ movl %edx,%eax
+ movl %ebp,%ebx
+ roll $5,%ebp
+ xorl %edi,%eax
+ addl %esi,%ebp
+ movl 20(%esp),%esi
+ andl %ecx,%eax
+ rorl $2,%ecx
+ xorl %edi,%eax
+ leal 1518500249(%ebp,%esi,1),%ebp
+ addl %eax,%ebp
+ # 00_15 6
+ movl %ecx,%esi
+ movl %ebp,%eax
+ roll $5,%ebp
+ xorl %edx,%esi
+ addl %edi,%ebp
+ movl 24(%esp),%edi
+ andl %ebx,%esi
+ rorl $2,%ebx
+ xorl %edx,%esi
+ leal 1518500249(%ebp,%edi,1),%ebp
+ addl %esi,%ebp
+ # 00_15 7
+ movl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ecx,%edi
+ addl %edx,%ebp
+ movl 28(%esp),%edx
+ andl %eax,%edi
+ rorl $2,%eax
+ xorl %ecx,%edi
+ leal 1518500249(%ebp,%edx,1),%ebp
+ addl %edi,%ebp
+ # 00_15 8
+ movl %eax,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%edx
+ addl %ecx,%ebp
+ movl 32(%esp),%ecx
+ andl %esi,%edx
+ rorl $2,%esi
+ xorl %ebx,%edx
+ leal 1518500249(%ebp,%ecx,1),%ebp
+ addl %edx,%ebp
+ # 00_15 9
+ movl %esi,%ecx
+ movl %ebp,%edx
+ roll $5,%ebp
+ xorl %eax,%ecx
+ addl %ebx,%ebp
+ movl 36(%esp),%ebx
+ andl %edi,%ecx
+ rorl $2,%edi
+ xorl %eax,%ecx
+ leal 1518500249(%ebp,%ebx,1),%ebp
+ addl %ecx,%ebp
+ # 00_15 10
+ movl %edi,%ebx
+ movl %ebp,%ecx
+ roll $5,%ebp
+ xorl %esi,%ebx
+ addl %eax,%ebp
+ movl 40(%esp),%eax
+ andl %edx,%ebx
+ rorl $2,%edx
+ xorl %esi,%ebx
+ leal 1518500249(%ebp,%eax,1),%ebp
+ addl %ebx,%ebp
+ # 00_15 11
+ movl %edx,%eax
+ movl %ebp,%ebx
+ roll $5,%ebp
+ xorl %edi,%eax
+ addl %esi,%ebp
+ movl 44(%esp),%esi
+ andl %ecx,%eax
+ rorl $2,%ecx
+ xorl %edi,%eax
+ leal 1518500249(%ebp,%esi,1),%ebp
+ addl %eax,%ebp
+ # 00_15 12
+ movl %ecx,%esi
+ movl %ebp,%eax
+ roll $5,%ebp
+ xorl %edx,%esi
+ addl %edi,%ebp
+ movl 48(%esp),%edi
+ andl %ebx,%esi
+ rorl $2,%ebx
+ xorl %edx,%esi
+ leal 1518500249(%ebp,%edi,1),%ebp
+ addl %esi,%ebp
+ # 00_15 13
+ movl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ xorl %ecx,%edi
+ addl %edx,%ebp
+ movl 52(%esp),%edx
+ andl %eax,%edi
+ rorl $2,%eax
+ xorl %ecx,%edi
+ leal 1518500249(%ebp,%edx,1),%ebp
+ addl %edi,%ebp
+ # 00_15 14
+ movl %eax,%edx
+ movl %ebp,%edi
+ roll $5,%ebp
+ xorl %ebx,%edx
+ addl %ecx,%ebp
+ movl 56(%esp),%ecx
+ andl %esi,%edx
+ rorl $2,%esi
+ xorl %ebx,%edx
+ leal 1518500249(%ebp,%ecx,1),%ebp
+ addl %edx,%ebp
+ # 00_15 15
+ movl %esi,%ecx
+ movl %ebp,%edx
+ roll $5,%ebp
+ xorl %eax,%ecx
+ addl %ebx,%ebp
+ movl 60(%esp),%ebx
+ andl %edi,%ecx
+ rorl $2,%edi
+ xorl %eax,%ecx
+ leal 1518500249(%ebp,%ebx,1),%ebp
+ movl (%esp),%ebx
+ addl %ebp,%ecx
+ # 16_19 16
+ movl %edi,%ebp
+ xorl 8(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 32(%esp),%ebx
+ andl %edx,%ebp
+ xorl 52(%esp),%ebx
+ roll $1,%ebx
+ xorl %esi,%ebp
+ addl %ebp,%eax
+ movl %ecx,%ebp
+ rorl $2,%edx
+ movl %ebx,(%esp)
+ roll $5,%ebp
+ leal 1518500249(%ebx,%eax,1),%ebx
+ movl 4(%esp),%eax
+ addl %ebp,%ebx
+ # 16_19 17
+ movl %edx,%ebp
+ xorl 12(%esp),%eax
+ xorl %edi,%ebp
+ xorl 36(%esp),%eax
+ andl %ecx,%ebp
+ xorl 56(%esp),%eax
+ roll $1,%eax
+ xorl %edi,%ebp
+ addl %ebp,%esi
+ movl %ebx,%ebp
+ rorl $2,%ecx
+ movl %eax,4(%esp)
+ roll $5,%ebp
+ leal 1518500249(%eax,%esi,1),%eax
+ movl 8(%esp),%esi
+ addl %ebp,%eax
+ # 16_19 18
+ movl %ecx,%ebp
+ xorl 16(%esp),%esi
+ xorl %edx,%ebp
+ xorl 40(%esp),%esi
+ andl %ebx,%ebp
+ xorl 60(%esp),%esi
+ roll $1,%esi
+ xorl %edx,%ebp
+ addl %ebp,%edi
+ movl %eax,%ebp
+ rorl $2,%ebx
+ movl %esi,8(%esp)
+ roll $5,%ebp
+ leal 1518500249(%esi,%edi,1),%esi
+ movl 12(%esp),%edi
+ addl %ebp,%esi
+ # 16_19 19
+ movl %ebx,%ebp
+ xorl 20(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 44(%esp),%edi
+ andl %eax,%ebp
+ xorl (%esp),%edi
+ roll $1,%edi
+ xorl %ecx,%ebp
+ addl %ebp,%edx
+ movl %esi,%ebp
+ rorl $2,%eax
+ movl %edi,12(%esp)
+ roll $5,%ebp
+ leal 1518500249(%edi,%edx,1),%edi
+ movl 16(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 20
+ movl %esi,%ebp
+ xorl 24(%esp),%edx
+ xorl %eax,%ebp
+ xorl 48(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,16(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 20(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 21
+ movl %edi,%ebp
+ xorl 28(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 8(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,20(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 24(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 22
+ movl %edx,%ebp
+ xorl 32(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 56(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,24(%esp)
+ leal 1859775393(%ebx,%eax,1),%ebx
+ movl 28(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 23
+ movl %ecx,%ebp
+ xorl 36(%esp),%eax
+ xorl %edx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edi,%ebp
+ xorl 16(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,28(%esp)
+ leal 1859775393(%eax,%esi,1),%eax
+ movl 32(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 24
+ movl %ebx,%ebp
+ xorl 40(%esp),%esi
+ xorl %ecx,%ebp
+ xorl (%esp),%esi
+ xorl %edx,%ebp
+ xorl 20(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,32(%esp)
+ leal 1859775393(%esi,%edi,1),%esi
+ movl 36(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 25
+ movl %eax,%ebp
+ xorl 44(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,36(%esp)
+ leal 1859775393(%edi,%edx,1),%edi
+ movl 40(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 26
+ movl %esi,%ebp
+ xorl 48(%esp),%edx
+ xorl %eax,%ebp
+ xorl 8(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,40(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 44(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 27
+ movl %edi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 32(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,44(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 48(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 28
+ movl %edx,%ebp
+ xorl 56(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 16(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 36(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,48(%esp)
+ leal 1859775393(%ebx,%eax,1),%ebx
+ movl 52(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 29
+ movl %ecx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edx,%ebp
+ xorl 20(%esp),%eax
+ xorl %edi,%ebp
+ xorl 40(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,52(%esp)
+ leal 1859775393(%eax,%esi,1),%eax
+ movl 56(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 30
+ movl %ebx,%ebp
+ xorl (%esp),%esi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%esi
+ xorl %edx,%ebp
+ xorl 44(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,56(%esp)
+ leal 1859775393(%esi,%edi,1),%esi
+ movl 60(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 31
+ movl %eax,%ebp
+ xorl 4(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 48(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,60(%esp)
+ leal 1859775393(%edi,%edx,1),%edi
+ movl (%esp),%edx
+ addl %ebp,%edi
+ # 20_39 32
+ movl %esi,%ebp
+ xorl 8(%esp),%edx
+ xorl %eax,%ebp
+ xorl 32(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 52(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 4(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 33
+ movl %edi,%ebp
+ xorl 12(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 36(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 56(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,4(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 8(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 34
+ movl %edx,%ebp
+ xorl 16(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 40(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 60(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,8(%esp)
+ leal 1859775393(%ebx,%eax,1),%ebx
+ movl 12(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 35
+ movl %ecx,%ebp
+ xorl 20(%esp),%eax
+ xorl %edx,%ebp
+ xorl 44(%esp),%eax
+ xorl %edi,%ebp
+ xorl (%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,12(%esp)
+ leal 1859775393(%eax,%esi,1),%eax
+ movl 16(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 36
+ movl %ebx,%ebp
+ xorl 24(%esp),%esi
+ xorl %ecx,%ebp
+ xorl 48(%esp),%esi
+ xorl %edx,%ebp
+ xorl 4(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,16(%esp)
+ leal 1859775393(%esi,%edi,1),%esi
+ movl 20(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 37
+ movl %eax,%ebp
+ xorl 28(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 52(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 8(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,20(%esp)
+ leal 1859775393(%edi,%edx,1),%edi
+ movl 24(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 38
+ movl %esi,%ebp
+ xorl 32(%esp),%edx
+ xorl %eax,%ebp
+ xorl 56(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 12(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,24(%esp)
+ leal 1859775393(%edx,%ecx,1),%edx
+ movl 28(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 39
+ movl %edi,%ebp
+ xorl 36(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 60(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 16(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,28(%esp)
+ leal 1859775393(%ecx,%ebx,1),%ecx
+ movl 32(%esp),%ebx
+ addl %ebp,%ecx
+ # 40_59 40
+ movl %edi,%ebp
+ xorl 40(%esp),%ebx
+ xorl %esi,%ebp
+ xorl (%esp),%ebx
+ andl %edx,%ebp
+ xorl 20(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,32(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 36(%esp),%eax
+ addl %ebp,%ebx
+ # 40_59 41
+ movl %edx,%ebp
+ xorl 44(%esp),%eax
+ xorl %edi,%ebp
+ xorl 4(%esp),%eax
+ andl %ecx,%ebp
+ xorl 24(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,36(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl 40(%esp),%esi
+ addl %ebp,%eax
+ # 40_59 42
+ movl %ecx,%ebp
+ xorl 48(%esp),%esi
+ xorl %edx,%ebp
+ xorl 8(%esp),%esi
+ andl %ebx,%ebp
+ xorl 28(%esp),%esi
+ roll $1,%esi
+ addl %edi,%ebp
+ rorl $2,%ebx
+ movl %eax,%edi
+ roll $5,%edi
+ movl %esi,40(%esp)
+ leal 2400959708(%esi,%ebp,1),%esi
+ movl %ecx,%ebp
+ addl %edi,%esi
+ andl %edx,%ebp
+ movl 44(%esp),%edi
+ addl %ebp,%esi
+ # 40_59 43
+ movl %ebx,%ebp
+ xorl 52(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 12(%esp),%edi
+ andl %eax,%ebp
+ xorl 32(%esp),%edi
+ roll $1,%edi
+ addl %edx,%ebp
+ rorl $2,%eax
+ movl %esi,%edx
+ roll $5,%edx
+ movl %edi,44(%esp)
+ leal 2400959708(%edi,%ebp,1),%edi
+ movl %ebx,%ebp
+ addl %edx,%edi
+ andl %ecx,%ebp
+ movl 48(%esp),%edx
+ addl %ebp,%edi
+ # 40_59 44
+ movl %eax,%ebp
+ xorl 56(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 16(%esp),%edx
+ andl %esi,%ebp
+ xorl 36(%esp),%edx
+ roll $1,%edx
+ addl %ecx,%ebp
+ rorl $2,%esi
+ movl %edi,%ecx
+ roll $5,%ecx
+ movl %edx,48(%esp)
+ leal 2400959708(%edx,%ebp,1),%edx
+ movl %eax,%ebp
+ addl %ecx,%edx
+ andl %ebx,%ebp
+ movl 52(%esp),%ecx
+ addl %ebp,%edx
+ # 40_59 45
+ movl %esi,%ebp
+ xorl 60(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 20(%esp),%ecx
+ andl %edi,%ebp
+ xorl 40(%esp),%ecx
+ roll $1,%ecx
+ addl %ebx,%ebp
+ rorl $2,%edi
+ movl %edx,%ebx
+ roll $5,%ebx
+ movl %ecx,52(%esp)
+ leal 2400959708(%ecx,%ebp,1),%ecx
+ movl %esi,%ebp
+ addl %ebx,%ecx
+ andl %eax,%ebp
+ movl 56(%esp),%ebx
+ addl %ebp,%ecx
+ # 40_59 46
+ movl %edi,%ebp
+ xorl (%esp),%ebx
+ xorl %esi,%ebp
+ xorl 24(%esp),%ebx
+ andl %edx,%ebp
+ xorl 44(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,56(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 60(%esp),%eax
+ addl %ebp,%ebx
+ # 40_59 47
+ movl %edx,%ebp
+ xorl 4(%esp),%eax
+ xorl %edi,%ebp
+ xorl 28(%esp),%eax
+ andl %ecx,%ebp
+ xorl 48(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,60(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl (%esp),%esi
+ addl %ebp,%eax
+ # 40_59 48
+ movl %ecx,%ebp
+ xorl 8(%esp),%esi
+ xorl %edx,%ebp
+ xorl 32(%esp),%esi
+ andl %ebx,%ebp
+ xorl 52(%esp),%esi
+ roll $1,%esi
+ addl %edi,%ebp
+ rorl $2,%ebx
+ movl %eax,%edi
+ roll $5,%edi
+ movl %esi,(%esp)
+ leal 2400959708(%esi,%ebp,1),%esi
+ movl %ecx,%ebp
+ addl %edi,%esi
+ andl %edx,%ebp
+ movl 4(%esp),%edi
+ addl %ebp,%esi
+ # 40_59 49
+ movl %ebx,%ebp
+ xorl 12(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 36(%esp),%edi
+ andl %eax,%ebp
+ xorl 56(%esp),%edi
+ roll $1,%edi
+ addl %edx,%ebp
+ rorl $2,%eax
+ movl %esi,%edx
+ roll $5,%edx
+ movl %edi,4(%esp)
+ leal 2400959708(%edi,%ebp,1),%edi
+ movl %ebx,%ebp
+ addl %edx,%edi
+ andl %ecx,%ebp
+ movl 8(%esp),%edx
+ addl %ebp,%edi
+ # 40_59 50
+ movl %eax,%ebp
+ xorl 16(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 40(%esp),%edx
+ andl %esi,%ebp
+ xorl 60(%esp),%edx
+ roll $1,%edx
+ addl %ecx,%ebp
+ rorl $2,%esi
+ movl %edi,%ecx
+ roll $5,%ecx
+ movl %edx,8(%esp)
+ leal 2400959708(%edx,%ebp,1),%edx
+ movl %eax,%ebp
+ addl %ecx,%edx
+ andl %ebx,%ebp
+ movl 12(%esp),%ecx
+ addl %ebp,%edx
+ # 40_59 51
+ movl %esi,%ebp
+ xorl 20(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 44(%esp),%ecx
+ andl %edi,%ebp
+ xorl (%esp),%ecx
+ roll $1,%ecx
+ addl %ebx,%ebp
+ rorl $2,%edi
+ movl %edx,%ebx
+ roll $5,%ebx
+ movl %ecx,12(%esp)
+ leal 2400959708(%ecx,%ebp,1),%ecx
+ movl %esi,%ebp
+ addl %ebx,%ecx
+ andl %eax,%ebp
+ movl 16(%esp),%ebx
+ addl %ebp,%ecx
+ # 40_59 52
+ movl %edi,%ebp
+ xorl 24(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 48(%esp),%ebx
+ andl %edx,%ebp
+ xorl 4(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,16(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 20(%esp),%eax
+ addl %ebp,%ebx
+ # 40_59 53
+ movl %edx,%ebp
+ xorl 28(%esp),%eax
+ xorl %edi,%ebp
+ xorl 52(%esp),%eax
+ andl %ecx,%ebp
+ xorl 8(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,20(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl 24(%esp),%esi
+ addl %ebp,%eax
+ # 40_59 54
+ movl %ecx,%ebp
+ xorl 32(%esp),%esi
+ xorl %edx,%ebp
+ xorl 56(%esp),%esi
+ andl %ebx,%ebp
+ xorl 12(%esp),%esi
+ roll $1,%esi
+ addl %edi,%ebp
+ rorl $2,%ebx
+ movl %eax,%edi
+ roll $5,%edi
+ movl %esi,24(%esp)
+ leal 2400959708(%esi,%ebp,1),%esi
+ movl %ecx,%ebp
+ addl %edi,%esi
+ andl %edx,%ebp
+ movl 28(%esp),%edi
+ addl %ebp,%esi
+ # 40_59 55
+ movl %ebx,%ebp
+ xorl 36(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 60(%esp),%edi
+ andl %eax,%ebp
+ xorl 16(%esp),%edi
+ roll $1,%edi
+ addl %edx,%ebp
+ rorl $2,%eax
+ movl %esi,%edx
+ roll $5,%edx
+ movl %edi,28(%esp)
+ leal 2400959708(%edi,%ebp,1),%edi
+ movl %ebx,%ebp
+ addl %edx,%edi
+ andl %ecx,%ebp
+ movl 32(%esp),%edx
+ addl %ebp,%edi
+ # 40_59 56
+ movl %eax,%ebp
+ xorl 40(%esp),%edx
+ xorl %ebx,%ebp
+ xorl (%esp),%edx
+ andl %esi,%ebp
+ xorl 20(%esp),%edx
+ roll $1,%edx
+ addl %ecx,%ebp
+ rorl $2,%esi
+ movl %edi,%ecx
+ roll $5,%ecx
+ movl %edx,32(%esp)
+ leal 2400959708(%edx,%ebp,1),%edx
+ movl %eax,%ebp
+ addl %ecx,%edx
+ andl %ebx,%ebp
+ movl 36(%esp),%ecx
+ addl %ebp,%edx
+ # 40_59 57
+ movl %esi,%ebp
+ xorl 44(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 4(%esp),%ecx
+ andl %edi,%ebp
+ xorl 24(%esp),%ecx
+ roll $1,%ecx
+ addl %ebx,%ebp
+ rorl $2,%edi
+ movl %edx,%ebx
+ roll $5,%ebx
+ movl %ecx,36(%esp)
+ leal 2400959708(%ecx,%ebp,1),%ecx
+ movl %esi,%ebp
+ addl %ebx,%ecx
+ andl %eax,%ebp
+ movl 40(%esp),%ebx
+ addl %ebp,%ecx
+ # 40_59 58
+ movl %edi,%ebp
+ xorl 48(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 8(%esp),%ebx
+ andl %edx,%ebp
+ xorl 28(%esp),%ebx
+ roll $1,%ebx
+ addl %eax,%ebp
+ rorl $2,%edx
+ movl %ecx,%eax
+ roll $5,%eax
+ movl %ebx,40(%esp)
+ leal 2400959708(%ebx,%ebp,1),%ebx
+ movl %edi,%ebp
+ addl %eax,%ebx
+ andl %esi,%ebp
+ movl 44(%esp),%eax
+ addl %ebp,%ebx
+ # 40_59 59
+ movl %edx,%ebp
+ xorl 52(%esp),%eax
+ xorl %edi,%ebp
+ xorl 12(%esp),%eax
+ andl %ecx,%ebp
+ xorl 32(%esp),%eax
+ roll $1,%eax
+ addl %esi,%ebp
+ rorl $2,%ecx
+ movl %ebx,%esi
+ roll $5,%esi
+ movl %eax,44(%esp)
+ leal 2400959708(%eax,%ebp,1),%eax
+ movl %edx,%ebp
+ addl %esi,%eax
+ andl %edi,%ebp
+ movl 48(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 60
+ movl %ebx,%ebp
+ xorl 56(%esp),%esi
+ xorl %ecx,%ebp
+ xorl 16(%esp),%esi
+ xorl %edx,%ebp
+ xorl 36(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,48(%esp)
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 52(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 61
+ movl %eax,%ebp
+ xorl 60(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 20(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 40(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,52(%esp)
+ leal 3395469782(%edi,%edx,1),%edi
+ movl 56(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 62
+ movl %esi,%ebp
+ xorl (%esp),%edx
+ xorl %eax,%ebp
+ xorl 24(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 44(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,56(%esp)
+ leal 3395469782(%edx,%ecx,1),%edx
+ movl 60(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 63
+ movl %edi,%ebp
+ xorl 4(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 28(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 48(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,60(%esp)
+ leal 3395469782(%ecx,%ebx,1),%ecx
+ movl (%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 64
+ movl %edx,%ebp
+ xorl 8(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 32(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 52(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,(%esp)
+ leal 3395469782(%ebx,%eax,1),%ebx
+ movl 4(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 65
+ movl %ecx,%ebp
+ xorl 12(%esp),%eax
+ xorl %edx,%ebp
+ xorl 36(%esp),%eax
+ xorl %edi,%ebp
+ xorl 56(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,4(%esp)
+ leal 3395469782(%eax,%esi,1),%eax
+ movl 8(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 66
+ movl %ebx,%ebp
+ xorl 16(%esp),%esi
+ xorl %ecx,%ebp
+ xorl 40(%esp),%esi
+ xorl %edx,%ebp
+ xorl 60(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,8(%esp)
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 12(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 67
+ movl %eax,%ebp
+ xorl 20(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 44(%esp),%edi
+ xorl %ecx,%ebp
+ xorl (%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,12(%esp)
+ leal 3395469782(%edi,%edx,1),%edi
+ movl 16(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 68
+ movl %esi,%ebp
+ xorl 24(%esp),%edx
+ xorl %eax,%ebp
+ xorl 48(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,16(%esp)
+ leal 3395469782(%edx,%ecx,1),%edx
+ movl 20(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 69
+ movl %edi,%ebp
+ xorl 28(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 8(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,20(%esp)
+ leal 3395469782(%ecx,%ebx,1),%ecx
+ movl 24(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 70
+ movl %edx,%ebp
+ xorl 32(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 56(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,24(%esp)
+ leal 3395469782(%ebx,%eax,1),%ebx
+ movl 28(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 71
+ movl %ecx,%ebp
+ xorl 36(%esp),%eax
+ xorl %edx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edi,%ebp
+ xorl 16(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ movl %eax,28(%esp)
+ leal 3395469782(%eax,%esi,1),%eax
+ movl 32(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 72
+ movl %ebx,%ebp
+ xorl 40(%esp),%esi
+ xorl %ecx,%ebp
+ xorl (%esp),%esi
+ xorl %edx,%ebp
+ xorl 20(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ movl %esi,32(%esp)
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 36(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 73
+ movl %eax,%ebp
+ xorl 44(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 4(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ movl %edi,36(%esp)
+ leal 3395469782(%edi,%edx,1),%edi
+ movl 40(%esp),%edx
+ addl %ebp,%edi
+ # 20_39 74
+ movl %esi,%ebp
+ xorl 48(%esp),%edx
+ xorl %eax,%ebp
+ xorl 8(%esp),%edx
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edx
+ roll $1,%edx
+ addl %ebp,%ecx
+ rorl $2,%esi
+ movl %edi,%ebp
+ roll $5,%ebp
+ movl %edx,40(%esp)
+ leal 3395469782(%edx,%ecx,1),%edx
+ movl 44(%esp),%ecx
+ addl %ebp,%edx
+ # 20_39 75
+ movl %edi,%ebp
+ xorl 52(%esp),%ecx
+ xorl %esi,%ebp
+ xorl 12(%esp),%ecx
+ xorl %eax,%ebp
+ xorl 32(%esp),%ecx
+ roll $1,%ecx
+ addl %ebp,%ebx
+ rorl $2,%edi
+ movl %edx,%ebp
+ roll $5,%ebp
+ movl %ecx,44(%esp)
+ leal 3395469782(%ecx,%ebx,1),%ecx
+ movl 48(%esp),%ebx
+ addl %ebp,%ecx
+ # 20_39 76
+ movl %edx,%ebp
+ xorl 56(%esp),%ebx
+ xorl %edi,%ebp
+ xorl 16(%esp),%ebx
+ xorl %esi,%ebp
+ xorl 36(%esp),%ebx
+ roll $1,%ebx
+ addl %ebp,%eax
+ rorl $2,%edx
+ movl %ecx,%ebp
+ roll $5,%ebp
+ movl %ebx,48(%esp)
+ leal 3395469782(%ebx,%eax,1),%ebx
+ movl 52(%esp),%eax
+ addl %ebp,%ebx
+ # 20_39 77
+ movl %ecx,%ebp
+ xorl 60(%esp),%eax
+ xorl %edx,%ebp
+ xorl 20(%esp),%eax
+ xorl %edi,%ebp
+ xorl 40(%esp),%eax
+ roll $1,%eax
+ addl %ebp,%esi
+ rorl $2,%ecx
+ movl %ebx,%ebp
+ roll $5,%ebp
+ leal 3395469782(%eax,%esi,1),%eax
+ movl 56(%esp),%esi
+ addl %ebp,%eax
+ # 20_39 78
+ movl %ebx,%ebp
+ xorl (%esp),%esi
+ xorl %ecx,%ebp
+ xorl 24(%esp),%esi
+ xorl %edx,%ebp
+ xorl 44(%esp),%esi
+ roll $1,%esi
+ addl %ebp,%edi
+ rorl $2,%ebx
+ movl %eax,%ebp
+ roll $5,%ebp
+ leal 3395469782(%esi,%edi,1),%esi
+ movl 60(%esp),%edi
+ addl %ebp,%esi
+ # 20_39 79
+ movl %eax,%ebp
+ xorl 4(%esp),%edi
+ xorl %ebx,%ebp
+ xorl 28(%esp),%edi
+ xorl %ecx,%ebp
+ xorl 48(%esp),%edi
+ roll $1,%edi
+ addl %ebp,%edx
+ rorl $2,%eax
+ movl %esi,%ebp
+ roll $5,%ebp
+ leal 3395469782(%edi,%edx,1),%edi
+ addl %ebp,%edi
+ movl 96(%esp),%ebp
+ movl 100(%esp),%edx
+ addl (%ebp),%edi
+ addl 4(%ebp),%esi
+ addl 8(%ebp),%eax
+ addl 12(%ebp),%ebx
+ addl 16(%ebp),%ecx
+ movl %edi,(%ebp)
+ addl $64,%edx
+ movl %esi,4(%ebp)
+ cmpl 104(%esp),%edx
+ movl %eax,8(%ebp)
+ movl %ecx,%edi
+ movl %ebx,12(%ebp)
+ movl %edx,%esi
+ movl %ecx,16(%ebp)
+ jb L000loop
+ addl $76,%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
+.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+
+.section .note.GNU-stack,"",%progbits
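
The unrolled body above is the plain-integer SHA-1 compression function: the "# 00_15 n"-style comments name the stage and round, the leal immediates are the four stage constants (1518500249 = 0x5A827999, 1859775393 = 0x6ED9EBA1, 2400959708 = 0x8F1BBCDC, 3395469782 = 0xCA62C1D6), and from round 16 on the chains of xorl against stack slots compute the message schedule W[t] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1). One round of the same computation in C, for reference; names are illustrative:

    #include <stdint.h>

    static uint32_t rol32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    /* One SHA-1 round on state s[5] = {a,b,c,d,e}; w is the scheduled W[t]. */
    static void sha1_round(uint32_t s[5], uint32_t w, int t)
    {
            uint32_t f, k;

            if (t < 20)      { f = (s[1] & s[2]) | (~s[1] & s[3]); k = 0x5A827999; }
            else if (t < 40) { f = s[1] ^ s[2] ^ s[3];             k = 0x6ED9EBA1; }
            else if (t < 60) { f = (s[1] & s[2]) | (s[1] & s[3])
                                   | (s[2] & s[3]);                k = 0x8F1BBCDC; }
            else             { f = s[1] ^ s[2] ^ s[3];             k = 0xCA62C1D6; }

            uint32_t tmp = rol32(s[0], 5) + f + s[4] + k + w;
            s[4] = s[3]; s[3] = s[2]; s[2] = rol32(s[1], 30); s[1] = s[0]; s[0] = tmp;
    }
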
diff --git a/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s b/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s
new file mode 100644
index 0000000000..88bf435f81
--- /dev/null
+++ b/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s
@@ -0,0 +1,2515 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+
+
+.globl _sha1_block_data_order
+
+.p2align 4
+_sha1_block_data_order:
+ movl __gnutls_x86_cpuid_s+0(%rip),%r9d
+ movl __gnutls_x86_cpuid_s+4(%rip),%r8d
+ movl __gnutls_x86_cpuid_s+8(%rip),%r10d
+ testl $512,%r8d
+ jz L$ialu
+ jmp _ssse3_shortcut
+
+.p2align 4
+L$ialu:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ movq %rsp,%r11
+ movq %rdi,%r8
+ subq $72,%rsp
+ movq %rsi,%r9
+ andq $-64,%rsp
+ movq %rdx,%r10
+ movq %r11,64(%rsp)
+L$prologue:
+
+ movl 0(%r8),%esi
+ movl 4(%r8),%edi
+ movl 8(%r8),%r11d
+ movl 12(%r8),%r12d
+ movl 16(%r8),%r13d
+ jmp L$loop
+
+.p2align 4
+L$loop:
+ movl 0(%r9),%edx
+ bswapl %edx
+ movl %edx,0(%rsp)
+ movl %r11d,%eax
+ movl 4(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,4(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 8(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,8(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 12(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,12(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 16(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,16(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 20(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,20(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 24(%r9),%edx
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r13,1),%r13d
+ andl %edi,%eax
+ movl %edx,24(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 28(%r9),%ebp
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r12,1),%r12d
+ andl %esi,%eax
+ movl %ebp,28(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 32(%r9),%edx
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %edx,32(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 36(%r9),%ebp
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %ebp,36(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 40(%r9),%edx
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %edx,40(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl %r11d,%eax
+ movl 44(%r9),%ebp
+ movl %esi,%ecx
+ xorl %r12d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r13,1),%r13d
+ andl %edi,%eax
+ movl %ebp,44(%rsp)
+ addl %ecx,%r13d
+ xorl %r12d,%eax
+ roll $30,%edi
+ addl %eax,%r13d
+ movl %edi,%eax
+ movl 48(%r9),%edx
+ movl %r13d,%ecx
+ xorl %r11d,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%r12,1),%r12d
+ andl %esi,%eax
+ movl %edx,48(%rsp)
+ addl %ecx,%r12d
+ xorl %r11d,%eax
+ roll $30,%esi
+ addl %eax,%r12d
+ movl %esi,%eax
+ movl 52(%r9),%ebp
+ movl %r12d,%ecx
+ xorl %edi,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%r11,1),%r11d
+ andl %r13d,%eax
+ movl %ebp,52(%rsp)
+ addl %ecx,%r11d
+ xorl %edi,%eax
+ roll $30,%r13d
+ addl %eax,%r11d
+ movl %r13d,%eax
+ movl 56(%r9),%edx
+ movl %r11d,%ecx
+ xorl %esi,%eax
+ bswapl %edx
+ roll $5,%ecx
+ leal 1518500249(%rbp,%rdi,1),%edi
+ andl %r12d,%eax
+ movl %edx,56(%rsp)
+ addl %ecx,%edi
+ xorl %esi,%eax
+ roll $30,%r12d
+ addl %eax,%edi
+ movl %r12d,%eax
+ movl 60(%r9),%ebp
+ movl %edi,%ecx
+ xorl %r13d,%eax
+ bswapl %ebp
+ roll $5,%ecx
+ leal 1518500249(%rdx,%rsi,1),%esi
+ andl %r11d,%eax
+ movl %ebp,60(%rsp)
+ addl %ecx,%esi
+ xorl %r13d,%eax
+ roll $30,%r11d
+ addl %eax,%esi
+ movl 0(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ xorl 32(%rsp),%edx
+ andl %edi,%eax
+ leal 1518500249(%rbp,%r13,1),%r13d
+ xorl 52(%rsp),%edx
+ xorl %r12d,%eax
+ roll $1,%edx
+ addl %ecx,%r13d
+ roll $30,%edi
+ movl %edx,0(%rsp)
+ addl %eax,%r13d
+ movl 4(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ xorl 36(%rsp),%ebp
+ andl %esi,%eax
+ leal 1518500249(%rdx,%r12,1),%r12d
+ xorl 56(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $1,%ebp
+ addl %ecx,%r12d
+ roll $30,%esi
+ movl %ebp,4(%rsp)
+ addl %eax,%r12d
+ movl 8(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ xorl 40(%rsp),%edx
+ andl %r13d,%eax
+ leal 1518500249(%rbp,%r11,1),%r11d
+ xorl 60(%rsp),%edx
+ xorl %edi,%eax
+ roll $1,%edx
+ addl %ecx,%r11d
+ roll $30,%r13d
+ movl %edx,8(%rsp)
+ addl %eax,%r11d
+ movl 12(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ leal 1518500249(%rdx,%rdi,1),%edi
+ xorl 0(%rsp),%ebp
+ xorl %esi,%eax
+ roll $1,%ebp
+ addl %ecx,%edi
+ roll $30,%r12d
+ movl %ebp,12(%rsp)
+ addl %eax,%edi
+ movl 16(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ leal 1518500249(%rbp,%rsi,1),%esi
+ xorl 4(%rsp),%edx
+ xorl %r13d,%eax
+ roll $1,%edx
+ addl %ecx,%esi
+ roll $30,%r11d
+ movl %edx,16(%rsp)
+ addl %eax,%esi
+ movl 20(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 52(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 8(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 56(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 12(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 16(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 20(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 24(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 28(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 32(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 36(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 40(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 44(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r13,1),%r13d
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 48(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r12,1),%r12d
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 52(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r11,1),%r11d
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 56(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rdi,1),%edi
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 60(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rsi,1),%esi
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 0(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r13,1),%r13d
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 4(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%r12,1),%r12d
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 8(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%r11,1),%r11d
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 12(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rdx,%rdi,1),%edi
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 16(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal 1859775393(%rbp,%rsi,1),%esi
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 20(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,32(%rsp)
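+# rounds 40-59: F = Maj(b,c,d), K = 0x8f1bbcdc (-1894007588 as signed 32-bit);
+# Maj is computed as (x&y)+(z&(x^y)): the two terms are bitwise disjoint,
+# so the add never carries and equals the bitwise OR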
+ movl 36(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r13d
+ movl 40(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 48(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,40(%rsp)
+ addl %ecx,%r12d
+ movl 44(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 52(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,44(%rsp)
+ addl %ecx,%r11d
+ movl 48(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 56(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 16(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,48(%rsp)
+ addl %ecx,%edi
+ movl 52(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 60(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 40(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,52(%rsp)
+ addl %ecx,%esi
+ movl 56(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 0(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 44(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,56(%rsp)
+ addl %ecx,%r13d
+ movl 60(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 4(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 48(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,60(%rsp)
+ addl %ecx,%r12d
+ movl 0(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 8(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 32(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 52(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,0(%rsp)
+ addl %ecx,%r11d
+ movl 4(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 12(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 56(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,4(%rsp)
+ addl %ecx,%edi
+ movl 8(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 16(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 40(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 60(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,8(%rsp)
+ addl %ecx,%esi
+ movl 12(%rsp),%ebp
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 20(%rsp),%ebp
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r12d,%ebx
+ leal -1894007588(%rdx,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 0(%rsp),%ebp
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %ebp,12(%rsp)
+ addl %ecx,%r13d
+ movl 16(%rsp),%edx
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 24(%rsp),%edx
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r11d,%ebx
+ leal -1894007588(%rbp,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 4(%rsp),%edx
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%edx
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %edx,16(%rsp)
+ addl %ecx,%r12d
+ movl 20(%rsp),%ebp
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 28(%rsp),%ebp
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %edi,%ebx
+ leal -1894007588(%rdx,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 8(%rsp),%ebp
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%ebp
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %ebp,20(%rsp)
+ addl %ecx,%r11d
+ movl 24(%rsp),%edx
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 32(%rsp),%edx
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 56(%rsp),%edx
+ xorl %esi,%ebx
+ leal -1894007588(%rbp,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 12(%rsp),%edx
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%edx
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %edx,24(%rsp)
+ addl %ecx,%edi
+ movl 28(%rsp),%ebp
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 36(%rsp),%ebp
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %r13d,%ebx
+ leal -1894007588(%rdx,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 16(%rsp),%ebp
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%ebp
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %ebp,28(%rsp)
+ addl %ecx,%esi
+ movl 32(%rsp),%edx
+ movl %r11d,%eax
+ movl %r11d,%ebx
+ xorl 40(%rsp),%edx
+ andl %r12d,%eax
+ movl %esi,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r12d,%ebx
+ leal -1894007588(%rbp,%r13,1),%r13d
+ roll $5,%ecx
+ xorl 20(%rsp),%edx
+ addl %eax,%r13d
+ andl %edi,%ebx
+ roll $1,%edx
+ addl %ebx,%r13d
+ roll $30,%edi
+ movl %edx,32(%rsp)
+ addl %ecx,%r13d
+ movl 36(%rsp),%ebp
+ movl %edi,%eax
+ movl %edi,%ebx
+ xorl 44(%rsp),%ebp
+ andl %r11d,%eax
+ movl %r13d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r11d,%ebx
+ leal -1894007588(%rdx,%r12,1),%r12d
+ roll $5,%ecx
+ xorl 24(%rsp),%ebp
+ addl %eax,%r12d
+ andl %esi,%ebx
+ roll $1,%ebp
+ addl %ebx,%r12d
+ roll $30,%esi
+ movl %ebp,36(%rsp)
+ addl %ecx,%r12d
+ movl 40(%rsp),%edx
+ movl %esi,%eax
+ movl %esi,%ebx
+ xorl 48(%rsp),%edx
+ andl %edi,%eax
+ movl %r12d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %edi,%ebx
+ leal -1894007588(%rbp,%r11,1),%r11d
+ roll $5,%ecx
+ xorl 28(%rsp),%edx
+ addl %eax,%r11d
+ andl %r13d,%ebx
+ roll $1,%edx
+ addl %ebx,%r11d
+ roll $30,%r13d
+ movl %edx,40(%rsp)
+ addl %ecx,%r11d
+ movl 44(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r13d,%ebx
+ xorl 52(%rsp),%ebp
+ andl %esi,%eax
+ movl %r11d,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %esi,%ebx
+ leal -1894007588(%rdx,%rdi,1),%edi
+ roll $5,%ecx
+ xorl 32(%rsp),%ebp
+ addl %eax,%edi
+ andl %r12d,%ebx
+ roll $1,%ebp
+ addl %ebx,%edi
+ roll $30,%r12d
+ movl %ebp,44(%rsp)
+ addl %ecx,%edi
+ movl 48(%rsp),%edx
+ movl %r12d,%eax
+ movl %r12d,%ebx
+ xorl 56(%rsp),%edx
+ andl %r13d,%eax
+ movl %edi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %r13d,%ebx
+ leal -1894007588(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl 36(%rsp),%edx
+ addl %eax,%esi
+ andl %r11d,%ebx
+ roll $1,%edx
+ addl %ebx,%esi
+ roll $30,%r11d
+ movl %edx,48(%rsp)
+ addl %ecx,%esi
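+# rounds 60-79: F = b^c^d again, K = 0xca62c1d6 (-899497514 as signed 32-bit)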
+ movl 52(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 20(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 40(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,52(%rsp)
+ movl 56(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 24(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 44(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,56(%rsp)
+ movl 60(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 28(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 48(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,60(%rsp)
+ movl 0(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 8(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 32(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 52(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,0(%rsp)
+ movl 4(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 12(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 36(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 56(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,4(%rsp)
+ movl 8(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 16(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 40(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 60(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,8(%rsp)
+ movl 12(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 20(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 44(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 0(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl %ebp,12(%rsp)
+ movl 16(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 24(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 48(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 4(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl %edx,16(%rsp)
+ movl 20(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 28(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 52(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 8(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %ebp,20(%rsp)
+ movl 24(%rsp),%edx
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 32(%rsp),%edx
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rsi,1),%esi
+ xorl 56(%rsp),%edx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 12(%rsp),%edx
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%edx
+ movl %edx,24(%rsp)
+ movl 28(%rsp),%ebp
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 36(%rsp),%ebp
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r13,1),%r13d
+ xorl 60(%rsp),%ebp
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 16(%rsp),%ebp
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%ebp
+ movl %ebp,28(%rsp)
+ movl 32(%rsp),%edx
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 40(%rsp),%edx
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r12,1),%r12d
+ xorl 0(%rsp),%edx
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 20(%rsp),%edx
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%edx
+ movl %edx,32(%rsp)
+ movl 36(%rsp),%ebp
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 44(%rsp),%ebp
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r11,1),%r11d
+ xorl 4(%rsp),%ebp
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 24(%rsp),%ebp
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%ebp
+ movl %ebp,36(%rsp)
+ movl 40(%rsp),%edx
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 48(%rsp),%edx
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%rdi,1),%edi
+ xorl 8(%rsp),%edx
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 28(%rsp),%edx
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%edx
+ movl %edx,40(%rsp)
+ movl 44(%rsp),%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl 52(%rsp),%ebp
+ xorl %r11d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rsi,1),%esi
+ xorl 12(%rsp),%ebp
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ xorl 32(%rsp),%ebp
+ roll $30,%r11d
+ addl %eax,%esi
+ roll $1,%ebp
+ movl %ebp,44(%rsp)
+ movl 48(%rsp),%edx
+ movl %r11d,%eax
+ movl %esi,%ecx
+ xorl 56(%rsp),%edx
+ xorl %edi,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r13,1),%r13d
+ xorl 16(%rsp),%edx
+ xorl %r12d,%eax
+ addl %ecx,%r13d
+ xorl 36(%rsp),%edx
+ roll $30,%edi
+ addl %eax,%r13d
+ roll $1,%edx
+ movl %edx,48(%rsp)
+ movl 52(%rsp),%ebp
+ movl %edi,%eax
+ movl %r13d,%ecx
+ xorl 60(%rsp),%ebp
+ xorl %esi,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%r12,1),%r12d
+ xorl 20(%rsp),%ebp
+ xorl %r11d,%eax
+ addl %ecx,%r12d
+ xorl 40(%rsp),%ebp
+ roll $30,%esi
+ addl %eax,%r12d
+ roll $1,%ebp
+ movl 56(%rsp),%edx
+ movl %esi,%eax
+ movl %r12d,%ecx
+ xorl 0(%rsp),%edx
+ xorl %r13d,%eax
+ roll $5,%ecx
+ leal -899497514(%rbp,%r11,1),%r11d
+ xorl 24(%rsp),%edx
+ xorl %edi,%eax
+ addl %ecx,%r11d
+ xorl 44(%rsp),%edx
+ roll $30,%r13d
+ addl %eax,%r11d
+ roll $1,%edx
+ movl 60(%rsp),%ebp
+ movl %r13d,%eax
+ movl %r11d,%ecx
+ xorl 4(%rsp),%ebp
+ xorl %r12d,%eax
+ roll $5,%ecx
+ leal -899497514(%rdx,%rdi,1),%edi
+ xorl 28(%rsp),%ebp
+ xorl %esi,%eax
+ addl %ecx,%edi
+ xorl 48(%rsp),%ebp
+ roll $30,%r12d
+ addl %eax,%edi
+ roll $1,%ebp
+ movl %r12d,%eax
+ movl %edi,%ecx
+ xorl %r11d,%eax
+ leal -899497514(%rbp,%rsi,1),%esi
+ roll $5,%ecx
+ xorl %r13d,%eax
+ addl %ecx,%esi
+ roll $30,%r11d
+ addl %eax,%esi
+ addl 0(%r8),%esi
+ addl 4(%r8),%edi
+ addl 8(%r8),%r11d
+ addl 12(%r8),%r12d
+ addl 16(%r8),%r13d
+ movl %esi,0(%r8)
+ movl %edi,4(%r8)
+ movl %r11d,8(%r8)
+ movl %r12d,12(%r8)
+ movl %r13d,16(%r8)
+
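+# one 64-byte block done: %r10 counts remaining blocks, %r9 walks the input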
+ subq $1,%r10
+ leaq 64(%r9),%r9
+ jnz L$loop
+
+ movq 64(%rsp),%rsi
+ movq (%rsi),%r13
+ movq 8(%rsi),%r12
+ movq 16(%rsi),%rbp
+ movq 24(%rsi),%rbx
+ leaq 32(%rsi),%rsp
+L$epilogue:
+ .byte 0xf3,0xc3
+
+
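+# sha1_block_data_order_ssse3(ctx=%rdi, buf=%rsi, blocks=%rdx): same round
+# logic as above, but the message schedule is expanded four words at a time
+# in XMM registers, interleaved with the integer rounds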
+.p2align 4
+sha1_block_data_order_ssse3:
+_ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ leaq -64(%rsp),%rsp
+ movq %rdi,%r8
+ movq %rsi,%r9
+ movq %rdx,%r10
+
+ shlq $6,%r10
+ addq %r9,%r10
+ leaq K_XX_XX+64(%rip),%r11
+
+ movl 0(%r8),%eax
+ movl 4(%r8),%ebx
+ movl 8(%r8),%ecx
+ movl 12(%r8),%edx
+ movl %ebx,%esi
+ movl 16(%r8),%ebp
+ movl %ecx,%edi
+ xorl %edx,%edi
+ andl %edi,%esi
+
+ movdqa 64(%r11),%xmm6
+ movdqa -64(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
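+# the .byte sequences are hand-assembled SSSE3 opcodes for toolchains that
+# lack the mnemonics: 102,15,56,0,NNN is pshufb %xmm6,%xmmN (byte-swap each
+# input word to big-endian) and 102,15,58,15,NNN,8 is palignr $8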
+.byte 102,15,56,0,198
+ addq $64,%r9
+.byte 102,15,56,0,206
+.byte 102,15,56,0,214
+.byte 102,15,56,0,222
+ paddd %xmm9,%xmm0
+ paddd %xmm9,%xmm1
+ paddd %xmm9,%xmm2
+ movdqa %xmm0,0(%rsp)
+ psubd %xmm9,%xmm0
+ movdqa %xmm1,16(%rsp)
+ psubd %xmm9,%xmm1
+ movdqa %xmm2,32(%rsp)
+ psubd %xmm9,%xmm2
+ jmp L$oop_ssse3
+.p2align 4
+L$oop_ssse3:
+ movdqa %xmm1,%xmm4
+ rorl $2,%ebx
+ xorl %edx,%esi
+ movdqa %xmm3,%xmm8
+.byte 102,15,58,15,224,8
+ movl %eax,%edi
+ addl 0(%rsp),%ebp
+ paddd %xmm3,%xmm9
+ xorl %ecx,%ebx
+ roll $5,%eax
+ psrldq $4,%xmm8
+ addl %esi,%ebp
+ andl %ebx,%edi
+ pxor %xmm0,%xmm4
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ pxor %xmm2,%xmm8
+ rorl $7,%eax
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ addl 4(%rsp),%edx
+ pxor %xmm8,%xmm4
+ xorl %ebx,%eax
+ roll $5,%ebp
+ movdqa %xmm9,48(%rsp)
+ addl %edi,%edx
+ andl %eax,%esi
+ movdqa %xmm4,%xmm10
+ movdqa %xmm4,%xmm8
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ rorl $7,%ebp
+ xorl %ebx,%esi
+ pslldq $12,%xmm10
+ paddd %xmm4,%xmm4
+ movl %edx,%edi
+ addl 8(%rsp),%ecx
+ xorl %eax,%ebp
+ roll $5,%edx
+ psrld $31,%xmm8
+ addl %esi,%ecx
+ andl %ebp,%edi
+ movdqa %xmm10,%xmm9
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ psrld $30,%xmm10
+ por %xmm8,%xmm4
+ rorl $7,%edx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ addl 12(%rsp),%ebx
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm4
+ xorl %ebp,%edx
+ roll $5,%ecx
+ movdqa -64(%r11),%xmm10
+ addl %edi,%ebx
+ andl %edx,%esi
+ pxor %xmm9,%xmm4
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ movdqa %xmm2,%xmm5
+ rorl $7,%ecx
+ xorl %ebp,%esi
+ movdqa %xmm4,%xmm9
+.byte 102,15,58,15,233,8
+ movl %ebx,%edi
+ addl 16(%rsp),%eax
+ paddd %xmm4,%xmm10
+ xorl %edx,%ecx
+ roll $5,%ebx
+ psrldq $4,%xmm9
+ addl %esi,%eax
+ andl %ecx,%edi
+ pxor %xmm1,%xmm5
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ pxor %xmm3,%xmm9
+ rorl $7,%ebx
+ xorl %edx,%edi
+ movl %eax,%esi
+ addl 20(%rsp),%ebp
+ pxor %xmm9,%xmm5
+ xorl %ecx,%ebx
+ roll $5,%eax
+ movdqa %xmm10,0(%rsp)
+ addl %edi,%ebp
+ andl %ebx,%esi
+ movdqa %xmm5,%xmm8
+ movdqa %xmm5,%xmm9
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ rorl $7,%eax
+ xorl %ecx,%esi
+ pslldq $12,%xmm8
+ paddd %xmm5,%xmm5
+ movl %ebp,%edi
+ addl 24(%rsp),%edx
+ xorl %ebx,%eax
+ roll $5,%ebp
+ psrld $31,%xmm9
+ addl %esi,%edx
+ andl %eax,%edi
+ movdqa %xmm8,%xmm10
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ psrld $30,%xmm8
+ por %xmm9,%xmm5
+ rorl $7,%ebp
+ xorl %ebx,%edi
+ movl %edx,%esi
+ addl 28(%rsp),%ecx
+ pslld $2,%xmm10
+ pxor %xmm8,%xmm5
+ xorl %eax,%ebp
+ roll $5,%edx
+ movdqa -32(%r11),%xmm8
+ addl %edi,%ecx
+ andl %ebp,%esi
+ pxor %xmm10,%xmm5
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ movdqa %xmm3,%xmm6
+ rorl $7,%edx
+ xorl %eax,%esi
+ movdqa %xmm5,%xmm10
+.byte 102,15,58,15,242,8
+ movl %ecx,%edi
+ addl 32(%rsp),%ebx
+ paddd %xmm5,%xmm8
+ xorl %ebp,%edx
+ roll $5,%ecx
+ psrldq $4,%xmm10
+ addl %esi,%ebx
+ andl %edx,%edi
+ pxor %xmm2,%xmm6
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ pxor %xmm4,%xmm10
+ rorl $7,%ecx
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ addl 36(%rsp),%eax
+ pxor %xmm10,%xmm6
+ xorl %edx,%ecx
+ roll $5,%ebx
+ movdqa %xmm8,16(%rsp)
+ addl %edi,%eax
+ andl %ecx,%esi
+ movdqa %xmm6,%xmm9
+ movdqa %xmm6,%xmm10
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ rorl $7,%ebx
+ xorl %edx,%esi
+ pslldq $12,%xmm9
+ paddd %xmm6,%xmm6
+ movl %eax,%edi
+ addl 40(%rsp),%ebp
+ xorl %ecx,%ebx
+ roll $5,%eax
+ psrld $31,%xmm10
+ addl %esi,%ebp
+ andl %ebx,%edi
+ movdqa %xmm9,%xmm8
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ psrld $30,%xmm9
+ por %xmm10,%xmm6
+ rorl $7,%eax
+ xorl %ecx,%edi
+ movl %ebp,%esi
+ addl 44(%rsp),%edx
+ pslld $2,%xmm8
+ pxor %xmm9,%xmm6
+ xorl %ebx,%eax
+ roll $5,%ebp
+ movdqa -32(%r11),%xmm9
+ addl %edi,%edx
+ andl %eax,%esi
+ pxor %xmm8,%xmm6
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ movdqa %xmm4,%xmm7
+ rorl $7,%ebp
+ xorl %ebx,%esi
+ movdqa %xmm6,%xmm8
+.byte 102,15,58,15,251,8
+ movl %edx,%edi
+ addl 48(%rsp),%ecx
+ paddd %xmm6,%xmm9
+ xorl %eax,%ebp
+ roll $5,%edx
+ psrldq $4,%xmm8
+ addl %esi,%ecx
+ andl %ebp,%edi
+ pxor %xmm3,%xmm7
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ pxor %xmm5,%xmm8
+ rorl $7,%edx
+ xorl %eax,%edi
+ movl %ecx,%esi
+ addl 52(%rsp),%ebx
+ pxor %xmm8,%xmm7
+ xorl %ebp,%edx
+ roll $5,%ecx
+ movdqa %xmm9,32(%rsp)
+ addl %edi,%ebx
+ andl %edx,%esi
+ movdqa %xmm7,%xmm10
+ movdqa %xmm7,%xmm8
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ rorl $7,%ecx
+ xorl %ebp,%esi
+ pslldq $12,%xmm10
+ paddd %xmm7,%xmm7
+ movl %ebx,%edi
+ addl 56(%rsp),%eax
+ xorl %edx,%ecx
+ roll $5,%ebx
+ psrld $31,%xmm8
+ addl %esi,%eax
+ andl %ecx,%edi
+ movdqa %xmm10,%xmm9
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ psrld $30,%xmm10
+ por %xmm8,%xmm7
+ rorl $7,%ebx
+ xorl %edx,%edi
+ movl %eax,%esi
+ addl 60(%rsp),%ebp
+ pslld $2,%xmm9
+ pxor %xmm10,%xmm7
+ xorl %ecx,%ebx
+ roll $5,%eax
+ movdqa -32(%r11),%xmm10
+ addl %edi,%ebp
+ andl %ebx,%esi
+ pxor %xmm9,%xmm7
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ movdqa %xmm7,%xmm9
+ rorl $7,%eax
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,206,8
+ xorl %ecx,%esi
+ movl %ebp,%edi
+ addl 0(%rsp),%edx
+ pxor %xmm1,%xmm0
+ xorl %ebx,%eax
+ roll $5,%ebp
+ movdqa %xmm10,%xmm8
+ paddd %xmm7,%xmm10
+ addl %esi,%edx
+ andl %eax,%edi
+ pxor %xmm9,%xmm0
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ rorl $7,%ebp
+ xorl %ebx,%edi
+ movdqa %xmm0,%xmm9
+ movdqa %xmm10,48(%rsp)
+ movl %edx,%esi
+ addl 4(%rsp),%ecx
+ xorl %eax,%ebp
+ roll $5,%edx
+ pslld $2,%xmm0
+ addl %edi,%ecx
+ andl %ebp,%esi
+ psrld $30,%xmm9
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ rorl $7,%edx
+ xorl %eax,%esi
+ movl %ecx,%edi
+ addl 8(%rsp),%ebx
+ por %xmm9,%xmm0
+ xorl %ebp,%edx
+ roll $5,%ecx
+ movdqa %xmm0,%xmm10
+ addl %esi,%ebx
+ andl %edx,%edi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 12(%rsp),%eax
+ xorl %ebp,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 16(%rsp),%ebp
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,215,8
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm2,%xmm1
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ movdqa %xmm8,%xmm9
+ paddd %xmm0,%xmm8
+ rorl $7,%ebx
+ addl %eax,%ebp
+ pxor %xmm10,%xmm1
+ addl 20(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm1,%xmm10
+ movdqa %xmm8,0(%rsp)
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm1
+ addl 24(%rsp),%ecx
+ xorl %eax,%esi
+ psrld $30,%xmm10
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ por %xmm10,%xmm1
+ addl 28(%rsp),%ebx
+ xorl %ebp,%edi
+ movdqa %xmm1,%xmm8
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 32(%rsp),%eax
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,192,8
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ pxor %xmm3,%xmm2
+ addl %esi,%eax
+ xorl %edx,%edi
+ movdqa 0(%r11),%xmm10
+ paddd %xmm1,%xmm9
+ rorl $7,%ecx
+ addl %ebx,%eax
+ pxor %xmm8,%xmm2
+ addl 36(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ movdqa %xmm2,%xmm8
+ movdqa %xmm9,16(%rsp)
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ pslld $2,%xmm2
+ addl 40(%rsp),%edx
+ xorl %ebx,%esi
+ psrld $30,%xmm8
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ por %xmm8,%xmm2
+ addl 44(%rsp),%ecx
+ xorl %eax,%edi
+ movdqa %xmm2,%xmm9
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 48(%rsp),%ebx
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,201,8
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ pxor %xmm4,%xmm3
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ movdqa %xmm10,%xmm8
+ paddd %xmm2,%xmm10
+ rorl $7,%edx
+ addl %ecx,%ebx
+ pxor %xmm9,%xmm3
+ addl 52(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ movdqa %xmm3,%xmm9
+ movdqa %xmm10,32(%rsp)
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ pslld $2,%xmm3
+ addl 56(%rsp),%ebp
+ xorl %ecx,%esi
+ psrld $30,%xmm9
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ por %xmm9,%xmm3
+ addl 60(%rsp),%edx
+ xorl %ebx,%edi
+ movdqa %xmm3,%xmm10
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 0(%rsp),%ecx
+ pxor %xmm0,%xmm4
+.byte 102,68,15,58,15,210,8
+ xorl %eax,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ pxor %xmm5,%xmm4
+ addl %esi,%ecx
+ xorl %eax,%edi
+ movdqa %xmm8,%xmm9
+ paddd %xmm3,%xmm8
+ rorl $7,%ebp
+ addl %edx,%ecx
+ pxor %xmm10,%xmm4
+ addl 4(%rsp),%ebx
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ movdqa %xmm4,%xmm10
+ movdqa %xmm8,48(%rsp)
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ pslld $2,%xmm4
+ addl 8(%rsp),%eax
+ xorl %edx,%esi
+ psrld $30,%xmm10
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ por %xmm10,%xmm4
+ addl 12(%rsp),%ebp
+ xorl %ecx,%edi
+ movdqa %xmm4,%xmm8
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 16(%rsp),%edx
+ pxor %xmm1,%xmm5
+.byte 102,68,15,58,15,195,8
+ xorl %ebx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ pxor %xmm6,%xmm5
+ addl %esi,%edx
+ xorl %ebx,%edi
+ movdqa %xmm9,%xmm10
+ paddd %xmm4,%xmm9
+ rorl $7,%eax
+ addl %ebp,%edx
+ pxor %xmm8,%xmm5
+ addl 20(%rsp),%ecx
+ xorl %eax,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ movdqa %xmm5,%xmm8
+ movdqa %xmm9,0(%rsp)
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ pslld $2,%xmm5
+ addl 24(%rsp),%ebx
+ xorl %ebp,%esi
+ psrld $30,%xmm8
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ por %xmm8,%xmm5
+ addl 28(%rsp),%eax
+ movdqa %xmm5,%xmm9
+ rorl $7,%ecx
+ movl %ebx,%esi
+ xorl %edx,%edi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 32(%rsp),%ebp
+ pxor %xmm2,%xmm6
+.byte 102,68,15,58,15,204,8
+ andl %ecx,%esi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ pxor %xmm7,%xmm6
+ movl %eax,%edi
+ xorl %ecx,%esi
+ movdqa %xmm10,%xmm8
+ paddd %xmm5,%xmm10
+ roll $5,%eax
+ addl %esi,%ebp
+ pxor %xmm9,%xmm6
+ xorl %ebx,%edi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 36(%rsp),%edx
+ movdqa %xmm6,%xmm9
+ movdqa %xmm10,16(%rsp)
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movl %ebp,%esi
+ pslld $2,%xmm6
+ xorl %ebx,%edi
+ roll $5,%ebp
+ psrld $30,%xmm9
+ addl %edi,%edx
+ xorl %eax,%esi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 40(%rsp),%ecx
+ andl %eax,%esi
+ por %xmm9,%xmm6
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ movdqa %xmm6,%xmm10
+ movl %edx,%edi
+ xorl %eax,%esi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %ebp,%edi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 44(%rsp),%ebx
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ rorl $7,%edx
+ movl %ecx,%esi
+ xorl %ebp,%edi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %edx,%esi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 48(%rsp),%eax
+ pxor %xmm3,%xmm7
+.byte 102,68,15,58,15,213,8
+ andl %edx,%esi
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ pxor %xmm0,%xmm7
+ movl %ebx,%edi
+ xorl %edx,%esi
+ movdqa 32(%r11),%xmm9
+ paddd %xmm6,%xmm8
+ roll $5,%ebx
+ addl %esi,%eax
+ pxor %xmm10,%xmm7
+ xorl %ecx,%edi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 52(%rsp),%ebp
+ movdqa %xmm7,%xmm10
+ movdqa %xmm8,32(%rsp)
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movl %eax,%esi
+ pslld $2,%xmm7
+ xorl %ecx,%edi
+ roll $5,%eax
+ psrld $30,%xmm10
+ addl %edi,%ebp
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 56(%rsp),%edx
+ andl %ebx,%esi
+ por %xmm10,%xmm7
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movdqa %xmm7,%xmm8
+ movl %ebp,%edi
+ xorl %ebx,%esi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %eax,%edi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 60(%rsp),%ecx
+ andl %eax,%edi
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ movl %edx,%esi
+ xorl %eax,%edi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %ebp,%esi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 0(%rsp),%ebx
+ pxor %xmm4,%xmm0
+.byte 102,68,15,58,15,198,8
+ andl %ebp,%esi
+ xorl %eax,%ebp
+ rorl $7,%edx
+ pxor %xmm1,%xmm0
+ movl %ecx,%edi
+ xorl %ebp,%esi
+ movdqa %xmm9,%xmm10
+ paddd %xmm7,%xmm9
+ roll $5,%ecx
+ addl %esi,%ebx
+ pxor %xmm8,%xmm0
+ xorl %edx,%edi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 4(%rsp),%eax
+ movdqa %xmm0,%xmm8
+ movdqa %xmm9,48(%rsp)
+ andl %edx,%edi
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ movl %ebx,%esi
+ pslld $2,%xmm0
+ xorl %edx,%edi
+ roll $5,%ebx
+ psrld $30,%xmm8
+ addl %edi,%eax
+ xorl %ecx,%esi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 8(%rsp),%ebp
+ andl %ecx,%esi
+ por %xmm8,%xmm0
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movdqa %xmm0,%xmm9
+ movl %eax,%edi
+ xorl %ecx,%esi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ebx,%edi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 12(%rsp),%edx
+ andl %ebx,%edi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ movl %ebp,%esi
+ xorl %ebx,%edi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %eax,%esi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 16(%rsp),%ecx
+ pxor %xmm5,%xmm1
+.byte 102,68,15,58,15,207,8
+ andl %eax,%esi
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ pxor %xmm2,%xmm1
+ movl %edx,%edi
+ xorl %eax,%esi
+ movdqa %xmm10,%xmm8
+ paddd %xmm0,%xmm10
+ roll $5,%edx
+ addl %esi,%ecx
+ pxor %xmm9,%xmm1
+ xorl %ebp,%edi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 20(%rsp),%ebx
+ movdqa %xmm1,%xmm9
+ movdqa %xmm10,0(%rsp)
+ andl %ebp,%edi
+ xorl %eax,%ebp
+ rorl $7,%edx
+ movl %ecx,%esi
+ pslld $2,%xmm1
+ xorl %ebp,%edi
+ roll $5,%ecx
+ psrld $30,%xmm9
+ addl %edi,%ebx
+ xorl %edx,%esi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 24(%rsp),%eax
+ andl %edx,%esi
+ por %xmm9,%xmm1
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ movdqa %xmm1,%xmm10
+ movl %ebx,%edi
+ xorl %edx,%esi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %ecx,%edi
+ xorl %edx,%ecx
+ addl %ebx,%eax
+ addl 28(%rsp),%ebp
+ andl %ecx,%edi
+ xorl %edx,%ecx
+ rorl $7,%ebx
+ movl %eax,%esi
+ xorl %ecx,%edi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ebx,%esi
+ xorl %ecx,%ebx
+ addl %eax,%ebp
+ addl 32(%rsp),%edx
+ pxor %xmm6,%xmm2
+.byte 102,68,15,58,15,208,8
+ andl %ebx,%esi
+ xorl %ecx,%ebx
+ rorl $7,%eax
+ pxor %xmm3,%xmm2
+ movl %ebp,%edi
+ xorl %ebx,%esi
+ movdqa %xmm8,%xmm9
+ paddd %xmm1,%xmm8
+ roll $5,%ebp
+ addl %esi,%edx
+ pxor %xmm10,%xmm2
+ xorl %eax,%edi
+ xorl %ebx,%eax
+ addl %ebp,%edx
+ addl 36(%rsp),%ecx
+ movdqa %xmm2,%xmm10
+ movdqa %xmm8,16(%rsp)
+ andl %eax,%edi
+ xorl %ebx,%eax
+ rorl $7,%ebp
+ movl %edx,%esi
+ pslld $2,%xmm2
+ xorl %eax,%edi
+ roll $5,%edx
+ psrld $30,%xmm10
+ addl %edi,%ecx
+ xorl %ebp,%esi
+ xorl %eax,%ebp
+ addl %edx,%ecx
+ addl 40(%rsp),%ebx
+ andl %ebp,%esi
+ por %xmm10,%xmm2
+ xorl %eax,%ebp
+ rorl $7,%edx
+ movdqa %xmm2,%xmm8
+ movl %ecx,%edi
+ xorl %ebp,%esi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %edx,%edi
+ xorl %ebp,%edx
+ addl %ecx,%ebx
+ addl 44(%rsp),%eax
+ andl %edx,%edi
+ xorl %ebp,%edx
+ rorl $7,%ecx
+ movl %ebx,%esi
+ xorl %edx,%edi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ addl %ebx,%eax
+ addl 48(%rsp),%ebp
+ pxor %xmm7,%xmm3
+.byte 102,68,15,58,15,193,8
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ pxor %xmm4,%xmm3
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ movdqa %xmm9,%xmm10
+ paddd %xmm2,%xmm9
+ rorl $7,%ebx
+ addl %eax,%ebp
+ pxor %xmm8,%xmm3
+ addl 52(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ movdqa %xmm3,%xmm8
+ movdqa %xmm9,32(%rsp)
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ pslld $2,%xmm3
+ addl 56(%rsp),%ecx
+ xorl %eax,%esi
+ psrld $30,%xmm8
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ por %xmm8,%xmm3
+ addl 60(%rsp),%ebx
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 0(%rsp),%eax
+ paddd %xmm3,%xmm10
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ movdqa %xmm10,48(%rsp)
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 4(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 8(%rsp),%edx
+ xorl %ebx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 12(%rsp),%ecx
+ xorl %eax,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ cmpq %r10,%r9
+ je L$done_ssse3
+ movdqa 64(%r11),%xmm6
+ movdqa -64(%r11),%xmm9
+ movdqu 0(%r9),%xmm0
+ movdqu 16(%r9),%xmm1
+ movdqu 32(%r9),%xmm2
+ movdqu 48(%r9),%xmm3
+.byte 102,15,56,0,198
+ addq $64,%r9
+ addl 16(%rsp),%ebx
+ xorl %ebp,%esi
+.byte 102,15,56,0,206
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ paddd %xmm9,%xmm0
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 20(%rsp),%eax
+ movdqa %xmm0,0(%rsp)
+ xorl %edx,%edi
+ movl %ebx,%esi
+ psubd %xmm9,%xmm0
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 24(%rsp),%ebp
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 28(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%rsp),%ecx
+ xorl %eax,%esi
+.byte 102,15,56,0,214
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ paddd %xmm9,%xmm1
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 36(%rsp),%ebx
+ movdqa %xmm1,16(%rsp)
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ psubd %xmm9,%xmm1
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 40(%rsp),%eax
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 48(%rsp),%edx
+ xorl %ebx,%esi
+.byte 102,15,56,0,222
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ paddd %xmm9,%xmm2
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 52(%rsp),%ecx
+ movdqa %xmm2,32(%rsp)
+ xorl %eax,%edi
+ movl %edx,%esi
+ psubd %xmm9,%xmm2
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 56(%rsp),%ebx
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ addl 12(%r8),%edx
+ movl %eax,0(%r8)
+ addl 16(%r8),%ebp
+ movl %esi,4(%r8)
+ movl %esi,%ebx
+ movl %ecx,8(%r8)
+ movl %ecx,%edi
+ movl %edx,12(%r8)
+ xorl %edx,%edi
+ movl %ebp,16(%r8)
+ andl %edi,%esi
+ jmp L$oop_ssse3
+
+.p2align 4
+L$done_ssse3:
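+# last block: run the final rounds without scheduling new message words,
+# fold the working registers back into the state at (%r8) and restore the
+# callee-saved registers; .byte 0xf3,0xc3 encodes rep ret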
+ addl 16(%rsp),%ebx
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 20(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ xorl %edx,%esi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 24(%rsp),%ebp
+ xorl %ecx,%esi
+ movl %eax,%edi
+ roll $5,%eax
+ addl %esi,%ebp
+ xorl %ecx,%edi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 28(%rsp),%edx
+ xorl %ebx,%edi
+ movl %ebp,%esi
+ roll $5,%ebp
+ addl %edi,%edx
+ xorl %ebx,%esi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 32(%rsp),%ecx
+ xorl %eax,%esi
+ movl %edx,%edi
+ roll $5,%edx
+ addl %esi,%ecx
+ xorl %eax,%edi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 36(%rsp),%ebx
+ xorl %ebp,%edi
+ movl %ecx,%esi
+ roll $5,%ecx
+ addl %edi,%ebx
+ xorl %ebp,%esi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 40(%rsp),%eax
+ xorl %edx,%esi
+ movl %ebx,%edi
+ roll $5,%ebx
+ addl %esi,%eax
+ xorl %edx,%edi
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 44(%rsp),%ebp
+ xorl %ecx,%edi
+ movl %eax,%esi
+ roll $5,%eax
+ addl %edi,%ebp
+ xorl %ecx,%esi
+ rorl $7,%ebx
+ addl %eax,%ebp
+ addl 48(%rsp),%edx
+ xorl %ebx,%esi
+ movl %ebp,%edi
+ roll $5,%ebp
+ addl %esi,%edx
+ xorl %ebx,%edi
+ rorl $7,%eax
+ addl %ebp,%edx
+ addl 52(%rsp),%ecx
+ xorl %eax,%edi
+ movl %edx,%esi
+ roll $5,%edx
+ addl %edi,%ecx
+ xorl %eax,%esi
+ rorl $7,%ebp
+ addl %edx,%ecx
+ addl 56(%rsp),%ebx
+ xorl %ebp,%esi
+ movl %ecx,%edi
+ roll $5,%ecx
+ addl %esi,%ebx
+ xorl %ebp,%edi
+ rorl $7,%edx
+ addl %ecx,%ebx
+ addl 60(%rsp),%eax
+ xorl %edx,%edi
+ movl %ebx,%esi
+ roll $5,%ebx
+ addl %edi,%eax
+ rorl $7,%ecx
+ addl %ebx,%eax
+ addl 0(%r8),%eax
+ addl 4(%r8),%esi
+ addl 8(%r8),%ecx
+ movl %eax,0(%r8)
+ addl 12(%r8),%edx
+ movl %esi,4(%r8)
+ addl 16(%r8),%ebp
+ movl %ecx,8(%r8)
+ movl %edx,12(%r8)
+ movl %ebp,16(%r8)
+ leaq 64(%rsp),%rsi
+ movq 0(%rsi),%r12
+ movq 8(%rsi),%rbp
+ movq 16(%rsi),%rbx
+ leaq 24(%rsi),%rsp
+L$epilogue_ssse3:
+ .byte 0xf3,0xc3
+
+.p2align 6
+K_XX_XX:
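+# SHA-1 round constants, two 128-bit copies of each: K1 (rounds 0-19),
+# K2 (20-39), K3 (40-59), K4 (60-79), then the pshufb byte-swap mask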
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
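+# the string below reads "SHA1 block transform for x86_64, CRYPTOGAMS by
+# <appro@openssl.org>"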
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/sha256-avx-x86_64.s b/lib/accelerated/x86/macosx/sha256-avx-x86_64.s
new file mode 100644
index 0000000000..bd53138cf6
--- /dev/null
+++ b/lib/accelerated/x86/macosx/sha256-avx-x86_64.s
@@ -0,0 +1,2614 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+
+
+
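+# sha256_multi_block(ctx=%rdi, inp=%rsi, num=%edx) hashes up to four
+# independent messages in parallel, one per 32-bit XMM lane: %rsi points at
+# an array of (pointer, block-count) descriptors, and the state words A..H
+# are stored interleaved at 32-byte strides around %rdi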
+.globl _sha256_multi_block
+
+.p2align 5
+_sha256_multi_block:
+ movq %rsp,%rax
+ pushq %rbx
+ pushq %rbp
+ subq $288,%rsp
+ andq $-256,%rsp
+ movq %rax,272(%rsp)
+ leaq K256+128(%rip),%rbp
+ leaq 256(%rsp),%rbx
+ leaq 128(%rdi),%rdi
+
+L$oop_grande:
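+# load up to four (pointer, count) pairs, record each lane's count at
+# (%rbx), keep the maximum in %edx, and point finished lanes at a dummy
+# buffer (%rbp = K256+128) so they can be processed without faulting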
+ movl %edx,280(%rsp)
+ xorl %edx,%edx
+ movq 0(%rsi),%r8
+ movl 8(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,0(%rbx)
+ cmovleq %rbp,%r8
+ movq 16(%rsi),%r9
+ movl 24(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,4(%rbx)
+ cmovleq %rbp,%r9
+ movq 32(%rsi),%r10
+ movl 40(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,8(%rbx)
+ cmovleq %rbp,%r10
+ movq 48(%rsi),%r11
+ movl 56(%rsi),%ecx
+ cmpl %edx,%ecx
+ cmovgl %ecx,%edx
+ testl %ecx,%ecx
+ movl %ecx,12(%rbx)
+ cmovleq %rbp,%r11
+ testl %edx,%edx
+ jz L$done
+
+ movdqu 0-128(%rdi),%xmm8
+ leaq 128(%rsp),%rax
+ movdqu 32-128(%rdi),%xmm9
+ movdqu 64-128(%rdi),%xmm10
+ movdqu 96-128(%rdi),%xmm11
+ movdqu 128-128(%rdi),%xmm12
+ movdqu 160-128(%rdi),%xmm13
+ movdqu 192-128(%rdi),%xmm14
+ movdqu 224-128(%rdi),%xmm15
+ movdqu L$pbswap(%rip),%xmm6
+ jmp L$oop
+
+.p2align 5
+L$oop:
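+# one SHA-256 round per cluster, four lanes wide; SSE has no vector rotate,
+# so Sigma1(e)/Sigma0(a) are built from psrld/pslld pairs joined with pxor,
+# Ch uses pandn/pand, and Maj is b^((a^b)&(b^c)) with a^b carried between
+# rounds in xmm3/xmm4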
+ movdqa %xmm10,%xmm4
+ pxor %xmm9,%xmm4
+ movd 0(%r8),%xmm5
+ movd 0(%r9),%xmm0
+ movd 0(%r10),%xmm1
+ movd 0(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,0-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movd 4(%r8),%xmm5
+ movd 4(%r9),%xmm0
+ movd 4(%r10),%xmm1
+ movd 4(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,16-128(%rax)
+ paddd %xmm14,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm5,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm14
+ paddd %xmm7,%xmm14
+ movd 8(%r8),%xmm5
+ movd 8(%r9),%xmm0
+ movd 8(%r10),%xmm1
+ movd 8(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,32-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movd 12(%r8),%xmm5
+ movd 12(%r9),%xmm0
+ movd 12(%r10),%xmm1
+ movd 12(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,48-128(%rax)
+ paddd %xmm12,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm5,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm12
+ paddd %xmm7,%xmm12
+ movd 16(%r8),%xmm5
+ movd 16(%r9),%xmm0
+ movd 16(%r10),%xmm1
+ movd 16(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,64-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movd 20(%r8),%xmm5
+ movd 20(%r9),%xmm0
+ movd 20(%r10),%xmm1
+ movd 20(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,80-128(%rax)
+ paddd %xmm10,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm5,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm10
+ paddd %xmm7,%xmm10
+ movd 24(%r8),%xmm5
+ movd 24(%r9),%xmm0
+ movd 24(%r10),%xmm1
+ movd 24(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,96-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movd 28(%r8),%xmm5
+ movd 28(%r9),%xmm0
+ movd 28(%r10),%xmm1
+ movd 28(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,112-128(%rax)
+ paddd %xmm8,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm5,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movd 32(%r8),%xmm5
+ movd 32(%r9),%xmm0
+ movd 32(%r10),%xmm1
+ movd 32(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,128-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movd 36(%r8),%xmm5
+ movd 36(%r9),%xmm0
+ movd 36(%r10),%xmm1
+ movd 36(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,144-128(%rax)
+ paddd %xmm14,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm5,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm14
+ paddd %xmm7,%xmm14
+ movd 40(%r8),%xmm5
+ movd 40(%r9),%xmm0
+ movd 40(%r10),%xmm1
+ movd 40(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,160-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movd 44(%r8),%xmm5
+ movd 44(%r9),%xmm0
+ movd 44(%r10),%xmm1
+ movd 44(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,176-128(%rax)
+ paddd %xmm12,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm5,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm12
+ paddd %xmm7,%xmm12
+ movd 48(%r8),%xmm5
+ movd 48(%r9),%xmm0
+ movd 48(%r10),%xmm1
+ movd 48(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,192-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movd 52(%r8),%xmm5
+ movd 52(%r9),%xmm0
+ movd 52(%r10),%xmm1
+ movd 52(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,208-128(%rax)
+ paddd %xmm10,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm5,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm10
+ paddd %xmm7,%xmm10
+ movd 56(%r8),%xmm5
+ movd 56(%r9),%xmm0
+ movd 56(%r10),%xmm1
+ movd 56(%r11),%xmm2
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,224-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movd 60(%r8),%xmm5
+ leaq 64(%r8),%r8
+ movd 60(%r9),%xmm0
+ leaq 64(%r9),%r9
+ movd 60(%r10),%xmm1
+ leaq 64(%r10),%r10
+ movd 60(%r11),%xmm2
+ leaq 64(%r11),%r11
+ punpckldq %xmm1,%xmm5
+ punpckldq %xmm2,%xmm0
+ punpckldq %xmm0,%xmm5
+.byte 102,15,56,0,238
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,240-128(%rax)
+ paddd %xmm8,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm5,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movdqu 0-128(%rax),%xmm5
+ movl $3,%ecx
+ jmp L$oop_16_xx
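+# Rounds 16-63: three passes of sixteen rounds each, counted down in
+# %ecx, expanding the message schedule in place in the 256-byte window
+# at -128(%rax) as the rounds are computed.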
+.p2align 5
+L$oop_16_xx:
+ movdqa 16-128(%rax),%xmm6
+ paddd 144-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 224-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,0-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movdqa 32-128(%rax),%xmm5
+ paddd 160-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 240-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,16-128(%rax)
+ paddd %xmm14,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm6,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm14
+ paddd %xmm7,%xmm14
+ movdqa 48-128(%rax),%xmm6
+ paddd 176-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 0-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,32-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movdqa 64-128(%rax),%xmm5
+ paddd 192-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 16-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,48-128(%rax)
+ paddd %xmm12,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm6,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm12
+ paddd %xmm7,%xmm12
+ movdqa 80-128(%rax),%xmm6
+ paddd 208-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 32-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,64-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movdqa 96-128(%rax),%xmm5
+ paddd 224-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 48-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,80-128(%rax)
+ paddd %xmm10,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm6,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm10
+ paddd %xmm7,%xmm10
+ movdqa 112-128(%rax),%xmm6
+ paddd 240-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 64-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,96-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movdqa 128-128(%rax),%xmm5
+ paddd 0-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 80-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,112-128(%rax)
+ paddd %xmm8,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm6,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ movdqa 144-128(%rax),%xmm6
+ paddd 16-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 96-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm12,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm12,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,128-128(%rax)
+ paddd %xmm15,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -128(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm12,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm14,%xmm0
+ pand %xmm13,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm8,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm9,%xmm3
+ movdqa %xmm8,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm8,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm9,%xmm15
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm15
+ paddd %xmm5,%xmm11
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm15
+ paddd %xmm7,%xmm15
+ movdqa 160-128(%rax),%xmm5
+ paddd 32-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 112-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm11,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm11,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,144-128(%rax)
+ paddd %xmm14,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm11,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm13,%xmm0
+ pand %xmm12,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm15,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm8,%xmm4
+ movdqa %xmm15,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm15,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm8,%xmm14
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm14
+ paddd %xmm6,%xmm10
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm14
+ paddd %xmm7,%xmm14
+ movdqa 176-128(%rax),%xmm6
+ paddd 48-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 128-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm10,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm10,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,160-128(%rax)
+ paddd %xmm13,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm10,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm12,%xmm0
+ pand %xmm11,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm14,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm15,%xmm3
+ movdqa %xmm14,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm14,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm15,%xmm13
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm13
+ paddd %xmm5,%xmm9
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm13
+ paddd %xmm7,%xmm13
+ movdqa 192-128(%rax),%xmm5
+ paddd 64-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 144-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm9,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm9,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,176-128(%rax)
+ paddd %xmm12,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd -32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm9,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm11,%xmm0
+ pand %xmm10,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm13,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm14,%xmm4
+ movdqa %xmm13,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm13,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm14,%xmm12
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm12
+ paddd %xmm6,%xmm8
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm12
+ paddd %xmm7,%xmm12
+ movdqa 208-128(%rax),%xmm6
+ paddd 80-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 160-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm8,%xmm7
+ movdqa %xmm8,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm8,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,192-128(%rax)
+ paddd %xmm11,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 0(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm8,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm8,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm10,%xmm0
+ pand %xmm9,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm12,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm12,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm13,%xmm3
+ movdqa %xmm12,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm12,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm13,%xmm11
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm11
+ paddd %xmm5,%xmm15
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm11
+ paddd %xmm7,%xmm11
+ movdqa 224-128(%rax),%xmm5
+ paddd 96-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 176-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm15,%xmm7
+ movdqa %xmm15,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm15,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,208-128(%rax)
+ paddd %xmm10,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 32(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm15,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm15,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm9,%xmm0
+ pand %xmm8,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm11,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm11,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm12,%xmm4
+ movdqa %xmm11,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm11,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm12,%xmm10
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm10
+ paddd %xmm6,%xmm14
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm10
+ paddd %xmm7,%xmm10
+ movdqa 240-128(%rax),%xmm6
+ paddd 112-128(%rax),%xmm5
+
+ movdqa %xmm6,%xmm7
+ movdqa %xmm6,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm6,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 192-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm3,%xmm1
+
+ psrld $17,%xmm3
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ psrld $19-17,%xmm3
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm3,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm5
+ movdqa %xmm14,%xmm7
+ movdqa %xmm14,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm14,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm5,224-128(%rax)
+ paddd %xmm9,%xmm5
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 64(%rbp),%xmm5
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm14,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm14,%xmm3
+ pslld $26-21,%xmm2
+ pandn %xmm8,%xmm0
+ pand %xmm15,%xmm3
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm10,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm10,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm5
+ pxor %xmm3,%xmm0
+ movdqa %xmm11,%xmm3
+ movdqa %xmm10,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm10,%xmm3
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm5
+ pslld $19-10,%xmm2
+ pand %xmm3,%xmm4
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm11,%xmm9
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm4,%xmm9
+ paddd %xmm5,%xmm13
+ pxor %xmm2,%xmm7
+
+ paddd %xmm5,%xmm9
+ paddd %xmm7,%xmm9
+ movdqa 0-128(%rax),%xmm5
+ paddd 128-128(%rax),%xmm6
+
+ movdqa %xmm5,%xmm7
+ movdqa %xmm5,%xmm1
+ psrld $3,%xmm7
+ movdqa %xmm5,%xmm2
+
+ psrld $7,%xmm1
+ movdqa 208-128(%rax),%xmm0
+ pslld $14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $18-7,%xmm1
+ movdqa %xmm0,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $25-14,%xmm2
+ pxor %xmm1,%xmm7
+ psrld $10,%xmm0
+ movdqa %xmm4,%xmm1
+
+ psrld $17,%xmm4
+ pxor %xmm2,%xmm7
+ pslld $13,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ psrld $19-17,%xmm4
+ pxor %xmm1,%xmm0
+ pslld $15-13,%xmm1
+ pxor %xmm4,%xmm0
+ pxor %xmm1,%xmm0
+ paddd %xmm0,%xmm6
+ movdqa %xmm13,%xmm7
+ movdqa %xmm13,%xmm2
+ psrld $6,%xmm7
+ movdqa %xmm13,%xmm1
+ pslld $7,%xmm2
+ movdqa %xmm6,240-128(%rax)
+ paddd %xmm8,%xmm6
+
+ psrld $11,%xmm1
+ pxor %xmm2,%xmm7
+ pslld $21-7,%xmm2
+ paddd 96(%rbp),%xmm6
+ pxor %xmm1,%xmm7
+
+ psrld $25-11,%xmm1
+ movdqa %xmm13,%xmm0
+ pxor %xmm2,%xmm7
+ movdqa %xmm13,%xmm4
+ pslld $26-21,%xmm2
+ pandn %xmm15,%xmm0
+ pand %xmm14,%xmm4
+ pxor %xmm1,%xmm7
+
+ movdqa %xmm9,%xmm1
+ pxor %xmm2,%xmm7
+ movdqa %xmm9,%xmm2
+ psrld $2,%xmm1
+ paddd %xmm7,%xmm6
+ pxor %xmm4,%xmm0
+ movdqa %xmm10,%xmm4
+ movdqa %xmm9,%xmm7
+ pslld $10,%xmm2
+ pxor %xmm9,%xmm4
+
+ psrld $13,%xmm7
+ pxor %xmm2,%xmm1
+ paddd %xmm0,%xmm6
+ pslld $19-10,%xmm2
+ pand %xmm4,%xmm3
+ pxor %xmm7,%xmm1
+
+ psrld $22-13,%xmm7
+ pxor %xmm2,%xmm1
+ movdqa %xmm10,%xmm8
+ pslld $30-19,%xmm2
+ pxor %xmm1,%xmm7
+ pxor %xmm3,%xmm8
+ paddd %xmm6,%xmm12
+ pxor %xmm2,%xmm7
+
+ paddd %xmm6,%xmm8
+ paddd %xmm7,%xmm8
+ leaq 256(%rbp),%rbp
+ decl %ecx
+ jnz L$oop_16_xx
+
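+# All four lanes have finished a 64-byte block.  Lanes whose block
+# counter at (%rbx) is used up get their input pointer parked on the
+# K256 table so further loads stay harmless; %xmm6 becomes an all-ones
+# mask for the lanes still active, gating the state update below so
+# finished lanes keep their final hash values.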
+ movl $1,%ecx
+ leaq K256+128(%rip),%rbp
+
+ movdqa (%rbx),%xmm7
+ cmpl 0(%rbx),%ecx
+ pxor %xmm0,%xmm0
+ cmovgeq %rbp,%r8
+ cmpl 4(%rbx),%ecx
+ movdqa %xmm7,%xmm6
+ cmovgeq %rbp,%r9
+ cmpl 8(%rbx),%ecx
+ pcmpgtd %xmm0,%xmm6
+ cmovgeq %rbp,%r10
+ cmpl 12(%rbx),%ecx
+ paddd %xmm6,%xmm7
+ cmovgeq %rbp,%r11
+
+ movdqu 0-128(%rdi),%xmm0
+ pand %xmm6,%xmm8
+ movdqu 32-128(%rdi),%xmm1
+ pand %xmm6,%xmm9
+ movdqu 64-128(%rdi),%xmm2
+ pand %xmm6,%xmm10
+ movdqu 96-128(%rdi),%xmm5
+ pand %xmm6,%xmm11
+ paddd %xmm0,%xmm8
+ movdqu 128-128(%rdi),%xmm0
+ pand %xmm6,%xmm12
+ paddd %xmm1,%xmm9
+ movdqu 160-128(%rdi),%xmm1
+ pand %xmm6,%xmm13
+ paddd %xmm2,%xmm10
+ movdqu 192-128(%rdi),%xmm2
+ pand %xmm6,%xmm14
+ paddd %xmm5,%xmm11
+ movdqu 224-128(%rdi),%xmm5
+ pand %xmm6,%xmm15
+ paddd %xmm0,%xmm12
+ paddd %xmm1,%xmm13
+ movdqu %xmm8,0-128(%rdi)
+ paddd %xmm2,%xmm14
+ movdqu %xmm9,32-128(%rdi)
+ paddd %xmm5,%xmm15
+ movdqu %xmm10,64-128(%rdi)
+ movdqu %xmm11,96-128(%rdi)
+ movdqu %xmm12,128-128(%rdi)
+ movdqu %xmm13,160-128(%rdi)
+ movdqu %xmm14,192-128(%rdi)
+ movdqu %xmm15,224-128(%rdi)
+
+ movdqa %xmm7,(%rbx)
+ movdqa L$pbswap(%rip),%xmm6
+ decl %edx
+ jnz L$oop
+
+ movl 280(%rsp),%edx
+ leaq 16(%rdi),%rdi
+ leaq 64(%rsi),%rsi
+ decl %edx
+ jnz L$oop_grande
+
+L$done:
+ movq 272(%rsp),%rax
+ movq -16(%rax),%rbp
+ movq -8(%rax),%rbx
+ leaq (%rax),%rsp
+ .byte 0xf3,0xc3
+
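+# K256: the 64 SHA-256 round constants, each replicated eight times so
+# the 32-byte-per-round stride used above always lands on a full
+# four-lane copy; L$pbswap is the byte-swap shuffle mask for the
+# big-endian input words.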
+.p2align 8
+K256:
+.long 1116352408,1116352408,1116352408,1116352408
+.long 1116352408,1116352408,1116352408,1116352408
+.long 1899447441,1899447441,1899447441,1899447441
+.long 1899447441,1899447441,1899447441,1899447441
+.long 3049323471,3049323471,3049323471,3049323471
+.long 3049323471,3049323471,3049323471,3049323471
+.long 3921009573,3921009573,3921009573,3921009573
+.long 3921009573,3921009573,3921009573,3921009573
+.long 961987163,961987163,961987163,961987163
+.long 961987163,961987163,961987163,961987163
+.long 1508970993,1508970993,1508970993,1508970993
+.long 1508970993,1508970993,1508970993,1508970993
+.long 2453635748,2453635748,2453635748,2453635748
+.long 2453635748,2453635748,2453635748,2453635748
+.long 2870763221,2870763221,2870763221,2870763221
+.long 2870763221,2870763221,2870763221,2870763221
+.long 3624381080,3624381080,3624381080,3624381080
+.long 3624381080,3624381080,3624381080,3624381080
+.long 310598401,310598401,310598401,310598401
+.long 310598401,310598401,310598401,310598401
+.long 607225278,607225278,607225278,607225278
+.long 607225278,607225278,607225278,607225278
+.long 1426881987,1426881987,1426881987,1426881987
+.long 1426881987,1426881987,1426881987,1426881987
+.long 1925078388,1925078388,1925078388,1925078388
+.long 1925078388,1925078388,1925078388,1925078388
+.long 2162078206,2162078206,2162078206,2162078206
+.long 2162078206,2162078206,2162078206,2162078206
+.long 2614888103,2614888103,2614888103,2614888103
+.long 2614888103,2614888103,2614888103,2614888103
+.long 3248222580,3248222580,3248222580,3248222580
+.long 3248222580,3248222580,3248222580,3248222580
+.long 3835390401,3835390401,3835390401,3835390401
+.long 3835390401,3835390401,3835390401,3835390401
+.long 4022224774,4022224774,4022224774,4022224774
+.long 4022224774,4022224774,4022224774,4022224774
+.long 264347078,264347078,264347078,264347078
+.long 264347078,264347078,264347078,264347078
+.long 604807628,604807628,604807628,604807628
+.long 604807628,604807628,604807628,604807628
+.long 770255983,770255983,770255983,770255983
+.long 770255983,770255983,770255983,770255983
+.long 1249150122,1249150122,1249150122,1249150122
+.long 1249150122,1249150122,1249150122,1249150122
+.long 1555081692,1555081692,1555081692,1555081692
+.long 1555081692,1555081692,1555081692,1555081692
+.long 1996064986,1996064986,1996064986,1996064986
+.long 1996064986,1996064986,1996064986,1996064986
+.long 2554220882,2554220882,2554220882,2554220882
+.long 2554220882,2554220882,2554220882,2554220882
+.long 2821834349,2821834349,2821834349,2821834349
+.long 2821834349,2821834349,2821834349,2821834349
+.long 2952996808,2952996808,2952996808,2952996808
+.long 2952996808,2952996808,2952996808,2952996808
+.long 3210313671,3210313671,3210313671,3210313671
+.long 3210313671,3210313671,3210313671,3210313671
+.long 3336571891,3336571891,3336571891,3336571891
+.long 3336571891,3336571891,3336571891,3336571891
+.long 3584528711,3584528711,3584528711,3584528711
+.long 3584528711,3584528711,3584528711,3584528711
+.long 113926993,113926993,113926993,113926993
+.long 113926993,113926993,113926993,113926993
+.long 338241895,338241895,338241895,338241895
+.long 338241895,338241895,338241895,338241895
+.long 666307205,666307205,666307205,666307205
+.long 666307205,666307205,666307205,666307205
+.long 773529912,773529912,773529912,773529912
+.long 773529912,773529912,773529912,773529912
+.long 1294757372,1294757372,1294757372,1294757372
+.long 1294757372,1294757372,1294757372,1294757372
+.long 1396182291,1396182291,1396182291,1396182291
+.long 1396182291,1396182291,1396182291,1396182291
+.long 1695183700,1695183700,1695183700,1695183700
+.long 1695183700,1695183700,1695183700,1695183700
+.long 1986661051,1986661051,1986661051,1986661051
+.long 1986661051,1986661051,1986661051,1986661051
+.long 2177026350,2177026350,2177026350,2177026350
+.long 2177026350,2177026350,2177026350,2177026350
+.long 2456956037,2456956037,2456956037,2456956037
+.long 2456956037,2456956037,2456956037,2456956037
+.long 2730485921,2730485921,2730485921,2730485921
+.long 2730485921,2730485921,2730485921,2730485921
+.long 2820302411,2820302411,2820302411,2820302411
+.long 2820302411,2820302411,2820302411,2820302411
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3259730800,3259730800,3259730800,3259730800
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3345764771,3345764771,3345764771,3345764771
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3600352804,3600352804,3600352804,3600352804
+.long 3600352804,3600352804,3600352804,3600352804
+.long 4094571909,4094571909,4094571909,4094571909
+.long 4094571909,4094571909,4094571909,4094571909
+.long 275423344,275423344,275423344,275423344
+.long 275423344,275423344,275423344,275423344
+.long 430227734,430227734,430227734,430227734
+.long 430227734,430227734,430227734,430227734
+.long 506948616,506948616,506948616,506948616
+.long 506948616,506948616,506948616,506948616
+.long 659060556,659060556,659060556,659060556
+.long 659060556,659060556,659060556,659060556
+.long 883997877,883997877,883997877,883997877
+.long 883997877,883997877,883997877,883997877
+.long 958139571,958139571,958139571,958139571
+.long 958139571,958139571,958139571,958139571
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1955562222,1955562222,1955562222,1955562222
+.long 1955562222,1955562222,1955562222,1955562222
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2756734187,2756734187,2756734187,2756734187
+.long 2756734187,2756734187,2756734187,2756734187
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3329325298,3329325298,3329325298,3329325298
+.long 3329325298,3329325298,3329325298,3329325298
+L$pbswap:
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/sha256-ssse3-x86.s b/lib/accelerated/x86/macosx/sha256-ssse3-x86.s
new file mode 100644
index 0000000000..d6cf6cb2a0
--- /dev/null
+++ b/lib/accelerated/x86/macosx/sha256-ssse3-x86.s
@@ -0,0 +1,3405 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "sha512-586.s"
+.text
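+# sha256_block_data_order(state, input, num_blocks) for 32-bit x86.
+# The cached CPUID words in _gnutls_x86_cpuid_s are consulted to pick a
+# round-loop variant: the generic rorl loop, the shrdl variant, or the
+# fully unrolled path.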
+.globl _sha256_block_data_order
+.align 4
+_sha256_block_data_order:
+L_sha256_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl %esp,%ebx
+ call L000pic_point
+L000pic_point:
+ popl %ebp
+ leal L001K256-L000pic_point(%ebp),%ebp
+ subl $16,%esp
+ andl $-64,%esp
+ shll $6,%eax
+ addl %edi,%eax
+ movl %esi,(%esp)
+ movl %edi,4(%esp)
+ movl %eax,8(%esp)
+ movl %ebx,12(%esp)
+ movl L__gnutls_x86_cpuid_s$non_lazy_ptr-L001K256(%ebp),%edx
+ movl (%edx),%ecx
+ movl 4(%edx),%ebx
+ testl $1048576,%ecx
+ jnz L002loop
+ andl $1073741824,%ecx
+ andl $268435968,%ebx
+ orl %ebx,%ecx
+ andl $1342177280,%ecx
+ cmpl $1342177280,%ecx
+ je L003loop_shrd
+ subl %edi,%eax
+ cmpl $256,%eax
+ jae L004unrolled
+ jmp L002loop
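+# Generic path: byte-swap the sixteen input words onto the stack, then
+# run the 64 rounds with each constant pulled from the K256 table.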
+.align 4,0x90
+L002loop:
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ bswap %eax
+ movl 12(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 16(%edi),%eax
+ movl 20(%edi),%ebx
+ movl 24(%edi),%ecx
+ bswap %eax
+ movl 28(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 32(%edi),%eax
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %eax
+ movl 44(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 48(%edi),%eax
+ movl 52(%edi),%ebx
+ movl 56(%edi),%ecx
+ bswap %eax
+ movl 60(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ addl $64,%edi
+ leal -36(%esp),%esp
+ movl %edi,104(%esp)
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,8(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,12(%esp)
+ movl %edi,16(%esp)
+ movl %ebx,(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ movl %edi,32(%esp)
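+# Rounds 0-15; the loop below exits after consuming the constant
+# 0xc19bf174 (K[15]), then continues with the 16-63 loop, which also
+# computes the message expansion.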
+.align 4,0x90
+L00500_15:
+ movl %edx,%ecx
+ movl 24(%esp),%esi
+ rorl $14,%ecx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl 96(%esp),%ebx
+ rorl $5,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3248222580,%esi
+ jne L00500_15
+ movl 156(%esp),%ecx
+ jmp L00616_63
+.align 4,0x90
+L00616_63:
+ movl %ecx,%ebx
+ movl 104(%esp),%esi
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 160(%esp),%ebx
+ shrl $10,%edi
+ addl 124(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 24(%esp),%esi
+ rorl $14,%ecx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl %ebx,96(%esp)
+ rorl $5,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ rorl $9,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ rorl $11,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ rorl $2,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ movl 156(%esp),%ecx
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3329325298,%esi
+ jne L00616_63
+ movl 356(%esp),%esi
+ movl 8(%esp),%ebx
+ movl 16(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl 24(%esp),%eax
+ movl 28(%esp),%ebx
+ movl 32(%esp),%ecx
+ movl 360(%esp),%edi
+ addl 16(%esi),%edx
+ addl 20(%esi),%eax
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %eax,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ leal 356(%esp),%esp
+ subl $256,%ebp
+ cmpl 8(%esp),%edi
+ jb L002loop
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
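+# Same round function as L002loop, with the rotates expressed as shrdl
+# double-shifts instead of rorl.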
+.align 5,0x90
+L003loop_shrd:
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ bswap %eax
+ movl 12(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 16(%edi),%eax
+ movl 20(%edi),%ebx
+ movl 24(%edi),%ecx
+ bswap %eax
+ movl 28(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 32(%edi),%eax
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %eax
+ movl 44(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ movl 48(%edi),%eax
+ movl 52(%edi),%ebx
+ movl 56(%edi),%ecx
+ bswap %eax
+ movl 60(%edi),%edx
+ bswap %ebx
+ pushl %eax
+ bswap %ecx
+ pushl %ebx
+ bswap %edx
+ pushl %ecx
+ pushl %edx
+ addl $64,%edi
+ leal -36(%esp),%esp
+ movl %edi,104(%esp)
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edi
+ movl %ebx,8(%esp)
+ xorl %ecx,%ebx
+ movl %ecx,12(%esp)
+ movl %edi,16(%esp)
+ movl %ebx,(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ movl %edi,32(%esp)
+.align 4,0x90
+L00700_15_shrd:
+ movl %edx,%ecx
+ movl 24(%esp),%esi
+ shrdl $14,%ecx,%ecx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl 96(%esp),%ebx
+ shrdl $5,%ecx,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ shrdl $9,%ecx,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ shrdl $11,%ecx,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3248222580,%esi
+ jne L00700_15_shrd
+ movl 156(%esp),%ecx
+ jmp L00816_63_shrd
+.align 4,0x90
+L00816_63_shrd:
+ movl %ecx,%ebx
+ movl 104(%esp),%esi
+ shrdl $11,%ecx,%ecx
+ movl %esi,%edi
+ shrdl $2,%esi,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ shrdl $7,%ecx,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ shrdl $17,%esi,%esi
+ addl 160(%esp),%ebx
+ shrl $10,%edi
+ addl 124(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 24(%esp),%esi
+ shrdl $14,%ecx,%ecx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %edx,%ecx
+ xorl %edi,%esi
+ movl %ebx,96(%esp)
+ shrdl $5,%ecx,%ecx
+ andl %edx,%esi
+ movl %edx,20(%esp)
+ xorl %ecx,%edx
+ addl 32(%esp),%ebx
+ xorl %edi,%esi
+ shrdl $6,%edx,%edx
+ movl %eax,%ecx
+ addl %esi,%ebx
+ shrdl $9,%ecx,%ecx
+ addl %edx,%ebx
+ movl 8(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,4(%esp)
+ leal -4(%esp),%esp
+ shrdl $11,%ecx,%ecx
+ movl (%ebp),%esi
+ xorl %eax,%ecx
+ movl 20(%esp),%edx
+ xorl %edi,%eax
+ shrdl $2,%ecx,%ecx
+ addl %esi,%ebx
+ movl %eax,(%esp)
+ addl %ebx,%edx
+ andl 4(%esp),%eax
+ addl %ecx,%ebx
+ xorl %edi,%eax
+ movl 156(%esp),%ecx
+ addl $4,%ebp
+ addl %ebx,%eax
+ cmpl $3329325298,%esi
+ jne L00816_63_shrd
+ movl 356(%esp),%esi
+ movl 8(%esp),%ebx
+ movl 16(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebx
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl 24(%esp),%eax
+ movl 28(%esp),%ebx
+ movl 32(%esp),%ecx
+ movl 360(%esp),%edi
+ addl 16(%esi),%edx
+ addl 20(%esi),%eax
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %eax,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ leal 356(%esp),%esp
+ subl $256,%ebp
+ cmpl 8(%esp),%edi
+ jb L003loop_shrd
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
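+# K256 round constants (one copy each), a byte-order mask for the
+# pshufb-based vector paths later in the file, and the CRYPTOGAMS
+# banner string.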
+.align 6,0x90
+L001K256:
+.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
+.long 66051,67438087,134810123,202182159
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte 62,0
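+# Fully unrolled path, taken when the input is at least 256 bytes; the
+# round constants are folded into the leal address arithmetic.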
+.align 4,0x90
+L004unrolled:
+ leal -96(%esp),%esp
+ movl (%esi),%eax
+ movl 4(%esi),%ebp
+ movl 8(%esi),%ecx
+ movl 12(%esi),%ebx
+ movl %ebp,4(%esp)
+ xorl %ecx,%ebp
+ movl %ecx,8(%esp)
+ movl %ebx,12(%esp)
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%esi
+ movl %ebx,20(%esp)
+ movl %ecx,24(%esp)
+ movl %esi,28(%esp)
+ jmp L009grand_loop
+.align 4,0x90
+L009grand_loop:
+ movl (%edi),%ebx
+ movl 4(%edi),%ecx
+ bswap %ebx
+ movl 8(%edi),%esi
+ bswap %ecx
+ movl %ebx,32(%esp)
+ bswap %esi
+ movl %ecx,36(%esp)
+ movl %esi,40(%esp)
+ movl 12(%edi),%ebx
+ movl 16(%edi),%ecx
+ bswap %ebx
+ movl 20(%edi),%esi
+ bswap %ecx
+ movl %ebx,44(%esp)
+ bswap %esi
+ movl %ecx,48(%esp)
+ movl %esi,52(%esp)
+ movl 24(%edi),%ebx
+ movl 28(%edi),%ecx
+ bswap %ebx
+ movl 32(%edi),%esi
+ bswap %ecx
+ movl %ebx,56(%esp)
+ bswap %esi
+ movl %ecx,60(%esp)
+ movl %esi,64(%esp)
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ bswap %ebx
+ movl 44(%edi),%esi
+ bswap %ecx
+ movl %ebx,68(%esp)
+ bswap %esi
+ movl %ecx,72(%esp)
+ movl %esi,76(%esp)
+ movl 48(%edi),%ebx
+ movl 52(%edi),%ecx
+ bswap %ebx
+ movl 56(%edi),%esi
+ bswap %ecx
+ movl %ebx,80(%esp)
+ bswap %esi
+ movl %ecx,84(%esp)
+ movl %esi,88(%esp)
+ movl 60(%edi),%ebx
+ addl $64,%edi
+ bswap %ebx
+ movl %edi,100(%esp)
+ movl %ebx,92(%esp)
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 32(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1116352408(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 36(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1899447441(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 40(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3049323471(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 44(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3921009573(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 48(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 961987163(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 52(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1508970993(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 56(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2453635748(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 60(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2870763221(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl 64(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3624381080(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl 68(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 310598401(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl 72(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 607225278(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl 76(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1426881987(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl 80(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1925078388(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl 84(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2162078206(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl %edx,%ecx
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl 88(%esp),%ebx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2614888103(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl %edx,%esi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl 92(%esp),%ebx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3248222580(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3835390401(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 4022224774(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 264347078(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 604807628(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 770255983(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1249150122(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1555081692(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1996064986(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2554220882(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2821834349(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2952996808(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3210313671(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3336571891(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3584528711(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 113926993(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,92(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 338241895(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 666307205(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 773529912(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1294757372(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1396182291(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1695183700(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1986661051(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2177026350(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2456956037(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2730485921(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2820302411(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3259730800(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3345764771(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3516065817(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3600352804(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,88(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 4094571909(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,92(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 275423344(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 36(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 88(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 32(%esp),%ebx
+ shrl $10,%edi
+ addl 68(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,32(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 430227734(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 40(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 92(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 36(%esp),%ebx
+ shrl $10,%edi
+ addl 72(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,36(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 506948616(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 44(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 32(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 40(%esp),%ebx
+ shrl $10,%edi
+ addl 76(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,40(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 659060556(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 48(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 36(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 44(%esp),%ebx
+ shrl $10,%edi
+ addl 80(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,44(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 883997877(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 52(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 40(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 48(%esp),%ebx
+ shrl $10,%edi
+ addl 84(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,48(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 958139571(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 56(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 44(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 52(%esp),%ebx
+ shrl $10,%edi
+ addl 88(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,52(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1322822218(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 60(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 48(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 56(%esp),%ebx
+ shrl $10,%edi
+ addl 92(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,56(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1537002063(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 64(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 52(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 60(%esp),%ebx
+ shrl $10,%edi
+ addl 32(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,60(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 1747873779(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 68(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
+ movl 56(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 64(%esp),%ebx
+ shrl $10,%edi
+ addl 36(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 20(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 24(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,64(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,16(%esp)
+ xorl %ecx,%edx
+ addl 28(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 4(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 1955562222(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 72(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 12(%esp),%edx
+ addl %ecx,%ebp
+ movl 60(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 68(%esp),%ebx
+ shrl $10,%edi
+ addl 40(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 16(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 20(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,68(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,12(%esp)
+ xorl %esi,%edx
+ addl 24(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl (%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,28(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2024104815(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 76(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 8(%esp),%edx
+ addl %esi,%eax
+ movl 64(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 72(%esp),%ebx
+ shrl $10,%edi
+ addl 44(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 12(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 16(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,72(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,8(%esp)
+ xorl %ecx,%edx
+ addl 20(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 28(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,24(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2227730452(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 80(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 4(%esp),%edx
+ addl %ecx,%ebp
+ movl 68(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 76(%esp),%ebx
+ shrl $10,%edi
+ addl 48(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 8(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 12(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,76(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,4(%esp)
+ xorl %esi,%edx
+ addl 16(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 24(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,20(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2361852424(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 84(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl (%esp),%edx
+ addl %esi,%eax
+ movl 72(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 80(%esp),%ebx
+ shrl $10,%edi
+ addl 52(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 4(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 8(%esp),%edi
+ xorl %ecx,%edx
+ movl %ebx,80(%esp)
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,(%esp)
+ xorl %ecx,%edx
+ addl 12(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 20(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,16(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 2428436474(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 88(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 28(%esp),%edx
+ addl %ecx,%ebp
+ movl 76(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 84(%esp),%ebx
+ shrl $10,%edi
+ addl 56(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl (%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 4(%esp),%edi
+ xorl %esi,%edx
+ movl %ebx,84(%esp)
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,28(%esp)
+ xorl %esi,%edx
+ addl 8(%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 16(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,12(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 2756734187(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ movl 92(%esp),%ecx
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 24(%esp),%edx
+ addl %esi,%eax
+ movl 80(%esp),%esi
+ movl %ecx,%ebx
+ rorl $11,%ecx
+ movl %esi,%edi
+ rorl $2,%esi
+ xorl %ebx,%ecx
+ shrl $3,%ebx
+ rorl $7,%ecx
+ xorl %edi,%esi
+ xorl %ecx,%ebx
+ rorl $17,%esi
+ addl 88(%esp),%ebx
+ shrl $10,%edi
+ addl 60(%esp),%ebx
+ movl %edx,%ecx
+ xorl %esi,%edi
+ movl 28(%esp),%esi
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl (%esp),%edi
+ xorl %ecx,%edx
+ xorl %edi,%esi
+ rorl $5,%edx
+ andl %ecx,%esi
+ movl %ecx,24(%esp)
+ xorl %ecx,%edx
+ addl 4(%esp),%ebx
+ xorl %esi,%edi
+ rorl $6,%edx
+ movl %eax,%ecx
+ addl %edi,%ebx
+ rorl $9,%ecx
+ movl %eax,%esi
+ movl 12(%esp),%edi
+ xorl %eax,%ecx
+ movl %eax,8(%esp)
+ xorl %edi,%eax
+ rorl $11,%ecx
+ andl %eax,%ebp
+ leal 3204031479(%ebx,%edx,1),%edx
+ xorl %esi,%ecx
+ xorl %edi,%ebp
+ movl 32(%esp),%esi
+ rorl $2,%ecx
+ addl %edx,%ebp
+ addl 20(%esp),%edx
+ addl %ecx,%ebp
+ movl 84(%esp),%ecx
+ movl %esi,%ebx
+ rorl $11,%esi
+ movl %ecx,%edi
+ rorl $2,%ecx
+ xorl %ebx,%esi
+ shrl $3,%ebx
+ rorl $7,%esi
+ xorl %edi,%ecx
+ xorl %esi,%ebx
+ rorl $17,%ecx
+ addl 92(%esp),%ebx
+ shrl $10,%edi
+ addl 64(%esp),%ebx
+ movl %edx,%esi
+ xorl %ecx,%edi
+ movl 24(%esp),%ecx
+ rorl $14,%edx
+ addl %edi,%ebx
+ movl 28(%esp),%edi
+ xorl %esi,%edx
+ xorl %edi,%ecx
+ rorl $5,%edx
+ andl %esi,%ecx
+ movl %esi,20(%esp)
+ xorl %esi,%edx
+ addl (%esp),%ebx
+ xorl %ecx,%edi
+ rorl $6,%edx
+ movl %ebp,%esi
+ addl %edi,%ebx
+ rorl $9,%esi
+ movl %ebp,%ecx
+ movl 8(%esp),%edi
+ xorl %ebp,%esi
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ rorl $11,%esi
+ andl %ebp,%eax
+ leal 3329325298(%ebx,%edx,1),%edx
+ xorl %ecx,%esi
+ xorl %edi,%eax
+ rorl $2,%esi
+ addl %edx,%eax
+ addl 16(%esp),%edx
+ addl %esi,%eax
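+# all 64 rounds done: reload the context pointer from 96(%esp), add the
+# working variables back into the eight state words, and loop while the
+# data pointer at 100(%esp) is below the limit at 104(%esp).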
+ movl 96(%esp),%esi
+ xorl %edi,%ebp
+ movl 12(%esp),%ecx
+ addl (%esi),%eax
+ addl 4(%esi),%ebp
+ addl 8(%esi),%edi
+ addl 12(%esi),%ecx
+ movl %eax,(%esi)
+ movl %ebp,4(%esi)
+ movl %edi,8(%esi)
+ movl %ecx,12(%esi)
+ movl %ebp,4(%esp)
+ xorl %edi,%ebp
+ movl %edi,8(%esp)
+ movl %ecx,12(%esp)
+ movl 20(%esp),%edi
+ movl 24(%esp),%ebx
+ movl 28(%esp),%ecx
+ addl 16(%esi),%edx
+ addl 20(%esi),%edi
+ addl 24(%esi),%ebx
+ addl 28(%esi),%ecx
+ movl %edx,16(%esi)
+ movl %edi,20(%esi)
+ movl %ebx,24(%esi)
+ movl %ecx,28(%esi)
+ movl %edi,20(%esp)
+ movl 100(%esp),%edi
+ movl %ebx,24(%esp)
+ movl %ecx,28(%esp)
+ cmpl 104(%esp),%edi
+ jb L009grand_loop
+ movl 108(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.section __IMPORT,__pointers,non_lazy_symbol_pointers
+L__gnutls_x86_cpuid_s$non_lazy_ptr:
+.indirect_symbol __gnutls_x86_cpuid_s
+.long 0
+.comm __gnutls_x86_cpuid_s,16,2
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/sha512-ssse3-x86.s b/lib/accelerated/x86/macosx/sha512-ssse3-x86.s
new file mode 100644
index 0000000000..cb097f16c8
--- /dev/null
+++ b/lib/accelerated/x86/macosx/sha512-ssse3-x86.s
@@ -0,0 +1,604 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.file "sha512-586.s"
+.text
+.globl _sha512_block_data_order
+.align 4
+_sha512_block_data_order:
+L_sha512_block_data_order_begin:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+ movl 20(%esp),%esi
+ movl 24(%esp),%edi
+ movl 28(%esp),%eax
+ movl %esp,%ebx
+ call L000pic_point
+L000pic_point:
+ popl %ebp
+ leal L001K512-L000pic_point(%ebp),%ebp
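+# classic 32-bit PIC idiom: the call/pop pair above materializes EIP in
+# %ebp so the K512 constant table can be addressed position-independently.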
+ subl $16,%esp
+ andl $-64,%esp
+ shll $7,%eax
+ addl %edi,%eax
+ movl %esi,(%esp)
+ movl %edi,4(%esp)
+ movl %eax,8(%esp)
+ movl %ebx,12(%esp)
+.align 4,0x90
+L002loop_x86:
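+# per-block loop: byte-swap the 128-byte input block one dword at a time
+# and push it onto the stack for the round code below.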
+ movl (%edi),%eax
+ movl 4(%edi),%ebx
+ movl 8(%edi),%ecx
+ movl 12(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 16(%edi),%eax
+ movl 20(%edi),%ebx
+ movl 24(%edi),%ecx
+ movl 28(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 32(%edi),%eax
+ movl 36(%edi),%ebx
+ movl 40(%edi),%ecx
+ movl 44(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 48(%edi),%eax
+ movl 52(%edi),%ebx
+ movl 56(%edi),%ecx
+ movl 60(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 64(%edi),%eax
+ movl 68(%edi),%ebx
+ movl 72(%edi),%ecx
+ movl 76(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 80(%edi),%eax
+ movl 84(%edi),%ebx
+ movl 88(%edi),%ecx
+ movl 92(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 96(%edi),%eax
+ movl 100(%edi),%ebx
+ movl 104(%edi),%ecx
+ movl 108(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ movl 112(%edi),%eax
+ movl 116(%edi),%ebx
+ movl 120(%edi),%ecx
+ movl 124(%edi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ addl $128,%edi
+ subl $72,%esp
+ movl %edi,204(%esp)
+ leal 8(%esp),%edi
+ movl $16,%ecx
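+# the .long below emits the raw bytes 0x89,0xf6,0xf3,0xa5, i.e.
+# "mov %esi,%esi; rep movsl": copy 16 dwords (the 64-byte hash state)
+# from the context at (%esi) into the stack frame at (%edi).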
+.long 2784229001
+.align 4,0x90
+L00300_15_x86:
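+# rounds 0..15: one SHA-512 round per iteration, with every 64-bit
+# rotate/shift/xor emulated on pairs of 32-bit registers (low/high halves).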
+ movl 40(%esp),%ecx
+ movl 44(%esp),%edx
+ movl %ecx,%esi
+ shrl $9,%ecx
+ movl %edx,%edi
+ shrl $9,%edx
+ movl %ecx,%ebx
+ shll $14,%esi
+ movl %edx,%eax
+ shll $14,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%eax
+ shll $4,%esi
+ xorl %edx,%ebx
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $4,%ecx
+ xorl %edi,%eax
+ shrl $4,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 48(%esp),%ecx
+ movl 52(%esp),%edx
+ movl 56(%esp),%esi
+ movl 60(%esp),%edi
+ addl 64(%esp),%eax
+ adcl 68(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ andl 40(%esp),%ecx
+ andl 44(%esp),%edx
+ addl 192(%esp),%eax
+ adcl 196(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ movl (%ebp),%esi
+ movl 4(%ebp),%edi
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 32(%esp),%ecx
+ movl 36(%esp),%edx
+ addl %esi,%eax
+ adcl %edi,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl %eax,32(%esp)
+ movl %ebx,36(%esp)
+ movl %ecx,%esi
+ shrl $2,%ecx
+ movl %edx,%edi
+ shrl $2,%edx
+ movl %ecx,%ebx
+ shll $4,%esi
+ movl %edx,%eax
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%ebx
+ shll $21,%esi
+ xorl %edx,%eax
+ shll $21,%edi
+ xorl %esi,%eax
+ shrl $21,%ecx
+ xorl %edi,%ebx
+ shrl $21,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl 16(%esp),%esi
+ movl 20(%esp),%edi
+ addl (%esp),%eax
+ adcl 4(%esp),%ebx
+ orl %esi,%ecx
+ orl %edi,%edx
+ andl 24(%esp),%ecx
+ andl 28(%esp),%edx
+ andl 8(%esp),%esi
+ andl 12(%esp),%edi
+ orl %esi,%ecx
+ orl %edi,%edx
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movb (%ebp),%dl
+ subl $8,%esp
+ leal 8(%ebp),%ebp
+ cmpb $148,%dl
+ jne L00300_15_x86
+.align 4,0x90
+L00416_79_x86:
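+# rounds 16..79: first extend the message schedule (sigma0/sigma1 of the
+# 64-bit words w[i-2] and w[i-15], again on 32-bit halves), then run the
+# same round body as rounds 0..15.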
+ movl 312(%esp),%ecx
+ movl 316(%esp),%edx
+ movl %ecx,%esi
+ shrl $1,%ecx
+ movl %edx,%edi
+ shrl $1,%edx
+ movl %ecx,%eax
+ shll $24,%esi
+ movl %edx,%ebx
+ shll $24,%edi
+ xorl %esi,%ebx
+ shrl $6,%ecx
+ xorl %edi,%eax
+ shrl $6,%edx
+ xorl %ecx,%eax
+ shll $7,%esi
+ xorl %edx,%ebx
+ shll $1,%edi
+ xorl %esi,%ebx
+ shrl $1,%ecx
+ xorl %edi,%eax
+ shrl $1,%edx
+ xorl %ecx,%eax
+ shll $6,%edi
+ xorl %edx,%ebx
+ xorl %edi,%eax
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movl 208(%esp),%ecx
+ movl 212(%esp),%edx
+ movl %ecx,%esi
+ shrl $6,%ecx
+ movl %edx,%edi
+ shrl $6,%edx
+ movl %ecx,%eax
+ shll $3,%esi
+ movl %edx,%ebx
+ shll $3,%edi
+ xorl %esi,%eax
+ shrl $13,%ecx
+ xorl %edi,%ebx
+ shrl $13,%edx
+ xorl %ecx,%eax
+ shll $10,%esi
+ xorl %edx,%ebx
+ shll $10,%edi
+ xorl %esi,%ebx
+ shrl $10,%ecx
+ xorl %edi,%eax
+ shrl $10,%edx
+ xorl %ecx,%ebx
+ shll $13,%edi
+ xorl %edx,%eax
+ xorl %edi,%eax
+ movl 320(%esp),%ecx
+ movl 324(%esp),%edx
+ addl (%esp),%eax
+ adcl 4(%esp),%ebx
+ movl 248(%esp),%esi
+ movl 252(%esp),%edi
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ addl %esi,%eax
+ adcl %edi,%ebx
+ movl %eax,192(%esp)
+ movl %ebx,196(%esp)
+ movl 40(%esp),%ecx
+ movl 44(%esp),%edx
+ movl %ecx,%esi
+ shrl $9,%ecx
+ movl %edx,%edi
+ shrl $9,%edx
+ movl %ecx,%ebx
+ shll $14,%esi
+ movl %edx,%eax
+ shll $14,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%eax
+ shll $4,%esi
+ xorl %edx,%ebx
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $4,%ecx
+ xorl %edi,%eax
+ shrl $4,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 48(%esp),%ecx
+ movl 52(%esp),%edx
+ movl 56(%esp),%esi
+ movl 60(%esp),%edi
+ addl 64(%esp),%eax
+ adcl 68(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ andl 40(%esp),%ecx
+ andl 44(%esp),%edx
+ addl 192(%esp),%eax
+ adcl 196(%esp),%ebx
+ xorl %esi,%ecx
+ xorl %edi,%edx
+ movl (%ebp),%esi
+ movl 4(%ebp),%edi
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 32(%esp),%ecx
+ movl 36(%esp),%edx
+ addl %esi,%eax
+ adcl %edi,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl %eax,32(%esp)
+ movl %ebx,36(%esp)
+ movl %ecx,%esi
+ shrl $2,%ecx
+ movl %edx,%edi
+ shrl $2,%edx
+ movl %ecx,%ebx
+ shll $4,%esi
+ movl %edx,%eax
+ shll $4,%edi
+ xorl %esi,%ebx
+ shrl $5,%ecx
+ xorl %edi,%eax
+ shrl $5,%edx
+ xorl %ecx,%ebx
+ shll $21,%esi
+ xorl %edx,%eax
+ shll $21,%edi
+ xorl %esi,%eax
+ shrl $21,%ecx
+ xorl %edi,%ebx
+ shrl $21,%edx
+ xorl %ecx,%eax
+ shll $5,%esi
+ xorl %edx,%ebx
+ shll $5,%edi
+ xorl %esi,%eax
+ xorl %edi,%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ movl 16(%esp),%esi
+ movl 20(%esp),%edi
+ addl (%esp),%eax
+ adcl 4(%esp),%ebx
+ orl %esi,%ecx
+ orl %edi,%edx
+ andl 24(%esp),%ecx
+ andl 28(%esp),%edx
+ andl 8(%esp),%esi
+ andl 12(%esp),%edi
+ orl %esi,%ecx
+ orl %edi,%edx
+ addl %ecx,%eax
+ adcl %edx,%ebx
+ movl %eax,(%esp)
+ movl %ebx,4(%esp)
+ movb (%ebp),%dl
+ subl $8,%esp
+ leal 8(%ebp),%ebp
+ cmpb $23,%dl
+ jne L00416_79_x86
+ movl 840(%esp),%esi
+ movl 844(%esp),%edi
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ addl 8(%esp),%eax
+ adcl 12(%esp),%ebx
+ movl %eax,(%esi)
+ movl %ebx,4(%esi)
+ addl 16(%esp),%ecx
+ adcl 20(%esp),%edx
+ movl %ecx,8(%esi)
+ movl %edx,12(%esi)
+ movl 16(%esi),%eax
+ movl 20(%esi),%ebx
+ movl 24(%esi),%ecx
+ movl 28(%esi),%edx
+ addl 24(%esp),%eax
+ adcl 28(%esp),%ebx
+ movl %eax,16(%esi)
+ movl %ebx,20(%esi)
+ addl 32(%esp),%ecx
+ adcl 36(%esp),%edx
+ movl %ecx,24(%esi)
+ movl %edx,28(%esi)
+ movl 32(%esi),%eax
+ movl 36(%esi),%ebx
+ movl 40(%esi),%ecx
+ movl 44(%esi),%edx
+ addl 40(%esp),%eax
+ adcl 44(%esp),%ebx
+ movl %eax,32(%esi)
+ movl %ebx,36(%esi)
+ addl 48(%esp),%ecx
+ adcl 52(%esp),%edx
+ movl %ecx,40(%esi)
+ movl %edx,44(%esi)
+ movl 48(%esi),%eax
+ movl 52(%esi),%ebx
+ movl 56(%esi),%ecx
+ movl 60(%esi),%edx
+ addl 56(%esp),%eax
+ adcl 60(%esp),%ebx
+ movl %eax,48(%esi)
+ movl %ebx,52(%esi)
+ addl 64(%esp),%ecx
+ adcl 68(%esp),%edx
+ movl %ecx,56(%esi)
+ movl %edx,60(%esi)
+ addl $840,%esp
+ subl $640,%ebp
+ cmpl 8(%esp),%edi
+ jb L002loop_x86
+ movl 12(%esp),%esp
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.align 6,0x90
+L001K512:
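+# the 80 64-bit SHA-512 round constants, each stored as a pair of .long
+# values in (low dword, high dword) order; the final two entries encode
+# 0x0001020304050607 / 0x08090a0b0c0d0e0f, which look like a byte-swap
+# mask rather than round constants.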
+.long 3609767458,1116352408
+.long 602891725,1899447441
+.long 3964484399,3049323471
+.long 2173295548,3921009573
+.long 4081628472,961987163
+.long 3053834265,1508970993
+.long 2937671579,2453635748
+.long 3664609560,2870763221
+.long 2734883394,3624381080
+.long 1164996542,310598401
+.long 1323610764,607225278
+.long 3590304994,1426881987
+.long 4068182383,1925078388
+.long 991336113,2162078206
+.long 633803317,2614888103
+.long 3479774868,3248222580
+.long 2666613458,3835390401
+.long 944711139,4022224774
+.long 2341262773,264347078
+.long 2007800933,604807628
+.long 1495990901,770255983
+.long 1856431235,1249150122
+.long 3175218132,1555081692
+.long 2198950837,1996064986
+.long 3999719339,2554220882
+.long 766784016,2821834349
+.long 2566594879,2952996808
+.long 3203337956,3210313671
+.long 1034457026,3336571891
+.long 2466948901,3584528711
+.long 3758326383,113926993
+.long 168717936,338241895
+.long 1188179964,666307205
+.long 1546045734,773529912
+.long 1522805485,1294757372
+.long 2643833823,1396182291
+.long 2343527390,1695183700
+.long 1014477480,1986661051
+.long 1206759142,2177026350
+.long 344077627,2456956037
+.long 1290863460,2730485921
+.long 3158454273,2820302411
+.long 3505952657,3259730800
+.long 106217008,3345764771
+.long 3606008344,3516065817
+.long 1432725776,3600352804
+.long 1467031594,4094571909
+.long 851169720,275423344
+.long 3100823752,430227734
+.long 1363258195,506948616
+.long 3750685593,659060556
+.long 3785050280,883997877
+.long 3318307427,958139571
+.long 3812723403,1322822218
+.long 2003034995,1537002063
+.long 3602036899,1747873779
+.long 1575990012,1955562222
+.long 1125592928,2024104815
+.long 2716904306,2227730452
+.long 442776044,2361852424
+.long 593698344,2428436474
+.long 3733110249,2756734187
+.long 2999351573,3204031479
+.long 3815920427,3329325298
+.long 3928383900,3391569614
+.long 566280711,3515267271
+.long 3454069534,3940187606
+.long 4000239992,4118630271
+.long 1914138554,116418474
+.long 2731055270,174292421
+.long 3203993006,289380356
+.long 320620315,460393269
+.long 587496836,685471733
+.long 1086792851,852142971
+.long 365543100,1017036298
+.long 2618297676,1126000580
+.long 3409855158,1288033470
+.long 4234509866,1501505948
+.long 987167468,1607167915
+.long 1246189591,1816402316
+.long 67438087,66051
+.long 202182159,134810123
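+# NUL-terminated identification string:
+# "SHA512 block transform for x86, CRYPTOGAMS by <appro@openssl.org>"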
+.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte 62,0
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s
new file mode 100644
index 0000000000..a845708030
--- /dev/null
+++ b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s
@@ -0,0 +1,2881 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# *** This file is auto-generated ***
+#
+.text
+
+
+.globl _sha256_block_data_order
+
+.p2align 4
+_sha256_block_data_order:
+ leaq __gnutls_x86_cpuid_s(%rip),%r11
+ movl 0(%r11),%r9d
+ movl 4(%r11),%r10d
+ movl 8(%r11),%r11d
+ testl $512,%r10d
+ jnz L$ssse3_shortcut
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $64+32,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+L$prologue:
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+ jmp L$loop
+
+.p2align 4
+L$loop:
+ movl %ebx,%edi
+ leaq K256(%rip),%rbp
+ xorl %ecx,%edi
+ movl 0(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,0(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r11d
+ movl 4(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,4(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r10d
+ movl 8(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,8(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r9d
+ movl 12(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,12(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ addl %r14d,%r8d
+ movl 16(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,16(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%edx
+ movl 20(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,20(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ecx
+ movl 24(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,24(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ebx
+ movl 28(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,28(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ addl %r14d,%eax
+ movl 32(%rsi),%r12d
+ movl %r8d,%r13d
+ movl %eax,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,32(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r11d
+ movl 36(%rsi),%r12d
+ movl %edx,%r13d
+ movl %r11d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,36(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r10d
+ movl 40(%rsi),%r12d
+ movl %ecx,%r13d
+ movl %r10d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,40(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%r9d
+ movl 44(%rsi),%r12d
+ movl %ebx,%r13d
+ movl %r9d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,44(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ addl %r14d,%r8d
+ movl 48(%rsi),%r12d
+ movl %eax,%r13d
+ movl %r8d,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,48(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%edx
+ movl 52(%rsi),%r12d
+ movl %r11d,%r13d
+ movl %edx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,52(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ecx
+ movl 56(%rsi),%r12d
+ movl %r10d,%r13d
+ movl %ecx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,56(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ addl %r14d,%ebx
+ movl 60(%rsi),%r12d
+ movl %r9d,%r13d
+ movl %ebx,%r14d
+ bswapl %r12d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,60(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ jmp L$rounds_16_xx
+.p2align 4
+L$rounds_16_xx:
+ movl 4(%rsp),%r13d
+ movl 56(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%eax
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 36(%rsp),%r12d
+
+ addl 0(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,0(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ movl 8(%rsp),%r13d
+ movl 60(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r11d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 40(%rsp),%r12d
+
+ addl 4(%rsp),%r12d
+ movl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,4(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ movl 12(%rsp),%r13d
+ movl 0(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r10d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 44(%rsp),%r12d
+
+ addl 8(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,8(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ movl 16(%rsp),%r13d
+ movl 4(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r9d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 48(%rsp),%r12d
+
+ addl 12(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,12(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ movl 20(%rsp),%r13d
+ movl 8(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r8d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 52(%rsp),%r12d
+
+ addl 16(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,16(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ movl 24(%rsp),%r13d
+ movl 12(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%edx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 56(%rsp),%r12d
+
+ addl 20(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,20(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ movl 28(%rsp),%r13d
+ movl 16(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ecx
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 60(%rsp),%r12d
+
+ addl 24(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,24(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ movl 32(%rsp),%r13d
+ movl 20(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ebx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 0(%rsp),%r12d
+
+ addl 28(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,28(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ movl 36(%rsp),%r13d
+ movl 24(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%eax
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 4(%rsp),%r12d
+
+ addl 32(%rsp),%r12d
+ movl %r8d,%r13d
+ addl %r15d,%r12d
+ movl %eax,%r14d
+ rorl $14,%r13d
+ movl %r9d,%r15d
+
+ xorl %r8d,%r13d
+ rorl $9,%r14d
+ xorl %r10d,%r15d
+
+ movl %r12d,32(%rsp)
+ xorl %eax,%r14d
+ andl %r8d,%r15d
+
+ rorl $5,%r13d
+ addl %r11d,%r12d
+ xorl %r10d,%r15d
+
+ rorl $11,%r14d
+ xorl %r8d,%r13d
+ addl %r15d,%r12d
+
+ movl %eax,%r15d
+ addl (%rbp),%r12d
+ xorl %eax,%r14d
+
+ xorl %ebx,%r15d
+ rorl $6,%r13d
+ movl %ebx,%r11d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r11d
+ addl %r12d,%edx
+ addl %r12d,%r11d
+
+ leaq 4(%rbp),%rbp
+ movl 40(%rsp),%r13d
+ movl 28(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r11d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 8(%rsp),%r12d
+
+ addl 36(%rsp),%r12d
+ movl %edx,%r13d
+ addl %edi,%r12d
+ movl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r8d,%edi
+
+ xorl %edx,%r13d
+ rorl $9,%r14d
+ xorl %r9d,%edi
+
+ movl %r12d,36(%rsp)
+ xorl %r11d,%r14d
+ andl %edx,%edi
+
+ rorl $5,%r13d
+ addl %r10d,%r12d
+ xorl %r9d,%edi
+
+ rorl $11,%r14d
+ xorl %edx,%r13d
+ addl %edi,%r12d
+
+ movl %r11d,%edi
+ addl (%rbp),%r12d
+ xorl %r11d,%r14d
+
+ xorl %eax,%edi
+ rorl $6,%r13d
+ movl %eax,%r10d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r10d
+ addl %r12d,%ecx
+ addl %r12d,%r10d
+
+ leaq 4(%rbp),%rbp
+ movl 44(%rsp),%r13d
+ movl 32(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r10d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 12(%rsp),%r12d
+
+ addl 40(%rsp),%r12d
+ movl %ecx,%r13d
+ addl %r15d,%r12d
+ movl %r10d,%r14d
+ rorl $14,%r13d
+ movl %edx,%r15d
+
+ xorl %ecx,%r13d
+ rorl $9,%r14d
+ xorl %r8d,%r15d
+
+ movl %r12d,40(%rsp)
+ xorl %r10d,%r14d
+ andl %ecx,%r15d
+
+ rorl $5,%r13d
+ addl %r9d,%r12d
+ xorl %r8d,%r15d
+
+ rorl $11,%r14d
+ xorl %ecx,%r13d
+ addl %r15d,%r12d
+
+ movl %r10d,%r15d
+ addl (%rbp),%r12d
+ xorl %r10d,%r14d
+
+ xorl %r11d,%r15d
+ rorl $6,%r13d
+ movl %r11d,%r9d
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%r9d
+ addl %r12d,%ebx
+ addl %r12d,%r9d
+
+ leaq 4(%rbp),%rbp
+ movl 48(%rsp),%r13d
+ movl 36(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r9d
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 16(%rsp),%r12d
+
+ addl 44(%rsp),%r12d
+ movl %ebx,%r13d
+ addl %edi,%r12d
+ movl %r9d,%r14d
+ rorl $14,%r13d
+ movl %ecx,%edi
+
+ xorl %ebx,%r13d
+ rorl $9,%r14d
+ xorl %edx,%edi
+
+ movl %r12d,44(%rsp)
+ xorl %r9d,%r14d
+ andl %ebx,%edi
+
+ rorl $5,%r13d
+ addl %r8d,%r12d
+ xorl %edx,%edi
+
+ rorl $11,%r14d
+ xorl %ebx,%r13d
+ addl %edi,%r12d
+
+ movl %r9d,%edi
+ addl (%rbp),%r12d
+ xorl %r9d,%r14d
+
+ xorl %r10d,%edi
+ rorl $6,%r13d
+ movl %r10d,%r8d
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%r8d
+ addl %r12d,%eax
+ addl %r12d,%r8d
+
+ leaq 20(%rbp),%rbp
+ movl 52(%rsp),%r13d
+ movl 40(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%r8d
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 20(%rsp),%r12d
+
+ addl 48(%rsp),%r12d
+ movl %eax,%r13d
+ addl %r15d,%r12d
+ movl %r8d,%r14d
+ rorl $14,%r13d
+ movl %ebx,%r15d
+
+ xorl %eax,%r13d
+ rorl $9,%r14d
+ xorl %ecx,%r15d
+
+ movl %r12d,48(%rsp)
+ xorl %r8d,%r14d
+ andl %eax,%r15d
+
+ rorl $5,%r13d
+ addl %edx,%r12d
+ xorl %ecx,%r15d
+
+ rorl $11,%r14d
+ xorl %eax,%r13d
+ addl %r15d,%r12d
+
+ movl %r8d,%r15d
+ addl (%rbp),%r12d
+ xorl %r8d,%r14d
+
+ xorl %r9d,%r15d
+ rorl $6,%r13d
+ movl %r9d,%edx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%edx
+ addl %r12d,%r11d
+ addl %r12d,%edx
+
+ leaq 4(%rbp),%rbp
+ movl 56(%rsp),%r13d
+ movl 44(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%edx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 24(%rsp),%r12d
+
+ addl 52(%rsp),%r12d
+ movl %r11d,%r13d
+ addl %edi,%r12d
+ movl %edx,%r14d
+ rorl $14,%r13d
+ movl %eax,%edi
+
+ xorl %r11d,%r13d
+ rorl $9,%r14d
+ xorl %ebx,%edi
+
+ movl %r12d,52(%rsp)
+ xorl %edx,%r14d
+ andl %r11d,%edi
+
+ rorl $5,%r13d
+ addl %ecx,%r12d
+ xorl %ebx,%edi
+
+ rorl $11,%r14d
+ xorl %r11d,%r13d
+ addl %edi,%r12d
+
+ movl %edx,%edi
+ addl (%rbp),%r12d
+ xorl %edx,%r14d
+
+ xorl %r8d,%edi
+ rorl $6,%r13d
+ movl %r8d,%ecx
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%ecx
+ addl %r12d,%r10d
+ addl %r12d,%ecx
+
+ leaq 4(%rbp),%rbp
+ movl 60(%rsp),%r13d
+ movl 48(%rsp),%r15d
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ecx
+ movl %r15d,%r14d
+ rorl $2,%r15d
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%r15d
+ shrl $10,%r14d
+
+ rorl $17,%r15d
+ xorl %r13d,%r12d
+ xorl %r14d,%r15d
+ addl 28(%rsp),%r12d
+
+ addl 56(%rsp),%r12d
+ movl %r10d,%r13d
+ addl %r15d,%r12d
+ movl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r11d,%r15d
+
+ xorl %r10d,%r13d
+ rorl $9,%r14d
+ xorl %eax,%r15d
+
+ movl %r12d,56(%rsp)
+ xorl %ecx,%r14d
+ andl %r10d,%r15d
+
+ rorl $5,%r13d
+ addl %ebx,%r12d
+ xorl %eax,%r15d
+
+ rorl $11,%r14d
+ xorl %r10d,%r13d
+ addl %r15d,%r12d
+
+ movl %ecx,%r15d
+ addl (%rbp),%r12d
+ xorl %ecx,%r14d
+
+ xorl %edx,%r15d
+ rorl $6,%r13d
+ movl %edx,%ebx
+
+ andl %r15d,%edi
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %edi,%ebx
+ addl %r12d,%r9d
+ addl %r12d,%ebx
+
+ leaq 4(%rbp),%rbp
+ movl 0(%rsp),%r13d
+ movl 52(%rsp),%edi
+
+ movl %r13d,%r12d
+ rorl $11,%r13d
+ addl %r14d,%ebx
+ movl %edi,%r14d
+ rorl $2,%edi
+
+ xorl %r12d,%r13d
+ shrl $3,%r12d
+ rorl $7,%r13d
+ xorl %r14d,%edi
+ shrl $10,%r14d
+
+ rorl $17,%edi
+ xorl %r13d,%r12d
+ xorl %r14d,%edi
+ addl 32(%rsp),%r12d
+
+ addl 60(%rsp),%r12d
+ movl %r9d,%r13d
+ addl %edi,%r12d
+ movl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r10d,%edi
+
+ xorl %r9d,%r13d
+ rorl $9,%r14d
+ xorl %r11d,%edi
+
+ movl %r12d,60(%rsp)
+ xorl %ebx,%r14d
+ andl %r9d,%edi
+
+ rorl $5,%r13d
+ addl %eax,%r12d
+ xorl %r11d,%edi
+
+ rorl $11,%r14d
+ xorl %r9d,%r13d
+ addl %edi,%r12d
+
+ movl %ebx,%edi
+ addl (%rbp),%r12d
+ xorl %ebx,%r14d
+
+ xorl %ecx,%edi
+ rorl $6,%r13d
+ movl %ecx,%eax
+
+ andl %edi,%r15d
+ rorl $2,%r14d
+ addl %r13d,%r12d
+
+ xorl %r15d,%eax
+ addl %r12d,%r8d
+ addl %r12d,%eax
+
+ leaq 20(%rbp),%rbp
+ cmpb $0,3(%rbp)
+ jnz L$rounds_16_xx
+
+ movq 64+0(%rsp),%rdi
+ addl %r14d,%eax
+ leaq 64(%rsi),%rsi
+
+ addl 0(%rdi),%eax
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb L$loop
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$epilogue:
+ .byte 0xf3,0xc3
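+# (0xf3,0xc3 encodes "rep ret"; OpenSSL's perlasm emits it as raw bytes
+# instead of a plain ret to sidestep an AMD K8 branch-predictor penalty.)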
+
+.p2align 6
+
+K256:
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
+.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
+.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+
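+# Note: in the SSSE3 path below, instructions that pre-SSSE3 assemblers
+# cannot encode are emitted as raw opcode bytes: ".byte 102,15,56,0,..."
+# is pshufb (used with the byte-swap mask stored at K256+512), and
+# ".byte 102,15,58,15,..." is palignr (the message-schedule window shift).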
+.p2align 6
+sha256_block_data_order_ssse3:
+L$ssse3_shortcut:
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ movq %rsp,%r11
+ shlq $4,%rdx
+ subq $96,%rsp
+ leaq (%rsi,%rdx,4),%rdx
+ andq $-64,%rsp
+ movq %rdi,64+0(%rsp)
+ movq %rsi,64+8(%rsp)
+ movq %rdx,64+16(%rsp)
+ movq %r11,64+24(%rsp)
+L$prologue_ssse3:
+
+ movl 0(%rdi),%eax
+ movl 4(%rdi),%ebx
+ movl 8(%rdi),%ecx
+ movl 12(%rdi),%edx
+ movl 16(%rdi),%r8d
+ movl 20(%rdi),%r9d
+ movl 24(%rdi),%r10d
+ movl 28(%rdi),%r11d
+
+
+ jmp L$loop_ssse3
+.p2align 4
+L$loop_ssse3:
+ movdqa K256+512(%rip),%xmm7
+ movdqu 0(%rsi),%xmm0
+ movdqu 16(%rsi),%xmm1
+ movdqu 32(%rsi),%xmm2
+ movdqu 48(%rsi),%xmm3
+.byte 102,15,56,0,199
+ leaq K256(%rip),%rbp
+.byte 102,15,56,0,207
+ movdqa 0(%rbp),%xmm4
+.byte 102,15,56,0,215
+ movdqa 32(%rbp),%xmm5
+ paddd %xmm0,%xmm4
+ movdqa 64(%rbp),%xmm6
+.byte 102,15,56,0,223
+ movdqa 96(%rbp),%xmm7
+ paddd %xmm1,%xmm5
+ paddd %xmm2,%xmm6
+ paddd %xmm3,%xmm7
+ movdqa %xmm4,0(%rsp)
+ movl %eax,%r14d
+ movdqa %xmm5,16(%rsp)
+ movl %ebx,%edi
+ movdqa %xmm6,32(%rsp)
+ xorl %ecx,%edi
+ movdqa %xmm7,48(%rsp)
+ movl %r8d,%r13d
+ jmp L$ssse3_00_47
+
+.p2align 4
+L$ssse3_00_47:
+ subq $-32*4,%rbp
+ rorl $14,%r13d
+ movdqa %xmm1,%xmm4
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ movdqa %xmm3,%xmm7
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+.byte 102,15,58,15,224,4
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+.byte 102,15,58,15,250,4
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ paddd %xmm7,%xmm0
+ rorl $2,%r14d
+ addl %r11d,%edx
+ psrld $7,%xmm6
+ addl %edi,%r11d
+ movl %edx,%r13d
+ pshufd $250,%xmm3,%xmm7
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %r11d,%r14d
+ pxor %xmm5,%xmm4
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ pslld $11,%xmm5
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ pxor %xmm6,%xmm4
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ paddd %xmm4,%xmm0
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ psrlq $17,%xmm6
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ psrldq $8,%xmm7
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ paddd %xmm7,%xmm0
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ pshufd $80,%xmm0,%xmm7
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ movdqa %xmm7,%xmm6
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ psrld $10,%xmm7
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ psrlq $2,%xmm6
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ pxor %xmm6,%xmm7
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ movdqa 0(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ paddd %xmm7,%xmm0
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ paddd %xmm0,%xmm6
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ movdqa %xmm6,0(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm2,%xmm4
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ movdqa %xmm0,%xmm7
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+.byte 102,15,58,15,225,4
+ andl %eax,%r12d
+ xorl %eax,%r13d
+.byte 102,15,58,15,251,4
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ paddd %xmm7,%xmm1
+ rorl $2,%r14d
+ addl %edx,%r11d
+ psrld $7,%xmm6
+ addl %edi,%edx
+ movl %r11d,%r13d
+ pshufd $250,%xmm0,%xmm7
+ addl %edx,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%edx
+ movl %eax,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %edx,%r14d
+ pxor %xmm5,%xmm4
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ pslld $11,%xmm5
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ paddd %xmm4,%xmm1
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ psrlq $17,%xmm6
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ psrldq $8,%xmm7
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ paddd %xmm7,%xmm1
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ pshufd $80,%xmm1,%xmm7
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ movdqa %xmm7,%xmm6
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ psrld $10,%xmm7
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ psrlq $2,%xmm6
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ pxor %xmm6,%xmm7
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ movdqa 32(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ paddd %xmm7,%xmm1
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ paddd %xmm1,%xmm6
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movdqa %xmm6,16(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm3,%xmm4
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ movdqa %xmm1,%xmm7
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+.byte 102,15,58,15,226,4
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+.byte 102,15,58,15,248,4
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ paddd %xmm7,%xmm2
+ rorl $2,%r14d
+ addl %r11d,%edx
+ psrld $7,%xmm6
+ addl %edi,%r11d
+ movl %edx,%r13d
+ pshufd $250,%xmm1,%xmm7
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %r11d,%r14d
+ pxor %xmm5,%xmm4
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ pslld $11,%xmm5
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ pxor %xmm6,%xmm4
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ paddd %xmm4,%xmm2
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ psrlq $17,%xmm6
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ psrldq $8,%xmm7
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ paddd %xmm7,%xmm2
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ pshufd $80,%xmm2,%xmm7
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ movdqa %xmm7,%xmm6
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ psrld $10,%xmm7
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ psrlq $2,%xmm6
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ pxor %xmm6,%xmm7
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ movdqa 64(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ paddd %xmm7,%xmm2
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ paddd %xmm2,%xmm6
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ movdqa %xmm6,32(%rsp)
+ rorl $14,%r13d
+ movdqa %xmm0,%xmm4
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ movdqa %xmm2,%xmm7
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+.byte 102,15,58,15,227,4
+ andl %eax,%r12d
+ xorl %eax,%r13d
+.byte 102,15,58,15,249,4
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm4,%xmm5
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ movdqa %xmm4,%xmm6
+ rorl $6,%r13d
+ andl %r15d,%edi
+ psrld $3,%xmm4
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ paddd %xmm7,%xmm3
+ rorl $2,%r14d
+ addl %edx,%r11d
+ psrld $7,%xmm6
+ addl %edi,%edx
+ movl %r11d,%r13d
+ pshufd $250,%xmm2,%xmm7
+ addl %edx,%r14d
+ rorl $14,%r13d
+ pslld $14,%xmm5
+ movl %r14d,%edx
+ movl %eax,%r12d
+ pxor %xmm6,%xmm4
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ psrld $11,%xmm6
+ xorl %edx,%r14d
+ pxor %xmm5,%xmm4
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ pslld $11,%xmm5
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ pxor %xmm6,%xmm4
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ movdqa %xmm7,%xmm6
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ pxor %xmm5,%xmm4
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ psrld $10,%xmm7
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ paddd %xmm4,%xmm3
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ psrlq $17,%xmm6
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ pxor %xmm6,%xmm7
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ psrlq $2,%xmm6
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ pxor %xmm6,%xmm7
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ pshufd $128,%xmm7,%xmm7
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ psrldq $8,%xmm7
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ paddd %xmm7,%xmm3
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ pshufd $80,%xmm3,%xmm7
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ movdqa %xmm7,%xmm6
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ psrld $10,%xmm7
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ psrlq $17,%xmm6
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ pxor %xmm6,%xmm7
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ psrlq $2,%xmm6
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ pxor %xmm6,%xmm7
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ pshufd $8,%xmm7,%xmm7
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ movdqa 96(%rbp),%xmm6
+ rorl $6,%r13d
+ andl %edi,%r15d
+ pslldq $8,%xmm7
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ paddd %xmm7,%xmm3
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ paddd %xmm3,%xmm6
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movdqa %xmm6,48(%rsp)
+ cmpb $0,131(%rbp)
+ jne L$ssse3_00_47
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 0(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ rorl $2,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 4(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 8(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 12(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 16(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ rorl $2,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 20(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 24(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 28(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ rorl $14,%r13d
+ movl %r14d,%eax
+ movl %r9d,%r12d
+ rorl $9,%r14d
+ xorl %r8d,%r13d
+ xorl %r10d,%r12d
+ rorl $5,%r13d
+ xorl %eax,%r14d
+ andl %r8d,%r12d
+ xorl %r8d,%r13d
+ addl 32(%rsp),%r11d
+ movl %eax,%r15d
+ xorl %r10d,%r12d
+ rorl $11,%r14d
+ xorl %ebx,%r15d
+ addl %r12d,%r11d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %eax,%r14d
+ addl %r13d,%r11d
+ xorl %ebx,%edi
+ rorl $2,%r14d
+ addl %r11d,%edx
+ addl %edi,%r11d
+ movl %edx,%r13d
+ addl %r11d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r11d
+ movl %r8d,%r12d
+ rorl $9,%r14d
+ xorl %edx,%r13d
+ xorl %r9d,%r12d
+ rorl $5,%r13d
+ xorl %r11d,%r14d
+ andl %edx,%r12d
+ xorl %edx,%r13d
+ addl 36(%rsp),%r10d
+ movl %r11d,%edi
+ xorl %r9d,%r12d
+ rorl $11,%r14d
+ xorl %eax,%edi
+ addl %r12d,%r10d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r11d,%r14d
+ addl %r13d,%r10d
+ xorl %eax,%r15d
+ rorl $2,%r14d
+ addl %r10d,%ecx
+ addl %r15d,%r10d
+ movl %ecx,%r13d
+ addl %r10d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r10d
+ movl %edx,%r12d
+ rorl $9,%r14d
+ xorl %ecx,%r13d
+ xorl %r8d,%r12d
+ rorl $5,%r13d
+ xorl %r10d,%r14d
+ andl %ecx,%r12d
+ xorl %ecx,%r13d
+ addl 40(%rsp),%r9d
+ movl %r10d,%r15d
+ xorl %r8d,%r12d
+ rorl $11,%r14d
+ xorl %r11d,%r15d
+ addl %r12d,%r9d
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r10d,%r14d
+ addl %r13d,%r9d
+ xorl %r11d,%edi
+ rorl $2,%r14d
+ addl %r9d,%ebx
+ addl %edi,%r9d
+ movl %ebx,%r13d
+ addl %r9d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r9d
+ movl %ecx,%r12d
+ rorl $9,%r14d
+ xorl %ebx,%r13d
+ xorl %edx,%r12d
+ rorl $5,%r13d
+ xorl %r9d,%r14d
+ andl %ebx,%r12d
+ xorl %ebx,%r13d
+ addl 44(%rsp),%r8d
+ movl %r9d,%edi
+ xorl %edx,%r12d
+ rorl $11,%r14d
+ xorl %r10d,%edi
+ addl %r12d,%r8d
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %r9d,%r14d
+ addl %r13d,%r8d
+ xorl %r10d,%r15d
+ rorl $2,%r14d
+ addl %r8d,%eax
+ addl %r15d,%r8d
+ movl %eax,%r13d
+ addl %r8d,%r14d
+ rorl $14,%r13d
+ movl %r14d,%r8d
+ movl %ebx,%r12d
+ rorl $9,%r14d
+ xorl %eax,%r13d
+ xorl %ecx,%r12d
+ rorl $5,%r13d
+ xorl %r8d,%r14d
+ andl %eax,%r12d
+ xorl %eax,%r13d
+ addl 48(%rsp),%edx
+ movl %r8d,%r15d
+ xorl %ecx,%r12d
+ rorl $11,%r14d
+ xorl %r9d,%r15d
+ addl %r12d,%edx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %r8d,%r14d
+ addl %r13d,%edx
+ xorl %r9d,%edi
+ rorl $2,%r14d
+ addl %edx,%r11d
+ addl %edi,%edx
+ movl %r11d,%r13d
+ addl %edx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%edx
+ movl %eax,%r12d
+ rorl $9,%r14d
+ xorl %r11d,%r13d
+ xorl %ebx,%r12d
+ rorl $5,%r13d
+ xorl %edx,%r14d
+ andl %r11d,%r12d
+ xorl %r11d,%r13d
+ addl 52(%rsp),%ecx
+ movl %edx,%edi
+ xorl %ebx,%r12d
+ rorl $11,%r14d
+ xorl %r8d,%edi
+ addl %r12d,%ecx
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %edx,%r14d
+ addl %r13d,%ecx
+ xorl %r8d,%r15d
+ rorl $2,%r14d
+ addl %ecx,%r10d
+ addl %r15d,%ecx
+ movl %r10d,%r13d
+ addl %ecx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ecx
+ movl %r11d,%r12d
+ rorl $9,%r14d
+ xorl %r10d,%r13d
+ xorl %eax,%r12d
+ rorl $5,%r13d
+ xorl %ecx,%r14d
+ andl %r10d,%r12d
+ xorl %r10d,%r13d
+ addl 56(%rsp),%ebx
+ movl %ecx,%r15d
+ xorl %eax,%r12d
+ rorl $11,%r14d
+ xorl %edx,%r15d
+ addl %r12d,%ebx
+ rorl $6,%r13d
+ andl %r15d,%edi
+ xorl %ecx,%r14d
+ addl %r13d,%ebx
+ xorl %edx,%edi
+ rorl $2,%r14d
+ addl %ebx,%r9d
+ addl %edi,%ebx
+ movl %r9d,%r13d
+ addl %ebx,%r14d
+ rorl $14,%r13d
+ movl %r14d,%ebx
+ movl %r10d,%r12d
+ rorl $9,%r14d
+ xorl %r9d,%r13d
+ xorl %r11d,%r12d
+ rorl $5,%r13d
+ xorl %ebx,%r14d
+ andl %r9d,%r12d
+ xorl %r9d,%r13d
+ addl 60(%rsp),%eax
+ movl %ebx,%edi
+ xorl %r11d,%r12d
+ rorl $11,%r14d
+ xorl %ecx,%edi
+ addl %r12d,%eax
+ rorl $6,%r13d
+ andl %edi,%r15d
+ xorl %ebx,%r14d
+ addl %r13d,%eax
+ xorl %ecx,%r15d
+ rorl $2,%r14d
+ addl %eax,%r8d
+ addl %r15d,%eax
+ movl %r8d,%r13d
+ addl %eax,%r14d
+ movq 64+0(%rsp),%rdi
+ movl %r14d,%eax
+
+ addl 0(%rdi),%eax
+ leaq 64(%rsi),%rsi
+ addl 4(%rdi),%ebx
+ addl 8(%rdi),%ecx
+ addl 12(%rdi),%edx
+ addl 16(%rdi),%r8d
+ addl 20(%rdi),%r9d
+ addl 24(%rdi),%r10d
+ addl 28(%rdi),%r11d
+
+ cmpq 64+16(%rsp),%rsi
+
+ movl %eax,0(%rdi)
+ movl %ebx,4(%rdi)
+ movl %ecx,8(%rdi)
+ movl %edx,12(%rdi)
+ movl %r8d,16(%rdi)
+ movl %r9d,20(%rdi)
+ movl %r10d,24(%rdi)
+ movl %r11d,28(%rdi)
+ jb L$loop_ssse3
+
+ movq 64+24(%rsp),%rsi
+ movq (%rsi),%r15
+ movq 8(%rsi),%r14
+ movq 16(%rsi),%r13
+ movq 24(%rsi),%r12
+ movq 32(%rsi),%rbp
+ movq 40(%rsi),%rbx
+ leaq 48(%rsi),%rsp
+L$epilogue_ssse3:
+ .byte 0xf3,0xc3
+
+
+.section .note.GNU-stack,"",%progbits
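
For orientation while reading the unrolled rounds above: each block of
roughly thirty instructions is one SHA-256 round, and the interleaved
rotate/xor chains are an incremental decomposition of the FIPS 180-4
rotations (the rorl $14/$5/$6 chain on %r13d accumulates
ror6^ror11^ror25 of e; the rorl $9/$11/$2 chain on %r14d accumulates
ror2^ror13^ror22 of a). Below is a plain-C restatement of one round and
one message-schedule step, as a reference sketch only; ror32,
sha256_round and sha256_schedule are illustrative names, not part of the
patch:

    #include <stdint.h>

    static inline uint32_t ror32(uint32_t x, unsigned n)
    {
            return (x >> n) | (x << (32 - n));
    }

    /* One round: s[8] holds {a,b,c,d,e,f,g,h}; Kt and Wt are the round
     * constant and schedule word that the assembly reads from (%rbp)
     * and (%rsp) respectively. */
    static void sha256_round(uint32_t s[8], uint32_t Kt, uint32_t Wt)
    {
            uint32_t S1 = ror32(s[4], 6) ^ ror32(s[4], 11) ^ ror32(s[4], 25);
            uint32_t Ch = (s[4] & s[5]) ^ (~s[4] & s[6]);
            uint32_t T1 = s[7] + S1 + Ch + Kt + Wt;
            uint32_t S0 = ror32(s[0], 2) ^ ror32(s[0], 13) ^ ror32(s[0], 22);
            uint32_t Maj = (s[0] & s[1]) ^ (s[0] & s[2]) ^ (s[1] & s[2]);

            s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + T1;
            s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = T1 + S0 + Maj;
    }

    /* Schedule step for t >= 16, over the 16-word rolling window that
     * L$rounds_16_xx keeps at 0(%rsp)..60(%rsp). */
    static uint32_t sha256_schedule(uint32_t W[16], unsigned t)
    {
            uint32_t w15 = W[(t - 15) & 15], w2 = W[(t - 2) & 15];
            uint32_t s0 = ror32(w15, 7) ^ ror32(w15, 18) ^ (w15 >> 3);
            uint32_t s1 = ror32(w2, 17) ^ ror32(w2, 19) ^ (w2 >> 10);

            return W[t & 15] += s0 + W[(t - 7) & 15] + s1;
    }

The SSSE3 variant computes the same schedule four words at a time in xmm
registers, which is why its rounds are interleaved with
psrld/pslld/palignr sequences instead of scalar shifts.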
diff --git a/lib/accelerated/x86/sha-padlock.h b/lib/accelerated/x86/sha-padlock.h
index 05af543075..30d3ccec28 100644
--- a/lib/accelerated/x86/sha-padlock.h
+++ b/lib/accelerated/x86/sha-padlock.h
@@ -30,4 +30,7 @@ extern const struct nettle_hash padlock_sha256;
extern const struct nettle_hash padlock_sha384;
extern const struct nettle_hash padlock_sha512;
+extern const gnutls_crypto_mac_st hmac_sha_padlock_nano_struct;
+extern const gnutls_crypto_digest_st sha_padlock_nano_struct;
+
#endif
diff --git a/lib/accelerated/x86/sha-x86.c b/lib/accelerated/x86/sha-x86.c
new file mode 100644
index 0000000000..054dc6ce73
--- /dev/null
+++ b/lib/accelerated/x86/sha-x86.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (C) 2011-2012 Free Software Foundation, Inc.
+ *
+ * Author: Nikos Mavrogiannopoulos
+ *
+ * This file is part of GnuTLS.
+ *
+ * The GnuTLS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>
+ *
+ */
+
+#include <gnutls_errors.h>
+#include <gnutls_int.h>
+#include <gnutls/crypto.h>
+#include <aes-x86.h>
+#include <nettle/sha.h>
+#include <nettle/macros.h>
+#include <nettle/nettle-meta.h>
+#include <sha-x86.h>
+#include <x86.h>
+
+void sha1_block_data_order(void *c, const void *p, size_t len);
+void sha256_block_data_order(void *c, const void *p, size_t len);
+void sha512_block_data_order(void *c, const void *p, size_t len);
+
+typedef void (*update_func) (void *, unsigned, const uint8_t *);
+typedef void (*digest_func) (void *, unsigned, uint8_t *);
+typedef void (*set_key_func) (void *, unsigned, const uint8_t *);
+typedef void (*init_func) (void *);
+
+struct x86_hash_ctx {
+ union {
+ struct sha1_ctx sha1;
+ struct sha224_ctx sha224;
+ struct sha256_ctx sha256;
+#ifdef ENABLE_SHA512
+ struct sha384_ctx sha384;
+ struct sha512_ctx sha512;
+#endif
+ } ctx;
+ void *ctx_ptr;
+ gnutls_digest_algorithm_t algo;
+ size_t length;
+ update_func update;
+ digest_func digest;
+ init_func init;
+};
+
+static int
+wrap_x86_hash_update(void *_ctx, const void *text, size_t textsize)
+{
+ struct x86_hash_ctx *ctx = _ctx;
+
+ ctx->update(ctx->ctx_ptr, textsize, text);
+
+ return GNUTLS_E_SUCCESS;
+}
+
+static void wrap_x86_hash_deinit(void *hd)
+{
+ gnutls_free(hd);
+}
+
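+/*
+ * The sha*_block_data_order() routines are OpenSSL perlasm output and
+ * expect OpenSSL's SHA_CTX memory layout, not nettle's.  Each update
+ * wrapper below therefore copies the nettle context into a shadow
+ * struct with the OpenSSL layout, runs the assembly over all whole
+ * blocks, copies the state back, and leaves partial leading/trailing
+ * blocks to nettle's own sha*_update() buffering.
+ */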
+void x86_sha1_update(struct sha1_ctx *ctx, size_t length,
+ const uint8_t * data)
+{
+ struct {
+ uint32_t h0, h1, h2, h3, h4;
+ uint32_t Nl, Nh;
+ uint32_t data[16];
+ unsigned int num;
+ } octx;
+ size_t res;
+ unsigned t2, i;
+
+ if ((res = ctx->index)) {
+ res = SHA1_DATA_SIZE - res;
+ if (length < res)
+ res = length;
+ sha1_update(ctx, res, data);
+ data += res;
+ length -= res;
+ }
+
+ octx.h0 = ctx->state[0];
+ octx.h1 = ctx->state[1];
+ octx.h2 = ctx->state[2];
+ octx.h3 = ctx->state[3];
+ octx.h4 = ctx->state[4];
+
+ memcpy(octx.data, ctx->block, SHA1_DATA_SIZE);
+ octx.num = ctx->index;
+
+ res = length % SHA1_DATA_SIZE;
+ length -= res;
+
+ if (length > 0) {
+
+ t2 = length / SHA1_DATA_SIZE;
+
+ sha1_block_data_order(&octx, data, t2);
+
+ for (i=0;i<t2;i++)
+ MD_INCR(ctx);
+ data += length;
+ }
+
+ ctx->state[0] = octx.h0;
+ ctx->state[1] = octx.h1;
+ ctx->state[2] = octx.h2;
+ ctx->state[3] = octx.h3;
+ ctx->state[4] = octx.h4;
+
+ memcpy(ctx->block, octx.data, octx.num);
+ ctx->index = octx.num;
+
+ if (res > 0) {
+ sha1_update(ctx, res, data);
+ }
+
+}
+
+void x86_sha256_update(struct sha256_ctx *ctx, size_t length,
+ const uint8_t * data)
+{
+ struct {
+ uint32_t h[8];
+ uint32_t Nl, Nh;
+ uint32_t data[16];
+ unsigned int num;
+ unsigned md_len;
+ } octx;
+ size_t res;
+ unsigned t2, i;
+
+ if ((res = ctx->index)) {
+ res = SHA256_DATA_SIZE - res;
+ if (length < res)
+ res = length;
+ sha256_update(ctx, res, data);
+ data += res;
+ length -= res;
+ }
+
+ memcpy(octx.h, ctx->state, sizeof(octx.h));
+ memcpy(octx.data, ctx->block, SHA256_DATA_SIZE);
+ octx.num = ctx->index;
+
+ res = length % SHA256_DATA_SIZE;
+ length -= res;
+
+ if (length > 0) {
+		t2 = length / SHA256_DATA_SIZE;
+ sha256_block_data_order(&octx, data, t2);
+
+ for (i=0;i<t2;i++)
+ MD_INCR(ctx);
+ data += length;
+ }
+
+ memcpy(ctx->state, octx.h, sizeof(octx.h));
+
+ memcpy(ctx->block, octx.data, octx.num);
+ ctx->index = octx.num;
+
+ if (res > 0) {
+ sha256_update(ctx, res, data);
+ }
+}
+
+#ifdef ENABLE_SHA512
+void x86_sha512_update(struct sha512_ctx *ctx, size_t length,
+ const uint8_t * data)
+{
+ struct {
+ uint64_t h[8];
+ uint64_t Nl, Nh;
+ union {
+ uint64_t d[16];
+ uint8_t p[16*8];
+ } u;
+ unsigned int num;
+ unsigned md_len;
+ } octx;
+ size_t res;
+ unsigned t2, i;
+
+ if ((res = ctx->index)) {
+ res = SHA512_DATA_SIZE - res;
+ if (length < res)
+ res = length;
+ sha512_update(ctx, res, data);
+ data += res;
+ length -= res;
+ }
+
+ memcpy(octx.h, ctx->state, sizeof(octx.h));
+ memcpy(octx.u.p, ctx->block, SHA512_DATA_SIZE);
+ octx.num = ctx->index;
+
+ res = length % SHA512_DATA_SIZE;
+ length -= res;
+
+ if (length > 0) {
+ t2 = length / SHA512_DATA_SIZE;
+ sha512_block_data_order(&octx, data, t2);
+
+ for (i=0;i<t2;i++)
+ MD_INCR(ctx);
+ data += length;
+ }
+
+ memcpy(ctx->state, octx.h, sizeof(octx.h));
+
+ memcpy(ctx->block, octx.u.p, octx.num);
+ ctx->index = octx.num;
+
+ if (res > 0) {
+ sha512_update(ctx, res, data);
+ }
+}
+#endif
+
+static int _ctx_init(gnutls_digest_algorithm_t algo,
+ struct x86_hash_ctx *ctx)
+{
+ switch (algo) {
+ case GNUTLS_DIG_SHA1:
+ sha1_init(&ctx->ctx.sha1);
+ ctx->update = (update_func) x86_sha1_update;
+ ctx->digest = (digest_func) sha1_digest;
+ ctx->init = (init_func) sha1_init;
+ ctx->ctx_ptr = &ctx->ctx.sha1;
+ ctx->length = SHA1_DIGEST_SIZE;
+ break;
+ case GNUTLS_DIG_SHA224:
+ sha224_init(&ctx->ctx.sha224);
+ ctx->update = (update_func) x86_sha256_update;
+ ctx->digest = (digest_func) sha256_digest;
+ ctx->init = (init_func) sha224_init;
+ ctx->ctx_ptr = &ctx->ctx.sha224;
+ ctx->length = SHA224_DIGEST_SIZE;
+ break;
+ case GNUTLS_DIG_SHA256:
+ sha256_init(&ctx->ctx.sha256);
+ ctx->update = (update_func) x86_sha256_update;
+ ctx->digest = (digest_func) sha256_digest;
+ ctx->init = (init_func) sha256_init;
+ ctx->ctx_ptr = &ctx->ctx.sha256;
+ ctx->length = SHA256_DIGEST_SIZE;
+ break;
+#ifdef ENABLE_SHA512
+ case GNUTLS_DIG_SHA384:
+ sha384_init(&ctx->ctx.sha384);
+ ctx->update = (update_func) x86_sha512_update;
+ ctx->digest = (digest_func) sha512_digest;
+ ctx->init = (init_func) sha384_init;
+ ctx->ctx_ptr = &ctx->ctx.sha384;
+ ctx->length = SHA384_DIGEST_SIZE;
+ break;
+ case GNUTLS_DIG_SHA512:
+ sha512_init(&ctx->ctx.sha512);
+ ctx->update = (update_func) x86_sha512_update;
+ ctx->digest = (digest_func) sha512_digest;
+ ctx->init = (init_func) sha512_init;
+ ctx->ctx_ptr = &ctx->ctx.sha512;
+ ctx->length = SHA512_DIGEST_SIZE;
+ break;
+#endif
+ default:
+ gnutls_assert();
+ return GNUTLS_E_INVALID_REQUEST;
+ }
+
+ return 0;
+}
+
+
+static int wrap_x86_hash_init(gnutls_digest_algorithm_t algo, void **_ctx)
+{
+ struct x86_hash_ctx *ctx;
+ int ret;
+
+ ctx = gnutls_malloc(sizeof(struct x86_hash_ctx));
+ if (ctx == NULL) {
+ gnutls_assert();
+ return GNUTLS_E_MEMORY_ERROR;
+ }
+
+ ctx->algo = algo;
+
+	if ((ret = _ctx_init(algo, ctx)) < 0) {
+		gnutls_assert();
+		gnutls_free(ctx);
+		return ret;
+	}
+
+ *_ctx = ctx;
+
+ return 0;
+}
+
+static int
+wrap_x86_hash_output(void *src_ctx, void *digest, size_t digestsize)
+{
+ struct x86_hash_ctx *ctx;
+ ctx = src_ctx;
+
+ if (digestsize < ctx->length)
+ return gnutls_assert_val(GNUTLS_E_SHORT_MEMORY_BUFFER);
+
+ ctx->digest(ctx->ctx_ptr, digestsize, digest);
+
+ return 0;
+}
+
+static int wrap_x86_hash_fast(gnutls_digest_algorithm_t algo,
+ const void *text, size_t text_size,
+ void *digest)
+{
+ struct x86_hash_ctx ctx;
+ int ret;
+
+ ret = _ctx_init(algo, &ctx);
+ if (ret < 0)
+ return gnutls_assert_val(ret);
+
+	ctx.update(ctx.ctx_ptr, text_size, text);
+	ctx.digest(ctx.ctx_ptr, ctx.length, digest);
+
+ return 0;
+}
+
+const struct nettle_hash x86_sha1 =
+NN_HASH(sha1, x86_sha1_update, sha1_digest, SHA1);
+const struct nettle_hash x86_sha224 =
+NN_HASH(sha224, x86_sha256_update, sha224_digest, SHA224);
+const struct nettle_hash x86_sha256 =
+NN_HASH(sha256, x86_sha256_update, sha256_digest, SHA256);
+
+#ifdef ENABLE_SHA512
+const struct nettle_hash x86_sha384 =
+NN_HASH(sha384, x86_sha512_update, sha384_digest, SHA384);
+const struct nettle_hash x86_sha512 =
+NN_HASH(sha512, x86_sha512_update, sha512_digest, SHA512);
+#endif
+
+const gnutls_crypto_digest_st sha_x86_struct = {
+ .init = wrap_x86_hash_init,
+ .hash = wrap_x86_hash_update,
+ .output = wrap_x86_hash_output,
+ .deinit = wrap_x86_hash_deinit,
+ .fast = wrap_x86_hash_fast,
+};
diff --git a/lib/accelerated/x86/sha-x86.h b/lib/accelerated/x86/sha-x86.h
new file mode 100644
index 0000000000..a288962da5
--- /dev/null
+++ b/lib/accelerated/x86/sha-x86.h
@@ -0,0 +1,22 @@
+#ifndef SHA_X86_H
+#define SHA_X86_H
+
+#include <nettle/sha.h>
+
+/* nettle's SHA512 is faster than openssl's */
+#undef ENABLE_SHA512
+
+extern const struct nettle_hash x86_sha1;
+extern const struct nettle_hash x86_sha224;
+extern const struct nettle_hash x86_sha256;
+extern const struct nettle_hash x86_sha384;
+extern const struct nettle_hash x86_sha512;
+
+void x86_sha1_update(struct sha1_ctx *ctx, size_t length, const uint8_t * data);
+void x86_sha256_update(struct sha256_ctx *ctx, size_t length, const uint8_t * data);
+void x86_sha512_update(struct sha512_ctx *ctx, size_t length, const uint8_t * data);
+
+extern const gnutls_crypto_digest_st sha_x86_struct;
+extern const gnutls_crypto_mac_st hmac_sha_x86_struct;
+
+#endif
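
As a reading aid, here is a minimal sketch of driving the sha_x86_struct
dispatch table from sha-x86.c directly; digest_abc is an illustrative
name, and in the tree the table is registered with the gnutls crypto
backend rather than called like this:

    #include <gnutls/crypto.h>
    #include <sha-x86.h>

    static int digest_abc(uint8_t out[SHA256_DIGEST_SIZE])
    {
            void *h;
            int ret = sha_x86_struct.init(GNUTLS_DIG_SHA256, &h);
            if (ret < 0)
                    return ret;

            sha_x86_struct.hash(h, "abc", 3);   /* wrap_x86_hash_update */
            ret = sha_x86_struct.output(h, out, SHA256_DIGEST_SIZE);
            sha_x86_struct.deinit(h);           /* gnutls_free()s the context */
            return ret;
    }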