author | Nikos Mavrogiannopoulos <nmav@gnutls.org> | 2013-12-14 11:57:02 +0100
committer | Nikos Mavrogiannopoulos <nmav@gnutls.org> | 2013-12-14 12:00:04 +0100
commit | cbb9b17ff9f9861f6b6db466186f2fcb766955a2 (patch)
tree | 17fd7ed2a1e331aa4dde0a530dbc939365d33308 /lib/accelerated
parent | c1416e9865a498fa102987310f30c00dfecf524e (diff)
download | gnutls-cbb9b17ff9f9861f6b6db466186f2fcb766955a2.tar.gz
Added Appro's SSSE3 SHA implementations
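Because the new SHA/HMAC back ends are hooked in through gnutls_crypto_single_digest_register() and gnutls_crypto_single_mac_register() (see the aes-x86.c hunk below), applications pick them up transparently through the ordinary digest API. A minimal, hypothetical caller-side sketch, not part of this commit (link with -lgnutls):

```c
/* Hypothetical caller-side view, not from this commit: once
 * register_x86_crypto() has registered the SSSE3 SHA back end, a plain
 * digest call uses it automatically on capable CPUs. */
#include <stdio.h>
#include <gnutls/gnutls.h>
#include <gnutls/crypto.h>

int main(void)
{
	unsigned char md[20];		/* SHA-1 produces 20 bytes */
	const char msg[] = "abc";

	gnutls_global_init();		/* triggers the crypto registration */

	if (gnutls_hash_fast(GNUTLS_DIG_SHA1, msg, sizeof(msg) - 1, md) < 0)
		return 1;

	for (unsigned i = 0; i < sizeof(md); i++)
		printf("%02x", md[i]);
	printf("\n");

	gnutls_global_deinit();
	return 0;
}
```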
Diffstat (limited to 'lib/accelerated')
52 files changed, 43142 insertions, 190 deletions
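For orientation while reading the diff: register_x86_crypto() now reads CPUID leaf 1 once into _gnutls_x86_cpuid_s and keys the SSSE3 SHA/HMAC registration on ECX bit 9 (mask 0x0000200), with AES-NI on ECX bit 25 and PCLMUL on bit 1. A standalone sketch of the same feature checks, assuming GCC/Clang's <cpuid.h> rather than the tree's gnutls_cpuid() helper:

```c
/* Illustrative sketch of the CPUID checks used below; it mirrors the masks
 * in aes-x86.c but uses the compiler's <cpuid.h> instead of gnutls_cpuid(). */
#include <stdio.h>
#include <cpuid.h>

static unsigned int cpuid_s[4];	/* EAX, EBX, ECX, EDX of CPUID leaf 1 */

int main(void)
{
	if (!__get_cpuid(1, &cpuid_s[0], &cpuid_s[1], &cpuid_s[2], &cpuid_s[3]))
		return 1;

	if (cpuid_s[2] & 0x0000200)	/* ECX bit 9: SSSE3, gates the new SHA code */
		printf("SSSE3: SSSE3 SHA1/SHA2 and HMAC would be registered\n");
	if (cpuid_s[2] & 0x2000000)	/* ECX bit 25: AES-NI */
		printf("AES-NI: accelerated AES would be registered\n");
	if (cpuid_s[2] & 0x2)		/* ECX bit 1: PCLMULQDQ (AES-GCM path) */
		printf("PCLMUL: accelerated GCM would be registered\n");
	return 0;
}
```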
diff --git a/lib/accelerated/x86/Makefile.am b/lib/accelerated/x86/Makefile.am
index 8edcbbb5f0..e231103dad 100644
--- a/lib/accelerated/x86/Makefile.am
+++ b/lib/accelerated/x86/Makefile.am
@@ -31,41 +31,44 @@ if ENABLE_MINITASN1
 AM_CPPFLAGS += -I$(srcdir)/../../minitasn1
 endif
 
-EXTRA_DIST = README license.txt
+EXTRA_DIST = README license.txt files.mk
 
 noinst_LTLIBRARIES = libx86.la
 
-libx86_la_SOURCES = sha-padlock.c hmac-padlock.c aes-x86.c aes-padlock.c aes-gcm-padlock.c aes-padlock.h aes-x86.h x86.h sha-padlock.h
+libx86_la_SOURCES = sha-padlock.c hmac-padlock.c aes-x86.c aes-padlock.c aes-gcm-padlock.c \
+	aes-padlock.h aes-x86.h x86.h sha-padlock.h sha-x86.c sha-x86.h hmac-x86.c
+
+include files.mk
 
 if ASM_X86_64
 AM_CFLAGS += -DASM_X86_64 -DASM_X86
 libx86_la_SOURCES += aes-gcm-x86.c
 if WINDOWS
-libx86_la_SOURCES += coff/appro-aes-x86-64-coff.s coff/padlock-x86-64-coff.s coff/cpuid-x86-64-coff.s coff/appro-aes-gcm-x86-64-coff.s
+libx86_la_SOURCES += $(X86_64_FILES_COFF)
 endif
 if MACOSX
-libx86_la_SOURCES += macosx/appro-aes-x86-64-macosx.s macosx/padlock-x86-64-macosx.s macosx/cpuid-x86-64-macosx.s macosx/appro-aes-gcm-x86-64-macosx.s
+libx86_la_SOURCES += $(X86_64_FILES_MACOSX)
 endif
 if ELF
-libx86_la_SOURCES += elf/appro-aes-x86-64.s elf/appro-aes-gcm-x86-64.s elf/padlock-x86-64.s elf/cpuid-x86-64.s
+libx86_la_SOURCES += $(X86_64_FILES_ELF)
 endif
 else #ASM_X86_64
 AM_CFLAGS += -DASM_X86_32 -DASM_X86
 if WINDOWS
-libx86_la_SOURCES += coff/appro-aes-x86-coff.s coff/padlock-x86-coff.s coff/cpuid-x86-coff.s
+libx86_la_SOURCES += $(X86_FILES_ELF)
 endif
 if MACOSX
-libx86_la_SOURCES += macosx/appro-aes-x86-macosx.s macosx/padlock-x86-macosx.s macosx/cpuid-x86-macosx.s
+libx86_la_SOURCES += $(X86_FILES_MACOSX)
 endif
 if ELF
-libx86_la_SOURCES += elf/appro-aes-x86.s elf/padlock-x86.s elf/cpuid-x86.s
+libx86_la_SOURCES += $(X86_FILES_ELF)
 endif
 endif #ASM_X86_64
diff --git a/lib/accelerated/x86/aes-padlock.h b/lib/accelerated/x86/aes-padlock.h
index cd5d437c8f..851b40b6f9 100644
--- a/lib/accelerated/x86/aes-padlock.h
+++ b/lib/accelerated/x86/aes-padlock.h
@@ -31,9 +31,6 @@ extern const gnutls_crypto_cipher_st aes_gcm_padlock_struct;
 extern const gnutls_crypto_mac_st hmac_sha_padlock_struct;
 extern const gnutls_crypto_digest_st sha_padlock_struct;
 
-extern const gnutls_crypto_mac_st hmac_sha_padlock_nano_struct;
-extern const gnutls_crypto_digest_st sha_padlock_nano_struct;
-
 int padlock_aes_cipher_setkey(void *_ctx, const void *userkey, size_t keysize);
diff --git a/lib/accelerated/x86/aes-x86.c b/lib/accelerated/x86/aes-x86.c
index 59e2b13280..c50836a168 100644
--- a/lib/accelerated/x86/aes-x86.c
+++ b/lib/accelerated/x86/aes-x86.c
@@ -30,6 +30,7 @@
 #include <gnutls/crypto.h>
 #include <gnutls_errors.h>
 #include <aes-x86.h>
+#include <sha-x86.h>
 #include <x86.h>
 
 struct aes_ctx {
@@ -38,6 +39,8 @@
 	int enc;
 };
 
+unsigned int _gnutls_x86_cpuid_s[4];
+
 static int
 aes_cipher_init(gnutls_cipher_algorithm_t algorithm, void **_ctx, int enc)
 {
@@ -126,19 +129,18 @@ static const gnutls_crypto_cipher_st cipher_struct = {
 
 static unsigned check_optimized_aes(void)
 {
-	unsigned int a, b, c, d;
-	gnutls_cpuid(1, &a, &b, &c, &d);
+	return (_gnutls_x86_cpuid_s[2] & 0x2000000);
+}
 
-	return (c & 0x2000000);
+static unsigned check_ssse3(void)
+{
+	return (_gnutls_x86_cpuid_s[2] & 0x0000200);
 }
 
 #ifdef ASM_X86_64
 static unsigned check_pclmul(void)
 {
-	unsigned int a, b, c, d;
-	gnutls_cpuid(1, &a, &b, &c, &d);
-
-	return (c & 0x2);
+	return (_gnutls_x86_cpuid_s[2] & 0x2);
 }
 #endif
 
@@ -165,6 +167,88 @@
void register_x86_crypto(void) if (check_intel_or_amd() == 0) return; + gnutls_cpuid(1, &_gnutls_x86_cpuid_s[0], &_gnutls_x86_cpuid_s[1], + &_gnutls_x86_cpuid_s[2], &_gnutls_x86_cpuid_s[3]); + + if (check_ssse3()) { + _gnutls_debug_log("Intel SSSE3 was detected\n"); + + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1, + 80, + &sha_x86_struct); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA224, + 80, + &sha_x86_struct); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA256, + 80, + &sha_x86_struct); + if (ret < 0) { + gnutls_assert(); + } + + + ret = + gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA1, + 80, + &hmac_sha_x86_struct); + if (ret < 0) + gnutls_assert(); + + ret = + gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA224, + 80, + &hmac_sha_x86_struct); + if (ret < 0) + gnutls_assert(); + + ret = + gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA256, + 80, + &hmac_sha_x86_struct); + if (ret < 0) + gnutls_assert(); + +#ifdef ENABLE_SHA512 + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA384, + 80, + &sha_x86_struct); + if (ret < 0) + gnutls_assert(); + + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA512, + 80, + &sha_x86_struct); + if (ret < 0) + gnutls_assert(); + ret = + gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA384, + 80, + &hmac_sha_x86_struct); + if (ret < 0) + gnutls_assert(); + + ret = + gnutls_crypto_single_mac_register(GNUTLS_DIG_SHA512, + 80, + &hmac_sha_x86_struct); + if (ret < 0) + gnutls_assert(); +#endif + } + if (check_optimized_aes()) { _gnutls_debug_log("Intel AES accelerator was detected\n"); ret = @@ -211,5 +295,8 @@ void register_x86_crypto(void) #endif } + /* convert _gnutls_x86_cpuid_s the way openssl asm expects it */ + _gnutls_x86_cpuid_s[1] = _gnutls_x86_cpuid_s[2]; + return; } diff --git a/lib/accelerated/x86/coff/appro-aes-x86-coff.s b/lib/accelerated/x86/coff/aesni-x86.s index d00c02f285..1970712d30 100644 --- a/lib/accelerated/x86/coff/appro-aes-x86-coff.s +++ b/lib/accelerated/x86/coff/aesni-x86.s @@ -2162,3 +2162,5 @@ _aesni_set_decrypt_key: .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/appro-aes-x86-64-coff.s b/lib/accelerated/x86/coff/aesni-x86_64.s index 224a226b0d..85b51085a5 100644 --- a/lib/accelerated/x86/coff/appro-aes-x86-64-coff.s +++ b/lib/accelerated/x86/coff/aesni-x86_64.s @@ -3420,3 +3420,5 @@ cbc_se_handler: .LSEH_info_key: .byte 0x01,0x04,0x01,0x00 .byte 0x04,0x02,0x00,0x00 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/cpuid-x86-coff.s b/lib/accelerated/x86/coff/cpuid-x86.s index d8074ed62f..f35cfba63a 100644 --- a/lib/accelerated/x86/coff/cpuid-x86-coff.s +++ b/lib/accelerated/x86/coff/cpuid-x86.s @@ -68,3 +68,5 @@ _gnutls_have_cpuid: andl $2097152,%eax ret .byte 67,80,85,73,68,32,102,111,114,32,120,56,54,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/cpuid-x86-64-coff.s b/lib/accelerated/x86/coff/cpuid-x86_64.s index 5662a4b586..033df92ebf 100644 --- a/lib/accelerated/x86/coff/cpuid-x86-64-coff.s +++ b/lib/accelerated/x86/coff/cpuid-x86_64.s @@ -67,3 +67,5 @@ gnutls_cpuid: movq 16(%rsp),%rsi .byte 0xf3,0xc3 .LSEH_end_gnutls_cpuid: + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/padlock-x86-coff.s 
b/lib/accelerated/x86/coff/e_padlock-x86.s index d969f307b5..d51d62ff73 100644 --- a/lib/accelerated/x86/coff/padlock-x86-coff.s +++ b/lib/accelerated/x86/coff/e_padlock-x86.s @@ -1059,3 +1059,5 @@ _padlock_sha512_blocks: .align 4 .Lpadlock_saved_context: .long 0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/padlock-x86-64-coff.s b/lib/accelerated/x86/coff/e_padlock-x86_64.s index a3a0e301e7..14c62fd176 100644 --- a/lib/accelerated/x86/coff/padlock-x86-64-coff.s +++ b/lib/accelerated/x86/coff/e_padlock-x86_64.s @@ -1187,3 +1187,5 @@ padlock_ctr32_encrypt: .p2align 3 .Lpadlock_saved_context: .quad 0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/appro-aes-gcm-x86-64-coff.s b/lib/accelerated/x86/coff/ghash-x86_64.s index ceb9108c32..951ee891b9 100644 --- a/lib/accelerated/x86/coff/appro-aes-gcm-x86-64-coff.s +++ b/lib/accelerated/x86/coff/ghash-x86_64.s @@ -1525,3 +1525,5 @@ se_handler: .byte 0x0c,0x78,0x01,0x00 .byte 0x08,0x68,0x00,0x00 .byte 0x04,0x01,0x15,0x00 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/openssl-cpuid-x86.s b/lib/accelerated/x86/coff/openssl-cpuid-x86.s new file mode 100644 index 0000000000..2e1b08cd9a --- /dev/null +++ b/lib/accelerated/x86/coff/openssl-cpuid-x86.s @@ -0,0 +1,396 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# *** This file is auto-generated *** +# +.file "x86cpuid.s" +.text +.globl _OPENSSL_ia32_cpuid +.def _OPENSSL_ia32_cpuid; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_ia32_cpuid: +.L_OPENSSL_ia32_cpuid_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %edx,%edx + pushfl + popl %eax + movl %eax,%ecx + xorl $2097152,%eax + pushl %eax + popfl + pushfl + popl %eax + xorl %eax,%ecx + xorl %eax,%eax + btl $21,%ecx + jnc .L000nocpuid + movl 20(%esp),%esi + movl %eax,8(%esi) + .byte 0x0f,0xa2 + movl %eax,%edi + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%ebp + cmpl $1231384169,%edx + setne %al + orl %eax,%ebp + cmpl $1818588270,%ecx + setne %al + orl %eax,%ebp + jz .L001intel + cmpl $1752462657,%ebx + setne %al + movl %eax,%esi + cmpl $1769238117,%edx + setne %al + orl %eax,%esi + cmpl $1145913699,%ecx + setne %al + orl %eax,%esi + jnz .L001intel + movl $2147483648,%eax + .byte 0x0f,0xa2 + cmpl $2147483649,%eax + jb .L001intel + movl %eax,%esi + movl $2147483649,%eax + .byte 0x0f,0xa2 + orl %ecx,%ebp + andl $2049,%ebp + cmpl $2147483656,%esi + jb .L001intel + movl $2147483656,%eax + .byte 0x0f,0xa2 + movzbl %cl,%esi + incl %esi + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + btl $28,%edx + jnc .L002generic + shrl $16,%ebx + andl $255,%ebx + cmpl %esi,%ebx + ja .L002generic + andl $4026531839,%edx + jmp .L002generic +.L001intel: + cmpl $7,%edi + jb .L003cacheinfo + movl 20(%esp),%esi + movl $7,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + movl %ebx,8(%esi) +.L003cacheinfo: + cmpl $4,%edi + movl $-1,%edi + jb .L004nocacheinfo + movl $4,%eax + movl $0,%ecx + .byte 0x0f,0xa2 + movl %eax,%edi + shrl $14,%edi + andl $4095,%edi +.L004nocacheinfo: + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + andl $3220176895,%edx + cmpl $0,%ebp + jne .L005notintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + jne .L005notintel + orl $1048576,%edx +.L005notintel: + btl $28,%edx + jnc .L002generic + andl $4026531839,%edx + cmpl $0,%edi + je .L002generic + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja .L002generic + andl $4026531839,%edx +.L002generic: + andl $2048,%ebp + andl $4294965247,%ecx + movl %edx,%esi + orl %ecx,%ebp + btl $27,%ecx + jnc .L006clear_avx + xorl %ecx,%ecx +.byte 15,1,208 + andl $6,%eax + cmpl $6,%eax + je .L007done + cmpl $2,%eax + je .L006clear_avx +.L008clear_xmm: + andl $4261412861,%ebp + andl $4278190079,%esi +.L006clear_avx: + andl $4026525695,%ebp + movl 20(%esp),%edi + andl $4294967263,8(%edi) +.L007done: + movl %esi,%eax + movl %ebp,%edx +.L000nocpuid: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _OPENSSL_rdtsc +.def _OPENSSL_rdtsc; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_rdtsc: +.L_OPENSSL_rdtsc_begin: + xorl %eax,%eax + xorl %edx,%edx + leal __gnutls_x86_cpuid_s,%ecx + btl $4,(%ecx) + jnc .L009notsc + .byte 0x0f,0x31 +.L009notsc: + ret +.globl _OPENSSL_instrument_halt +.def _OPENSSL_instrument_halt; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_instrument_halt: +.L_OPENSSL_instrument_halt_begin: + leal __gnutls_x86_cpuid_s,%ecx + btl $4,(%ecx) + jnc .L010nohalt +.long 2421723150 + andl $3,%eax + jnz .L010nohalt + pushfl + popl %eax + btl $9,%eax + jnc .L010nohalt + .byte 0x0f,0x31 + pushl %edx + pushl %eax + hlt + .byte 0x0f,0x31 + subl (%esp),%eax + sbbl 4(%esp),%edx + addl $8,%esp + ret +.L010nohalt: + xorl %eax,%eax + xorl %edx,%edx + ret +.globl _OPENSSL_far_spin +.def _OPENSSL_far_spin; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_far_spin: +.L_OPENSSL_far_spin_begin: + pushfl + popl %eax + btl 
$9,%eax + jnc .L011nospin + movl 4(%esp),%eax + movl 8(%esp),%ecx +.long 2430111262 + xorl %eax,%eax + movl (%ecx),%edx + jmp .L012spin +.align 16 +.L012spin: + incl %eax + cmpl (%ecx),%edx + je .L012spin +.long 529567888 + ret +.L011nospin: + xorl %eax,%eax + xorl %edx,%edx + ret +.globl _OPENSSL_wipe_cpu +.def _OPENSSL_wipe_cpu; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_wipe_cpu: +.L_OPENSSL_wipe_cpu_begin: + xorl %eax,%eax + xorl %edx,%edx + leal __gnutls_x86_cpuid_s,%ecx + movl (%ecx),%ecx + btl $1,(%ecx) + jnc .L013no_x87 +.long 4007259865,4007259865,4007259865,4007259865,2430851995 +.L013no_x87: + leal 4(%esp),%eax + ret +.globl _OPENSSL_atomic_add +.def _OPENSSL_atomic_add; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_atomic_add: +.L_OPENSSL_atomic_add_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + pushl %ebx + nop + movl (%edx),%eax +.L014spin: + leal (%eax,%ecx,1),%ebx + nop +.long 447811568 + jne .L014spin + movl %ebx,%eax + popl %ebx + ret +.globl _OPENSSL_indirect_call +.def _OPENSSL_indirect_call; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_indirect_call: +.L_OPENSSL_indirect_call_begin: + pushl %ebp + movl %esp,%ebp + subl $28,%esp + movl 12(%ebp),%ecx + movl %ecx,(%esp) + movl 16(%ebp),%edx + movl %edx,4(%esp) + movl 20(%ebp),%eax + movl %eax,8(%esp) + movl 24(%ebp),%eax + movl %eax,12(%esp) + movl 28(%ebp),%eax + movl %eax,16(%esp) + movl 32(%ebp),%eax + movl %eax,20(%esp) + movl 36(%ebp),%eax + movl %eax,24(%esp) + call *8(%ebp) + movl %ebp,%esp + popl %ebp + ret +.globl _OPENSSL_cleanse +.def _OPENSSL_cleanse; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_cleanse: +.L_OPENSSL_cleanse_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + xorl %eax,%eax + cmpl $7,%ecx + jae .L015lot + cmpl $0,%ecx + je .L016ret +.L017little: + movb %al,(%edx) + subl $1,%ecx + leal 1(%edx),%edx + jnz .L017little +.L016ret: + ret +.align 16 +.L015lot: + testl $3,%edx + jz .L018aligned + movb %al,(%edx) + leal -1(%ecx),%ecx + leal 1(%edx),%edx + jmp .L015lot +.L018aligned: + movl %eax,(%edx) + leal -4(%ecx),%ecx + testl $-4,%ecx + leal 4(%edx),%edx + jnz .L018aligned + cmpl $0,%ecx + jne .L017little + ret +.globl _OPENSSL_instrument_bus +.def _OPENSSL_instrument_bus; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_instrument_bus: +.L_OPENSSL_instrument_bus_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl $0,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _OPENSSL_instrument_bus2 +.def _OPENSSL_instrument_bus2; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_instrument_bus2: +.L_OPENSSL_instrument_bus2_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl $0,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _OPENSSL_ia32_rdrand +.def _OPENSSL_ia32_rdrand; .scl 2; .type 32; .endef +.align 16 +_OPENSSL_ia32_rdrand: +.L_OPENSSL_ia32_rdrand_begin: + movl $8,%ecx +.L019loop: +.byte 15,199,240 + jc .L020break + loop .L019loop +.L020break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.comm __gnutls_x86_cpuid_s,16 +.section .ctors +.long _OPENSSL_cpuid_setup + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/openssl-cpuid-x86_64.s b/lib/accelerated/x86/coff/openssl-cpuid-x86_64.s new file mode 100644 index 0000000000..d1439e8c13 --- /dev/null +++ b/lib/accelerated/x86/coff/openssl-cpuid-x86_64.s @@ -0,0 +1,361 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# + + +.section .ctors + .p2align 3 + .quad OPENSSL_cpuid_setup + + +.comm _gnutls_x86_cpuid_s,16,4 + +.text + +.globl OPENSSL_atomic_add +.def OPENSSL_atomic_add; .scl 2; .type 32; .endef +.p2align 4 +OPENSSL_atomic_add: + movl (%rcx),%eax +.Lspin: leaq (%rdx,%rax,1),%r8 +.byte 0xf0 + cmpxchgl %r8d,(%rcx) + jne .Lspin + movl %r8d,%eax +.byte 0x48,0x98 + .byte 0xf3,0xc3 + + +.globl OPENSSL_rdtsc +.def OPENSSL_rdtsc; .scl 2; .type 32; .endef +.p2align 4 +OPENSSL_rdtsc: + rdtsc + shlq $32,%rdx + orq %rdx,%rax + .byte 0xf3,0xc3 + + +.globl OPENSSL_ia32_cpuid +.def OPENSSL_ia32_cpuid; .scl 2; .type 32; .endef +.p2align 4 +OPENSSL_ia32_cpuid: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_OPENSSL_ia32_cpuid: + movq %rcx,%rdi + + movq %rbx,%r8 + + xorl %eax,%eax + movl %eax,8(%rdi) + cpuid + movl %eax,%r11d + + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%r9d + cmpl $1231384169,%edx + setne %al + orl %eax,%r9d + cmpl $1818588270,%ecx + setne %al + orl %eax,%r9d + jz .Lintel + + cmpl $1752462657,%ebx + setne %al + movl %eax,%r10d + cmpl $1769238117,%edx + setne %al + orl %eax,%r10d + cmpl $1145913699,%ecx + setne %al + orl %eax,%r10d + jnz .Lintel + + + movl $2147483648,%eax + cpuid + cmpl $2147483649,%eax + jb .Lintel + movl %eax,%r10d + movl $2147483649,%eax + cpuid + orl %ecx,%r9d + andl $2049,%r9d + + cmpl $2147483656,%r10d + jb .Lintel + + movl $2147483656,%eax + cpuid + movzbq %cl,%r10 + incq %r10 + + movl $1,%eax + cpuid + btl $28,%edx + jnc .Lgeneric + shrl $16,%ebx + cmpb %r10b,%bl + ja .Lgeneric + andl $4026531839,%edx + jmp .Lgeneric + +.Lintel: + cmpl $4,%r11d + movl $-1,%r10d + jb .Lnocacheinfo + + movl $4,%eax + movl $0,%ecx + cpuid + movl %eax,%r10d + shrl $14,%r10d + andl 
$4095,%r10d + + cmpl $7,%r11d + jb .Lnocacheinfo + + movl $7,%eax + xorl %ecx,%ecx + cpuid + movl %ebx,8(%rdi) + +.Lnocacheinfo: + movl $1,%eax + cpuid + andl $3220176895,%edx + cmpl $0,%r9d + jne .Lnotintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + jne .Lnotintel + orl $1048576,%edx +.Lnotintel: + btl $28,%edx + jnc .Lgeneric + andl $4026531839,%edx + cmpl $0,%r10d + je .Lgeneric + + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja .Lgeneric + andl $4026531839,%edx +.Lgeneric: + andl $2048,%r9d + andl $4294965247,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + btl $27,%r9d + jnc .Lclear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + andl $6,%eax + cmpl $6,%eax + je .Ldone +.Lclear_avx: + movl $4026525695,%eax + andl %eax,%r9d + andl $4294967263,8(%rdi) +.Ldone: + shlq $32,%r9 + movl %r10d,%eax + movq %r8,%rbx + orq %r9,%rax + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 +.LSEH_end_OPENSSL_ia32_cpuid: + +.globl OPENSSL_cleanse +.def OPENSSL_cleanse; .scl 2; .type 32; .endef +.p2align 4 +OPENSSL_cleanse: + xorq %rax,%rax + cmpq $15,%rdx + jae .Lot + cmpq $0,%rdx + je .Lret +.Little: + movb %al,(%rcx) + subq $1,%rdx + leaq 1(%rcx),%rcx + jnz .Little +.Lret: + .byte 0xf3,0xc3 +.p2align 4 +.Lot: + testq $7,%rcx + jz .Laligned + movb %al,(%rcx) + leaq -1(%rdx),%rdx + leaq 1(%rcx),%rcx + jmp .Lot +.Laligned: + movq %rax,(%rcx) + leaq -8(%rdx),%rdx + testq $-8,%rdx + leaq 8(%rcx),%rcx + jnz .Laligned + cmpq $0,%rdx + jne .Little + .byte 0xf3,0xc3 + +.globl OPENSSL_wipe_cpu +.def OPENSSL_wipe_cpu; .scl 2; .type 32; .endef +.p2align 4 +OPENSSL_wipe_cpu: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorq %rcx,%rcx + xorq %rdx,%rdx + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + leaq 8(%rsp),%rax + .byte 0xf3,0xc3 + +.globl OPENSSL_instrument_bus +.def OPENSSL_instrument_bus; .scl 2; .type 32; .endef +.p2align 4 +OPENSSL_instrument_bus: + movq %rcx,%r10 + movq %rdx,%rcx + movq %rdx,%r11 + + rdtsc + movl %eax,%r8d + movl $0,%r9d + clflush (%r10) +.byte 0xf0 + addl %r9d,(%r10) + jmp .Loop +.p2align 4 +.Loop: rdtsc + movl %eax,%edx + subl %r8d,%eax + movl %edx,%r8d + movl %eax,%r9d + clflush (%r10) +.byte 0xf0 + addl %eax,(%r10) + leaq 4(%r10),%r10 + subq $1,%rcx + jnz .Loop + + movq %r11,%rax + .byte 0xf3,0xc3 + + +.globl OPENSSL_instrument_bus2 +.def OPENSSL_instrument_bus2; .scl 2; .type 32; .endef +.p2align 4 +OPENSSL_instrument_bus2: + movq %rcx,%r10 + movq %rdx,%rcx + movq %r8,%r11 + movq %rcx,8(%rsp) + + rdtsc + movl %eax,%r8d + movl $0,%r9d + + clflush (%r10) +.byte 0xf0 + addl %r9d,(%r10) + + rdtsc + movl %eax,%edx + subl %r8d,%eax + movl %edx,%r8d + movl %eax,%r9d +.Loop2: + clflush (%r10) +.byte 0xf0 + addl %eax,(%r10) + + subq $1,%r11 + jz .Ldone2 + + rdtsc + movl %eax,%edx + subl %r8d,%eax + movl %edx,%r8d + cmpl %r9d,%eax + movl %eax,%r9d + movl $0,%edx + setne %dl + subq %rdx,%rcx + leaq (%r10,%rdx,4),%r10 + jnz .Loop2 + +.Ldone2: + movq 8(%rsp),%rax + subq %rcx,%rax + .byte 0xf3,0xc3 + +.globl OPENSSL_ia32_rdrand +.def OPENSSL_ia32_rdrand; .scl 2; .type 32; .endef +.p2align 4 +OPENSSL_ia32_rdrand: + movl $8,%ecx +.Loop_rdrand: +.byte 72,15,199,240 + jc .Lbreak_rdrand + loop .Loop_rdrand +.Lbreak_rdrand: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 + + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/sha1-ssse3-x86.s b/lib/accelerated/x86/coff/sha1-ssse3-x86.s new file mode 100644 index 0000000000..9bd41a0de4 --- /dev/null +++ 
b/lib/accelerated/x86/coff/sha1-ssse3-x86.s @@ -0,0 +1,1420 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# *** This file is auto-generated *** +# +.file "sha1-586.s" +.text +.globl _sha1_block_data_order +.def _sha1_block_data_order; .scl 2; .type 32; .endef +.align 16 +_sha1_block_data_order: +.L_sha1_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%ebp + movl 24(%esp),%esi + movl 28(%esp),%eax + subl $76,%esp + shll $6,%eax + addl %esi,%eax + movl %eax,104(%esp) + movl 16(%ebp),%edi + jmp .L000loop +.align 16 +.L000loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,16(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edx,28(%esp) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + # 00_15 0 + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 1 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 2 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 3 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + # 00_15 4 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 5 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 6 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 7 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 8 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 9 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + 
xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + # 00_15 10 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 11 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 12 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 13 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 14 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 15 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + # 16_19 16 + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 16_19 17 + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 16_19 18 + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + # 16_19 19 + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 20 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 21 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + # 20_39 22 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl 
%ebp,%ebx + # 20_39 23 + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 24 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 25 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + # 20_39 26 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 27 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 28 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 29 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 30 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + # 20_39 31 + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + # 20_39 32 + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + # 20_39 33 + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + # 20_39 34 + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 
1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx + # 20_39 35 + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + # 20_39 36 + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + # 20_39 37 + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + # 20_39 38 + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + # 20_39 39 + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + # 40_59 40 + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + # 40_59 41 + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + # 40_59 42 + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + # 40_59 43 + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + # 40_59 44 + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + # 40_59 45 + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + 
leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + # 40_59 46 + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + # 40_59 47 + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + # 40_59 48 + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + # 40_59 49 + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + # 40_59 50 + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + # 40_59 51 + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + # 40_59 52 + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + # 40_59 53 + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + # 40_59 54 + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + # 40_59 55 + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp 
+ addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi + # 40_59 56 + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + # 40_59 57 + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + # 40_59 58 + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + # 40_59 59 + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + # 20_39 60 + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + # 20_39 61 + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + # 20_39 62 + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + # 20_39 63 + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + # 20_39 64 + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 20_39 65 + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 20_39 66 + movl %ebx,%ebp + xorl 16(%esp),%esi + xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi 
+ addl %ebp,%esi + # 20_39 67 + movl %eax,%ebp + xorl 20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 68 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 69 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + # 20_39 70 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + # 20_39 71 + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 72 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 73 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + # 20_39 74 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 75 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 76 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 77 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 78 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 
60(%esp),%edi + addl %ebp,%esi + # 20_39 79 + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb .L000loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s new file mode 100644 index 0000000000..75868a42c6 --- /dev/null +++ b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s @@ -0,0 +1,2693 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# *** This file is auto-generated *** +# +.text + + +.globl sha1_block_data_order +.def sha1_block_data_order; .scl 2; .type 32; .endef +.p2align 4 +sha1_block_data_order: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha1_block_data_order: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + movl _gnutls_x86_cpuid_s+0(%rip),%r9d + movl _gnutls_x86_cpuid_s+4(%rip),%r8d + movl _gnutls_x86_cpuid_s+8(%rip),%r10d + testl $512,%r8d + jz .Lialu + jmp _ssse3_shortcut + +.p2align 4 +.Lialu: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + movq %rsp,%r11 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %r11,64(%rsp) +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.p2align 4 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + 
xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl 
%r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 
1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 
28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + 
xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl 
%r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz 
.Lloop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +.Lepilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 +.LSEH_end_sha1_block_data_order: +.def sha1_block_data_order_ssse3; .scl 3; .type 32; .endef +.p2align 4 +sha1_block_data_order_ssse3: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha1_block_data_order_ssse3: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -160(%rsp),%rsp + movaps %xmm6,64+0(%rsp) + movaps %xmm7,64+16(%rsp) + movaps %xmm8,64+32(%rsp) + movaps %xmm9,64+48(%rsp) + movaps %xmm10,64+64(%rsp) + movaps %xmm11,64+80(%rsp) +.Lprologue_ssse3: + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.p2align 4 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + rorl $2,%ebx + xorl %edx,%esi + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + addl 0(%rsp),%ebp + paddd %xmm3,%xmm9 + xorl %ecx,%ebx + roll $5,%eax + psrldq $4,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + pxor %xmm0,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm2,%xmm8 + rorl $7,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + roll $5,%edx + psrld $31,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + movdqa %xmm10,%xmm9 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + rorl $7,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + roll $5,%ecx + movdqa -64(%r11),%xmm10 + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + movdqa %xmm2,%xmm5 + rorl $7,%ecx + xorl %ebp,%esi + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + addl 16(%rsp),%eax + paddd %xmm4,%xmm10 + xorl %edx,%ecx + roll $5,%ebx + psrldq $4,%xmm9 + addl %esi,%eax + andl %ecx,%edi + pxor %xmm1,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm3,%xmm9 + rorl $7,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + roll $5,%ebp + psrld $31,%xmm9 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm8,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld 
$2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + roll $5,%edx + movdqa -32(%r11),%xmm8 + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + movdqa %xmm3,%xmm6 + rorl $7,%edx + xorl %eax,%esi + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + addl 32(%rsp),%ebx + paddd %xmm5,%xmm8 + xorl %ebp,%edx + roll $5,%ecx + psrldq $4,%xmm10 + addl %esi,%ebx + andl %edx,%edi + pxor %xmm2,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm4,%xmm10 + rorl $7,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + roll $5,%eax + psrld $31,%xmm10 + addl %esi,%ebp + andl %ebx,%edi + movdqa %xmm9,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + rorl $7,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + roll $5,%ebp + movdqa -32(%r11),%xmm9 + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + movdqa %xmm4,%xmm7 + rorl $7,%ebp + xorl %ebx,%esi + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + addl 48(%rsp),%ecx + paddd %xmm6,%xmm9 + xorl %eax,%ebp + roll $5,%edx + psrldq $4,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + pxor %xmm3,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm5,%xmm8 + rorl $7,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + roll $5,%ebx + psrld $31,%xmm8 + addl %esi,%eax + andl %ecx,%edi + movdqa %xmm10,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + rorl $7,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + roll $5,%eax + movdqa -32(%r11),%xmm10 + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + movdqa %xmm7,%xmm9 + rorl $7,%eax + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + pxor %xmm1,%xmm0 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + addl %esi,%edx + andl %eax,%edi + pxor %xmm9,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + xorl %ebx,%edi + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + psrld $30,%xmm9 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm0,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %eax,%ebp + 
pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + por 
%xmm8,%xmm5 + addl 28(%rsp),%eax + movdqa %xmm5,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%rsp),%ebp + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + roll $5,%eax + addl %esi,%ebp + pxor %xmm9,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + pslld $2,%xmm6 + xorl %ebx,%edi + roll $5,%ebp + psrld $30,%xmm9 + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + por %xmm9,%xmm6 + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm6,%xmm10 + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 48(%rsp),%eax + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + movdqa 32(%r11),%xmm9 + paddd %xmm6,%xmm8 + roll $5,%ebx + addl %esi,%eax + pxor %xmm10,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + pslld $2,%xmm7 + xorl %ecx,%edi + roll $5,%eax + psrld $30,%xmm10 + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + por %xmm10,%xmm7 + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm7,%xmm8 + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 0(%rsp),%ebx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + pxor %xmm1,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + roll $5,%ecx + addl %esi,%ebx + pxor %xmm8,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + pslld $2,%xmm0 + xorl %edx,%edi + roll $5,%ebx + psrld $30,%xmm8 + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + por %xmm8,%xmm0 + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm0,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 16(%rsp),%ecx + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + pxor %xmm2,%xmm1 + movl %edx,%edi + xorl %eax,%esi + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + roll $5,%edx + addl %esi,%ecx + pxor %xmm9,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + movdqa %xmm1,%xmm9 + movdqa 
%xmm10,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + pslld $2,%xmm1 + xorl %ebp,%edi + roll $5,%ecx + psrld $30,%xmm9 + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + por %xmm9,%xmm1 + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm1,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 32(%rsp),%edx + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + roll $5,%ebp + addl %esi,%edx + pxor %xmm10,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + pslld $2,%xmm2 + xorl %eax,%edi + roll $5,%edx + psrld $30,%xmm10 + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + por %xmm10,%xmm2 + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm2,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,48(%rsp) + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + paddd %xmm9,%xmm0 + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + movdqa %xmm0,0(%rsp) + xorl %edx,%edi + movl %ebx,%esi + psubd %xmm9,%xmm0 + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 
24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + paddd %xmm9,%xmm1 + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + movdqa %xmm1,16(%rsp) + xorl %ebp,%edi + movl %ecx,%esi + psubd %xmm9,%xmm1 + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + paddd %xmm9,%xmm2 + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + movdqa %xmm2,32(%rsp) + xorl %eax,%edi + movl %edx,%esi + psubd %xmm9,%xmm2 + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_ssse3 + +.p2align 4 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + 
addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + movaps 64+0(%rsp),%xmm6 + movaps 64+16(%rsp),%xmm7 + movaps 64+32(%rsp),%xmm8 + movaps 64+48(%rsp),%xmm9 + movaps 64+64(%rsp),%xmm10 + movaps 64+80(%rsp),%xmm11 + leaq 160(%rsp),%rsi + movq 0(%rsi),%r12 + movq 8(%rsi),%rbp + movq 16(%rsi),%rbx + leaq 24(%rsi),%rsp +.Lepilogue_ssse3: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 +.LSEH_end_sha1_block_data_order_ssse3: +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.p2align 6 + +.def se_handler; .scl 3; .type 32; .endef +.p2align 4 +se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + leaq .Lprologue(%rip),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + leaq .Lepilogue(%rip),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + movq 64(%rax),%rax + leaq 32(%rax),%rax + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq -32(%rax),%r13 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + + jmp .Lcommon_seh_tail + + +.def ssse3_handler; .scl 3; .type 32; .endef +.p2align 4 +ssse3_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lcommon_seh_tail + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lcommon_seh_tail + + leaq 64(%rax),%rsi + leaq 512(%r8),%rdi + movl $12,%ecx +.long 0xa548f3fc + leaq 184(%rax),%rax + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + +.Lcommon_seh_tail: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + movq 40(%r9),%rdi + movq %r8,%rsi + movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + .byte 0xf3,0xc3 + + +.section .pdata +.p2align 2 +.rva .LSEH_begin_sha1_block_data_order +.rva .LSEH_end_sha1_block_data_order +.rva .LSEH_info_sha1_block_data_order +.rva .LSEH_begin_sha1_block_data_order_ssse3 +.rva .LSEH_end_sha1_block_data_order_ssse3 +.rva .LSEH_info_sha1_block_data_order_ssse3 +.section .xdata 
+.p2align 3
+.LSEH_info_sha1_block_data_order:
+.byte 9,0,0,0
+.rva se_handler
+.LSEH_info_sha1_block_data_order_ssse3:
+.byte 9,0,0,0
+.rva ssse3_handler
+.rva .Lprologue_ssse3,.Lepilogue_ssse3
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/coff/sha256-avx-x86_64.s b/lib/accelerated/x86/coff/sha256-avx-x86_64.s
new file mode 100644
index 0000000000..2198b8e187
--- /dev/null
+++ b/lib/accelerated/x86/coff/sha256-avx-x86_64.s
@@ -0,0 +1,2645 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# +# *** This file is auto-generated *** +# +.text + + + +.globl sha256_multi_block +.def sha256_multi_block; .scl 2; .type 32; .endef +.p2align 5 +sha256_multi_block: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha256_multi_block: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + movq %rsp,%rax + pushq %rbx + pushq %rbp + leaq -168(%rsp),%rsp + movaps %xmm6,(%rsp) + movaps %xmm7,16(%rsp) + movaps %xmm8,32(%rsp) + movaps %xmm9,48(%rsp) + movaps %xmm10,-120(%rax) + movaps %xmm11,-104(%rax) + movaps %xmm12,-88(%rax) + movaps %xmm13,-72(%rax) + movaps %xmm14,-56(%rax) + movaps %xmm15,-40(%rax) + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +.Loop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone + + movdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + movdqu 32-128(%rdi),%xmm9 + movdqu 64-128(%rdi),%xmm10 + movdqu 96-128(%rdi),%xmm11 + movdqu 128-128(%rdi),%xmm12 + movdqu 160-128(%rdi),%xmm13 + movdqu 192-128(%rdi),%xmm14 + movdqu 224-128(%rdi),%xmm15 + movdqu .Lpbswap(%rip),%xmm6 + jmp .Loop + +.p2align 5 +.Loop: + movdqa %xmm10,%xmm4 + pxor %xmm9,%xmm4 + movd 0(%r8),%xmm5 + movd 0(%r9),%xmm0 + movd 0(%r10),%xmm1 + movd 0(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 4(%r8),%xmm5 + movd 4(%r9),%xmm0 + movd 4(%r10),%xmm1 + movd 4(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,16-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd 
%xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 8(%r8),%xmm5 + movd 8(%r9),%xmm0 + movd 8(%r10),%xmm1 + movd 8(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 12(%r8),%xmm5 + movd 12(%r9),%xmm0 + movd 12(%r10),%xmm1 + movd 12(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,48-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 16(%r8),%xmm5 + movd 16(%r9),%xmm0 + movd 16(%r10),%xmm1 + movd 16(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld 
$19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 20(%r8),%xmm5 + movd 20(%r9),%xmm0 + movd 20(%r10),%xmm1 + movd 20(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,80-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 24(%r8),%xmm5 + movd 24(%r9),%xmm0 + movd 24(%r10),%xmm1 + movd 24(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 28(%r8),%xmm5 + movd 28(%r9),%xmm0 + movd 28(%r10),%xmm1 + movd 28(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,112-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd 
%xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movd 32(%r8),%xmm5 + movd 32(%r9),%xmm0 + movd 32(%r10),%xmm1 + movd 32(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 36(%r8),%xmm5 + movd 36(%r9),%xmm0 + movd 36(%r10),%xmm1 + movd 36(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,144-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 40(%r8),%xmm5 + movd 40(%r9),%xmm0 + movd 40(%r10),%xmm1 + movd 40(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 44(%r8),%xmm5 + movd 44(%r9),%xmm0 + movd 44(%r10),%xmm1 + movd 
44(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,176-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 48(%r8),%xmm5 + movd 48(%r9),%xmm0 + movd 48(%r10),%xmm1 + movd 48(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 52(%r8),%xmm5 + movd 52(%r9),%xmm0 + movd 52(%r10),%xmm1 + movd 52(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,208-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 56(%r8),%xmm5 + movd 56(%r9),%xmm0 + movd 56(%r10),%xmm1 + movd 56(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa 
%xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + movd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + movd 60(%r10),%xmm1 + leaq 64(%r10),%r10 + movd 60(%r11),%xmm2 + leaq 64(%r11),%r11 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,240-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp .Loop_16_xx +.p2align 5 +.Loop_16_xx: + movdqa 16-128(%rax),%xmm6 + paddd 144-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 224-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 
+ pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 32-128(%rax),%xmm5 + paddd 160-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 240-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,16-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 48-128(%rax),%xmm6 + paddd 176-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 0-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 64-128(%rax),%xmm5 + paddd 192-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 16-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + 
psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,48-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 80-128(%rax),%xmm6 + paddd 208-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 32-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 96-128(%rax),%xmm5 + paddd 224-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 48-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,80-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor 
%xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 112-128(%rax),%xmm6 + paddd 240-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 64-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 128-128(%rax),%xmm5 + paddd 0-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 80-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,112-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 
+ pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqa 144-128(%rax),%xmm6 + paddd 16-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 96-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 160-128(%rax),%xmm5 + paddd 32-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 112-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,144-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 176-128(%rax),%xmm6 + paddd 48-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 128-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + 
psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 192-128(%rax),%xmm5 + paddd 64-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 144-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,176-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 208-128(%rax),%xmm6 + paddd 80-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 160-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor 
%xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 224-128(%rax),%xmm5 + paddd 96-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 176-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,208-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 240-128(%rax),%xmm6 + paddd 112-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 192-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld 
$30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 0-128(%rax),%xmm5 + paddd 128-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 208-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,240-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + decl %ecx + jnz .Loop_16_xx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + + movdqa (%rbx),%xmm7 + cmpl 0(%rbx),%ecx + pxor %xmm0,%xmm0 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm7,%xmm6 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm0,%xmm6 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm6,%xmm7 + cmovgeq %rbp,%r11 + + movdqu 0-128(%rdi),%xmm0 + pand %xmm6,%xmm8 + movdqu 32-128(%rdi),%xmm1 + pand %xmm6,%xmm9 + movdqu 64-128(%rdi),%xmm2 + pand %xmm6,%xmm10 + movdqu 96-128(%rdi),%xmm5 + pand %xmm6,%xmm11 + paddd %xmm0,%xmm8 + movdqu 128-128(%rdi),%xmm0 + pand %xmm6,%xmm12 + paddd %xmm1,%xmm9 + movdqu 160-128(%rdi),%xmm1 + pand %xmm6,%xmm13 + paddd %xmm2,%xmm10 + movdqu 192-128(%rdi),%xmm2 + pand %xmm6,%xmm14 + paddd %xmm5,%xmm11 + movdqu 224-128(%rdi),%xmm5 + pand %xmm6,%xmm15 + paddd %xmm0,%xmm12 + paddd %xmm1,%xmm13 + movdqu %xmm8,0-128(%rdi) + paddd %xmm2,%xmm14 + movdqu %xmm9,32-128(%rdi) + paddd %xmm5,%xmm15 + movdqu %xmm10,64-128(%rdi) + movdqu %xmm11,96-128(%rdi) + movdqu %xmm12,128-128(%rdi) + movdqu %xmm13,160-128(%rdi) + movdqu %xmm14,192-128(%rdi) + movdqu %xmm15,224-128(%rdi) + + movdqa %xmm7,(%rbx) + movdqa .Lpbswap(%rip),%xmm6 + decl %edx + jnz .Loop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande + +.Ldone: + movq 272(%rsp),%rax + movaps -184(%rax),%xmm6 + movaps -168(%rax),%xmm7 + movaps -152(%rax),%xmm8 + movaps -136(%rax),%xmm9 + movaps -120(%rax),%xmm10 + movaps -104(%rax),%xmm11 + movaps -88(%rax),%xmm12 + movaps -72(%rax),%xmm13 + movaps -56(%rax),%xmm14 + movaps -40(%rax),%xmm15 + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 +.LSEH_end_sha256_multi_block: +.p2align 8 +K256: +.long 1116352408,1116352408,1116352408,1116352408 +.long 1116352408,1116352408,1116352408,1116352408 +.long 
1899447441,1899447441,1899447441,1899447441 +.long 1899447441,1899447441,1899447441,1899447441 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3921009573,3921009573,3921009573,3921009573 +.long 3921009573,3921009573,3921009573,3921009573 +.long 961987163,961987163,961987163,961987163 +.long 961987163,961987163,961987163,961987163 +.long 1508970993,1508970993,1508970993,1508970993 +.long 1508970993,1508970993,1508970993,1508970993 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2870763221,2870763221,2870763221,2870763221 +.long 2870763221,2870763221,2870763221,2870763221 +.long 3624381080,3624381080,3624381080,3624381080 +.long 3624381080,3624381080,3624381080,3624381080 +.long 310598401,310598401,310598401,310598401 +.long 310598401,310598401,310598401,310598401 +.long 607225278,607225278,607225278,607225278 +.long 607225278,607225278,607225278,607225278 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1925078388,1925078388,1925078388,1925078388 +.long 1925078388,1925078388,1925078388,1925078388 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2614888103,2614888103,2614888103,2614888103 +.long 2614888103,2614888103,2614888103,2614888103 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3835390401,3835390401,3835390401,3835390401 +.long 3835390401,3835390401,3835390401,3835390401 +.long 4022224774,4022224774,4022224774,4022224774 +.long 4022224774,4022224774,4022224774,4022224774 +.long 264347078,264347078,264347078,264347078 +.long 264347078,264347078,264347078,264347078 +.long 604807628,604807628,604807628,604807628 +.long 604807628,604807628,604807628,604807628 +.long 770255983,770255983,770255983,770255983 +.long 770255983,770255983,770255983,770255983 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1996064986,1996064986,1996064986,1996064986 +.long 1996064986,1996064986,1996064986,1996064986 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2952996808,2952996808,2952996808,2952996808 +.long 2952996808,2952996808,2952996808,2952996808 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3584528711,3584528711,3584528711,3584528711 +.long 3584528711,3584528711,3584528711,3584528711 +.long 113926993,113926993,113926993,113926993 +.long 113926993,113926993,113926993,113926993 +.long 338241895,338241895,338241895,338241895 +.long 338241895,338241895,338241895,338241895 +.long 666307205,666307205,666307205,666307205 +.long 666307205,666307205,666307205,666307205 +.long 773529912,773529912,773529912,773529912 +.long 773529912,773529912,773529912,773529912 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1695183700,1695183700,1695183700,1695183700 +.long 
1695183700,1695183700,1695183700,1695183700 +.long 1986661051,1986661051,1986661051,1986661051 +.long 1986661051,1986661051,1986661051,1986661051 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2820302411,2820302411,2820302411,2820302411 +.long 2820302411,2820302411,2820302411,2820302411 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3600352804,3600352804,3600352804,3600352804 +.long 3600352804,3600352804,3600352804,3600352804 +.long 4094571909,4094571909,4094571909,4094571909 +.long 4094571909,4094571909,4094571909,4094571909 +.long 275423344,275423344,275423344,275423344 +.long 275423344,275423344,275423344,275423344 +.long 430227734,430227734,430227734,430227734 +.long 430227734,430227734,430227734,430227734 +.long 506948616,506948616,506948616,506948616 +.long 506948616,506948616,506948616,506948616 +.long 659060556,659060556,659060556,659060556 +.long 659060556,659060556,659060556,659060556 +.long 883997877,883997877,883997877,883997877 +.long 883997877,883997877,883997877,883997877 +.long 958139571,958139571,958139571,958139571 +.long 958139571,958139571,958139571,958139571 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1955562222,1955562222,1955562222,1955562222 +.long 1955562222,1955562222,1955562222,1955562222 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2756734187,2756734187,2756734187,2756734187 +.long 2756734187,2756734187,2756734187,2756734187 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3329325298,3329325298,3329325298,3329325298 +.long 3329325298,3329325298,3329325298,3329325298 +.Lpbswap: +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/sha256-ssse3-x86.s b/lib/accelerated/x86/coff/sha256-ssse3-x86.s new file mode 100644 index 0000000000..6fe27746ce --- /dev/null +++ b/lib/accelerated/x86/coff/sha256-ssse3-x86.s @@ -0,0 +1,3402 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.file "sha512-586.s" +.text +.globl _sha256_block_data_order +.def _sha256_block_data_order; .scl 2; .type 32; .endef +.align 16 +_sha256_block_data_order: +.L_sha256_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K256-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal __gnutls_x86_cpuid_s-.L001K256(%ebp),%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz .L002loop + andl $1073741824,%ecx + andl $268435968,%ebx + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + je .L003loop_shrd + subl %edi,%eax + cmpl $256,%eax + jae .L004unrolled + jmp .L002loop +.align 16 +.L002loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl 
%edi,32(%esp) +.align 16 +.L00500_15: + movl %edx,%ecx + movl 24(%esp),%esi + rorl $14,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne .L00500_15 + movl 156(%esp),%ecx + jmp .L00616_63 +.align 16 +.L00616_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne .L00616_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb .L002loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L003loop_shrd: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) 
+.align 16 +.L00700_15_shrd: + movl %edx,%ecx + movl 24(%esp),%esi + shrdl $14,%ecx,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + shrdl $5,%ecx,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %esi,%ebx + shrdl $9,%ecx,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + shrdl $11,%ecx,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne .L00700_15_shrd + movl 156(%esp),%ecx + jmp .L00816_63_shrd +.align 16 +.L00816_63_shrd: + movl %ecx,%ebx + movl 104(%esp),%esi + shrdl $11,%ecx,%ecx + movl %esi,%edi + shrdl $2,%esi,%esi + xorl %ebx,%ecx + shrl $3,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + shrdl $17,%esi,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + shrdl $14,%ecx,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + shrdl $5,%ecx,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %esi,%ebx + shrdl $9,%ecx,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + shrdl $11,%ecx,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne .L00816_63_shrd + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb .L003loop_shrd + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 16 +.L004unrolled: + 
leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp .L009grand_loop +.align 16 +.L009grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + 
movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl 
%ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + 
addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl 
%ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl 
%eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl 
%eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx 
+ xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + 
movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl 
$3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + 
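Each of these later rounds is preceded by one step of the SHA-256 message-schedule expansion: the rorl $11/rorl $7/shrl $3 sequence is σ0, the rorl $2/rorl $17/shrl $10 sequence is σ1, and the new word is written back into the rolling 32(%esp)..92(%esp) window. A self-contained C sketch of that expansion, again in FIPS 180-4 notation rather than the register allocation used above, might be:

    #include <stdint.h>

    static inline uint32_t rotr32(uint32_t x, unsigned n)
    {
        return (x >> n) | (x << (32 - n));
    }

    /* W[t] = sigma1(W[t-2]) + W[t-7] + sigma0(W[t-15]) + W[t-16], with the last
     * 16 schedule words kept in a rolling 16-entry window (the assembly keeps
     * that window in the 32(%esp)..92(%esp) slots). */
    static uint32_t sha256_next_w(uint32_t w[16], unsigned t)
    {
        uint32_t x  = w[(t - 15) & 15];
        uint32_t y  = w[(t - 2) & 15];
        uint32_t s0 = rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3);
        uint32_t s1 = rotr32(y, 17) ^ rotr32(y, 19) ^ (y >> 10);

        w[t & 15] += s0 + w[(t - 7) & 15] + s1;   /* w[t & 15] currently holds W[t-16] */
        return w[t & 15];
    }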
addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 
20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl 
%esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl 
%edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl 
%edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb .L009grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.comm __gnutls_x86_cpuid_s,16 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/sha512-ssse3-x86.s b/lib/accelerated/x86/coff/sha512-ssse3-x86.s new file mode 100644 index 0000000000..79098da5c2 --- /dev/null +++ b/lib/accelerated/x86/coff/sha512-ssse3-x86.s @@ -0,0 +1,605 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. 
+# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.file "sha512-586.s" +.text +.globl _sha512_block_data_order +.def _sha512_block_data_order; .scl 2; .type 32; .endef +.align 16 +_sha512_block_data_order: +.L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K512-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) +.align 16 +.L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 16 +.L00300_15_x86: + movl 40(%esp),%ecx 
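The .L00300_15_x86 loop that starts just above is the plain 32-bit path of the SHA-512 block transform: every 64-bit quantity is held as a low/high pair of 32-bit words, 64-bit rotations become paired shll/shrl/xorl sequences, and 64-bit additions become addl/adcl pairs, which is why each logical round spans so many instructions. A compact C sketch of the round being emulated, assuming native 64-bit arithmetic and not taken from this generated file, is:

    #include <stdint.h>

    static inline uint64_t rotr64(uint64_t x, unsigned n)
    {
        return (x >> n) | (x << (64 - n));
    }

    /* One SHA-512 round with native 64-bit words; the 32-bit assembly splits
     * every one of these operations into a low/high register pair. */
    static void sha512_round(uint64_t s[8], uint64_t k, uint64_t w)
    {
        uint64_t e = s[4], a = s[0];

        uint64_t S1  = rotr64(e, 14) ^ rotr64(e, 18) ^ rotr64(e, 41);
        uint64_t ch  = (e & s[5]) ^ (~e & s[6]);
        uint64_t t1  = s[7] + S1 + ch + k + w;
        uint64_t S0  = rotr64(a, 28) ^ rotr64(a, 34) ^ rotr64(a, 39);
        uint64_t maj = (a & s[1]) ^ (a & s[2]) ^ (s[1] & s[2]);
        uint64_t t2  = S0 + maj;

        s[7] = s[6]; s[6] = s[5]; s[5] = e; s[4] = s[3] + t1;
        s[3] = s[2]; s[2] = s[1]; s[1] = a; s[0] = t1 + t2;
    }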
+ movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne .L00300_15_x86 +.align 16 +.L00416_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl %ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 
52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne .L00416_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb .L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 
1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 1363258195,506948616 +.long 3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s b/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s new file mode 100644 index 0000000000..bbb2661f26 --- /dev/null +++ b/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s @@ -0,0 +1,3025 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.text + + +.globl sha256_block_data_order +.def sha256_block_data_order; .scl 2; .type 32; .endef +.p2align 4 +sha256_block_data_order: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha256_block_data_order: + movq %rcx,%rdi + movq %rdx,%rsi + movq %r8,%rdx + + leaq _gnutls_x86_cpuid_s(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $512,%r10d + jnz .Lssse3_shortcut + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.p2align 4 +.Lloop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl 
%ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl 
%r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl 
%r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp .Lrounds_16_xx +.p2align 4 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl 
%edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl 
%r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl 
%r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + 
+ rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 +.LSEH_end_sha256_block_data_order: +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.def sha256_block_data_order_ssse3; .scl 3; .type 32; .endef +.p2align 6 +sha256_block_data_order_ssse3: + movq %rdi,8(%rsp) + movq %rsi,16(%rsp) + movq %rsp,%rax +.LSEH_begin_sha256_block_data_order_ssse3: + movq %rcx,%rdi + movq %rdx,%rsi + 
movq %r8,%rdx + +.Lssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) + movaps %xmm6,64+32(%rsp) + movaps %xmm7,64+48(%rsp) + movaps %xmm8,64+64(%rsp) + movaps %xmm9,64+80(%rsp) +.Lprologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp .Lloop_ssse3 +.p2align 4 +.Lloop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 +.byte 102,15,56,0,199 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 +.byte 102,15,56,0,215 + movdqa 32(%rbp),%xmm5 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.p2align 4 +.Lssse3_00_47: + subq $-32*4,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor 
%xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + 
addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + 
movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d 
+ addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl 
$11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + movq 64+24(%rsp),%rsi + movaps 64+32(%rsp),%xmm6 + movaps 64+48(%rsp),%xmm7 + movaps 64+64(%rsp),%xmm8 + movaps 64+80(%rsp),%xmm9 + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + movq 8(%rsp),%rdi + movq 16(%rsp),%rsi + .byte 0xf3,0xc3 +.LSEH_end_sha256_block_data_order_ssse3: + +.def se_handler; .scl 3; .type 32; .endef +.p2align 4 +se_handler: + pushq %rsi + pushq %rdi + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + subq $64,%rsp + + movq 120(%r8),%rax + movq 248(%r8),%rbx + + movq 8(%r9),%rsi + movq 56(%r9),%r11 + + movl 0(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jb .Lin_prologue + + movq 152(%r8),%rax + + movl 4(%r11),%r10d + leaq (%rsi,%r10,1),%r10 + cmpq %r10,%rbx + jae .Lin_prologue + movq %rax,%rsi + movq 64+24(%rax),%rax + leaq 48(%rax),%rax + + movq -8(%rax),%rbx + movq -16(%rax),%rbp + movq -24(%rax),%r12 + movq -32(%rax),%r13 + movq -40(%rax),%r14 + movq -48(%rax),%r15 + movq %rbx,144(%r8) + movq %rbp,160(%r8) + movq %r12,216(%r8) + movq %r13,224(%r8) + movq %r14,232(%r8) + movq %r15,240(%r8) + + leaq .Lepilogue(%rip),%r10 + cmpq %r10,%rbx + jb .Lin_prologue + + leaq 64+32(%rsi),%rsi + leaq 512(%r8),%rdi + movl $8,%ecx +.long 0xa548f3fc + +.Lin_prologue: + movq 8(%rax),%rdi + movq 16(%rax),%rsi + movq %rax,152(%r8) + movq %rsi,168(%r8) + movq %rdi,176(%r8) + + movq 40(%r9),%rdi + movq %r8,%rsi 
+ movl $154,%ecx +.long 0xa548f3fc + + movq %r9,%rsi + xorq %rcx,%rcx + movq 8(%rsi),%rdx + movq 0(%rsi),%r8 + movq 16(%rsi),%r9 + movq 40(%rsi),%r10 + leaq 56(%rsi),%r11 + leaq 24(%rsi),%r12 + movq %r10,32(%rsp) + movq %r11,40(%rsp) + movq %r12,48(%rsp) + movq %rcx,56(%rsp) + call *__imp_RtlVirtualUnwind(%rip) + + movl $1,%eax + addq $64,%rsp + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + popq %rdi + popq %rsi + .byte 0xf3,0xc3 + + +.section .pdata +.p2align 2 +.rva .LSEH_begin_sha256_block_data_order +.rva .LSEH_end_sha256_block_data_order +.rva .LSEH_info_sha256_block_data_order +.rva .LSEH_begin_sha256_block_data_order_ssse3 +.rva .LSEH_end_sha256_block_data_order_ssse3 +.rva .LSEH_info_sha256_block_data_order_ssse3 +.section .xdata +.p2align 3 +.LSEH_info_sha256_block_data_order: +.byte 9,0,0,0 +.rva se_handler +.rva .Lprologue,.Lepilogue +.LSEH_info_sha256_block_data_order_ssse3: +.byte 9,0,0,0 +.rva se_handler +.rva .Lprologue_ssse3,.Lepilogue_ssse3 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/elf/appro-aes-x86.s b/lib/accelerated/x86/elf/aesni-x86.s index 5f0779756c..5f0779756c 100644 --- a/lib/accelerated/x86/elf/appro-aes-x86.s +++ b/lib/accelerated/x86/elf/aesni-x86.s diff --git a/lib/accelerated/x86/elf/appro-aes-x86-64.s b/lib/accelerated/x86/elf/aesni-x86_64.s index d3734a6edd..d3734a6edd 100644 --- a/lib/accelerated/x86/elf/appro-aes-x86-64.s +++ b/lib/accelerated/x86/elf/aesni-x86_64.s diff --git a/lib/accelerated/x86/elf/cpuid-x86-64.s b/lib/accelerated/x86/elf/cpuid-x86_64.s index 41a0061f71..41a0061f71 100644 --- a/lib/accelerated/x86/elf/cpuid-x86-64.s +++ b/lib/accelerated/x86/elf/cpuid-x86_64.s diff --git a/lib/accelerated/x86/elf/padlock-x86.s b/lib/accelerated/x86/elf/e_padlock-x86.s index 2199255efe..2199255efe 100644 --- a/lib/accelerated/x86/elf/padlock-x86.s +++ b/lib/accelerated/x86/elf/e_padlock-x86.s diff --git a/lib/accelerated/x86/elf/padlock-x86-64.s b/lib/accelerated/x86/elf/e_padlock-x86_64.s index 2ac113d72c..2ac113d72c 100644 --- a/lib/accelerated/x86/elf/padlock-x86-64.s +++ b/lib/accelerated/x86/elf/e_padlock-x86_64.s diff --git a/lib/accelerated/x86/elf/appro-aes-gcm-x86-64.s b/lib/accelerated/x86/elf/ghash-x86_64.s index 9755951f7b..9755951f7b 100644 --- a/lib/accelerated/x86/elf/appro-aes-gcm-x86-64.s +++ b/lib/accelerated/x86/elf/ghash-x86_64.s diff --git a/lib/accelerated/x86/elf/sha1-ssse3-x86.s b/lib/accelerated/x86/elf/sha1-ssse3-x86.s new file mode 100644 index 0000000000..e2f22e7c7e --- /dev/null +++ b/lib/accelerated/x86/elf/sha1-ssse3-x86.s @@ -0,0 +1,1421 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. 
+# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.file "sha1-586.s" +.text +.globl sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: +.L_sha1_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%ebp + movl 24(%esp),%esi + movl 28(%esp),%eax + subl $76,%esp + shll $6,%eax + addl %esi,%eax + movl %eax,104(%esp) + movl 16(%ebp),%edi + jmp .L000loop +.align 16 +.L000loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,16(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edx,28(%esp) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl 
%edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl 
%ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl 
%edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 56(%esp),%edx + xorl 
%ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl 
%esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + addl %ebp,%edi + + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + 
addl %ebp,%eax + + movl %ebx,%ebp + xorl 16(%esp),%esi + xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl 
%edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb .L000loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha1_block_data_order,.-.L_sha1_block_data_order_begin +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s new file mode 100644 index 0000000000..149edef796 --- /dev/null +++ b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s @@ -0,0 +1,2515 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
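+#
+# Note: on entry, sha1_block_data_order below consults the CPUID words
+# cached in _gnutls_x86_cpuid_s; when the SSSE3 feature flag
+# (CPUID.1:ECX bit 9, mask 0x200, i.e. the "testl $512" below) is set
+# it branches to the sha1_block_data_order_ssse3 code path, otherwise
+# it falls back to the integer-only .Lialu implementation.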
+# +# *** This file is auto-generated *** +# +.text + + +.globl sha1_block_data_order +.type sha1_block_data_order,@function +.align 16 +sha1_block_data_order: + movl _gnutls_x86_cpuid_s+0(%rip),%r9d + movl _gnutls_x86_cpuid_s+4(%rip),%r8d + movl _gnutls_x86_cpuid_s+8(%rip),%r10d + testl $512,%r8d + jz .Lialu + jmp _ssse3_shortcut + +.align 16 +.Lialu: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + movq %rsp,%r11 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %r11,64(%rsp) +.Lprologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp .Lloop + +.align 16 +.Lloop: + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 
1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl 
%eax,%edi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl 
%edx,16(%rsp) + movl 20(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl 
%ebx,%r12d + roll $30,%esi + movl %ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + 
movl %esi,%eax + movl %esi,%ebx + xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl 
%ecx,%edi + xorl 8(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz .Lloop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size 
sha1_block_data_order,.-sha1_block_data_order +.type sha1_block_data_order_ssse3,@function +.align 16 +sha1_block_data_order_ssse3: +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -64(%rsp),%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp .Loop_ssse3 +.align 16 +.Loop_ssse3: + movdqa %xmm1,%xmm4 + rorl $2,%ebx + xorl %edx,%esi + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + addl 0(%rsp),%ebp + paddd %xmm3,%xmm9 + xorl %ecx,%ebx + roll $5,%eax + psrldq $4,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + pxor %xmm0,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm2,%xmm8 + rorl $7,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + roll $5,%edx + psrld $31,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + movdqa %xmm10,%xmm9 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + rorl $7,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + roll $5,%ecx + movdqa -64(%r11),%xmm10 + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + movdqa %xmm2,%xmm5 + rorl $7,%ecx + xorl %ebp,%esi + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + addl 16(%rsp),%eax + paddd %xmm4,%xmm10 + xorl %edx,%ecx + roll $5,%ebx + psrldq $4,%xmm9 + addl %esi,%eax + andl %ecx,%edi + pxor %xmm1,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm3,%xmm9 + rorl $7,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + roll $5,%ebp + psrld $31,%xmm9 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm8,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + roll $5,%edx + movdqa -32(%r11),%xmm8 + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + movdqa %xmm3,%xmm6 + rorl $7,%edx + xorl %eax,%esi + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + addl 32(%rsp),%ebx + paddd %xmm5,%xmm8 + xorl %ebp,%edx + roll $5,%ecx + psrldq $4,%xmm10 + addl %esi,%ebx + andl %edx,%edi + pxor %xmm2,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm4,%xmm10 + rorl $7,%ecx + xorl %ebp,%edi + movl %ebx,%esi + 
addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl %edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + roll $5,%eax + psrld $31,%xmm10 + addl %esi,%ebp + andl %ebx,%edi + movdqa %xmm9,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + rorl $7,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + roll $5,%ebp + movdqa -32(%r11),%xmm9 + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + movdqa %xmm4,%xmm7 + rorl $7,%ebp + xorl %ebx,%esi + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + addl 48(%rsp),%ecx + paddd %xmm6,%xmm9 + xorl %eax,%ebp + roll $5,%edx + psrldq $4,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + pxor %xmm3,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm5,%xmm8 + rorl $7,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + roll $5,%ebx + psrld $31,%xmm8 + addl %esi,%eax + andl %ecx,%edi + movdqa %xmm10,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + rorl $7,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + roll $5,%eax + movdqa -32(%r11),%xmm10 + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + movdqa %xmm7,%xmm9 + rorl $7,%eax + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + pxor %xmm1,%xmm0 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + addl %esi,%edx + andl %eax,%edi + pxor %xmm9,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + xorl %ebx,%edi + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + psrld $30,%xmm9 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm0,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl 
%ecx,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + movdqa %xmm5,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%rsp),%ebp + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + roll $5,%eax + addl %esi,%ebp + pxor %xmm9,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + movdqa %xmm6,%xmm9 + 
movdqa %xmm10,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + pslld $2,%xmm6 + xorl %ebx,%edi + roll $5,%ebp + psrld $30,%xmm9 + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + por %xmm9,%xmm6 + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm6,%xmm10 + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 48(%rsp),%eax + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + movdqa 32(%r11),%xmm9 + paddd %xmm6,%xmm8 + roll $5,%ebx + addl %esi,%eax + pxor %xmm10,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + pslld $2,%xmm7 + xorl %ecx,%edi + roll $5,%eax + psrld $30,%xmm10 + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + por %xmm10,%xmm7 + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm7,%xmm8 + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 0(%rsp),%ebx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + pxor %xmm1,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + roll $5,%ecx + addl %esi,%ebx + pxor %xmm8,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + pslld $2,%xmm0 + xorl %edx,%edi + roll $5,%ebx + psrld $30,%xmm8 + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + por %xmm8,%xmm0 + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm0,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 16(%rsp),%ecx + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + pxor %xmm2,%xmm1 + movl %edx,%edi + xorl %eax,%esi + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + roll $5,%edx + addl %esi,%ecx + pxor %xmm9,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + pslld $2,%xmm1 + xorl %ebp,%edi + roll $5,%ecx + psrld $30,%xmm9 + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + por %xmm9,%xmm1 + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm1,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll 
$5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 32(%rsp),%edx + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + roll $5,%ebp + addl %esi,%edx + pxor %xmm10,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + pslld $2,%xmm2 + xorl %eax,%edi + roll $5,%edx + psrld $30,%xmm10 + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + por %xmm10,%xmm2 + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm2,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,48(%rsp) + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je .Ldone_ssse3 + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + paddd %xmm9,%xmm0 + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + movdqa %xmm0,0(%rsp) + xorl %edx,%edi + movl %ebx,%esi + psubd %xmm9,%xmm0 + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + paddd %xmm9,%xmm1 + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + movdqa %xmm1,16(%rsp) + xorl %ebp,%edi + movl %ecx,%esi + psubd %xmm9,%xmm1 + roll $5,%ecx + 
addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + paddd %xmm9,%xmm2 + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + movdqa %xmm2,32(%rsp) + xorl %eax,%edi + movl %edx,%esi + psubd %xmm9,%xmm2 + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp .Loop_ssse3 + +.align 16 +.Ldone_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq 64(%rsp),%rsi + movq 0(%rsi),%r12 + movq 8(%rsi),%rbp + movq 16(%rsi),%rbx + leaq 24(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3 +.align 64 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 
0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 64 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/elf/sha256-avx-x86_64.s b/lib/accelerated/x86/elf/sha256-avx-x86_64.s new file mode 100644 index 0000000000..bb9236ea84 --- /dev/null +++ b/lib/accelerated/x86/elf/sha256-avx-x86_64.s @@ -0,0 +1,2614 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# *** This file is auto-generated *** +# +.text + + + +.globl sha256_multi_block +.type sha256_multi_block,@function +.align 32 +sha256_multi_block: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +.Loop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz .Ldone + + movdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + movdqu 32-128(%rdi),%xmm9 + movdqu 64-128(%rdi),%xmm10 + movdqu 96-128(%rdi),%xmm11 + movdqu 128-128(%rdi),%xmm12 + movdqu 160-128(%rdi),%xmm13 + movdqu 192-128(%rdi),%xmm14 + movdqu 224-128(%rdi),%xmm15 + movdqu .Lpbswap(%rip),%xmm6 + jmp .Loop + +.align 32 +.Loop: + movdqa %xmm10,%xmm4 + pxor %xmm9,%xmm4 + movd 0(%r8),%xmm5 + movd 0(%r9),%xmm0 + movd 0(%r10),%xmm1 + movd 0(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 4(%r8),%xmm5 + movd 4(%r9),%xmm0 + movd 4(%r10),%xmm1 + movd 4(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,16-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + 
movd 8(%r8),%xmm5 + movd 8(%r9),%xmm0 + movd 8(%r10),%xmm1 + movd 8(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 12(%r8),%xmm5 + movd 12(%r9),%xmm0 + movd 12(%r10),%xmm1 + movd 12(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,48-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 16(%r8),%xmm5 + movd 16(%r9),%xmm0 + movd 16(%r10),%xmm1 + movd 16(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 20(%r8),%xmm5 + movd 20(%r9),%xmm0 + movd 20(%r10),%xmm1 + movd 20(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + 
movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,80-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 24(%r8),%xmm5 + movd 24(%r9),%xmm0 + movd 24(%r10),%xmm1 + movd 24(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 28(%r8),%xmm5 + movd 28(%r9),%xmm0 + movd 28(%r10),%xmm1 + movd 28(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,112-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movd 32(%r8),%xmm5 + movd 32(%r9),%xmm0 + movd 32(%r10),%xmm1 + movd 32(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld 
$11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 36(%r8),%xmm5 + movd 36(%r9),%xmm0 + movd 36(%r10),%xmm1 + movd 36(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,144-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 40(%r8),%xmm5 + movd 40(%r9),%xmm0 + movd 40(%r10),%xmm1 + movd 40(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 44(%r8),%xmm5 + movd 44(%r9),%xmm0 + movd 44(%r10),%xmm1 + movd 44(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,176-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 
+ pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 48(%r8),%xmm5 + movd 48(%r9),%xmm0 + movd 48(%r10),%xmm1 + movd 48(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 52(%r8),%xmm5 + movd 52(%r9),%xmm0 + movd 52(%r10),%xmm1 + movd 52(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,208-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 56(%r8),%xmm5 + movd 56(%r9),%xmm0 + movd 56(%r10),%xmm1 + movd 56(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd 
%xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + movd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + movd 60(%r10),%xmm1 + leaq 64(%r10),%r10 + movd 60(%r11),%xmm2 + leaq 64(%r11),%r11 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,240-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp .Loop_16_xx +.align 32 +.Loop_16_xx: + movdqa 16-128(%rax),%xmm6 + paddd 144-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 224-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 32-128(%rax),%xmm5 + paddd 160-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 240-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld 
$25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,16-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 48-128(%rax),%xmm6 + paddd 176-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 0-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 64-128(%rax),%xmm5 + paddd 192-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 16-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,48-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 
-32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 80-128(%rax),%xmm6 + paddd 208-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 32-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 96-128(%rax),%xmm5 + paddd 224-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 48-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,80-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor 
%xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 112-128(%rax),%xmm6 + paddd 240-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 64-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 128-128(%rax),%xmm5 + paddd 0-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 80-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,112-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqa 144-128(%rax),%xmm6 + paddd 16-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 96-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor 
%xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 160-128(%rax),%xmm5 + paddd 32-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 112-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,144-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 176-128(%rax),%xmm6 + paddd 48-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 128-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld 
$21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 192-128(%rax),%xmm5 + paddd 64-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 144-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,176-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 208-128(%rax),%xmm6 + paddd 80-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 160-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand 
%xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 224-128(%rax),%xmm5 + paddd 96-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 176-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,208-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 240-128(%rax),%xmm6 + paddd 112-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 192-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 0-128(%rax),%xmm5 + paddd 128-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 208-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor 
%xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,240-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + decl %ecx + jnz .Loop_16_xx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + + movdqa (%rbx),%xmm7 + cmpl 0(%rbx),%ecx + pxor %xmm0,%xmm0 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm7,%xmm6 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm0,%xmm6 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm6,%xmm7 + cmovgeq %rbp,%r11 + + movdqu 0-128(%rdi),%xmm0 + pand %xmm6,%xmm8 + movdqu 32-128(%rdi),%xmm1 + pand %xmm6,%xmm9 + movdqu 64-128(%rdi),%xmm2 + pand %xmm6,%xmm10 + movdqu 96-128(%rdi),%xmm5 + pand %xmm6,%xmm11 + paddd %xmm0,%xmm8 + movdqu 128-128(%rdi),%xmm0 + pand %xmm6,%xmm12 + paddd %xmm1,%xmm9 + movdqu 160-128(%rdi),%xmm1 + pand %xmm6,%xmm13 + paddd %xmm2,%xmm10 + movdqu 192-128(%rdi),%xmm2 + pand %xmm6,%xmm14 + paddd %xmm5,%xmm11 + movdqu 224-128(%rdi),%xmm5 + pand %xmm6,%xmm15 + paddd %xmm0,%xmm12 + paddd %xmm1,%xmm13 + movdqu %xmm8,0-128(%rdi) + paddd %xmm2,%xmm14 + movdqu %xmm9,32-128(%rdi) + paddd %xmm5,%xmm15 + movdqu %xmm10,64-128(%rdi) + movdqu %xmm11,96-128(%rdi) + movdqu %xmm12,128-128(%rdi) + movdqu %xmm13,160-128(%rdi) + movdqu %xmm14,192-128(%rdi) + movdqu %xmm15,224-128(%rdi) + + movdqa %xmm7,(%rbx) + movdqa .Lpbswap(%rip),%xmm6 + decl %edx + jnz .Loop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz .Loop_grande + +.Ldone: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp + .byte 0xf3,0xc3 +.size sha256_multi_block,.-sha256_multi_block +.align 256 +K256: +.long 1116352408,1116352408,1116352408,1116352408 +.long 1116352408,1116352408,1116352408,1116352408 +.long 1899447441,1899447441,1899447441,1899447441 +.long 1899447441,1899447441,1899447441,1899447441 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3921009573,3921009573,3921009573,3921009573 +.long 3921009573,3921009573,3921009573,3921009573 +.long 961987163,961987163,961987163,961987163 +.long 961987163,961987163,961987163,961987163 +.long 1508970993,1508970993,1508970993,1508970993 +.long 1508970993,1508970993,1508970993,1508970993 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2870763221,2870763221,2870763221,2870763221 +.long 2870763221,2870763221,2870763221,2870763221 
+.long 3624381080,3624381080,3624381080,3624381080 +.long 3624381080,3624381080,3624381080,3624381080 +.long 310598401,310598401,310598401,310598401 +.long 310598401,310598401,310598401,310598401 +.long 607225278,607225278,607225278,607225278 +.long 607225278,607225278,607225278,607225278 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1925078388,1925078388,1925078388,1925078388 +.long 1925078388,1925078388,1925078388,1925078388 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2614888103,2614888103,2614888103,2614888103 +.long 2614888103,2614888103,2614888103,2614888103 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3835390401,3835390401,3835390401,3835390401 +.long 3835390401,3835390401,3835390401,3835390401 +.long 4022224774,4022224774,4022224774,4022224774 +.long 4022224774,4022224774,4022224774,4022224774 +.long 264347078,264347078,264347078,264347078 +.long 264347078,264347078,264347078,264347078 +.long 604807628,604807628,604807628,604807628 +.long 604807628,604807628,604807628,604807628 +.long 770255983,770255983,770255983,770255983 +.long 770255983,770255983,770255983,770255983 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1996064986,1996064986,1996064986,1996064986 +.long 1996064986,1996064986,1996064986,1996064986 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2952996808,2952996808,2952996808,2952996808 +.long 2952996808,2952996808,2952996808,2952996808 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3584528711,3584528711,3584528711,3584528711 +.long 3584528711,3584528711,3584528711,3584528711 +.long 113926993,113926993,113926993,113926993 +.long 113926993,113926993,113926993,113926993 +.long 338241895,338241895,338241895,338241895 +.long 338241895,338241895,338241895,338241895 +.long 666307205,666307205,666307205,666307205 +.long 666307205,666307205,666307205,666307205 +.long 773529912,773529912,773529912,773529912 +.long 773529912,773529912,773529912,773529912 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1986661051,1986661051,1986661051,1986661051 +.long 1986661051,1986661051,1986661051,1986661051 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2820302411,2820302411,2820302411,2820302411 +.long 2820302411,2820302411,2820302411,2820302411 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3345764771,3345764771,3345764771,3345764771 
+.long 3345764771,3345764771,3345764771,3345764771 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3516065817,3516065817,3516065817,3516065817 +.long 3600352804,3600352804,3600352804,3600352804 +.long 3600352804,3600352804,3600352804,3600352804 +.long 4094571909,4094571909,4094571909,4094571909 +.long 4094571909,4094571909,4094571909,4094571909 +.long 275423344,275423344,275423344,275423344 +.long 275423344,275423344,275423344,275423344 +.long 430227734,430227734,430227734,430227734 +.long 430227734,430227734,430227734,430227734 +.long 506948616,506948616,506948616,506948616 +.long 506948616,506948616,506948616,506948616 +.long 659060556,659060556,659060556,659060556 +.long 659060556,659060556,659060556,659060556 +.long 883997877,883997877,883997877,883997877 +.long 883997877,883997877,883997877,883997877 +.long 958139571,958139571,958139571,958139571 +.long 958139571,958139571,958139571,958139571 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1322822218,1322822218,1322822218,1322822218 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1537002063,1537002063,1537002063,1537002063 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1747873779,1747873779,1747873779,1747873779 +.long 1955562222,1955562222,1955562222,1955562222 +.long 1955562222,1955562222,1955562222,1955562222 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2024104815,2024104815,2024104815,2024104815 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2227730452,2227730452,2227730452,2227730452 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2361852424,2361852424,2361852424,2361852424 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2428436474,2428436474,2428436474,2428436474 +.long 2756734187,2756734187,2756734187,2756734187 +.long 2756734187,2756734187,2756734187,2756734187 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3204031479,3204031479,3204031479,3204031479 +.long 3329325298,3329325298,3329325298,3329325298 +.long 3329325298,3329325298,3329325298,3329325298 +.Lpbswap: +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/elf/sha256-ssse3-x86.s b/lib/accelerated/x86/elf/sha256-ssse3-x86.s new file mode 100644 index 0000000000..81470f510d --- /dev/null +++ b/lib/accelerated/x86/elf/sha256-ssse3-x86.s @@ -0,0 +1,3403 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.file "sha512-586.s" +.text +.globl sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: +.L_sha256_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K256-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + leal _gnutls_x86_cpuid_s-.L001K256(%ebp),%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz .L002loop + andl $1073741824,%ecx + andl $268435968,%ebx + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + je .L003loop_shrd + subl %edi,%eax + cmpl $256,%eax + jae .L004unrolled + jmp .L002loop +.align 16 +.L002loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 16 +.L00500_15: + movl %edx,%ecx + movl 24(%esp),%esi + rorl $14,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + 
jne .L00500_15 + movl 156(%esp),%ecx + jmp .L00616_63 +.align 16 +.L00616_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne .L00616_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb .L002loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 32 +.L003loop_shrd: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 16 +.L00700_15_shrd: + movl %edx,%ecx + movl 24(%esp),%esi + shrdl $14,%ecx,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + shrdl $5,%ecx,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %esi,%ebx + shrdl $9,%ecx,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + shrdl $11,%ecx,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + 
cmpl $3248222580,%esi + jne .L00700_15_shrd + movl 156(%esp),%ecx + jmp .L00816_63_shrd +.align 16 +.L00816_63_shrd: + movl %ecx,%ebx + movl 104(%esp),%esi + shrdl $11,%ecx,%ecx + movl %esi,%edi + shrdl $2,%esi,%esi + xorl %ebx,%ecx + shrl $3,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + shrdl $17,%esi,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + shrdl $14,%ecx,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + shrdl $5,%ecx,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %esi,%ebx + shrdl $9,%ecx,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + shrdl $11,%ecx,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne .L00816_63_shrd + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb .L003loop_shrd + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 16 +.L004unrolled: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp .L009grand_loop +.align 16 +.L009grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + movl 
24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl 
%esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx 
+ movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + 
rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 
(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl 
%edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl 
%ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + 
xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 
1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl 
%edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl 
%esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl 
$17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 
28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl 
%edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + 
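+# (Descriptive note on the generated code: each round of this non-SSSE3 32-bit SHA-256 path folds the K256 round constant in through the leal immediate and keeps most of the a..h working variables in rotating stack slots.)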
movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb .L009grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.size sha256_block_data_order,.-.L_sha256_block_data_order_begin +.comm _gnutls_x86_cpuid_s,16,4 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/elf/sha512-ssse3-x86.s b/lib/accelerated/x86/elf/sha512-ssse3-x86.s new file mode 100644 index 0000000000..088a0fa54a --- /dev/null +++ b/lib/accelerated/x86/elf/sha512-ssse3-x86.s @@ -0,0 +1,606 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.file "sha512-586.s" +.text +.globl sha512_block_data_order +.type sha512_block_data_order,@function +.align 16 +sha512_block_data_order: +.L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call .L000pic_point +.L000pic_point: + popl %ebp + leal .L001K512-.L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) +.align 16 +.L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 16 +.L00300_15_x86: + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx 
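+# (Descriptive note on the generated code: rounds 0-15 of this 32-bit SHA-512 path keep the 64-bit state words as 32-bit halves on the stack; below, Ch(e,f,g) is formed as ((f^g)&e)^g and added to h, the message word and the K512 constant from (%ebp) with paired addl/adcl.)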
+ movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne .L00300_15_x86 +.align 16 +.L00416_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl %ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 
8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne .L00416_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb .L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 64 +.L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 
1363258195,506948616 +.long 3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 +.size sha512_block_data_order,.-.L_sha512_block_data_order_begin +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s new file mode 100644 index 0000000000..ea1915ded0 --- /dev/null +++ b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s @@ -0,0 +1,2881 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# *** This file is auto-generated *** +# +.text + + +.globl sha256_block_data_order +.type sha256_block_data_order,@function +.align 16 +sha256_block_data_order: + leaq _gnutls_x86_cpuid_s(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $512,%r10d + jnz .Lssse3_shortcut + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop + +.align 16 +.Lloop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl 
%r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl 
%ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp .Lrounds_16_xx +.align 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + 
xorl %r14d,%r15d + addl 36(%rsp),%r12d + + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 
52(%rsp),%r12d + + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 
32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + 
movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz .Lrounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) 
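+# (Descriptive note on the generated code: the per-block result is folded back into the hash context; a..c were stored above, the remaining state words d..h are stored below, and the outer .Lloop repeats while unread input blocks remain.)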
+ movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue: + .byte 0xf3,0xc3 +.size sha256_block_data_order,.-sha256_block_data_order +.align 64 +.type K256,@object +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.type sha256_block_data_order_ssse3,@function +.align 64 +sha256_block_data_order_ssse3: +.Lssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +.Lprologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp .Lloop_ssse3 +.align 16 +.Lloop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 +.byte 102,15,56,0,199 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 +.byte 102,15,56,0,215 + movdqa 32(%rbp),%xmm5 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 
96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.align 16 +.Lssse3_00_47: + subq $-32*4,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + 
psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d 
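+# (Descriptive note on the generated code: in .Lssse3_00_47 the message-schedule small-sigma functions are evaluated four words at a time in XMM registers via psrld/pslld/psrlq/pshufd/pxor, interleaved with the scalar round computation so integer and SIMD work overlap.)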
+ xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl 
%r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d 
+ addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + 
rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +.Lepilogue_ssse3: + .byte 0xf3,0xc3 +.size sha256_block_data_order_ssse3,.-sha256_block_data_order_ssse3 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/files.mk b/lib/accelerated/x86/files.mk new file mode 100644 index 0000000000..c22d758387 --- /dev/null +++ b/lib/accelerated/x86/files.mk @@ -0,0 +1,6 @@ +X86_FILES_ELF=elf/aesni-x86.s elf/cpuid-x86.s elf/e_padlock-x86.s elf/sha1-ssse3-x86.s elf/sha256-ssse3-x86.s elf/sha512-ssse3-x86.s +X86_FILES_COFF=coff/aesni-x86.s coff/cpuid-x86.s coff/e_padlock-x86.s coff/sha1-ssse3-x86.s coff/sha256-ssse3-x86.s coff/sha512-ssse3-x86.s +X86_FILES_MACOSX=macosx/aesni-x86.s macosx/cpuid-x86.s macosx/e_padlock-x86.s macosx/sha1-ssse3-x86.s macosx/sha256-ssse3-x86.s macosx/sha512-ssse3-x86.s +X86_64_FILES_ELF=elf/aesni-x86_64.s elf/cpuid-x86_64.s elf/e_padlock-x86_64.s elf/ghash-x86_64.s elf/sha1-ssse3-x86_64.s elf/sha512-ssse3-x86_64.s +X86_64_FILES_COFF=coff/aesni-x86_64.s coff/cpuid-x86_64.s coff/e_padlock-x86_64.s coff/ghash-x86_64.s coff/sha1-ssse3-x86_64.s coff/sha512-ssse3-x86_64.s +X86_64_FILES_MACOSX=macosx/aesni-x86_64.s macosx/cpuid-x86_64.s macosx/e_padlock-x86_64.s macosx/ghash-x86_64.s macosx/sha1-ssse3-x86_64.s macosx/sha512-ssse3-x86_64.s diff --git a/lib/accelerated/x86/hmac-x86.c b/lib/accelerated/x86/hmac-x86.c new file mode 100644 index 0000000000..73b21cc25d --- /dev/null +++ b/lib/accelerated/x86/hmac-x86.c @@ -0,0 +1,300 @@ +/* + * Copyright (C) 2008, 2010-2012 Free Software Foundation, Inc. + * + * Author: Nikos Mavrogiannopoulos + * + * This file is part of GNUTLS. + * + * The GNUTLS library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/> + * + */ + +/* This file provides the backend hash/mac implementation for + * the SSSE3-optimized x86 SHA code. + */ + +#include <gnutls_int.h> +#include <gnutls_hash_int.h> +#include <gnutls_errors.h> +#include <nettle/sha.h> +#include <nettle/hmac.h> +#include <nettle/macros.h> +#include <aes-x86.h> +#include <sha-x86.h> +#include <algorithms.h> + +#ifdef HAVE_LIBNETTLE + +typedef void (*update_func) (void *, unsigned, const uint8_t *); +typedef void (*digest_func) (void *, unsigned, uint8_t *); +typedef void (*set_key_func) (void *, unsigned, const uint8_t *); + +struct x86_hmac_ctx { + union { + struct hmac_sha1_ctx sha1; + struct hmac_sha224_ctx sha224; + struct hmac_sha256_ctx sha256; +#ifdef ENABLE_SHA512 + struct hmac_sha384_ctx sha384; + struct hmac_sha512_ctx sha512; +#endif + } ctx; + + void *ctx_ptr; + gnutls_mac_algorithm_t algo; + size_t length; + update_func update; + digest_func digest; + set_key_func setkey; +}; + +static void +x86_hmac_sha1_set_key(struct hmac_sha1_ctx *ctx, + unsigned key_length, const uint8_t * key) +{ + HMAC_SET_KEY(ctx, &x86_sha1, key_length, key); +} + +static void +x86_hmac_sha1_update(struct hmac_sha1_ctx *ctx, + unsigned length, const uint8_t * data) +{ + x86_sha1_update(&ctx->state, length, data); +} + +static void +x86_hmac_sha1_digest(struct hmac_sha1_ctx *ctx, + unsigned length, uint8_t * digest) +{ + HMAC_DIGEST(ctx, &x86_sha1, length, digest); +} + +static void +x86_hmac_sha256_set_key(struct hmac_sha256_ctx *ctx, + unsigned key_length, const uint8_t * key) +{ + HMAC_SET_KEY(ctx, &x86_sha256, key_length, key); +} + +static void +x86_hmac_sha256_update(struct hmac_sha256_ctx *ctx, + unsigned length, const uint8_t * data) +{ + x86_sha256_update(&ctx->state, length, data); +} + +static void +x86_hmac_sha256_digest(struct hmac_sha256_ctx *ctx, + unsigned length, uint8_t * digest) +{ + HMAC_DIGEST(ctx, &x86_sha256, length, digest); +} + +static void +x86_hmac_sha224_set_key(struct hmac_sha224_ctx *ctx, + unsigned key_length, const uint8_t * key) +{ + HMAC_SET_KEY(ctx, &x86_sha224, key_length, key); +} + +static void +x86_hmac_sha224_digest(struct hmac_sha224_ctx *ctx, + unsigned length, uint8_t * digest) +{ + HMAC_DIGEST(ctx, &x86_sha224, length, digest); +} + +#ifdef ENABLE_SHA512 +static void +x86_hmac_sha384_set_key(struct hmac_sha384_ctx *ctx, + unsigned key_length, const uint8_t * key) +{ + HMAC_SET_KEY(ctx, &x86_sha384, key_length, key); +} + +static void +x86_hmac_sha384_digest(struct hmac_sha384_ctx *ctx, + unsigned length, uint8_t * digest) +{ + HMAC_DIGEST(ctx, &x86_sha384, length, digest); +} + +static void +x86_hmac_sha512_set_key(struct hmac_sha512_ctx *ctx, + unsigned key_length, const uint8_t * key) +{ + HMAC_SET_KEY(ctx, &x86_sha512, key_length, key); +} + +static void +x86_hmac_sha512_update(struct hmac_sha512_ctx *ctx, + unsigned length, const uint8_t * data) +{ + x86_sha512_update(&ctx->state, length, data); +} + +static void +x86_hmac_sha512_digest(struct hmac_sha512_ctx *ctx, + unsigned length, uint8_t * digest) +{ + HMAC_DIGEST(ctx, &x86_sha512, length, digest); +} +#endif + +static int +_hmac_ctx_init(gnutls_mac_algorithm_t algo, struct x86_hmac_ctx *ctx) +{ + switch (algo) { + case GNUTLS_MAC_SHA1: + ctx->update = (update_func) x86_hmac_sha1_update; + ctx->digest = (digest_func) x86_hmac_sha1_digest; + ctx->setkey = (set_key_func) x86_hmac_sha1_set_key; + ctx->ctx_ptr = &ctx->ctx.sha1; + ctx->length = SHA1_DIGEST_SIZE; + break; + case GNUTLS_MAC_SHA224: + ctx->update =
(update_func) x86_hmac_sha256_update; + ctx->digest = (digest_func) x86_hmac_sha224_digest; + ctx->setkey = (set_key_func) x86_hmac_sha224_set_key; + ctx->ctx_ptr = &ctx->ctx.sha224; + ctx->length = SHA224_DIGEST_SIZE; + break; + case GNUTLS_MAC_SHA256: + ctx->update = (update_func) x86_hmac_sha256_update; + ctx->digest = (digest_func) x86_hmac_sha256_digest; + ctx->setkey = (set_key_func) x86_hmac_sha256_set_key; + ctx->ctx_ptr = &ctx->ctx.sha256; + ctx->length = SHA256_DIGEST_SIZE; + break; +#ifdef ENABLE_SHA512 + case GNUTLS_MAC_SHA384: + ctx->update = (update_func) x86_hmac_sha512_update; + ctx->digest = (digest_func) x86_hmac_sha384_digest; + ctx->setkey = (set_key_func) x86_hmac_sha384_set_key; + ctx->ctx_ptr = &ctx->ctx.sha384; + ctx->length = SHA384_DIGEST_SIZE; + break; + case GNUTLS_MAC_SHA512: + ctx->update = (update_func) x86_hmac_sha512_update; + ctx->digest = (digest_func) x86_hmac_sha512_digest; + ctx->setkey = (set_key_func) x86_hmac_sha512_set_key; + ctx->ctx_ptr = &ctx->ctx.sha512; + ctx->length = SHA512_DIGEST_SIZE; + break; +#endif + default: + gnutls_assert(); + return GNUTLS_E_INVALID_REQUEST; + } + + return 0; +} + + +static int wrap_x86_hmac_init(gnutls_mac_algorithm_t algo, void **_ctx) +{ + struct x86_hmac_ctx *ctx; + int ret; + + ctx = gnutls_calloc(1, sizeof(struct x86_hmac_ctx)); + if (ctx == NULL) { + gnutls_assert(); + return GNUTLS_E_MEMORY_ERROR; + } + + ctx->algo = algo; + + ret = _hmac_ctx_init(algo, ctx); + if (ret < 0) + return gnutls_assert_val(ret); + + *_ctx = ctx; + + return 0; +} + +static int +wrap_x86_hmac_setkey(void *_ctx, const void *key, size_t keylen) +{ + struct x86_hmac_ctx *ctx = _ctx; + + ctx->setkey(ctx->ctx_ptr, keylen, key); + + return GNUTLS_E_SUCCESS; +} + +static int +wrap_x86_hmac_update(void *_ctx, const void *text, size_t textsize) +{ + struct x86_hmac_ctx *ctx = _ctx; + + ctx->update(ctx->ctx_ptr, textsize, text); + + return GNUTLS_E_SUCCESS; +} + +static int +wrap_x86_hmac_output(void *src_ctx, void *digest, size_t digestsize) +{ + struct x86_hmac_ctx *ctx; + ctx = src_ctx; + + if (digestsize < ctx->length) { + gnutls_assert(); + return GNUTLS_E_SHORT_MEMORY_BUFFER; + } + + ctx->digest(ctx->ctx_ptr, digestsize, digest); + + return 0; +} + +static void wrap_x86_hmac_deinit(void *hd) +{ + gnutls_free(hd); +} + +static int wrap_x86_hmac_fast(gnutls_mac_algorithm_t algo, + const void *nonce, size_t nonce_size, + const void *key, size_t key_size, + const void *text, size_t text_size, + void *digest) +{ + struct x86_hmac_ctx ctx; + int ret; + + ret = _hmac_ctx_init(algo, &ctx); + if (ret < 0) + return gnutls_assert_val(ret); + + ctx.setkey(&ctx, key_size, key); + ctx.update(&ctx, text_size, text); + ctx.digest(&ctx, ctx.length, digest); + + zeroize_temp_key(&ctx, sizeof(ctx)); + + return 0; +} + +const gnutls_crypto_mac_st hmac_sha_x86_struct = { + .init = wrap_x86_hmac_init, + .setkey = wrap_x86_hmac_setkey, + .setnonce = NULL, + .hash = wrap_x86_hmac_update, + .output = wrap_x86_hmac_output, + .deinit = wrap_x86_hmac_deinit, + .fast = wrap_x86_hmac_fast, +}; + +#endif /* HAVE_LIBNETTLE */ diff --git a/lib/accelerated/x86/macosx/appro-aes-x86-macosx.s b/lib/accelerated/x86/macosx/aesni-x86.s index 92313599bd..4cb2d98af9 100644 --- a/lib/accelerated/x86/macosx/appro-aes-x86-macosx.s +++ b/lib/accelerated/x86/macosx/aesni-x86.s @@ -1,4 +1,3 @@ -/* # Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> # All rights reserved. 
# @@ -38,7 +37,6 @@ # # *** This file is auto-generated *** # -*/ .file "devel/perlasm/aesni-x86.s" .text .globl _aesni_encrypt @@ -2146,3 +2144,5 @@ L100dec_key_ret: .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/appro-aes-x86-64-macosx.s b/lib/accelerated/x86/macosx/aesni-x86_64.s index e2cfa17951..85f26e05d4 100644 --- a/lib/accelerated/x86/macosx/appro-aes-x86-64-macosx.s +++ b/lib/accelerated/x86/macosx/aesni-x86_64.s @@ -1,4 +1,3 @@ -/* # Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> # All rights reserved. # @@ -38,7 +37,6 @@ # # *** This file is auto-generated *** # -*/ .text .globl _aesni_encrypt @@ -2973,3 +2971,5 @@ L$increment1: .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/cpuid-x86-64-macosx.s b/lib/accelerated/x86/macosx/cpuid-x86-64-macosx.s deleted file mode 100644 index 26adc5b445..0000000000 --- a/lib/accelerated/x86/macosx/cpuid-x86-64-macosx.s +++ /dev/null @@ -1,75 +0,0 @@ -/* -# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain copyright notices, -# this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# * Neither the name of the Andy Polyakov nor the names of its -# copyright holder and contributors may be used to endorse or -# promote products derived from this software without specific -# prior written permission. -# -# ALTERNATIVELY, provided that this notice is retained in full, this -# product may be distributed under the terms of the GNU General Public -# License (GPL), in which case the provisions of the GPL apply INSTEAD OF -# those given above. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# -# *** This file is auto-generated *** -# -*/ -.text -.globl _gnutls_cpuid - -.p2align 4 -_gnutls_cpuid: - pushq %rbp - movq %rsp,%rbp - pushq %rbx - movl %edi,-12(%rbp) - movq %rsi,-24(%rbp) - movq %rdx,-32(%rbp) - movq %rcx,-40(%rbp) - movq %r8,-48(%rbp) - movl -12(%rbp),%eax - movl %eax,-60(%rbp) - movl -60(%rbp),%eax - cpuid - movl %edx,-56(%rbp) - movl %ecx,%esi - movl %eax,-52(%rbp) - movq -24(%rbp),%rax - movl -52(%rbp),%edx - movl %edx,(%rax) - movq -32(%rbp),%rax - movl %ebx,(%rax) - movq -40(%rbp),%rax - movl %esi,(%rax) - movq -48(%rbp),%rax - movl -56(%rbp),%ecx - movl %ecx,(%rax) - popq %rbx - leave - .byte 0xf3,0xc3 - diff --git a/lib/accelerated/x86/macosx/cpuid-x86-macosx.s b/lib/accelerated/x86/macosx/cpuid-x86-macosx.s deleted file mode 100644 index 0078f3d06f..0000000000 --- a/lib/accelerated/x86/macosx/cpuid-x86-macosx.s +++ /dev/null @@ -1,87 +0,0 @@ -/* -# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain copyright notices, -# this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# * Neither the name of the Andy Polyakov nor the names of its -# copyright holder and contributors may be used to endorse or -# promote products derived from this software without specific -# prior written permission. -# -# ALTERNATIVELY, provided that this notice is retained in full, this -# product may be distributed under the terms of the GNU General Public -# License (GPL), in which case the provisions of the GPL apply INSTEAD OF -# those given above. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# -# *** This file is auto-generated *** -# -*/ -.file "devel/perlasm/cpuid-x86.s" -.text -.globl _gnutls_cpuid -.align 4 -_gnutls_cpuid: -L_gnutls_cpuid_begin: - pushl %ebp - movl %esp,%ebp - subl $12,%esp - movl %ebx,(%esp) - movl 8(%ebp),%eax - movl %esi,4(%esp) - movl %edi,8(%esp) - pushl %ebx - .byte 0x0f,0xa2 - movl %ebx,%edi - popl %ebx - movl %edx,%esi - movl 12(%ebp),%edx - movl %eax,(%edx) - movl 16(%ebp),%eax - movl %edi,(%eax) - movl 20(%ebp),%eax - movl %ecx,(%eax) - movl 24(%ebp),%eax - movl %esi,(%eax) - movl (%esp),%ebx - movl 4(%esp),%esi - movl 8(%esp),%edi - movl %ebp,%esp - popl %ebp - ret -.globl _gnutls_have_cpuid -.align 4 -_gnutls_have_cpuid: -L_gnutls_have_cpuid_begin: - pushfl - popl %eax - orl $2097152,%eax - pushl %eax - popfl - pushfl - popl %eax - andl $2097152,%eax - ret -.byte 67,80,85,73,68,32,102,111,114,32,120,56,54,0 diff --git a/lib/accelerated/x86/macosx/cpuid-x86.s b/lib/accelerated/x86/macosx/cpuid-x86.s new file mode 100644 index 0000000000..978b232e74 --- /dev/null +++ b/lib/accelerated/x86/macosx/cpuid-x86.s @@ -0,0 +1,70 @@ +# +# Copyright (C) 2011-2012 Free Software Foundation, Inc. +# +# Author: Nikos Mavrogiannopoulos +# +# This file is part of GnuTLS. +# +# The GnuTLS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# *** This file is auto-generated *** +# +.file "devel/perlasm/cpuid-x86.s" +.text +.globl _gnutls_cpuid +.align 4 +_gnutls_cpuid: +L_gnutls_cpuid_begin: + pushl %ebp + movl %esp,%ebp + subl $12,%esp + movl %ebx,(%esp) + movl 8(%ebp),%eax + movl %esi,4(%esp) + movl %edi,8(%esp) + pushl %ebx + .byte 0x0f,0xa2 + movl %ebx,%edi + popl %ebx + movl %edx,%esi + movl 12(%ebp),%edx + movl %eax,(%edx) + movl 16(%ebp),%eax + movl %edi,(%eax) + movl 20(%ebp),%eax + movl %ecx,(%eax) + movl 24(%ebp),%eax + movl %esi,(%eax) + movl (%esp),%ebx + movl 4(%esp),%esi + movl 8(%esp),%edi + movl %ebp,%esp + popl %ebp + ret +.globl _gnutls_have_cpuid +.align 4 +_gnutls_have_cpuid: +L_gnutls_have_cpuid_begin: + pushfl + popl %eax + orl $2097152,%eax + pushl %eax + popfl + pushfl + popl %eax + andl $2097152,%eax + ret +.byte 67,80,85,73,68,32,102,111,114,32,120,56,54,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/cpuid-x86_64.s b/lib/accelerated/x86/macosx/cpuid-x86_64.s new file mode 100644 index 0000000000..cf8fea99a2 --- /dev/null +++ b/lib/accelerated/x86/macosx/cpuid-x86_64.s @@ -0,0 +1,58 @@ +# +# Copyright (C) 2011-2012 Free Software Foundation, Inc. +# +# Author: Nikos Mavrogiannopoulos +# +# This file is part of GnuTLS. +# +# The GnuTLS is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# *** This file is auto-generated *** +# +.text +.globl _gnutls_cpuid + +.p2align 4 +_gnutls_cpuid: + pushq %rbp + movq %rsp,%rbp + pushq %rbx + movl %edi,-12(%rbp) + movq %rsi,-24(%rbp) + movq %rdx,-32(%rbp) + movq %rcx,-40(%rbp) + movq %r8,-48(%rbp) + movl -12(%rbp),%eax + movl %eax,-60(%rbp) + movl -60(%rbp),%eax + cpuid + movl %edx,-56(%rbp) + movl %ecx,%esi + movl %eax,-52(%rbp) + movq -24(%rbp),%rax + movl -52(%rbp),%edx + movl %edx,(%rax) + movq -32(%rbp),%rax + movl %ebx,(%rax) + movq -40(%rbp),%rax + movl %esi,(%rax) + movq -48(%rbp),%rax + movl -56(%rbp),%ecx + movl %ecx,(%rax) + popq %rbx + leave + .byte 0xf3,0xc3 + + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/padlock-x86-macosx.s b/lib/accelerated/x86/macosx/e_padlock-x86.s index 1a2fa9246b..4bdadb99a1 100644 --- a/lib/accelerated/x86/macosx/padlock-x86-macosx.s +++ b/lib/accelerated/x86/macosx/e_padlock-x86.s @@ -1,4 +1,3 @@ -/* # Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> # All rights reserved. # @@ -38,7 +37,6 @@ # # *** This file is auto-generated *** # -*/ .file "devel/perlasm/e_padlock-x86.s" .text .globl _padlock_capability @@ -1034,3 +1032,5 @@ L_padlock_sha512_blocks_begin: .align 2,0x90 Lpadlock_saved_context: .long 0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/padlock-x86-64-macosx.s b/lib/accelerated/x86/macosx/e_padlock-x86_64.s index 1327e82172..29723b3714 100644 --- a/lib/accelerated/x86/macosx/padlock-x86-64-macosx.s +++ b/lib/accelerated/x86/macosx/e_padlock-x86_64.s @@ -1,4 +1,3 @@ -/* # Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> # All rights reserved. # @@ -38,7 +37,6 @@ # # *** This file is auto-generated *** # -*/ .text .globl _padlock_capability @@ -1065,3 +1063,5 @@ L$ctr32_abort: .p2align 3 L$padlock_saved_context: .quad 0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/appro-aes-gcm-x86-64-macosx.s b/lib/accelerated/x86/macosx/ghash-x86_64.s index eac88aeba1..2f5ac653ab 100644 --- a/lib/accelerated/x86/macosx/appro-aes-gcm-x86-64-macosx.s +++ b/lib/accelerated/x86/macosx/ghash-x86_64.s @@ -1,4 +1,3 @@ -/* # Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> # All rights reserved. # @@ -38,7 +37,6 @@ # # *** This file is auto-generated *** # -*/ .text .globl _gcm_gmult_4bit @@ -1348,3 +1346,5 @@ L$rem_8bit: .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/openssl-cpuid-x86.s b/lib/accelerated/x86/macosx/openssl-cpuid-x86.s new file mode 100644 index 0000000000..ba4f09c67b --- /dev/null +++ b/lib/accelerated/x86/macosx/openssl-cpuid-x86.s @@ -0,0 +1,399 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.file "x86cpuid.s" +.text +.globl _OPENSSL_ia32_cpuid +.align 4 +_OPENSSL_ia32_cpuid: +L_OPENSSL_ia32_cpuid_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + xorl %edx,%edx + pushfl + popl %eax + movl %eax,%ecx + xorl $2097152,%eax + pushl %eax + popfl + pushfl + popl %eax + xorl %eax,%ecx + xorl %eax,%eax + btl $21,%ecx + jnc L000nocpuid + movl 20(%esp),%esi + movl %eax,8(%esi) + .byte 0x0f,0xa2 + movl %eax,%edi + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%ebp + cmpl $1231384169,%edx + setne %al + orl %eax,%ebp + cmpl $1818588270,%ecx + setne %al + orl %eax,%ebp + jz L001intel + cmpl $1752462657,%ebx + setne %al + movl %eax,%esi + cmpl $1769238117,%edx + setne %al + orl %eax,%esi + cmpl $1145913699,%ecx + setne %al + orl %eax,%esi + jnz L001intel + movl $2147483648,%eax + .byte 0x0f,0xa2 + cmpl $2147483649,%eax + jb L001intel + movl %eax,%esi + movl $2147483649,%eax + .byte 0x0f,0xa2 + orl %ecx,%ebp + andl $2049,%ebp + cmpl $2147483656,%esi + jb L001intel + movl $2147483656,%eax + .byte 0x0f,0xa2 + movzbl %cl,%esi + incl %esi + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + btl $28,%edx + jnc L002generic + shrl $16,%ebx + andl $255,%ebx + cmpl %esi,%ebx + ja L002generic + andl $4026531839,%edx + jmp L002generic +L001intel: + cmpl $7,%edi + jb L003cacheinfo + movl 20(%esp),%esi + movl $7,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + movl %ebx,8(%esi) +L003cacheinfo: + cmpl $4,%edi + movl $-1,%edi + jb L004nocacheinfo + movl $4,%eax + movl $0,%ecx + .byte 0x0f,0xa2 + movl %eax,%edi + shrl $14,%edi + andl $4095,%edi +L004nocacheinfo: + movl $1,%eax + xorl %ecx,%ecx + .byte 0x0f,0xa2 + andl $3220176895,%edx + cmpl $0,%ebp + jne L005notintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + 
jne L005notintel + orl $1048576,%edx +L005notintel: + btl $28,%edx + jnc L002generic + andl $4026531839,%edx + cmpl $0,%edi + je L002generic + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja L002generic + andl $4026531839,%edx +L002generic: + andl $2048,%ebp + andl $4294965247,%ecx + movl %edx,%esi + orl %ecx,%ebp + btl $27,%ecx + jnc L006clear_avx + xorl %ecx,%ecx +.byte 15,1,208 + andl $6,%eax + cmpl $6,%eax + je L007done + cmpl $2,%eax + je L006clear_avx +L008clear_xmm: + andl $4261412861,%ebp + andl $4278190079,%esi +L006clear_avx: + andl $4026525695,%ebp + movl 20(%esp),%edi + andl $4294967263,8(%edi) +L007done: + movl %esi,%eax + movl %ebp,%edx +L000nocpuid: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _OPENSSL_rdtsc +.align 4 +_OPENSSL_rdtsc: +L_OPENSSL_rdtsc_begin: + xorl %eax,%eax + xorl %edx,%edx + call L009PIC_me_up +L009PIC_me_up: + popl %ecx + movl L__gnutls_x86_cpuid_s$non_lazy_ptr-L009PIC_me_up(%ecx),%ecx + btl $4,(%ecx) + jnc L010notsc + .byte 0x0f,0x31 +L010notsc: + ret +.globl _OPENSSL_instrument_halt +.align 4 +_OPENSSL_instrument_halt: +L_OPENSSL_instrument_halt_begin: + call L011PIC_me_up +L011PIC_me_up: + popl %ecx + movl L__gnutls_x86_cpuid_s$non_lazy_ptr-L011PIC_me_up(%ecx),%ecx + btl $4,(%ecx) + jnc L012nohalt +.long 2421723150 + andl $3,%eax + jnz L012nohalt + pushfl + popl %eax + btl $9,%eax + jnc L012nohalt + .byte 0x0f,0x31 + pushl %edx + pushl %eax + hlt + .byte 0x0f,0x31 + subl (%esp),%eax + sbbl 4(%esp),%edx + addl $8,%esp + ret +L012nohalt: + xorl %eax,%eax + xorl %edx,%edx + ret +.globl _OPENSSL_far_spin +.align 4 +_OPENSSL_far_spin: +L_OPENSSL_far_spin_begin: + pushfl + popl %eax + btl $9,%eax + jnc L013nospin + movl 4(%esp),%eax + movl 8(%esp),%ecx +.long 2430111262 + xorl %eax,%eax + movl (%ecx),%edx + jmp L014spin +.align 4,0x90 +L014spin: + incl %eax + cmpl (%ecx),%edx + je L014spin +.long 529567888 + ret +L013nospin: + xorl %eax,%eax + xorl %edx,%edx + ret +.globl _OPENSSL_wipe_cpu +.align 4 +_OPENSSL_wipe_cpu: +L_OPENSSL_wipe_cpu_begin: + xorl %eax,%eax + xorl %edx,%edx + call L015PIC_me_up +L015PIC_me_up: + popl %ecx + movl L__gnutls_x86_cpuid_s$non_lazy_ptr-L015PIC_me_up(%ecx),%ecx + movl (%ecx),%ecx + btl $1,(%ecx) + jnc L016no_x87 +.long 4007259865,4007259865,4007259865,4007259865,2430851995 +L016no_x87: + leal 4(%esp),%eax + ret +.globl _OPENSSL_atomic_add +.align 4 +_OPENSSL_atomic_add: +L_OPENSSL_atomic_add_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + pushl %ebx + nop + movl (%edx),%eax +L017spin: + leal (%eax,%ecx,1),%ebx + nop +.long 447811568 + jne L017spin + movl %ebx,%eax + popl %ebx + ret +.globl _OPENSSL_indirect_call +.align 4 +_OPENSSL_indirect_call: +L_OPENSSL_indirect_call_begin: + pushl %ebp + movl %esp,%ebp + subl $28,%esp + movl 12(%ebp),%ecx + movl %ecx,(%esp) + movl 16(%ebp),%edx + movl %edx,4(%esp) + movl 20(%ebp),%eax + movl %eax,8(%esp) + movl 24(%ebp),%eax + movl %eax,12(%esp) + movl 28(%ebp),%eax + movl %eax,16(%esp) + movl 32(%ebp),%eax + movl %eax,20(%esp) + movl 36(%ebp),%eax + movl %eax,24(%esp) + call *8(%ebp) + movl %ebp,%esp + popl %ebp + ret +.globl _OPENSSL_cleanse +.align 4 +_OPENSSL_cleanse: +L_OPENSSL_cleanse_begin: + movl 4(%esp),%edx + movl 8(%esp),%ecx + xorl %eax,%eax + cmpl $7,%ecx + jae L018lot + cmpl $0,%ecx + je L019ret +L020little: + movb %al,(%edx) + subl $1,%ecx + leal 1(%edx),%edx + jnz L020little +L019ret: + ret +.align 4,0x90 +L018lot: + testl $3,%edx + jz L021aligned + movb %al,(%edx) + leal -1(%ecx),%ecx + leal 1(%edx),%edx + jmp L018lot +L021aligned: + movl 
%eax,(%edx) + leal -4(%ecx),%ecx + testl $-4,%ecx + leal 4(%edx),%edx + jnz L021aligned + cmpl $0,%ecx + jne L020little + ret +.globl _OPENSSL_instrument_bus +.align 4 +_OPENSSL_instrument_bus: +L_OPENSSL_instrument_bus_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl $0,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _OPENSSL_instrument_bus2 +.align 4 +_OPENSSL_instrument_bus2: +L_OPENSSL_instrument_bus2_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl $0,%eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.globl _OPENSSL_ia32_rdrand +.align 4 +_OPENSSL_ia32_rdrand: +L_OPENSSL_ia32_rdrand_begin: + movl $8,%ecx +L022loop: +.byte 15,199,240 + jc L023break + loop L022loop +L023break: + cmpl $0,%eax + cmovel %ecx,%eax + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L__gnutls_x86_cpuid_s$non_lazy_ptr: +.indirect_symbol __gnutls_x86_cpuid_s +.long 0 +.comm __gnutls_x86_cpuid_s,16,2 +.mod_init_func +.align 2 +.long _OPENSSL_cpuid_setup + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/openssl-cpuid-x86_64.s b/lib/accelerated/x86/macosx/openssl-cpuid-x86_64.s new file mode 100644 index 0000000000..8e12261242 --- /dev/null +++ b/lib/accelerated/x86/macosx/openssl-cpuid-x86_64.s @@ -0,0 +1,365 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# *** This file is auto-generated *** +# + +.private_extern _OPENSSL_cpuid_setup +.mod_init_func + .p2align 3 + .quad _OPENSSL_cpuid_setup + +.private_extern __gnutls_x86_cpuid_s +.comm __gnutls_x86_cpuid_s,16,2 + +.text + +.globl _OPENSSL_atomic_add + +.p2align 4 +_OPENSSL_atomic_add: + movl (%rdi),%eax +L$spin: leaq (%rsi,%rax,1),%r8 +.byte 0xf0 + cmpxchgl %r8d,(%rdi) + jne L$spin + movl %r8d,%eax +.byte 0x48,0x98 + .byte 0xf3,0xc3 + + +.globl _OPENSSL_rdtsc + +.p2align 4 +_OPENSSL_rdtsc: + rdtsc + shlq $32,%rdx + orq %rdx,%rax + .byte 0xf3,0xc3 + + +.globl _OPENSSL_ia32_cpuid + +.p2align 4 +_OPENSSL_ia32_cpuid: + movq %rbx,%r8 + + xorl %eax,%eax + movl %eax,8(%rdi) + cpuid + movl %eax,%r11d + + xorl %eax,%eax + cmpl $1970169159,%ebx + setne %al + movl %eax,%r9d + cmpl $1231384169,%edx + setne %al + orl %eax,%r9d + cmpl $1818588270,%ecx + setne %al + orl %eax,%r9d + jz L$intel + + cmpl $1752462657,%ebx + setne %al + movl %eax,%r10d + cmpl $1769238117,%edx + setne %al + orl %eax,%r10d + cmpl $1145913699,%ecx + setne %al + orl %eax,%r10d + jnz L$intel + + + movl $2147483648,%eax + cpuid + cmpl $2147483649,%eax + jb L$intel + movl %eax,%r10d + movl $2147483649,%eax + cpuid + orl %ecx,%r9d + andl $2049,%r9d + + cmpl $2147483656,%r10d + jb L$intel + + movl $2147483656,%eax + cpuid + movzbq %cl,%r10 + incq %r10 + + movl $1,%eax + cpuid + btl $28,%edx + jnc L$generic + shrl $16,%ebx + cmpb %r10b,%bl + ja L$generic + andl $4026531839,%edx + jmp L$generic + +L$intel: + cmpl $4,%r11d + movl $-1,%r10d + jb L$nocacheinfo + + movl $4,%eax + movl $0,%ecx + cpuid + movl %eax,%r10d + shrl $14,%r10d + andl $4095,%r10d + + cmpl $7,%r11d + jb L$nocacheinfo + + movl $7,%eax + xorl %ecx,%ecx + cpuid + movl %ebx,8(%rdi) + +L$nocacheinfo: + movl $1,%eax + cpuid + andl $3220176895,%edx + cmpl $0,%r9d + jne L$notintel + orl $1073741824,%edx + andb $15,%ah + cmpb $15,%ah + jne L$notintel + orl $1048576,%edx +L$notintel: + btl $28,%edx + jnc L$generic + andl $4026531839,%edx + cmpl $0,%r10d + je L$generic + + orl $268435456,%edx + shrl $16,%ebx + cmpb $1,%bl + ja L$generic + andl $4026531839,%edx +L$generic: + andl $2048,%r9d + andl $4294965247,%ecx + orl %ecx,%r9d + + movl %edx,%r10d + btl $27,%r9d + jnc L$clear_avx + xorl %ecx,%ecx +.byte 0x0f,0x01,0xd0 + andl $6,%eax + cmpl $6,%eax + je L$done +L$clear_avx: + movl $4026525695,%eax + andl %eax,%r9d + andl $4294967263,8(%rdi) +L$done: + shlq $32,%r9 + movl %r10d,%eax + movq %r8,%rbx + orq %r9,%rax + .byte 0xf3,0xc3 + + +.globl _OPENSSL_cleanse + +.p2align 4 +_OPENSSL_cleanse: + xorq %rax,%rax + cmpq $15,%rsi + jae L$ot + cmpq $0,%rsi + je L$ret +L$ittle: + movb %al,(%rdi) + subq $1,%rsi + leaq 1(%rdi),%rdi + jnz L$ittle +L$ret: + .byte 0xf3,0xc3 +.p2align 4 +L$ot: + testq $7,%rdi + jz L$aligned + movb %al,(%rdi) + leaq -1(%rsi),%rsi + leaq 1(%rdi),%rdi + jmp L$ot +L$aligned: + movq %rax,(%rdi) + leaq -8(%rsi),%rsi + testq $-8,%rsi + leaq 8(%rdi),%rdi + jnz L$aligned + cmpq $0,%rsi + jne L$ittle + .byte 0xf3,0xc3 + +.globl _OPENSSL_wipe_cpu + +.p2align 4 +_OPENSSL_wipe_cpu: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 + pxor %xmm10,%xmm10 + pxor %xmm11,%xmm11 + pxor %xmm12,%xmm12 + pxor %xmm13,%xmm13 + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 + xorq %rcx,%rcx + xorq %rdx,%rdx + xorq %rsi,%rsi + xorq %rdi,%rdi + xorq %r8,%r8 + xorq %r9,%r9 + xorq %r10,%r10 + xorq %r11,%r11 + leaq 8(%rsp),%rax + .byte 0xf3,0xc3 + +.globl 
_OPENSSL_instrument_bus + +.p2align 4 +_OPENSSL_instrument_bus: + movq %rdi,%r10 + movq %rsi,%rcx + movq %rsi,%r11 + + rdtsc + movl %eax,%r8d + movl $0,%r9d + clflush (%r10) +.byte 0xf0 + addl %r9d,(%r10) + jmp L$oop +.p2align 4 +L$oop: rdtsc + movl %eax,%edx + subl %r8d,%eax + movl %edx,%r8d + movl %eax,%r9d + clflush (%r10) +.byte 0xf0 + addl %eax,(%r10) + leaq 4(%r10),%r10 + subq $1,%rcx + jnz L$oop + + movq %r11,%rax + .byte 0xf3,0xc3 + + +.globl _OPENSSL_instrument_bus2 + +.p2align 4 +_OPENSSL_instrument_bus2: + movq %rdi,%r10 + movq %rsi,%rcx + movq %rdx,%r11 + movq %rcx,8(%rsp) + + rdtsc + movl %eax,%r8d + movl $0,%r9d + + clflush (%r10) +.byte 0xf0 + addl %r9d,(%r10) + + rdtsc + movl %eax,%edx + subl %r8d,%eax + movl %edx,%r8d + movl %eax,%r9d +L$oop2: + clflush (%r10) +.byte 0xf0 + addl %eax,(%r10) + + subq $1,%r11 + jz L$done2 + + rdtsc + movl %eax,%edx + subl %r8d,%eax + movl %edx,%r8d + cmpl %r9d,%eax + movl %eax,%r9d + movl $0,%edx + setne %dl + subq %rdx,%rcx + leaq (%r10,%rdx,4),%r10 + jnz L$oop2 + +L$done2: + movq 8(%rsp),%rax + subq %rcx,%rax + .byte 0xf3,0xc3 + +.globl _OPENSSL_ia32_rdrand + +.p2align 4 +_OPENSSL_ia32_rdrand: + movl $8,%ecx +L$oop_rdrand: +.byte 72,15,199,240 + jc L$break_rdrand + loop L$oop_rdrand +L$break_rdrand: + cmpq $0,%rax + cmoveq %rcx,%rax + .byte 0xf3,0xc3 + + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/sha1-ssse3-x86.s b/lib/accelerated/x86/macosx/sha1-ssse3-x86.s new file mode 100644 index 0000000000..0e0c719af2 --- /dev/null +++ b/lib/accelerated/x86/macosx/sha1-ssse3-x86.s @@ -0,0 +1,1419 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# *** This file is auto-generated *** +# +.file "sha1-586.s" +.text +.globl _sha1_block_data_order +.align 4 +_sha1_block_data_order: +L_sha1_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%ebp + movl 24(%esp),%esi + movl 28(%esp),%eax + subl $76,%esp + shll $6,%eax + addl %esi,%eax + movl %eax,104(%esp) + movl 16(%ebp),%edi + jmp L000loop +.align 4,0x90 +L000loop: + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,(%esp) + movl %ebx,4(%esp) + movl %ecx,8(%esp) + movl %edx,12(%esp) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,16(%esp) + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %edx,28(%esp) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,40(%esp) + movl %edx,44(%esp) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax,48(%esp) + movl %ebx,52(%esp) + movl %ecx,56(%esp) + movl %edx,60(%esp) + movl %esi,100(%esp) + movl (%ebp),%eax + movl 4(%ebp),%ebx + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + # 00_15 0 + movl %ecx,%esi + movl %eax,%ebp + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl (%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 1 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 4(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 2 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 8(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 3 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 12(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + addl %ecx,%ebp + # 00_15 4 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 16(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 5 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 20(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 6 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 24(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 7 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 28(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 8 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 32(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 9 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 36(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + 
addl %ecx,%ebp + # 00_15 10 + movl %edi,%ebx + movl %ebp,%ecx + roll $5,%ebp + xorl %esi,%ebx + addl %eax,%ebp + movl 40(%esp),%eax + andl %edx,%ebx + rorl $2,%edx + xorl %esi,%ebx + leal 1518500249(%ebp,%eax,1),%ebp + addl %ebx,%ebp + # 00_15 11 + movl %edx,%eax + movl %ebp,%ebx + roll $5,%ebp + xorl %edi,%eax + addl %esi,%ebp + movl 44(%esp),%esi + andl %ecx,%eax + rorl $2,%ecx + xorl %edi,%eax + leal 1518500249(%ebp,%esi,1),%ebp + addl %eax,%ebp + # 00_15 12 + movl %ecx,%esi + movl %ebp,%eax + roll $5,%ebp + xorl %edx,%esi + addl %edi,%ebp + movl 48(%esp),%edi + andl %ebx,%esi + rorl $2,%ebx + xorl %edx,%esi + leal 1518500249(%ebp,%edi,1),%ebp + addl %esi,%ebp + # 00_15 13 + movl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + xorl %ecx,%edi + addl %edx,%ebp + movl 52(%esp),%edx + andl %eax,%edi + rorl $2,%eax + xorl %ecx,%edi + leal 1518500249(%ebp,%edx,1),%ebp + addl %edi,%ebp + # 00_15 14 + movl %eax,%edx + movl %ebp,%edi + roll $5,%ebp + xorl %ebx,%edx + addl %ecx,%ebp + movl 56(%esp),%ecx + andl %esi,%edx + rorl $2,%esi + xorl %ebx,%edx + leal 1518500249(%ebp,%ecx,1),%ebp + addl %edx,%ebp + # 00_15 15 + movl %esi,%ecx + movl %ebp,%edx + roll $5,%ebp + xorl %eax,%ecx + addl %ebx,%ebp + movl 60(%esp),%ebx + andl %edi,%ecx + rorl $2,%edi + xorl %eax,%ecx + leal 1518500249(%ebp,%ebx,1),%ebp + movl (%esp),%ebx + addl %ebp,%ecx + # 16_19 16 + movl %edi,%ebp + xorl 8(%esp),%ebx + xorl %esi,%ebp + xorl 32(%esp),%ebx + andl %edx,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + xorl %esi,%ebp + addl %ebp,%eax + movl %ecx,%ebp + rorl $2,%edx + movl %ebx,(%esp) + roll $5,%ebp + leal 1518500249(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 16_19 17 + movl %edx,%ebp + xorl 12(%esp),%eax + xorl %edi,%ebp + xorl 36(%esp),%eax + andl %ecx,%ebp + xorl 56(%esp),%eax + roll $1,%eax + xorl %edi,%ebp + addl %ebp,%esi + movl %ebx,%ebp + rorl $2,%ecx + movl %eax,4(%esp) + roll $5,%ebp + leal 1518500249(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 16_19 18 + movl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 40(%esp),%esi + andl %ebx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + xorl %edx,%ebp + addl %ebp,%edi + movl %eax,%ebp + rorl $2,%ebx + movl %esi,8(%esp) + roll $5,%ebp + leal 1518500249(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + # 16_19 19 + movl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 44(%esp),%edi + andl %eax,%ebp + xorl (%esp),%edi + roll $1,%edi + xorl %ecx,%ebp + addl %ebp,%edx + movl %esi,%ebp + rorl $2,%eax + movl %edi,12(%esp) + roll $5,%ebp + leal 1518500249(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 20 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 21 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + # 20_39 22 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + # 20_39 23 + movl %ecx,%ebp + xorl 
36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 24 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 25 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + # 20_39 26 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 27 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 28 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 29 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,52(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 30 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,56(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + # 20_39 31 + movl %eax,%ebp + xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,60(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl (%esp),%edx + addl %ebp,%edi + # 20_39 32 + movl %esi,%ebp + xorl 8(%esp),%edx + xorl %eax,%ebp + xorl 32(%esp),%edx + xorl %ebx,%ebp + xorl 52(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 4(%esp),%ecx + addl %ebp,%edx + # 20_39 33 + movl %edi,%ebp + xorl 12(%esp),%ecx + xorl %esi,%ebp + xorl 36(%esp),%ecx + xorl %eax,%ebp + xorl 56(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,4(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 8(%esp),%ebx + addl %ebp,%ecx + # 20_39 34 + movl %edx,%ebp + xorl 16(%esp),%ebx + xorl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl 60(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,8(%esp) + leal 1859775393(%ebx,%eax,1),%ebx + movl 12(%esp),%eax + addl 
%ebp,%ebx + # 20_39 35 + movl %ecx,%ebp + xorl 20(%esp),%eax + xorl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl (%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,12(%esp) + leal 1859775393(%eax,%esi,1),%eax + movl 16(%esp),%esi + addl %ebp,%eax + # 20_39 36 + movl %ebx,%ebp + xorl 24(%esp),%esi + xorl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 4(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,16(%esp) + leal 1859775393(%esi,%edi,1),%esi + movl 20(%esp),%edi + addl %ebp,%esi + # 20_39 37 + movl %eax,%ebp + xorl 28(%esp),%edi + xorl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 8(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,20(%esp) + leal 1859775393(%edi,%edx,1),%edi + movl 24(%esp),%edx + addl %ebp,%edi + # 20_39 38 + movl %esi,%ebp + xorl 32(%esp),%edx + xorl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 12(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,24(%esp) + leal 1859775393(%edx,%ecx,1),%edx + movl 28(%esp),%ecx + addl %ebp,%edx + # 20_39 39 + movl %edi,%ebp + xorl 36(%esp),%ecx + xorl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 16(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,28(%esp) + leal 1859775393(%ecx,%ebx,1),%ecx + movl 32(%esp),%ebx + addl %ebp,%ecx + # 40_59 40 + movl %edi,%ebp + xorl 40(%esp),%ebx + xorl %esi,%ebp + xorl (%esp),%ebx + andl %edx,%ebp + xorl 20(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,32(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 36(%esp),%eax + addl %ebp,%ebx + # 40_59 41 + movl %edx,%ebp + xorl 44(%esp),%eax + xorl %edi,%ebp + xorl 4(%esp),%eax + andl %ecx,%ebp + xorl 24(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,36(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 40(%esp),%esi + addl %ebp,%eax + # 40_59 42 + movl %ecx,%ebp + xorl 48(%esp),%esi + xorl %edx,%ebp + xorl 8(%esp),%esi + andl %ebx,%ebp + xorl 28(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,40(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 44(%esp),%edi + addl %ebp,%esi + # 40_59 43 + movl %ebx,%ebp + xorl 52(%esp),%edi + xorl %ecx,%ebp + xorl 12(%esp),%edi + andl %eax,%ebp + xorl 32(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,44(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 48(%esp),%edx + addl %ebp,%edi + # 40_59 44 + movl %eax,%ebp + xorl 56(%esp),%edx + xorl %ebx,%ebp + xorl 16(%esp),%edx + andl %esi,%ebp + xorl 36(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,48(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 52(%esp),%ecx + addl %ebp,%edx + # 40_59 45 + movl %esi,%ebp + xorl 60(%esp),%ecx + xorl %eax,%ebp + xorl 20(%esp),%ecx + andl %edi,%ebp + xorl 40(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,52(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl 
%ebx,%ecx + andl %eax,%ebp + movl 56(%esp),%ebx + addl %ebp,%ecx + # 40_59 46 + movl %edi,%ebp + xorl (%esp),%ebx + xorl %esi,%ebp + xorl 24(%esp),%ebx + andl %edx,%ebp + xorl 44(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,56(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 60(%esp),%eax + addl %ebp,%ebx + # 40_59 47 + movl %edx,%ebp + xorl 4(%esp),%eax + xorl %edi,%ebp + xorl 28(%esp),%eax + andl %ecx,%ebp + xorl 48(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,60(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl (%esp),%esi + addl %ebp,%eax + # 40_59 48 + movl %ecx,%ebp + xorl 8(%esp),%esi + xorl %edx,%ebp + xorl 32(%esp),%esi + andl %ebx,%ebp + xorl 52(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 4(%esp),%edi + addl %ebp,%esi + # 40_59 49 + movl %ebx,%ebp + xorl 12(%esp),%edi + xorl %ecx,%ebp + xorl 36(%esp),%edi + andl %eax,%ebp + xorl 56(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,4(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 8(%esp),%edx + addl %ebp,%edi + # 40_59 50 + movl %eax,%ebp + xorl 16(%esp),%edx + xorl %ebx,%ebp + xorl 40(%esp),%edx + andl %esi,%ebp + xorl 60(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,8(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 12(%esp),%ecx + addl %ebp,%edx + # 40_59 51 + movl %esi,%ebp + xorl 20(%esp),%ecx + xorl %eax,%ebp + xorl 44(%esp),%ecx + andl %edi,%ebp + xorl (%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,12(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 16(%esp),%ebx + addl %ebp,%ecx + # 40_59 52 + movl %edi,%ebp + xorl 24(%esp),%ebx + xorl %esi,%ebp + xorl 48(%esp),%ebx + andl %edx,%ebp + xorl 4(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,16(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 20(%esp),%eax + addl %ebp,%ebx + # 40_59 53 + movl %edx,%ebp + xorl 28(%esp),%eax + xorl %edi,%ebp + xorl 52(%esp),%eax + andl %ecx,%ebp + xorl 8(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,20(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 24(%esp),%esi + addl %ebp,%eax + # 40_59 54 + movl %ecx,%ebp + xorl 32(%esp),%esi + xorl %edx,%ebp + xorl 56(%esp),%esi + andl %ebx,%ebp + xorl 12(%esp),%esi + roll $1,%esi + addl %edi,%ebp + rorl $2,%ebx + movl %eax,%edi + roll $5,%edi + movl %esi,24(%esp) + leal 2400959708(%esi,%ebp,1),%esi + movl %ecx,%ebp + addl %edi,%esi + andl %edx,%ebp + movl 28(%esp),%edi + addl %ebp,%esi + # 40_59 55 + movl %ebx,%ebp + xorl 36(%esp),%edi + xorl %ecx,%ebp + xorl 60(%esp),%edi + andl %eax,%ebp + xorl 16(%esp),%edi + roll $1,%edi + addl %edx,%ebp + rorl $2,%eax + movl %esi,%edx + roll $5,%edx + movl %edi,28(%esp) + leal 2400959708(%edi,%ebp,1),%edi + movl %ebx,%ebp + addl %edx,%edi + andl %ecx,%ebp + movl 32(%esp),%edx + 
addl %ebp,%edi + # 40_59 56 + movl %eax,%ebp + xorl 40(%esp),%edx + xorl %ebx,%ebp + xorl (%esp),%edx + andl %esi,%ebp + xorl 20(%esp),%edx + roll $1,%edx + addl %ecx,%ebp + rorl $2,%esi + movl %edi,%ecx + roll $5,%ecx + movl %edx,32(%esp) + leal 2400959708(%edx,%ebp,1),%edx + movl %eax,%ebp + addl %ecx,%edx + andl %ebx,%ebp + movl 36(%esp),%ecx + addl %ebp,%edx + # 40_59 57 + movl %esi,%ebp + xorl 44(%esp),%ecx + xorl %eax,%ebp + xorl 4(%esp),%ecx + andl %edi,%ebp + xorl 24(%esp),%ecx + roll $1,%ecx + addl %ebx,%ebp + rorl $2,%edi + movl %edx,%ebx + roll $5,%ebx + movl %ecx,36(%esp) + leal 2400959708(%ecx,%ebp,1),%ecx + movl %esi,%ebp + addl %ebx,%ecx + andl %eax,%ebp + movl 40(%esp),%ebx + addl %ebp,%ecx + # 40_59 58 + movl %edi,%ebp + xorl 48(%esp),%ebx + xorl %esi,%ebp + xorl 8(%esp),%ebx + andl %edx,%ebp + xorl 28(%esp),%ebx + roll $1,%ebx + addl %eax,%ebp + rorl $2,%edx + movl %ecx,%eax + roll $5,%eax + movl %ebx,40(%esp) + leal 2400959708(%ebx,%ebp,1),%ebx + movl %edi,%ebp + addl %eax,%ebx + andl %esi,%ebp + movl 44(%esp),%eax + addl %ebp,%ebx + # 40_59 59 + movl %edx,%ebp + xorl 52(%esp),%eax + xorl %edi,%ebp + xorl 12(%esp),%eax + andl %ecx,%ebp + xorl 32(%esp),%eax + roll $1,%eax + addl %esi,%ebp + rorl $2,%ecx + movl %ebx,%esi + roll $5,%esi + movl %eax,44(%esp) + leal 2400959708(%eax,%ebp,1),%eax + movl %edx,%ebp + addl %esi,%eax + andl %edi,%ebp + movl 48(%esp),%esi + addl %ebp,%eax + # 20_39 60 + movl %ebx,%ebp + xorl 56(%esp),%esi + xorl %ecx,%ebp + xorl 16(%esp),%esi + xorl %edx,%ebp + xorl 36(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,48(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 52(%esp),%edi + addl %ebp,%esi + # 20_39 61 + movl %eax,%ebp + xorl 60(%esp),%edi + xorl %ebx,%ebp + xorl 20(%esp),%edi + xorl %ecx,%ebp + xorl 40(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,52(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 56(%esp),%edx + addl %ebp,%edi + # 20_39 62 + movl %esi,%ebp + xorl (%esp),%edx + xorl %eax,%ebp + xorl 24(%esp),%edx + xorl %ebx,%ebp + xorl 44(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,56(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 60(%esp),%ecx + addl %ebp,%edx + # 20_39 63 + movl %edi,%ebp + xorl 4(%esp),%ecx + xorl %esi,%ebp + xorl 28(%esp),%ecx + xorl %eax,%ebp + xorl 48(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,60(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl (%esp),%ebx + addl %ebp,%ecx + # 20_39 64 + movl %edx,%ebp + xorl 8(%esp),%ebx + xorl %edi,%ebp + xorl 32(%esp),%ebx + xorl %esi,%ebp + xorl 52(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 4(%esp),%eax + addl %ebp,%ebx + # 20_39 65 + movl %ecx,%ebp + xorl 12(%esp),%eax + xorl %edx,%ebp + xorl 36(%esp),%eax + xorl %edi,%ebp + xorl 56(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,4(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 8(%esp),%esi + addl %ebp,%eax + # 20_39 66 + movl %ebx,%ebp + xorl 16(%esp),%esi + xorl %ecx,%ebp + xorl 40(%esp),%esi + xorl %edx,%ebp + xorl 60(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,8(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 12(%esp),%edi + addl %ebp,%esi + # 20_39 67 + movl %eax,%ebp + xorl 
20(%esp),%edi + xorl %ebx,%ebp + xorl 44(%esp),%edi + xorl %ecx,%ebp + xorl (%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,12(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 16(%esp),%edx + addl %ebp,%edi + # 20_39 68 + movl %esi,%ebp + xorl 24(%esp),%edx + xorl %eax,%ebp + xorl 48(%esp),%edx + xorl %ebx,%ebp + xorl 4(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,16(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 20(%esp),%ecx + addl %ebp,%edx + # 20_39 69 + movl %edi,%ebp + xorl 28(%esp),%ecx + xorl %esi,%ebp + xorl 52(%esp),%ecx + xorl %eax,%ebp + xorl 8(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,20(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 24(%esp),%ebx + addl %ebp,%ecx + # 20_39 70 + movl %edx,%ebp + xorl 32(%esp),%ebx + xorl %edi,%ebp + xorl 56(%esp),%ebx + xorl %esi,%ebp + xorl 12(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,24(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 28(%esp),%eax + addl %ebp,%ebx + # 20_39 71 + movl %ecx,%ebp + xorl 36(%esp),%eax + xorl %edx,%ebp + xorl 60(%esp),%eax + xorl %edi,%ebp + xorl 16(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + movl %eax,28(%esp) + leal 3395469782(%eax,%esi,1),%eax + movl 32(%esp),%esi + addl %ebp,%eax + # 20_39 72 + movl %ebx,%ebp + xorl 40(%esp),%esi + xorl %ecx,%ebp + xorl (%esp),%esi + xorl %edx,%ebp + xorl 20(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + movl %esi,32(%esp) + leal 3395469782(%esi,%edi,1),%esi + movl 36(%esp),%edi + addl %ebp,%esi + # 20_39 73 + movl %eax,%ebp + xorl 44(%esp),%edi + xorl %ebx,%ebp + xorl 4(%esp),%edi + xorl %ecx,%ebp + xorl 24(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + movl %edi,36(%esp) + leal 3395469782(%edi,%edx,1),%edi + movl 40(%esp),%edx + addl %ebp,%edi + # 20_39 74 + movl %esi,%ebp + xorl 48(%esp),%edx + xorl %eax,%ebp + xorl 8(%esp),%edx + xorl %ebx,%ebp + xorl 28(%esp),%edx + roll $1,%edx + addl %ebp,%ecx + rorl $2,%esi + movl %edi,%ebp + roll $5,%ebp + movl %edx,40(%esp) + leal 3395469782(%edx,%ecx,1),%edx + movl 44(%esp),%ecx + addl %ebp,%edx + # 20_39 75 + movl %edi,%ebp + xorl 52(%esp),%ecx + xorl %esi,%ebp + xorl 12(%esp),%ecx + xorl %eax,%ebp + xorl 32(%esp),%ecx + roll $1,%ecx + addl %ebp,%ebx + rorl $2,%edi + movl %edx,%ebp + roll $5,%ebp + movl %ecx,44(%esp) + leal 3395469782(%ecx,%ebx,1),%ecx + movl 48(%esp),%ebx + addl %ebp,%ecx + # 20_39 76 + movl %edx,%ebp + xorl 56(%esp),%ebx + xorl %edi,%ebp + xorl 16(%esp),%ebx + xorl %esi,%ebp + xorl 36(%esp),%ebx + roll $1,%ebx + addl %ebp,%eax + rorl $2,%edx + movl %ecx,%ebp + roll $5,%ebp + movl %ebx,48(%esp) + leal 3395469782(%ebx,%eax,1),%ebx + movl 52(%esp),%eax + addl %ebp,%ebx + # 20_39 77 + movl %ecx,%ebp + xorl 60(%esp),%eax + xorl %edx,%ebp + xorl 20(%esp),%eax + xorl %edi,%ebp + xorl 40(%esp),%eax + roll $1,%eax + addl %ebp,%esi + rorl $2,%ecx + movl %ebx,%ebp + roll $5,%ebp + leal 3395469782(%eax,%esi,1),%eax + movl 56(%esp),%esi + addl %ebp,%eax + # 20_39 78 + movl %ebx,%ebp + xorl (%esp),%esi + xorl %ecx,%ebp + xorl 24(%esp),%esi + xorl %edx,%ebp + xorl 44(%esp),%esi + roll $1,%esi + addl %ebp,%edi + rorl $2,%ebx + movl %eax,%ebp + roll $5,%ebp + leal 3395469782(%esi,%edi,1),%esi + movl 60(%esp),%edi + addl %ebp,%esi + # 20_39 79 + movl %eax,%ebp + 
xorl 4(%esp),%edi + xorl %ebx,%ebp + xorl 28(%esp),%edi + xorl %ecx,%ebp + xorl 48(%esp),%edi + roll $1,%edi + addl %ebp,%edx + rorl $2,%eax + movl %esi,%ebp + roll $5,%ebp + leal 3395469782(%edi,%edx,1),%edi + addl %ebp,%edi + movl 96(%esp),%ebp + movl 100(%esp),%edx + addl (%ebp),%edi + addl 4(%ebp),%esi + addl 8(%ebp),%eax + addl 12(%ebp),%ebx + addl 16(%ebp),%ecx + movl %edi,(%ebp) + addl $64,%edx + movl %esi,4(%ebp) + cmpl 104(%esp),%edx + movl %eax,8(%ebp) + movl %ecx,%edi + movl %ebx,12(%ebp) + movl %edx,%esi + movl %ecx,16(%ebp) + jb L000loop + addl $76,%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s b/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s new file mode 100644 index 0000000000..88bf435f81 --- /dev/null +++ b/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s @@ -0,0 +1,2515 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# *** This file is auto-generated *** +# +.text + + +.globl _sha1_block_data_order + +.p2align 4 +_sha1_block_data_order: + movl __gnutls_x86_cpuid_s+0(%rip),%r9d + movl __gnutls_x86_cpuid_s+4(%rip),%r8d + movl __gnutls_x86_cpuid_s+8(%rip),%r10d + testl $512,%r8d + jz L$ialu + jmp _ssse3_shortcut + +.p2align 4 +L$ialu: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + movq %rsp,%r11 + movq %rdi,%r8 + subq $72,%rsp + movq %rsi,%r9 + andq $-64,%rsp + movq %rdx,%r10 + movq %r11,64(%rsp) +L$prologue: + + movl 0(%r8),%esi + movl 4(%r8),%edi + movl 8(%r8),%r11d + movl 12(%r8),%r12d + movl 16(%r8),%r13d + jmp L$loop + +.p2align 4 +L$loop: + movl 0(%r9),%edx + bswapl %edx + movl %edx,0(%rsp) + movl %r11d,%eax + movl 4(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,4(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 8(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl %esi,%eax + movl %edx,8(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 12(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,12(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 16(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,16(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 20(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,20(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 24(%r9),%edx + movl %esi,%ecx + xorl %r12d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r13,1),%r13d + andl %edi,%eax + movl %edx,24(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 28(%r9),%ebp + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r12,1),%r12d + andl %esi,%eax + movl %ebp,28(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 32(%r9),%edx + movl %r12d,%ecx + xorl %edi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r11,1),%r11d + andl %r13d,%eax + movl %edx,32(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 36(%r9),%ebp + movl %r11d,%ecx + xorl %esi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rdi,1),%edi + andl %r12d,%eax + movl %ebp,36(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 40(%r9),%edx + movl %edi,%ecx + xorl %r13d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rsi,1),%esi + andl %r11d,%eax + movl %edx,40(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl %r11d,%eax + movl 44(%r9),%ebp + movl %esi,%ecx + xorl %r12d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r13,1),%r13d + andl %edi,%eax + movl %ebp,44(%rsp) + addl %ecx,%r13d + xorl %r12d,%eax + roll $30,%edi + addl %eax,%r13d + movl %edi,%eax + movl 48(%r9),%edx + movl %r13d,%ecx + xorl %r11d,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%r12,1),%r12d + andl 
%esi,%eax + movl %edx,48(%rsp) + addl %ecx,%r12d + xorl %r11d,%eax + roll $30,%esi + addl %eax,%r12d + movl %esi,%eax + movl 52(%r9),%ebp + movl %r12d,%ecx + xorl %edi,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%r11,1),%r11d + andl %r13d,%eax + movl %ebp,52(%rsp) + addl %ecx,%r11d + xorl %edi,%eax + roll $30,%r13d + addl %eax,%r11d + movl %r13d,%eax + movl 56(%r9),%edx + movl %r11d,%ecx + xorl %esi,%eax + bswapl %edx + roll $5,%ecx + leal 1518500249(%rbp,%rdi,1),%edi + andl %r12d,%eax + movl %edx,56(%rsp) + addl %ecx,%edi + xorl %esi,%eax + roll $30,%r12d + addl %eax,%edi + movl %r12d,%eax + movl 60(%r9),%ebp + movl %edi,%ecx + xorl %r13d,%eax + bswapl %ebp + roll $5,%ecx + leal 1518500249(%rdx,%rsi,1),%esi + andl %r11d,%eax + movl %ebp,60(%rsp) + addl %ecx,%esi + xorl %r13d,%eax + roll $30,%r11d + addl %eax,%esi + movl 0(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + xorl 32(%rsp),%edx + andl %edi,%eax + leal 1518500249(%rbp,%r13,1),%r13d + xorl 52(%rsp),%edx + xorl %r12d,%eax + roll $1,%edx + addl %ecx,%r13d + roll $30,%edi + movl %edx,0(%rsp) + addl %eax,%r13d + movl 4(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + xorl 36(%rsp),%ebp + andl %esi,%eax + leal 1518500249(%rdx,%r12,1),%r12d + xorl 56(%rsp),%ebp + xorl %r11d,%eax + roll $1,%ebp + addl %ecx,%r12d + roll $30,%esi + movl %ebp,4(%rsp) + addl %eax,%r12d + movl 8(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + xorl 40(%rsp),%edx + andl %r13d,%eax + leal 1518500249(%rbp,%r11,1),%r11d + xorl 60(%rsp),%edx + xorl %edi,%eax + roll $1,%edx + addl %ecx,%r11d + roll $30,%r13d + movl %edx,8(%rsp) + addl %eax,%r11d + movl 12(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + xorl 44(%rsp),%ebp + andl %r12d,%eax + leal 1518500249(%rdx,%rdi,1),%edi + xorl 0(%rsp),%ebp + xorl %esi,%eax + roll $1,%ebp + addl %ecx,%edi + roll $30,%r12d + movl %ebp,12(%rsp) + addl %eax,%edi + movl 16(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + xorl 48(%rsp),%edx + andl %r11d,%eax + leal 1518500249(%rbp,%rsi,1),%esi + xorl 4(%rsp),%edx + xorl %r13d,%eax + roll $1,%edx + addl %ecx,%esi + roll $30,%r11d + movl %edx,16(%rsp) + addl %eax,%esi + movl 20(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 28(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 52(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 8(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 32(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 56(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 12(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 36(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 60(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 16(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 40(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 0(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 20(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl 
%edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 44(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 4(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 24(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 48(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 8(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 28(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 52(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 12(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 32(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 56(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 16(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 36(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 60(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 20(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 40(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 0(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 24(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 44(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r13,1),%r13d + xorl 28(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 48(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r12,1),%r12d + xorl 32(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 52(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r11,1),%r11d + xorl 36(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 56(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rdi,1),%edi + xorl 40(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 60(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rsi,1),%esi + xorl 44(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 0(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r13,1),%r13d + xorl 48(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 4(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl 
%edi,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal 1859775393(%rdx,%r12,1),%r12d + xorl 52(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 8(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%r11,1),%r11d + xorl 56(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 12(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal 1859775393(%rdx,%rdi,1),%edi + xorl 60(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 16(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal 1859775393(%rbp,%rsi,1),%esi + xorl 0(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 20(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 44(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,36(%rsp) + addl %ecx,%r13d + movl 40(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 48(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 8(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,40(%rsp) + addl %ecx,%r12d + movl 44(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 52(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 12(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,44(%rsp) + addl %ecx,%r11d + movl 48(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 56(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 16(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,48(%rsp) + addl %ecx,%edi + movl 52(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 60(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 20(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 40(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,52(%rsp) + addl %ecx,%esi + movl 56(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 0(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 24(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 44(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,56(%rsp) + addl %ecx,%r13d + movl 60(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 4(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 28(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 48(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl 
%ebp,60(%rsp) + addl %ecx,%r12d + movl 0(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + xorl 8(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 32(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 52(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,0(%rsp) + addl %ecx,%r11d + movl 4(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 12(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 36(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 56(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,4(%rsp) + addl %ecx,%edi + movl 8(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 16(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 40(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 60(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,8(%rsp) + addl %ecx,%esi + movl 12(%rsp),%ebp + movl %r11d,%eax + movl %r11d,%ebx + xorl 20(%rsp),%ebp + andl %r12d,%eax + movl %esi,%ecx + xorl 44(%rsp),%ebp + xorl %r12d,%ebx + leal -1894007588(%rdx,%r13,1),%r13d + roll $5,%ecx + xorl 0(%rsp),%ebp + addl %eax,%r13d + andl %edi,%ebx + roll $1,%ebp + addl %ebx,%r13d + roll $30,%edi + movl %ebp,12(%rsp) + addl %ecx,%r13d + movl 16(%rsp),%edx + movl %edi,%eax + movl %edi,%ebx + xorl 24(%rsp),%edx + andl %r11d,%eax + movl %r13d,%ecx + xorl 48(%rsp),%edx + xorl %r11d,%ebx + leal -1894007588(%rbp,%r12,1),%r12d + roll $5,%ecx + xorl 4(%rsp),%edx + addl %eax,%r12d + andl %esi,%ebx + roll $1,%edx + addl %ebx,%r12d + roll $30,%esi + movl %edx,16(%rsp) + addl %ecx,%r12d + movl 20(%rsp),%ebp + movl %esi,%eax + movl %esi,%ebx + xorl 28(%rsp),%ebp + andl %edi,%eax + movl %r12d,%ecx + xorl 52(%rsp),%ebp + xorl %edi,%ebx + leal -1894007588(%rdx,%r11,1),%r11d + roll $5,%ecx + xorl 8(%rsp),%ebp + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%ebp + addl %ebx,%r11d + roll $30,%r13d + movl %ebp,20(%rsp) + addl %ecx,%r11d + movl 24(%rsp),%edx + movl %r13d,%eax + movl %r13d,%ebx + xorl 32(%rsp),%edx + andl %esi,%eax + movl %r11d,%ecx + xorl 56(%rsp),%edx + xorl %esi,%ebx + leal -1894007588(%rbp,%rdi,1),%edi + roll $5,%ecx + xorl 12(%rsp),%edx + addl %eax,%edi + andl %r12d,%ebx + roll $1,%edx + addl %ebx,%edi + roll $30,%r12d + movl %edx,24(%rsp) + addl %ecx,%edi + movl 28(%rsp),%ebp + movl %r12d,%eax + movl %r12d,%ebx + xorl 36(%rsp),%ebp + andl %r13d,%eax + movl %edi,%ecx + xorl 60(%rsp),%ebp + xorl %r13d,%ebx + leal -1894007588(%rdx,%rsi,1),%esi + roll $5,%ecx + xorl 16(%rsp),%ebp + addl %eax,%esi + andl %r11d,%ebx + roll $1,%ebp + addl %ebx,%esi + roll $30,%r11d + movl %ebp,28(%rsp) + addl %ecx,%esi + movl 32(%rsp),%edx + movl %r11d,%eax + movl %r11d,%ebx + xorl 40(%rsp),%edx + andl %r12d,%eax + movl %esi,%ecx + xorl 0(%rsp),%edx + xorl %r12d,%ebx + leal -1894007588(%rbp,%r13,1),%r13d + roll $5,%ecx + xorl 20(%rsp),%edx + addl %eax,%r13d + andl %edi,%ebx + roll $1,%edx + addl %ebx,%r13d + roll $30,%edi + movl %edx,32(%rsp) + addl %ecx,%r13d + movl 36(%rsp),%ebp + movl %edi,%eax + movl %edi,%ebx + xorl 44(%rsp),%ebp + andl %r11d,%eax + movl %r13d,%ecx + xorl 4(%rsp),%ebp + xorl %r11d,%ebx + leal -1894007588(%rdx,%r12,1),%r12d + roll $5,%ecx + xorl 24(%rsp),%ebp + addl %eax,%r12d + andl %esi,%ebx + roll $1,%ebp + addl %ebx,%r12d + roll $30,%esi + movl %ebp,36(%rsp) + addl %ecx,%r12d + movl 40(%rsp),%edx + movl %esi,%eax + movl %esi,%ebx + 
xorl 48(%rsp),%edx + andl %edi,%eax + movl %r12d,%ecx + xorl 8(%rsp),%edx + xorl %edi,%ebx + leal -1894007588(%rbp,%r11,1),%r11d + roll $5,%ecx + xorl 28(%rsp),%edx + addl %eax,%r11d + andl %r13d,%ebx + roll $1,%edx + addl %ebx,%r11d + roll $30,%r13d + movl %edx,40(%rsp) + addl %ecx,%r11d + movl 44(%rsp),%ebp + movl %r13d,%eax + movl %r13d,%ebx + xorl 52(%rsp),%ebp + andl %esi,%eax + movl %r11d,%ecx + xorl 12(%rsp),%ebp + xorl %esi,%ebx + leal -1894007588(%rdx,%rdi,1),%edi + roll $5,%ecx + xorl 32(%rsp),%ebp + addl %eax,%edi + andl %r12d,%ebx + roll $1,%ebp + addl %ebx,%edi + roll $30,%r12d + movl %ebp,44(%rsp) + addl %ecx,%edi + movl 48(%rsp),%edx + movl %r12d,%eax + movl %r12d,%ebx + xorl 56(%rsp),%edx + andl %r13d,%eax + movl %edi,%ecx + xorl 16(%rsp),%edx + xorl %r13d,%ebx + leal -1894007588(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl 36(%rsp),%edx + addl %eax,%esi + andl %r11d,%ebx + roll $1,%edx + addl %ebx,%esi + roll $30,%r11d + movl %edx,48(%rsp) + addl %ecx,%esi + movl 52(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 60(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 20(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 40(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,52(%rsp) + movl 56(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 0(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 24(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 44(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,56(%rsp) + movl 60(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 4(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 28(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 48(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,60(%rsp) + movl 0(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 8(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 32(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 52(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,0(%rsp) + movl 4(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 12(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 36(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 56(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,4(%rsp) + movl 8(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 16(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 40(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 60(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,8(%rsp) + movl 12(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 20(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 44(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 0(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl %ebp,12(%rsp) + movl 16(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 24(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 48(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 4(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl %edx,16(%rsp) + movl 20(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 28(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 52(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 8(%rsp),%ebp + roll 
$30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %ebp,20(%rsp) + movl 24(%rsp),%edx + movl %r12d,%eax + movl %edi,%ecx + xorl 32(%rsp),%edx + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rsi,1),%esi + xorl 56(%rsp),%edx + xorl %r13d,%eax + addl %ecx,%esi + xorl 12(%rsp),%edx + roll $30,%r11d + addl %eax,%esi + roll $1,%edx + movl %edx,24(%rsp) + movl 28(%rsp),%ebp + movl %r11d,%eax + movl %esi,%ecx + xorl 36(%rsp),%ebp + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r13,1),%r13d + xorl 60(%rsp),%ebp + xorl %r12d,%eax + addl %ecx,%r13d + xorl 16(%rsp),%ebp + roll $30,%edi + addl %eax,%r13d + roll $1,%ebp + movl %ebp,28(%rsp) + movl 32(%rsp),%edx + movl %edi,%eax + movl %r13d,%ecx + xorl 40(%rsp),%edx + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r12,1),%r12d + xorl 0(%rsp),%edx + xorl %r11d,%eax + addl %ecx,%r12d + xorl 20(%rsp),%edx + roll $30,%esi + addl %eax,%r12d + roll $1,%edx + movl %edx,32(%rsp) + movl 36(%rsp),%ebp + movl %esi,%eax + movl %r12d,%ecx + xorl 44(%rsp),%ebp + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rdx,%r11,1),%r11d + xorl 4(%rsp),%ebp + xorl %edi,%eax + addl %ecx,%r11d + xorl 24(%rsp),%ebp + roll $30,%r13d + addl %eax,%r11d + roll $1,%ebp + movl %ebp,36(%rsp) + movl 40(%rsp),%edx + movl %r13d,%eax + movl %r11d,%ecx + xorl 48(%rsp),%edx + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rbp,%rdi,1),%edi + xorl 8(%rsp),%edx + xorl %esi,%eax + addl %ecx,%edi + xorl 28(%rsp),%edx + roll $30,%r12d + addl %eax,%edi + roll $1,%edx + movl %edx,40(%rsp) + movl 44(%rsp),%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl 52(%rsp),%ebp + xorl %r11d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rsi,1),%esi + xorl 12(%rsp),%ebp + xorl %r13d,%eax + addl %ecx,%esi + xorl 32(%rsp),%ebp + roll $30,%r11d + addl %eax,%esi + roll $1,%ebp + movl %ebp,44(%rsp) + movl 48(%rsp),%edx + movl %r11d,%eax + movl %esi,%ecx + xorl 56(%rsp),%edx + xorl %edi,%eax + roll $5,%ecx + leal -899497514(%rbp,%r13,1),%r13d + xorl 16(%rsp),%edx + xorl %r12d,%eax + addl %ecx,%r13d + xorl 36(%rsp),%edx + roll $30,%edi + addl %eax,%r13d + roll $1,%edx + movl %edx,48(%rsp) + movl 52(%rsp),%ebp + movl %edi,%eax + movl %r13d,%ecx + xorl 60(%rsp),%ebp + xorl %esi,%eax + roll $5,%ecx + leal -899497514(%rdx,%r12,1),%r12d + xorl 20(%rsp),%ebp + xorl %r11d,%eax + addl %ecx,%r12d + xorl 40(%rsp),%ebp + roll $30,%esi + addl %eax,%r12d + roll $1,%ebp + movl 56(%rsp),%edx + movl %esi,%eax + movl %r12d,%ecx + xorl 0(%rsp),%edx + xorl %r13d,%eax + roll $5,%ecx + leal -899497514(%rbp,%r11,1),%r11d + xorl 24(%rsp),%edx + xorl %edi,%eax + addl %ecx,%r11d + xorl 44(%rsp),%edx + roll $30,%r13d + addl %eax,%r11d + roll $1,%edx + movl 60(%rsp),%ebp + movl %r13d,%eax + movl %r11d,%ecx + xorl 4(%rsp),%ebp + xorl %r12d,%eax + roll $5,%ecx + leal -899497514(%rdx,%rdi,1),%edi + xorl 28(%rsp),%ebp + xorl %esi,%eax + addl %ecx,%edi + xorl 48(%rsp),%ebp + roll $30,%r12d + addl %eax,%edi + roll $1,%ebp + movl %r12d,%eax + movl %edi,%ecx + xorl %r11d,%eax + leal -899497514(%rbp,%rsi,1),%esi + roll $5,%ecx + xorl %r13d,%eax + addl %ecx,%esi + roll $30,%r11d + addl %eax,%esi + addl 0(%r8),%esi + addl 4(%r8),%edi + addl 8(%r8),%r11d + addl 12(%r8),%r12d + addl 16(%r8),%r13d + movl %esi,0(%r8) + movl %edi,4(%r8) + movl %r11d,8(%r8) + movl %r12d,12(%r8) + movl %r13d,16(%r8) + + subq $1,%r10 + leaq 64(%r9),%r9 + jnz L$loop + + movq 64(%rsp),%rsi + movq (%rsi),%r13 + movq 8(%rsi),%r12 + movq 16(%rsi),%rbp + movq 24(%rsi),%rbx + leaq 32(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + + +.p2align 4 
+sha1_block_data_order_ssse3: +_ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + leaq -64(%rsp),%rsp + movq %rdi,%r8 + movq %rsi,%r9 + movq %rdx,%r10 + + shlq $6,%r10 + addq %r9,%r10 + leaq K_XX_XX+64(%rip),%r11 + + movl 0(%r8),%eax + movl 4(%r8),%ebx + movl 8(%r8),%ecx + movl 12(%r8),%edx + movl %ebx,%esi + movl 16(%r8),%ebp + movl %ecx,%edi + xorl %edx,%edi + andl %edi,%esi + + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 +.byte 102,15,56,0,206 +.byte 102,15,56,0,214 +.byte 102,15,56,0,222 + paddd %xmm9,%xmm0 + paddd %xmm9,%xmm1 + paddd %xmm9,%xmm2 + movdqa %xmm0,0(%rsp) + psubd %xmm9,%xmm0 + movdqa %xmm1,16(%rsp) + psubd %xmm9,%xmm1 + movdqa %xmm2,32(%rsp) + psubd %xmm9,%xmm2 + jmp L$oop_ssse3 +.p2align 4 +L$oop_ssse3: + movdqa %xmm1,%xmm4 + rorl $2,%ebx + xorl %edx,%esi + movdqa %xmm3,%xmm8 +.byte 102,15,58,15,224,8 + movl %eax,%edi + addl 0(%rsp),%ebp + paddd %xmm3,%xmm9 + xorl %ecx,%ebx + roll $5,%eax + psrldq $4,%xmm8 + addl %esi,%ebp + andl %ebx,%edi + pxor %xmm0,%xmm4 + xorl %ecx,%ebx + addl %eax,%ebp + pxor %xmm2,%xmm8 + rorl $7,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 4(%rsp),%edx + pxor %xmm8,%xmm4 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm9,48(%rsp) + addl %edi,%edx + andl %eax,%esi + movdqa %xmm4,%xmm10 + movdqa %xmm4,%xmm8 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + xorl %ebx,%esi + pslldq $12,%xmm10 + paddd %xmm4,%xmm4 + movl %edx,%edi + addl 8(%rsp),%ecx + xorl %eax,%ebp + roll $5,%edx + psrld $31,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + movdqa %xmm10,%xmm9 + xorl %eax,%ebp + addl %edx,%ecx + psrld $30,%xmm10 + por %xmm8,%xmm4 + rorl $7,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 12(%rsp),%ebx + pslld $2,%xmm9 + pxor %xmm10,%xmm4 + xorl %ebp,%edx + roll $5,%ecx + movdqa -64(%r11),%xmm10 + addl %edi,%ebx + andl %edx,%esi + pxor %xmm9,%xmm4 + xorl %ebp,%edx + addl %ecx,%ebx + movdqa %xmm2,%xmm5 + rorl $7,%ecx + xorl %ebp,%esi + movdqa %xmm4,%xmm9 +.byte 102,15,58,15,233,8 + movl %ebx,%edi + addl 16(%rsp),%eax + paddd %xmm4,%xmm10 + xorl %edx,%ecx + roll $5,%ebx + psrldq $4,%xmm9 + addl %esi,%eax + andl %ecx,%edi + pxor %xmm1,%xmm5 + xorl %edx,%ecx + addl %ebx,%eax + pxor %xmm3,%xmm9 + rorl $7,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 20(%rsp),%ebp + pxor %xmm9,%xmm5 + xorl %ecx,%ebx + roll $5,%eax + movdqa %xmm10,0(%rsp) + addl %edi,%ebp + andl %ebx,%esi + movdqa %xmm5,%xmm8 + movdqa %xmm5,%xmm9 + xorl %ecx,%ebx + addl %eax,%ebp + rorl $7,%eax + xorl %ecx,%esi + pslldq $12,%xmm8 + paddd %xmm5,%xmm5 + movl %ebp,%edi + addl 24(%rsp),%edx + xorl %ebx,%eax + roll $5,%ebp + psrld $31,%xmm9 + addl %esi,%edx + andl %eax,%edi + movdqa %xmm8,%xmm10 + xorl %ebx,%eax + addl %ebp,%edx + psrld $30,%xmm8 + por %xmm9,%xmm5 + rorl $7,%ebp + xorl %ebx,%edi + movl %edx,%esi + addl 28(%rsp),%ecx + pslld $2,%xmm10 + pxor %xmm8,%xmm5 + xorl %eax,%ebp + roll $5,%edx + movdqa -32(%r11),%xmm8 + addl %edi,%ecx + andl %ebp,%esi + pxor %xmm10,%xmm5 + xorl %eax,%ebp + addl %edx,%ecx + movdqa %xmm3,%xmm6 + rorl $7,%edx + xorl %eax,%esi + movdqa %xmm5,%xmm10 +.byte 102,15,58,15,242,8 + movl %ecx,%edi + addl 32(%rsp),%ebx + paddd %xmm5,%xmm8 + xorl %ebp,%edx + roll $5,%ecx + psrldq $4,%xmm10 + addl %esi,%ebx + andl %edx,%edi + pxor %xmm2,%xmm6 + xorl %ebp,%edx + addl %ecx,%ebx + pxor %xmm4,%xmm10 + rorl $7,%ecx + xorl %ebp,%edi + movl %ebx,%esi + addl 36(%rsp),%eax + pxor %xmm10,%xmm6 + xorl %edx,%ecx + roll $5,%ebx + movdqa %xmm8,16(%rsp) + addl 
%edi,%eax + andl %ecx,%esi + movdqa %xmm6,%xmm9 + movdqa %xmm6,%xmm10 + xorl %edx,%ecx + addl %ebx,%eax + rorl $7,%ebx + xorl %edx,%esi + pslldq $12,%xmm9 + paddd %xmm6,%xmm6 + movl %eax,%edi + addl 40(%rsp),%ebp + xorl %ecx,%ebx + roll $5,%eax + psrld $31,%xmm10 + addl %esi,%ebp + andl %ebx,%edi + movdqa %xmm9,%xmm8 + xorl %ecx,%ebx + addl %eax,%ebp + psrld $30,%xmm9 + por %xmm10,%xmm6 + rorl $7,%eax + xorl %ecx,%edi + movl %ebp,%esi + addl 44(%rsp),%edx + pslld $2,%xmm8 + pxor %xmm9,%xmm6 + xorl %ebx,%eax + roll $5,%ebp + movdqa -32(%r11),%xmm9 + addl %edi,%edx + andl %eax,%esi + pxor %xmm8,%xmm6 + xorl %ebx,%eax + addl %ebp,%edx + movdqa %xmm4,%xmm7 + rorl $7,%ebp + xorl %ebx,%esi + movdqa %xmm6,%xmm8 +.byte 102,15,58,15,251,8 + movl %edx,%edi + addl 48(%rsp),%ecx + paddd %xmm6,%xmm9 + xorl %eax,%ebp + roll $5,%edx + psrldq $4,%xmm8 + addl %esi,%ecx + andl %ebp,%edi + pxor %xmm3,%xmm7 + xorl %eax,%ebp + addl %edx,%ecx + pxor %xmm5,%xmm8 + rorl $7,%edx + xorl %eax,%edi + movl %ecx,%esi + addl 52(%rsp),%ebx + pxor %xmm8,%xmm7 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm9,32(%rsp) + addl %edi,%ebx + andl %edx,%esi + movdqa %xmm7,%xmm10 + movdqa %xmm7,%xmm8 + xorl %ebp,%edx + addl %ecx,%ebx + rorl $7,%ecx + xorl %ebp,%esi + pslldq $12,%xmm10 + paddd %xmm7,%xmm7 + movl %ebx,%edi + addl 56(%rsp),%eax + xorl %edx,%ecx + roll $5,%ebx + psrld $31,%xmm8 + addl %esi,%eax + andl %ecx,%edi + movdqa %xmm10,%xmm9 + xorl %edx,%ecx + addl %ebx,%eax + psrld $30,%xmm10 + por %xmm8,%xmm7 + rorl $7,%ebx + xorl %edx,%edi + movl %eax,%esi + addl 60(%rsp),%ebp + pslld $2,%xmm9 + pxor %xmm10,%xmm7 + xorl %ecx,%ebx + roll $5,%eax + movdqa -32(%r11),%xmm10 + addl %edi,%ebp + andl %ebx,%esi + pxor %xmm9,%xmm7 + xorl %ecx,%ebx + addl %eax,%ebp + movdqa %xmm7,%xmm9 + rorl $7,%eax + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,206,8 + xorl %ecx,%esi + movl %ebp,%edi + addl 0(%rsp),%edx + pxor %xmm1,%xmm0 + xorl %ebx,%eax + roll $5,%ebp + movdqa %xmm10,%xmm8 + paddd %xmm7,%xmm10 + addl %esi,%edx + andl %eax,%edi + pxor %xmm9,%xmm0 + xorl %ebx,%eax + addl %ebp,%edx + rorl $7,%ebp + xorl %ebx,%edi + movdqa %xmm0,%xmm9 + movdqa %xmm10,48(%rsp) + movl %edx,%esi + addl 4(%rsp),%ecx + xorl %eax,%ebp + roll $5,%edx + pslld $2,%xmm0 + addl %edi,%ecx + andl %ebp,%esi + psrld $30,%xmm9 + xorl %eax,%ebp + addl %edx,%ecx + rorl $7,%edx + xorl %eax,%esi + movl %ecx,%edi + addl 8(%rsp),%ebx + por %xmm9,%xmm0 + xorl %ebp,%edx + roll $5,%ecx + movdqa %xmm0,%xmm10 + addl %esi,%ebx + andl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 12(%rsp),%eax + xorl %ebp,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 16(%rsp),%ebp + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,215,8 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm2,%xmm1 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm8,%xmm9 + paddd %xmm0,%xmm8 + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm10,%xmm1 + addl 20(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm1,%xmm10 + movdqa %xmm8,0(%rsp) + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pslld $2,%xmm1 + addl 24(%rsp),%ecx + xorl %eax,%esi + psrld $30,%xmm10 + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + por %xmm10,%xmm1 + addl 28(%rsp),%ebx + xorl %ebp,%edi + movdqa %xmm1,%xmm8 + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 32(%rsp),%eax + pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,192,8 + xorl %edx,%esi + movl 
%ebx,%edi + roll $5,%ebx + pxor %xmm3,%xmm2 + addl %esi,%eax + xorl %edx,%edi + movdqa 0(%r11),%xmm10 + paddd %xmm1,%xmm9 + rorl $7,%ecx + addl %ebx,%eax + pxor %xmm8,%xmm2 + addl 36(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + movdqa %xmm2,%xmm8 + movdqa %xmm9,16(%rsp) + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + pslld $2,%xmm2 + addl 40(%rsp),%edx + xorl %ebx,%esi + psrld $30,%xmm8 + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + por %xmm8,%xmm2 + addl 44(%rsp),%ecx + xorl %eax,%edi + movdqa %xmm2,%xmm9 + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 48(%rsp),%ebx + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,201,8 + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + pxor %xmm4,%xmm3 + addl %esi,%ebx + xorl %ebp,%edi + movdqa %xmm10,%xmm8 + paddd %xmm2,%xmm10 + rorl $7,%edx + addl %ecx,%ebx + pxor %xmm9,%xmm3 + addl 52(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + movdqa %xmm3,%xmm9 + movdqa %xmm10,32(%rsp) + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + pslld $2,%xmm3 + addl 56(%rsp),%ebp + xorl %ecx,%esi + psrld $30,%xmm9 + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + por %xmm9,%xmm3 + addl 60(%rsp),%edx + xorl %ebx,%edi + movdqa %xmm3,%xmm10 + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 0(%rsp),%ecx + pxor %xmm0,%xmm4 +.byte 102,68,15,58,15,210,8 + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + pxor %xmm5,%xmm4 + addl %esi,%ecx + xorl %eax,%edi + movdqa %xmm8,%xmm9 + paddd %xmm3,%xmm8 + rorl $7,%ebp + addl %edx,%ecx + pxor %xmm10,%xmm4 + addl 4(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + movdqa %xmm4,%xmm10 + movdqa %xmm8,48(%rsp) + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + pslld $2,%xmm4 + addl 8(%rsp),%eax + xorl %edx,%esi + psrld $30,%xmm10 + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + por %xmm10,%xmm4 + addl 12(%rsp),%ebp + xorl %ecx,%edi + movdqa %xmm4,%xmm8 + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 16(%rsp),%edx + pxor %xmm1,%xmm5 +.byte 102,68,15,58,15,195,8 + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + pxor %xmm6,%xmm5 + addl %esi,%edx + xorl %ebx,%edi + movdqa %xmm9,%xmm10 + paddd %xmm4,%xmm9 + rorl $7,%eax + addl %ebp,%edx + pxor %xmm8,%xmm5 + addl 20(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + movdqa %xmm5,%xmm8 + movdqa %xmm9,0(%rsp) + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + pslld $2,%xmm5 + addl 24(%rsp),%ebx + xorl %ebp,%esi + psrld $30,%xmm8 + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + por %xmm8,%xmm5 + addl 28(%rsp),%eax + movdqa %xmm5,%xmm9 + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 32(%rsp),%ebp + pxor %xmm2,%xmm6 +.byte 102,68,15,58,15,204,8 + andl %ecx,%esi + xorl %edx,%ecx + rorl $7,%ebx + pxor %xmm7,%xmm6 + movl %eax,%edi + xorl %ecx,%esi + movdqa %xmm10,%xmm8 + paddd %xmm5,%xmm10 + roll $5,%eax + addl %esi,%ebp + pxor %xmm9,%xmm6 + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 36(%rsp),%edx + movdqa %xmm6,%xmm9 + movdqa %xmm10,16(%rsp) + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + pslld 
$2,%xmm6 + xorl %ebx,%edi + roll $5,%ebp + psrld $30,%xmm9 + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 40(%rsp),%ecx + andl %eax,%esi + por %xmm9,%xmm6 + xorl %ebx,%eax + rorl $7,%ebp + movdqa %xmm6,%xmm10 + movl %edx,%edi + xorl %eax,%esi + roll $5,%edx + addl %esi,%ecx + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 44(%rsp),%ebx + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + xorl %ebp,%edi + roll $5,%ecx + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 48(%rsp),%eax + pxor %xmm3,%xmm7 +.byte 102,68,15,58,15,213,8 + andl %edx,%esi + xorl %ebp,%edx + rorl $7,%ecx + pxor %xmm0,%xmm7 + movl %ebx,%edi + xorl %edx,%esi + movdqa 32(%r11),%xmm9 + paddd %xmm6,%xmm8 + roll $5,%ebx + addl %esi,%eax + pxor %xmm10,%xmm7 + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 52(%rsp),%ebp + movdqa %xmm7,%xmm10 + movdqa %xmm8,32(%rsp) + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + pslld $2,%xmm7 + xorl %ecx,%edi + roll $5,%eax + psrld $30,%xmm10 + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 56(%rsp),%edx + andl %ebx,%esi + por %xmm10,%xmm7 + xorl %ecx,%ebx + rorl $7,%eax + movdqa %xmm7,%xmm8 + movl %ebp,%edi + xorl %ebx,%esi + roll $5,%ebp + addl %esi,%edx + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 60(%rsp),%ecx + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + xorl %eax,%edi + roll $5,%edx + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 0(%rsp),%ebx + pxor %xmm4,%xmm0 +.byte 102,68,15,58,15,198,8 + andl %ebp,%esi + xorl %eax,%ebp + rorl $7,%edx + pxor %xmm1,%xmm0 + movl %ecx,%edi + xorl %ebp,%esi + movdqa %xmm9,%xmm10 + paddd %xmm7,%xmm9 + roll $5,%ecx + addl %esi,%ebx + pxor %xmm8,%xmm0 + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 4(%rsp),%eax + movdqa %xmm0,%xmm8 + movdqa %xmm9,48(%rsp) + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + pslld $2,%xmm0 + xorl %edx,%edi + roll $5,%ebx + psrld $30,%xmm8 + addl %edi,%eax + xorl %ecx,%esi + xorl %edx,%ecx + addl %ebx,%eax + addl 8(%rsp),%ebp + andl %ecx,%esi + por %xmm8,%xmm0 + xorl %edx,%ecx + rorl $7,%ebx + movdqa %xmm0,%xmm9 + movl %eax,%edi + xorl %ecx,%esi + roll $5,%eax + addl %esi,%ebp + xorl %ebx,%edi + xorl %ecx,%ebx + addl %eax,%ebp + addl 12(%rsp),%edx + andl %ebx,%edi + xorl %ecx,%ebx + rorl $7,%eax + movl %ebp,%esi + xorl %ebx,%edi + roll $5,%ebp + addl %edi,%edx + xorl %eax,%esi + xorl %ebx,%eax + addl %ebp,%edx + addl 16(%rsp),%ecx + pxor %xmm5,%xmm1 +.byte 102,68,15,58,15,207,8 + andl %eax,%esi + xorl %ebx,%eax + rorl $7,%ebp + pxor %xmm2,%xmm1 + movl %edx,%edi + xorl %eax,%esi + movdqa %xmm10,%xmm8 + paddd %xmm0,%xmm10 + roll $5,%edx + addl %esi,%ecx + pxor %xmm9,%xmm1 + xorl %ebp,%edi + xorl %eax,%ebp + addl %edx,%ecx + addl 20(%rsp),%ebx + movdqa %xmm1,%xmm9 + movdqa %xmm10,0(%rsp) + andl %ebp,%edi + xorl %eax,%ebp + rorl $7,%edx + movl %ecx,%esi + pslld $2,%xmm1 + xorl %ebp,%edi + roll $5,%ecx + psrld $30,%xmm9 + addl %edi,%ebx + xorl %edx,%esi + xorl %ebp,%edx + addl %ecx,%ebx + addl 24(%rsp),%eax + andl %edx,%esi + por %xmm9,%xmm1 + xorl %ebp,%edx + rorl $7,%ecx + movdqa %xmm1,%xmm10 + movl %ebx,%edi + xorl %edx,%esi + roll $5,%ebx + addl %esi,%eax + xorl %ecx,%edi + xorl %edx,%ecx + addl %ebx,%eax + addl 28(%rsp),%ebp + andl %ecx,%edi + xorl %edx,%ecx + rorl $7,%ebx + movl %eax,%esi + xorl %ecx,%edi + roll $5,%eax + addl %edi,%ebp + xorl %ebx,%esi + xorl %ecx,%ebx + addl %eax,%ebp + addl 32(%rsp),%edx + 
pxor %xmm6,%xmm2 +.byte 102,68,15,58,15,208,8 + andl %ebx,%esi + xorl %ecx,%ebx + rorl $7,%eax + pxor %xmm3,%xmm2 + movl %ebp,%edi + xorl %ebx,%esi + movdqa %xmm8,%xmm9 + paddd %xmm1,%xmm8 + roll $5,%ebp + addl %esi,%edx + pxor %xmm10,%xmm2 + xorl %eax,%edi + xorl %ebx,%eax + addl %ebp,%edx + addl 36(%rsp),%ecx + movdqa %xmm2,%xmm10 + movdqa %xmm8,16(%rsp) + andl %eax,%edi + xorl %ebx,%eax + rorl $7,%ebp + movl %edx,%esi + pslld $2,%xmm2 + xorl %eax,%edi + roll $5,%edx + psrld $30,%xmm10 + addl %edi,%ecx + xorl %ebp,%esi + xorl %eax,%ebp + addl %edx,%ecx + addl 40(%rsp),%ebx + andl %ebp,%esi + por %xmm10,%xmm2 + xorl %eax,%ebp + rorl $7,%edx + movdqa %xmm2,%xmm8 + movl %ecx,%edi + xorl %ebp,%esi + roll $5,%ecx + addl %esi,%ebx + xorl %edx,%edi + xorl %ebp,%edx + addl %ecx,%ebx + addl 44(%rsp),%eax + andl %edx,%edi + xorl %ebp,%edx + rorl $7,%ecx + movl %ebx,%esi + xorl %edx,%edi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + addl %ebx,%eax + addl 48(%rsp),%ebp + pxor %xmm7,%xmm3 +.byte 102,68,15,58,15,193,8 + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + pxor %xmm4,%xmm3 + addl %esi,%ebp + xorl %ecx,%edi + movdqa %xmm9,%xmm10 + paddd %xmm2,%xmm9 + rorl $7,%ebx + addl %eax,%ebp + pxor %xmm8,%xmm3 + addl 52(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + movdqa %xmm3,%xmm8 + movdqa %xmm9,32(%rsp) + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + pslld $2,%xmm3 + addl 56(%rsp),%ecx + xorl %eax,%esi + psrld $30,%xmm8 + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + por %xmm8,%xmm3 + addl 60(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 0(%rsp),%eax + paddd %xmm3,%xmm10 + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + movdqa %xmm10,48(%rsp) + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 4(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 8(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 12(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + cmpq %r10,%r9 + je L$done_ssse3 + movdqa 64(%r11),%xmm6 + movdqa -64(%r11),%xmm9 + movdqu 0(%r9),%xmm0 + movdqu 16(%r9),%xmm1 + movdqu 32(%r9),%xmm2 + movdqu 48(%r9),%xmm3 +.byte 102,15,56,0,198 + addq $64,%r9 + addl 16(%rsp),%ebx + xorl %ebp,%esi +.byte 102,15,56,0,206 + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + paddd %xmm9,%xmm0 + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + movdqa %xmm0,0(%rsp) + xorl %edx,%edi + movl %ebx,%esi + psubd %xmm9,%xmm0 + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi +.byte 102,15,56,0,214 + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + paddd %xmm9,%xmm1 + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + movdqa %xmm1,16(%rsp) + xorl %ebp,%edi + movl %ecx,%esi + psubd %xmm9,%xmm1 + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi 
+ movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi +.byte 102,15,56,0,222 + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + paddd %xmm9,%xmm2 + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + movdqa %xmm2,32(%rsp) + xorl %eax,%edi + movl %edx,%esi + psubd %xmm9,%xmm2 + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + addl 12(%r8),%edx + movl %eax,0(%r8) + addl 16(%r8),%ebp + movl %esi,4(%r8) + movl %esi,%ebx + movl %ecx,8(%r8) + movl %ecx,%edi + movl %edx,12(%r8) + xorl %edx,%edi + movl %ebp,16(%r8) + andl %edi,%esi + jmp L$oop_ssse3 + +.p2align 4 +L$done_ssse3: + addl 16(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 20(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + xorl %edx,%esi + rorl $7,%ecx + addl %ebx,%eax + addl 24(%rsp),%ebp + xorl %ecx,%esi + movl %eax,%edi + roll $5,%eax + addl %esi,%ebp + xorl %ecx,%edi + rorl $7,%ebx + addl %eax,%ebp + addl 28(%rsp),%edx + xorl %ebx,%edi + movl %ebp,%esi + roll $5,%ebp + addl %edi,%edx + xorl %ebx,%esi + rorl $7,%eax + addl %ebp,%edx + addl 32(%rsp),%ecx + xorl %eax,%esi + movl %edx,%edi + roll $5,%edx + addl %esi,%ecx + xorl %eax,%edi + rorl $7,%ebp + addl %edx,%ecx + addl 36(%rsp),%ebx + xorl %ebp,%edi + movl %ecx,%esi + roll $5,%ecx + addl %edi,%ebx + xorl %ebp,%esi + rorl $7,%edx + addl %ecx,%ebx + addl 40(%rsp),%eax + xorl %edx,%esi + movl %ebx,%edi + roll $5,%ebx + addl %esi,%eax + xorl %edx,%edi + rorl $7,%ecx + addl %ebx,%eax + addl 44(%rsp),%ebp + xorl %ecx,%edi + movl %eax,%esi + roll $5,%eax + addl %edi,%ebp + xorl %ecx,%esi + rorl $7,%ebx + addl %eax,%ebp + addl 48(%rsp),%edx + xorl %ebx,%esi + movl %ebp,%edi + roll $5,%ebp + addl %esi,%edx + xorl %ebx,%edi + rorl $7,%eax + addl %ebp,%edx + addl 52(%rsp),%ecx + xorl %eax,%edi + movl %edx,%esi + roll $5,%edx + addl %edi,%ecx + xorl %eax,%esi + rorl $7,%ebp + addl %edx,%ecx + addl 56(%rsp),%ebx + xorl %ebp,%esi + movl %ecx,%edi + roll $5,%ecx + addl %esi,%ebx + xorl %ebp,%edi + rorl $7,%edx + addl %ecx,%ebx + addl 60(%rsp),%eax + xorl %edx,%edi + movl %ebx,%esi + roll $5,%ebx + addl %edi,%eax + rorl $7,%ecx + addl %ebx,%eax + addl 0(%r8),%eax + addl 4(%r8),%esi + addl 8(%r8),%ecx + movl %eax,0(%r8) + addl 12(%r8),%edx + movl %esi,4(%r8) + addl 16(%r8),%ebp + movl %ecx,8(%r8) + movl %edx,12(%r8) + movl %ebp,16(%r8) + leaq 64(%rsp),%rsi + movq 0(%rsi),%r12 + movq 8(%rsi),%rbp + movq 16(%rsi),%rbx + leaq 24(%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + +.p2align 6 +K_XX_XX: +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +.long 
0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.p2align 6
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/sha256-avx-x86_64.s b/lib/accelerated/x86/macosx/sha256-avx-x86_64.s
new file mode 100644
index 0000000000..bd53138cf6
--- /dev/null
+++ b/lib/accelerated/x86/macosx/sha256-avx-x86_64.s
@@ -0,0 +1,2614 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# +# *** This file is auto-generated *** +# +.text + + + +.globl _sha256_multi_block + +.p2align 5 +_sha256_multi_block: + movq %rsp,%rax + pushq %rbx + pushq %rbp + subq $288,%rsp + andq $-256,%rsp + movq %rax,272(%rsp) + leaq K256+128(%rip),%rbp + leaq 256(%rsp),%rbx + leaq 128(%rdi),%rdi + +L$oop_grande: + movl %edx,280(%rsp) + xorl %edx,%edx + movq 0(%rsi),%r8 + movl 8(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,0(%rbx) + cmovleq %rbp,%r8 + movq 16(%rsi),%r9 + movl 24(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,4(%rbx) + cmovleq %rbp,%r9 + movq 32(%rsi),%r10 + movl 40(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,8(%rbx) + cmovleq %rbp,%r10 + movq 48(%rsi),%r11 + movl 56(%rsi),%ecx + cmpl %edx,%ecx + cmovgl %ecx,%edx + testl %ecx,%ecx + movl %ecx,12(%rbx) + cmovleq %rbp,%r11 + testl %edx,%edx + jz L$done + + movdqu 0-128(%rdi),%xmm8 + leaq 128(%rsp),%rax + movdqu 32-128(%rdi),%xmm9 + movdqu 64-128(%rdi),%xmm10 + movdqu 96-128(%rdi),%xmm11 + movdqu 128-128(%rdi),%xmm12 + movdqu 160-128(%rdi),%xmm13 + movdqu 192-128(%rdi),%xmm14 + movdqu 224-128(%rdi),%xmm15 + movdqu L$pbswap(%rip),%xmm6 + jmp L$oop + +.p2align 5 +L$oop: + movdqa %xmm10,%xmm4 + pxor %xmm9,%xmm4 + movd 0(%r8),%xmm5 + movd 0(%r9),%xmm0 + movd 0(%r10),%xmm1 + movd 0(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 4(%r8),%xmm5 + movd 4(%r9),%xmm0 + movd 4(%r10),%xmm1 + movd 4(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,16-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 8(%r8),%xmm5 + movd 
8(%r9),%xmm0 + movd 8(%r10),%xmm1 + movd 8(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 12(%r8),%xmm5 + movd 12(%r9),%xmm0 + movd 12(%r10),%xmm1 + movd 12(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,48-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 16(%r8),%xmm5 + movd 16(%r9),%xmm0 + movd 16(%r10),%xmm1 + movd 16(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 20(%r8),%xmm5 + movd 20(%r9),%xmm0 + movd 20(%r10),%xmm1 + movd 20(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm15,%xmm7 + movdqa 
%xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,80-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 24(%r8),%xmm5 + movd 24(%r9),%xmm0 + movd 24(%r10),%xmm1 + movd 24(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 28(%r8),%xmm5 + movd 28(%r9),%xmm0 + movd 28(%r10),%xmm1 + movd 28(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,112-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movd 32(%r8),%xmm5 + movd 32(%r9),%xmm0 + movd 32(%r10),%xmm1 + movd 32(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + 
pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movd 36(%r8),%xmm5 + movd 36(%r9),%xmm0 + movd 36(%r10),%xmm1 + movd 36(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,144-128(%rax) + paddd %xmm14,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm5,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm14 + paddd %xmm7,%xmm14 + movd 40(%r8),%xmm5 + movd 40(%r9),%xmm0 + movd 40(%r10),%xmm1 + movd 40(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movd 44(%r8),%xmm5 + movd 44(%r9),%xmm0 + movd 44(%r10),%xmm1 + movd 44(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,176-128(%rax) + paddd %xmm12,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn 
%xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm5,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm12 + paddd %xmm7,%xmm12 + movd 48(%r8),%xmm5 + movd 48(%r9),%xmm0 + movd 48(%r10),%xmm1 + movd 48(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movd 52(%r8),%xmm5 + movd 52(%r9),%xmm0 + movd 52(%r10),%xmm1 + movd 52(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,208-128(%rax) + paddd %xmm10,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm5,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm10 + paddd %xmm7,%xmm10 + movd 56(%r8),%xmm5 + movd 56(%r9),%xmm0 + movd 56(%r10),%xmm1 + movd 56(%r11),%xmm2 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + 
movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movd 60(%r8),%xmm5 + leaq 64(%r8),%r8 + movd 60(%r9),%xmm0 + leaq 64(%r9),%r9 + movd 60(%r10),%xmm1 + leaq 64(%r10),%r10 + movd 60(%r11),%xmm2 + leaq 64(%r11),%r11 + punpckldq %xmm1,%xmm5 + punpckldq %xmm2,%xmm0 + punpckldq %xmm0,%xmm5 +.byte 102,15,56,0,238 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,240-128(%rax) + paddd %xmm8,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm5,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqu 0-128(%rax),%xmm5 + movl $3,%ecx + jmp L$oop_16_xx +.p2align 5 +L$oop_16_xx: + movdqa 16-128(%rax),%xmm6 + paddd 144-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 224-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,0-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 32-128(%rax),%xmm5 + paddd 160-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 240-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + 
psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,16-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 48-128(%rax),%xmm6 + paddd 176-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 0-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,32-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 64-128(%rax),%xmm5 + paddd 192-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 16-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,48-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld 
$25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 80-128(%rax),%xmm6 + paddd 208-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 32-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,64-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 96-128(%rax),%xmm5 + paddd 224-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 48-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,80-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 
+ movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 112-128(%rax),%xmm6 + paddd 240-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 64-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,96-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 128-128(%rax),%xmm5 + paddd 0-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 80-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,112-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + movdqa 144-128(%rax),%xmm6 + paddd 16-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 96-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld 
$10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm12,%xmm7 + movdqa %xmm12,%xmm2 + psrld $6,%xmm7 + movdqa %xmm12,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,128-128(%rax) + paddd %xmm15,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -128(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm12,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm14,%xmm0 + pand %xmm13,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm8,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm9,%xmm3 + movdqa %xmm8,%xmm7 + pslld $10,%xmm2 + pxor %xmm8,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm9,%xmm15 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm15 + paddd %xmm5,%xmm11 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm15 + paddd %xmm7,%xmm15 + movdqa 160-128(%rax),%xmm5 + paddd 32-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 112-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm11,%xmm7 + movdqa %xmm11,%xmm2 + psrld $6,%xmm7 + movdqa %xmm11,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,144-128(%rax) + paddd %xmm14,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm11,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm13,%xmm0 + pand %xmm12,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm15,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm8,%xmm4 + movdqa %xmm15,%xmm7 + pslld $10,%xmm2 + pxor %xmm15,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm8,%xmm14 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm14 + paddd %xmm6,%xmm10 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm14 + paddd %xmm7,%xmm14 + movdqa 176-128(%rax),%xmm6 + paddd 48-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 128-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm10,%xmm7 + movdqa %xmm10,%xmm2 + psrld $6,%xmm7 + movdqa %xmm10,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,160-128(%rax) + paddd %xmm13,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld 
$25-11,%xmm1 + movdqa %xmm10,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm12,%xmm0 + pand %xmm11,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm14,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm15,%xmm3 + movdqa %xmm14,%xmm7 + pslld $10,%xmm2 + pxor %xmm14,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm15,%xmm13 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm13 + paddd %xmm5,%xmm9 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm13 + paddd %xmm7,%xmm13 + movdqa 192-128(%rax),%xmm5 + paddd 64-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 144-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm9,%xmm7 + movdqa %xmm9,%xmm2 + psrld $6,%xmm7 + movdqa %xmm9,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,176-128(%rax) + paddd %xmm12,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd -32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm9,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm11,%xmm0 + pand %xmm10,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm13,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm14,%xmm4 + movdqa %xmm13,%xmm7 + pslld $10,%xmm2 + pxor %xmm13,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm14,%xmm12 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm12 + paddd %xmm6,%xmm8 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm12 + paddd %xmm7,%xmm12 + movdqa 208-128(%rax),%xmm6 + paddd 80-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 160-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm8,%xmm7 + movdqa %xmm8,%xmm2 + psrld $6,%xmm7 + movdqa %xmm8,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,192-128(%rax) + paddd %xmm11,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 0(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm8,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm8,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm10,%xmm0 + pand %xmm9,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm12,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm12,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm13,%xmm3 + movdqa %xmm12,%xmm7 + pslld $10,%xmm2 + pxor %xmm12,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor 
%xmm2,%xmm1 + movdqa %xmm13,%xmm11 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm11 + paddd %xmm5,%xmm15 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm11 + paddd %xmm7,%xmm11 + movdqa 224-128(%rax),%xmm5 + paddd 96-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 176-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm15,%xmm7 + movdqa %xmm15,%xmm2 + psrld $6,%xmm7 + movdqa %xmm15,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,208-128(%rax) + paddd %xmm10,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 32(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm15,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm15,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm9,%xmm0 + pand %xmm8,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm11,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm11,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm12,%xmm4 + movdqa %xmm11,%xmm7 + pslld $10,%xmm2 + pxor %xmm11,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm12,%xmm10 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm10 + paddd %xmm6,%xmm14 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm10 + paddd %xmm7,%xmm10 + movdqa 240-128(%rax),%xmm6 + paddd 112-128(%rax),%xmm5 + + movdqa %xmm6,%xmm7 + movdqa %xmm6,%xmm1 + psrld $3,%xmm7 + movdqa %xmm6,%xmm2 + + psrld $7,%xmm1 + movdqa 192-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm3 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld $10,%xmm0 + movdqa %xmm3,%xmm1 + + psrld $17,%xmm3 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + psrld $19-17,%xmm3 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm3,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm5 + movdqa %xmm14,%xmm7 + movdqa %xmm14,%xmm2 + psrld $6,%xmm7 + movdqa %xmm14,%xmm1 + pslld $7,%xmm2 + movdqa %xmm5,224-128(%rax) + paddd %xmm9,%xmm5 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 64(%rbp),%xmm5 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm14,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm14,%xmm3 + pslld $26-21,%xmm2 + pandn %xmm8,%xmm0 + pand %xmm15,%xmm3 + pxor %xmm1,%xmm7 + + movdqa %xmm10,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm10,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm5 + pxor %xmm3,%xmm0 + movdqa %xmm11,%xmm3 + movdqa %xmm10,%xmm7 + pslld $10,%xmm2 + pxor %xmm10,%xmm3 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm5 + pslld $19-10,%xmm2 + pand %xmm3,%xmm4 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm11,%xmm9 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm4,%xmm9 + paddd %xmm5,%xmm13 + pxor %xmm2,%xmm7 + + paddd %xmm5,%xmm9 + paddd %xmm7,%xmm9 + movdqa 0-128(%rax),%xmm5 + paddd 128-128(%rax),%xmm6 + + movdqa %xmm5,%xmm7 + movdqa %xmm5,%xmm1 + psrld $3,%xmm7 + movdqa %xmm5,%xmm2 + + psrld $7,%xmm1 + movdqa 208-128(%rax),%xmm0 + pslld $14,%xmm2 + pxor %xmm1,%xmm7 + psrld $18-7,%xmm1 + movdqa %xmm0,%xmm4 + pxor %xmm2,%xmm7 + pslld $25-14,%xmm2 + pxor %xmm1,%xmm7 + psrld 
$10,%xmm0 + movdqa %xmm4,%xmm1 + + psrld $17,%xmm4 + pxor %xmm2,%xmm7 + pslld $13,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + psrld $19-17,%xmm4 + pxor %xmm1,%xmm0 + pslld $15-13,%xmm1 + pxor %xmm4,%xmm0 + pxor %xmm1,%xmm0 + paddd %xmm0,%xmm6 + movdqa %xmm13,%xmm7 + movdqa %xmm13,%xmm2 + psrld $6,%xmm7 + movdqa %xmm13,%xmm1 + pslld $7,%xmm2 + movdqa %xmm6,240-128(%rax) + paddd %xmm8,%xmm6 + + psrld $11,%xmm1 + pxor %xmm2,%xmm7 + pslld $21-7,%xmm2 + paddd 96(%rbp),%xmm6 + pxor %xmm1,%xmm7 + + psrld $25-11,%xmm1 + movdqa %xmm13,%xmm0 + pxor %xmm2,%xmm7 + movdqa %xmm13,%xmm4 + pslld $26-21,%xmm2 + pandn %xmm15,%xmm0 + pand %xmm14,%xmm4 + pxor %xmm1,%xmm7 + + movdqa %xmm9,%xmm1 + pxor %xmm2,%xmm7 + movdqa %xmm9,%xmm2 + psrld $2,%xmm1 + paddd %xmm7,%xmm6 + pxor %xmm4,%xmm0 + movdqa %xmm10,%xmm4 + movdqa %xmm9,%xmm7 + pslld $10,%xmm2 + pxor %xmm9,%xmm4 + + psrld $13,%xmm7 + pxor %xmm2,%xmm1 + paddd %xmm0,%xmm6 + pslld $19-10,%xmm2 + pand %xmm4,%xmm3 + pxor %xmm7,%xmm1 + + psrld $22-13,%xmm7 + pxor %xmm2,%xmm1 + movdqa %xmm10,%xmm8 + pslld $30-19,%xmm2 + pxor %xmm1,%xmm7 + pxor %xmm3,%xmm8 + paddd %xmm6,%xmm12 + pxor %xmm2,%xmm7 + + paddd %xmm6,%xmm8 + paddd %xmm7,%xmm8 + leaq 256(%rbp),%rbp + decl %ecx + jnz L$oop_16_xx + + movl $1,%ecx + leaq K256+128(%rip),%rbp + + movdqa (%rbx),%xmm7 + cmpl 0(%rbx),%ecx + pxor %xmm0,%xmm0 + cmovgeq %rbp,%r8 + cmpl 4(%rbx),%ecx + movdqa %xmm7,%xmm6 + cmovgeq %rbp,%r9 + cmpl 8(%rbx),%ecx + pcmpgtd %xmm0,%xmm6 + cmovgeq %rbp,%r10 + cmpl 12(%rbx),%ecx + paddd %xmm6,%xmm7 + cmovgeq %rbp,%r11 + + movdqu 0-128(%rdi),%xmm0 + pand %xmm6,%xmm8 + movdqu 32-128(%rdi),%xmm1 + pand %xmm6,%xmm9 + movdqu 64-128(%rdi),%xmm2 + pand %xmm6,%xmm10 + movdqu 96-128(%rdi),%xmm5 + pand %xmm6,%xmm11 + paddd %xmm0,%xmm8 + movdqu 128-128(%rdi),%xmm0 + pand %xmm6,%xmm12 + paddd %xmm1,%xmm9 + movdqu 160-128(%rdi),%xmm1 + pand %xmm6,%xmm13 + paddd %xmm2,%xmm10 + movdqu 192-128(%rdi),%xmm2 + pand %xmm6,%xmm14 + paddd %xmm5,%xmm11 + movdqu 224-128(%rdi),%xmm5 + pand %xmm6,%xmm15 + paddd %xmm0,%xmm12 + paddd %xmm1,%xmm13 + movdqu %xmm8,0-128(%rdi) + paddd %xmm2,%xmm14 + movdqu %xmm9,32-128(%rdi) + paddd %xmm5,%xmm15 + movdqu %xmm10,64-128(%rdi) + movdqu %xmm11,96-128(%rdi) + movdqu %xmm12,128-128(%rdi) + movdqu %xmm13,160-128(%rdi) + movdqu %xmm14,192-128(%rdi) + movdqu %xmm15,224-128(%rdi) + + movdqa %xmm7,(%rbx) + movdqa L$pbswap(%rip),%xmm6 + decl %edx + jnz L$oop + + movl 280(%rsp),%edx + leaq 16(%rdi),%rdi + leaq 64(%rsi),%rsi + decl %edx + jnz L$oop_grande + +L$done: + movq 272(%rsp),%rax + movq -16(%rax),%rbp + movq -8(%rax),%rbx + leaq (%rax),%rsp + .byte 0xf3,0xc3 + +.p2align 8 +K256: +.long 1116352408,1116352408,1116352408,1116352408 +.long 1116352408,1116352408,1116352408,1116352408 +.long 1899447441,1899447441,1899447441,1899447441 +.long 1899447441,1899447441,1899447441,1899447441 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3049323471,3049323471,3049323471,3049323471 +.long 3921009573,3921009573,3921009573,3921009573 +.long 3921009573,3921009573,3921009573,3921009573 +.long 961987163,961987163,961987163,961987163 +.long 961987163,961987163,961987163,961987163 +.long 1508970993,1508970993,1508970993,1508970993 +.long 1508970993,1508970993,1508970993,1508970993 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2453635748,2453635748,2453635748,2453635748 +.long 2870763221,2870763221,2870763221,2870763221 +.long 2870763221,2870763221,2870763221,2870763221 +.long 3624381080,3624381080,3624381080,3624381080 +.long 3624381080,3624381080,3624381080,3624381080 
+.long 310598401,310598401,310598401,310598401 +.long 310598401,310598401,310598401,310598401 +.long 607225278,607225278,607225278,607225278 +.long 607225278,607225278,607225278,607225278 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1426881987,1426881987,1426881987,1426881987 +.long 1925078388,1925078388,1925078388,1925078388 +.long 1925078388,1925078388,1925078388,1925078388 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2162078206,2162078206,2162078206,2162078206 +.long 2614888103,2614888103,2614888103,2614888103 +.long 2614888103,2614888103,2614888103,2614888103 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3248222580,3248222580,3248222580,3248222580 +.long 3835390401,3835390401,3835390401,3835390401 +.long 3835390401,3835390401,3835390401,3835390401 +.long 4022224774,4022224774,4022224774,4022224774 +.long 4022224774,4022224774,4022224774,4022224774 +.long 264347078,264347078,264347078,264347078 +.long 264347078,264347078,264347078,264347078 +.long 604807628,604807628,604807628,604807628 +.long 604807628,604807628,604807628,604807628 +.long 770255983,770255983,770255983,770255983 +.long 770255983,770255983,770255983,770255983 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1249150122,1249150122,1249150122,1249150122 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1555081692,1555081692,1555081692,1555081692 +.long 1996064986,1996064986,1996064986,1996064986 +.long 1996064986,1996064986,1996064986,1996064986 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2554220882,2554220882,2554220882,2554220882 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2821834349,2821834349,2821834349,2821834349 +.long 2952996808,2952996808,2952996808,2952996808 +.long 2952996808,2952996808,2952996808,2952996808 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3210313671,3210313671,3210313671,3210313671 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3336571891,3336571891,3336571891,3336571891 +.long 3584528711,3584528711,3584528711,3584528711 +.long 3584528711,3584528711,3584528711,3584528711 +.long 113926993,113926993,113926993,113926993 +.long 113926993,113926993,113926993,113926993 +.long 338241895,338241895,338241895,338241895 +.long 338241895,338241895,338241895,338241895 +.long 666307205,666307205,666307205,666307205 +.long 666307205,666307205,666307205,666307205 +.long 773529912,773529912,773529912,773529912 +.long 773529912,773529912,773529912,773529912 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1294757372,1294757372,1294757372,1294757372 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1396182291,1396182291,1396182291,1396182291 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1695183700,1695183700,1695183700,1695183700 +.long 1986661051,1986661051,1986661051,1986661051 +.long 1986661051,1986661051,1986661051,1986661051 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2177026350,2177026350,2177026350,2177026350 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2456956037,2456956037,2456956037,2456956037 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2730485921,2730485921,2730485921,2730485921 +.long 2820302411,2820302411,2820302411,2820302411 +.long 2820302411,2820302411,2820302411,2820302411 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3259730800,3259730800,3259730800,3259730800 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3345764771,3345764771,3345764771,3345764771 +.long 3516065817,3516065817,3516065817,3516065817 
+.long 3516065817,3516065817,3516065817,3516065817
+.long 3600352804,3600352804,3600352804,3600352804
+.long 3600352804,3600352804,3600352804,3600352804
+.long 4094571909,4094571909,4094571909,4094571909
+.long 4094571909,4094571909,4094571909,4094571909
+.long 275423344,275423344,275423344,275423344
+.long 275423344,275423344,275423344,275423344
+.long 430227734,430227734,430227734,430227734
+.long 430227734,430227734,430227734,430227734
+.long 506948616,506948616,506948616,506948616
+.long 506948616,506948616,506948616,506948616
+.long 659060556,659060556,659060556,659060556
+.long 659060556,659060556,659060556,659060556
+.long 883997877,883997877,883997877,883997877
+.long 883997877,883997877,883997877,883997877
+.long 958139571,958139571,958139571,958139571
+.long 958139571,958139571,958139571,958139571
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1322822218,1322822218,1322822218,1322822218
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1537002063,1537002063,1537002063,1537002063
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1747873779,1747873779,1747873779,1747873779
+.long 1955562222,1955562222,1955562222,1955562222
+.long 1955562222,1955562222,1955562222,1955562222
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2024104815,2024104815,2024104815,2024104815
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2227730452,2227730452,2227730452,2227730452
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2361852424,2361852424,2361852424,2361852424
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2428436474,2428436474,2428436474,2428436474
+.long 2756734187,2756734187,2756734187,2756734187
+.long 2756734187,2756734187,2756734187,2756734187
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3204031479,3204031479,3204031479,3204031479
+.long 3329325298,3329325298,3329325298,3329325298
+.long 3329325298,3329325298,3329325298,3329325298
+L$pbswap:
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+
+.section .note.GNU-stack,"",%progbits
diff --git a/lib/accelerated/x86/macosx/sha256-ssse3-x86.s b/lib/accelerated/x86/macosx/sha256-ssse3-x86.s
new file mode 100644
index 0000000000..d6cf6cb2a0
--- /dev/null
+++ b/lib/accelerated/x86/macosx/sha256-ssse3-x86.s
@@ -0,0 +1,3405 @@
+# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the Andy Polyakov nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.file "sha512-586.s" +.text +.globl _sha256_block_data_order +.align 4 +_sha256_block_data_order: +L_sha256_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L000pic_point +L000pic_point: + popl %ebp + leal L001K256-L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $6,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) + movl L__gnutls_x86_cpuid_s$non_lazy_ptr-L001K256(%ebp),%edx + movl (%edx),%ecx + movl 4(%edx),%ebx + testl $1048576,%ecx + jnz L002loop + andl $1073741824,%ecx + andl $268435968,%ebx + orl %ebx,%ecx + andl $1342177280,%ecx + cmpl $1342177280,%ecx + je L003loop_shrd + subl %edi,%eax + cmpl $256,%eax + jae L004unrolled + jmp L002loop +.align 4,0x90 +L002loop: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 4,0x90 +L00500_15: + movl %edx,%ecx + movl 24(%esp),%esi + rorl $14,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl $3248222580,%esi + jne L00500_15 + movl 
156(%esp),%ecx + jmp L00616_63 +.align 4,0x90 +L00616_63: + movl %ecx,%ebx + movl 104(%esp),%esi + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + rorl $14,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + rorl $5,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + rorl $6,%edx + movl %eax,%ecx + addl %esi,%ebx + rorl $9,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + rorl $11,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + rorl $2,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne L00616_63 + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb L002loop + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 5,0x90 +L003loop_shrd: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + bswap %eax + movl 12(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + bswap %eax + movl 28(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %eax + movl 44(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + bswap %eax + movl 60(%edi),%edx + bswap %ebx + pushl %eax + bswap %ecx + pushl %ebx + bswap %edx + pushl %ecx + pushl %edx + addl $64,%edi + leal -36(%esp),%esp + movl %edi,104(%esp) + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edi + movl %ebx,8(%esp) + xorl %ecx,%ebx + movl %ecx,12(%esp) + movl %edi,16(%esp) + movl %ebx,(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + movl %edi,32(%esp) +.align 4,0x90 +L00700_15_shrd: + movl %edx,%ecx + movl 24(%esp),%esi + shrdl $14,%ecx,%ecx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl 96(%esp),%ebx + shrdl $5,%ecx,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %esi,%ebx + shrdl $9,%ecx,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + shrdl $11,%ecx,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + addl $4,%ebp + addl %ebx,%eax + cmpl 
$3248222580,%esi + jne L00700_15_shrd + movl 156(%esp),%ecx + jmp L00816_63_shrd +.align 4,0x90 +L00816_63_shrd: + movl %ecx,%ebx + movl 104(%esp),%esi + shrdl $11,%ecx,%ecx + movl %esi,%edi + shrdl $2,%esi,%esi + xorl %ebx,%ecx + shrl $3,%ebx + shrdl $7,%ecx,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + shrdl $17,%esi,%esi + addl 160(%esp),%ebx + shrl $10,%edi + addl 124(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 24(%esp),%esi + shrdl $14,%ecx,%ecx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %edx,%ecx + xorl %edi,%esi + movl %ebx,96(%esp) + shrdl $5,%ecx,%ecx + andl %edx,%esi + movl %edx,20(%esp) + xorl %ecx,%edx + addl 32(%esp),%ebx + xorl %edi,%esi + shrdl $6,%edx,%edx + movl %eax,%ecx + addl %esi,%ebx + shrdl $9,%ecx,%ecx + addl %edx,%ebx + movl 8(%esp),%edi + xorl %eax,%ecx + movl %eax,4(%esp) + leal -4(%esp),%esp + shrdl $11,%ecx,%ecx + movl (%ebp),%esi + xorl %eax,%ecx + movl 20(%esp),%edx + xorl %edi,%eax + shrdl $2,%ecx,%ecx + addl %esi,%ebx + movl %eax,(%esp) + addl %ebx,%edx + andl 4(%esp),%eax + addl %ecx,%ebx + xorl %edi,%eax + movl 156(%esp),%ecx + addl $4,%ebp + addl %ebx,%eax + cmpl $3329325298,%esi + jne L00816_63_shrd + movl 356(%esp),%esi + movl 8(%esp),%ebx + movl 16(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebx + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebx,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl 24(%esp),%eax + movl 28(%esp),%ebx + movl 32(%esp),%ecx + movl 360(%esp),%edi + addl 16(%esi),%edx + addl 20(%esi),%eax + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %eax,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + leal 356(%esp),%esp + subl $256,%ebp + cmpl 8(%esp),%edi + jb L003loop_shrd + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L001K256: +.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +.long 66051,67438087,134810123,202182159 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 +.align 4,0x90 +L004unrolled: + leal -96(%esp),%esp + movl (%esi),%eax + movl 4(%esi),%ebp + movl 8(%esi),%ecx + movl 12(%esi),%ebx + movl %ebp,4(%esp) + xorl %ecx,%ebp + movl %ecx,8(%esp) + movl %ebx,12(%esp) + movl 16(%esi),%edx + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%esi + movl %ebx,20(%esp) + movl %ecx,24(%esp) + movl %esi,28(%esp) + jmp L009grand_loop +.align 4,0x90 +L009grand_loop: + movl (%edi),%ebx + movl 4(%edi),%ecx + bswap %ebx + movl 8(%edi),%esi + bswap %ecx + movl %ebx,32(%esp) + bswap %esi + movl %ecx,36(%esp) + movl %esi,40(%esp) + movl 12(%edi),%ebx + movl 16(%edi),%ecx + bswap %ebx + movl 20(%edi),%esi + bswap %ecx + movl %ebx,44(%esp) + bswap %esi + movl %ecx,48(%esp) + movl %esi,52(%esp) + 
movl 24(%edi),%ebx + movl 28(%edi),%ecx + bswap %ebx + movl 32(%edi),%esi + bswap %ecx + movl %ebx,56(%esp) + bswap %esi + movl %ecx,60(%esp) + movl %esi,64(%esp) + movl 36(%edi),%ebx + movl 40(%edi),%ecx + bswap %ebx + movl 44(%edi),%esi + bswap %ecx + movl %ebx,68(%esp) + bswap %esi + movl %ecx,72(%esp) + movl %esi,76(%esp) + movl 48(%edi),%ebx + movl 52(%edi),%ecx + bswap %ebx + movl 56(%edi),%esi + bswap %ecx + movl %ebx,80(%esp) + bswap %esi + movl %ecx,84(%esp) + movl %esi,88(%esp) + movl 60(%edi),%ebx + addl $64,%edi + bswap %ebx + movl %edi,100(%esp) + movl %ebx,92(%esp) + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 32(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1116352408(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 36(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1899447441(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 40(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3049323471(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 44(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3921009573(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 48(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 961987163(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 52(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + 
andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1508970993(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 56(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2453635748(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 60(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2870763221(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 20(%esp),%esi + rorl $14,%edx + movl 24(%esp),%edi + xorl %ecx,%edx + movl 64(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3624381080(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 16(%esp),%ecx + rorl $14,%edx + movl 20(%esp),%edi + xorl %esi,%edx + movl 68(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 310598401(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 12(%esp),%esi + rorl $14,%edx + movl 16(%esp),%edi + xorl %ecx,%edx + movl 72(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 607225278(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 8(%esp),%ecx + rorl $14,%edx + movl 12(%esp),%edi + xorl %esi,%edx + movl 76(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl 
%ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1426881987(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 4(%esp),%esi + rorl $14,%edx + movl 8(%esp),%edi + xorl %ecx,%edx + movl 80(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1925078388(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl (%esp),%ecx + rorl $14,%edx + movl 4(%esp),%edi + xorl %esi,%edx + movl 84(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2162078206(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl %edx,%ecx + movl 28(%esp),%esi + rorl $14,%edx + movl (%esp),%edi + xorl %ecx,%edx + movl 88(%esp),%ebx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2614888103(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl %edx,%esi + movl 24(%esp),%ecx + rorl $14,%edx + movl 28(%esp),%edi + xorl %esi,%edx + movl 92(%esp),%ebx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3248222580(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3835390401(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl 
%esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 4022224774(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 264347078(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 604807628(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 770255983(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl 
%ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1249150122(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1555081692(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1996064986(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2554220882(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl 
%ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2821834349(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2952996808(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3210313671(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3336571891(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 
8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3584528711(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 113926993(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 338241895(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 666307205(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + 
movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 773529912(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1294757372(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1396182291(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1695183700(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 
1986661051(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2177026350(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2456956037(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2730485921(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2820302411(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl 
%edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3259730800(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3345764771(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3516065817(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3600352804(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl 
%esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,88(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 4094571909(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,92(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 275423344(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 36(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 88(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 32(%esp),%ebx + shrl $10,%edi + addl 68(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,32(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 430227734(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 40(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 92(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 36(%esp),%ebx + shrl $10,%edi + addl 72(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,36(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 506948616(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 44(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 32(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl 
$17,%esi + addl 40(%esp),%ebx + shrl $10,%edi + addl 76(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,40(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 659060556(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 48(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 36(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 44(%esp),%ebx + shrl $10,%edi + addl 80(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,44(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 883997877(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 52(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 40(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 48(%esp),%ebx + shrl $10,%edi + addl 84(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,48(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 958139571(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 56(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 44(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 52(%esp),%ebx + shrl $10,%edi + addl 88(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,52(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1322822218(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 60(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 48(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 56(%esp),%ebx + shrl $10,%edi + addl 92(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 
28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + movl %ebx,56(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1537002063(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 64(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 52(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 60(%esp),%ebx + shrl $10,%edi + addl 32(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + movl %ebx,60(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 1747873779(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 68(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 56(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 64(%esp),%ebx + shrl $10,%edi + addl 36(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 20(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 24(%esp),%edi + xorl %ecx,%edx + movl %ebx,64(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,16(%esp) + xorl %ecx,%edx + addl 28(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 4(%esp),%edi + xorl %eax,%ecx + movl %eax,(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 1955562222(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 72(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 12(%esp),%edx + addl %ecx,%ebp + movl 60(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 68(%esp),%ebx + shrl $10,%edi + addl 40(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 16(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 20(%esp),%edi + xorl %esi,%edx + movl %ebx,68(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,12(%esp) + xorl %esi,%edx + addl 24(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl (%esp),%edi + xorl %ebp,%esi + movl %ebp,28(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2024104815(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 76(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 8(%esp),%edx + addl %esi,%eax + movl 64(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 72(%esp),%ebx + shrl $10,%edi + addl 44(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 12(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 16(%esp),%edi + xorl %ecx,%edx + movl %ebx,72(%esp) + xorl 
%edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,8(%esp) + xorl %ecx,%edx + addl 20(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 28(%esp),%edi + xorl %eax,%ecx + movl %eax,24(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2227730452(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 80(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 4(%esp),%edx + addl %ecx,%ebp + movl 68(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 76(%esp),%ebx + shrl $10,%edi + addl 48(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 8(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 12(%esp),%edi + xorl %esi,%edx + movl %ebx,76(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,4(%esp) + xorl %esi,%edx + addl 16(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 24(%esp),%edi + xorl %ebp,%esi + movl %ebp,20(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2361852424(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 84(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl (%esp),%edx + addl %esi,%eax + movl 72(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 80(%esp),%ebx + shrl $10,%edi + addl 52(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 4(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl 8(%esp),%edi + xorl %ecx,%edx + movl %ebx,80(%esp) + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,(%esp) + xorl %ecx,%edx + addl 12(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 20(%esp),%edi + xorl %eax,%ecx + movl %eax,16(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 2428436474(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 88(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 28(%esp),%edx + addl %ecx,%ebp + movl 76(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 84(%esp),%ebx + shrl $10,%edi + addl 56(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl (%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 4(%esp),%edi + xorl %esi,%edx + movl %ebx,84(%esp) + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,28(%esp) + xorl %esi,%edx + addl 8(%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 16(%esp),%edi + xorl %ebp,%esi + movl %ebp,12(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 2756734187(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + movl 92(%esp),%ecx + rorl $2,%esi + addl %edx,%eax + addl 24(%esp),%edx + addl %esi,%eax + movl 80(%esp),%esi + movl %ecx,%ebx + rorl $11,%ecx + movl %esi,%edi + rorl $2,%esi + xorl %ebx,%ecx + shrl $3,%ebx + rorl $7,%ecx + xorl %edi,%esi + xorl %ecx,%ebx + rorl $17,%esi + addl 88(%esp),%ebx + shrl $10,%edi + addl 60(%esp),%ebx + movl %edx,%ecx + xorl %esi,%edi + movl 28(%esp),%esi + rorl $14,%edx + addl %edi,%ebx + movl (%esp),%edi + xorl %ecx,%edx + xorl %edi,%esi + rorl $5,%edx + andl %ecx,%esi + movl %ecx,24(%esp) + xorl %ecx,%edx + addl 4(%esp),%ebx + xorl %esi,%edi + rorl $6,%edx + 
movl %eax,%ecx + addl %edi,%ebx + rorl $9,%ecx + movl %eax,%esi + movl 12(%esp),%edi + xorl %eax,%ecx + movl %eax,8(%esp) + xorl %edi,%eax + rorl $11,%ecx + andl %eax,%ebp + leal 3204031479(%ebx,%edx,1),%edx + xorl %esi,%ecx + xorl %edi,%ebp + movl 32(%esp),%esi + rorl $2,%ecx + addl %edx,%ebp + addl 20(%esp),%edx + addl %ecx,%ebp + movl 84(%esp),%ecx + movl %esi,%ebx + rorl $11,%esi + movl %ecx,%edi + rorl $2,%ecx + xorl %ebx,%esi + shrl $3,%ebx + rorl $7,%esi + xorl %edi,%ecx + xorl %esi,%ebx + rorl $17,%ecx + addl 92(%esp),%ebx + shrl $10,%edi + addl 64(%esp),%ebx + movl %edx,%esi + xorl %ecx,%edi + movl 24(%esp),%ecx + rorl $14,%edx + addl %edi,%ebx + movl 28(%esp),%edi + xorl %esi,%edx + xorl %edi,%ecx + rorl $5,%edx + andl %esi,%ecx + movl %esi,20(%esp) + xorl %esi,%edx + addl (%esp),%ebx + xorl %ecx,%edi + rorl $6,%edx + movl %ebp,%esi + addl %edi,%ebx + rorl $9,%esi + movl %ebp,%ecx + movl 8(%esp),%edi + xorl %ebp,%esi + movl %ebp,4(%esp) + xorl %edi,%ebp + rorl $11,%esi + andl %ebp,%eax + leal 3329325298(%ebx,%edx,1),%edx + xorl %ecx,%esi + xorl %edi,%eax + rorl $2,%esi + addl %edx,%eax + addl 16(%esp),%edx + addl %esi,%eax + movl 96(%esp),%esi + xorl %edi,%ebp + movl 12(%esp),%ecx + addl (%esi),%eax + addl 4(%esi),%ebp + addl 8(%esi),%edi + addl 12(%esi),%ecx + movl %eax,(%esi) + movl %ebp,4(%esi) + movl %edi,8(%esi) + movl %ecx,12(%esi) + movl %ebp,4(%esp) + xorl %edi,%ebp + movl %edi,8(%esp) + movl %ecx,12(%esp) + movl 20(%esp),%edi + movl 24(%esp),%ebx + movl 28(%esp),%ecx + addl 16(%esi),%edx + addl 20(%esi),%edi + addl 24(%esi),%ebx + addl 28(%esi),%ecx + movl %edx,16(%esi) + movl %edi,20(%esi) + movl %ebx,24(%esi) + movl %ecx,28(%esi) + movl %edi,20(%esp) + movl 100(%esp),%edi + movl %ebx,24(%esp) + movl %ecx,28(%esp) + cmpl 104(%esp),%edi + jb L009grand_loop + movl 108(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L__gnutls_x86_cpuid_s$non_lazy_ptr: +.indirect_symbol __gnutls_x86_cpuid_s +.long 0 +.comm __gnutls_x86_cpuid_s,16,2 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/sha512-ssse3-x86.s b/lib/accelerated/x86/macosx/sha512-ssse3-x86.s new file mode 100644 index 0000000000..cb097f16c8 --- /dev/null +++ b/lib/accelerated/x86/macosx/sha512-ssse3-x86.s @@ -0,0 +1,604 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# *** This file is auto-generated *** +# +.file "sha512-586.s" +.text +.globl _sha512_block_data_order +.align 4 +_sha512_block_data_order: +L_sha512_block_data_order_begin: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + movl 20(%esp),%esi + movl 24(%esp),%edi + movl 28(%esp),%eax + movl %esp,%ebx + call L000pic_point +L000pic_point: + popl %ebp + leal L001K512-L000pic_point(%ebp),%ebp + subl $16,%esp + andl $-64,%esp + shll $7,%eax + addl %edi,%eax + movl %esi,(%esp) + movl %edi,4(%esp) + movl %eax,8(%esp) + movl %ebx,12(%esp) +.align 4,0x90 +L002loop_x86: + movl (%edi),%eax + movl 4(%edi),%ebx + movl 8(%edi),%ecx + movl 12(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 16(%edi),%eax + movl 20(%edi),%ebx + movl 24(%edi),%ecx + movl 28(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 32(%edi),%eax + movl 36(%edi),%ebx + movl 40(%edi),%ecx + movl 44(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 48(%edi),%eax + movl 52(%edi),%ebx + movl 56(%edi),%ecx + movl 60(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 64(%edi),%eax + movl 68(%edi),%ebx + movl 72(%edi),%ecx + movl 76(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 80(%edi),%eax + movl 84(%edi),%ebx + movl 88(%edi),%ecx + movl 92(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 96(%edi),%eax + movl 100(%edi),%ebx + movl 104(%edi),%ecx + movl 108(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + movl 112(%edi),%eax + movl 116(%edi),%ebx + movl 120(%edi),%ecx + movl 124(%edi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + addl $128,%edi + subl $72,%esp + movl %edi,204(%esp) + leal 8(%esp),%edi + movl $16,%ecx +.long 2784229001 +.align 4,0x90 +L00300_15_x86: + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 
60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $148,%dl + jne L00300_15_x86 +.align 4,0x90 +L00416_79_x86: + movl 312(%esp),%ecx + movl 316(%esp),%edx + movl %ecx,%esi + shrl $1,%ecx + movl %edx,%edi + shrl $1,%edx + movl %ecx,%eax + shll $24,%esi + movl %edx,%ebx + shll $24,%edi + xorl %esi,%ebx + shrl $6,%ecx + xorl %edi,%eax + shrl $6,%edx + xorl %ecx,%eax + shll $7,%esi + xorl %edx,%ebx + shll $1,%edi + xorl %esi,%ebx + shrl $1,%ecx + xorl %edi,%eax + shrl $1,%edx + xorl %ecx,%eax + shll $6,%edi + xorl %edx,%ebx + xorl %edi,%eax + movl %eax,(%esp) + movl %ebx,4(%esp) + movl 208(%esp),%ecx + movl 212(%esp),%edx + movl %ecx,%esi + shrl $6,%ecx + movl %edx,%edi + shrl $6,%edx + movl %ecx,%eax + shll $3,%esi + movl %edx,%ebx + shll $3,%edi + xorl %esi,%eax + shrl $13,%ecx + xorl %edi,%ebx + shrl $13,%edx + xorl %ecx,%eax + shll $10,%esi + xorl %edx,%ebx + shll $10,%edi + xorl %esi,%ebx + shrl $10,%ecx + xorl %edi,%eax + shrl $10,%edx + xorl %ecx,%ebx + shll $13,%edi + xorl %edx,%eax + xorl %edi,%eax + movl 320(%esp),%ecx + movl 324(%esp),%edx + addl (%esp),%eax + adcl 4(%esp),%ebx + movl 248(%esp),%esi + movl 252(%esp),%edi + addl %ecx,%eax + adcl %edx,%ebx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,192(%esp) + movl %ebx,196(%esp) + movl 40(%esp),%ecx + movl 44(%esp),%edx + movl %ecx,%esi + shrl $9,%ecx + movl %edx,%edi + shrl $9,%edx + movl %ecx,%ebx + shll $14,%esi + movl %edx,%eax + shll $14,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%eax + shll $4,%esi + xorl %edx,%ebx + shll $4,%edi + xorl %esi,%ebx + shrl $4,%ecx + xorl %edi,%eax + shrl $4,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 48(%esp),%ecx + movl 52(%esp),%edx + movl 56(%esp),%esi + movl 60(%esp),%edi + addl 64(%esp),%eax + adcl 68(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + andl 40(%esp),%ecx + andl 44(%esp),%edx + addl 192(%esp),%eax + adcl 196(%esp),%ebx + xorl %esi,%ecx + xorl %edi,%edx + movl (%ebp),%esi + movl 4(%ebp),%edi + addl %ecx,%eax + adcl %edx,%ebx + movl 32(%esp),%ecx + movl 36(%esp),%edx + addl %esi,%eax + adcl %edi,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + addl %ecx,%eax + adcl %edx,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + 
movl %eax,32(%esp) + movl %ebx,36(%esp) + movl %ecx,%esi + shrl $2,%ecx + movl %edx,%edi + shrl $2,%edx + movl %ecx,%ebx + shll $4,%esi + movl %edx,%eax + shll $4,%edi + xorl %esi,%ebx + shrl $5,%ecx + xorl %edi,%eax + shrl $5,%edx + xorl %ecx,%ebx + shll $21,%esi + xorl %edx,%eax + shll $21,%edi + xorl %esi,%eax + shrl $21,%ecx + xorl %edi,%ebx + shrl $21,%edx + xorl %ecx,%eax + shll $5,%esi + xorl %edx,%ebx + shll $5,%edi + xorl %esi,%eax + xorl %edi,%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + movl 16(%esp),%esi + movl 20(%esp),%edi + addl (%esp),%eax + adcl 4(%esp),%ebx + orl %esi,%ecx + orl %edi,%edx + andl 24(%esp),%ecx + andl 28(%esp),%edx + andl 8(%esp),%esi + andl 12(%esp),%edi + orl %esi,%ecx + orl %edi,%edx + addl %ecx,%eax + adcl %edx,%ebx + movl %eax,(%esp) + movl %ebx,4(%esp) + movb (%ebp),%dl + subl $8,%esp + leal 8(%ebp),%ebp + cmpb $23,%dl + jne L00416_79_x86 + movl 840(%esp),%esi + movl 844(%esp),%edi + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + addl 8(%esp),%eax + adcl 12(%esp),%ebx + movl %eax,(%esi) + movl %ebx,4(%esi) + addl 16(%esp),%ecx + adcl 20(%esp),%edx + movl %ecx,8(%esi) + movl %edx,12(%esi) + movl 16(%esi),%eax + movl 20(%esi),%ebx + movl 24(%esi),%ecx + movl 28(%esi),%edx + addl 24(%esp),%eax + adcl 28(%esp),%ebx + movl %eax,16(%esi) + movl %ebx,20(%esi) + addl 32(%esp),%ecx + adcl 36(%esp),%edx + movl %ecx,24(%esi) + movl %edx,28(%esi) + movl 32(%esi),%eax + movl 36(%esi),%ebx + movl 40(%esi),%ecx + movl 44(%esi),%edx + addl 40(%esp),%eax + adcl 44(%esp),%ebx + movl %eax,32(%esi) + movl %ebx,36(%esi) + addl 48(%esp),%ecx + adcl 52(%esp),%edx + movl %ecx,40(%esi) + movl %edx,44(%esi) + movl 48(%esi),%eax + movl 52(%esi),%ebx + movl 56(%esi),%ecx + movl 60(%esi),%edx + addl 56(%esp),%eax + adcl 60(%esp),%ebx + movl %eax,48(%esi) + movl %ebx,52(%esi) + addl 64(%esp),%ecx + adcl 68(%esp),%edx + movl %ecx,56(%esi) + movl %edx,60(%esi) + addl $840,%esp + subl $640,%ebp + cmpl 8(%esp),%edi + jb L002loop_x86 + movl 12(%esp),%esp + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.align 6,0x90 +L001K512: +.long 3609767458,1116352408 +.long 602891725,1899447441 +.long 3964484399,3049323471 +.long 2173295548,3921009573 +.long 4081628472,961987163 +.long 3053834265,1508970993 +.long 2937671579,2453635748 +.long 3664609560,2870763221 +.long 2734883394,3624381080 +.long 1164996542,310598401 +.long 1323610764,607225278 +.long 3590304994,1426881987 +.long 4068182383,1925078388 +.long 991336113,2162078206 +.long 633803317,2614888103 +.long 3479774868,3248222580 +.long 2666613458,3835390401 +.long 944711139,4022224774 +.long 2341262773,264347078 +.long 2007800933,604807628 +.long 1495990901,770255983 +.long 1856431235,1249150122 +.long 3175218132,1555081692 +.long 2198950837,1996064986 +.long 3999719339,2554220882 +.long 766784016,2821834349 +.long 2566594879,2952996808 +.long 3203337956,3210313671 +.long 1034457026,3336571891 +.long 2466948901,3584528711 +.long 3758326383,113926993 +.long 168717936,338241895 +.long 1188179964,666307205 +.long 1546045734,773529912 +.long 1522805485,1294757372 +.long 2643833823,1396182291 +.long 2343527390,1695183700 +.long 1014477480,1986661051 +.long 1206759142,2177026350 +.long 344077627,2456956037 +.long 1290863460,2730485921 +.long 3158454273,2820302411 +.long 3505952657,3259730800 +.long 106217008,3345764771 +.long 3606008344,3516065817 +.long 1432725776,3600352804 +.long 1467031594,4094571909 +.long 851169720,275423344 +.long 3100823752,430227734 +.long 1363258195,506948616 +.long 
3750685593,659060556 +.long 3785050280,883997877 +.long 3318307427,958139571 +.long 3812723403,1322822218 +.long 2003034995,1537002063 +.long 3602036899,1747873779 +.long 1575990012,1955562222 +.long 1125592928,2024104815 +.long 2716904306,2227730452 +.long 442776044,2361852424 +.long 593698344,2428436474 +.long 3733110249,2756734187 +.long 2999351573,3204031479 +.long 3815920427,3329325298 +.long 3928383900,3391569614 +.long 566280711,3515267271 +.long 3454069534,3940187606 +.long 4000239992,4118630271 +.long 1914138554,116418474 +.long 2731055270,174292421 +.long 3203993006,289380356 +.long 320620315,460393269 +.long 587496836,685471733 +.long 1086792851,852142971 +.long 365543100,1017036298 +.long 2618297676,1126000580 +.long 3409855158,1288033470 +.long 4234509866,1501505948 +.long 987167468,1607167915 +.long 1246189591,1816402316 +.long 67438087,66051 +.long 202182159,134810123 +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +.byte 62,0 + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s new file mode 100644 index 0000000000..a845708030 --- /dev/null +++ b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s @@ -0,0 +1,2881 @@ +# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org> +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the Andy Polyakov nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# *** This file is auto-generated *** +# +.text + + +.globl _sha256_block_data_order + +.p2align 4 +_sha256_block_data_order: + leaq __gnutls_x86_cpuid_s(%rip),%r11 + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d + testl $512,%r10d + jnz L$ssse3_shortcut + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp L$loop + +.p2align 4 +L$loop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl 
(%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl 
%r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + jmp L$rounds_16_xx +.p2align 4 +L$rounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d 
+ + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 52(%rsp),%r12d + + addl 16(%rsp),%r12d + 
movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl 
%r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + + leaq 4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 16(%rsp),%r12d + + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl 
%r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + + leaq 20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz L$rounds_16_xx + + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) 
+ movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue: + .byte 0xf3,0xc3 + +.p2align 6 + +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 + +.p2align 6 +sha256_block_data_order_ssse3: +L$ssse3_shortcut: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + movq %rsp,%r11 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %r11,64+24(%rsp) +L$prologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + + jmp L$loop_ssse3 +.p2align 4 +L$loop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 32(%rsi),%xmm2 + movdqu 48(%rsi),%xmm3 +.byte 102,15,56,0,199 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 +.byte 102,15,56,0,215 + movdqa 32(%rbp),%xmm5 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + 
movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp L$ssse3_00_47 + +.p2align 4 +L$ssse3_00_47: + subq $-32*4,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl 
%eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d 
+ movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 
+ rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne L$ssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + 
andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl 
%ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb L$loop_ssse3 + + movq 64+24(%rsp),%rsi + movq (%rsi),%r15 + movq 8(%rsi),%r14 + movq 16(%rsi),%r13 + movq 24(%rsi),%r12 + movq 32(%rsi),%rbp + movq 40(%rsi),%rbx + leaq 48(%rsi),%rsp +L$epilogue_ssse3: + .byte 0xf3,0xc3 + + +.section .note.GNU-stack,"",%progbits diff --git a/lib/accelerated/x86/sha-padlock.h b/lib/accelerated/x86/sha-padlock.h index 05af543075..30d3ccec28 100644 --- a/lib/accelerated/x86/sha-padlock.h +++ b/lib/accelerated/x86/sha-padlock.h @@ -30,4 +30,7 @@ extern const struct nettle_hash padlock_sha256; extern const struct nettle_hash padlock_sha384; extern const struct nettle_hash padlock_sha512; +extern const gnutls_crypto_mac_st hmac_sha_padlock_nano_struct; +extern const gnutls_crypto_digest_st sha_padlock_nano_struct; + #endif diff --git a/lib/accelerated/x86/sha-x86.c b/lib/accelerated/x86/sha-x86.c new file mode 100644 index 0000000000..054dc6ce73 --- /dev/null +++ b/lib/accelerated/x86/sha-x86.c @@ -0,0 +1,365 @@ +/* + * Copyright (C) 2011-2012 Free Software Foundation, Inc. + * + * Author: Nikos Mavrogiannopoulos + * + * This file is part of GnuTLS. + * + * The GnuTLS is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>
+ *
+ */
+
+#include <gnutls_errors.h>
+#include <gnutls_int.h>
+#include <gnutls/crypto.h>
+#include <gnutls_errors.h>
+#include <aes-x86.h>
+#include <nettle/sha.h>
+#include <nettle/macros.h>
+#include <nettle/nettle-meta.h>
+#include <sha-x86.h>
+#include <x86.h>
+
+void sha1_block_data_order(void *c, const void *p, size_t len);
+void sha256_block_data_order(void *c, const void *p, size_t len);
+void sha512_block_data_order(void *c, const void *p, size_t len);
+
+typedef void (*update_func) (void *, unsigned, const uint8_t *);
+typedef void (*digest_func) (void *, unsigned, uint8_t *);
+typedef void (*set_key_func) (void *, unsigned, const uint8_t *);
+typedef void (*init_func) (void *);
+
+struct x86_hash_ctx {
+        union {
+                struct sha1_ctx sha1;
+                struct sha224_ctx sha224;
+                struct sha256_ctx sha256;
+#ifdef ENABLE_SHA512
+                struct sha384_ctx sha384;
+                struct sha512_ctx sha512;
+#endif
+        } ctx;
+        void *ctx_ptr;
+        gnutls_digest_algorithm_t algo;
+        size_t length;
+        update_func update;
+        digest_func digest;
+        init_func init;
+};
+
+static int
+wrap_x86_hash_update(void *_ctx, const void *text, size_t textsize)
+{
+        struct x86_hash_ctx *ctx = _ctx;
+
+        ctx->update(ctx->ctx_ptr, textsize, text);
+
+        return GNUTLS_E_SUCCESS;
+}
+
+static void wrap_x86_hash_deinit(void *hd)
+{
+        gnutls_free(hd);
+}
+
+void x86_sha1_update(struct sha1_ctx *ctx, size_t length,
+                     const uint8_t * data)
+{
+        struct {
+                uint32_t h0, h1, h2, h3, h4;
+                uint32_t Nl, Nh;
+                uint32_t data[16];
+                unsigned int num;
+        } octx;
+        size_t res;
+        unsigned t2, i;
+
+        if ((res = ctx->index)) {
+                res = SHA1_DATA_SIZE - res;
+                if (length < res)
+                        res = length;
+                sha1_update(ctx, res, data);
+                data += res;
+                length -= res;
+        }
+
+        octx.h0 = ctx->state[0];
+        octx.h1 = ctx->state[1];
+        octx.h2 = ctx->state[2];
+        octx.h3 = ctx->state[3];
+        octx.h4 = ctx->state[4];
+
+        memcpy(octx.data, ctx->block, SHA1_DATA_SIZE);
+        octx.num = ctx->index;
+
+        res = length % SHA1_DATA_SIZE;
+        length -= res;
+
+        if (length > 0) {
+
+                t2 = length / SHA1_DATA_SIZE;
+
+                sha1_block_data_order(&octx, data, t2);
+
+                for (i=0;i<t2;i++)
+                        MD_INCR(ctx);
+                data += length;
+        }
+
+        ctx->state[0] = octx.h0;
+        ctx->state[1] = octx.h1;
+        ctx->state[2] = octx.h2;
+        ctx->state[3] = octx.h3;
+        ctx->state[4] = octx.h4;
+
+        memcpy(ctx->block, octx.data, octx.num);
+        ctx->index = octx.num;
+
+        if (res > 0) {
+                sha1_update(ctx, res, data);
+        }
+
+}
+
+void x86_sha256_update(struct sha256_ctx *ctx, size_t length,
+                       const uint8_t * data)
+{
+        struct {
+                uint32_t h[8];
+                uint32_t Nl, Nh;
+                uint32_t data[16];
+                unsigned int num;
+                unsigned md_len;
+        } octx;
+        size_t res;
+        unsigned t2, i;
+
+        if ((res = ctx->index)) {
+                res = SHA256_DATA_SIZE - res;
+                if (length < res)
+                        res = length;
+                sha256_update(ctx, res, data);
+                data += res;
+                length -= res;
+        }
+
+        memcpy(octx.h, ctx->state, sizeof(octx.h));
+        memcpy(octx.data, ctx->block, SHA256_DATA_SIZE);
+        octx.num = ctx->index;
+
+        res = length % SHA256_DATA_SIZE;
+        length -= res;
+
+        if (length > 0) {
+                t2 = length / SHA256_DATA_SIZE;
+                sha256_block_data_order(&octx, data, t2);
+
+                for (i=0;i<t2;i++)
+                        MD_INCR(ctx);
+                data += length;
+        }
+
+        memcpy(ctx->state, octx.h, sizeof(octx.h));
+
+        memcpy(ctx->block, octx.data, octx.num);
+        ctx->index = octx.num;
+
+        if (res > 0) {
+                sha256_update(ctx, res, data);
+        }
+}
+
+#ifdef ENABLE_SHA512
+void x86_sha512_update(struct sha512_ctx *ctx, size_t length,
+                       const uint8_t * data)
+{
+        struct {
+                uint64_t h[8];
+                uint64_t Nl, Nh;
+                union {
+                        uint64_t d[16];
+                        uint8_t p[16*8];
+                } u;
+                unsigned int num;
+                unsigned md_len;
+        } octx;
+        size_t res;
+        unsigned t2, i;
+
+        if ((res = ctx->index)) {
+                res = SHA512_DATA_SIZE - res;
+                if (length < res)
+                        res = length;
+                sha512_update(ctx, res, data);
+                data += res;
+                length -= res;
+        }
+
+        memcpy(octx.h, ctx->state, sizeof(octx.h));
+        memcpy(octx.u.p, ctx->block, SHA512_DATA_SIZE);
+        octx.num = ctx->index;
+
+        res = length % SHA512_DATA_SIZE;
+        length -= res;
+
+        if (length > 0) {
+                t2 = length / SHA512_DATA_SIZE;
+                sha512_block_data_order(&octx, data, t2);
+
+                for (i=0;i<t2;i++)
+                        MD_INCR(ctx);
+                data += length;
+        }
+
+        memcpy(ctx->state, octx.h, sizeof(octx.h));
+
+        memcpy(ctx->block, octx.u.p, octx.num);
+        ctx->index = octx.num;
+
+        if (res > 0) {
+                sha512_update(ctx, res, data);
+        }
+}
+#endif
+
+static int _ctx_init(gnutls_digest_algorithm_t algo,
+                     struct x86_hash_ctx *ctx)
+{
+        switch (algo) {
+        case GNUTLS_DIG_SHA1:
+                sha1_init(&ctx->ctx.sha1);
+                ctx->update = (update_func) x86_sha1_update;
+                ctx->digest = (digest_func) sha1_digest;
+                ctx->init = (init_func) sha1_init;
+                ctx->ctx_ptr = &ctx->ctx.sha1;
+                ctx->length = SHA1_DIGEST_SIZE;
+                break;
+        case GNUTLS_DIG_SHA224:
+                sha224_init(&ctx->ctx.sha224);
+                ctx->update = (update_func) x86_sha256_update;
+                ctx->digest = (digest_func) sha256_digest;
+                ctx->init = (init_func) sha224_init;
+                ctx->ctx_ptr = &ctx->ctx.sha224;
+                ctx->length = SHA224_DIGEST_SIZE;
+                break;
+        case GNUTLS_DIG_SHA256:
+                sha256_init(&ctx->ctx.sha256);
+                ctx->update = (update_func) x86_sha256_update;
+                ctx->digest = (digest_func) sha256_digest;
+                ctx->init = (init_func) sha256_init;
+                ctx->ctx_ptr = &ctx->ctx.sha256;
+                ctx->length = SHA256_DIGEST_SIZE;
+                break;
+#ifdef ENABLE_SHA512
+        case GNUTLS_DIG_SHA384:
+                sha384_init(&ctx->ctx.sha384);
+                ctx->update = (update_func) x86_sha512_update;
+                ctx->digest = (digest_func) sha512_digest;
+                ctx->init = (init_func) sha384_init;
+                ctx->ctx_ptr = &ctx->ctx.sha384;
+                ctx->length = SHA384_DIGEST_SIZE;
+                break;
+        case GNUTLS_DIG_SHA512:
+                sha512_init(&ctx->ctx.sha512);
+                ctx->update = (update_func) x86_sha512_update;
+                ctx->digest = (digest_func) sha512_digest;
+                ctx->init = (init_func) sha512_init;
+                ctx->ctx_ptr = &ctx->ctx.sha512;
+                ctx->length = SHA512_DIGEST_SIZE;
+                break;
+#endif
+        default:
+                gnutls_assert();
+                return GNUTLS_E_INVALID_REQUEST;
+        }
+
+        return 0;
+}
+
+
+static int wrap_x86_hash_init(gnutls_digest_algorithm_t algo, void **_ctx)
+{
+        struct x86_hash_ctx *ctx;
+        int ret;
+
+        ctx = gnutls_malloc(sizeof(struct x86_hash_ctx));
+        if (ctx == NULL) {
+                gnutls_assert();
+                return GNUTLS_E_MEMORY_ERROR;
+        }
+
+        ctx->algo = algo;
+
+        if ((ret = _ctx_init(algo, ctx)) < 0) {
+                gnutls_assert();
+                return ret;
+        }
+
+        *_ctx = ctx;
+
+        return 0;
+}
+
+static int
+wrap_x86_hash_output(void *src_ctx, void *digest, size_t digestsize)
+{
+        struct x86_hash_ctx *ctx;
+        ctx = src_ctx;
+
+        if (digestsize < ctx->length)
+                return gnutls_assert_val(GNUTLS_E_SHORT_MEMORY_BUFFER);
+
+        ctx->digest(ctx->ctx_ptr, digestsize, digest);
+
+        return 0;
+}
+
+static int wrap_x86_hash_fast(gnutls_digest_algorithm_t algo,
+                              const void *text, size_t text_size,
+                              void *digest)
+{
+        struct x86_hash_ctx ctx;
+        int ret;
+
+        ret = _ctx_init(algo, &ctx);
+        if (ret < 0)
+                return gnutls_assert_val(ret);
+
+        ctx.update(&ctx, text_size, text);
+        ctx.digest(&ctx, ctx.length, digest);
+
+        return 0;
+}
+
+const struct nettle_hash x86_sha1 =
+NN_HASH(sha1, x86_sha1_update, sha1_digest, SHA1);
+const struct nettle_hash x86_sha224 =
+NN_HASH(sha224, x86_sha256_update, sha224_digest, SHA224);
+const struct nettle_hash x86_sha256 =
+NN_HASH(sha256, x86_sha256_update, sha256_digest, SHA256);
+
+#ifdef ENABLE_SHA512
+const struct nettle_hash x86_sha384 =
+NN_HASH(sha384, x86_sha512_update, sha384_digest, SHA384);
+const struct nettle_hash x86_sha512 =
+NN_HASH(sha512, x86_sha512_update, sha512_digest, SHA512);
+#endif
+
+const gnutls_crypto_digest_st sha_x86_struct = {
+        .init = wrap_x86_hash_init,
+        .hash = wrap_x86_hash_update,
+        .output = wrap_x86_hash_output,
+        .deinit = wrap_x86_hash_deinit,
+        .fast = wrap_x86_hash_fast,
+};
diff --git a/lib/accelerated/x86/sha-x86.h b/lib/accelerated/x86/sha-x86.h
new file mode 100644
index 0000000000..a288962da5
--- /dev/null
+++ b/lib/accelerated/x86/sha-x86.h
@@ -0,0 +1,22 @@
+#ifndef SHA_X86_H
+#define SHA_X86_H
+
+#include <nettle/sha.h>
+
+/* nettle's SHA512 is faster than openssl's */
+#undef ENABLE_SHA512
+
+extern const struct nettle_hash x86_sha1;
+extern const struct nettle_hash x86_sha224;
+extern const struct nettle_hash x86_sha256;
+extern const struct nettle_hash x86_sha384;
+extern const struct nettle_hash x86_sha512;
+
+void x86_sha1_update(struct sha1_ctx *ctx, size_t length, const uint8_t * data);
+void x86_sha256_update(struct sha256_ctx *ctx, size_t length, const uint8_t * data);
+void x86_sha512_update(struct sha512_ctx *ctx, size_t length, const uint8_t * data);
+
+extern const gnutls_crypto_digest_st sha_x86_struct;
+extern const gnutls_crypto_mac_st hmac_sha_x86_struct;
+
+#endif
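
The new sha-x86.c backend is consumed through the sha_x86_struct vtable shown above, which wires the assembler block routines into GnuTLS's generic digest interface. A minimal sketch of the intended calling sequence (init, hash, output, deinit) follows; the digest_buffer_sha256() helper and its standalone framing are illustrative assumptions, not part of this commit:

    /* Sketch only: drive the SSSE3-backed digest through its gnutls vtable,
     * the way the generic crypto layer does once the backend is registered.
     * digest_buffer_sha256() is a hypothetical helper. */
    #include <gnutls_int.h>
    #include <gnutls_errors.h>
    #include <gnutls/crypto.h>
    #include <sha-x86.h>

    static int digest_buffer_sha256(const void *data, size_t len,
                                    uint8_t out[SHA256_DIGEST_SIZE])
    {
            void *hd;
            int ret;

            ret = sha_x86_struct.init(GNUTLS_DIG_SHA256, &hd); /* wrap_x86_hash_init() */
            if (ret < 0)
                    return gnutls_assert_val(ret);

            ret = sha_x86_struct.hash(hd, data, len);          /* wrap_x86_hash_update() */
            if (ret >= 0)
                    ret = sha_x86_struct.output(hd, out, SHA256_DIGEST_SIZE);

            sha_x86_struct.deinit(hd);                         /* frees the x86_hash_ctx */
            return ret;
    }

The one-shot equivalent is a single sha_x86_struct.fast(GNUTLS_DIG_SHA256, data, len, out) call, backed by wrap_x86_hash_fast().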
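Because x86_sha1, x86_sha224 and x86_sha256 (and, when ENABLE_SHA512 is defined, x86_sha384 and x86_sha512) are plain struct nettle_hash descriptors built with NN_HASH, they can also be driven through nettle's generic metadata interface. A rough sketch under that assumption; hash_with_meta() is hypothetical and only shows the calling convention:

    /* Sketch only: consume an NN_HASH-generated descriptor via nettle's
     * struct nettle_hash metadata (context_size, init, update, digest). */
    #include <stdlib.h>
    #include <gnutls_int.h>
    #include <gnutls/crypto.h>
    #include <nettle/nettle-meta.h>
    #include <sha-x86.h>

    static int hash_with_meta(const struct nettle_hash *h,
                              const uint8_t *data, size_t len, uint8_t *out)
    {
            void *ctx = malloc(h->context_size); /* e.g. sizeof(struct sha256_ctx) */

            if (ctx == NULL)
                    return GNUTLS_E_MEMORY_ERROR;

            h->init(ctx);
            h->update(ctx, len, data);           /* x86_sha256_update() for &x86_sha256 */
            h->digest(ctx, h->digest_size, out);

            free(ctx);
            return 0;
    }

    /* usage: hash_with_meta(&x86_sha256, buf, buflen, md); */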