diff options
Diffstat (limited to 'lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s')
-rw-r--r-- | lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s | 8188 |
1 files changed, 5388 insertions, 2800 deletions
diff --git a/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s index 7e73227c2b..8bf161601e 100644 --- a/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s +++ b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s @@ -1,4 +1,4 @@ -# Copyright (c) 2011-2013, Andy Polyakov <appro@openssl.org> +# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -40,2850 +40,5438 @@ .text -.globl _sha256_block_data_order +.globl _sha512_block_data_order .p2align 4 -_sha256_block_data_order: +_sha512_block_data_order: + leaq __gnutls_x86_cpuid_s(%rip),%r11 movl 0(%r11),%r9d movl 4(%r11),%r10d movl 8(%r11),%r11d - testl $512,%r10d - jnz L$ssse3_shortcut + testl $2048,%r10d + jnz L$xop_shortcut + andl $296,%r11d + cmpl $296,%r11d + je L$avx2_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d + cmpl $1342177792,%r10d + je L$avx_shortcut + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 - movq %rsp,%r11 + shlq $4,%rdx - subq $64+32,%rsp - leaq (%rsi,%rdx,4),%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx andq $-64,%rsp - movq %rdi,64+0(%rsp) - movq %rsi,64+8(%rsp) - movq %rdx,64+16(%rsp) - movq %r11,64+24(%rsp) + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) + L$prologue: - movl 0(%rdi),%eax - movl 4(%rdi),%ebx - movl 8(%rdi),%ecx - movl 12(%rdi),%edx - movl 16(%rdi),%r8d - movl 20(%rdi),%r9d - movl 24(%rdi),%r10d - movl 28(%rdi),%r11d + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 jmp L$loop .p2align 4 L$loop: - movl %ebx,%edi - leaq K256(%rip),%rbp - xorl %ecx,%edi - movl 0(%rsi),%r12d - movl %r8d,%r13d - movl %eax,%r14d - bswapl %r12d - rorl $14,%r13d - movl %r9d,%r15d - - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r15d - - movl %r12d,0(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - - rorl $5,%r13d - addl %r11d,%r12d - xorl %r10d,%r15d - - rorl $11,%r14d - xorl %r8d,%r13d - addl %r15d,%r12d - - movl %eax,%r15d - addl (%rbp),%r12d - xorl %eax,%r14d - - rorl $6,%r13d - xorl %ebx,%r15d - movl %ebx,%r11d - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%r11d - addl %r12d,%edx - addl %r12d,%r11d - leaq 4(%rbp),%rbp - addl %r14d,%r11d - - movl 4(%rsi),%r12d - movl %edx,%r13d - movl %r11d,%r14d - bswapl %r12d - rorl $14,%r13d - movl %r8d,%edi - - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%edi - - movl %r12d,4(%rsp) - xorl %r11d,%r14d - andl %edx,%edi - - rorl $5,%r13d - addl %r10d,%r12d - xorl %r9d,%edi - - rorl $11,%r14d - xorl %edx,%r13d - addl %edi,%r12d - - movl %r11d,%edi - addl (%rbp),%r12d - xorl %r11d,%r14d - - rorl $6,%r13d - xorl %eax,%edi - movl %eax,%r10d - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%r10d - addl %r12d,%ecx - addl %r12d,%r10d - leaq 4(%rbp),%rbp - addl %r14d,%r10d - - movl 8(%rsi),%r12d - movl %ecx,%r13d - movl %r10d,%r14d - bswapl %r12d - rorl $14,%r13d - movl %edx,%r15d - - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r15d - - movl %r12d,8(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - - rorl $5,%r13d - addl %r9d,%r12d - xorl %r8d,%r15d - - rorl $11,%r14d - xorl %ecx,%r13d - addl %r15d,%r12d - - movl %r10d,%r15d - addl (%rbp),%r12d - xorl %r10d,%r14d - - rorl $6,%r13d - xorl %r11d,%r15d - movl %r11d,%r9d - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%r9d - addl %r12d,%ebx - addl %r12d,%r9d - leaq 4(%rbp),%rbp - addl %r14d,%r9d - - movl 12(%rsi),%r12d - movl %ebx,%r13d - movl %r9d,%r14d - bswapl %r12d - rorl $14,%r13d - movl %ecx,%edi - - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%edi - - movl %r12d,12(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi - - rorl $5,%r13d - addl %r8d,%r12d - xorl %edx,%edi - - rorl $11,%r14d - xorl %ebx,%r13d - addl %edi,%r12d - - movl %r9d,%edi - addl (%rbp),%r12d - xorl %r9d,%r14d - - rorl $6,%r13d - xorl %r10d,%edi - movl %r10d,%r8d - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%r8d - addl %r12d,%eax - addl %r12d,%r8d - leaq 20(%rbp),%rbp - addl %r14d,%r8d - - movl 16(%rsi),%r12d - movl %eax,%r13d - movl %r8d,%r14d - bswapl %r12d - rorl $14,%r13d - movl %ebx,%r15d - - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r15d - - movl %r12d,16(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - - rorl $5,%r13d - addl %edx,%r12d - xorl %ecx,%r15d - - rorl $11,%r14d - xorl %eax,%r13d - addl %r15d,%r12d - - movl %r8d,%r15d - addl (%rbp),%r12d - xorl %r8d,%r14d - - rorl $6,%r13d - xorl %r9d,%r15d - movl %r9d,%edx - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%edx - addl %r12d,%r11d - addl %r12d,%edx - leaq 4(%rbp),%rbp - addl %r14d,%edx - - movl 20(%rsi),%r12d - movl %r11d,%r13d - movl %edx,%r14d - bswapl %r12d - rorl $14,%r13d - movl %eax,%edi - - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%edi - - movl %r12d,20(%rsp) - xorl %edx,%r14d - andl %r11d,%edi - - rorl $5,%r13d - addl %ecx,%r12d - xorl %ebx,%edi - - rorl $11,%r14d - xorl %r11d,%r13d - addl %edi,%r12d - - movl %edx,%edi - addl (%rbp),%r12d - xorl %edx,%r14d - - rorl $6,%r13d - xorl %r8d,%edi - movl %r8d,%ecx - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%ecx - addl %r12d,%r10d - addl %r12d,%ecx - leaq 4(%rbp),%rbp - addl %r14d,%ecx - - movl 24(%rsi),%r12d - movl %r10d,%r13d - movl %ecx,%r14d - bswapl %r12d - rorl $14,%r13d - movl %r11d,%r15d - - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r15d - - movl %r12d,24(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - - rorl $5,%r13d - addl %ebx,%r12d - xorl %eax,%r15d - - rorl $11,%r14d - xorl %r10d,%r13d - addl %r15d,%r12d - - movl %ecx,%r15d - addl (%rbp),%r12d - xorl %ecx,%r14d - - rorl $6,%r13d - xorl %edx,%r15d - movl %edx,%ebx - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%ebx - addl %r12d,%r9d - addl %r12d,%ebx - leaq 4(%rbp),%rbp - addl %r14d,%ebx - - movl 28(%rsi),%r12d - movl %r9d,%r13d - movl %ebx,%r14d - bswapl %r12d - rorl $14,%r13d - movl %r10d,%edi - - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%edi - - movl %r12d,28(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi - - rorl $5,%r13d - addl %eax,%r12d - xorl %r11d,%edi - - rorl $11,%r14d - xorl %r9d,%r13d - addl %edi,%r12d - - movl %ebx,%edi - addl (%rbp),%r12d - xorl %ebx,%r14d - - rorl $6,%r13d - xorl %ecx,%edi - movl %ecx,%eax - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%eax - addl %r12d,%r8d - addl %r12d,%eax - leaq 20(%rbp),%rbp - addl %r14d,%eax - - movl 32(%rsi),%r12d - movl %r8d,%r13d - movl %eax,%r14d - bswapl %r12d - rorl $14,%r13d - movl %r9d,%r15d - - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r15d - - movl %r12d,32(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - - rorl $5,%r13d - addl %r11d,%r12d - xorl %r10d,%r15d - - rorl $11,%r14d - xorl %r8d,%r13d - addl %r15d,%r12d - - movl %eax,%r15d - addl (%rbp),%r12d - xorl %eax,%r14d - - rorl $6,%r13d - xorl %ebx,%r15d - movl %ebx,%r11d - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%r11d - addl %r12d,%edx - addl %r12d,%r11d - leaq 4(%rbp),%rbp - addl %r14d,%r11d - - movl 36(%rsi),%r12d - movl %edx,%r13d - movl %r11d,%r14d - bswapl %r12d - rorl $14,%r13d - movl %r8d,%edi - - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%edi - - movl %r12d,36(%rsp) - xorl %r11d,%r14d - andl %edx,%edi - - rorl $5,%r13d - addl %r10d,%r12d - xorl %r9d,%edi - - rorl $11,%r14d - xorl %edx,%r13d - addl %edi,%r12d - - movl %r11d,%edi - addl (%rbp),%r12d - xorl %r11d,%r14d - - rorl $6,%r13d - xorl %eax,%edi - movl %eax,%r10d - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%r10d - addl %r12d,%ecx - addl %r12d,%r10d - leaq 4(%rbp),%rbp - addl %r14d,%r10d - - movl 40(%rsi),%r12d - movl %ecx,%r13d - movl %r10d,%r14d - bswapl %r12d - rorl $14,%r13d - movl %edx,%r15d - - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r15d - - movl %r12d,40(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - - rorl $5,%r13d - addl %r9d,%r12d - xorl %r8d,%r15d - - rorl $11,%r14d - xorl %ecx,%r13d - addl %r15d,%r12d - - movl %r10d,%r15d - addl (%rbp),%r12d - xorl %r10d,%r14d - - rorl $6,%r13d - xorl %r11d,%r15d - movl %r11d,%r9d - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%r9d - addl %r12d,%ebx - addl %r12d,%r9d - leaq 4(%rbp),%rbp - addl %r14d,%r9d - - movl 44(%rsi),%r12d - movl %ebx,%r13d - movl %r9d,%r14d - bswapl %r12d - rorl $14,%r13d - movl %ecx,%edi - - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%edi - - movl %r12d,44(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi - - rorl $5,%r13d - addl %r8d,%r12d - xorl %edx,%edi - - rorl $11,%r14d - xorl %ebx,%r13d - addl %edi,%r12d - - movl %r9d,%edi - addl (%rbp),%r12d - xorl %r9d,%r14d - - rorl $6,%r13d - xorl %r10d,%edi - movl %r10d,%r8d - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%r8d - addl %r12d,%eax - addl %r12d,%r8d - leaq 20(%rbp),%rbp - addl %r14d,%r8d - - movl 48(%rsi),%r12d - movl %eax,%r13d - movl %r8d,%r14d - bswapl %r12d - rorl $14,%r13d - movl %ebx,%r15d - - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r15d - - movl %r12d,48(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - - rorl $5,%r13d - addl %edx,%r12d - xorl %ecx,%r15d - - rorl $11,%r14d - xorl %eax,%r13d - addl %r15d,%r12d - - movl %r8d,%r15d - addl (%rbp),%r12d - xorl %r8d,%r14d - - rorl $6,%r13d - xorl %r9d,%r15d - movl %r9d,%edx - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%edx - addl %r12d,%r11d - addl %r12d,%edx - leaq 4(%rbp),%rbp - addl %r14d,%edx - - movl 52(%rsi),%r12d - movl %r11d,%r13d - movl %edx,%r14d - bswapl %r12d - rorl $14,%r13d - movl %eax,%edi - - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%edi - - movl %r12d,52(%rsp) - xorl %edx,%r14d - andl %r11d,%edi - - rorl $5,%r13d - addl %ecx,%r12d - xorl %ebx,%edi - - rorl $11,%r14d - xorl %r11d,%r13d - addl %edi,%r12d - - movl %edx,%edi - addl (%rbp),%r12d - xorl %edx,%r14d - - rorl $6,%r13d - xorl %r8d,%edi - movl %r8d,%ecx - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%ecx - addl %r12d,%r10d - addl %r12d,%ecx - leaq 4(%rbp),%rbp - addl %r14d,%ecx - - movl 56(%rsi),%r12d - movl %r10d,%r13d - movl %ecx,%r14d - bswapl %r12d - rorl $14,%r13d - movl %r11d,%r15d - - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r15d - - movl %r12d,56(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - - rorl $5,%r13d - addl %ebx,%r12d - xorl %eax,%r15d - - rorl $11,%r14d - xorl %r10d,%r13d - addl %r15d,%r12d - - movl %ecx,%r15d - addl (%rbp),%r12d - xorl %ecx,%r14d - - rorl $6,%r13d - xorl %edx,%r15d - movl %edx,%ebx - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%ebx - addl %r12d,%r9d - addl %r12d,%ebx - leaq 4(%rbp),%rbp - addl %r14d,%ebx - - movl 60(%rsi),%r12d - movl %r9d,%r13d - movl %ebx,%r14d - bswapl %r12d - rorl $14,%r13d - movl %r10d,%edi - - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%edi - - movl %r12d,60(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi - - rorl $5,%r13d - addl %eax,%r12d - xorl %r11d,%edi - - rorl $11,%r14d - xorl %r9d,%r13d - addl %edi,%r12d - - movl %ebx,%edi - addl (%rbp),%r12d - xorl %ebx,%r14d - - rorl $6,%r13d - xorl %ecx,%edi - movl %ecx,%eax - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%eax - addl %r12d,%r8d - addl %r12d,%eax - movl 4(%rsp),%r13d - leaq 20(%rbp),%rbp - addl %r14d,%eax - + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + addq %r14,%rax + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp jmp L$rounds_16_xx .p2align 4 L$rounds_16_xx: - - movl 56(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%r15d - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r15d,%r14d - shrl $10,%r15d - - xorl %r13d,%r12d - rorl $17,%r14d - addl 36(%rsp),%r12d - xorl %r15d,%r14d - - addl 0(%rsp),%r12d - movl %r8d,%r13d - addl %r14d,%r12d - movl %eax,%r14d - rorl $14,%r13d - movl %r9d,%r15d - - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r15d - - movl %r12d,0(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - - rorl $5,%r13d - addl %r11d,%r12d - xorl %r10d,%r15d - - rorl $11,%r14d - xorl %r8d,%r13d - addl %r15d,%r12d - - movl %eax,%r15d - addl (%rbp),%r12d - xorl %eax,%r14d - - rorl $6,%r13d - xorl %ebx,%r15d - movl %ebx,%r11d - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%r11d - addl %r12d,%edx - addl %r12d,%r11d - movl 8(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%r11d - - - movl 60(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%edi - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %edi,%r14d - shrl $10,%edi - - xorl %r13d,%r12d - rorl $17,%r14d - addl 40(%rsp),%r12d - xorl %edi,%r14d - - addl 4(%rsp),%r12d - movl %edx,%r13d - addl %r14d,%r12d - movl %r11d,%r14d - rorl $14,%r13d - movl %r8d,%edi - - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%edi - - movl %r12d,4(%rsp) - xorl %r11d,%r14d - andl %edx,%edi - - rorl $5,%r13d - addl %r10d,%r12d - xorl %r9d,%edi - - rorl $11,%r14d - xorl %edx,%r13d - addl %edi,%r12d - - movl %r11d,%edi - addl (%rbp),%r12d - xorl %r11d,%r14d - - rorl $6,%r13d - xorl %eax,%edi - movl %eax,%r10d - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%r10d - addl %r12d,%ecx - addl %r12d,%r10d - movl 12(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%r10d - - - movl 0(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%r15d - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r15d,%r14d - shrl $10,%r15d - - xorl %r13d,%r12d - rorl $17,%r14d - addl 44(%rsp),%r12d - xorl %r15d,%r14d - - addl 8(%rsp),%r12d - movl %ecx,%r13d - addl %r14d,%r12d - movl %r10d,%r14d - rorl $14,%r13d - movl %edx,%r15d - - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r15d - - movl %r12d,8(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - - rorl $5,%r13d - addl %r9d,%r12d - xorl %r8d,%r15d - - rorl $11,%r14d - xorl %ecx,%r13d - addl %r15d,%r12d - - movl %r10d,%r15d - addl (%rbp),%r12d - xorl %r10d,%r14d - - rorl $6,%r13d - xorl %r11d,%r15d - movl %r11d,%r9d - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%r9d - addl %r12d,%ebx - addl %r12d,%r9d - movl 16(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%r9d - - - movl 4(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%edi - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %edi,%r14d - shrl $10,%edi - - xorl %r13d,%r12d - rorl $17,%r14d - addl 48(%rsp),%r12d - xorl %edi,%r14d - - addl 12(%rsp),%r12d - movl %ebx,%r13d - addl %r14d,%r12d - movl %r9d,%r14d - rorl $14,%r13d - movl %ecx,%edi - - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%edi - - movl %r12d,12(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi - - rorl $5,%r13d - addl %r8d,%r12d - xorl %edx,%edi - - rorl $11,%r14d - xorl %ebx,%r13d - addl %edi,%r12d - - movl %r9d,%edi - addl (%rbp),%r12d - xorl %r9d,%r14d - - rorl $6,%r13d - xorl %r10d,%edi - movl %r10d,%r8d - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%r8d - addl %r12d,%eax - addl %r12d,%r8d - movl 20(%rsp),%r13d - leaq 20(%rbp),%rbp - addl %r14d,%r8d - - - movl 8(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%r15d - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r15d,%r14d - shrl $10,%r15d - - xorl %r13d,%r12d - rorl $17,%r14d - addl 52(%rsp),%r12d - xorl %r15d,%r14d - - addl 16(%rsp),%r12d - movl %eax,%r13d - addl %r14d,%r12d - movl %r8d,%r14d - rorl $14,%r13d - movl %ebx,%r15d - - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r15d - - movl %r12d,16(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - - rorl $5,%r13d - addl %edx,%r12d - xorl %ecx,%r15d - - rorl $11,%r14d - xorl %eax,%r13d - addl %r15d,%r12d - - movl %r8d,%r15d - addl (%rbp),%r12d - xorl %r8d,%r14d - - rorl $6,%r13d - xorl %r9d,%r15d - movl %r9d,%edx - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%edx - addl %r12d,%r11d - addl %r12d,%edx - movl 24(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%edx - - - movl 12(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%edi - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %edi,%r14d - shrl $10,%edi - - xorl %r13d,%r12d - rorl $17,%r14d - addl 56(%rsp),%r12d - xorl %edi,%r14d - - addl 20(%rsp),%r12d - movl %r11d,%r13d - addl %r14d,%r12d - movl %edx,%r14d - rorl $14,%r13d - movl %eax,%edi - - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%edi - - movl %r12d,20(%rsp) - xorl %edx,%r14d - andl %r11d,%edi - - rorl $5,%r13d - addl %ecx,%r12d - xorl %ebx,%edi - - rorl $11,%r14d - xorl %r11d,%r13d - addl %edi,%r12d - - movl %edx,%edi - addl (%rbp),%r12d - xorl %edx,%r14d - - rorl $6,%r13d - xorl %r8d,%edi - movl %r8d,%ecx - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%ecx - addl %r12d,%r10d - addl %r12d,%ecx - movl 28(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%ecx - - - movl 16(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%r15d - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r15d,%r14d - shrl $10,%r15d - - xorl %r13d,%r12d - rorl $17,%r14d - addl 60(%rsp),%r12d - xorl %r15d,%r14d - - addl 24(%rsp),%r12d - movl %r10d,%r13d - addl %r14d,%r12d - movl %ecx,%r14d - rorl $14,%r13d - movl %r11d,%r15d - - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r15d - - movl %r12d,24(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - - rorl $5,%r13d - addl %ebx,%r12d - xorl %eax,%r15d - - rorl $11,%r14d - xorl %r10d,%r13d - addl %r15d,%r12d - - movl %ecx,%r15d - addl (%rbp),%r12d - xorl %ecx,%r14d - - rorl $6,%r13d - xorl %edx,%r15d - movl %edx,%ebx - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%ebx - addl %r12d,%r9d - addl %r12d,%ebx - movl 32(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%ebx - - - movl 20(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%edi - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %edi,%r14d - shrl $10,%edi - - xorl %r13d,%r12d - rorl $17,%r14d - addl 0(%rsp),%r12d - xorl %edi,%r14d - - addl 28(%rsp),%r12d - movl %r9d,%r13d - addl %r14d,%r12d - movl %ebx,%r14d - rorl $14,%r13d - movl %r10d,%edi - - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%edi - - movl %r12d,28(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi - - rorl $5,%r13d - addl %eax,%r12d - xorl %r11d,%edi - - rorl $11,%r14d - xorl %r9d,%r13d - addl %edi,%r12d - - movl %ebx,%edi - addl (%rbp),%r12d - xorl %ebx,%r14d - - rorl $6,%r13d - xorl %ecx,%edi - movl %ecx,%eax - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%eax - addl %r12d,%r8d - addl %r12d,%eax - movl 36(%rsp),%r13d - leaq 20(%rbp),%rbp - addl %r14d,%eax - - - movl 24(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%r15d - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r15d,%r14d - shrl $10,%r15d - - xorl %r13d,%r12d - rorl $17,%r14d - addl 4(%rsp),%r12d - xorl %r15d,%r14d - - addl 32(%rsp),%r12d - movl %r8d,%r13d - addl %r14d,%r12d - movl %eax,%r14d - rorl $14,%r13d - movl %r9d,%r15d - - rorl $9,%r14d - xorl %r8d,%r13d - xorl %r10d,%r15d - - movl %r12d,32(%rsp) - xorl %eax,%r14d - andl %r8d,%r15d - - rorl $5,%r13d - addl %r11d,%r12d - xorl %r10d,%r15d - - rorl $11,%r14d - xorl %r8d,%r13d - addl %r15d,%r12d - - movl %eax,%r15d - addl (%rbp),%r12d - xorl %eax,%r14d - - rorl $6,%r13d - xorl %ebx,%r15d - movl %ebx,%r11d - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%r11d - addl %r12d,%edx - addl %r12d,%r11d - movl 40(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%r11d - - - movl 28(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%edi - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %edi,%r14d - shrl $10,%edi - - xorl %r13d,%r12d - rorl $17,%r14d - addl 8(%rsp),%r12d - xorl %edi,%r14d - - addl 36(%rsp),%r12d - movl %edx,%r13d - addl %r14d,%r12d - movl %r11d,%r14d - rorl $14,%r13d - movl %r8d,%edi - - rorl $9,%r14d - xorl %edx,%r13d - xorl %r9d,%edi - - movl %r12d,36(%rsp) - xorl %r11d,%r14d - andl %edx,%edi - - rorl $5,%r13d - addl %r10d,%r12d - xorl %r9d,%edi - - rorl $11,%r14d - xorl %edx,%r13d - addl %edi,%r12d - - movl %r11d,%edi - addl (%rbp),%r12d - xorl %r11d,%r14d - - rorl $6,%r13d - xorl %eax,%edi - movl %eax,%r10d - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%r10d - addl %r12d,%ecx - addl %r12d,%r10d - movl 44(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%r10d - - - movl 32(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%r15d - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r15d,%r14d - shrl $10,%r15d - - xorl %r13d,%r12d - rorl $17,%r14d - addl 12(%rsp),%r12d - xorl %r15d,%r14d - - addl 40(%rsp),%r12d - movl %ecx,%r13d - addl %r14d,%r12d - movl %r10d,%r14d - rorl $14,%r13d - movl %edx,%r15d - - rorl $9,%r14d - xorl %ecx,%r13d - xorl %r8d,%r15d - - movl %r12d,40(%rsp) - xorl %r10d,%r14d - andl %ecx,%r15d - - rorl $5,%r13d - addl %r9d,%r12d - xorl %r8d,%r15d - - rorl $11,%r14d - xorl %ecx,%r13d - addl %r15d,%r12d - - movl %r10d,%r15d - addl (%rbp),%r12d - xorl %r10d,%r14d - - rorl $6,%r13d - xorl %r11d,%r15d - movl %r11d,%r9d - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%r9d - addl %r12d,%ebx - addl %r12d,%r9d - movl 48(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%r9d - - - movl 36(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%edi - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %edi,%r14d - shrl $10,%edi - - xorl %r13d,%r12d - rorl $17,%r14d - addl 16(%rsp),%r12d - xorl %edi,%r14d - - addl 44(%rsp),%r12d - movl %ebx,%r13d - addl %r14d,%r12d - movl %r9d,%r14d - rorl $14,%r13d - movl %ecx,%edi - - rorl $9,%r14d - xorl %ebx,%r13d - xorl %edx,%edi - - movl %r12d,44(%rsp) - xorl %r9d,%r14d - andl %ebx,%edi - - rorl $5,%r13d - addl %r8d,%r12d - xorl %edx,%edi - - rorl $11,%r14d - xorl %ebx,%r13d - addl %edi,%r12d - - movl %r9d,%edi - addl (%rbp),%r12d - xorl %r9d,%r14d - - rorl $6,%r13d - xorl %r10d,%edi - movl %r10d,%r8d - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%r8d - addl %r12d,%eax - addl %r12d,%r8d - movl 52(%rsp),%r13d - leaq 20(%rbp),%rbp - addl %r14d,%r8d - - - movl 40(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%r15d - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r15d,%r14d - shrl $10,%r15d - - xorl %r13d,%r12d - rorl $17,%r14d - addl 20(%rsp),%r12d - xorl %r15d,%r14d - - addl 48(%rsp),%r12d - movl %eax,%r13d - addl %r14d,%r12d - movl %r8d,%r14d - rorl $14,%r13d - movl %ebx,%r15d - - rorl $9,%r14d - xorl %eax,%r13d - xorl %ecx,%r15d - - movl %r12d,48(%rsp) - xorl %r8d,%r14d - andl %eax,%r15d - - rorl $5,%r13d - addl %edx,%r12d - xorl %ecx,%r15d - - rorl $11,%r14d - xorl %eax,%r13d - addl %r15d,%r12d - - movl %r8d,%r15d - addl (%rbp),%r12d - xorl %r8d,%r14d - - rorl $6,%r13d - xorl %r9d,%r15d - movl %r9d,%edx - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%edx - addl %r12d,%r11d - addl %r12d,%edx - movl 56(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%edx - - - movl 44(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%edi - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %edi,%r14d - shrl $10,%edi - - xorl %r13d,%r12d - rorl $17,%r14d - addl 24(%rsp),%r12d - xorl %edi,%r14d - - addl 52(%rsp),%r12d - movl %r11d,%r13d - addl %r14d,%r12d - movl %edx,%r14d - rorl $14,%r13d - movl %eax,%edi - - rorl $9,%r14d - xorl %r11d,%r13d - xorl %ebx,%edi - - movl %r12d,52(%rsp) - xorl %edx,%r14d - andl %r11d,%edi - - rorl $5,%r13d - addl %ecx,%r12d - xorl %ebx,%edi - - rorl $11,%r14d - xorl %r11d,%r13d - addl %edi,%r12d - - movl %edx,%edi - addl (%rbp),%r12d - xorl %edx,%r14d - - rorl $6,%r13d - xorl %r8d,%edi - movl %r8d,%ecx - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%ecx - addl %r12d,%r10d - addl %r12d,%ecx - movl 60(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%ecx - - - movl 48(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%r15d - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %r15d,%r14d - shrl $10,%r15d - - xorl %r13d,%r12d - rorl $17,%r14d - addl 28(%rsp),%r12d - xorl %r15d,%r14d - - addl 56(%rsp),%r12d - movl %r10d,%r13d - addl %r14d,%r12d - movl %ecx,%r14d - rorl $14,%r13d - movl %r11d,%r15d - - rorl $9,%r14d - xorl %r10d,%r13d - xorl %eax,%r15d - - movl %r12d,56(%rsp) - xorl %ecx,%r14d - andl %r10d,%r15d - - rorl $5,%r13d - addl %ebx,%r12d - xorl %eax,%r15d - - rorl $11,%r14d - xorl %r10d,%r13d - addl %r15d,%r12d - - movl %ecx,%r15d - addl (%rbp),%r12d - xorl %ecx,%r14d - - rorl $6,%r13d - xorl %edx,%r15d - movl %edx,%ebx - - rorl $2,%r14d - andl %r15d,%edi - addl %r13d,%r12d - - xorl %edi,%ebx - addl %r12d,%r9d - addl %r12d,%ebx - movl 0(%rsp),%r13d - leaq 4(%rbp),%rbp - addl %r14d,%ebx - - - movl 52(%rsp),%r14d - - movl %r13d,%r12d - rorl $11,%r13d - movl %r14d,%edi - rorl $2,%r14d - - xorl %r12d,%r13d - shrl $3,%r12d - rorl $7,%r13d - xorl %edi,%r14d - shrl $10,%edi - - xorl %r13d,%r12d - rorl $17,%r14d - addl 32(%rsp),%r12d - xorl %edi,%r14d - - addl 60(%rsp),%r12d - movl %r9d,%r13d - addl %r14d,%r12d - movl %ebx,%r14d - rorl $14,%r13d - movl %r10d,%edi - - rorl $9,%r14d - xorl %r9d,%r13d - xorl %r11d,%edi - - movl %r12d,60(%rsp) - xorl %ebx,%r14d - andl %r9d,%edi - - rorl $5,%r13d - addl %eax,%r12d - xorl %r11d,%edi - - rorl $11,%r14d - xorl %r9d,%r13d - addl %edi,%r12d - - movl %ebx,%edi - addl (%rbp),%r12d - xorl %ebx,%r14d - - rorl $6,%r13d - xorl %ecx,%edi - movl %ecx,%eax - - rorl $2,%r14d - andl %edi,%r15d - addl %r13d,%r12d - - xorl %r15d,%eax - addl %r12d,%r8d - addl %r12d,%eax - movl 4(%rsp),%r13d - leaq 20(%rbp),%rbp - addl %r14d,%eax - - cmpb $0,3(%rbp) + movq 8(%rsp),%r13 + movq 112(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 + + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 16(%rsp),%r13 + movq 120(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 + + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 24(%rsp),%r13 + movq 0(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 + + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 32(%rsp),%r13 + movq 8(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 + + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 40(%rsp),%r13 + movq 16(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 + + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 48(%rsp),%r13 + movq 24(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 + + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 56(%rsp),%r13 + movq 32(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 + + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 64(%rsp),%r13 + movq 40(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 + + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + movq 72(%rsp),%r13 + movq 48(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 + + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + + leaq 8(%rbp),%rbp + movq 80(%rsp),%r13 + movq 56(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 + + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + + leaq 24(%rbp),%rbp + movq 88(%rsp),%r13 + movq 64(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 + + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + + leaq 8(%rbp),%rbp + movq 96(%rsp),%r13 + movq 72(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 + + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + + leaq 24(%rbp),%rbp + movq 104(%rsp),%r13 + movq 80(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 + + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + + leaq 8(%rbp),%rbp + movq 112(%rsp),%r13 + movq 88(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 + + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + + leaq 24(%rbp),%rbp + movq 120(%rsp),%r13 + movq 96(%rsp),%r15 + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 + + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + + leaq 8(%rbp),%rbp + movq 0(%rsp),%r13 + movq 104(%rsp),%rdi + + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 + + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) jnz L$rounds_16_xx - movq 64+0(%rsp),%rdi - leaq 64(%rsi),%rsi - - addl 0(%rdi),%eax - addl 4(%rdi),%ebx - addl 8(%rdi),%ecx - addl 12(%rdi),%edx - addl 16(%rdi),%r8d - addl 20(%rdi),%r9d - addl 24(%rdi),%r10d - addl 28(%rdi),%r11d - - cmpq 64+16(%rsp),%rsi - - movl %eax,0(%rdi) - movl %ebx,4(%rdi) - movl %ecx,8(%rdi) - movl %edx,12(%rdi) - movl %r8d,16(%rdi) - movl %r9d,20(%rdi) - movl %r10d,24(%rdi) - movl %r11d,28(%rdi) + movq 128+0(%rsp),%rdi + addq %r14,%rax + leaq 128(%rsi),%rsi + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) jb L$loop - movq 64+24(%rsp),%rsi - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + movq 152(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + L$epilogue: .byte 0xf3,0xc3 + .p2align 6 -K256: -.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 -.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 -.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 -.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 -.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 -.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 -.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc -.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da -.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da -.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 -.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 -.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 -.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 -.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 -.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 -.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 -.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 -.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 -.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 -.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 -.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 -.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 -.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 - -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f -.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff -.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff -.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 -.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 -.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 -sha256_block_data_order_ssse3: -L$ssse3_shortcut: +sha512_block_data_order_xop: + +L$xop_shortcut: + movq %rsp,%rax + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 - movq %rsp,%r11 + shlq $4,%rdx - subq $96,%rsp - leaq (%rsi,%rdx,4),%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx andq $-64,%rsp - movq %rdi,64+0(%rsp) - movq %rsi,64+8(%rsp) - movq %rdx,64+16(%rsp) - movq %r11,64+24(%rsp) -L$prologue_ssse3: - - movl 0(%rdi),%eax - movl 4(%rdi),%ebx - movl 8(%rdi),%ecx - movl 12(%rdi),%edx - movl 16(%rdi),%r8d - movl 20(%rdi),%r9d - movl 24(%rdi),%r10d - movl 28(%rdi),%r11d - movdqa K256+512+32(%rip),%xmm8 - movdqa K256+512+64(%rip),%xmm9 - jmp L$loop_ssse3 + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) + +L$prologue_xop: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop_xop .p2align 4 -L$loop_ssse3: - movdqa K256+512(%rip),%xmm7 - movdqu 0(%rsi),%xmm0 - movdqu 16(%rsi),%xmm1 - movdqu 32(%rsi),%xmm2 - movdqu 48(%rsi),%xmm3 -.byte 102,15,56,0,199 - leaq K256(%rip),%rbp -.byte 102,15,56,0,207 - movdqa 0(%rbp),%xmm4 -.byte 102,15,56,0,215 - movdqa 32(%rbp),%xmm5 - paddd %xmm0,%xmm4 - movdqa 64(%rbp),%xmm6 -.byte 102,15,56,0,223 - movdqa 96(%rbp),%xmm7 - paddd %xmm1,%xmm5 - paddd %xmm2,%xmm6 - paddd %xmm3,%xmm7 - movdqa %xmm4,0(%rsp) - movl %eax,%r14d - movdqa %xmm5,16(%rsp) - movl %ebx,%edi - movdqa %xmm6,32(%rsp) - xorl %ecx,%edi - movdqa %xmm7,48(%rsp) - movl %r8d,%r13d - jmp L$ssse3_00_47 +L$loop_xop: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp L$xop_00_47 .p2align 4 -L$ssse3_00_47: - subq $-32*4,%rbp - rorl $14,%r13d - movl %r14d,%eax - movdqa %xmm1,%xmm4 - movl %r9d,%r12d - movdqa %xmm3,%xmm7 - xorl %r8d,%r13d - rorl $9,%r14d - xorl %r10d,%r12d -.byte 102,15,58,15,224,4 - rorl $5,%r13d - xorl %eax,%r14d -.byte 102,15,58,15,250,4 - andl %r8d,%r12d - xorl %r8d,%r13d - addl 0(%rsp),%r11d - movl %eax,%r15d - rorl $11,%r14d - xorl %r10d,%r12d - movdqa %xmm4,%xmm5 - xorl %ebx,%r15d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - addl %r12d,%r11d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - paddd %xmm7,%xmm0 - addl %r11d,%edx - rorl $2,%r14d - addl %edi,%r11d - psrld $7,%xmm6 - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - pshufd $250,%xmm3,%xmm7 - movl %r8d,%r12d - pslld $14,%xmm5 - xorl %edx,%r13d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %r9d,%r12d - psrld $11,%xmm6 - rorl $5,%r13d - xorl %r11d,%r14d - pxor %xmm5,%xmm4 - andl %edx,%r12d - xorl %edx,%r13d - pslld $11,%xmm5 - addl 4(%rsp),%r10d - pxor %xmm6,%xmm4 - movl %r11d,%edi - rorl $11,%r14d - xorl %r9d,%r12d - movdqa %xmm7,%xmm6 - xorl %eax,%edi - rorl $6,%r13d - addl %r12d,%r10d - pxor %xmm5,%xmm4 - andl %edi,%r15d - xorl %r11d,%r14d - psrld $10,%xmm7 - addl %r13d,%r10d - xorl %eax,%r15d - paddd %xmm4,%xmm0 - addl %r10d,%ecx - rorl $2,%r14d - addl %r15d,%r10d - movl %ecx,%r13d - psrlq $17,%xmm6 - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - pxor %xmm6,%xmm7 - xorl %ecx,%r13d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - pxor %xmm6,%xmm7 - andl %ecx,%r12d - xorl %ecx,%r13d - addl 8(%rsp),%r9d -.byte 102,65,15,56,0,248 - movl %r10d,%r15d - rorl $11,%r14d - xorl %r8d,%r12d - xorl %r11d,%r15d - rorl $6,%r13d - paddd %xmm7,%xmm0 - addl %r12d,%r9d - pshufd $80,%xmm0,%xmm7 - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - movdqa %xmm7,%xmm6 - addl %r9d,%ebx - rorl $2,%r14d - addl %edi,%r9d - psrld $10,%xmm7 - movl %ebx,%r13d - psrlq $17,%xmm6 - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - pxor %xmm6,%xmm7 - xorl %ebx,%r13d - rorl $9,%r14d - xorl %edx,%r12d - psrlq $2,%xmm6 - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - pxor %xmm6,%xmm7 - addl 12(%rsp),%r8d - movl %r9d,%edi - movdqa 0(%rbp),%xmm6 - rorl $11,%r14d - xorl %edx,%r12d -.byte 102,65,15,56,0,249 - xorl %r10d,%edi - rorl $6,%r13d - addl %r12d,%r8d - andl %edi,%r15d - xorl %r9d,%r14d - paddd %xmm7,%xmm0 - addl %r13d,%r8d - xorl %r10d,%r15d - addl %r8d,%eax - paddd %xmm0,%xmm6 - rorl $2,%r14d - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - movdqa %xmm6,0(%rsp) - rorl $14,%r13d - movl %r14d,%r8d - movdqa %xmm2,%xmm4 - movl %ebx,%r12d - movdqa %xmm0,%xmm7 - xorl %eax,%r13d - rorl $9,%r14d - xorl %ecx,%r12d -.byte 102,15,58,15,225,4 - rorl $5,%r13d - xorl %r8d,%r14d -.byte 102,15,58,15,251,4 - andl %eax,%r12d - xorl %eax,%r13d - addl 16(%rsp),%edx - movl %r8d,%r15d - rorl $11,%r14d - xorl %ecx,%r12d - movdqa %xmm4,%xmm5 - xorl %r9d,%r15d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - addl %r12d,%edx - andl %r15d,%edi - psrld $3,%xmm4 - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - paddd %xmm7,%xmm1 - addl %edx,%r11d - rorl $2,%r14d - addl %edi,%edx - psrld $7,%xmm6 - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - pshufd $250,%xmm0,%xmm7 - movl %eax,%r12d - pslld $14,%xmm5 - xorl %r11d,%r13d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %ebx,%r12d - psrld $11,%xmm6 - rorl $5,%r13d - xorl %edx,%r14d - pxor %xmm5,%xmm4 - andl %r11d,%r12d - xorl %r11d,%r13d - pslld $11,%xmm5 - addl 20(%rsp),%ecx - pxor %xmm6,%xmm4 - movl %edx,%edi - rorl $11,%r14d - xorl %ebx,%r12d - movdqa %xmm7,%xmm6 - xorl %r8d,%edi - rorl $6,%r13d - addl %r12d,%ecx - pxor %xmm5,%xmm4 - andl %edi,%r15d - xorl %edx,%r14d - psrld $10,%xmm7 - addl %r13d,%ecx - xorl %r8d,%r15d - paddd %xmm4,%xmm1 - addl %ecx,%r10d - rorl $2,%r14d - addl %r15d,%ecx - movl %r10d,%r13d - psrlq $17,%xmm6 - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - pxor %xmm6,%xmm7 - xorl %r10d,%r13d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %eax,%r12d - rorl $5,%r13d - xorl %ecx,%r14d - pxor %xmm6,%xmm7 - andl %r10d,%r12d - xorl %r10d,%r13d - addl 24(%rsp),%ebx -.byte 102,65,15,56,0,248 - movl %ecx,%r15d - rorl $11,%r14d - xorl %eax,%r12d - xorl %edx,%r15d - rorl $6,%r13d - paddd %xmm7,%xmm1 - addl %r12d,%ebx - pshufd $80,%xmm1,%xmm7 - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - movdqa %xmm7,%xmm6 - addl %ebx,%r9d - rorl $2,%r14d - addl %edi,%ebx - psrld $10,%xmm7 - movl %r9d,%r13d - psrlq $17,%xmm6 - addl %ebx,%r14d - rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d - pxor %xmm6,%xmm7 - xorl %r9d,%r13d - rorl $9,%r14d - xorl %r11d,%r12d - psrlq $2,%xmm6 - rorl $5,%r13d - xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - pxor %xmm6,%xmm7 - addl 28(%rsp),%eax - movl %ebx,%edi - movdqa 32(%rbp),%xmm6 - rorl $11,%r14d - xorl %r11d,%r12d -.byte 102,65,15,56,0,249 - xorl %ecx,%edi - rorl $6,%r13d - addl %r12d,%eax - andl %edi,%r15d - xorl %ebx,%r14d - paddd %xmm7,%xmm1 - addl %r13d,%eax - xorl %ecx,%r15d - addl %eax,%r8d - paddd %xmm1,%xmm6 - rorl $2,%r14d - addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d - movdqa %xmm6,16(%rsp) - rorl $14,%r13d - movl %r14d,%eax - movdqa %xmm3,%xmm4 - movl %r9d,%r12d - movdqa %xmm1,%xmm7 - xorl %r8d,%r13d - rorl $9,%r14d - xorl %r10d,%r12d -.byte 102,15,58,15,226,4 - rorl $5,%r13d - xorl %eax,%r14d -.byte 102,15,58,15,248,4 - andl %r8d,%r12d - xorl %r8d,%r13d - addl 32(%rsp),%r11d - movl %eax,%r15d - rorl $11,%r14d - xorl %r10d,%r12d - movdqa %xmm4,%xmm5 - xorl %ebx,%r15d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - addl %r12d,%r11d - andl %r15d,%edi - psrld $3,%xmm4 - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - paddd %xmm7,%xmm2 - addl %r11d,%edx - rorl $2,%r14d - addl %edi,%r11d - psrld $7,%xmm6 - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - pshufd $250,%xmm1,%xmm7 - movl %r8d,%r12d - pslld $14,%xmm5 - xorl %edx,%r13d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %r9d,%r12d - psrld $11,%xmm6 - rorl $5,%r13d - xorl %r11d,%r14d - pxor %xmm5,%xmm4 - andl %edx,%r12d - xorl %edx,%r13d - pslld $11,%xmm5 - addl 36(%rsp),%r10d - pxor %xmm6,%xmm4 - movl %r11d,%edi - rorl $11,%r14d - xorl %r9d,%r12d - movdqa %xmm7,%xmm6 - xorl %eax,%edi - rorl $6,%r13d - addl %r12d,%r10d - pxor %xmm5,%xmm4 - andl %edi,%r15d - xorl %r11d,%r14d - psrld $10,%xmm7 - addl %r13d,%r10d - xorl %eax,%r15d - paddd %xmm4,%xmm2 - addl %r10d,%ecx - rorl $2,%r14d - addl %r15d,%r10d - movl %ecx,%r13d - psrlq $17,%xmm6 - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - pxor %xmm6,%xmm7 - xorl %ecx,%r13d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - pxor %xmm6,%xmm7 - andl %ecx,%r12d - xorl %ecx,%r13d - addl 40(%rsp),%r9d -.byte 102,65,15,56,0,248 - movl %r10d,%r15d - rorl $11,%r14d - xorl %r8d,%r12d - xorl %r11d,%r15d - rorl $6,%r13d - paddd %xmm7,%xmm2 - addl %r12d,%r9d - pshufd $80,%xmm2,%xmm7 - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - movdqa %xmm7,%xmm6 - addl %r9d,%ebx - rorl $2,%r14d - addl %edi,%r9d - psrld $10,%xmm7 - movl %ebx,%r13d - psrlq $17,%xmm6 - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - pxor %xmm6,%xmm7 - xorl %ebx,%r13d - rorl $9,%r14d - xorl %edx,%r12d - psrlq $2,%xmm6 - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - pxor %xmm6,%xmm7 - addl 44(%rsp),%r8d - movl %r9d,%edi - movdqa 64(%rbp),%xmm6 - rorl $11,%r14d - xorl %edx,%r12d -.byte 102,65,15,56,0,249 - xorl %r10d,%edi - rorl $6,%r13d - addl %r12d,%r8d - andl %edi,%r15d - xorl %r9d,%r14d - paddd %xmm7,%xmm2 - addl %r13d,%r8d - xorl %r10d,%r15d - addl %r8d,%eax - paddd %xmm2,%xmm6 - rorl $2,%r14d - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - movdqa %xmm6,32(%rsp) - rorl $14,%r13d - movl %r14d,%r8d - movdqa %xmm0,%xmm4 - movl %ebx,%r12d - movdqa %xmm2,%xmm7 - xorl %eax,%r13d - rorl $9,%r14d - xorl %ecx,%r12d -.byte 102,15,58,15,227,4 - rorl $5,%r13d - xorl %r8d,%r14d -.byte 102,15,58,15,249,4 - andl %eax,%r12d - xorl %eax,%r13d - addl 48(%rsp),%edx - movl %r8d,%r15d - rorl $11,%r14d - xorl %ecx,%r12d - movdqa %xmm4,%xmm5 - xorl %r9d,%r15d - movdqa %xmm4,%xmm6 - rorl $6,%r13d - addl %r12d,%edx - andl %r15d,%edi - psrld $3,%xmm4 - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - paddd %xmm7,%xmm3 - addl %edx,%r11d - rorl $2,%r14d - addl %edi,%edx - psrld $7,%xmm6 - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - pshufd $250,%xmm2,%xmm7 - movl %eax,%r12d - pslld $14,%xmm5 - xorl %r11d,%r13d - pxor %xmm6,%xmm4 - rorl $9,%r14d - xorl %ebx,%r12d - psrld $11,%xmm6 - rorl $5,%r13d - xorl %edx,%r14d - pxor %xmm5,%xmm4 - andl %r11d,%r12d - xorl %r11d,%r13d - pslld $11,%xmm5 - addl 52(%rsp),%ecx - pxor %xmm6,%xmm4 - movl %edx,%edi - rorl $11,%r14d - xorl %ebx,%r12d - movdqa %xmm7,%xmm6 - xorl %r8d,%edi - rorl $6,%r13d - addl %r12d,%ecx - pxor %xmm5,%xmm4 - andl %edi,%r15d - xorl %edx,%r14d - psrld $10,%xmm7 - addl %r13d,%ecx - xorl %r8d,%r15d - paddd %xmm4,%xmm3 - addl %ecx,%r10d - rorl $2,%r14d - addl %r15d,%ecx - movl %r10d,%r13d - psrlq $17,%xmm6 - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - pxor %xmm6,%xmm7 - xorl %r10d,%r13d - rorl $9,%r14d - psrlq $2,%xmm6 - xorl %eax,%r12d - rorl $5,%r13d - xorl %ecx,%r14d - pxor %xmm6,%xmm7 - andl %r10d,%r12d - xorl %r10d,%r13d - addl 56(%rsp),%ebx -.byte 102,65,15,56,0,248 - movl %ecx,%r15d - rorl $11,%r14d - xorl %eax,%r12d - xorl %edx,%r15d - rorl $6,%r13d - paddd %xmm7,%xmm3 - addl %r12d,%ebx - pshufd $80,%xmm3,%xmm7 - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - movdqa %xmm7,%xmm6 - addl %ebx,%r9d - rorl $2,%r14d - addl %edi,%ebx - psrld $10,%xmm7 - movl %r9d,%r13d - psrlq $17,%xmm6 - addl %ebx,%r14d - rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d - pxor %xmm6,%xmm7 - xorl %r9d,%r13d - rorl $9,%r14d - xorl %r11d,%r12d - psrlq $2,%xmm6 - rorl $5,%r13d - xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - pxor %xmm6,%xmm7 - addl 60(%rsp),%eax - movl %ebx,%edi - movdqa 96(%rbp),%xmm6 - rorl $11,%r14d - xorl %r11d,%r12d -.byte 102,65,15,56,0,249 - xorl %ecx,%edi - rorl $6,%r13d - addl %r12d,%eax - andl %edi,%r15d - xorl %ebx,%r14d - paddd %xmm7,%xmm3 - addl %r13d,%eax - xorl %ecx,%r15d - addl %eax,%r8d - paddd %xmm3,%xmm6 - rorl $2,%r14d - addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d - movdqa %xmm6,48(%rsp) - cmpb $0,131(%rbp) - jne L$ssse3_00_47 - rorl $14,%r13d - movl %r14d,%eax - movl %r9d,%r12d - xorl %r8d,%r13d - rorl $9,%r14d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d - andl %r8d,%r12d - xorl %r8d,%r13d - addl 0(%rsp),%r11d - movl %eax,%r15d - rorl $11,%r14d - xorl %r10d,%r12d - xorl %ebx,%r15d - rorl $6,%r13d - addl %r12d,%r11d - andl %r15d,%edi - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - addl %r11d,%edx - rorl $2,%r14d - addl %edi,%r11d - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - movl %r8d,%r12d - xorl %edx,%r13d - rorl $9,%r14d - xorl %r9d,%r12d - rorl $5,%r13d - xorl %r11d,%r14d - andl %edx,%r12d - xorl %edx,%r13d - addl 4(%rsp),%r10d - movl %r11d,%edi - rorl $11,%r14d - xorl %r9d,%r12d - xorl %eax,%edi - rorl $6,%r13d - addl %r12d,%r10d - andl %edi,%r15d - xorl %r11d,%r14d - addl %r13d,%r10d - xorl %eax,%r15d - addl %r10d,%ecx - rorl $2,%r14d - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - xorl %ecx,%r13d - rorl $9,%r14d - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - xorl %ecx,%r13d - addl 8(%rsp),%r9d - movl %r10d,%r15d - rorl $11,%r14d - xorl %r8d,%r12d - xorl %r11d,%r15d - rorl $6,%r13d - addl %r12d,%r9d - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - addl %r9d,%ebx - rorl $2,%r14d - addl %edi,%r9d - movl %ebx,%r13d - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - xorl %ebx,%r13d - rorl $9,%r14d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - addl 12(%rsp),%r8d - movl %r9d,%edi - rorl $11,%r14d - xorl %edx,%r12d - xorl %r10d,%edi - rorl $6,%r13d - addl %r12d,%r8d - andl %edi,%r15d - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - addl %r8d,%eax - rorl $2,%r14d - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - rorl $14,%r13d - movl %r14d,%r8d - movl %ebx,%r12d - xorl %eax,%r13d - rorl $9,%r14d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d - andl %eax,%r12d - xorl %eax,%r13d - addl 16(%rsp),%edx - movl %r8d,%r15d - rorl $11,%r14d - xorl %ecx,%r12d - xorl %r9d,%r15d - rorl $6,%r13d - addl %r12d,%edx - andl %r15d,%edi - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - addl %edx,%r11d - rorl $2,%r14d - addl %edi,%edx - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - movl %eax,%r12d - xorl %r11d,%r13d - rorl $9,%r14d - xorl %ebx,%r12d - rorl $5,%r13d - xorl %edx,%r14d - andl %r11d,%r12d - xorl %r11d,%r13d - addl 20(%rsp),%ecx - movl %edx,%edi - rorl $11,%r14d - xorl %ebx,%r12d - xorl %r8d,%edi - rorl $6,%r13d - addl %r12d,%ecx - andl %edi,%r15d - xorl %edx,%r14d - addl %r13d,%ecx - xorl %r8d,%r15d - addl %ecx,%r10d - rorl $2,%r14d - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - xorl %r10d,%r13d - rorl $9,%r14d - xorl %eax,%r12d - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - xorl %r10d,%r13d - addl 24(%rsp),%ebx - movl %ecx,%r15d - rorl $11,%r14d - xorl %eax,%r12d - xorl %edx,%r15d - rorl $6,%r13d - addl %r12d,%ebx - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - addl %ebx,%r9d - rorl $2,%r14d - addl %edi,%ebx - movl %r9d,%r13d - addl %ebx,%r14d - rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d - xorl %r9d,%r13d - rorl $9,%r14d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - addl 28(%rsp),%eax - movl %ebx,%edi - rorl $11,%r14d - xorl %r11d,%r12d - xorl %ecx,%edi - rorl $6,%r13d - addl %r12d,%eax - andl %edi,%r15d - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - addl %eax,%r8d - rorl $2,%r14d - addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d - rorl $14,%r13d - movl %r14d,%eax - movl %r9d,%r12d - xorl %r8d,%r13d - rorl $9,%r14d - xorl %r10d,%r12d - rorl $5,%r13d - xorl %eax,%r14d - andl %r8d,%r12d - xorl %r8d,%r13d - addl 32(%rsp),%r11d - movl %eax,%r15d - rorl $11,%r14d - xorl %r10d,%r12d - xorl %ebx,%r15d - rorl $6,%r13d - addl %r12d,%r11d - andl %r15d,%edi - xorl %eax,%r14d - addl %r13d,%r11d - xorl %ebx,%edi - addl %r11d,%edx - rorl $2,%r14d - addl %edi,%r11d - movl %edx,%r13d - addl %r11d,%r14d - rorl $14,%r13d - movl %r14d,%r11d - movl %r8d,%r12d - xorl %edx,%r13d - rorl $9,%r14d - xorl %r9d,%r12d - rorl $5,%r13d - xorl %r11d,%r14d - andl %edx,%r12d - xorl %edx,%r13d - addl 36(%rsp),%r10d - movl %r11d,%edi - rorl $11,%r14d - xorl %r9d,%r12d - xorl %eax,%edi - rorl $6,%r13d - addl %r12d,%r10d - andl %edi,%r15d - xorl %r11d,%r14d - addl %r13d,%r10d - xorl %eax,%r15d - addl %r10d,%ecx - rorl $2,%r14d - addl %r15d,%r10d - movl %ecx,%r13d - addl %r10d,%r14d - rorl $14,%r13d - movl %r14d,%r10d - movl %edx,%r12d - xorl %ecx,%r13d - rorl $9,%r14d - xorl %r8d,%r12d - rorl $5,%r13d - xorl %r10d,%r14d - andl %ecx,%r12d - xorl %ecx,%r13d - addl 40(%rsp),%r9d - movl %r10d,%r15d - rorl $11,%r14d - xorl %r8d,%r12d - xorl %r11d,%r15d - rorl $6,%r13d - addl %r12d,%r9d - andl %r15d,%edi - xorl %r10d,%r14d - addl %r13d,%r9d - xorl %r11d,%edi - addl %r9d,%ebx - rorl $2,%r14d - addl %edi,%r9d - movl %ebx,%r13d - addl %r9d,%r14d - rorl $14,%r13d - movl %r14d,%r9d - movl %ecx,%r12d - xorl %ebx,%r13d - rorl $9,%r14d - xorl %edx,%r12d - rorl $5,%r13d - xorl %r9d,%r14d - andl %ebx,%r12d - xorl %ebx,%r13d - addl 44(%rsp),%r8d - movl %r9d,%edi - rorl $11,%r14d - xorl %edx,%r12d - xorl %r10d,%edi - rorl $6,%r13d - addl %r12d,%r8d - andl %edi,%r15d - xorl %r9d,%r14d - addl %r13d,%r8d - xorl %r10d,%r15d - addl %r8d,%eax - rorl $2,%r14d - addl %r15d,%r8d - movl %eax,%r13d - addl %r8d,%r14d - rorl $14,%r13d - movl %r14d,%r8d - movl %ebx,%r12d - xorl %eax,%r13d - rorl $9,%r14d - xorl %ecx,%r12d - rorl $5,%r13d - xorl %r8d,%r14d - andl %eax,%r12d - xorl %eax,%r13d - addl 48(%rsp),%edx - movl %r8d,%r15d - rorl $11,%r14d - xorl %ecx,%r12d - xorl %r9d,%r15d - rorl $6,%r13d - addl %r12d,%edx - andl %r15d,%edi - xorl %r8d,%r14d - addl %r13d,%edx - xorl %r9d,%edi - addl %edx,%r11d - rorl $2,%r14d - addl %edi,%edx - movl %r11d,%r13d - addl %edx,%r14d - rorl $14,%r13d - movl %r14d,%edx - movl %eax,%r12d - xorl %r11d,%r13d - rorl $9,%r14d - xorl %ebx,%r12d - rorl $5,%r13d - xorl %edx,%r14d - andl %r11d,%r12d - xorl %r11d,%r13d - addl 52(%rsp),%ecx - movl %edx,%edi - rorl $11,%r14d - xorl %ebx,%r12d - xorl %r8d,%edi - rorl $6,%r13d - addl %r12d,%ecx - andl %edi,%r15d - xorl %edx,%r14d - addl %r13d,%ecx - xorl %r8d,%r15d - addl %ecx,%r10d - rorl $2,%r14d - addl %r15d,%ecx - movl %r10d,%r13d - addl %ecx,%r14d - rorl $14,%r13d - movl %r14d,%ecx - movl %r11d,%r12d - xorl %r10d,%r13d - rorl $9,%r14d - xorl %eax,%r12d - rorl $5,%r13d - xorl %ecx,%r14d - andl %r10d,%r12d - xorl %r10d,%r13d - addl 56(%rsp),%ebx - movl %ecx,%r15d - rorl $11,%r14d - xorl %eax,%r12d - xorl %edx,%r15d - rorl $6,%r13d - addl %r12d,%ebx - andl %r15d,%edi - xorl %ecx,%r14d - addl %r13d,%ebx - xorl %edx,%edi - addl %ebx,%r9d - rorl $2,%r14d - addl %edi,%ebx - movl %r9d,%r13d - addl %ebx,%r14d - rorl $14,%r13d - movl %r14d,%ebx - movl %r10d,%r12d - xorl %r9d,%r13d - rorl $9,%r14d - xorl %r11d,%r12d - rorl $5,%r13d - xorl %ebx,%r14d - andl %r9d,%r12d - xorl %r9d,%r13d - addl 60(%rsp),%eax - movl %ebx,%edi - rorl $11,%r14d - xorl %r11d,%r12d - xorl %ecx,%edi - rorl $6,%r13d - addl %r12d,%eax - andl %edi,%r15d - xorl %ebx,%r14d - addl %r13d,%eax - xorl %ecx,%r15d - addl %eax,%r8d - rorl $2,%r14d - addl %r15d,%eax - movl %r8d,%r13d - addl %eax,%r14d - movq 64+0(%rsp),%rdi - movl %r14d,%eax - - addl 0(%rdi),%eax - leaq 64(%rsi),%rsi - addl 4(%rdi),%ebx - addl 8(%rdi),%ecx - addl 12(%rdi),%edx - addl 16(%rdi),%r8d - addl 20(%rdi),%r9d - addl 24(%rdi),%r10d - addl 28(%rdi),%r11d - - cmpq 64+16(%rsp),%rsi - - movl %eax,0(%rdi) - movl %ebx,4(%rdi) - movl %ecx,8(%rdi) - movl %edx,12(%rdi) - movl %r8d,16(%rdi) - movl %r9d,20(%rdi) - movl %r10d,24(%rdi) - movl %r11d,28(%rdi) - jb L$loop_ssse3 - - movq 64+24(%rsp),%rsi - movq (%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp -L$epilogue_ssse3: +L$xop_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm0,%xmm0 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,223,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm7,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq %xmm11,%xmm0,%xmm0 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm1,%xmm1 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,216,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm0,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm1,%xmm1 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm2,%xmm2 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,217,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm1,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm2,%xmm2 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm3,%xmm3 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,218,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm2,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm3,%xmm3 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + rorq $23,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r8,%r13 + xorq %r10,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rax,%r14 + vpaddq %xmm11,%xmm4,%xmm4 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 +.byte 143,72,120,195,209,7 + xorq %r10,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,219,3 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm3,%xmm10 + addq %r11,%rdx + addq %rdi,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %rdx,%r13 + addq %r11,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r11 + vpxor %xmm10,%xmm11,%xmm11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + vpaddq %xmm11,%xmm4,%xmm4 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + rorq $23,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rcx,%r13 + xorq %r8,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r10,%r14 + vpaddq %xmm11,%xmm5,%xmm5 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 +.byte 143,72,120,195,209,7 + xorq %r8,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,220,3 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm4,%xmm10 + addq %r9,%rbx + addq %rdi,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rbx,%r13 + addq %r9,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%r9 + vpxor %xmm10,%xmm11,%xmm11 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + vpaddq %xmm11,%xmm5,%xmm5 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + rorq $23,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %rax,%r13 + xorq %rcx,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %r8,%r14 + vpaddq %xmm11,%xmm6,%xmm6 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 +.byte 143,72,120,195,209,7 + xorq %rcx,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,221,3 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm5,%xmm10 + addq %rdx,%r11 + addq %rdi,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %r11,%r13 + addq %rdx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rdx + vpxor %xmm10,%xmm11,%xmm11 + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + vpaddq %xmm11,%xmm6,%xmm6 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + rorq $23,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + rorq $5,%r14 +.byte 143,72,120,195,200,56 + xorq %r10,%r13 + xorq %rax,%r12 + vpsrlq $7,%xmm8,%xmm8 + rorq $4,%r13 + xorq %rcx,%r14 + vpaddq %xmm11,%xmm7,%xmm7 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 +.byte 143,72,120,195,209,7 + xorq %rax,%r12 + rorq $6,%r14 + vpxor %xmm9,%xmm8,%xmm8 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi +.byte 143,104,120,195,222,3 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + rorq $28,%r14 + vpsrlq $6,%xmm6,%xmm10 + addq %rbx,%r9 + addq %rdi,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r9,%r13 + addq %rbx,%r14 +.byte 143,72,120,195,203,42 + rorq $23,%r13 + movq %r14,%rbx + vpxor %xmm10,%xmm11,%xmm11 + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm9,%xmm11,%xmm11 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + vpaddq %xmm11,%xmm7,%xmm7 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne L$xop_00_47 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + rorq $23,%r13 + movq %r14,%rax + movq %r9,%r12 + rorq $5,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + rorq $4,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + rorq $6,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + rorq $14,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + rorq $28,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + rorq $23,%r13 + movq %r14,%r11 + movq %r8,%r12 + rorq $5,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + rorq $4,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + rorq $6,%r14 + xorq %rax,%rdi + addq %r12,%r10 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + rorq $28,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + rorq $23,%r13 + movq %r14,%r10 + movq %rdx,%r12 + rorq $5,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + rorq $4,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + rorq $6,%r14 + xorq %r11,%r15 + addq %r12,%r9 + rorq $14,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + rorq $28,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + rorq $23,%r13 + movq %r14,%r9 + movq %rcx,%r12 + rorq $5,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + rorq $4,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + rorq $6,%r14 + xorq %r10,%rdi + addq %r12,%r8 + rorq $14,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + rorq $28,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + rorq $23,%r13 + movq %r14,%r8 + movq %rbx,%r12 + rorq $5,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + rorq $4,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + rorq $6,%r14 + xorq %r9,%r15 + addq %r12,%rdx + rorq $14,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + rorq $28,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + rorq $23,%r13 + movq %r14,%rdx + movq %rax,%r12 + rorq $5,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + rorq $4,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + rorq $6,%r14 + xorq %r8,%rdi + addq %r12,%rcx + rorq $14,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + rorq $28,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + rorq $23,%r13 + movq %r14,%rcx + movq %r11,%r12 + rorq $5,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + rorq $4,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + rorq $6,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + rorq $14,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + rorq $28,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + rorq $23,%r13 + movq %r14,%rbx + movq %r10,%r12 + rorq $5,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + rorq $4,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + rorq $6,%r14 + xorq %rcx,%rdi + addq %r12,%rax + rorq $14,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + rorq $28,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop_xop + + movq 152(%rsp),%rsi + + vzeroupper + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$epilogue_xop: + .byte 0xf3,0xc3 + + + +.p2align 6 +sha512_block_data_order_avx: + +L$avx_shortcut: + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) + +L$prologue_avx: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$loop_avx +.p2align 4 +L$loop_avx: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp L$avx_00_47 + +.p2align 4 +L$avx_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm0,%xmm0 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 0(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm7,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm7,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm7,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 8(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm0,%xmm0 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm1,%xmm1 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 16(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm0,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm0,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm0,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 24(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm1,%xmm1 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm2,%xmm2 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 32(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm1,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm1,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm1,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 40(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm2,%xmm2 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm3,%xmm3 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 48(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm2,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm2,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm2,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 56(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm3,%xmm3 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm4,%xmm4 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 64(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm3,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm3,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm3,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 72(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm4,%xmm4 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm5,%xmm5 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 80(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm4,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm4,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm4,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 88(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm5,%xmm5 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm6,%xmm6 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 96(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm5,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm5,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm5,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 104(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm6,%xmm6 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm7,%xmm7 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 112(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm6,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm6,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm6,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 120(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm7,%xmm7 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne L$avx_00_47 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb L$loop_avx + + movq 152(%rsp),%rsi + + vzeroupper + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$epilogue_avx: + .byte 0xf3,0xc3 + + + +.p2align 6 +sha512_block_data_order_avx2: + +L$avx2_shortcut: + movq %rsp,%rax + + pushq %rbx + + pushq %rbp + + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $1312,%rsp + shlq $4,%rdx + andq $-2048,%rsp + leaq (%rsi,%rdx,8),%rdx + addq $1152,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) + +L$prologue_avx2: + + vzeroupper + subq $-128,%rsi + movq 0(%rdi),%rax + movq %rsi,%r12 + movq 8(%rdi),%rbx + cmpq %rdx,%rsi + movq 16(%rdi),%rcx + cmoveq %rsp,%r12 + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp L$oop_avx2 +.p2align 4 +L$oop_avx2: + vmovdqu -128(%rsi),%xmm0 + vmovdqu -128+16(%rsi),%xmm1 + vmovdqu -128+32(%rsi),%xmm2 + leaq K512+128(%rip),%rbp + vmovdqu -128+48(%rsi),%xmm3 + vmovdqu -128+64(%rsi),%xmm4 + vmovdqu -128+80(%rsi),%xmm5 + vmovdqu -128+96(%rsi),%xmm6 + vmovdqu -128+112(%rsi),%xmm7 + + vmovdqa 1152(%rbp),%ymm10 + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm10,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm10,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + vpshufb %ymm10,%ymm2,%ymm2 + vinserti128 $1,64(%r12),%ymm4,%ymm4 + vpshufb %ymm10,%ymm3,%ymm3 + vinserti128 $1,80(%r12),%ymm5,%ymm5 + vpshufb %ymm10,%ymm4,%ymm4 + vinserti128 $1,96(%r12),%ymm6,%ymm6 + vpshufb %ymm10,%ymm5,%ymm5 + vinserti128 $1,112(%r12),%ymm7,%ymm7 + + vpaddq -128(%rbp),%ymm0,%ymm8 + vpshufb %ymm10,%ymm6,%ymm6 + vpaddq -96(%rbp),%ymm1,%ymm9 + vpshufb %ymm10,%ymm7,%ymm7 + vpaddq -64(%rbp),%ymm2,%ymm10 + vpaddq -32(%rbp),%ymm3,%ymm11 + vmovdqa %ymm8,0(%rsp) + vpaddq 0(%rbp),%ymm4,%ymm8 + vmovdqa %ymm9,32(%rsp) + vpaddq 32(%rbp),%ymm5,%ymm9 + vmovdqa %ymm10,64(%rsp) + vpaddq 64(%rbp),%ymm6,%ymm10 + vmovdqa %ymm11,96(%rsp) + leaq -128(%rsp),%rsp + vpaddq 96(%rbp),%ymm7,%ymm11 + vmovdqa %ymm8,0(%rsp) + xorq %r14,%r14 + vmovdqa %ymm9,32(%rsp) + movq %rbx,%rdi + vmovdqa %ymm10,64(%rsp) + xorq %rcx,%rdi + vmovdqa %ymm11,96(%rsp) + movq %r9,%r12 + addq $32*8,%rbp + jmp L$avx2_00_47 + +.p2align 4 +L$avx2_00_47: + leaq -128(%rsp),%rsp + vpalignr $8,%ymm0,%ymm1,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm4,%ymm5,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm0,%ymm0 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm7,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm7,%ymm10 + vpaddq %ymm8,%ymm0,%ymm0 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm7,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm0,%ymm0 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq -128(%rbp),%ymm0,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm5,%ymm6,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm1,%ymm1 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm0,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm0,%ymm10 + vpaddq %ymm8,%ymm1,%ymm1 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm0,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm1,%ymm1 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq -96(%rbp),%ymm1,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm6,%ymm7,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm2,%ymm2 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm1,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm1,%ymm10 + vpaddq %ymm8,%ymm2,%ymm2 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm1,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm2,%ymm2 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq -64(%rbp),%ymm2,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm7,%ymm0,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm3,%ymm3 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm2,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm2,%ymm10 + vpaddq %ymm8,%ymm3,%ymm3 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm2,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm3,%ymm3 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq -32(%rbp),%ymm3,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq -128(%rsp),%rsp + vpalignr $8,%ymm4,%ymm5,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm0,%ymm1,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm4,%ymm4 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm3,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm3,%ymm10 + vpaddq %ymm8,%ymm4,%ymm4 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm3,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm4,%ymm4 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq 0(%rbp),%ymm4,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm5,%ymm6,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm1,%ymm2,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm5,%ymm5 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm4,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm4,%ymm10 + vpaddq %ymm8,%ymm5,%ymm5 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm4,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm5,%ymm5 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq 32(%rbp),%ymm5,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm6,%ymm7,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm2,%ymm3,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm6,%ymm6 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm5,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm5,%ymm10 + vpaddq %ymm8,%ymm6,%ymm6 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm5,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm6,%ymm6 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq 64(%rbp),%ymm6,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm7,%ymm0,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm3,%ymm4,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm7,%ymm7 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm6,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm6,%ymm10 + vpaddq %ymm8,%ymm7,%ymm7 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm6,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm7,%ymm7 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq 96(%rbp),%ymm7,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq 256(%rbp),%rbp + cmpb $0,-121(%rbp) + jne L$avx2_00_47 + addq 0+128(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+128(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+128(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+128(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+128(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+128(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+128(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+128(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + addq 0(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rbp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + cmpq 144(%rbp),%rsi + je L$done_avx2 + + xorq %r14,%r14 + movq %rbx,%rdi + xorq %rcx,%rdi + movq %r9,%r12 + jmp L$ower_avx2 +.p2align 4 +L$ower_avx2: + addq 0+16(%rbp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+16(%rbp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+16(%rbp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+16(%rbp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+16(%rbp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+16(%rbp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+16(%rbp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+16(%rbp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + leaq -128(%rbp),%rbp + cmpq %rsp,%rbp + jae L$ower_avx2 + + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rsp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + leaq 256(%rsi),%rsi + addq 48(%rdi),%r10 + movq %rsi,%r12 + addq 56(%rdi),%r11 + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + cmoveq %rsp,%r12 + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + jbe L$oop_avx2 + leaq (%rsp),%rbp + +L$done_avx2: + leaq (%rbp),%rsp + movq 152(%rsp),%rsi + + vzeroupper + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + +L$epilogue_avx2: .byte 0xf3,0xc3 + |