diff options
Diffstat (limited to 'lib/accelerated/x86/macosx/ghash-x86_64.s')
-rw-r--r-- | lib/accelerated/x86/macosx/ghash-x86_64.s | 68 |
1 files changed, 57 insertions, 11 deletions
diff --git a/lib/accelerated/x86/macosx/ghash-x86_64.s b/lib/accelerated/x86/macosx/ghash-x86_64.s index 8fe772fd35..5fd3216755 100644 --- a/lib/accelerated/x86/macosx/ghash-x86_64.s +++ b/lib/accelerated/x86/macosx/ghash-x86_64.s @@ -1,4 +1,4 @@ -# Copyright (c) 2011-2013, Andy Polyakov <appro@openssl.org> +# Copyright (c) 2011-2016, Andy Polyakov <appro@openssl.org> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -44,9 +44,21 @@ .p2align 4 _gcm_gmult_4bit: + pushq %rbx + pushq %rbp + pushq %r12 + + pushq %r13 + + pushq %r14 + + pushq %r15 + + subq $280,%rsp + L$gmult_prologue: movzbq 15(%rdi),%r8 @@ -123,22 +135,35 @@ L$break1: movq %r8,8(%rdi) movq %r9,(%rdi) - movq 16(%rsp),%rbx - leaq 24(%rsp),%rsp + leaq 280+48(%rsp),%rsi + + movq -8(%rsi),%rbx + + leaq (%rsi),%rsp + L$gmult_epilogue: .byte 0xf3,0xc3 + .globl _gcm_ghash_4bit .p2align 4 _gcm_ghash_4bit: + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + subq $280,%rsp + L$ghash_prologue: movq %rdx,%r14 movq %rcx,%r15 @@ -683,21 +708,31 @@ L$outer_loop: movq %r8,8(%rdi) movq %r9,(%rdi) - leaq 280(%rsp),%rsi - movq 0(%rsi),%r15 - movq 8(%rsi),%r14 - movq 16(%rsi),%r13 - movq 24(%rsi),%r12 - movq 32(%rsi),%rbp - movq 40(%rsi),%rbx - leaq 48(%rsi),%rsp + leaq 280+48(%rsp),%rsi + + movq -48(%rsi),%r15 + + movq -40(%rsi),%r14 + + movq -32(%rsi),%r13 + + movq -24(%rsi),%r12 + + movq -16(%rsi),%rbp + + movq -8(%rsi),%rbx + + leaq 0(%rsi),%rsp + L$ghash_epilogue: .byte 0xf3,0xc3 + .globl _gcm_init_clmul .p2align 4 _gcm_init_clmul: + L$_init_clmul: movdqu (%rsi),%xmm2 pshufd $78,%xmm2,%xmm2 @@ -850,10 +885,12 @@ L$_init_clmul: movdqu %xmm4,80(%rdi) .byte 0xf3,0xc3 + .globl _gcm_gmult_clmul .p2align 4 _gcm_gmult_clmul: + L$_gmult_clmul: movdqu (%rdi),%xmm0 movdqa L$bswap_mask(%rip),%xmm5 @@ -901,10 +938,12 @@ L$_gmult_clmul: movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 + .globl _gcm_ghash_clmul .p2align 5 _gcm_ghash_clmul: + L$_ghash_clmul: movdqa L$bswap_mask(%rip),%xmm10 @@ -1284,10 +1323,12 @@ L$done: movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 + .globl _gcm_init_avx .p2align 5 _gcm_init_avx: + vzeroupper vmovdqu (%rsi),%xmm2 @@ -1391,16 +1432,20 @@ L$init_start_avx: vzeroupper .byte 0xf3,0xc3 + .globl _gcm_gmult_avx .p2align 5 _gcm_gmult_avx: + jmp L$_gmult_clmul + .globl _gcm_ghash_avx .p2align 5 _gcm_ghash_avx: + vzeroupper vmovdqu (%rdi),%xmm10 @@ -1773,6 +1818,7 @@ L$tail_no_xor_avx: vzeroupper .byte 0xf3,0xc3 + .p2align 6 L$bswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 |