author    Nikos Mavrogiannopoulos <nmav@redhat.com>  2014-08-29 11:44:55 +0200
committer Nikos Mavrogiannopoulos <nmav@redhat.com>  2014-08-29 11:44:55 +0200
commit    5d8a9cb2f6f795165eabbee20a1ee015695e22ac (patch)
tree      961f36c8ce7e0940c2f07154b5b97793b6fc2817 /lib/accelerated
parent    40186fd7a29df3533eb44450bfb364b6f73c001a (diff)
download  gnutls-5d8a9cb2f6f795165eabbee20a1ee015695e22ac.tar.gz
Revert "updated asm sources"
This reverts commit 97895066e18abc5689ede9af1a463539ea783e90.
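
For reference, a minimal sketch of the CPU feature test involved: the
reverted commit had added SHA-NI (Intel SHA extensions) fast paths,
dispatched when bit 29 of the CPUID leaf-7 EBX word is set. That is the
"testl $536870912,%r10d" / "jnz _shaext_shortcut" sequence this revert
removes below (536870912 is 1 << 29). An equivalent standalone check in
C, assuming GCC/Clang's <cpuid.h>; the helper below is illustrative only
and is not part of gnutls, which reads its cached _gnutls_x86_cpuid_s
words instead:

    #include <cpuid.h>
    #include <stdio.h>

    /* Mirrors the removed "testl $(1 << 29)" dispatch: CPUID leaf 7,
     * subleaf 0, EBX bit 29 advertises the Intel SHA extensions. */
    static int have_sha_ext(void)
    {
            unsigned int eax, ebx, ecx, edx;

            /* __get_cpuid_count returns 0 if leaf 7 is unsupported. */
            if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
                    return 0;
            return (ebx & (1u << 29)) != 0;
    }

    int main(void)
    {
            printf("SHA extensions: %s\n", have_sha_ext() ? "yes" : "no");
            return 0;
    }

The SSSE3 dispatch that the files keep ("testl $512", i.e. bit 9 of
CPUID leaf-1 ECX) is unaffected by this revert.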
Diffstat (limited to 'lib/accelerated')
-rw-r--r--  lib/accelerated/x86/coff/ghash-x86_64.s          |  42
-rw-r--r--  lib/accelerated/x86/coff/sha1-ssse3-x86_64.s     | 229
-rw-r--r--  lib/accelerated/x86/coff/sha512-ssse3-x86_64.s   | 274
-rw-r--r--  lib/accelerated/x86/elf/ghash-x86_64.s           |  42
-rw-r--r--  lib/accelerated/x86/elf/sha1-ssse3-x86_64.s      | 170
-rw-r--r--  lib/accelerated/x86/elf/sha512-ssse3-x86_64.s    | 213
-rw-r--r--  lib/accelerated/x86/macosx/ghash-x86_64.s        |  42
-rw-r--r--  lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s   | 170
-rw-r--r--  lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s | 213
9 files changed, 72 insertions(+), 1323 deletions(-)
diff --git a/lib/accelerated/x86/coff/ghash-x86_64.s b/lib/accelerated/x86/coff/ghash-x86_64.s
index 2b4c72acd4..b1d69911c7 100644
--- a/lib/accelerated/x86/coff/ghash-x86_64.s
+++ b/lib/accelerated/x86/coff/ghash-x86_64.s
@@ -990,8 +990,8 @@ gcm_ghash_clmul:
pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
-.byte 102,68,15,58,68,231,16
xorps %xmm11,%xmm3
+.byte 102,68,15,58,68,231,16
xorps %xmm13,%xmm5
movups 80(%rdx),%xmm7
xorps %xmm12,%xmm4
@@ -1009,8 +1009,8 @@ gcm_ghash_clmul:
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
-.byte 102,68,15,58,68,231,0
xorps %xmm11,%xmm3
+.byte 102,68,15,58,68,231,0
xorps %xmm13,%xmm5
leaq 64(%r8),%r8
@@ -1028,23 +1028,23 @@ gcm_ghash_clmul:
xorps %xmm3,%xmm0
movdqu 32(%r8),%xmm3
movdqa %xmm11,%xmm13
-.byte 102,68,15,58,68,199,16
pshufd $78,%xmm11,%xmm12
+.byte 102,68,15,58,68,199,16
xorps %xmm5,%xmm1
pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
movups 32(%rdx),%xmm7
- xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
+ xorps %xmm4,%xmm8
+ movdqa %xmm3,%xmm5
pshufd $78,%xmm3,%xmm4
pxor %xmm0,%xmm8
- movdqa %xmm3,%xmm5
- pxor %xmm1,%xmm8
pxor %xmm3,%xmm4
+ pxor %xmm1,%xmm8
movdqa %xmm8,%xmm9
-.byte 102,68,15,58,68,234,17
pslldq $8,%xmm8
+.byte 102,68,15,58,68,234,17
psrldq $8,%xmm9
pxor %xmm8,%xmm0
movdqa .L7_mask(%rip),%xmm8
@@ -1053,8 +1053,8 @@ gcm_ghash_clmul:
pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
- pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
+ pxor %xmm0,%xmm9
psllq $57,%xmm9
movdqa %xmm9,%xmm8
pslldq $8,%xmm9
@@ -1081,31 +1081,32 @@ gcm_ghash_clmul:
movdqa %xmm11,%xmm13
pxor %xmm12,%xmm4
pshufd $78,%xmm11,%xmm12
- pxor %xmm9,%xmm0
- pxor %xmm8,%xmm1
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
+ pxor %xmm9,%xmm0
+ pxor %xmm8,%xmm1
psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
xorps %xmm11,%xmm3
- pshufd $78,%xmm0,%xmm8
- pxor %xmm0,%xmm8
+ pxor %xmm1,%xmm0
.byte 102,68,15,58,68,231,0
xorps %xmm13,%xmm5
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm0,%xmm8
+
leaq 64(%r8),%r8
subq $64,%r9
jnc .Lmod4_loop
.Ltail4x:
.byte 102,65,15,58,68,199,0
-.byte 102,65,15,58,68,207,17
-.byte 102,68,15,58,68,199,16
xorps %xmm12,%xmm4
+.byte 102,65,15,58,68,207,17
xorps %xmm3,%xmm0
+.byte 102,68,15,58,68,199,16
xorps %xmm5,%xmm1
pxor %xmm0,%xmm1
pxor %xmm4,%xmm8
@@ -1185,13 +1186,13 @@ gcm_ghash_clmul:
pxor %xmm3,%xmm0
pxor %xmm5,%xmm1
- movdqu (%r8),%xmm9
+ movdqu (%r8),%xmm5
pxor %xmm0,%xmm8
-.byte 102,69,15,56,0,202
+.byte 102,65,15,56,0,234
movdqu 16(%r8),%xmm3
pxor %xmm1,%xmm8
- pxor %xmm9,%xmm1
+ pxor %xmm5,%xmm1
pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
movdqa %xmm4,%xmm8
@@ -1218,9 +1219,9 @@ gcm_ghash_clmul:
pxor %xmm8,%xmm1
pxor %xmm5,%xmm4
+.byte 102,15,58,68,234,17
movdqa %xmm0,%xmm9
psrlq $1,%xmm0
-.byte 102,15,58,68,234,17
pxor %xmm9,%xmm1
pxor %xmm0,%xmm9
psrlq $5,%xmm0
@@ -1229,6 +1230,7 @@ gcm_ghash_clmul:
psrlq $1,%xmm0
.byte 102,15,58,68,231,0
pxor %xmm1,%xmm0
+.byte 0x66,0x90
subq $32,%r9
ja .Lmod_loop
diff --git a/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s
index 1b7fe3a51c..e8136025e8 100644
--- a/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s
+++ b/lib/accelerated/x86/coff/sha1-ssse3-x86_64.s
@@ -57,8 +57,6 @@ sha1_block_data_order:
movl _gnutls_x86_cpuid_s+8(%rip),%r10d
testl $512,%r8d
jz .Lialu
- testl $536870912,%r10d
- jnz _shaext_shortcut
jmp _ssse3_shortcut
.p2align 4
@@ -1280,195 +1278,6 @@ sha1_block_data_order:
movq 16(%rsp),%rsi
.byte 0xf3,0xc3
.LSEH_end_sha1_block_data_order:
-.def sha1_block_data_order_shaext; .scl 3; .type 32; .endef
-.p2align 5
-sha1_block_data_order_shaext:
- movq %rdi,8(%rsp)
- movq %rsi,16(%rsp)
- movq %rsp,%rax
-.LSEH_begin_sha1_block_data_order_shaext:
- movq %rcx,%rdi
- movq %rdx,%rsi
- movq %r8,%rdx
-
-_shaext_shortcut:
- leaq -72(%rsp),%rsp
- movaps %xmm6,-8-64(%rax)
- movaps %xmm7,-8-48(%rax)
- movaps %xmm8,-8-32(%rax)
- movaps %xmm9,-8-16(%rax)
-.Lprologue_shaext:
- movdqu (%rdi),%xmm0
- movd 16(%rdi),%xmm1
- movdqa K_XX_XX+160(%rip),%xmm3
-
- movdqu (%rsi),%xmm4
- pshufd $27,%xmm0,%xmm0
- movdqu 16(%rsi),%xmm5
- pshufd $27,%xmm1,%xmm1
- movdqu 32(%rsi),%xmm6
-.byte 102,15,56,0,227
- movdqu 48(%rsi),%xmm7
-.byte 102,15,56,0,235
-.byte 102,15,56,0,243
- movdqa %xmm1,%xmm9
-.byte 102,15,56,0,251
- jmp .Loop_shaext
-
-.p2align 4
-.Loop_shaext:
- decq %rdx
- leaq 64(%rsi),%rax
- paddd %xmm4,%xmm1
- cmovneq %rax,%rsi
- movdqa %xmm0,%xmm8
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,0
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,0
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,1
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,1
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,2
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,2
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
- movdqu (%rsi),%xmm4
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,3
-.byte 15,56,200,213
- movdqu 16(%rsi),%xmm5
-.byte 102,15,56,0,227
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 15,56,200,206
- movdqu 32(%rsi),%xmm6
-.byte 102,15,56,0,235
-
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,3
-.byte 15,56,200,215
- movdqu 48(%rsi),%xmm7
-.byte 102,15,56,0,243
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 65,15,56,200,201
-.byte 102,15,56,0,251
-
- paddd %xmm8,%xmm0
- movdqa %xmm1,%xmm9
-
- jnz .Loop_shaext
-
- pshufd $27,%xmm0,%xmm0
- pshufd $27,%xmm1,%xmm1
- movdqu %xmm0,(%rdi)
- movd %xmm1,16(%rdi)
- movaps -8-64(%rax),%xmm6
- movaps -8-48(%rax),%xmm7
- movaps -8-32(%rax),%xmm8
- movaps -8-16(%rax),%xmm9
- movq %rax,%rsp
-.Lepilogue_shaext:
- movq 8(%rsp),%rdi
- movq 16(%rsp),%rsi
- .byte 0xf3,0xc3
-.LSEH_end_sha1_block_data_order_shaext:
.def sha1_block_data_order_ssse3; .scl 3; .type 32; .endef
.p2align 4
sha1_block_data_order_ssse3:
@@ -2680,7 +2489,6 @@ K_XX_XX:
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
-.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
@@ -2726,37 +2534,6 @@ se_handler:
jmp .Lcommon_seh_tail
-.def shaext_handler; .scl 3; .type 32; .endef
-.p2align 4
-shaext_handler:
- pushq %rsi
- pushq %rdi
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- pushfq
- subq $64,%rsp
-
- movq 120(%r8),%rax
- movq 248(%r8),%rbx
-
- leaq .Lprologue_shaext(%rip),%r10
- cmpq %r10,%rbx
- jb .Lcommon_seh_tail
-
- leaq .Lepilogue_shaext(%rip),%r10
- cmpq %r10,%rbx
- jae .Lcommon_seh_tail
-
- leaq -8-64(%rax),%rsi
- leaq 512(%r8),%rdi
- movl $8,%ecx
-.long 0xa548f3fc
-
- jmp .Lcommon_seh_tail
.def ssse3_handler; .scl 3; .type 32; .endef
.p2align 4
@@ -2853,9 +2630,6 @@ ssse3_handler:
.rva .LSEH_begin_sha1_block_data_order
.rva .LSEH_end_sha1_block_data_order
.rva .LSEH_info_sha1_block_data_order
-.rva .LSEH_begin_sha1_block_data_order_shaext
-.rva .LSEH_end_sha1_block_data_order_shaext
-.rva .LSEH_info_sha1_block_data_order_shaext
.rva .LSEH_begin_sha1_block_data_order_ssse3
.rva .LSEH_end_sha1_block_data_order_ssse3
.rva .LSEH_info_sha1_block_data_order_ssse3
@@ -2864,9 +2638,6 @@ ssse3_handler:
.LSEH_info_sha1_block_data_order:
.byte 9,0,0,0
.rva se_handler
-.LSEH_info_sha1_block_data_order_shaext:
-.byte 9,0,0,0
-.rva shaext_handler
.LSEH_info_sha1_block_data_order_ssse3:
.byte 9,0,0,0
.rva ssse3_handler
diff --git a/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s b/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s
index 959a2525bf..edaa67b95f 100644
--- a/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s
+++ b/lib/accelerated/x86/coff/sha512-ssse3-x86_64.s
@@ -56,8 +56,6 @@ sha256_block_data_order:
movl 0(%r11),%r9d
movl 4(%r11),%r10d
movl 8(%r11),%r11d
- testl $536870912,%r11d
- jnz _shaext_shortcut
testl $512,%r10d
jnz .Lssse3_shortcut
pushq %rbx
@@ -1794,237 +1792,6 @@ K256:
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.def sha256_block_data_order_shaext; .scl 3; .type 32; .endef
-.p2align 6
-sha256_block_data_order_shaext:
- movq %rdi,8(%rsp)
- movq %rsi,16(%rsp)
- movq %rsp,%rax
-.LSEH_begin_sha256_block_data_order_shaext:
- movq %rcx,%rdi
- movq %rdx,%rsi
- movq %r8,%rdx
-
-_shaext_shortcut:
- leaq -88(%rsp),%rsp
- movaps %xmm6,-8-80(%rax)
- movaps %xmm7,-8-64(%rax)
- movaps %xmm8,-8-48(%rax)
- movaps %xmm9,-8-32(%rax)
- movaps %xmm10,-8-16(%rax)
-.Lprologue_shaext:
- leaq K256+128(%rip),%rcx
- movdqu (%rdi),%xmm1
- movdqu 16(%rdi),%xmm2
- movdqa 512-128(%rcx),%xmm7
-
- pshufd $27,%xmm1,%xmm0
- pshufd $177,%xmm1,%xmm1
- pshufd $27,%xmm2,%xmm2
- movdqa %xmm7,%xmm8
-.byte 102,15,58,15,202,8
- punpcklqdq %xmm0,%xmm2
- jmp .Loop_shaext
-
-.p2align 4
-.Loop_shaext:
- movdqu (%rsi),%xmm3
- movdqu 16(%rsi),%xmm4
- movdqu 32(%rsi),%xmm5
-.byte 102,15,56,0,223
- movdqu 48(%rsi),%xmm6
-
- movdqa 0-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 102,15,56,0,231
- movdqa %xmm2,%xmm10
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- nop
- movdqa %xmm1,%xmm9
-.byte 15,56,203,202
-
- movdqa 32-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 102,15,56,0,239
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- leaq 64(%rsi),%rsi
-.byte 15,56,204,220
-.byte 15,56,203,202
-
- movdqa 64-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 102,15,56,0,247
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm6,%xmm7
-.byte 102,15,58,15,253,4
- nop
- paddd %xmm7,%xmm3
-.byte 15,56,204,229
-.byte 15,56,203,202
-
- movdqa 96-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
-.byte 15,56,205,222
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,254,4
- nop
- paddd %xmm7,%xmm4
-.byte 15,56,204,238
-.byte 15,56,203,202
- movdqa 128-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 15,56,205,227
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm4,%xmm7
-.byte 102,15,58,15,251,4
- nop
- paddd %xmm7,%xmm5
-.byte 15,56,204,243
-.byte 15,56,203,202
- movdqa 160-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 15,56,205,236
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm5,%xmm7
-.byte 102,15,58,15,252,4
- nop
- paddd %xmm7,%xmm6
-.byte 15,56,204,220
-.byte 15,56,203,202
- movdqa 192-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 15,56,205,245
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm6,%xmm7
-.byte 102,15,58,15,253,4
- nop
- paddd %xmm7,%xmm3
-.byte 15,56,204,229
-.byte 15,56,203,202
- movdqa 224-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
-.byte 15,56,205,222
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,254,4
- nop
- paddd %xmm7,%xmm4
-.byte 15,56,204,238
-.byte 15,56,203,202
- movdqa 256-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 15,56,205,227
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm4,%xmm7
-.byte 102,15,58,15,251,4
- nop
- paddd %xmm7,%xmm5
-.byte 15,56,204,243
-.byte 15,56,203,202
- movdqa 288-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 15,56,205,236
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm5,%xmm7
-.byte 102,15,58,15,252,4
- nop
- paddd %xmm7,%xmm6
-.byte 15,56,204,220
-.byte 15,56,203,202
- movdqa 320-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 15,56,205,245
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm6,%xmm7
-.byte 102,15,58,15,253,4
- nop
- paddd %xmm7,%xmm3
-.byte 15,56,204,229
-.byte 15,56,203,202
- movdqa 352-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
-.byte 15,56,205,222
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,254,4
- nop
- paddd %xmm7,%xmm4
-.byte 15,56,204,238
-.byte 15,56,203,202
- movdqa 384-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 15,56,205,227
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm4,%xmm7
-.byte 102,15,58,15,251,4
- nop
- paddd %xmm7,%xmm5
-.byte 15,56,204,243
-.byte 15,56,203,202
- movdqa 416-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 15,56,205,236
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm5,%xmm7
-.byte 102,15,58,15,252,4
-.byte 15,56,203,202
- paddd %xmm7,%xmm6
-
- movdqa 448-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
-.byte 15,56,205,245
- movdqa %xmm8,%xmm7
-.byte 15,56,203,202
-
- movdqa 480-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
- nop
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- decq %rdx
- nop
-.byte 15,56,203,202
-
- paddd %xmm10,%xmm2
- paddd %xmm9,%xmm1
- jnz .Loop_shaext
-
- pshufd $177,%xmm2,%xmm2
- pshufd $27,%xmm1,%xmm7
- pshufd $177,%xmm1,%xmm1
- punpckhqdq %xmm2,%xmm1
-.byte 102,15,58,15,215,8
-
- movdqu %xmm1,(%rdi)
- movdqu %xmm2,16(%rdi)
- movaps -8-80(%rax),%xmm6
- movaps -8-64(%rax),%xmm7
- movaps -8-48(%rax),%xmm8
- movaps -8-32(%rax),%xmm9
- movaps -8-16(%rax),%xmm10
- movq %rax,%rsp
-.Lepilogue_shaext:
- movq 8(%rsp),%rdi
- movq 16(%rsp),%rsi
- .byte 0xf3,0xc3
-.LSEH_end_sha256_block_data_order_shaext:
.def sha256_block_data_order_ssse3; .scl 3; .type 32; .endef
.p2align 6
sha256_block_data_order_ssse3:
@@ -2075,13 +1842,13 @@ sha256_block_data_order_ssse3:
movdqu 0(%rsi),%xmm0
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
-.byte 102,15,56,0,199
movdqu 48(%rsi),%xmm3
+.byte 102,15,56,0,199
leaq K256(%rip),%rbp
.byte 102,15,56,0,207
movdqa 0(%rbp),%xmm4
- movdqa 32(%rbp),%xmm5
.byte 102,15,56,0,215
+ movdqa 32(%rbp),%xmm5
paddd %xmm0,%xmm4
movdqa 64(%rbp),%xmm6
.byte 102,15,56,0,223
@@ -3235,46 +3002,12 @@ se_handler:
popq %rsi
.byte 0xf3,0xc3
-.def shaext_handler; .scl 3; .type 32; .endef
-.p2align 4
-shaext_handler:
- pushq %rsi
- pushq %rdi
- pushq %rbx
- pushq %rbp
- pushq %r12
- pushq %r13
- pushq %r14
- pushq %r15
- pushfq
- subq $64,%rsp
-
- movq 120(%r8),%rax
- movq 248(%r8),%rbx
-
- leaq .Lprologue_shaext(%rip),%r10
- cmpq %r10,%rbx
- jb .Lin_prologue
-
- leaq .Lepilogue_shaext(%rip),%r10
- cmpq %r10,%rbx
- jae .Lin_prologue
-
- leaq -8-80(%rax),%rsi
- leaq 512(%r8),%rdi
- movl $10,%ecx
-.long 0xa548f3fc
-
- jmp .Lin_prologue
.section .pdata
.p2align 2
.rva .LSEH_begin_sha256_block_data_order
.rva .LSEH_end_sha256_block_data_order
.rva .LSEH_info_sha256_block_data_order
-.rva .LSEH_begin_sha256_block_data_order_shaext
-.rva .LSEH_end_sha256_block_data_order_shaext
-.rva .LSEH_info_sha256_block_data_order_shaext
.rva .LSEH_begin_sha256_block_data_order_ssse3
.rva .LSEH_end_sha256_block_data_order_ssse3
.rva .LSEH_info_sha256_block_data_order_ssse3
@@ -3284,9 +3017,6 @@ shaext_handler:
.byte 9,0,0,0
.rva se_handler
.rva .Lprologue,.Lepilogue
-.LSEH_info_sha256_block_data_order_shaext:
-.byte 9,0,0,0
-.rva shaext_handler
.LSEH_info_sha256_block_data_order_ssse3:
.byte 9,0,0,0
.rva se_handler
diff --git a/lib/accelerated/x86/elf/ghash-x86_64.s b/lib/accelerated/x86/elf/ghash-x86_64.s
index afa6714a6e..17f6bebe6e 100644
--- a/lib/accelerated/x86/elf/ghash-x86_64.s
+++ b/lib/accelerated/x86/elf/ghash-x86_64.s
@@ -949,8 +949,8 @@ gcm_ghash_clmul:
pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
-.byte 102,68,15,58,68,231,16
xorps %xmm11,%xmm3
+.byte 102,68,15,58,68,231,16
xorps %xmm13,%xmm5
movups 80(%rsi),%xmm7
xorps %xmm12,%xmm4
@@ -968,8 +968,8 @@ gcm_ghash_clmul:
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
-.byte 102,68,15,58,68,231,0
xorps %xmm11,%xmm3
+.byte 102,68,15,58,68,231,0
xorps %xmm13,%xmm5
leaq 64(%rdx),%rdx
@@ -987,23 +987,23 @@ gcm_ghash_clmul:
xorps %xmm3,%xmm0
movdqu 32(%rdx),%xmm3
movdqa %xmm11,%xmm13
-.byte 102,68,15,58,68,199,16
pshufd $78,%xmm11,%xmm12
+.byte 102,68,15,58,68,199,16
xorps %xmm5,%xmm1
pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
movups 32(%rsi),%xmm7
- xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
+ xorps %xmm4,%xmm8
+ movdqa %xmm3,%xmm5
pshufd $78,%xmm3,%xmm4
pxor %xmm0,%xmm8
- movdqa %xmm3,%xmm5
- pxor %xmm1,%xmm8
pxor %xmm3,%xmm4
+ pxor %xmm1,%xmm8
movdqa %xmm8,%xmm9
-.byte 102,68,15,58,68,234,17
pslldq $8,%xmm8
+.byte 102,68,15,58,68,234,17
psrldq $8,%xmm9
pxor %xmm8,%xmm0
movdqa .L7_mask(%rip),%xmm8
@@ -1012,8 +1012,8 @@ gcm_ghash_clmul:
pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
- pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
+ pxor %xmm0,%xmm9
psllq $57,%xmm9
movdqa %xmm9,%xmm8
pslldq $8,%xmm9
@@ -1040,31 +1040,32 @@ gcm_ghash_clmul:
movdqa %xmm11,%xmm13
pxor %xmm12,%xmm4
pshufd $78,%xmm11,%xmm12
- pxor %xmm9,%xmm0
- pxor %xmm8,%xmm1
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
+ pxor %xmm9,%xmm0
+ pxor %xmm8,%xmm1
psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
xorps %xmm11,%xmm3
- pshufd $78,%xmm0,%xmm8
- pxor %xmm0,%xmm8
+ pxor %xmm1,%xmm0
.byte 102,68,15,58,68,231,0
xorps %xmm13,%xmm5
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm0,%xmm8
+
leaq 64(%rdx),%rdx
subq $64,%rcx
jnc .Lmod4_loop
.Ltail4x:
.byte 102,65,15,58,68,199,0
-.byte 102,65,15,58,68,207,17
-.byte 102,68,15,58,68,199,16
xorps %xmm12,%xmm4
+.byte 102,65,15,58,68,207,17
xorps %xmm3,%xmm0
+.byte 102,68,15,58,68,199,16
xorps %xmm5,%xmm1
pxor %xmm0,%xmm1
pxor %xmm4,%xmm8
@@ -1144,13 +1145,13 @@ gcm_ghash_clmul:
pxor %xmm3,%xmm0
pxor %xmm5,%xmm1
- movdqu (%rdx),%xmm9
+ movdqu (%rdx),%xmm5
pxor %xmm0,%xmm8
-.byte 102,69,15,56,0,202
+.byte 102,65,15,56,0,234
movdqu 16(%rdx),%xmm3
pxor %xmm1,%xmm8
- pxor %xmm9,%xmm1
+ pxor %xmm5,%xmm1
pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
movdqa %xmm4,%xmm8
@@ -1177,9 +1178,9 @@ gcm_ghash_clmul:
pxor %xmm8,%xmm1
pxor %xmm5,%xmm4
+.byte 102,15,58,68,234,17
movdqa %xmm0,%xmm9
psrlq $1,%xmm0
-.byte 102,15,58,68,234,17
pxor %xmm9,%xmm1
pxor %xmm0,%xmm9
psrlq $5,%xmm0
@@ -1188,6 +1189,7 @@ gcm_ghash_clmul:
psrlq $1,%xmm0
.byte 102,15,58,68,231,0
pxor %xmm1,%xmm0
+.byte 0x66,0x90
subq $32,%rcx
ja .Lmod_loop
diff --git a/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
index bed632617f..116efd0c18 100644
--- a/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
+++ b/lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
@@ -49,8 +49,6 @@ sha1_block_data_order:
movl _gnutls_x86_cpuid_s+8(%rip),%r10d
testl $512,%r8d
jz .Lialu
- testl $536870912,%r10d
- jnz _shaext_shortcut
jmp _ssse3_shortcut
.align 16
@@ -1270,173 +1268,6 @@ sha1_block_data_order:
.Lepilogue:
.byte 0xf3,0xc3
.size sha1_block_data_order,.-sha1_block_data_order
-.type sha1_block_data_order_shaext,@function
-.align 32
-sha1_block_data_order_shaext:
-_shaext_shortcut:
- movdqu (%rdi),%xmm0
- movd 16(%rdi),%xmm1
- movdqa K_XX_XX+160(%rip),%xmm3
-
- movdqu (%rsi),%xmm4
- pshufd $27,%xmm0,%xmm0
- movdqu 16(%rsi),%xmm5
- pshufd $27,%xmm1,%xmm1
- movdqu 32(%rsi),%xmm6
-.byte 102,15,56,0,227
- movdqu 48(%rsi),%xmm7
-.byte 102,15,56,0,235
-.byte 102,15,56,0,243
- movdqa %xmm1,%xmm9
-.byte 102,15,56,0,251
- jmp .Loop_shaext
-
-.align 16
-.Loop_shaext:
- decq %rdx
- leaq 64(%rsi),%rax
- paddd %xmm4,%xmm1
- cmovneq %rax,%rsi
- movdqa %xmm0,%xmm8
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,0
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,0
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,1
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,1
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,2
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,2
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
- movdqu (%rsi),%xmm4
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,3
-.byte 15,56,200,213
- movdqu 16(%rsi),%xmm5
-.byte 102,15,56,0,227
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 15,56,200,206
- movdqu 32(%rsi),%xmm6
-.byte 102,15,56,0,235
-
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,3
-.byte 15,56,200,215
- movdqu 48(%rsi),%xmm7
-.byte 102,15,56,0,243
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 65,15,56,200,201
-.byte 102,15,56,0,251
-
- paddd %xmm8,%xmm0
- movdqa %xmm1,%xmm9
-
- jnz .Loop_shaext
-
- pshufd $27,%xmm0,%xmm0
- pshufd $27,%xmm1,%xmm1
- movdqu %xmm0,(%rdi)
- movd %xmm1,16(%rdi)
- .byte 0xf3,0xc3
-.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
.type sha1_block_data_order_ssse3,@function
.align 16
sha1_block_data_order_ssse3:
@@ -2625,7 +2456,6 @@ K_XX_XX:
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
-.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
diff --git a/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
index e1f92d4a78..f85e0bb6d8 100644
--- a/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
+++ b/lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
@@ -48,8 +48,6 @@ sha256_block_data_order:
movl 0(%r11),%r9d
movl 4(%r11),%r10d
movl 8(%r11),%r11d
- testl $536870912,%r11d
- jnz _shaext_shortcut
testl $512,%r10d
jnz .Lssse3_shortcut
pushq %rbx
@@ -1784,213 +1782,6 @@ K256:
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.type sha256_block_data_order_shaext,@function
-.align 64
-sha256_block_data_order_shaext:
-_shaext_shortcut:
- leaq K256+128(%rip),%rcx
- movdqu (%rdi),%xmm1
- movdqu 16(%rdi),%xmm2
- movdqa 512-128(%rcx),%xmm7
-
- pshufd $27,%xmm1,%xmm0
- pshufd $177,%xmm1,%xmm1
- pshufd $27,%xmm2,%xmm2
- movdqa %xmm7,%xmm8
-.byte 102,15,58,15,202,8
- punpcklqdq %xmm0,%xmm2
- jmp .Loop_shaext
-
-.align 16
-.Loop_shaext:
- movdqu (%rsi),%xmm3
- movdqu 16(%rsi),%xmm4
- movdqu 32(%rsi),%xmm5
-.byte 102,15,56,0,223
- movdqu 48(%rsi),%xmm6
-
- movdqa 0-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 102,15,56,0,231
- movdqa %xmm2,%xmm10
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- nop
- movdqa %xmm1,%xmm9
-.byte 15,56,203,202
-
- movdqa 32-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 102,15,56,0,239
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- leaq 64(%rsi),%rsi
-.byte 15,56,204,220
-.byte 15,56,203,202
-
- movdqa 64-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 102,15,56,0,247
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm6,%xmm7
-.byte 102,15,58,15,253,4
- nop
- paddd %xmm7,%xmm3
-.byte 15,56,204,229
-.byte 15,56,203,202
-
- movdqa 96-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
-.byte 15,56,205,222
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,254,4
- nop
- paddd %xmm7,%xmm4
-.byte 15,56,204,238
-.byte 15,56,203,202
- movdqa 128-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 15,56,205,227
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm4,%xmm7
-.byte 102,15,58,15,251,4
- nop
- paddd %xmm7,%xmm5
-.byte 15,56,204,243
-.byte 15,56,203,202
- movdqa 160-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 15,56,205,236
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm5,%xmm7
-.byte 102,15,58,15,252,4
- nop
- paddd %xmm7,%xmm6
-.byte 15,56,204,220
-.byte 15,56,203,202
- movdqa 192-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 15,56,205,245
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm6,%xmm7
-.byte 102,15,58,15,253,4
- nop
- paddd %xmm7,%xmm3
-.byte 15,56,204,229
-.byte 15,56,203,202
- movdqa 224-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
-.byte 15,56,205,222
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,254,4
- nop
- paddd %xmm7,%xmm4
-.byte 15,56,204,238
-.byte 15,56,203,202
- movdqa 256-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 15,56,205,227
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm4,%xmm7
-.byte 102,15,58,15,251,4
- nop
- paddd %xmm7,%xmm5
-.byte 15,56,204,243
-.byte 15,56,203,202
- movdqa 288-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 15,56,205,236
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm5,%xmm7
-.byte 102,15,58,15,252,4
- nop
- paddd %xmm7,%xmm6
-.byte 15,56,204,220
-.byte 15,56,203,202
- movdqa 320-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 15,56,205,245
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm6,%xmm7
-.byte 102,15,58,15,253,4
- nop
- paddd %xmm7,%xmm3
-.byte 15,56,204,229
-.byte 15,56,203,202
- movdqa 352-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
-.byte 15,56,205,222
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,254,4
- nop
- paddd %xmm7,%xmm4
-.byte 15,56,204,238
-.byte 15,56,203,202
- movdqa 384-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 15,56,205,227
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm4,%xmm7
-.byte 102,15,58,15,251,4
- nop
- paddd %xmm7,%xmm5
-.byte 15,56,204,243
-.byte 15,56,203,202
- movdqa 416-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 15,56,205,236
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm5,%xmm7
-.byte 102,15,58,15,252,4
-.byte 15,56,203,202
- paddd %xmm7,%xmm6
-
- movdqa 448-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
-.byte 15,56,205,245
- movdqa %xmm8,%xmm7
-.byte 15,56,203,202
-
- movdqa 480-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
- nop
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- decq %rdx
- nop
-.byte 15,56,203,202
-
- paddd %xmm10,%xmm2
- paddd %xmm9,%xmm1
- jnz .Loop_shaext
-
- pshufd $177,%xmm2,%xmm2
- pshufd $27,%xmm1,%xmm7
- pshufd $177,%xmm1,%xmm1
- punpckhqdq %xmm2,%xmm1
-.byte 102,15,58,15,215,8
-
- movdqu %xmm1,(%rdi)
- movdqu %xmm2,16(%rdi)
- .byte 0xf3,0xc3
-.size sha256_block_data_order_shaext,.-sha256_block_data_order_shaext
.type sha256_block_data_order_ssse3,@function
.align 64
sha256_block_data_order_ssse3:
@@ -2029,13 +1820,13 @@ sha256_block_data_order_ssse3:
movdqu 0(%rsi),%xmm0
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
-.byte 102,15,56,0,199
movdqu 48(%rsi),%xmm3
+.byte 102,15,56,0,199
leaq K256(%rip),%rbp
.byte 102,15,56,0,207
movdqa 0(%rbp),%xmm4
- movdqa 32(%rbp),%xmm5
.byte 102,15,56,0,215
+ movdqa 32(%rbp),%xmm5
paddd %xmm0,%xmm4
movdqa 64(%rbp),%xmm6
.byte 102,15,56,0,223
diff --git a/lib/accelerated/x86/macosx/ghash-x86_64.s b/lib/accelerated/x86/macosx/ghash-x86_64.s
index 9e246794f0..a63034a6fc 100644
--- a/lib/accelerated/x86/macosx/ghash-x86_64.s
+++ b/lib/accelerated/x86/macosx/ghash-x86_64.s
@@ -949,8 +949,8 @@ L$_ghash_clmul:
pxor %xmm11,%xmm12
.byte 102,68,15,58,68,222,0
.byte 102,68,15,58,68,238,17
-.byte 102,68,15,58,68,231,16
xorps %xmm11,%xmm3
+.byte 102,68,15,58,68,231,16
xorps %xmm13,%xmm5
movups 80(%rsi),%xmm7
xorps %xmm12,%xmm4
@@ -968,8 +968,8 @@ L$_ghash_clmul:
pshufd $78,%xmm0,%xmm8
pxor %xmm0,%xmm8
.byte 102,69,15,58,68,238,17
-.byte 102,68,15,58,68,231,0
xorps %xmm11,%xmm3
+.byte 102,68,15,58,68,231,0
xorps %xmm13,%xmm5
leaq 64(%rdx),%rdx
@@ -987,23 +987,23 @@ L$mod4_loop:
xorps %xmm3,%xmm0
movdqu 32(%rdx),%xmm3
movdqa %xmm11,%xmm13
-.byte 102,68,15,58,68,199,16
pshufd $78,%xmm11,%xmm12
+.byte 102,68,15,58,68,199,16
xorps %xmm5,%xmm1
pxor %xmm11,%xmm12
.byte 102,65,15,56,0,218
movups 32(%rsi),%xmm7
- xorps %xmm4,%xmm8
.byte 102,68,15,58,68,218,0
+ xorps %xmm4,%xmm8
+ movdqa %xmm3,%xmm5
pshufd $78,%xmm3,%xmm4
pxor %xmm0,%xmm8
- movdqa %xmm3,%xmm5
- pxor %xmm1,%xmm8
pxor %xmm3,%xmm4
+ pxor %xmm1,%xmm8
movdqa %xmm8,%xmm9
-.byte 102,68,15,58,68,234,17
pslldq $8,%xmm8
+.byte 102,68,15,58,68,234,17
psrldq $8,%xmm9
pxor %xmm8,%xmm0
movdqa L$7_mask(%rip),%xmm8
@@ -1012,8 +1012,8 @@ L$mod4_loop:
pand %xmm0,%xmm8
.byte 102,69,15,56,0,200
- pxor %xmm0,%xmm9
.byte 102,68,15,58,68,231,0
+ pxor %xmm0,%xmm9
psllq $57,%xmm9
movdqa %xmm9,%xmm8
pslldq $8,%xmm9
@@ -1040,31 +1040,32 @@ L$mod4_loop:
movdqa %xmm11,%xmm13
pxor %xmm12,%xmm4
pshufd $78,%xmm11,%xmm12
- pxor %xmm9,%xmm0
- pxor %xmm8,%xmm1
pxor %xmm11,%xmm12
.byte 102,69,15,58,68,222,0
+ pxor %xmm9,%xmm0
+ pxor %xmm8,%xmm1
psrlq $1,%xmm0
- pxor %xmm1,%xmm0
- movdqa %xmm0,%xmm1
.byte 102,69,15,58,68,238,17
xorps %xmm11,%xmm3
- pshufd $78,%xmm0,%xmm8
- pxor %xmm0,%xmm8
+ pxor %xmm1,%xmm0
.byte 102,68,15,58,68,231,0
xorps %xmm13,%xmm5
+ movdqa %xmm0,%xmm1
+ pshufd $78,%xmm0,%xmm8
+ pxor %xmm0,%xmm8
+
leaq 64(%rdx),%rdx
subq $64,%rcx
jnc L$mod4_loop
L$tail4x:
.byte 102,65,15,58,68,199,0
-.byte 102,65,15,58,68,207,17
-.byte 102,68,15,58,68,199,16
xorps %xmm12,%xmm4
+.byte 102,65,15,58,68,207,17
xorps %xmm3,%xmm0
+.byte 102,68,15,58,68,199,16
xorps %xmm5,%xmm1
pxor %xmm0,%xmm1
pxor %xmm4,%xmm8
@@ -1144,13 +1145,13 @@ L$mod_loop:
pxor %xmm3,%xmm0
pxor %xmm5,%xmm1
- movdqu (%rdx),%xmm9
+ movdqu (%rdx),%xmm5
pxor %xmm0,%xmm8
-.byte 102,69,15,56,0,202
+.byte 102,65,15,56,0,234
movdqu 16(%rdx),%xmm3
pxor %xmm1,%xmm8
- pxor %xmm9,%xmm1
+ pxor %xmm5,%xmm1
pxor %xmm8,%xmm4
.byte 102,65,15,56,0,218
movdqa %xmm4,%xmm8
@@ -1177,9 +1178,9 @@ L$mod_loop:
pxor %xmm8,%xmm1
pxor %xmm5,%xmm4
+.byte 102,15,58,68,234,17
movdqa %xmm0,%xmm9
psrlq $1,%xmm0
-.byte 102,15,58,68,234,17
pxor %xmm9,%xmm1
pxor %xmm0,%xmm9
psrlq $5,%xmm0
@@ -1188,6 +1189,7 @@ L$mod_loop:
psrlq $1,%xmm0
.byte 102,15,58,68,231,0
pxor %xmm1,%xmm0
+.byte 0x66,0x90
subq $32,%rcx
ja L$mod_loop
diff --git a/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s b/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s
index 6e63270971..9091bd802e 100644
--- a/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s
+++ b/lib/accelerated/x86/macosx/sha1-ssse3-x86_64.s
@@ -49,8 +49,6 @@ _sha1_block_data_order:
movl __gnutls_x86_cpuid_s+8(%rip),%r10d
testl $512,%r8d
jz L$ialu
- testl $536870912,%r10d
- jnz _shaext_shortcut
jmp _ssse3_shortcut
.p2align 4
@@ -1271,173 +1269,6 @@ L$epilogue:
.byte 0xf3,0xc3
-.p2align 5
-sha1_block_data_order_shaext:
-_shaext_shortcut:
- movdqu (%rdi),%xmm0
- movd 16(%rdi),%xmm1
- movdqa K_XX_XX+160(%rip),%xmm3
-
- movdqu (%rsi),%xmm4
- pshufd $27,%xmm0,%xmm0
- movdqu 16(%rsi),%xmm5
- pshufd $27,%xmm1,%xmm1
- movdqu 32(%rsi),%xmm6
-.byte 102,15,56,0,227
- movdqu 48(%rsi),%xmm7
-.byte 102,15,56,0,235
-.byte 102,15,56,0,243
- movdqa %xmm1,%xmm9
-.byte 102,15,56,0,251
- jmp L$oop_shaext
-
-.p2align 4
-L$oop_shaext:
- decq %rdx
- leaq 64(%rsi),%rax
- paddd %xmm4,%xmm1
- cmovneq %rax,%rsi
- movdqa %xmm0,%xmm8
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,0
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,0
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,0
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,1
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,1
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,1
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,2
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
-.byte 15,56,201,229
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,213
- pxor %xmm6,%xmm4
-.byte 15,56,201,238
-.byte 15,56,202,231
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,2
-.byte 15,56,200,206
- pxor %xmm7,%xmm5
-.byte 15,56,202,236
-.byte 15,56,201,247
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,2
-.byte 15,56,200,215
- pxor %xmm4,%xmm6
-.byte 15,56,201,252
-.byte 15,56,202,245
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 15,56,200,204
- pxor %xmm5,%xmm7
-.byte 15,56,202,254
- movdqu (%rsi),%xmm4
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,3
-.byte 15,56,200,213
- movdqu 16(%rsi),%xmm5
-.byte 102,15,56,0,227
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 15,56,200,206
- movdqu 32(%rsi),%xmm6
-.byte 102,15,56,0,235
-
- movdqa %xmm0,%xmm2
-.byte 15,58,204,193,3
-.byte 15,56,200,215
- movdqu 48(%rsi),%xmm7
-.byte 102,15,56,0,243
-
- movdqa %xmm0,%xmm1
-.byte 15,58,204,194,3
-.byte 65,15,56,200,201
-.byte 102,15,56,0,251
-
- paddd %xmm8,%xmm0
- movdqa %xmm1,%xmm9
-
- jnz L$oop_shaext
-
- pshufd $27,%xmm0,%xmm0
- pshufd $27,%xmm1,%xmm1
- movdqu %xmm0,(%rdi)
- movd %xmm1,16(%rdi)
- .byte 0xf3,0xc3
-
-
.p2align 4
sha1_block_data_order_ssse3:
_ssse3_shortcut:
@@ -2625,7 +2456,6 @@ K_XX_XX:
.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
-.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
diff --git a/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s
index 94ea73c417..c48240f457 100644
--- a/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s
+++ b/lib/accelerated/x86/macosx/sha512-ssse3-x86_64.s
@@ -48,8 +48,6 @@ _sha256_block_data_order:
movl 0(%r11),%r9d
movl 4(%r11),%r10d
movl 8(%r11),%r11d
- testl $536870912,%r11d
- jnz _shaext_shortcut
testl $512,%r10d
jnz L$ssse3_shortcut
pushq %rbx
@@ -1786,213 +1784,6 @@ K256:
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
-sha256_block_data_order_shaext:
-_shaext_shortcut:
- leaq K256+128(%rip),%rcx
- movdqu (%rdi),%xmm1
- movdqu 16(%rdi),%xmm2
- movdqa 512-128(%rcx),%xmm7
-
- pshufd $27,%xmm1,%xmm0
- pshufd $177,%xmm1,%xmm1
- pshufd $27,%xmm2,%xmm2
- movdqa %xmm7,%xmm8
-.byte 102,15,58,15,202,8
- punpcklqdq %xmm0,%xmm2
- jmp L$oop_shaext
-
-.p2align 4
-L$oop_shaext:
- movdqu (%rsi),%xmm3
- movdqu 16(%rsi),%xmm4
- movdqu 32(%rsi),%xmm5
-.byte 102,15,56,0,223
- movdqu 48(%rsi),%xmm6
-
- movdqa 0-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 102,15,56,0,231
- movdqa %xmm2,%xmm10
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- nop
- movdqa %xmm1,%xmm9
-.byte 15,56,203,202
-
- movdqa 32-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 102,15,56,0,239
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- leaq 64(%rsi),%rsi
-.byte 15,56,204,220
-.byte 15,56,203,202
-
- movdqa 64-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 102,15,56,0,247
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm6,%xmm7
-.byte 102,15,58,15,253,4
- nop
- paddd %xmm7,%xmm3
-.byte 15,56,204,229
-.byte 15,56,203,202
-
- movdqa 96-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
-.byte 15,56,205,222
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,254,4
- nop
- paddd %xmm7,%xmm4
-.byte 15,56,204,238
-.byte 15,56,203,202
- movdqa 128-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 15,56,205,227
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm4,%xmm7
-.byte 102,15,58,15,251,4
- nop
- paddd %xmm7,%xmm5
-.byte 15,56,204,243
-.byte 15,56,203,202
- movdqa 160-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 15,56,205,236
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm5,%xmm7
-.byte 102,15,58,15,252,4
- nop
- paddd %xmm7,%xmm6
-.byte 15,56,204,220
-.byte 15,56,203,202
- movdqa 192-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 15,56,205,245
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm6,%xmm7
-.byte 102,15,58,15,253,4
- nop
- paddd %xmm7,%xmm3
-.byte 15,56,204,229
-.byte 15,56,203,202
- movdqa 224-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
-.byte 15,56,205,222
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,254,4
- nop
- paddd %xmm7,%xmm4
-.byte 15,56,204,238
-.byte 15,56,203,202
- movdqa 256-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 15,56,205,227
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm4,%xmm7
-.byte 102,15,58,15,251,4
- nop
- paddd %xmm7,%xmm5
-.byte 15,56,204,243
-.byte 15,56,203,202
- movdqa 288-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 15,56,205,236
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm5,%xmm7
-.byte 102,15,58,15,252,4
- nop
- paddd %xmm7,%xmm6
-.byte 15,56,204,220
-.byte 15,56,203,202
- movdqa 320-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 15,56,205,245
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm6,%xmm7
-.byte 102,15,58,15,253,4
- nop
- paddd %xmm7,%xmm3
-.byte 15,56,204,229
-.byte 15,56,203,202
- movdqa 352-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
-.byte 15,56,205,222
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm3,%xmm7
-.byte 102,15,58,15,254,4
- nop
- paddd %xmm7,%xmm4
-.byte 15,56,204,238
-.byte 15,56,203,202
- movdqa 384-128(%rcx),%xmm0
- paddd %xmm3,%xmm0
-.byte 15,56,205,227
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm4,%xmm7
-.byte 102,15,58,15,251,4
- nop
- paddd %xmm7,%xmm5
-.byte 15,56,204,243
-.byte 15,56,203,202
- movdqa 416-128(%rcx),%xmm0
- paddd %xmm4,%xmm0
-.byte 15,56,205,236
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- movdqa %xmm5,%xmm7
-.byte 102,15,58,15,252,4
-.byte 15,56,203,202
- paddd %xmm7,%xmm6
-
- movdqa 448-128(%rcx),%xmm0
- paddd %xmm5,%xmm0
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
-.byte 15,56,205,245
- movdqa %xmm8,%xmm7
-.byte 15,56,203,202
-
- movdqa 480-128(%rcx),%xmm0
- paddd %xmm6,%xmm0
- nop
-.byte 15,56,203,209
- pshufd $14,%xmm0,%xmm0
- decq %rdx
- nop
-.byte 15,56,203,202
-
- paddd %xmm10,%xmm2
- paddd %xmm9,%xmm1
- jnz L$oop_shaext
-
- pshufd $177,%xmm2,%xmm2
- pshufd $27,%xmm1,%xmm7
- pshufd $177,%xmm1,%xmm1
- punpckhqdq %xmm2,%xmm1
-.byte 102,15,58,15,215,8
-
- movdqu %xmm1,(%rdi)
- movdqu %xmm2,16(%rdi)
- .byte 0xf3,0xc3
-
-
-.p2align 6
sha256_block_data_order_ssse3:
L$ssse3_shortcut:
pushq %rbx
@@ -2029,13 +1820,13 @@ L$loop_ssse3:
movdqu 0(%rsi),%xmm0
movdqu 16(%rsi),%xmm1
movdqu 32(%rsi),%xmm2
-.byte 102,15,56,0,199
movdqu 48(%rsi),%xmm3
+.byte 102,15,56,0,199
leaq K256(%rip),%rbp
.byte 102,15,56,0,207
movdqa 0(%rbp),%xmm4
- movdqa 32(%rbp),%xmm5
.byte 102,15,56,0,215
+ movdqa 32(%rbp),%xmm5
paddd %xmm0,%xmm4
movdqa 64(%rbp),%xmm6
.byte 102,15,56,0,223