summary refs log tree commit diff
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2023-05-16 21:11:04 +0200
committer Niels Möller <nisse@lysator.liu.se> 2023-05-16 21:11:04 +0200
commit 58fd657743fe29f3c4dacd50a402fb598f204399 (patch)
tree be94b3d82ff9b52870279caa155448b7ddc9c558
parent 966da449232766ad41b9be4f263fcccd4500bd22 (diff)
download nettle-fix-x86_64-ghash-alignment.tar.gz
x86_64: Fix alignment bug in _nettle_ghash_update. (branch: fix-x86_64-ghash-alignment)
-rw-r--r-- ChangeLog 7
-rw-r--r-- x86_64/ghash-update.asm 22
2 files changed, 23 insertions, 6 deletions
diff --git a/ChangeLog b/ChangeLog
index 507e0438..6312ac00 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2023-05-16 Niels Möller <nisse@lysator.liu.se>
+
+ * x86_64/ghash-update.asm: Use separate unaligned load
+ instructions (movups) to load the tabulated values, since they are
+ only 8-byte aligned and pand memory operands require 16-byte
+ alignment.
+
2023-05-15 Niels Möller <nisse@lysator.liu.se>
* eccdata.c (output_bignum_redc): Add missing mpz_clear, reported
diff --git a/x86_64/ghash-update.asm b/x86_64/ghash-update.asm
index b3417e45..0a72f071 100644
--- a/x86_64/ghash-update.asm
+++ b/x86_64/ghash-update.asm
@@ -44,6 +44,8 @@ define(`M0', `%xmm2')
define(`M1', `%xmm3')
define(`M2', `%xmm4')
define(`M3', `%xmm5')
+define(`T0', `%xmm6')
+define(`T1', `%xmm7')
.file "ghash-update.asm"
@@ -54,7 +56,7 @@ define(`M3', `%xmm5')
.text
ALIGN(16)
PROLOGUE(_nettle_ghash_update)
- W64_ENTRY(4, 6)
+ W64_ENTRY(4, 8)
sub $1, BLOCKS
movups (XP), X
jc .Ldone
@@ -77,10 +79,18 @@ ALIGN(16)
pshufd $0xaa, M3, M2
pshufd $0xff, M3, M3
pslld $1, X
- pand (KEY, CNT), M0
- pand (KEY32, CNT), M1
- pand 16(KEY, CNT), M2
- pand 16(KEY32, CNT), M3
+ C Tabulated values are only 8-byte aligned, and therefore
+ C can't be used as memory operands to pand. It would be nice
+ C if we could force 16-byte alignment on nettle_block16, using
+ C C11 alignas.
+ movups (KEY, CNT), T0
+ movups (KEY32, CNT), T1
+ pand T0, M0
+ pand T1, M1
+ movups 16(KEY, CNT), T0
+ movups 16(KEY32, CNT), T1
+ pand T0, M2
+ pand T1, M3
pxor M0, M1
pxor M2, M3
pxor M1, R
@@ -98,6 +108,6 @@ ALIGN(16)
.Ldone:
movups X, (XP)
mov SRC, %rax
- W64_EXIT(4, 6)
+ W64_EXIT(4, 8)
ret
EPILOGUE(_nettle_ghash_update)