From 58fd657743fe29f3c4dacd50a402fb598f204399 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niels=20M=C3=B6ller?=
Date: Tue, 16 May 2023 21:11:04 +0200
Subject: x86_64: Fix alignment bug in _nettle_ghash_update.

---
 ChangeLog               |  7 +++++++
 x86_64/ghash-update.asm | 22 ++++++++++++++++------
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 507e0438..6312ac00 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2023-05-16  Niels Möller
+
+	* x86_64/ghash-update.asm: Use separate unaligned load
+	instructions (movups) to load the tabulated values, since they are
+	only 8-byte aligned and pand memory operands require 16-byte
+	alignment.
+
 2023-05-15  Niels Möller
 
 	* eccdata.c (output_bignum_redc): Add missing mpz_clear, reported

diff --git a/x86_64/ghash-update.asm b/x86_64/ghash-update.asm
index b3417e45..0a72f071 100644
--- a/x86_64/ghash-update.asm
+++ b/x86_64/ghash-update.asm
@@ -44,6 +44,8 @@ define(`M0', `%xmm2')
 define(`M1', `%xmm3')
 define(`M2', `%xmm4')
 define(`M3', `%xmm5')
+define(`T0', `%xmm6')
+define(`T1', `%xmm7')
 
 	.file "ghash-update.asm"
 
@@ -54,7 +56,7 @@ define(`M3', `%xmm5')
 	.text
 ALIGN(16)
 PROLOGUE(_nettle_ghash_update)
-	W64_ENTRY(4, 6)
+	W64_ENTRY(4, 8)
 	sub	$1, BLOCKS
 	movups	(XP), X
 	jc	.Ldone
@@ -77,10 +79,18 @@ ALIGN(16)
 	pshufd	$0xaa, M3, M2
 	pshufd	$0xff, M3, M3
 	pslld	$1, X
-	pand	(KEY, CNT), M0
-	pand	(KEY32, CNT), M1
-	pand	16(KEY, CNT), M2
-	pand	16(KEY32, CNT), M3
+	C Tabulated values are only 8-byte aligned, and therefore
+	C can't be used as memory operands to pand. It would be nice
+	C if we could force 16-byte alignment on nettle_block16, using
+	C C11 alignas.
+	movups	(KEY, CNT), T0
+	movups	(KEY32, CNT), T1
+	pand	T0, M0
+	pand	T1, M1
+	movups	16(KEY, CNT), T0
+	movups	16(KEY32, CNT), T1
+	pand	T0, M2
+	pand	T1, M3
 	pxor	M0, M1
 	pxor	M2, M3
 	pxor	M1, R
@@ -98,6 +108,6 @@ ALIGN(16)
 .Ldone:
 	movups	X, (XP)
 	mov	SRC, %rax
-	W64_EXIT(4, 6)
+	W64_EXIT(4, 8)
 	ret
 EPILOGUE(_nettle_ghash_update)
-- 
cgit v1.2.1