author     Niels Möller <nisse@lysator.liu.se>  2022-08-15 09:27:36 +0200
committer  Niels Möller <nisse@lysator.liu.se>  2022-08-15 09:27:36 +0200
commit     7328fb0df069c9ce42b1a58f6788be6ea3fc2419
tree       c00c59d414c9161e49faea43ec4a0b21132b718b
parent     d618864183ccfdcd0d1b5443111fbaf9a5934517
parent     6a384afa5f5b498384d629c3257b6c7f62f459eb
download   nettle-7328fb0df069c9ce42b1a58f6788be6ea3fc2419.tar.gz
Merge branch 'sha256-compress-n' into master-updates
 ChangeLog                                                                             |  42
 Makefile.in                                                                           |   4
 arm/fat/sha256-compress-n-2.asm (renamed from arm/fat/sha256-compress-2.asm)          |   6
 arm/v6/sha256-compress-n.asm (renamed from arm/v6/sha256-compress.asm)                | 110
 arm64/crypto/sha256-compress-n.asm (renamed from arm64/crypto/sha256-compress.asm)    |  31
 arm64/fat/sha256-compress-n-2.asm (renamed from arm64/fat/sha256-compress-2.asm)      |   6
 configure.ac                                                                          |   6
 fat-arm.c                                                                             |  17
 fat-arm64.c                                                                           |  17
 fat-s390x.c                                                                           |  17
 fat-setup.h                                                                           |   4
 fat-x86_64.c                                                                          |  17
 md-internal.h                                                                         |  57
 s390x/fat/sha256-compress-n-2.asm (renamed from s390x/fat/sha256-compress-2.asm)      |   6
 s390x/msa_x1/sha256-compress-n.asm (renamed from s390x/msa_x1/sha256-compress.asm)    |  24
 sha2-internal.h                                                                       |   5
 sha256-compress-n.c (renamed from sha256-compress.c)                                  | 132
 sha256.c                                                                              |  30
 x86_64/fat/sha256-compress-n-2.asm (renamed from x86_64/fat/sha256-compress-2.asm)    |   4
 x86_64/fat/sha256-compress-n.asm (renamed from x86_64/fat/sha256-compress.asm)        |   4
 x86_64/sha256-compress-n.asm (renamed from x86_64/sha256-compress.asm)                |  85
 x86_64/sha_ni/sha256-compress-n.asm (renamed from x86_64/sha_ni/sha256-compress.asm)  |  42
 22 files changed, 440 insertions(+), 226 deletions(-)
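
Editor's note: in short, this merge replaces the single-block SHA-256 compression entry point with one that processes a run of blocks. The prototypes below are taken from the fat-setup.h and sha2-internal.h hunks further down; the remark on the return value follows from the C implementation in sha256-compress-n.c and its caller in sha256.c.

#include <stdint.h>
#include <stddef.h>

/* Old internal entry point, removed by this merge: compresses exactly
   one 64-byte block per call and returns nothing. */
void
_nettle_sha256_compress (uint32_t *state, const uint8_t *input,
                         const uint32_t *k);

/* New internal entry point: compresses BLOCKS consecutive 64-byte
   blocks and returns the advanced input pointer, input + 64*blocks,
   so the caller knows where the unprocessed tail starts. */
const uint8_t *
_nettle_sha256_compress_n (uint32_t *state, const uint32_t *k,
                           size_t blocks, const uint8_t *input);
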
diff --git a/ChangeLog b/ChangeLog
index b545f7ff..d3a962d8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -22,6 +22,48 @@
* aclocal.m4 (LSH_CCPIC): Use proper PIC flag for *BSD OS's.
* blowfish-bcrypt.c (swap32): Eliminate conflict with OpenBSD's swap32 macro.
+2022-07-29 Niels Möller <nisse@lysator.liu.se>
+
+ * s390x/msa_x1/sha256-compress-n.asm: New file, replacing...
+ * s390x/msa_x1/sha256-compress.asm: ...deleted file.
+ * s390x/fat/sha256-compress-n-2.asm: New file, replacing...
+ * s390x/fat/sha256-compress-2.asm: ...deleted file.
+ * fat-s390x.c: Update fat setup.
+
+2022-07-26 Niels Möller <nisse@lysator.liu.se>
+
+ * arm/v6/sha256-compress-n.asm: New file, replacing...
+ * arm/v6/sha256-compress.asm: ...deleted file.
+ * arm/fat/sha256-compress-n-2.asm: New file, replacing...
+ * arm/fat/sha256-compress-2.asm: ...deleted file.
+ * fat-arm.c: Update fat setup.
+
+2022-07-11 Niels Möller <nisse@lysator.liu.se>
+
+ * arm64/crypto/sha256-compress-n.asm: New file, replacing...
+ * arm64/crypto/sha256-compress.asm: ...deleted file.
+ * arm64/fat/sha256-compress-n-2.asm: New file, replacing...
+ * arm64/fat/sha256-compress-2.asm: ...deleted file.
+ * fat-arm64.c: Update fat setup.
+
+2022-07-05 Niels Möller <nisse@lysator.liu.se>
+
+ * md-internal.h (MD_FILL_OR_RETURN): New file, new macro.
+ * sha256-compress-n.c (_nettle_sha256_compress_n): New file and
+ function, replacing...
+ * sha256-compress.c (_nettle_sha256_compress): ...deleted file and
+ function.
+ * sha2-internal.h (_nettle_sha256_compress_n): Declare new function.
+ * sha256.c (sha256_compress): Update to use
+ _nettle_sha256_compress_n and MD_FILL_OR_RETURN.
+ * x86_64/sha256-compress-n.asm: New file, replacing...
+ * x86_64/sha256-compress.asm: ...deleted file.
+ * x86_64/sha_ni/sha256-compress-n.asm: New file, replacing...
+ * x86_64/sha_ni/sha256-compress.asm: ...deleted file.
+ * fat-setup.h (sha256_compress_n_func): New typedef, replacing...
+ (sha256_compress_func): ... deleted typedef.
+ * fat-x86_64.c: Update fat setup.
+
2022-06-20 Niels Möller <nisse@lysator.liu.se>
* testsuite/sha1-test.c (test_sha1_compress): New function.
diff --git a/Makefile.in b/Makefile.in
index 11c88114..8a20ed6d 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -138,7 +138,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \
salsa20-set-nonce.c \
salsa20-128-set-key.c salsa20-256-set-key.c \
sha1.c sha1-compress.c sha1-meta.c \
- sha256.c sha256-compress.c sha224-meta.c sha256-meta.c \
+ sha256.c sha256-compress-n.c sha224-meta.c sha256-meta.c \
sha512.c sha512-compress.c sha384-meta.c sha512-meta.c \
sha512-224-meta.c sha512-256-meta.c \
sha3.c sha3-permute.c \
@@ -260,7 +260,7 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \
aes-internal.h block-internal.h blowfish-internal.h camellia-internal.h \
ghash-internal.h gost28147-internal.h poly1305-internal.h \
serpent-internal.h cast128_sboxes.h desinfo.h desCode.h \
- ripemd160-internal.h sha2-internal.h \
+ ripemd160-internal.h md-internal.h sha2-internal.h \
memxor-internal.h nettle-internal.h nettle-write.h \
ctr-internal.h chacha-internal.h sha3-internal.h \
salsa20-internal.h umac-internal.h hogweed-internal.h \
diff --git a/arm/fat/sha256-compress-2.asm b/arm/fat/sha256-compress-n-2.asm
index 36d55e4b..8834d93d 100644
--- a/arm/fat/sha256-compress-2.asm
+++ b/arm/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C arm/fat/sha256-compress-2.asm
+C arm/fat/sha256-compress-n-2.asm
ifelse(`
@@ -31,7 +31,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-dnl PROLOGUE(_nettle_sha256_compress) picked up by configure
+dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure
define(`fat_transform', `$1_armv6')
-include_src(`arm/v6/sha256-compress.asm')
+include_src(`arm/v6/sha256-compress-n.asm')
diff --git a/arm/v6/sha256-compress.asm b/arm/v6/sha256-compress-n.asm
index 3c021284..bf225bd8 100644
--- a/arm/v6/sha256-compress.asm
+++ b/arm/v6/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C arm/v6/sha256-compress.asm
+C arm/v6/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2013 Niels Möller
+ Copyright (C) 2013, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -30,13 +30,14 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
- .file "sha256-compress.asm"
+ .file "sha256-compress-n.asm"
.arch armv6
define(`STATE', `r0')
-define(`INPUT', `r1')
-define(`K', `r2')
-define(`SA', `r3')
+define(`K', `r1')
+define(`BLOCKS', `r2')
+define(`INPUT', `r3')
+define(`SA', `r2') C Overlap BLOCKS
define(`SB', `r4')
define(`SC', `r5')
define(`SD', `r6')
@@ -45,12 +46,12 @@ define(`SF', `r8')
define(`SG', `r10')
define(`SH', `r11')
define(`T0', `r12')
-define(`T1', `r1') C Overlap INPUT
+define(`T1', `r3') C Overlap INPUT
define(`COUNT', `r0') C Overlap STATE
define(`W', `r14')
-C Used for data load
-define(`I0', `r3')
+C Used for data load. Must not clobber STATE (r0), K (r1) or INPUT (r3)
+define(`I0', `r2')
define(`I1', `r4')
define(`I2', `r5')
define(`I3', `r6')
@@ -88,7 +89,7 @@ C S1(E) = E<<<26 ^ E<<<21 ^ E<<<7
C S0(A) = A<<<30 ^ A<<<19 ^ A<<<10
C Choice (E, F, G) = G^(E&(F^G))
C Majority (A,B,C) = (A&B) + (C&(A^B))
-
+
define(`ROUND', `
ror T0, $5, #6
eor T0, T0, $5, ror #11
@@ -117,16 +118,31 @@ define(`NOEXPN', `
ldr W, [sp, + $1]
add $1, $1, #4
')
- C void
- C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
-
.text
.align 2
-PROLOGUE(_nettle_sha256_compress)
- push {r4,r5,r6,r7,r8,r10,r11,r14}
- sub sp, sp, #68
- str STATE, [sp, #+64]
+define(`SHIFT_OFFSET', 64)
+define(`INPUT_OFFSET', 68)
+define(`I0_OFFSET', 72)
+define(`STATE_OFFSET', 76)
+define(`K_OFFSET', 80)
+define(`BLOCKS_OFFSET', 84)
+
+ C const uint8_t *
+ C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ C size_t blocks, const uint8_t *input)
+
+PROLOGUE(_nettle_sha256_compress_n)
+ cmp BLOCKS, #0
+ bne .Lwork
+
+ mov r0, INPUT
+ bx lr
+
+.Lwork:
+ C Also save STATE (r0), K (r1) and BLOCKS (r2)
+ push {r0,r1,r2,r4,r5,r6,r7,r8,r10,r11,r12,r14}
+ sub sp, sp, #STATE_OFFSET
C Load data up front, since we don't have enough registers
C to load and shift on-the-fly
@@ -144,6 +160,9 @@ IF_BE(` lsr I1, T0, SHIFT')
C because there is no rotate left
IF_BE(` rsb SHIFT, SHIFT, #32')
+ str SHIFT, [sp, #SHIFT_OFFSET]
+
+.Loop_block:
mov DST, sp
mov ILEFT, #4
.Lcopy:
@@ -164,7 +183,12 @@ IF_LE(` rev I3, I3')
stm DST!, {I0,I1,I2,I3}
mov I0, I4
bne .Lcopy
-
+
+ str INPUT, [sp, #INPUT_OFFSET]
+ str I0, [sp, #I0_OFFSET]
+
+ C Process block, with input at sp, expanded on the fly
+
ldm STATE, {SA,SB,SC,SD,SE,SF,SG,SH}
mov COUNT,#0
@@ -203,20 +227,40 @@ IF_LE(` rev I3, I3')
EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA)
bne .Loop2
- ldr STATE, [sp, #+64]
+ ldr STATE, [sp, #STATE_OFFSET]
C No longer needed registers
- ldm STATE, {r1,r2,r12,r14}
- add SA, SA, r1
- add SB, SB, r2
- add SC, SC, r12
- add SD, SD, r14
+ ldm STATE, {K, T1, T0, W}
+ add SA, SA, K
+ add SB, SB, T1
+ add SC, SC, T0
+ add SD, SD, W
stm STATE!, {SA,SB,SC,SD}
- ldm STATE, {r1,r2,r12,r14}
- add SE, SE, r1
- add SF, SF, r2
- add SG, SG, r12
- add SH, SH, r14
- stm STATE!, {SE,SF,SG,SH}
- add sp, sp, #68
- pop {r4,r5,r6,r7,r8,r10,r11,pc}
-EPILOGUE(_nettle_sha256_compress)
+ ldm STATE, {K, T1, T0, W}
+ add SE, SE, K
+ add SF, SF, T1
+ add SG, SG, T0
+ add SH, SH, W
+ stm STATE, {SE,SF,SG,SH}
+ sub STATE, STATE, #16
+
+ ldr BLOCKS, [sp, #BLOCKS_OFFSET]
+ subs BLOCKS, BLOCKS, #1
+ str BLOCKS, [sp, #BLOCKS_OFFSET]
+
+ ldr SHIFT, [sp, #SHIFT_OFFSET]
+ ldr K, [sp, #K_OFFSET]
+ ldr INPUT, [sp, #INPUT_OFFSET]
+ ldr I0, [sp, #I0_OFFSET]
+
+ bne .Loop_block
+
+ C Restore input pointer adjustment
+IF_BE(` rsbs SHIFT, SHIFT, #32')
+IF_LE(` cmp SHIFT, #0')
+ subne INPUT, INPUT, #4
+ orr r0, INPUT, SHIFT, lsr #3
+
+ C Discard saved STATE, K and BLOCKS.
+ add sp, sp, #STATE_OFFSET + 12
+ pop {r4,r5,r6,r7,r8,r10,r11,r12,pc}
+EPILOGUE(_nettle_sha256_compress_n)
diff --git a/arm64/crypto/sha256-compress.asm b/arm64/crypto/sha256-compress-n.asm
index 2bddea05..447dc590 100644
--- a/arm64/crypto/sha256-compress.asm
+++ b/arm64/crypto/sha256-compress-n.asm
@@ -1,4 +1,4 @@
-C arm64/crypto/sha256-compress.asm
+C arm64/crypto/sha256-compress-n.asm
ifelse(`
Copyright (C) 2021 Mamone Tarsha
@@ -37,7 +37,7 @@ C SHA256H2: SHA256 hash update (part 2)
C SHA256SU0: SHA256 schedule update 0
C SHA256SU1: SHA256 schedule update 1
-.file "sha256-compress.asm"
+.file "sha256-compress-n.asm"
.arch armv8-a+crypto
.text
@@ -45,8 +45,9 @@ C SHA256SU1: SHA256 schedule update 1
C Register usage:
define(`STATE', `x0')
-define(`INPUT', `x1')
-define(`K', `x2')
+define(`K', `x1')
+define(`BLOCKS', `x2')
+define(`INPUT', `x3')
define(`MSG0', `v0')
define(`MSG1', `v1')
@@ -59,19 +60,23 @@ define(`TMP', `v7')
define(`STATE0_SAVED', `v16')
define(`STATE1_SAVED', `v17')
-C void
-C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+C const uint8_t *
+C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+C size_t blocks, const uint8_t *input)
+
+PROLOGUE(_nettle_sha256_compress_n)
+ cbz BLOCKS, .Lend
-PROLOGUE(_nettle_sha256_compress)
C Load state
ld1 {STATE0.4s,STATE1.4s},[STATE]
+.Loop:
C Save state
mov STATE0_SAVED.16b,STATE0.16b
mov STATE1_SAVED.16b,STATE1.16b
C Load message
- ld1 {MSG0.16b,MSG1.16b,MSG2.16b,MSG3.16b},[INPUT]
+ ld1 {MSG0.16b,MSG1.16b,MSG2.16b,MSG3.16b},[INPUT],#64
C Reverse for little endian
rev32 MSG0.16b,MSG0.16b
@@ -217,9 +222,13 @@ PROLOGUE(_nettle_sha256_compress)
C Combine state
add STATE0.4s,STATE0.4s,STATE0_SAVED.4s
add STATE1.4s,STATE1.4s,STATE1_SAVED.4s
-
+ subs BLOCKS, BLOCKS, #1
+ sub K, K, #240
+ b.ne .Loop
+
C Store state
st1 {STATE0.4s,STATE1.4s},[STATE]
-
+.Lend:
+ mov x0, INPUT
ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)
diff --git a/arm64/fat/sha256-compress-2.asm b/arm64/fat/sha256-compress-n-2.asm
index 67590794..2f70686e 100644
--- a/arm64/fat/sha256-compress-2.asm
+++ b/arm64/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C arm64/fat/sha256-compress-2.asm
+C arm64/fat/sha256-compress-n-2.asm
ifelse(`
@@ -31,7 +31,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-dnl PROLOGUE(_nettle_sha256_compress) picked up by configure
+dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure
define(`fat_transform', `$1_arm64')
-include_src(`arm64/crypto/sha256-compress.asm')
+include_src(`arm64/crypto/sha256-compress-n.asm')
diff --git a/configure.ac b/configure.ac
index 3ebfb175..70eb0873 100644
--- a/configure.ac
+++ b/configure.ac
@@ -598,7 +598,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
chacha-core-internal.asm \
salsa20-crypt.asm salsa20-core-internal.asm \
serpent-encrypt.asm serpent-decrypt.asm \
- sha1-compress.asm sha256-compress.asm sha512-compress.asm \
+ sha1-compress.asm sha256-compress-n.asm sha512-compress.asm \
sha3-permute.asm umac-nh.asm umac-nh-n.asm machine.m4"
# Assembler files which generate additional object files if they are used.
@@ -615,7 +615,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \
poly1305-internal-2.asm \
ghash-set-key-2.asm ghash-update-2.asm \
salsa20-2core.asm salsa20-core-internal-2.asm \
- sha1-compress-2.asm sha256-compress-2.asm \
+ sha1-compress-2.asm sha256-compress-n-2.asm \
sha3-permute-2.asm sha512-compress-2.asm \
umac-nh-n-2.asm umac-nh-2.asm"
@@ -768,7 +768,7 @@ AH_VERBATIM([HAVE_NATIVE],
#undef HAVE_NATIVE_salsa20_2core
#undef HAVE_NATIVE_fat_salsa20_2core
#undef HAVE_NATIVE_sha1_compress
-#undef HAVE_NATIVE_sha256_compress
+#undef HAVE_NATIVE_sha256_compress_n
#undef HAVE_NATIVE_sha512_compress
#undef HAVE_NATIVE_sha3_permute
#undef HAVE_NATIVE_umac_nh
diff --git a/fat-arm.c b/fat-arm.c
index 56647404..8133ca69 100644
--- a/fat-arm.c
+++ b/fat-arm.c
@@ -153,9 +153,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, armv6)
-DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, armv6)
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, armv6)
DECLARE_FAT_FUNC(_nettle_sha512_compress, sha512_compress_func)
DECLARE_FAT_FUNC_VAR(sha512_compress, sha512_compress_func, c)
@@ -202,7 +202,7 @@ fat_init (void)
_nettle_aes_encrypt_vec = _nettle_aes_encrypt_armv6;
_nettle_aes_decrypt_vec = _nettle_aes_decrypt_armv6;
nettle_sha1_compress_vec = _nettle_sha1_compress_armv6;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_armv6;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_armv6;
}
else
{
@@ -211,7 +211,7 @@ fat_init (void)
_nettle_aes_encrypt_vec = _nettle_aes_encrypt_arm;
_nettle_aes_decrypt_vec = _nettle_aes_decrypt_arm;
nettle_sha1_compress_vec = _nettle_sha1_compress_c;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_c;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c;
}
if (features.have_neon)
{
@@ -263,9 +263,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void,
(uint32_t *state, const uint8_t *input),
(state, input))
-DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
- (uint32_t *state, const uint8_t *input, const uint32_t *k),
- (state, input, k))
+DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *,
+ (uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input),
+ (state, k, blocks, input))
DEFINE_FAT_FUNC(_nettle_sha512_compress, void,
(uint64_t *state, const uint8_t *input, const uint64_t *k),
diff --git a/fat-arm64.c b/fat-arm64.c
index f2b8493d..aec99f66 100644
--- a/fat-arm64.c
+++ b/fat-arm64.c
@@ -178,9 +178,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, arm64)
-DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, arm64)
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, arm64)
static void CONSTRUCTOR
fat_init (void)
@@ -250,11 +250,11 @@ fat_init (void)
{
if (verbose)
fprintf (stderr, "libnettle: enabling hardware-accelerated sha256 compress code.\n");
- _nettle_sha256_compress_vec = _nettle_sha256_compress_arm64;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_arm64;
}
else
{
- _nettle_sha256_compress_vec = _nettle_sha256_compress_c;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c;
}
}
@@ -297,6 +297,7 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void,
(uint32_t *state, const uint8_t *input),
(state, input))
-DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
- (uint32_t *state, const uint8_t *input, const uint32_t *k),
- (state, input, k))
+DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *,
+ (uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input),
+ (state, k, blocks, input))
diff --git a/fat-s390x.c b/fat-s390x.c
index fa026018..1bbd8e16 100644
--- a/fat-s390x.c
+++ b/fat-s390x.c
@@ -254,9 +254,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, s390x)
-DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, s390x)
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, s390x)
DECLARE_FAT_FUNC(_nettle_sha512_compress, sha512_compress_func)
DECLARE_FAT_FUNC_VAR(sha512_compress, sha512_compress_func, c)
@@ -398,11 +398,11 @@ fat_init (void)
{
if (verbose)
fprintf (stderr, "libnettle: enabling hardware accelerated SHA256 compress code.\n");
- _nettle_sha256_compress_vec = _nettle_sha256_compress_s390x;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_s390x;
}
else
{
- _nettle_sha256_compress_vec = _nettle_sha256_compress_c;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c;
}
/* SHA512 */
@@ -495,9 +495,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void,
(state, input))
/* SHA256 */
-DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
- (uint32_t *state, const uint8_t *input, const uint32_t *k),
- (state, input, k))
+DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *,
+ (uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input),
+ (state, k, blocks, input))
/* SHA512 */
DEFINE_FAT_FUNC(_nettle_sha512_compress, void,
diff --git a/fat-setup.h b/fat-setup.h
index a35b8b8c..eeec629e 100644
--- a/fat-setup.h
+++ b/fat-setup.h
@@ -179,7 +179,9 @@ typedef void salsa20_crypt_func (struct salsa20_ctx *ctx, unsigned rounds,
const uint8_t *src);
typedef void sha1_compress_func(uint32_t *state, const uint8_t *input);
-typedef void sha256_compress_func(uint32_t *state, const uint8_t *input, const uint32_t *k);
+typedef const uint8_t *
+sha256_compress_n_func(uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input);
struct sha3_state;
typedef void sha3_permute_func (struct sha3_state *state);
diff --git a/fat-x86_64.c b/fat-x86_64.c
index 47cf78ae..0a2fedf4 100644
--- a/fat-x86_64.c
+++ b/fat-x86_64.c
@@ -155,9 +155,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, x86_64)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, sha_ni)
-DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, x86_64)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, sha_ni)
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, x86_64)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, sha_ni)
DECLARE_FAT_FUNC(_nettle_ghash_set_key, ghash_set_key_func)
DECLARE_FAT_FUNC_VAR(ghash_set_key, ghash_set_key_func, c)
@@ -228,14 +228,14 @@ fat_init (void)
if (verbose)
fprintf (stderr, "libnettle: using sha_ni instructions.\n");
nettle_sha1_compress_vec = _nettle_sha1_compress_sha_ni;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_sha_ni;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_sha_ni;
}
else
{
if (verbose)
fprintf (stderr, "libnettle: not using sha_ni instructions.\n");
nettle_sha1_compress_vec = _nettle_sha1_compress_x86_64;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_x86_64;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_x86_64;
}
if (features.have_pclmul)
@@ -315,9 +315,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void,
(uint32_t *state, const uint8_t *input),
(state, input))
-DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
- (uint32_t *state, const uint8_t *input, const uint32_t *k),
- (state, input, k))
+DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *,
+ (uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input),
+ (state, k, blocks, input))
DEFINE_FAT_FUNC(_nettle_ghash_set_key, void,
(struct gcm_key *ctx, const union nettle_block16 *key),
diff --git a/md-internal.h b/md-internal.h
new file mode 100644
index 00000000..fe520c63
--- /dev/null
+++ b/md-internal.h
@@ -0,0 +1,57 @@
+/* md-internal.h
+
+ Copyright (C) 2001, 2010, 2022 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NETTLE_MD_INTERNAL_H_INCLUDED
+#define NETTLE_MD_INTERNAL_H_INCLUDED
+
+/* Internal helper macros for Merkle-Damgård hash functions. Assumes the context
+   struct includes the following fields:
+
+ uint8_t block[...]; // Buffer holding one block
+ unsigned int index; // Index into block
+*/
+
+#define MD_FILL_OR_RETURN(ctx, length, data) \
+ do { \
+ unsigned __md_left = sizeof((ctx)->block) - (ctx)->index; \
+ if ((length) < __md_left) \
+ { \
+ memcpy((ctx)->block + (ctx)->index, (data), (length)); \
+ (ctx)->index += (length); \
+ return; \
+ } \
+ memcpy((ctx)->block + (ctx)->index, (data), __md_left); \
+ (data) += __md_left; \
+ (length) -= __md_left; \
+ (ctx)->index = 0; \
+ } while(0)
+
+#endif /* NETTLE_MD_INTERNAL_H_INCLUDED */
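
Editor's note: a minimal sketch of how MD_FILL_OR_RETURN is meant to be used, mirroring the sha256_update rewrite later in this diff. The names toy_ctx, toy_compress_n and toy_update are hypothetical, and the placeholder compress function does no real work; the real caller below also bumps ctx->count and passes the constant table K.

#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include "md-internal.h"

/* Hypothetical context providing the block[] and index fields the
   macro relies on. */
struct toy_ctx
{
  uint32_t state[8];
  uint8_t block[64];
  unsigned index;
};

/* Placeholder with the same shape as _nettle_sha256_compress_n (minus
   the constant table): just skips the data and returns the advanced
   pointer. */
static const uint8_t *
toy_compress_n (uint32_t *state, size_t blocks, const uint8_t *data)
{
  (void) state;
  return data + 64 * blocks;
}

static void
toy_update (struct toy_ctx *ctx, size_t length, const uint8_t *data)
{
  if (ctx->index > 0)
    {
      /* Copies DATA into the partial block; returns from toy_update if
         the block is still not full, otherwise advances DATA and
         LENGTH past the copied bytes and falls through. */
      MD_FILL_OR_RETURN (ctx, length, data);
      toy_compress_n (ctx->state, 1, ctx->block);
    }
  /* All remaining complete blocks, straight from the caller's buffer. */
  data = toy_compress_n (ctx->state, length >> 6, data);
  length &= 63;
  memcpy (ctx->block, data, length);
  ctx->index = length;
}
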
diff --git a/s390x/fat/sha256-compress-2.asm b/s390x/fat/sha256-compress-n-2.asm
index f4b16181..06fb1014 100644
--- a/s390x/fat/sha256-compress-2.asm
+++ b/s390x/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C s390x/fat/sha256-compress-2.asm
+C s390x/fat/sha256-compress-n-2.asm
ifelse(`
Copyright (C) 2021 Mamone Tarsha
@@ -30,7 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-dnl PROLOGUE(_nettle_sha256_compress) picked up by configure
+dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure
define(`fat_transform', `$1_s390x')
-include_src(`s390x/msa_x1/sha256-compress.asm')
+include_src(`s390x/msa_x1/sha256-compress-n.asm')
diff --git a/s390x/msa_x1/sha256-compress.asm b/s390x/msa_x1/sha256-compress-n.asm
index 9a9511fb..51539927 100644
--- a/s390x/msa_x1/sha256-compress.asm
+++ b/s390x/msa_x1/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C s390x/msa_x1/sha256-compress.asm
+C s390x/msa_x1/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2021 Mamone Tarsha
+ Copyright (C) 2021, 2022 Mamone Tarsha, Niels Möller
This file is part of GNU Nettle.
GNU Nettle is free software: you can redistribute it and/or
@@ -56,25 +56,23 @@ C |----------------------------------------------|
C | H7 (4 bytes) |
C *----------------------------------------------*
-.file "sha256-compress.asm"
+.file "sha256-compress-n.asm"
.text
C SHA function code
define(`SHA256_FUNCTION_CODE', `2')
-C Size of block
-define(`SHA256_BLOCK_SIZE', `64')
-C void
-C _nettle_sha256_compress(uint32_t *state, const uint8_t *input,
-C const uint32_t *k)
+C const uint8_t *
+C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+C size_t blocks, const uint8_t *input)
-PROLOGUE(_nettle_sha256_compress)
+PROLOGUE(_nettle_sha256_compress_n)
lghi %r0,SHA256_FUNCTION_CODE C SHA-256 Function Code
lgr %r1,%r2
- lgr %r4,%r3
- lghi %r5,SHA256_BLOCK_SIZE
-1: .long 0xb93e0004 C kimd %r0,%r4. perform KIMD-SHA operation on data
+ lgr %r2, %r5
+ sllg %r3, %r4, 6 C 64 * block size
+1: .long 0xb93e0002 C kimd %r0,%r2. perform KIMD-SHA operation on data
brc 1,1b
br RA
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)
diff --git a/sha2-internal.h b/sha2-internal.h
index 40f25a5f..93080bee 100644
--- a/sha2-internal.h
+++ b/sha2-internal.h
@@ -39,8 +39,9 @@
/* Internal compression function. STATE points to 8 uint32_t words,
DATA points to 64 bytes of input data, possibly unaligned, and K
points to the table of constants. */
-void
-_nettle_sha256_compress(uint32_t *state, const uint8_t *data, const uint32_t *k);
+const uint8_t *
+_nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *data);
/* Internal compression function. STATE points to 8 uint64_t words,
DATA points to 128 bytes of input data, possibly unaligned, and K
diff --git a/sha256-compress.c b/sha256-compress-n.c
index cf17e3e1..d135d14f 100644
--- a/sha256-compress.c
+++ b/sha256-compress-n.c
@@ -1,8 +1,8 @@
-/* sha256-compress.c
+/* sha256-compress-n.c
The compression function of the sha256 hash function.
- Copyright (C) 2001, 2010 Niels Möller
+ Copyright (C) 2001, 2010, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -118,26 +118,19 @@
} while (0)
/* For fat builds */
-#if HAVE_NATIVE_sha256_compress
-void
-_nettle_sha256_compress_c(uint32_t *state, const uint8_t *input, const uint32_t *k);
-#define _nettle_sha256_compress _nettle_sha256_compress_c
+#if HAVE_NATIVE_sha256_compress_n
+const uint8_t *
+_nettle_sha256_compress_n_c(uint32_t *state, const uint32_t *table,
+ size_t blocks, const uint8_t *input);
+#define _nettle_sha256_compress_n _nettle_sha256_compress_n_c
#endif
-void
-_nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+const uint8_t *
+_nettle_sha256_compress_n(uint32_t *state, const uint32_t *table,
+ size_t blocks, const uint8_t *input)
{
- uint32_t data[SHA256_DATA_LENGTH];
uint32_t A, B, C, D, E, F, G, H; /* Local vars */
- unsigned i;
- uint32_t *d;
- for (i = 0; i < SHA256_DATA_LENGTH; i++, input+= 4)
- {
- data[i] = READ_UINT32(input);
- }
-
- /* Set up first buffer and local data buffer */
A = state[0];
B = state[1];
C = state[2];
@@ -146,55 +139,68 @@ _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k
F = state[5];
G = state[6];
H = state[7];
-
- /* Heavy mangling */
- /* First 16 subrounds that act on the original data */
- DEBUG(-1);
- for (i = 0, d = data; i<16; i+=8, k += 8, d+= 8)
+ for (; blocks > 0; blocks--)
{
- ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); DEBUG(i);
- ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); DEBUG(i+1);
- ROUND(G, H, A, B, C, D, E, F, k[2], d[2]);
- ROUND(F, G, H, A, B, C, D, E, k[3], d[3]);
- ROUND(E, F, G, H, A, B, C, D, k[4], d[4]);
- ROUND(D, E, F, G, H, A, B, C, k[5], d[5]);
- ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); DEBUG(i+6);
- ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); DEBUG(i+7);
- }
+ uint32_t data[SHA256_DATA_LENGTH];
+ unsigned i;
+ const uint32_t *k;
+ uint32_t *d;
+ for (i = 0; i < SHA256_DATA_LENGTH; i++, input+= 4)
+ {
+ data[i] = READ_UINT32(input);
+ }
+
+ /* Heavy mangling */
+ /* First 16 subrounds that act on the original data */
+
+ DEBUG(-1);
+ for (i = 0, d = data, k = table; i<16; i+=8, k += 8, d+= 8)
+ {
+ ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); DEBUG(i);
+ ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); DEBUG(i+1);
+ ROUND(G, H, A, B, C, D, E, F, k[2], d[2]);
+ ROUND(F, G, H, A, B, C, D, E, k[3], d[3]);
+ ROUND(E, F, G, H, A, B, C, D, k[4], d[4]);
+ ROUND(D, E, F, G, H, A, B, C, k[5], d[5]);
+ ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); DEBUG(i+6);
+ ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); DEBUG(i+7);
+ }
- for (; i<64; i += 16, k+= 16)
- {
- ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 0)); DEBUG(i);
- ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); DEBUG(i+1);
- ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); DEBUG(i+2);
- ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); DEBUG(i+3);
- ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); DEBUG(i+4);
- ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); DEBUG(i+5);
- ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); DEBUG(i+6);
- ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); DEBUG(i+7);
- ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); DEBUG(i+8);
- ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); DEBUG(i+9);
- ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); DEBUG(i+10);
- ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); DEBUG(i+11);
- ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); DEBUG(i+12);
- ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); DEBUG(i+13);
- ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); DEBUG(i+14);
- ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); DEBUG(i+15);
- }
-
- /* Update state */
- state[0] += A;
- state[1] += B;
- state[2] += C;
- state[3] += D;
- state[4] += E;
- state[5] += F;
- state[6] += G;
- state[7] += H;
+ for (; i<64; i += 16, k+= 16)
+ {
+ ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 0)); DEBUG(i);
+ ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); DEBUG(i+1);
+ ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); DEBUG(i+2);
+ ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); DEBUG(i+3);
+ ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); DEBUG(i+4);
+ ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); DEBUG(i+5);
+ ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); DEBUG(i+6);
+ ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); DEBUG(i+7);
+ ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); DEBUG(i+8);
+ ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); DEBUG(i+9);
+ ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); DEBUG(i+10);
+ ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); DEBUG(i+11);
+ ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); DEBUG(i+12);
+ ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); DEBUG(i+13);
+ ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); DEBUG(i+14);
+ ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); DEBUG(i+15);
+ }
+
+ /* Update state */
+ state[0] = A = state[0] + A;
+ state[1] = B = state[1] + B;
+ state[2] = C = state[2] + C;
+ state[3] = D = state[3] + D;
+ state[4] = E = state[4] + E;
+ state[5] = F = state[5] + F;
+ state[6] = G = state[6] + G;
+ state[7] = H = state[7] + H;
#if SHA256_DEBUG
- fprintf(stderr, "99: %8x %8x %8x %8x %8x %8x %8x %8x\n",
- state[0], state[1], state[2], state[3],
- state[4], state[5], state[6], state[7]);
+ fprintf(stderr, "99: %8x %8x %8x %8x %8x %8x %8x %8x\n",
+ state[0], state[1], state[2], state[3],
+ state[4], state[5], state[6], state[7]);
#endif
+ }
+ return input;
}
diff --git a/sha256.c b/sha256.c
index 3872ca6f..0c9c21a0 100644
--- a/sha256.c
+++ b/sha256.c
@@ -46,6 +46,7 @@
#include "sha2-internal.h"
#include "macros.h"
+#include "md-internal.h"
#include "nettle-write.h"
/* Generated by the shadata program. */
@@ -70,6 +71,12 @@ K[64] =
0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL,
};
+void
+sha256_compress(uint32_t *state, const uint8_t *input)
+{
+ _nettle_sha256_compress_n(state, K, 1, input);
+}
+
#define COMPRESS(ctx, data) (sha256_compress((ctx)->state, (data)))
/* Initialize the SHA values */
@@ -97,7 +104,22 @@ void
sha256_update(struct sha256_ctx *ctx,
size_t length, const uint8_t *data)
{
- MD_UPDATE (ctx, length, data, COMPRESS, ctx->count++);
+ size_t blocks;
+ if (ctx->index > 0)
+ {
+ /* Try to fill partial block */
+ MD_FILL_OR_RETURN (ctx, length, data);
+ sha256_compress (ctx->state, ctx->block);
+ ctx->count++;
+ }
+
+ blocks = length >> 6;
+ data = _nettle_sha256_compress_n (ctx->state, K, blocks, data);
+ ctx->count += blocks;
+ length &= 63;
+
+ memcpy (ctx->block, data, length);
+ ctx->index = length;
}
static void
@@ -161,9 +183,3 @@ sha224_digest(struct sha256_ctx *ctx,
sha256_write_digest(ctx, length, digest);
sha224_init(ctx);
}
-
-void
-sha256_compress(uint32_t *state, const uint8_t *input)
-{
- _nettle_sha256_compress(state, input, K);
-}
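
Editor's note: the public API is unchanged by this rewrite; only the buffering inside sha256_update differs, with complete 64-byte blocks now compressed directly from the caller's buffer and only the trailing partial block copied into ctx->block. A minimal caller, assuming an installed Nettle with the usual <nettle/sha2.h> interface:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <nettle/sha2.h>

int
main (void)
{
  struct sha256_ctx ctx;
  uint8_t buf[1000];
  uint8_t digest[SHA256_DIGEST_SIZE];
  size_t i;

  memset (buf, 'a', sizeof (buf));

  sha256_init (&ctx);
  /* 15 complete blocks (960 bytes) go through the multi-block
     compression in one call; the remaining 40 bytes wait in
     ctx->block. */
  sha256_update (&ctx, sizeof (buf), buf);
  sha256_digest (&ctx, sizeof (digest), digest);

  for (i = 0; i < sizeof (digest); i++)
    printf ("%02x", digest[i]);
  putchar ('\n');
  return 0;
}
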
diff --git a/x86_64/fat/sha256-compress-2.asm b/x86_64/fat/sha256-compress-n-2.asm
index 996cf8c5..60f7c8f6 100644
--- a/x86_64/fat/sha256-compress-2.asm
+++ b/x86_64/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C x86_64/fat/sha256-compress-2.asm
+C x86_64/fat/sha256-compress-n-2.asm
ifelse(`
Copyright (C) 2018 Niels Möller
@@ -31,4 +31,4 @@ ifelse(`
')
define(`fat_transform', `$1_sha_ni')
-include_src(`x86_64/sha_ni/sha256-compress.asm')
+include_src(`x86_64/sha_ni/sha256-compress-n.asm')
diff --git a/x86_64/fat/sha256-compress.asm b/x86_64/fat/sha256-compress-n.asm
index 2aaeb5e8..fc358858 100644
--- a/x86_64/fat/sha256-compress.asm
+++ b/x86_64/fat/sha256-compress-n.asm
@@ -1,4 +1,4 @@
-C x86_64/fat/sha256-compress.asm
+C x86_64/fat/sha256-compress-n.asm
ifelse(`
Copyright (C) 2018 Niels Möller
@@ -31,4 +31,4 @@ ifelse(`
')
define(`fat_transform', `$1_x86_64')
-include_src(`x86_64/sha256-compress.asm')
+include_src(`x86_64/sha256-compress-n.asm')
diff --git a/x86_64/sha256-compress.asm b/x86_64/sha256-compress-n.asm
index 5ed669b1..e10d260c 100644
--- a/x86_64/sha256-compress.asm
+++ b/x86_64/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C x86_64/sha256-compress.asm
+C x86_64/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2013 Niels Möller
+ Copyright (C) 2013, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -30,21 +30,24 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
- .file "sha256-compress.asm"
+ .file "sha256-compress-n.asm"
define(`STATE', `%rdi')
-define(`INPUT', `%rsi')
-define(`K', `%rdx')
+define(`K', `%rsi')
+define(`BLOCKS', `%rdx')
+define(`INPUT', `%rcx')
+define(`STATE_SAVED', `64(%rsp)')
+
define(`SA', `%eax')
define(`SB', `%ebx')
-define(`SC', `%ecx')
+define(`SC', `%ebp')
define(`SD', `%r8d')
define(`SE', `%r9d')
define(`SF', `%r10d')
define(`SG', `%r11d')
define(`SH', `%r12d')
define(`T0', `%r13d')
-define(`T1', `%edi') C Overlap STATE
-define(`COUNT', `%r14')
+define(`T1', `%r14d')
+define(`COUNT', `%rdi') C Overlap STATE
define(`W', `%r15d')
define(`EXPN', `
@@ -123,18 +126,21 @@ define(`NOEXPN', `
movl W, OFFSET($1)(%rsp, COUNT, 4)
')
- C void
- C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+ C const uint8_t *
+ C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ C size_t blocks, const uint8_t *input)
.text
ALIGN(16)
-PROLOGUE(_nettle_sha256_compress)
+PROLOGUE(_nettle_sha256_compress_n)
W64_ENTRY(3, 0)
+ test BLOCKS, BLOCKS
+ jz .Lend
sub $120, %rsp
- mov %rbx, 64(%rsp)
- mov STATE, 72(%rsp) C Save state, to free a register
+ mov STATE, STATE_SAVED C Save state, to free a register
+ mov %rbx, 72(%rsp)
mov %rbp, 80(%rsp)
mov %r12, 88(%rsp)
mov %r13, 96(%rsp)
@@ -149,7 +155,9 @@ PROLOGUE(_nettle_sha256_compress)
movl 20(STATE), SF
movl 24(STATE), SG
movl 28(STATE), SH
- xor COUNT, COUNT
+
+.Loop_block:
+ xorl XREG(COUNT), XREG(COUNT)
ALIGN(16)
.Loop1:
@@ -161,8 +169,8 @@ PROLOGUE(_nettle_sha256_compress)
NOEXPN(5) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,5)
NOEXPN(6) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,6)
NOEXPN(7) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,7)
- add $8, COUNT
- cmp $16, COUNT
+ addl $8, XREG(COUNT)
+ cmpl $16, XREG(COUNT)
jne .Loop1
.Loop2:
@@ -182,22 +190,35 @@ PROLOGUE(_nettle_sha256_compress)
EXPN(13) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,13)
EXPN(14) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,14)
EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,15)
- add $16, COUNT
- cmp $64, COUNT
+ addl $16, XREG(COUNT)
+ cmpl $64, XREG(COUNT)
jne .Loop2
- mov 72(%rsp), STATE
-
- addl SA, (STATE)
- addl SB, 4(STATE)
- addl SC, 8(STATE)
- addl SD, 12(STATE)
- addl SE, 16(STATE)
- addl SF, 20(STATE)
- addl SG, 24(STATE)
- addl SH, 28(STATE)
-
- mov 64(%rsp), %rbx
+ mov STATE_SAVED, STATE
+
+ addl (STATE), SA
+ addl 4(STATE), SB
+ addl 8(STATE), SC
+ addl 12(STATE), SD
+ addl 16(STATE), SE
+ addl 20(STATE), SF
+ addl 24(STATE), SG
+ addl 28(STATE), SH
+
+ movl SA, (STATE)
+ movl SB, 4(STATE)
+ movl SC, 8(STATE)
+ movl SD, 12(STATE)
+ movl SE, 16(STATE)
+ movl SF, 20(STATE)
+ movl SG, 24(STATE)
+ movl SH, 28(STATE)
+
+ add $64, INPUT
+ dec BLOCKS
+ jnz .Loop_block
+
+ mov 72(%rsp), %rbx
mov 80(%rsp), %rbp
mov 88(%rsp), %r12
mov 96(%rsp), %r13
@@ -205,6 +226,8 @@ PROLOGUE(_nettle_sha256_compress)
mov 112(%rsp),%r15
add $120, %rsp
+.Lend:
+ mov INPUT, %rax
W64_EXIT(3, 0)
ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)
diff --git a/x86_64/sha_ni/sha256-compress.asm b/x86_64/sha_ni/sha256-compress-n.asm
index 00bd3cd3..005909df 100644
--- a/x86_64/sha_ni/sha256-compress.asm
+++ b/x86_64/sha_ni/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C x86_64/sha_ni/sha256-compress.asm
+C x86_64/sha_ni/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2018 Niels Möller
+ Copyright (C) 2018, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -30,10 +30,11 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
- .file "sha256-compress.asm"
+ .file "sha256-compress-n.asm"
define(`STATE', `%rdi')
-define(`INPUT', `%rsi')
-define(`K', `%rdx')
+define(`K', `%rsi')
+define(`BLOCKS', `%rdx')
+define(`INPUT', `%rcx')
define(`MSGK',`%xmm0') C Implicit operand of sha256rnds2
define(`MSG0',`%xmm1')
@@ -45,7 +46,7 @@ define(`CDGH',`%xmm6')
define(`ABEF_ORIG',`%xmm7')
define(`CDGH_ORIG', `%xmm8')
define(`SWAP_MASK',`%xmm9')
-define(`TMP', `%xmm9') C Overlaps SWAP_MASK
+define(`TMP', `%xmm10')
C QROUND(M0, M1, M2, M3, R)
define(`QROUND', `
@@ -69,15 +70,19 @@ define(`TRANSPOSE', `
punpcklqdq $1, $3
')
- C void
- C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+ C const uint8_t *
+ C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ C size_t blocks, const uint8_t *input)
.text
ALIGN(16)
.Lswap_mask:
.byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12
-PROLOGUE(_nettle_sha256_compress)
- W64_ENTRY(3, 10)
+PROLOGUE(_nettle_sha256_compress_n)
+ W64_ENTRY(4, 11)
+ test BLOCKS, BLOCKS
+ jz .Lend
+
movups (STATE), TMP
movups 16(STATE), ABEF
@@ -88,12 +93,13 @@ PROLOGUE(_nettle_sha256_compress)
movdqa .Lswap_mask(%rip), SWAP_MASK
- movdqa ABEF, ABEF_ORIG
- movdqa CDGH, CDGH_ORIG
-
+.Loop:
movups (INPUT), MSG0
pshufb SWAP_MASK, MSG0
+ movdqa ABEF, ABEF_ORIG
+ movdqa CDGH, CDGH_ORIG
+
movdqa (K), MSGK
paddd MSG0, MSGK
sha256rnds2 ABEF, CDGH C Round 0-1
@@ -163,6 +169,10 @@ PROLOGUE(_nettle_sha256_compress)
paddd ABEF_ORIG, ABEF
paddd CDGH_ORIG, CDGH
+ add $64, INPUT
+ dec BLOCKS
+ jnz .Loop
+
TRANSPOSE(ABEF, CDGH, TMP)
pshufd $0x1b, CDGH, CDGH
@@ -170,6 +180,8 @@ PROLOGUE(_nettle_sha256_compress)
movups CDGH, 0(STATE)
movups TMP, 16(STATE)
- W64_EXIT(3, 10)
+.Lend:
+ mov INPUT, %rax
+ W64_EXIT(4, 11)
ret
-EPILOGUE(_nettle_sha256_compress)
+EPILOGUE(_nettle_sha256_compress_n)