diff options
author | Niels Möller <nisse@lysator.liu.se> | 2022-08-15 09:27:36 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2022-08-15 09:27:36 +0200 |
commit | 7328fb0df069c9ce42b1a58f6788be6ea3fc2419 (patch) | |
tree | c00c59d414c9161e49faea43ec4a0b21132b718b | |
parent | d618864183ccfdcd0d1b5443111fbaf9a5934517 (diff) | |
parent | 6a384afa5f5b498384d629c3257b6c7f62f459eb (diff) | |
download | nettle-7328fb0df069c9ce42b1a58f6788be6ea3fc2419.tar.gz |
Merge branch 'sha256-compress-n' into master-updates
-rw-r--r-- | ChangeLog | 42 | ||||
-rw-r--r-- | Makefile.in | 4 | ||||
-rw-r--r-- | arm/fat/sha256-compress-n-2.asm (renamed from arm/fat/sha256-compress-2.asm) | 6 | ||||
-rw-r--r-- | arm/v6/sha256-compress-n.asm (renamed from arm/v6/sha256-compress.asm) | 110 | ||||
-rw-r--r-- | arm64/crypto/sha256-compress-n.asm (renamed from arm64/crypto/sha256-compress.asm) | 31 | ||||
-rw-r--r-- | arm64/fat/sha256-compress-n-2.asm (renamed from arm64/fat/sha256-compress-2.asm) | 6 | ||||
-rw-r--r-- | configure.ac | 6 | ||||
-rw-r--r-- | fat-arm.c | 17 | ||||
-rw-r--r-- | fat-arm64.c | 17 | ||||
-rw-r--r-- | fat-s390x.c | 17 | ||||
-rw-r--r-- | fat-setup.h | 4 | ||||
-rw-r--r-- | fat-x86_64.c | 17 | ||||
-rw-r--r-- | md-internal.h | 57 | ||||
-rw-r--r-- | s390x/fat/sha256-compress-n-2.asm (renamed from s390x/fat/sha256-compress-2.asm) | 6 | ||||
-rw-r--r-- | s390x/msa_x1/sha256-compress-n.asm (renamed from s390x/msa_x1/sha256-compress.asm) | 24 | ||||
-rw-r--r-- | sha2-internal.h | 5 | ||||
-rw-r--r-- | sha256-compress-n.c (renamed from sha256-compress.c) | 132 | ||||
-rw-r--r-- | sha256.c | 30 | ||||
-rw-r--r-- | x86_64/fat/sha256-compress-n-2.asm (renamed from x86_64/fat/sha256-compress-2.asm) | 4 | ||||
-rw-r--r-- | x86_64/fat/sha256-compress-n.asm (renamed from x86_64/fat/sha256-compress.asm) | 4 | ||||
-rw-r--r-- | x86_64/sha256-compress-n.asm (renamed from x86_64/sha256-compress.asm) | 85 | ||||
-rw-r--r-- | x86_64/sha_ni/sha256-compress-n.asm (renamed from x86_64/sha_ni/sha256-compress.asm) | 42 |
22 files changed, 440 insertions, 226 deletions
@@ -22,6 +22,48 @@ * aclocal.m4 (LSH_CCPIC): Use proper PIC flag for *BSD OS's. * blowfish-bcrypt.c (swap32): Eliminate conflict with OpenBSD's swap32 macro. +2022-07-29 Niels Möller <nisse@lysator.liu.se> + + * s390x/msa_x1/sha256-compress-n.asm: New file, replacing... + * s390x/msa_x1/sha256-compress.asm: ...deleted file. + * s390x/fat/sha256-compress-n-2.asm: New file, replacing... + * s390x/fat/sha256-compress-2.asm: ...deleted file. + * fat-s390x.c: Update fat setup. + +2022-07-26 Niels Möller <nisse@lysator.liu.se> + + * arm/v6/sha256-compress-n.asm: New file, replacing... + * arm/v6/sha256-compress.asm: ...deleted file. + * arm/fat/sha256-compress-n-2.asm: New file, replacing... + * arm/fat/sha256-compress-2.asm: ...deleted file. + * fat-arm.c: Update fat setup. + +2022-07-11 Niels Möller <nisse@lysator.liu.se> + + * arm64/crypto/sha256-compress-n.asm: New file, replacing... + * arm64/crypto/sha256-compress.asm: ...deleted file. + * arm64/fat/sha256-compress-n-2.asm: New file, replacing... + * arm64/fat/sha256-compress-2.asm: ...deleted file. + * fat-arm64.c: Update fat setup. + +2022-07-05 Niels Möller <nisse@lysator.liu.se> + + * md-internal.h (MD_FILL_OR_RETURN): New file, new macro. + * sha256-compress-n.c (_nettle_sha256_compress_n): New file and + function, replacing... + * sha256-compress.c (_nettle_sha256_compress): ...deleted file and + function. + * sha2-internal.h (_nettle_sha256_compress_n): Declare new function. + * sha256.c (sha256_compress): Update to use + _nettle_sha256_compress_n and MD_FILL_OR_RETURN. + * x86_64/sha256-compress-n.asm: New file, replacing... + * x86_64/sha256-compress.asm: ...deleted file. + * x86_64/sha_ni/sha256-compress-n.asm: New file, replacing... + * x86_64/sha_ni/sha256-compress.asm: ...deleted file. + * fat-setup.h (sha256_compress_n_func): New typedef, replacing... + (sha256_compress_func): ... deleted typedef. + * fat-x86_64.c: Update fat setup. 
+ 2022-06-20 Niels Möller <nisse@lysator.liu.se> * testsuite/sha1-test.c (test_sha1_compress): New function. diff --git a/Makefile.in b/Makefile.in index 11c88114..8a20ed6d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -138,7 +138,7 @@ nettle_SOURCES = aes-decrypt-internal.c aes-decrypt.c aes-decrypt-table.c \ salsa20-set-nonce.c \ salsa20-128-set-key.c salsa20-256-set-key.c \ sha1.c sha1-compress.c sha1-meta.c \ - sha256.c sha256-compress.c sha224-meta.c sha256-meta.c \ + sha256.c sha256-compress-n.c sha224-meta.c sha256-meta.c \ sha512.c sha512-compress.c sha384-meta.c sha512-meta.c \ sha512-224-meta.c sha512-256-meta.c \ sha3.c sha3-permute.c \ @@ -260,7 +260,7 @@ DISTFILES = $(SOURCES) $(HEADERS) getopt.h getopt_int.h \ aes-internal.h block-internal.h blowfish-internal.h camellia-internal.h \ ghash-internal.h gost28147-internal.h poly1305-internal.h \ serpent-internal.h cast128_sboxes.h desinfo.h desCode.h \ - ripemd160-internal.h sha2-internal.h \ + ripemd160-internal.h md-internal.h sha2-internal.h \ memxor-internal.h nettle-internal.h nettle-write.h \ ctr-internal.h chacha-internal.h sha3-internal.h \ salsa20-internal.h umac-internal.h hogweed-internal.h \ diff --git a/arm/fat/sha256-compress-2.asm b/arm/fat/sha256-compress-n-2.asm index 36d55e4b..8834d93d 100644 --- a/arm/fat/sha256-compress-2.asm +++ b/arm/fat/sha256-compress-n-2.asm @@ -1,4 +1,4 @@ -C arm/fat/sha256-compress-2.asm +C arm/fat/sha256-compress-n-2.asm ifelse(` @@ -31,7 +31,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. 
') -dnl PROLOGUE(_nettle_sha256_compress) picked up by configure +dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure define(`fat_transform', `$1_armv6') -include_src(`arm/v6/sha256-compress.asm') +include_src(`arm/v6/sha256-compress-n.asm') diff --git a/arm/v6/sha256-compress.asm b/arm/v6/sha256-compress-n.asm index 3c021284..bf225bd8 100644 --- a/arm/v6/sha256-compress.asm +++ b/arm/v6/sha256-compress-n.asm @@ -1,7 +1,7 @@ -C arm/v6/sha256-compress.asm +C arm/v6/sha256-compress-n.asm ifelse(` - Copyright (C) 2013 Niels Möller + Copyright (C) 2013, 2022 Niels Möller This file is part of GNU Nettle. @@ -30,13 +30,14 @@ ifelse(` not, see http://www.gnu.org/licenses/. ') - .file "sha256-compress.asm" + .file "sha256-compress-n.asm" .arch armv6 define(`STATE', `r0') -define(`INPUT', `r1') -define(`K', `r2') -define(`SA', `r3') +define(`K', `r1') +define(`BLOCKS', `r2') +define(`INPUT', `r3') +define(`SA', `r2') C Overlap BLOCKS define(`SB', `r4') define(`SC', `r5') define(`SD', `r6') @@ -45,12 +46,12 @@ define(`SF', `r8') define(`SG', `r10') define(`SH', `r11') define(`T0', `r12') -define(`T1', `r1') C Overlap INPUT +define(`T1', `r3') C Overlap INPUT define(`COUNT', `r0') C Overlap STATE define(`W', `r14') -C Used for data load -define(`I0', `r3') +C Used for data load. 
Must not clobber STATE (r0), K (r1) or INPUT (r3) +define(`I0', `r2') define(`I1', `r4') define(`I2', `r5') define(`I3', `r6') @@ -88,7 +89,7 @@ C S1(E) = E<<<26 ^ E<<<21 ^ E<<<7 C S0(A) = A<<<30 ^ A<<<19 ^ A<<<10 C Choice (E, F, G) = G^(E&(F^G)) C Majority (A,B,C) = (A&B) + (C&(A^B)) - + define(`ROUND', ` ror T0, $5, #6 eor T0, T0, $5, ror #11 @@ -117,16 +118,31 @@ define(`NOEXPN', ` ldr W, [sp, + $1] add $1, $1, #4 ') - C void - C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) - .text .align 2 -PROLOGUE(_nettle_sha256_compress) - push {r4,r5,r6,r7,r8,r10,r11,r14} - sub sp, sp, #68 - str STATE, [sp, #+64] +define(`SHIFT_OFFSET', 64) +define(`INPUT_OFFSET', 68) +define(`I0_OFFSET', 72) +define(`STATE_OFFSET', 76) +define(`K_OFFSET', 80) +define(`BLOCKS_OFFSET', 84) + + C const uint8_t * + C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, + C size_t blocks, const uint8_t *input) + +PROLOGUE(_nettle_sha256_compress_n) + cmp BLOCKS, #0 + bne .Lwork + + mov r0, INPUT + bx lr + +.Lwork: + C Also save STATE (r0), K (r1) and BLOCKS (r2) + push {r0,r1,r2,r4,r5,r6,r7,r8,r10,r11,r12,r14} + sub sp, sp, #STATE_OFFSET C Load data up front, since we don't have enough registers C to load and shift on-the-fly @@ -144,6 +160,9 @@ IF_BE(` lsr I1, T0, SHIFT') C because there is no rotate left IF_BE(` rsb SHIFT, SHIFT, #32') + str SHIFT, [sp, #SHIFT_OFFSET] + +.Loop_block: mov DST, sp mov ILEFT, #4 .Lcopy: @@ -164,7 +183,12 @@ IF_LE(` rev I3, I3') stm DST!, {I0,I1,I2,I3} mov I0, I4 bne .Lcopy - + + str INPUT, [sp, #INPUT_OFFSET] + str I0, [sp, #I0_OFFSET] + + C Process block, with input at sp, expanded on the fly + ldm STATE, {SA,SB,SC,SD,SE,SF,SG,SH} mov COUNT,#0 @@ -203,20 +227,40 @@ IF_LE(` rev I3, I3') EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA) bne .Loop2 - ldr STATE, [sp, #+64] + ldr STATE, [sp, #STATE_OFFSET] C No longer needed registers - ldm STATE, {r1,r2,r12,r14} - add SA, SA, r1 - add SB, SB, r2 - add SC, SC, r12 - add SD, SD, 
r14 + ldm STATE, {K, T1, T0, W} + add SA, SA, K + add SB, SB, T1 + add SC, SC, T0 + add SD, SD, W stm STATE!, {SA,SB,SC,SD} - ldm STATE, {r1,r2,r12,r14} - add SE, SE, r1 - add SF, SF, r2 - add SG, SG, r12 - add SH, SH, r14 - stm STATE!, {SE,SF,SG,SH} - add sp, sp, #68 - pop {r4,r5,r6,r7,r8,r10,r11,pc} -EPILOGUE(_nettle_sha256_compress) + ldm STATE, {K, T1, T0, W} + add SE, SE, K + add SF, SF, T1 + add SG, SG, T0 + add SH, SH, W + stm STATE, {SE,SF,SG,SH} + sub STATE, STATE, #16 + + ldr BLOCKS, [sp, #BLOCKS_OFFSET] + subs BLOCKS, BLOCKS, #1 + str BLOCKS, [sp, #BLOCKS_OFFSET] + + ldr SHIFT, [sp, #SHIFT_OFFSET] + ldr K, [sp, #K_OFFSET] + ldr INPUT, [sp, #INPUT_OFFSET] + ldr I0, [sp, #I0_OFFSET] + + bne .Loop_block + + C Restore input pointer adjustment +IF_BE(` rsbs SHIFT, SHIFT, #32') +IF_LE(` cmp SHIFT, #0') + subne INPUT, INPUT, #4 + orr r0, INPUT, SHIFT, lsr #3 + + C Discard saved STATE, K and BLOCKS. + add sp, sp, #STATE_OFFSET + 12 + pop {r4,r5,r6,r7,r8,r10,r11,r12,pc} +EPILOGUE(_nettle_sha256_compress_n) diff --git a/arm64/crypto/sha256-compress.asm b/arm64/crypto/sha256-compress-n.asm index 2bddea05..447dc590 100644 --- a/arm64/crypto/sha256-compress.asm +++ b/arm64/crypto/sha256-compress-n.asm @@ -1,4 +1,4 @@ -C arm64/crypto/sha256-compress.asm +C arm64/crypto/sha256-compress-n.asm ifelse(` Copyright (C) 2021 Mamone Tarsha @@ -37,7 +37,7 @@ C SHA256H2: SHA256 hash update (part 2) C SHA256SU0: SHA256 schedule update 0 C SHA256SU1: SHA256 schedule update 1 -.file "sha256-compress.asm" +.file "sha256-compress-n.asm" .arch armv8-a+crypto .text @@ -45,8 +45,9 @@ C SHA256SU1: SHA256 schedule update 1 C Register usage: define(`STATE', `x0') -define(`INPUT', `x1') -define(`K', `x2') +define(`K', `x1') +define(`BLOCKS', `x2') +define(`INPUT', `x3') define(`MSG0', `v0') define(`MSG1', `v1') @@ -59,19 +60,23 @@ define(`TMP', `v7') define(`STATE0_SAVED', `v16') define(`STATE1_SAVED', `v17') -C void -C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const 
uint32_t *k) +C const uint8_t * +C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, +C size_t blocks, const uint8_t *input) + +PROLOGUE(_nettle_sha256_compress_n) + cbz BLOCKS, .Lend -PROLOGUE(_nettle_sha256_compress) C Load state ld1 {STATE0.4s,STATE1.4s},[STATE] +.Loop: C Save state mov STATE0_SAVED.16b,STATE0.16b mov STATE1_SAVED.16b,STATE1.16b C Load message - ld1 {MSG0.16b,MSG1.16b,MSG2.16b,MSG3.16b},[INPUT] + ld1 {MSG0.16b,MSG1.16b,MSG2.16b,MSG3.16b},[INPUT],#64 C Reverse for little endian rev32 MSG0.16b,MSG0.16b @@ -217,9 +222,13 @@ PROLOGUE(_nettle_sha256_compress) C Combine state add STATE0.4s,STATE0.4s,STATE0_SAVED.4s add STATE1.4s,STATE1.4s,STATE1_SAVED.4s - + subs BLOCKS, BLOCKS, #1 + sub K, K, #240 + b.ne .Loop + C Store state st1 {STATE0.4s,STATE1.4s},[STATE] - +.Lend: + mov x0, INPUT ret -EPILOGUE(_nettle_sha256_compress) +EPILOGUE(_nettle_sha256_compress_n) diff --git a/arm64/fat/sha256-compress-2.asm b/arm64/fat/sha256-compress-n-2.asm index 67590794..2f70686e 100644 --- a/arm64/fat/sha256-compress-2.asm +++ b/arm64/fat/sha256-compress-n-2.asm @@ -1,4 +1,4 @@ -C arm64/fat/sha256-compress-2.asm +C arm64/fat/sha256-compress-n-2.asm ifelse(` @@ -31,7 +31,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. 
') -dnl PROLOGUE(_nettle_sha256_compress) picked up by configure +dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure define(`fat_transform', `$1_arm64') -include_src(`arm64/crypto/sha256-compress.asm') +include_src(`arm64/crypto/sha256-compress-n.asm') diff --git a/configure.ac b/configure.ac index 3ebfb175..70eb0873 100644 --- a/configure.ac +++ b/configure.ac @@ -598,7 +598,7 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \ chacha-core-internal.asm \ salsa20-crypt.asm salsa20-core-internal.asm \ serpent-encrypt.asm serpent-decrypt.asm \ - sha1-compress.asm sha256-compress.asm sha512-compress.asm \ + sha1-compress.asm sha256-compress-n.asm sha512-compress.asm \ sha3-permute.asm umac-nh.asm umac-nh-n.asm machine.m4" # Assembler files which generate additional object files if they are used. @@ -615,7 +615,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \ poly1305-internal-2.asm \ ghash-set-key-2.asm ghash-update-2.asm \ salsa20-2core.asm salsa20-core-internal-2.asm \ - sha1-compress-2.asm sha256-compress-2.asm \ + sha1-compress-2.asm sha256-compress-n-2.asm \ sha3-permute-2.asm sha512-compress-2.asm \ umac-nh-n-2.asm umac-nh-2.asm" @@ -768,7 +768,7 @@ AH_VERBATIM([HAVE_NATIVE], #undef HAVE_NATIVE_salsa20_2core #undef HAVE_NATIVE_fat_salsa20_2core #undef HAVE_NATIVE_sha1_compress -#undef HAVE_NATIVE_sha256_compress +#undef HAVE_NATIVE_sha256_compress_n #undef HAVE_NATIVE_sha512_compress #undef HAVE_NATIVE_sha3_permute #undef HAVE_NATIVE_umac_nh @@ -153,9 +153,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, armv6) -DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, armv6) +DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func) 
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, armv6) DECLARE_FAT_FUNC(_nettle_sha512_compress, sha512_compress_func) DECLARE_FAT_FUNC_VAR(sha512_compress, sha512_compress_func, c) @@ -202,7 +202,7 @@ fat_init (void) _nettle_aes_encrypt_vec = _nettle_aes_encrypt_armv6; _nettle_aes_decrypt_vec = _nettle_aes_decrypt_armv6; nettle_sha1_compress_vec = _nettle_sha1_compress_armv6; - _nettle_sha256_compress_vec = _nettle_sha256_compress_armv6; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_armv6; } else { @@ -211,7 +211,7 @@ fat_init (void) _nettle_aes_encrypt_vec = _nettle_aes_encrypt_arm; _nettle_aes_decrypt_vec = _nettle_aes_decrypt_arm; nettle_sha1_compress_vec = _nettle_sha1_compress_c; - _nettle_sha256_compress_vec = _nettle_sha256_compress_c; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c; } if (features.have_neon) { @@ -263,9 +263,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void, (uint32_t *state, const uint8_t *input), (state, input)) -DEFINE_FAT_FUNC(_nettle_sha256_compress, void, - (uint32_t *state, const uint8_t *input, const uint32_t *k), - (state, input, k)) +DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *, + (uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input), + (state, k, blocks, input)) DEFINE_FAT_FUNC(_nettle_sha512_compress, void, (uint64_t *state, const uint8_t *input, const uint64_t *k), diff --git a/fat-arm64.c b/fat-arm64.c index f2b8493d..aec99f66 100644 --- a/fat-arm64.c +++ b/fat-arm64.c @@ -178,9 +178,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, arm64) -DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, arm64) 
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, arm64) static void CONSTRUCTOR fat_init (void) @@ -250,11 +250,11 @@ fat_init (void) { if (verbose) fprintf (stderr, "libnettle: enabling hardware-accelerated sha256 compress code.\n"); - _nettle_sha256_compress_vec = _nettle_sha256_compress_arm64; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_arm64; } else { - _nettle_sha256_compress_vec = _nettle_sha256_compress_c; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c; } } @@ -297,6 +297,7 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void, (uint32_t *state, const uint8_t *input), (state, input)) -DEFINE_FAT_FUNC(_nettle_sha256_compress, void, - (uint32_t *state, const uint8_t *input, const uint32_t *k), - (state, input, k)) +DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *, + (uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input), + (state, k, blocks, input)) diff --git a/fat-s390x.c b/fat-s390x.c index fa026018..1bbd8e16 100644 --- a/fat-s390x.c +++ b/fat-s390x.c @@ -254,9 +254,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, s390x) -DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, s390x) +DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, s390x) DECLARE_FAT_FUNC(_nettle_sha512_compress, sha512_compress_func) DECLARE_FAT_FUNC_VAR(sha512_compress, sha512_compress_func, c) @@ -398,11 +398,11 @@ fat_init (void) { if (verbose) fprintf (stderr, "libnettle: 
enabling hardware accelerated SHA256 compress code.\n"); - _nettle_sha256_compress_vec = _nettle_sha256_compress_s390x; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_s390x; } else { - _nettle_sha256_compress_vec = _nettle_sha256_compress_c; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c; } /* SHA512 */ @@ -495,9 +495,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void, (state, input)) /* SHA256 */ -DEFINE_FAT_FUNC(_nettle_sha256_compress, void, - (uint32_t *state, const uint8_t *input, const uint32_t *k), - (state, input, k)) +DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *, + (uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input), + (state, k, blocks, input)) /* SHA512 */ DEFINE_FAT_FUNC(_nettle_sha512_compress, void, diff --git a/fat-setup.h b/fat-setup.h index a35b8b8c..eeec629e 100644 --- a/fat-setup.h +++ b/fat-setup.h @@ -179,7 +179,9 @@ typedef void salsa20_crypt_func (struct salsa20_ctx *ctx, unsigned rounds, const uint8_t *src); typedef void sha1_compress_func(uint32_t *state, const uint8_t *input); -typedef void sha256_compress_func(uint32_t *state, const uint8_t *input, const uint32_t *k); +typedef const uint8_t * +sha256_compress_n_func(uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input); struct sha3_state; typedef void sha3_permute_func (struct sha3_state *state); diff --git a/fat-x86_64.c b/fat-x86_64.c index 47cf78ae..0a2fedf4 100644 --- a/fat-x86_64.c +++ b/fat-x86_64.c @@ -155,9 +155,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, x86_64) DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, sha_ni) -DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, x86_64) -DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, sha_ni) +DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func) 
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, x86_64) +DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, sha_ni) DECLARE_FAT_FUNC(_nettle_ghash_set_key, ghash_set_key_func) DECLARE_FAT_FUNC_VAR(ghash_set_key, ghash_set_key_func, c) @@ -228,14 +228,14 @@ fat_init (void) if (verbose) fprintf (stderr, "libnettle: using sha_ni instructions.\n"); nettle_sha1_compress_vec = _nettle_sha1_compress_sha_ni; - _nettle_sha256_compress_vec = _nettle_sha256_compress_sha_ni; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_sha_ni; } else { if (verbose) fprintf (stderr, "libnettle: not using sha_ni instructions.\n"); nettle_sha1_compress_vec = _nettle_sha1_compress_x86_64; - _nettle_sha256_compress_vec = _nettle_sha256_compress_x86_64; + _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_x86_64; } if (features.have_pclmul) @@ -315,9 +315,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void, (uint32_t *state, const uint8_t *input), (state, input)) -DEFINE_FAT_FUNC(_nettle_sha256_compress, void, - (uint32_t *state, const uint8_t *input, const uint32_t *k), - (state, input, k)) +DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *, + (uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *input), + (state, k, blocks, input)) DEFINE_FAT_FUNC(_nettle_ghash_set_key, void, (struct gcm_key *ctx, const union nettle_block16 *key), diff --git a/md-internal.h b/md-internal.h new file mode 100644 index 00000000..fe520c63 --- /dev/null +++ b/md-internal.h @@ -0,0 +1,57 @@ +/* md-internal.h + + Copyright (C) 2001, 2010, 2022 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. 
+ + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +*/ + +#ifndef NETTLE_MD_INTERNAL_H_INCLUDED +#define NETTLE_MD_INTERNAL_H_INCLUDED + +/* Internal helper macros for Merkle-Damgård hash functions. Assumes the context + structs includes the following fields: + + uint8_t block[...]; // Buffer holding one block + unsigned int index; // Index into block +*/ + +#define MD_FILL_OR_RETURN(ctx, length, data) \ + do { \ + unsigned __md_left = sizeof((ctx)->block) - (ctx)->index; \ + if ((length) < __md_left) \ + { \ + memcpy((ctx)->block + (ctx)->index, (data), (length)); \ + (ctx)->index += (length); \ + return; \ + } \ + memcpy((ctx)->block + (ctx)->index, (data), __md_left); \ + (data) += __md_left; \ + (length) -= __md_left; \ + (ctx)->index = 0; \ + } while(0) + +#endif /* NETTLE_MD_INTERNAL_H_INCLUDED */ diff --git a/s390x/fat/sha256-compress-2.asm b/s390x/fat/sha256-compress-n-2.asm index f4b16181..06fb1014 100644 --- a/s390x/fat/sha256-compress-2.asm +++ b/s390x/fat/sha256-compress-n-2.asm @@ -1,4 +1,4 @@ -C s390x/fat/sha256-compress-2.asm +C s390x/fat/sha256-compress-n-2.asm ifelse(` Copyright (C) 2021 Mamone Tarsha @@ -30,7 +30,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. 
') -dnl PROLOGUE(_nettle_sha256_compress) picked up by configure +dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure define(`fat_transform', `$1_s390x') -include_src(`s390x/msa_x1/sha256-compress.asm') +include_src(`s390x/msa_x1/sha256-compress-n.asm') diff --git a/s390x/msa_x1/sha256-compress.asm b/s390x/msa_x1/sha256-compress-n.asm index 9a9511fb..51539927 100644 --- a/s390x/msa_x1/sha256-compress.asm +++ b/s390x/msa_x1/sha256-compress-n.asm @@ -1,7 +1,7 @@ -C s390x/msa_x1/sha256-compress.asm +C s390x/msa_x1/sha256-compress-n.asm ifelse(` - Copyright (C) 2021 Mamone Tarsha + Copyright (C) 2021, 2022 Mamone Tarsha, Niels Möller This file is part of GNU Nettle. GNU Nettle is free software: you can redistribute it and/or @@ -56,25 +56,23 @@ C |----------------------------------------------| C | H7 (4 bytes) | C *----------------------------------------------* -.file "sha256-compress.asm" +.file "sha256-compress-n.asm" .text C SHA function code define(`SHA256_FUNCTION_CODE', `2') -C Size of block -define(`SHA256_BLOCK_SIZE', `64') -C void -C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, -C const uint32_t *k) +C const uint8_t * +C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, +C size_t blocks, const uint8_t *input) -PROLOGUE(_nettle_sha256_compress) +PROLOGUE(_nettle_sha256_compress_n) lghi %r0,SHA256_FUNCTION_CODE C SHA-256 Function Code lgr %r1,%r2 - lgr %r4,%r3 - lghi %r5,SHA256_BLOCK_SIZE -1: .long 0xb93e0004 C kimd %r0,%r4. perform KIMD-SHA operation on data + lgr %r2, %r5 + sllg %r3, %r4, 6 C 64 * block size +1: .long 0xb93e0002 C kimd %r0,%r2. perform KIMD-SHA operation on data brc 1,1b br RA -EPILOGUE(_nettle_sha256_compress) +EPILOGUE(_nettle_sha256_compress_n) diff --git a/sha2-internal.h b/sha2-internal.h index 40f25a5f..93080bee 100644 --- a/sha2-internal.h +++ b/sha2-internal.h @@ -39,8 +39,9 @@ /* Internal compression function. 
STATE points to 8 uint32_t words, DATA points to 64 bytes of input data, possibly unaligned, and K points to the table of constants. */ -void -_nettle_sha256_compress(uint32_t *state, const uint8_t *data, const uint32_t *k); +const uint8_t * +_nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, + size_t blocks, const uint8_t *data); /* Internal compression function. STATE points to 8 uint64_t words, DATA points to 128 bytes of input data, possibly unaligned, and K diff --git a/sha256-compress.c b/sha256-compress-n.c index cf17e3e1..d135d14f 100644 --- a/sha256-compress.c +++ b/sha256-compress-n.c @@ -1,8 +1,8 @@ -/* sha256-compress.c +/* sha256-compress-n.c The compression function of the sha256 hash function. - Copyright (C) 2001, 2010 Niels Möller + Copyright (C) 2001, 2010, 2022 Niels Möller This file is part of GNU Nettle. @@ -118,26 +118,19 @@ } while (0) /* For fat builds */ -#if HAVE_NATIVE_sha256_compress -void -_nettle_sha256_compress_c(uint32_t *state, const uint8_t *input, const uint32_t *k); -#define _nettle_sha256_compress _nettle_sha256_compress_c +#if HAVE_NATIVE_sha256_compress_n +const uint8_t * +_nettle_sha256_compress_n_c(uint32_t *state, const uint32_t *table, + size_t blocks, const uint8_t *input); +#define _nettle_sha256_compress_n _nettle_sha256_compress_n_c #endif -void -_nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) +const uint8_t * +_nettle_sha256_compress_n(uint32_t *state, const uint32_t *table, + size_t blocks, const uint8_t *input) { - uint32_t data[SHA256_DATA_LENGTH]; uint32_t A, B, C, D, E, F, G, H; /* Local vars */ - unsigned i; - uint32_t *d; - for (i = 0; i < SHA256_DATA_LENGTH; i++, input+= 4) - { - data[i] = READ_UINT32(input); - } - - /* Set up first buffer and local data buffer */ A = state[0]; B = state[1]; C = state[2]; @@ -146,55 +139,68 @@ _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k F = state[5]; G = state[6]; H = state[7]; - - /* Heavy 
mangling */ - /* First 16 subrounds that act on the original data */ - DEBUG(-1); - for (i = 0, d = data; i<16; i+=8, k += 8, d+= 8) + for (; blocks > 0; blocks--) { - ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); DEBUG(i); - ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); DEBUG(i+1); - ROUND(G, H, A, B, C, D, E, F, k[2], d[2]); - ROUND(F, G, H, A, B, C, D, E, k[3], d[3]); - ROUND(E, F, G, H, A, B, C, D, k[4], d[4]); - ROUND(D, E, F, G, H, A, B, C, k[5], d[5]); - ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); DEBUG(i+6); - ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); DEBUG(i+7); - } + uint32_t data[SHA256_DATA_LENGTH]; + unsigned i; + const uint32_t *k; + uint32_t *d; + for (i = 0; i < SHA256_DATA_LENGTH; i++, input+= 4) + { + data[i] = READ_UINT32(input); + } + + /* Heavy mangling */ + /* First 16 subrounds that act on the original data */ + + DEBUG(-1); + for (i = 0, d = data, k = table; i<16; i+=8, k += 8, d+= 8) + { + ROUND(A, B, C, D, E, F, G, H, k[0], d[0]); DEBUG(i); + ROUND(H, A, B, C, D, E, F, G, k[1], d[1]); DEBUG(i+1); + ROUND(G, H, A, B, C, D, E, F, k[2], d[2]); + ROUND(F, G, H, A, B, C, D, E, k[3], d[3]); + ROUND(E, F, G, H, A, B, C, D, k[4], d[4]); + ROUND(D, E, F, G, H, A, B, C, k[5], d[5]); + ROUND(C, D, E, F, G, H, A, B, k[6], d[6]); DEBUG(i+6); + ROUND(B, C, D, E, F, G, H, A, k[7], d[7]); DEBUG(i+7); + } - for (; i<64; i += 16, k+= 16) - { - ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 0)); DEBUG(i); - ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); DEBUG(i+1); - ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); DEBUG(i+2); - ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); DEBUG(i+3); - ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); DEBUG(i+4); - ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); DEBUG(i+5); - ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); DEBUG(i+6); - ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); DEBUG(i+7); - ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); DEBUG(i+8); - ROUND(H, A, 
B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); DEBUG(i+9); - ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); DEBUG(i+10); - ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); DEBUG(i+11); - ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); DEBUG(i+12); - ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); DEBUG(i+13); - ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); DEBUG(i+14); - ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); DEBUG(i+15); - } - - /* Update state */ - state[0] += A; - state[1] += B; - state[2] += C; - state[3] += D; - state[4] += E; - state[5] += F; - state[6] += G; - state[7] += H; + for (; i<64; i += 16, k+= 16) + { + ROUND(A, B, C, D, E, F, G, H, k[ 0], EXPAND(data, 0)); DEBUG(i); + ROUND(H, A, B, C, D, E, F, G, k[ 1], EXPAND(data, 1)); DEBUG(i+1); + ROUND(G, H, A, B, C, D, E, F, k[ 2], EXPAND(data, 2)); DEBUG(i+2); + ROUND(F, G, H, A, B, C, D, E, k[ 3], EXPAND(data, 3)); DEBUG(i+3); + ROUND(E, F, G, H, A, B, C, D, k[ 4], EXPAND(data, 4)); DEBUG(i+4); + ROUND(D, E, F, G, H, A, B, C, k[ 5], EXPAND(data, 5)); DEBUG(i+5); + ROUND(C, D, E, F, G, H, A, B, k[ 6], EXPAND(data, 6)); DEBUG(i+6); + ROUND(B, C, D, E, F, G, H, A, k[ 7], EXPAND(data, 7)); DEBUG(i+7); + ROUND(A, B, C, D, E, F, G, H, k[ 8], EXPAND(data, 8)); DEBUG(i+8); + ROUND(H, A, B, C, D, E, F, G, k[ 9], EXPAND(data, 9)); DEBUG(i+9); + ROUND(G, H, A, B, C, D, E, F, k[10], EXPAND(data, 10)); DEBUG(i+10); + ROUND(F, G, H, A, B, C, D, E, k[11], EXPAND(data, 11)); DEBUG(i+11); + ROUND(E, F, G, H, A, B, C, D, k[12], EXPAND(data, 12)); DEBUG(i+12); + ROUND(D, E, F, G, H, A, B, C, k[13], EXPAND(data, 13)); DEBUG(i+13); + ROUND(C, D, E, F, G, H, A, B, k[14], EXPAND(data, 14)); DEBUG(i+14); + ROUND(B, C, D, E, F, G, H, A, k[15], EXPAND(data, 15)); DEBUG(i+15); + } + + /* Update state */ + state[0] = A = state[0] + A; + state[1] = B = state[1] + B; + state[2] = C = state[2] + C; + state[3] = D = state[3] + D; + state[4] = E = state[4] + E; + state[5] = F = 
state[5] + F; + state[6] = G = state[6] + G; + state[7] = H = state[7] + H; #if SHA256_DEBUG - fprintf(stderr, "99: %8x %8x %8x %8x %8x %8x %8x %8x\n", - state[0], state[1], state[2], state[3], - state[4], state[5], state[6], state[7]); + fprintf(stderr, "99: %8x %8x %8x %8x %8x %8x %8x %8x\n", + state[0], state[1], state[2], state[3], + state[4], state[5], state[6], state[7]); #endif + } + return input; } @@ -46,6 +46,7 @@ #include "sha2-internal.h" #include "macros.h" +#include "md-internal.h" #include "nettle-write.h" /* Generated by the shadata program. */ @@ -70,6 +71,12 @@ K[64] = 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL, }; +void +sha256_compress(uint32_t *state, const uint8_t *input) +{ + _nettle_sha256_compress_n(state, K, 1, input); +} + #define COMPRESS(ctx, data) (sha256_compress((ctx)->state, (data))) /* Initialize the SHA values */ @@ -97,7 +104,22 @@ void sha256_update(struct sha256_ctx *ctx, size_t length, const uint8_t *data) { - MD_UPDATE (ctx, length, data, COMPRESS, ctx->count++); + size_t blocks; + if (ctx->index > 0) + { + /* Try to fill partial block */ + MD_FILL_OR_RETURN (ctx, length, data); + sha256_compress (ctx->state, ctx->block); + ctx->count++; + } + + blocks = length >> 6; + data = _nettle_sha256_compress_n (ctx->state, K, blocks, data); + ctx->count += blocks; + length &= 63; + + memcpy (ctx->block, data, length); + ctx->index = length; } static void @@ -161,9 +183,3 @@ sha224_digest(struct sha256_ctx *ctx, sha256_write_digest(ctx, length, digest); sha224_init(ctx); } - -void -sha256_compress(uint32_t *state, const uint8_t *input) -{ - _nettle_sha256_compress(state, input, K); -} diff --git a/x86_64/fat/sha256-compress-2.asm b/x86_64/fat/sha256-compress-n-2.asm index 996cf8c5..60f7c8f6 100644 --- a/x86_64/fat/sha256-compress-2.asm +++ b/x86_64/fat/sha256-compress-n-2.asm @@ -1,4 +1,4 @@ -C x86_64/fat/sha256-compress-2.asm +C x86_64/fat/sha256-compress-n-2.asm ifelse(` Copyright (C) 2018 Niels Möller @@ -31,4 +31,4 @@ 
ifelse(` ') define(`fat_transform', `$1_sha_ni') -include_src(`x86_64/sha_ni/sha256-compress.asm') +include_src(`x86_64/sha_ni/sha256-compress-n.asm') diff --git a/x86_64/fat/sha256-compress.asm b/x86_64/fat/sha256-compress-n.asm index 2aaeb5e8..fc358858 100644 --- a/x86_64/fat/sha256-compress.asm +++ b/x86_64/fat/sha256-compress-n.asm @@ -1,4 +1,4 @@ -C x86_64/fat/sha256-compress.asm +C x86_64/fat/sha256-compress-n.asm ifelse(` Copyright (C) 2018 Niels Möller @@ -31,4 +31,4 @@ ifelse(` ') define(`fat_transform', `$1_x86_64') -include_src(`x86_64/sha256-compress.asm') +include_src(`x86_64/sha256-compress-n.asm') diff --git a/x86_64/sha256-compress.asm b/x86_64/sha256-compress-n.asm index 5ed669b1..e10d260c 100644 --- a/x86_64/sha256-compress.asm +++ b/x86_64/sha256-compress-n.asm @@ -1,7 +1,7 @@ -C x86_64/sha256-compress.asm +C x86_64/sha256-compress-n.asm ifelse(` - Copyright (C) 2013 Niels Möller + Copyright (C) 2013, 2022 Niels Möller This file is part of GNU Nettle. @@ -30,21 +30,24 @@ ifelse(` not, see http://www.gnu.org/licenses/. 
') - .file "sha256-compress.asm" + .file "sha256-compress-n.asm" define(`STATE', `%rdi') -define(`INPUT', `%rsi') -define(`K', `%rdx') +define(`K', `%rsi') +define(`BLOCKS', `%rdx') +define(`INPUT', `%rcx') +define(`STATE_SAVED', `64(%rsp)') + define(`SA', `%eax') define(`SB', `%ebx') -define(`SC', `%ecx') +define(`SC', `%ebp') define(`SD', `%r8d') define(`SE', `%r9d') define(`SF', `%r10d') define(`SG', `%r11d') define(`SH', `%r12d') define(`T0', `%r13d') -define(`T1', `%edi') C Overlap STATE -define(`COUNT', `%r14') +define(`T1', `%r14d') +define(`COUNT', `%rdi') C Overlap STATE define(`W', `%r15d') define(`EXPN', ` @@ -123,18 +126,21 @@ define(`NOEXPN', ` movl W, OFFSET($1)(%rsp, COUNT, 4) ') - C void - C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) + C const uint8_t * + C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, + C size_t blocks, const uint8_t *input) .text ALIGN(16) -PROLOGUE(_nettle_sha256_compress) +PROLOGUE(_nettle_sha256_compress_n) W64_ENTRY(3, 0) + test BLOCKS, BLOCKS + jz .Lend sub $120, %rsp - mov %rbx, 64(%rsp) - mov STATE, 72(%rsp) C Save state, to free a register + mov STATE, STATE_SAVED C Save state, to free a register + mov %rbx, 72(%rsp) mov %rbp, 80(%rsp) mov %r12, 88(%rsp) mov %r13, 96(%rsp) @@ -149,7 +155,9 @@ PROLOGUE(_nettle_sha256_compress) movl 20(STATE), SF movl 24(STATE), SG movl 28(STATE), SH - xor COUNT, COUNT + +.Loop_block: + xorl XREG(COUNT), XREG(COUNT) ALIGN(16) .Loop1: @@ -161,8 +169,8 @@ PROLOGUE(_nettle_sha256_compress) NOEXPN(5) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,5) NOEXPN(6) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,6) NOEXPN(7) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,7) - add $8, COUNT - cmp $16, COUNT + addl $8, XREG(COUNT) + cmpl $16, XREG(COUNT) jne .Loop1 .Loop2: @@ -182,22 +190,35 @@ PROLOGUE(_nettle_sha256_compress) EXPN(13) ROUND(SD,SE,SF,SG,SH,SA,SB,SC,13) EXPN(14) ROUND(SC,SD,SE,SF,SG,SH,SA,SB,14) EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA,15) - add $16, COUNT - cmp $64, COUNT + addl $16, 
XREG(COUNT) + cmpl $64, XREG(COUNT) jne .Loop2 - mov 72(%rsp), STATE - - addl SA, (STATE) - addl SB, 4(STATE) - addl SC, 8(STATE) - addl SD, 12(STATE) - addl SE, 16(STATE) - addl SF, 20(STATE) - addl SG, 24(STATE) - addl SH, 28(STATE) - - mov 64(%rsp), %rbx + mov STATE_SAVED, STATE + + addl (STATE), SA + addl 4(STATE), SB + addl 8(STATE), SC + addl 12(STATE), SD + addl 16(STATE), SE + addl 20(STATE), SF + addl 24(STATE), SG + addl 28(STATE), SH + + movl SA, (STATE) + movl SB, 4(STATE) + movl SC, 8(STATE) + movl SD, 12(STATE) + movl SE, 16(STATE) + movl SF, 20(STATE) + movl SG, 24(STATE) + movl SH, 28(STATE) + + add $64, INPUT + dec BLOCKS + jnz .Loop_block + + mov 72(%rsp), %rbx mov 80(%rsp), %rbp mov 88(%rsp), %r12 mov 96(%rsp), %r13 @@ -205,6 +226,8 @@ PROLOGUE(_nettle_sha256_compress) mov 112(%rsp),%r15 add $120, %rsp +.Lend: + mov INPUT, %rax W64_EXIT(3, 0) ret -EPILOGUE(_nettle_sha256_compress) +EPILOGUE(_nettle_sha256_compress_n) diff --git a/x86_64/sha_ni/sha256-compress.asm b/x86_64/sha_ni/sha256-compress-n.asm index 00bd3cd3..005909df 100644 --- a/x86_64/sha_ni/sha256-compress.asm +++ b/x86_64/sha_ni/sha256-compress-n.asm @@ -1,7 +1,7 @@ -C x86_64/sha_ni/sha256-compress.asm +C x86_64/sha_ni/sha256-compress-n.asm ifelse(` - Copyright (C) 2018 Niels Möller + Copyright (C) 2018, 2022 Niels Möller This file is part of GNU Nettle. @@ -30,10 +30,11 @@ ifelse(` not, see http://www.gnu.org/licenses/. 
') - .file "sha256-compress.asm" + .file "sha256-compress-n.asm" define(`STATE', `%rdi') -define(`INPUT', `%rsi') -define(`K', `%rdx') +define(`K', `%rsi') +define(`BLOCKS', `%rdx') +define(`INPUT', `%rcx') define(`MSGK',`%xmm0') C Implicit operand of sha256rnds2 define(`MSG0',`%xmm1') @@ -45,7 +46,7 @@ define(`CDGH',`%xmm6') define(`ABEF_ORIG',`%xmm7') define(`CDGH_ORIG', `%xmm8') define(`SWAP_MASK',`%xmm9') -define(`TMP', `%xmm9') C Overlaps SWAP_MASK +define(`TMP', `%xmm10') C QROUND(M0, M1, M2, M3, R) define(`QROUND', ` @@ -69,15 +70,19 @@ define(`TRANSPOSE', ` punpcklqdq $1, $3 ') - C void - C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k) + C const uint8_t * + C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k, + C size_t blocks, const uint8_t *input) .text ALIGN(16) .Lswap_mask: .byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12 -PROLOGUE(_nettle_sha256_compress) - W64_ENTRY(3, 10) +PROLOGUE(_nettle_sha256_compress_n) + W64_ENTRY(4, 11) + test BLOCKS, BLOCKS + jz .Lend + movups (STATE), TMP movups 16(STATE), ABEF @@ -88,12 +93,13 @@ PROLOGUE(_nettle_sha256_compress) movdqa .Lswap_mask(%rip), SWAP_MASK - movdqa ABEF, ABEF_ORIG - movdqa CDGH, CDGH_ORIG - +.Loop: movups (INPUT), MSG0 pshufb SWAP_MASK, MSG0 + movdqa ABEF, ABEF_ORIG + movdqa CDGH, CDGH_ORIG + movdqa (K), MSGK paddd MSG0, MSGK sha256rnds2 ABEF, CDGH C Round 0-1 @@ -163,6 +169,10 @@ PROLOGUE(_nettle_sha256_compress) paddd ABEF_ORIG, ABEF paddd CDGH_ORIG, CDGH + add $64, INPUT + dec BLOCKS + jnz .Loop + TRANSPOSE(ABEF, CDGH, TMP) pshufd $0x1b, CDGH, CDGH @@ -170,6 +180,8 @@ PROLOGUE(_nettle_sha256_compress) movups CDGH, 0(STATE) movups TMP, 16(STATE) - W64_EXIT(3, 10) +.Lend: + mov INPUT, %rax + W64_EXIT(4, 11) ret -EPILOGUE(_nettle_sha256_compress) +EPILOGUE(_nettle_sha256_compress_n) |