summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2022-07-26 21:24:57 +0200
committer Niels Möller <nisse@lysator.liu.se> 2022-07-26 21:24:57 +0200
commit 840383935c7331ddc451f05db802137b96e5c564 (patch)
tree f6f9e02f10a10e0c18afe98f61ab0e0c6582ae49
parent dba9cacc4757df033034978e1959a1564345ae07 (diff)
download nettle-840383935c7331ddc451f05db802137b96e5c564.tar.gz
arm: Implement sha256_compress_n
-rw-r--r-- ChangeLog 8
-rw-r--r-- arm/fat/sha256-compress-n-2.asm (renamed from arm/fat/sha256-compress-2.asm) 6
-rw-r--r-- arm/v6/sha256-compress-n.asm (renamed from arm/v6/sha256-compress.asm) 110
-rw-r--r-- fat-arm.c 17
4 files changed, 97 insertions, 44 deletions
diff --git a/ChangeLog b/ChangeLog
index a2cdefa0..bd97421e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2022-07-26 Niels Möller <nisse@lysator.liu.se>
+
+ * arm/v6/sha256-compress-n.asm: New file. replacing...
+ * arm/v6/sha256-compress.asm: ...deleted file.
+ * arm/fat/sha256-compress-n-2.asm: New file. replacing...
+ * arm/fat/sha256-compress-2.asm: ...deleted file.
+ * fat-arm.c: Update fat setup.
+
2022-07-11 Niels Möller <nisse@lysator.liu.se>
* arm64/crypto/sha256-compress-n.asm: New file. replacing...
diff --git a/arm/fat/sha256-compress-2.asm b/arm/fat/sha256-compress-n-2.asm
index 36d55e4b..8834d93d 100644
--- a/arm/fat/sha256-compress-2.asm
+++ b/arm/fat/sha256-compress-n-2.asm
@@ -1,4 +1,4 @@
-C arm/fat/sha256-compress-2.asm
+C arm/fat/sha256-compress-n-2.asm
ifelse(`
@@ -31,7 +31,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-dnl PROLOGUE(_nettle_sha256_compress) picked up by configure
+dnl PROLOGUE(_nettle_sha256_compress_n) picked up by configure
define(`fat_transform', `$1_armv6')
-include_src(`arm/v6/sha256-compress.asm')
+include_src(`arm/v6/sha256-compress-n.asm')
diff --git a/arm/v6/sha256-compress.asm b/arm/v6/sha256-compress-n.asm
index 3c021284..bf225bd8 100644
--- a/arm/v6/sha256-compress.asm
+++ b/arm/v6/sha256-compress-n.asm
@@ -1,7 +1,7 @@
-C arm/v6/sha256-compress.asm
+C arm/v6/sha256-compress-n.asm
ifelse(`
- Copyright (C) 2013 Niels Möller
+ Copyright (C) 2013, 2022 Niels Möller
This file is part of GNU Nettle.
@@ -30,13 +30,14 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
- .file "sha256-compress.asm"
+ .file "sha256-compress-n.asm"
.arch armv6
define(`STATE', `r0')
-define(`INPUT', `r1')
-define(`K', `r2')
-define(`SA', `r3')
+define(`K', `r1')
+define(`BLOCKS', `r2')
+define(`INPUT', `r3')
+define(`SA', `r2') C Overlap BLOCKS
define(`SB', `r4')
define(`SC', `r5')
define(`SD', `r6')
@@ -45,12 +46,12 @@ define(`SF', `r8')
define(`SG', `r10')
define(`SH', `r11')
define(`T0', `r12')
-define(`T1', `r1') C Overlap INPUT
+define(`T1', `r3') C Overlap INPUT
define(`COUNT', `r0') C Overlap STATE
define(`W', `r14')
-C Used for data load
-define(`I0', `r3')
+C Used for data load. Must not clobber STATE (r0), K (r1) or INPUT (r3)
+define(`I0', `r2')
define(`I1', `r4')
define(`I2', `r5')
define(`I3', `r6')
@@ -88,7 +89,7 @@ C S1(E) = E<<<26 ^ E<<<21 ^ E<<<7
C S0(A) = A<<<30 ^ A<<<19 ^ A<<<10
C Choice (E, F, G) = G^(E&(F^G))
C Majority (A,B,C) = (A&B) + (C&(A^B))
-
+
define(`ROUND', `
ror T0, $5, #6
eor T0, T0, $5, ror #11
@@ -117,16 +118,31 @@ define(`NOEXPN', `
ldr W, [sp, + $1]
add $1, $1, #4
')
- C void
- C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
-
.text
.align 2
-PROLOGUE(_nettle_sha256_compress)
- push {r4,r5,r6,r7,r8,r10,r11,r14}
- sub sp, sp, #68
- str STATE, [sp, #+64]
+define(`SHIFT_OFFSET', 64)
+define(`INPUT_OFFSET', 68)
+define(`I0_OFFSET', 72)
+define(`STATE_OFFSET', 76)
+define(`K_OFFSET', 80)
+define(`BLOCKS_OFFSET', 84)
+
+ C const uint8_t *
+ C _nettle_sha256_compress_n(uint32_t *state, const uint32_t *k,
+ C size_t blocks, const uint8_t *input)
+
+PROLOGUE(_nettle_sha256_compress_n)
+ cmp BLOCKS, #0
+ bne .Lwork
+
+ mov r0, INPUT
+ bx lr
+
+.Lwork:
+ C Also save STATE (r0), K (r1) and BLOCKS (r2)
+ push {r0,r1,r2,r4,r5,r6,r7,r8,r10,r11,r12,r14}
+ sub sp, sp, #STATE_OFFSET
C Load data up front, since we don't have enough registers
C to load and shift on-the-fly
@@ -144,6 +160,9 @@ IF_BE(` lsr I1, T0, SHIFT')
C because there is no rotate left
IF_BE(` rsb SHIFT, SHIFT, #32')
+ str SHIFT, [sp, #SHIFT_OFFSET]
+
+.Loop_block:
mov DST, sp
mov ILEFT, #4
.Lcopy:
@@ -164,7 +183,12 @@ IF_LE(` rev I3, I3')
stm DST!, {I0,I1,I2,I3}
mov I0, I4
bne .Lcopy
-
+
+ str INPUT, [sp, #INPUT_OFFSET]
+ str I0, [sp, #I0_OFFSET]
+
+ C Process block, with input at sp, expanded on the fly
+
ldm STATE, {SA,SB,SC,SD,SE,SF,SG,SH}
mov COUNT,#0
@@ -203,20 +227,40 @@ IF_LE(` rev I3, I3')
EXPN(15) ROUND(SB,SC,SD,SE,SF,SG,SH,SA)
bne .Loop2
- ldr STATE, [sp, #+64]
+ ldr STATE, [sp, #STATE_OFFSET]
C No longer needed registers
- ldm STATE, {r1,r2,r12,r14}
- add SA, SA, r1
- add SB, SB, r2
- add SC, SC, r12
- add SD, SD, r14
+ ldm STATE, {K, T1, T0, W}
+ add SA, SA, K
+ add SB, SB, T1
+ add SC, SC, T0
+ add SD, SD, W
stm STATE!, {SA,SB,SC,SD}
- ldm STATE, {r1,r2,r12,r14}
- add SE, SE, r1
- add SF, SF, r2
- add SG, SG, r12
- add SH, SH, r14
- stm STATE!, {SE,SF,SG,SH}
- add sp, sp, #68
- pop {r4,r5,r6,r7,r8,r10,r11,pc}
-EPILOGUE(_nettle_sha256_compress)
+ ldm STATE, {K, T1, T0, W}
+ add SE, SE, K
+ add SF, SF, T1
+ add SG, SG, T0
+ add SH, SH, W
+ stm STATE, {SE,SF,SG,SH}
+ sub STATE, STATE, #16
+
+ ldr BLOCKS, [sp, #BLOCKS_OFFSET]
+ subs BLOCKS, BLOCKS, #1
+ str BLOCKS, [sp, #BLOCKS_OFFSET]
+
+ ldr SHIFT, [sp, #SHIFT_OFFSET]
+ ldr K, [sp, #K_OFFSET]
+ ldr INPUT, [sp, #INPUT_OFFSET]
+ ldr I0, [sp, #I0_OFFSET]
+
+ bne .Loop_block
+
+ C Restore input pointer adjustment
+IF_BE(` rsbs SHIFT, SHIFT, #32')
+IF_LE(` cmp SHIFT, #0')
+ subne INPUT, INPUT, #4
+ orr r0, INPUT, SHIFT, lsr #3
+
+ C Discard saved STATE, K and BLOCKS.
+ add sp, sp, #STATE_OFFSET + 12
+ pop {r4,r5,r6,r7,r8,r10,r11,r12,pc}
+EPILOGUE(_nettle_sha256_compress_n)
diff --git a/fat-arm.c b/fat-arm.c
index 56647404..8133ca69 100644
--- a/fat-arm.c
+++ b/fat-arm.c
@@ -153,9 +153,9 @@ DECLARE_FAT_FUNC(nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, c)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, armv6)
-DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, c)
-DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, armv6)
+DECLARE_FAT_FUNC(_nettle_sha256_compress_n, sha256_compress_n_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, c)
+DECLARE_FAT_FUNC_VAR(sha256_compress_n, sha256_compress_n_func, armv6)
DECLARE_FAT_FUNC(_nettle_sha512_compress, sha512_compress_func)
DECLARE_FAT_FUNC_VAR(sha512_compress, sha512_compress_func, c)
@@ -202,7 +202,7 @@ fat_init (void)
_nettle_aes_encrypt_vec = _nettle_aes_encrypt_armv6;
_nettle_aes_decrypt_vec = _nettle_aes_decrypt_armv6;
nettle_sha1_compress_vec = _nettle_sha1_compress_armv6;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_armv6;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_armv6;
}
else
{
@@ -211,7 +211,7 @@ fat_init (void)
_nettle_aes_encrypt_vec = _nettle_aes_encrypt_arm;
_nettle_aes_decrypt_vec = _nettle_aes_decrypt_arm;
nettle_sha1_compress_vec = _nettle_sha1_compress_c;
- _nettle_sha256_compress_vec = _nettle_sha256_compress_c;
+ _nettle_sha256_compress_n_vec = _nettle_sha256_compress_n_c;
}
if (features.have_neon)
{
@@ -263,9 +263,10 @@ DEFINE_FAT_FUNC(nettle_sha1_compress, void,
(uint32_t *state, const uint8_t *input),
(state, input))
-DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
- (uint32_t *state, const uint8_t *input, const uint32_t *k),
- (state, input, k))
+DEFINE_FAT_FUNC(_nettle_sha256_compress_n, const uint8_t *,
+ (uint32_t *state, const uint32_t *k,
+ size_t blocks, const uint8_t *input),
+ (state, k, blocks, input))
DEFINE_FAT_FUNC(_nettle_sha512_compress, void,
(uint64_t *state, const uint8_t *input, const uint64_t *k),