summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Niels Möller <nisse@lysator.liu.se> 2018-03-14 19:21:33 +0100
committer: Niels Möller <nisse@lysator.liu.se> 2018-03-14 19:21:33 +0100
commit: 34224fa0576c4eae107ec37bf679a687692d776f (patch)
tree: 17cccf10f491e6821d4e69512d17e69ad1237526
parent: be5b2eda8461cbe2f586b82fc04a245e4d18da99 (diff)
parent: c02493e53384bc5c6d6d07581524c81d181e1fdd (diff)
download: nettle-34224fa0576c4eae107ec37bf679a687692d776f.tar.gz
Merge branch 'x86_64-sha_ni-sha256'
-rw-r--r-- ChangeLog 17
-rw-r--r-- fat-x86_64.c 10
-rw-r--r-- x86_64/fat/sha256-compress-2.asm 34
-rw-r--r-- x86_64/fat/sha256-compress.asm 34
-rw-r--r-- x86_64/sha_ni/sha256-compress.asm 175
5 files changed, 270 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index efe70d63..829ad0ab 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2018-03-14 Niels Möller <nisse@lysator.liu.se>
+
+ Merge sha256 code using the x86_64 sha_ni instructions, starting
+ 2018-02-21.
+
+2018-03-11 Niels Möller <nisse@lysator.liu.se>
+
+ * x86_64/fat/sha256-compress.asm: New file.
+ * x86_64/fat/sha256-compress-2.asm: New file.
+ * fat-x86_64.c (fat_init): Select plain x86_64 assembly version or
+ sha_ni version for sha256_compress.
+
+2018-02-21 Niels Möller <nisse@lysator.liu.se>
+
+ * x86_64/sha_ni/sha256-compress.asm: New implementation using sha_ni
+ instructions.
+
2018-02-20 Niels Möller <nisse@lysator.liu.se>
* testsuite/cmac-test.c (test_cmac_hash): Deallocate ctx properly.
diff --git a/fat-x86_64.c b/fat-x86_64.c
index bf7d31cf..b66d0861 100644
--- a/fat-x86_64.c
+++ b/fat-x86_64.c
@@ -127,6 +127,10 @@ DECLARE_FAT_FUNC(_nettle_sha1_compress, sha1_compress_func)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, x86_64)
DECLARE_FAT_FUNC_VAR(sha1_compress, sha1_compress_func, sha_ni)
+DECLARE_FAT_FUNC(_nettle_sha256_compress, sha256_compress_func)
+DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, x86_64)
+DECLARE_FAT_FUNC_VAR(sha256_compress, sha256_compress_func, sha_ni)
+
/* This function should usually be called only once, at startup. But
it is idempotent, and on x86, pointer updates are atomic, so
there's no danger if it is called simultaneously from multiple
@@ -172,12 +176,14 @@ fat_init (void)
if (verbose)
fprintf (stderr, "libnettle: using sha_ni instructions.\n");
_nettle_sha1_compress_vec = _nettle_sha1_compress_sha_ni;
+ _nettle_sha256_compress_vec = _nettle_sha256_compress_sha_ni;
}
else
{
if (verbose)
fprintf (stderr, "libnettle: not using sha_ni instructions.\n");
_nettle_sha1_compress_vec = _nettle_sha1_compress_x86_64;
+ _nettle_sha256_compress_vec = _nettle_sha256_compress_x86_64;
}
if (features.vendor == X86_INTEL)
{
@@ -214,3 +220,7 @@ DEFINE_FAT_FUNC(nettle_memxor, void *,
DEFINE_FAT_FUNC(_nettle_sha1_compress, void,
(uint32_t *state, const uint8_t *input),
(state, input))
+
+DEFINE_FAT_FUNC(_nettle_sha256_compress, void,
+ (uint32_t *state, const uint8_t *input, const uint32_t *k),
+ (state, input, k))
diff --git a/x86_64/fat/sha256-compress-2.asm b/x86_64/fat/sha256-compress-2.asm
new file mode 100644
index 00000000..50e23d49
--- /dev/null
+++ b/x86_64/fat/sha256-compress-2.asm
@@ -0,0 +1,34 @@
+C x86_64/fat/sha256-compress-2.asm
+
+ifelse(<
+ Copyright (C) 2018 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+>)
+
+define(<fat_transform>, <$1_sha_ni>) C Maps a symbol name to the same name with an _sha_ni suffix; presumably applied by PROLOGUE/EPILOGUE -- TODO confirm
+include_src(<x86_64/sha_ni/sha256-compress.asm>) C Build the sha_ni implementation as the fat variant
diff --git a/x86_64/fat/sha256-compress.asm b/x86_64/fat/sha256-compress.asm
new file mode 100644
index 00000000..ac300cf1
--- /dev/null
+++ b/x86_64/fat/sha256-compress.asm
@@ -0,0 +1,34 @@
+C x86_64/fat/sha256-compress.asm
+
+ifelse(<
+ Copyright (C) 2018 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+>)
+
+define(<fat_transform>, <$1_x86_64>) C Maps a symbol name to the same name with an _x86_64 suffix; presumably applied by PROLOGUE/EPILOGUE -- TODO confirm
+include_src(<x86_64/sha256-compress.asm>) C Build the plain x86_64 implementation as the fat variant
diff --git a/x86_64/sha_ni/sha256-compress.asm b/x86_64/sha_ni/sha256-compress.asm
new file mode 100644
index 00000000..f2a4bd32
--- /dev/null
+++ b/x86_64/sha_ni/sha256-compress.asm
@@ -0,0 +1,175 @@
+C x86_64/sha_ni/sha256-compress.asm
+
+ifelse(<
+ Copyright (C) 2018 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+>)
+
+ .file "sha256-compress.asm"
+define(<STATE>, <%rdi>) C uint32_t *state; 32 bytes read at entry and written back at exit
+define(<INPUT>, <%rsi>) C const uint8_t *input; 64 bytes read with unaligned loads
+define(<K>, <%rdx>) C const uint32_t *k; read with movdqa, so it must be 16-byte aligned
+
+define(<MSGK>,<%xmm0>) C Implicit operand of sha256rnds2
+define(<MSG0>,<%xmm1>) C MSG0-MSG3 hold input bytes 0-15, 16-31, 32-47 and 48-63, byte-swapped per 32-bit word
+define(<MSG1>,<%xmm2>)
+define(<MSG2>,<%xmm3>)
+define(<MSG3>,<%xmm4>)
+define(<ABEF>,<%xmm5>) C ABEF and CDGH are the two halves of the working state
+define(<CDGH>,<%xmm6>)
+define(<ABEF_ORIG>,<%xmm7>) C Copies of the entry state, added back after the last round
+define(<CDGH_ORIG>, <%xmm8>)
+define(<SWAP_MASK>,<%xmm9>)
+define(<TMP>, <%xmm9>) C Overlaps SWAP_MASK; used only before the mask is loaded and after its last use
+C QROUND(M0, M1, M2, M3, R): four rounds on message words M0 with the 16 bytes of constants at byte offset 4*R in K; also updates M1 and M3, clobbers MSGK and TMP (M2 is unused)
+define(<QROUND>, <
+ movdqa eval($5*4)(K), MSGK
+ paddd $1, MSGK
+ sha256rnds2 ABEF, CDGH
+ pshufd <$>0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF
+ movdqa $1, TMP
+ palignr <$>4, $4, TMP
+ paddd TMP, $2
+ sha256msg2 $1, $2
+ sha256msg1 $1, $4
+ >)
+
+C FIXME: Do something more clever, taking the pshufd into account.
+C TRANSPOSE(ABCD, EFGH, scratch) --> untouched, ABEF, CDGH (second argument receives the high quadwords of both inputs, scratch the low quadwords)
+define(<TRANSPOSE>, <
+ movdqa $2, $3
+ punpckhqdq $1, $2
+ punpcklqdq $1, $3
+>)
+
+ C void
+ C _nettle_sha256_compress(uint32_t *state, const uint8_t *input, const uint32_t *k)
+
+ .text
+ ALIGN(16)
+.Lswap_mask:
+ .byte 3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12 C pshufb mask reversing the bytes within each 32-bit word
+PROLOGUE(_nettle_sha256_compress)
+ W64_ENTRY(3, 10)
+ movups (STATE), TMP C State words 0-3
+ movups 16(STATE), ABEF C State words 4-7
+
+ pshufd $0x1b, TMP, TMP C Reverse the order of the four 32-bit words
+ pshufd $0x1b, ABEF, ABEF
+
+ TRANSPOSE(TMP, ABEF, CDGH) C Regroup into the ABEF/CDGH layout used by sha256rnds2
+
+ movdqa .Lswap_mask(%rip), SWAP_MASK C Overwrites TMP, which is no longer needed here
+
+ movdqa ABEF, ABEF_ORIG C Keep the entry state for the final addition
+ movdqa CDGH, CDGH_ORIG
+
+ movups (INPUT), MSG0
+ pshufb SWAP_MASK, MSG0 C Byte-swap each 32-bit input word
+
+ movdqa (K), MSGK
+ paddd MSG0, MSGK
+ sha256rnds2 ABEF, CDGH C Round 0-1
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 2-3
+
+ movups 16(INPUT), MSG1
+ pshufb SWAP_MASK, MSG1
+
+ movdqa 16(K), MSGK
+ paddd MSG1, MSGK
+ sha256rnds2 ABEF, CDGH C Round 4-5
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 6-7
+ sha256msg1 MSG1, MSG0
+
+ movups 32(INPUT), MSG2
+ pshufb SWAP_MASK, MSG2
+
+ movdqa 32(K), MSGK
+ paddd MSG2, MSGK
+ sha256rnds2 ABEF, CDGH C Round 8-9
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 10-11
+ sha256msg1 MSG2, MSG1
+
+ movups 48(INPUT), MSG3
+ pshufb SWAP_MASK, MSG3 C Last use of SWAP_MASK; the register serves as TMP from here on
+
+ QROUND(MSG3, MSG0, MSG1, MSG2, 12) C Round 12-15
+ QROUND(MSG0, MSG1, MSG2, MSG3, 16) C Round 16-19
+ QROUND(MSG1, MSG2, MSG3, MSG0, 20) C Round 20-23
+ QROUND(MSG2, MSG3, MSG0, MSG1, 24) C Round 24-27
+ QROUND(MSG3, MSG0, MSG1, MSG2, 28) C Round 28-31
+ QROUND(MSG0, MSG1, MSG2, MSG3, 32) C Round 32-35
+ QROUND(MSG1, MSG2, MSG3, MSG0, 36) C Round 36-39
+ QROUND(MSG2, MSG3, MSG0, MSG1, 40) C Round 40-43
+ QROUND(MSG3, MSG0, MSG1, MSG2, 44) C Round 44-47
+ QROUND(MSG0, MSG1, MSG2, MSG3, 48) C Round 48-51
+
+ movdqa 208(K), MSGK
+ paddd MSG1, MSGK
+ sha256rnds2 ABEF, CDGH C Round 52-53
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 54-55
+ movdqa MSG1, TMP
+ palignr $4, MSG0, TMP
+ paddd TMP, MSG2
+ sha256msg2 MSG1, MSG2
+
+ movdqa 224(K), MSGK
+ paddd MSG2, MSGK
+ sha256rnds2 ABEF, CDGH C Round 56-57
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 58-59
+ movdqa MSG2, TMP
+ palignr $4, MSG1, TMP
+ paddd TMP, MSG3
+ sha256msg2 MSG2, MSG3
+
+ movdqa 240(K), MSGK
+ paddd MSG3, MSGK
+ sha256rnds2 ABEF, CDGH C Round 60-61
+ pshufd $0xe, MSGK, MSGK
+ sha256rnds2 CDGH, ABEF C Round 62-63
+
+ paddd ABEF_ORIG, ABEF C Add the entry state saved before round 0
+ paddd CDGH_ORIG, CDGH
+
+ TRANSPOSE(ABEF, CDGH, TMP) C Inverse of the regrouping done at entry
+
+ pshufd $0x1b, CDGH, CDGH C Reverse the word order again before storing
+ pshufd $0x1b, TMP, TMP
+ movups CDGH, 0(STATE)
+ movups TMP, 16(STATE)
+
+ W64_EXIT(3, 10)
+ ret
+EPILOGUE(_nettle_sha256_compress)