summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2023-04-07 16:40:09 +0200
committerNiels Möller <nisse@lysator.liu.se>2023-04-07 16:40:09 +0200
commit98785af09c097eabc91c1695ec1fe1472b6438c5 (patch)
tree87c71737913bc8a21d38751de79c35c0952a22ad
parent059fb63d4de35c2681feb32369c1719a0277e891 (diff)
downloadnettle-98785af09c097eabc91c1695ec1fe1472b6438c5.tar.gz
Rewrite ghash C implementation.
-rw-r--r--ChangeLog14
-rw-r--r--Makefile.in3
-rw-r--r--gcmdata.c84
-rw-r--r--ghash-set-key.c50
-rw-r--r--ghash-update.c83
5 files changed, 62 insertions, 172 deletions
diff --git a/ChangeLog b/ChangeLog
index 67986932..f3e2f4bb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2023-04-07 Niels Möller <nisse@lysator.liu.se>
+
+ * ghash-update.c (gcm_gf_mul): Rewrite to avoid side channel
+ leakage. Now processes message bits one at a time, using tabulated
+ values of the key premultiplied by appropriate powers of x, so
+ that the table is accessed in a fixed sequential order.
+ Performance penalty, on x86_64, is roughly 3 times.
+ (shift_table): Deleted table.
+ (gcm_gf_shift_8): Deleted function.
+
+ * ghash-set-key.c (_ghash_set_key): Rewrite table generation.
+
+ * gcmdata.c: Deleted.
+
2023-04-03 Niels Möller <nisse@lysator.liu.se>
From Mamone Tarsha:
diff --git a/Makefile.in b/Makefile.in
index 081337a8..2464e17e 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -250,7 +250,7 @@ INSTALL_HEADERS = $(HEADERS) version.h @IF_MINI_GMP@ mini-gmp.h
SOURCES = $(nettle_SOURCES) $(hogweed_SOURCES) \
$(getopt_SOURCES) $(internal_SOURCES) \
$(OPT_SOURCES) \
- aesdata.c desdata.c twofishdata.c shadata.c gcmdata.c eccdata.c
+ aesdata.c desdata.c twofishdata.c shadata.c eccdata.c
# NOTE: This list must include all source files, with no duplicates,
# independently of which source files are included in the build.
@@ -683,7 +683,6 @@ clean-here:
desdata$(EXEEXT_FOR_BUILD) \
twofishdata$(EXEEXT_FOR_BUILD) \
shadata$(EXEEXT_FOR_BUILD) \
- gcmdata$(EXEEXT_FOR_BUILD) \
eccdata$(EXEEXT_FOR_BUILD) eccdata.stamp
-rm -rf .lib libnettle.stamp libhogweed.stamp
diff --git a/gcmdata.c b/gcmdata.c
deleted file mode 100644
index 2d57b46a..00000000
--- a/gcmdata.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* gcmdata.c
-
- Galois counter mode, specified by NIST,
- http://csrc.nist.gov/publications/nistpubs/800-38D/SP-800-38D.pdf
-
- Generation of fixed multiplication tables.
-
- Copyright (C) 2011 Niels Möller
-
- This file is part of GNU Nettle.
-
- GNU Nettle is free software: you can redistribute it and/or
- modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your
- option) any later version.
-
- or both in parallel, as here.
-
- GNU Nettle is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received copies of the GNU General Public License and
- the GNU Lesser General Public License along with this program. If
- not, see http://www.gnu.org/licenses/.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#define GHASH_POLYNOMIAL 0xE1
-
-
-/* When x is shifted out over the block edge, add multiples of the
- defining polynomial to eliminate each bit. */
-static unsigned
-reduce(unsigned x)
-{
- unsigned p = GHASH_POLYNOMIAL << 1;
- unsigned y = 0;
- for (; x; x >>= 1, p <<= 1)
- if (x & 1)
- y ^= p;
- return y;
-}
-
-int
-main(int argc, char **argv)
-{
- unsigned i;
- printf("4-bit table:\n");
-
- for (i = 0; i<16; i++)
- {
- unsigned x;
- if (i && !(i%8))
- printf("\n");
-
- x = reduce(i << 4);
- printf("W(%02x,%02x),", x >> 8, x & 0xff);
- }
- printf("\n\n");
- printf("8-bit table:\n");
- for (i = 0; i<256; i++)
- {
- unsigned x;
- if (i && !(i%8))
- printf("\n");
-
- x = reduce(i);
- printf("W(%02x,%02x),", x >> 8, x & 0xff);
- }
- printf("\n");
- return EXIT_SUCCESS;
-}
diff --git a/ghash-set-key.c b/ghash-set-key.c
index 0e91afcb..58ebb633 100644
--- a/ghash-set-key.c
+++ b/ghash-set-key.c
@@ -51,25 +51,43 @@ _nettle_ghash_set_key_c (struct gcm_key *ctx, const union nettle_block16 *key);
#define _nettle_ghash_set_key _nettle_ghash_set_key_c
#endif
+#if GCM_TABLE_BITS < 7
+# error Unsupported table size.
+#endif
+
/* Implements a lookup table for processors without carryless-mul
instruction. */
void
_ghash_set_key (struct gcm_key *ctx, const union nettle_block16 *key)
{
- /* Middle element if GCM_TABLE_BITS > 0, otherwise the first
- element */
- unsigned i = (1<<GCM_TABLE_BITS)/2;
- block16_zero (&ctx->h[0]);
- ctx->h[i] = *key;
-
- /* Algorithm 3 from the gcm paper. First do powers of two, then do
- the rest by adding. */
- while (i /= 2)
- block16_mulx_ghash (&ctx->h[i], &ctx->h[2*i]);
- for (i = 2; i < 1<<GCM_TABLE_BITS; i *= 2)
- {
- unsigned j;
- for (j = 1; j < i; j++)
- block16_xor3 (&ctx->h[i+j], &ctx->h[i], &ctx->h[j]);
- }
+ /* Table elements hold the key, premultiplied by all needed powers
+ of x. Element ordering follows the order bits are processed in
+ _ghash_update, first u64[0] bits, starting from the least
+ significant end, then the u64[1] bits, also from least
+ significant end. In the gcm bit order, bits (left to right)
+ correspond to x powers (the numbers) like
+
+ |0...7|8...15|...|56...63|64...71|72...79|...|120...127|
+
+ where | indicates the byte boundaries. On little endian, these
+ bits are in u64 like
+
+ u64[0]: | 56...63 48...55 40...47 32...39 24...31 16...23 8...15 0...7|
+ u64[1]: |120...127 112...129 104...111 96...103 88...95 80...87 72...79 64...71|
+
+ With big-endian, we instead get
+
+ u64[0]: |0...63|
+ u64[1]: |64...127|
+ */
+#if WORDS_BIGENDIAN
+# define INDEX_PERMUTE 63
+#else
+# define INDEX_PERMUTE 7
+#endif
+ unsigned i;
+
+ block16_set (&ctx->h[INDEX_PERMUTE], key);
+ for (i = 1; i < 128; i++)
+ block16_mulx_ghash(&ctx->h[i ^ INDEX_PERMUTE], &ctx->h[(i-1) ^ INDEX_PERMUTE]);
}
diff --git a/ghash-update.c b/ghash-update.c
index 6eb19d80..7cd19643 100644
--- a/ghash-update.c
+++ b/ghash-update.c
@@ -44,7 +44,7 @@
#include "ghash-internal.h"
#include "block-internal.h"
-#if GCM_TABLE_BITS != 8
+#if GCM_TABLE_BITS < 7
# error Unsupported table size.
#endif
@@ -54,83 +54,26 @@ const uint8_t *
_nettle_ghash_update_c (const struct gcm_key *ctx, union nettle_block16 *state,
size_t blocks, const uint8_t *data);
#define _nettle_ghash_update _nettle_ghash_update_c
-
-#endif
-#if WORDS_BIGENDIAN
-# define W(left,right) (0x##left##right)
-#else
-# define W(left,right) (0x##right##left)
#endif
-static const uint16_t
-shift_table[0x100] = {
- W(00,00),W(01,c2),W(03,84),W(02,46),W(07,08),W(06,ca),W(04,8c),W(05,4e),
- W(0e,10),W(0f,d2),W(0d,94),W(0c,56),W(09,18),W(08,da),W(0a,9c),W(0b,5e),
- W(1c,20),W(1d,e2),W(1f,a4),W(1e,66),W(1b,28),W(1a,ea),W(18,ac),W(19,6e),
- W(12,30),W(13,f2),W(11,b4),W(10,76),W(15,38),W(14,fa),W(16,bc),W(17,7e),
- W(38,40),W(39,82),W(3b,c4),W(3a,06),W(3f,48),W(3e,8a),W(3c,cc),W(3d,0e),
- W(36,50),W(37,92),W(35,d4),W(34,16),W(31,58),W(30,9a),W(32,dc),W(33,1e),
- W(24,60),W(25,a2),W(27,e4),W(26,26),W(23,68),W(22,aa),W(20,ec),W(21,2e),
- W(2a,70),W(2b,b2),W(29,f4),W(28,36),W(2d,78),W(2c,ba),W(2e,fc),W(2f,3e),
- W(70,80),W(71,42),W(73,04),W(72,c6),W(77,88),W(76,4a),W(74,0c),W(75,ce),
- W(7e,90),W(7f,52),W(7d,14),W(7c,d6),W(79,98),W(78,5a),W(7a,1c),W(7b,de),
- W(6c,a0),W(6d,62),W(6f,24),W(6e,e6),W(6b,a8),W(6a,6a),W(68,2c),W(69,ee),
- W(62,b0),W(63,72),W(61,34),W(60,f6),W(65,b8),W(64,7a),W(66,3c),W(67,fe),
- W(48,c0),W(49,02),W(4b,44),W(4a,86),W(4f,c8),W(4e,0a),W(4c,4c),W(4d,8e),
- W(46,d0),W(47,12),W(45,54),W(44,96),W(41,d8),W(40,1a),W(42,5c),W(43,9e),
- W(54,e0),W(55,22),W(57,64),W(56,a6),W(53,e8),W(52,2a),W(50,6c),W(51,ae),
- W(5a,f0),W(5b,32),W(59,74),W(58,b6),W(5d,f8),W(5c,3a),W(5e,7c),W(5f,be),
- W(e1,00),W(e0,c2),W(e2,84),W(e3,46),W(e6,08),W(e7,ca),W(e5,8c),W(e4,4e),
- W(ef,10),W(ee,d2),W(ec,94),W(ed,56),W(e8,18),W(e9,da),W(eb,9c),W(ea,5e),
- W(fd,20),W(fc,e2),W(fe,a4),W(ff,66),W(fa,28),W(fb,ea),W(f9,ac),W(f8,6e),
- W(f3,30),W(f2,f2),W(f0,b4),W(f1,76),W(f4,38),W(f5,fa),W(f7,bc),W(f6,7e),
- W(d9,40),W(d8,82),W(da,c4),W(db,06),W(de,48),W(df,8a),W(dd,cc),W(dc,0e),
- W(d7,50),W(d6,92),W(d4,d4),W(d5,16),W(d0,58),W(d1,9a),W(d3,dc),W(d2,1e),
- W(c5,60),W(c4,a2),W(c6,e4),W(c7,26),W(c2,68),W(c3,aa),W(c1,ec),W(c0,2e),
- W(cb,70),W(ca,b2),W(c8,f4),W(c9,36),W(cc,78),W(cd,ba),W(cf,fc),W(ce,3e),
- W(91,80),W(90,42),W(92,04),W(93,c6),W(96,88),W(97,4a),W(95,0c),W(94,ce),
- W(9f,90),W(9e,52),W(9c,14),W(9d,d6),W(98,98),W(99,5a),W(9b,1c),W(9a,de),
- W(8d,a0),W(8c,62),W(8e,24),W(8f,e6),W(8a,a8),W(8b,6a),W(89,2c),W(88,ee),
- W(83,b0),W(82,72),W(80,34),W(81,f6),W(84,b8),W(85,7a),W(87,3c),W(86,fe),
- W(a9,c0),W(a8,02),W(aa,44),W(ab,86),W(ae,c8),W(af,0a),W(ad,4c),W(ac,8e),
- W(a7,d0),W(a6,12),W(a4,54),W(a5,96),W(a0,d8),W(a1,1a),W(a3,5c),W(a2,9e),
- W(b5,e0),W(b4,22),W(b6,64),W(b7,a6),W(b2,e8),W(b3,2a),W(b1,6c),W(b0,ae),
- W(bb,f0),W(ba,32),W(b8,74),W(b9,b6),W(bc,f8),W(bd,3a),W(bf,7c),W(be,be),
-};
-#undef W
-
-static void
-gcm_gf_shift_8(union nettle_block16 *x)
-{
- uint64_t reduce;
-
- /* Shift uses big-endian representation. */
-#if WORDS_BIGENDIAN
- reduce = shift_table[x->u64[1] & 0xff];
- x->u64[1] = (x->u64[1] >> 8) | ((x->u64[0] & 0xff) << 56);
- x->u64[0] = (x->u64[0] >> 8) ^ (reduce << 48);
-#else /* ! WORDS_BIGENDIAN */
- reduce = shift_table[(x->u64[1] >> 56) & 0xff];
- x->u64[1] = (x->u64[1] << 8) | (x->u64[0] >> 56);
- x->u64[0] = (x->u64[0] << 8) ^ reduce;
-#endif /* ! WORDS_BIGENDIAN */
-}
-
static void
gcm_gf_mul (union nettle_block16 *x, const union nettle_block16 *table)
{
- union nettle_block16 Z;
+ uint64_t x0 = x->u64[0];
+ uint64_t x1 = x->u64[1];
+ uint64_t r0 = 0;
+ uint64_t r1 = 0;
unsigned i;
-
- Z = table[x->b[GCM_BLOCK_SIZE-1]];
-
- for (i = GCM_BLOCK_SIZE-2; i > 0; i--)
+ for (i = 0; i < 64; i++, x0 >>= 1, x1 >>= 1)
{
- gcm_gf_shift_8(&Z);
- block16_xor(&Z, &table[x->b[i]]);
+ uint64_t m0 = -(x0 & 1);
+ uint64_t m1 = -(x1 & 1);
+ r0 ^= m0 & table[i].u64[0];
+ r1 ^= m0 & table[i].u64[1];
+ r0 ^= m1 & table[64+i].u64[0];
+ r1 ^= m1 & table[64+i].u64[1];
}
- gcm_gf_shift_8(&Z);
- block16_xor3(x, &Z, &table[x->b[0]]);
+ x->u64[0] = r0; x->u64[1] = r1;
}
const uint8_t *