author     Jeffrey Walton <noloader@gmail.com>    2017-08-13 04:31:17 -0400
committer  Jeffrey Walton <noloader@gmail.com>    2017-08-13 04:31:17 -0400
commit     f02bf91ee50877a41aa2f201f4840aee9a8dcc55 (patch)
tree       ef832ea7ccdafca7df3a64e837e43cff05954461 /gcm.cpp
parent     95ee8975b4fcc132c839dfa47bd80dbaeee7ed54 (diff)
download   cryptopp-git-f02bf91ee50877a41aa2f201f4840aee9a8dcc55.tar.gz
Revert 95ee8975b4fc and 7d21cdd54e95 (Issue 408)
This broke some OS X builds. We need more testing. Ugh...
Diffstat (limited to 'gcm.cpp')
-rw-r--r--  gcm.cpp  109
1 file changed, 47 insertions(+), 62 deletions(-)
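For context, a minimal usage sketch (not part of this commit) of where the restored check is exercised: GCM_Base::SetKeyWithoutResync runs when the mode is keyed, and only a cipher with a 16-byte block size (REQUIRED_BLOCKSIZE) such as AES is accepted; anything else throws InvalidArgument. The <cryptopp/...> include paths assume an installed copy of the library.

#include <cryptopp/aes.h>
#include <cryptopp/gcm.h>
#include <cryptopp/osrng.h>
#include <cryptopp/secblock.h>

int main()
{
    using namespace CryptoPP;

    // Random key and 96-bit IV for demonstration purposes.
    AutoSeededRandomPool prng;
    SecByteBlock key(AES::DEFAULT_KEYLENGTH), iv(12);
    prng.GenerateBlock(key, key.size());
    prng.GenerateBlock(iv, iv.size());

    // AES has a 16-byte block size, so the block-size check restored by this
    // revert passes; a cipher with any other block size would make keying
    // (and thus SetKeyWithoutResync) throw InvalidArgument.
    GCM<AES>::Encryption enc;
    enc.SetKeyWithIV(key, key.size(), iv, iv.size());

    return 0;
}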
diff --git a/gcm.cpp b/gcm.cpp
index 86a0a3a6..0f06730f 100644
--- a/gcm.cpp
+++ b/gcm.cpp
@@ -1,6 +1,6 @@
-// gcm.cpp - originally written and placed in the public domain by Wei Dai
+// gcm.cpp - originally written and placed in the public domain by Wei Dai.
// ARM and Aarch64 added by Jeffrey Walton. The ARM carryless
-// multiply routines are less efficient because they shadowed x86.
+// multiply routines are less efficient because they shadow x86.
// The precomputed key table integration makes it tricky to use the
// more efficient ARMv8 implementation of the multiply and reduce.
@@ -325,14 +325,8 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
BlockCipher &blockCipher = AccessBlockCipher();
blockCipher.SetKey(userKey, keylength, params);
- // GCM is only defined for 16-byte block ciphers at the moment.
- // However, variable blocksize support means we have to defer
- // blocksize checks to runtime after the key is set. Also see
- // https://github.com/weidai11/cryptopp/issues/408.
- const unsigned int blockSize = blockCipher.BlockSize();
- CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
- if (blockSize != REQUIRED_BLOCKSIZE)
- throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not " + IntToString(blockSize));
+ if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE)
+ throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
int tableSize, i, j, k;
@@ -341,8 +335,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
{
// Avoid "parameter not used" error and suppress Coverity finding
(void)params.GetIntValue(Name::TableSize(), tableSize);
- tableSize = s_clmulTableSizeInBlocks * blockSize;
- CRYPTOPP_ASSERT(tableSize > (int)blockSize);
+ tableSize = s_clmulTableSizeInBlocks * REQUIRED_BLOCKSIZE;
}
else
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
@@ -350,8 +343,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
{
// Avoid "parameter not used" error and suppress Coverity finding
(void)params.GetIntValue(Name::TableSize(), tableSize);
- tableSize = s_clmulTableSizeInBlocks * blockSize;
- CRYPTOPP_ASSERT(tableSize > (int)blockSize);
+ tableSize = s_clmulTableSizeInBlocks * REQUIRED_BLOCKSIZE;
}
else
#endif
@@ -367,10 +359,10 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
#endif
}
- m_buffer.resize(3*blockSize + tableSize);
- byte *table = MulTable();
+ m_buffer.resize(3*REQUIRED_BLOCKSIZE + tableSize);
+ byte *mulTable = MulTable();
byte *hashKey = HashKey();
- memset(hashKey, 0, blockSize);
+ memset(hashKey, 0, REQUIRED_BLOCKSIZE);
blockCipher.ProcessBlock(hashKey);
#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
@@ -383,10 +375,10 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (i=0; i<tableSize; i+=32)
{
__m128i h1 = CLMUL_GF_Mul(h, h0, r);
- _mm_storel_epi64((__m128i *)(void *)(table+i), h);
- _mm_storeu_si128((__m128i *)(void *)(table+i+16), h1);
- _mm_storeu_si128((__m128i *)(void *)(table+i+8), h);
- _mm_storel_epi64((__m128i *)(void *)(table+i+8), h1);
+ _mm_storel_epi64((__m128i *)(void *)(mulTable+i), h);
+ _mm_storeu_si128((__m128i *)(void *)(mulTable+i+16), h1);
+ _mm_storeu_si128((__m128i *)(void *)(mulTable+i+8), h);
+ _mm_storel_epi64((__m128i *)(void *)(mulTable+i+8), h1);
h = CLMUL_GF_Mul(h1, h0, r);
}
@@ -403,18 +395,18 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (i=0; i<tableSize-32; i+=32)
{
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r);
- vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
- vst1q_u64((uint64_t *)(table+i+16), h1);
- vst1q_u64((uint64_t *)(table+i+8), h);
- vst1_u64((uint64_t *)(table+i+8), vget_low_u64(h1));
+ vst1_u64((uint64_t *)(mulTable+i), vget_low_u64(h));
+ vst1q_u64((uint64_t *)(mulTable+i+16), h1);
+ vst1q_u64((uint64_t *)(mulTable+i+8), h);
+ vst1_u64((uint64_t *)(mulTable+i+8), vget_low_u64(h1));
h = PMULL_GF_Mul(h1, h0, r);
}
const uint64x2_t h1 = PMULL_GF_Mul(h, h0, r);
- vst1_u64((uint64_t *)(table+i), vget_low_u64(h));
- vst1q_u64((uint64_t *)(table+i+16), h1);
- vst1q_u64((uint64_t *)(table+i+8), h);
- vst1_u64((uint64_t *)(table+i+8), vget_low_u64(h1));
+ vst1_u64((uint64_t *)(mulTable+i), vget_low_u64(h));
+ vst1q_u64((uint64_t *)(mulTable+i+16), h1);
+ vst1q_u64((uint64_t *)(mulTable+i+8), h);
+ vst1_u64((uint64_t *)(mulTable+i+8), vget_low_u64(h1));
return;
}
@@ -429,7 +421,7 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (i=0; i<128; i++)
{
k = i%8;
- Block::Put(NULLPTR, table+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);
+ Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);
int x = (int)V1 & 1;
V1 = (V1>>1) | (V0<<63);
@@ -438,23 +430,23 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (i=0; i<16; i++)
{
- memset(table+i*256*16, 0, 16);
+ memset(mulTable+i*256*16, 0, 16);
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
if (HasSSE2())
for (j=2; j<=0x80; j*=2)
for (k=1; k<j; k++)
- SSE2_Xor16(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16);
+ SSE2_Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
else
#elif CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
if (HasNEON())
for (j=2; j<=0x80; j*=2)
for (k=1; k<j; k++)
- NEON_Xor16(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16);
+ NEON_Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
else
#endif
for (j=2; j<=0x80; j*=2)
for (k=1; k<j; k++)
- Xor16(table+i*256*16+(j+k)*16, table+i*256*16+j*16, table+i*256*16+k*16);
+ Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
}
}
else
@@ -478,9 +470,9 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
{
k = i%32;
if (k < 4)
- Block::Put(NULLPTR, table+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
+ Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
else if (k < 8)
- Block::Put(NULLPTR, table+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);
+ Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);
int x = (int)V1 & 1;
V1 = (V1>>1) | (V0<<63);
@@ -489,15 +481,15 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (i=0; i<4; i++)
{
- memset(table+i*256, 0, 16);
- memset(table+1024+i*256, 0, 16);
+ memset(mulTable+i*256, 0, 16);
+ memset(mulTable+1024+i*256, 0, 16);
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
if (HasSSE2())
for (j=2; j<=8; j*=2)
for (k=1; k<j; k++)
{
- SSE2_Xor16(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16);
- SSE2_Xor16(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16);
+ SSE2_Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
+ SSE2_Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
}
else
#elif CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
@@ -505,16 +497,16 @@ void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const
for (j=2; j<=8; j*=2)
for (k=1; k<j; k++)
{
- NEON_Xor16(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16);
- NEON_Xor16(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16);
+ NEON_Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
+ NEON_Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
}
else
#endif
for (j=2; j<=8; j*=2)
for (k=1; k<j; k++)
{
- Xor16(table+i*256+(j+k)*16, table+i*256+j*16, table+i*256+k*16);
- Xor16(table+1024+i*256+(j+k)*16, table+1024+i*256+j*16, table+1024+i*256+k*16);
+ Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
+ Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
}
}
}
@@ -545,13 +537,6 @@ void GCM_Base::Resync(const byte *iv, size_t len)
BlockCipher &cipher = AccessBlockCipher();
byte *hashBuffer = HashBuffer();
- // GCM is only defined for 16-byte block ciphers at the moment.
- // However, variable blocksize support means we have to defer
- // blocksize checks to runtime after the key is set. Also see
- // https://github.com/weidai11/cryptopp/issues/408.
- const unsigned int blockSize = cipher.BlockSize();
- CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
-
if (len == 12)
{
memcpy(hashBuffer, iv, len);
@@ -583,7 +568,7 @@ void GCM_Base::Resync(const byte *iv, size_t len)
}
if (m_state >= State_IVSet)
- m_ctr.Resynchronize(hashBuffer, blockSize);
+ m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
else
m_ctr.SetCipherWithIV(cipher, hashBuffer);
@@ -623,7 +608,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
if (HasCLMUL())
{
- const __m128i *table = (const __m128i *)(const void *)MulTable();
+ const __m128i *mulTable = (const __m128i *)(const void *)MulTable();
__m128i x = _mm_load_si128((__m128i *)(void *)HashBuffer());
const __m128i r = s_clmulConstants[0], mask1 = s_clmulConstants[1], mask2 = s_clmulConstants[2];
@@ -637,8 +622,8 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
while (true)
{
- __m128i h0 = _mm_load_si128(table+i);
- __m128i h1 = _mm_load_si128(table+i+1);
+ __m128i h0 = _mm_load_si128(mulTable+i);
+ __m128i h1 = _mm_load_si128(mulTable+i+1);
__m128i h2 = _mm_xor_si128(h0, h1);
if (++i == s)
@@ -688,7 +673,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
#elif CRYPTOPP_BOOL_ARM_PMULL_AVAILABLE
if (HasPMULL())
{
- const uint64x2_t *table = (const uint64x2_t *)MulTable();
+ const uint64x2_t *mulTable = (const uint64x2_t *)MulTable();
uint64x2_t x = vreinterpretq_u64_u8(vld1q_u8(HashBuffer()));
const uint64x2_t r = s_clmulConstants[0];
@@ -702,8 +687,8 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
while (true)
{
- const uint64x2_t h0 = vld1q_u64((const uint64_t*)(table+i));
- const uint64x2_t h1 = vld1q_u64((const uint64_t*)(table+i+1));
+ const uint64x2_t h0 = vld1q_u64((const uint64_t*)(mulTable+i));
+ const uint64x2_t h1 = vld1q_u64((const uint64_t*)(mulTable+i+1));
const uint64x2_t h2 = veorq_u64(h0, h1);
if (++i == s)
@@ -771,7 +756,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
{
case 0: // non-SSE2 and 2K tables
{
- byte *table = MulTable();
+ byte *mulTable = MulTable();
word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
do
@@ -784,7 +769,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
data += HASH_BLOCKSIZE;
len -= HASH_BLOCKSIZE;
- #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(table+(a*1024)+(b*256)+c+d*8)
+ #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)
#ifdef IS_LITTLE_ENDIAN
#if CRYPTOPP_BOOL_SLOW_WORD64
@@ -839,7 +824,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
case 2: // non-SSE2 and 64K tables
{
- byte *table = MulTable();
+ byte *mulTable = MulTable();
word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
do
@@ -855,7 +840,7 @@ size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
#undef READ_TABLE_WORD64_COMMON
#undef READ_TABLE_WORD64
- #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(table+(a)*256*16+(c)+(d)*8)
+ #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)
#ifdef IS_LITTLE_ENDIAN
#if CRYPTOPP_BOOL_SLOW_WORD64