summary | refs | log | tree | commit | diff
path: root/rabbit.cpp
diff options
context:
space:
mode:
author: Jeffrey Walton <noloader@gmail.com> 2018-07-05 08:41:44 -0400
committer: Jeffrey Walton <noloader@gmail.com> 2018-07-05 08:41:44 -0400
commit: c9c51a5e01ea574c2301963cc62c09d8eb1ed9f2 (patch)
tree: 7d03eca58c572beacce262cba1e59d8fc1195b1b /rabbit.cpp
parent: 665e16d340671e5c36533b0f35f8adfa8e64acb5 (diff)
download: cryptopp-git-c9c51a5e01ea574c2301963cc62c09d8eb1ed9f2.tar.gz
Rework OperateKeystream (GH #678)
This improves Rabbit performance on a Core i5 6400 from 5.5 cycles per byte (cpb) to 4.7 cpb.
Diffstat (limited to 'rabbit.cpp')
-rw-r--r-- rabbit.cpp 132
1 file changed, 59 insertions, 73 deletions
diff --git a/rabbit.cpp b/rabbit.cpp
index 505f07ac..a2bf1cbc 100644
--- a/rabbit.cpp
+++ b/rabbit.cpp
@@ -16,8 +16,6 @@ ANONYMOUS_NAMESPACE_BEGIN
using CryptoPP::word32;
using CryptoPP::rotlConstant;
-#define ROTL32(x, y) rotlConstant<y>(x)
-#define U32V(x) static_cast<word32>(x)
word32 G_func(word32 x)
{
@@ -29,11 +27,11 @@ word32 G_func(word32 x)
b = x >> 16;
/* Calculate high and low result of squaring */
- h = (((U32V(a*a) >> 17U) + U32V(a*b)) >> 15U) + b*b;
+ h = (((static_cast<word32>(a*a) >> 17U) + static_cast<word32>(a*b)) >> 15U) + b*b;
l = x*x;
/* Return high XOR low */
- return U32V(h^l);
+ return static_cast<word32>(h^l);
}
word32 NextState(word32 c[8], word32 x[8], word32 carry)
@@ -46,29 +44,29 @@ word32 NextState(word32 c[8], word32 x[8], word32 carry)
c_old[i] = c[i];
/* Calculate new counter values */
- c[0] = U32V(c[0] + 0x4D34D34D + carry);
- c[1] = U32V(c[1] + 0xD34D34D3 + (c[0] < c_old[0]));
- c[2] = U32V(c[2] + 0x34D34D34 + (c[1] < c_old[1]));
- c[3] = U32V(c[3] + 0x4D34D34D + (c[2] < c_old[2]));
- c[4] = U32V(c[4] + 0xD34D34D3 + (c[3] < c_old[3]));
- c[5] = U32V(c[5] + 0x34D34D34 + (c[4] < c_old[4]));
- c[6] = U32V(c[6] + 0x4D34D34D + (c[5] < c_old[5]));
- c[7] = U32V(c[7] + 0xD34D34D3 + (c[6] < c_old[6]));
+ c[0] = static_cast<word32>(c[0] + 0x4D34D34D + carry);
+ c[1] = static_cast<word32>(c[1] + 0xD34D34D3 + (c[0] < c_old[0]));
+ c[2] = static_cast<word32>(c[2] + 0x34D34D34 + (c[1] < c_old[1]));
+ c[3] = static_cast<word32>(c[3] + 0x4D34D34D + (c[2] < c_old[2]));
+ c[4] = static_cast<word32>(c[4] + 0xD34D34D3 + (c[3] < c_old[3]));
+ c[5] = static_cast<word32>(c[5] + 0x34D34D34 + (c[4] < c_old[4]));
+ c[6] = static_cast<word32>(c[6] + 0x4D34D34D + (c[5] < c_old[5]));
+ c[7] = static_cast<word32>(c[7] + 0xD34D34D3 + (c[6] < c_old[6]));
carry = (c[7] < c_old[7]);
/* Calculate the g-values */
for (i = 0; i<8; i++)
- g[i] = G_func(U32V(x[i] + c[i]));
+ g[i] = G_func(static_cast<word32>(x[i] + c[i]));
/* Calculate new state values */
- x[0] = U32V(g[0] + ROTL32(g[7], 16) + ROTL32(g[6], 16));
- x[1] = U32V(g[1] + ROTL32(g[0], 8) + g[7]);
- x[2] = U32V(g[2] + ROTL32(g[1], 16) + ROTL32(g[0], 16));
- x[3] = U32V(g[3] + ROTL32(g[2], 8) + g[1]);
- x[4] = U32V(g[4] + ROTL32(g[3], 16) + ROTL32(g[2], 16));
- x[5] = U32V(g[5] + ROTL32(g[4], 8) + g[3]);
- x[6] = U32V(g[6] + ROTL32(g[5], 16) + ROTL32(g[4], 16));
- x[7] = U32V(g[7] + ROTL32(g[6], 8) + g[5]);
+ x[0] = static_cast<word32>(g[0] + rotlConstant<16>(g[7]) + rotlConstant<16>(g[6]));
+ x[1] = static_cast<word32>(g[1] + rotlConstant<8>(g[0]) + g[7]);
+ x[2] = static_cast<word32>(g[2] + rotlConstant<16>(g[1]) + rotlConstant<16>(g[0]));
+ x[3] = static_cast<word32>(g[3] + rotlConstant<8>(g[2]) + g[1]);
+ x[4] = static_cast<word32>(g[4] + rotlConstant<16>(g[3]) + rotlConstant<16>(g[2]));
+ x[5] = static_cast<word32>(g[5] + rotlConstant<8>(g[4]) + g[3]);
+ x[6] = static_cast<word32>(g[6] + rotlConstant<16>(g[5]) + rotlConstant<16>(g[4]));
+ x[7] = static_cast<word32>(g[7] + rotlConstant<8>(g[6]) + g[5]);
return carry;
}
@@ -88,16 +86,16 @@ void RabbitPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKe
m_mx[2] = m_t[1];
m_mx[4] = m_t[2];
m_mx[6] = m_t[3];
- m_mx[1] = U32V(m_t[3] << 16) | (m_t[2] >> 16);
- m_mx[3] = U32V(m_t[0] << 16) | (m_t[3] >> 16);
- m_mx[5] = U32V(m_t[1] << 16) | (m_t[0] >> 16);
- m_mx[7] = U32V(m_t[2] << 16) | (m_t[1] >> 16);
+ m_mx[1] = static_cast<word32>(m_t[3] << 16) | (m_t[2] >> 16);
+ m_mx[3] = static_cast<word32>(m_t[0] << 16) | (m_t[3] >> 16);
+ m_mx[5] = static_cast<word32>(m_t[1] << 16) | (m_t[0] >> 16);
+ m_mx[7] = static_cast<word32>(m_t[2] << 16) | (m_t[1] >> 16);
/* Generate initial counter values */
- m_mc[0] = ROTL32(m_t[2], 16);
- m_mc[2] = ROTL32(m_t[3], 16);
- m_mc[4] = ROTL32(m_t[0], 16);
- m_mc[6] = ROTL32(m_t[1], 16);
+ m_mc[0] = rotlConstant<16>(m_t[2]);
+ m_mc[2] = rotlConstant<16>(m_t[3]);
+ m_mc[4] = rotlConstant<16>(m_t[0]);
+ m_mc[6] = rotlConstant<16>(m_t[1]);
m_mc[1] = (m_t[0] & 0xFFFF0000) | (m_t[1] & 0xFFFF);
m_mc[3] = (m_t[1] & 0xFFFF0000) | (m_t[2] & 0xFFFF);
m_mc[5] = (m_t[2] & 0xFFFF0000) | (m_t[3] & 0xFFFF);
@@ -125,30 +123,24 @@ void RabbitPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKe
void RabbitPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
{
- // Rabbit's bufferByteSize in AdditiveCipherTemplate
- const unsigned int BUFFER_SIZE = 16;
-
- for (unsigned int i = 0; i<iterationCount; ++i)
+ byte* out = output;
+ for (unsigned int i = 0; i<iterationCount; ++i, out += 16)
{
/* Iterate the system */
m_wcy = NextState(m_wc, m_wx, m_wcy);
/* Encrypt/decrypt 16 bytes of data */
- PutWord(false, LITTLE_ENDIAN_ORDER, output + 0, m_wx[0] ^ (m_wx[5] >> 16) ^ (m_wx[3] << 16));
- PutWord(false, LITTLE_ENDIAN_ORDER, output + 4, m_wx[2] ^ (m_wx[7] >> 16) ^ (m_wx[5] << 16));
- PutWord(false, LITTLE_ENDIAN_ORDER, output + 8, m_wx[4] ^ (m_wx[1] >> 16) ^ (m_wx[7] << 16));
- PutWord(false, LITTLE_ENDIAN_ORDER, output + 12, m_wx[6] ^ (m_wx[3] >> 16) ^ (m_wx[1] << 16));
-
- // If AdditiveCipherTemplate does not have an accumulated keystream
- // then it will ask OperateKeystream to XOR the plaintext with
- // the keystream and write it to the ciphertext buffer.
- if ((operation & INPUT_NULL) != INPUT_NULL)
- xorbuf(output, input, BUFFER_SIZE);
-
- /* Increment pointers to input and output data */
- input += BUFFER_SIZE;
- output += BUFFER_SIZE;
+ PutWord(false, LITTLE_ENDIAN_ORDER, out + 0, m_wx[0] ^ (m_wx[5] >> 16) ^ (m_wx[3] << 16));
+ PutWord(false, LITTLE_ENDIAN_ORDER, out + 4, m_wx[2] ^ (m_wx[7] >> 16) ^ (m_wx[5] << 16));
+ PutWord(false, LITTLE_ENDIAN_ORDER, out + 8, m_wx[4] ^ (m_wx[1] >> 16) ^ (m_wx[7] << 16));
+ PutWord(false, LITTLE_ENDIAN_ORDER, out + 12, m_wx[6] ^ (m_wx[3] >> 16) ^ (m_wx[1] << 16));
}
+
+ // If AdditiveCipherTemplate does not have an accumulated keystream
+ // then it will ask OperateKeystream to XOR the plaintext with
+ // the keystream and write it to the ciphertext buffer.
+ if ((operation & INPUT_NULL) != INPUT_NULL)
+ xorbuf(output, input, GetBytesPerIteration() * iterationCount);
}
void RabbitWithIVPolicy::CipherSetKey(const NameValuePairs &params, const byte *userKey, size_t keylen)
@@ -162,16 +154,16 @@ void RabbitWithIVPolicy::CipherSetKey(const NameValuePairs &params, const byte *
m_mx[2] = m_t[1];
m_mx[4] = m_t[2];
m_mx[6] = m_t[3];
- m_mx[1] = U32V(m_t[3] << 16) | (m_t[2] >> 16);
- m_mx[3] = U32V(m_t[0] << 16) | (m_t[3] >> 16);
- m_mx[5] = U32V(m_t[1] << 16) | (m_t[0] >> 16);
- m_mx[7] = U32V(m_t[2] << 16) | (m_t[1] >> 16);
+ m_mx[1] = static_cast<word32>(m_t[3] << 16) | (m_t[2] >> 16);
+ m_mx[3] = static_cast<word32>(m_t[0] << 16) | (m_t[3] >> 16);
+ m_mx[5] = static_cast<word32>(m_t[1] << 16) | (m_t[0] >> 16);
+ m_mx[7] = static_cast<word32>(m_t[2] << 16) | (m_t[1] >> 16);
/* Generate initial counter values */
- m_mc[0] = ROTL32(m_t[2], 16);
- m_mc[2] = ROTL32(m_t[3], 16);
- m_mc[4] = ROTL32(m_t[0], 16);
- m_mc[6] = ROTL32(m_t[1], 16);
+ m_mc[0] = rotlConstant<16>(m_t[2]);
+ m_mc[2] = rotlConstant<16>(m_t[3]);
+ m_mc[4] = rotlConstant<16>(m_t[0]);
+ m_mc[6] = rotlConstant<16>(m_t[1]);
m_mc[1] = (m_t[0] & 0xFFFF0000) | (m_t[1] & 0xFFFF);
m_mc[3] = (m_t[1] & 0xFFFF0000) | (m_t[2] & 0xFFFF);
m_mc[5] = (m_t[2] & 0xFFFF0000) | (m_t[3] & 0xFFFF);
@@ -229,30 +221,24 @@ void RabbitWithIVPolicy::CipherResynchronize(byte *keystreamBuffer, const byte *
void RabbitWithIVPolicy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
{
- // Rabbit's bufferByteSize in AdditiveCipherTemplate
- const unsigned int BUFFER_SIZE = 16;
-
- for (unsigned int i = 0; i<iterationCount; ++i)
+ byte* out = output;
+ for (unsigned int i = 0; i<iterationCount; ++i, out += 16)
{
/* Iterate the system */
m_wcy = NextState(m_wc, m_wx, m_wcy);
/* Encrypt/decrypt 16 bytes of data */
- PutWord(false, LITTLE_ENDIAN_ORDER, output + 0, m_wx[0] ^ (m_wx[5] >> 16) ^ (m_wx[3] << 16));
- PutWord(false, LITTLE_ENDIAN_ORDER, output + 4, m_wx[2] ^ (m_wx[7] >> 16) ^ (m_wx[5] << 16));
- PutWord(false, LITTLE_ENDIAN_ORDER, output + 8, m_wx[4] ^ (m_wx[1] >> 16) ^ (m_wx[7] << 16));
- PutWord(false, LITTLE_ENDIAN_ORDER, output + 12, m_wx[6] ^ (m_wx[3] >> 16) ^ (m_wx[1] << 16));
-
- // If AdditiveCipherTemplate does not have an accumulated keystream
- // then it will ask OperateKeystream to XOR the plaintext with
- // the keystream and write it to the ciphertext buffer.
- if ((operation & INPUT_NULL) != INPUT_NULL)
- xorbuf(output, input, BUFFER_SIZE);
-
- /* Increment pointers to input and output data */
- input += BUFFER_SIZE;
- output += BUFFER_SIZE;
+ PutWord(false, LITTLE_ENDIAN_ORDER, out + 0, m_wx[0] ^ (m_wx[5] >> 16) ^ (m_wx[3] << 16));
+ PutWord(false, LITTLE_ENDIAN_ORDER, out + 4, m_wx[2] ^ (m_wx[7] >> 16) ^ (m_wx[5] << 16));
+ PutWord(false, LITTLE_ENDIAN_ORDER, out + 8, m_wx[4] ^ (m_wx[1] >> 16) ^ (m_wx[7] << 16));
+ PutWord(false, LITTLE_ENDIAN_ORDER, out + 12, m_wx[6] ^ (m_wx[3] >> 16) ^ (m_wx[1] << 16));
}
+
+ // If AdditiveCipherTemplate does not have an accumulated keystream
+ // then it will ask OperateKeystream to XOR the plaintext with
+ // the keystream and write it to the ciphertext buffer.
+ if ((operation & INPUT_NULL) != INPUT_NULL)
+ xorbuf(output, input, GetBytesPerIteration() * iterationCount);
}
NAMESPACE_END