summaryrefslogtreecommitdiff
path: root/aria.cpp
diff options
context:
space:
mode:
author: Jeffrey Walton <noloader@gmail.com> 2017-04-11 20:47:54 -0400
committer: Jeffrey Walton <noloader@gmail.com> 2017-04-11 20:47:54 -0400
commit: af561758dfbd96c373ae3e4539516031f1b7055c (patch)
tree: 49c2f06707cbeb3878527d230cda91eb7169443b /aria.cpp
parent: d6b295203b75306bad49d006befe3af7ee561a61 (diff)
download: cryptopp-git-af561758dfbd96c373ae3e4539516031f1b7055c.tar.gz
Rework ARIA_GSRK to have MSVC generate "rotate imm" rather than "rot reg"
The immediate version of rotate can be 4 to 6 times faster than the register version
Diffstat (limited to 'aria.cpp')
-rw-r--r--aria.cpp112
1 files changed, 64 insertions, 48 deletions
diff --git a/aria.cpp b/aria.cpp
index fabc7ae6..a2629320 100644
--- a/aria.cpp
+++ b/aria.cpp
@@ -171,9 +171,7 @@ ANONYMOUS_NAMESPACE_END
NAMESPACE_BEGIN(CryptoPP)
-#define ARIA_WO(X,Y) (((word32 *)(X))[Y])
-
-inline byte ARIA_BRF(const word32 x, int y) {
+inline byte ARIA_BRF(const word32 x, const int y) {
return GETBYTE(x, y);
}
@@ -181,13 +179,28 @@ inline word32 ReverseWord(const word32 w) {
return ByteReverse(w);
}
-inline word32 LoadWord(const word32 x) {
- return ConditionalByteReverse(BIG_ENDIAN_ORDER, x);
+// Retrieve the i-th word, optionally in Big Endian
+template <bool big_endian>
+inline word32 LoadWord(const word32 x[4], const unsigned int i) {
+ if (big_endian)
+ return ConditionalByteReverse(BIG_ENDIAN_ORDER, x[i]);
+ else
+ return x[i];
+}
+
+// Reinterpret x as a word32[], and retrieve the i-th word, optionally in Big Endian
+template <bool big_endian>
+inline word32 LoadWord(const byte x[16], const unsigned int i) {
+ if (big_endian)
+ return ConditionalByteReverse(BIG_ENDIAN_ORDER, reinterpret_cast<const word32*>(x)[i]);
+ else
+ return reinterpret_cast<const word32*>(x)[i];
}
// Key XOR Layer
#define ARIA_KXL { \
- t[0]^=ARIA_WO(rk,0); t[1]^=ARIA_WO(rk,1); t[2]^=ARIA_WO(rk,2); t[3]^=ARIA_WO(rk,3); \
+ t[0]^=LoadWord<false>(rk,0); t[1]^=LoadWord<false>(rk,1); \
+ t[2]^=LoadWord<false>(rk,2); t[3]^=LoadWord<false>(rk,3); \
}
// S-Box Layer 1 + M
@@ -217,19 +230,6 @@ inline word32 LoadWord(const word32 x) {
(T3) = ReverseWord((T3)); \
}
-#define ARIA_FO {SBL1_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3])}
-#define ARIA_FE {SBL2_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[2],t[3],t[0],t[1]) ARIA_MM(t[0],t[1],t[2],t[3])}
-
-// n-bit right shift of Y XORed to X
-#define ARIA_GSRK(RK, X, Y, n) { \
- q = 4-((n)/32); \
- r = (n) % 32; \
- ARIA_WO((RK),0) = ((X)[0]) ^ (((Y)[(q )%4])>>r) ^ (((Y)[(q+3)%4])<<(32-r)); \
- ARIA_WO((RK),1) = ((X)[1]) ^ (((Y)[(q+1)%4])>>r) ^ (((Y)[(q )%4])<<(32-r)); \
- ARIA_WO((RK),2) = ((X)[2]) ^ (((Y)[(q+2)%4])>>r) ^ (((Y)[(q+1)%4])<<(32-r)); \
- ARIA_WO((RK),3) = ((X)[3]) ^ (((Y)[(q+3)%4])>>r) ^ (((Y)[(q+2)%4])<<(32-r)); \
- }
-
#if defined(_MSC_VER)
#define ARIA_M1(X,Y) { \
w=rotrFixed((X), 8); \
@@ -241,6 +241,22 @@ inline word32 LoadWord(const word32 x) {
}
#endif
+#define ARIA_FO {SBL1_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3])}
+#define ARIA_FE {SBL2_M(t[0],t[1],t[2],t[3]) ARIA_MM(t[0],t[1],t[2],t[3]) ARIA_P(t[2],t[3],t[0],t[1]) ARIA_MM(t[0],t[1],t[2],t[3])}
+
+// N-bit right rotation of the 128-bit value Y (four word32s), XORed with X and stored to RK
+template <unsigned int N>
+inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], byte RK[16])
+{
+ // MSVC is not generating a "rotate immediate". Unroll and constify to help it along.
+ static const unsigned int Q = 4-(N/32);
+ static const unsigned int R = N % 32;
+ reinterpret_cast<word32*>(RK)[0] = (X[0]) ^ ((Y[(Q )%4])>>R) ^ ((Y[(Q+3)%4])<<(32-R));
+ reinterpret_cast<word32*>(RK)[1] = (X[1]) ^ ((Y[(Q+1)%4])>>R) ^ ((Y[(Q )%4])<<(32-R));
+ reinterpret_cast<word32*>(RK)[2] = (X[2]) ^ ((Y[(Q+2)%4])>>R) ^ ((Y[(Q+1)%4])<<(32-R));
+ reinterpret_cast<word32*>(RK)[3] = (X[3]) ^ ((Y[(Q+3)%4])>>R) ^ ((Y[(Q+2)%4])<<(32-R));
+ }
+
void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const NameValuePairs &params)
{
CRYPTOPP_ASSERT(key && keylen);
@@ -275,9 +291,8 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
// w0 has room for 32 bytes. w1-w3 each has room for 16 bytes. t is a 16 byte temp area.
word32 *w0 = m_w.data(), *w1 = m_w.data()+8, *w2 = m_w.data()+12, *w3 = m_w.data()+16, *t = m_w.data()+20;
- w0[0] = LoadWord(ARIA_WO(mk,0)); w0[1] = LoadWord(ARIA_WO(mk,1));
- w0[0] = LoadWord(ARIA_WO(mk,0)); w0[1] = LoadWord(ARIA_WO(mk,1));
- w0[2] = LoadWord(ARIA_WO(mk,2)); w0[3] = LoadWord(ARIA_WO(mk,3));
+ w0[0] = LoadWord<true>(mk,0); w0[1] = LoadWord<true>(mk,1);
+ w0[2] = LoadWord<true>(mk,2); w0[3] = LoadWord<true>(mk,3);
t[0]=w0[0]^KRK[q][0]; t[1]=w0[1]^KRK[q][1];
t[2]=w0[2]^KRK[q][2]; t[3]=w0[3]^KRK[q][3];
@@ -285,13 +300,13 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
if (keyBits > 128)
{
- w1[0] = LoadWord(ARIA_WO(mk,4));
- w1[1] = LoadWord(ARIA_WO(mk,5));
+ w1[0] = LoadWord<true>(mk,4);
+ w1[1] = LoadWord<true>(mk,5);
if (keyBits > 192)
{
- w1[2] = LoadWord(ARIA_WO(mk,6));
- w1[3] = LoadWord(ARIA_WO(mk,7));
+ w1[2] = LoadWord<true>(mk,6);
+ w1[3] = LoadWord<true>(mk,7);
}
else
{
@@ -317,29 +332,29 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
ARIA_FO;
w3[0]=t[0]^w1[0]; w3[1]=t[1]^w1[1]; w3[2]=t[2]^w1[2]; w3[3]=t[3]^w1[3];
- ARIA_GSRK(rk + 0, w0, w1, 19);
- ARIA_GSRK(rk + 16, w1, w2, 19);
- ARIA_GSRK(rk + 32, w2, w3, 19);
- ARIA_GSRK(rk + 48, w3, w0, 19);
- ARIA_GSRK(rk + 64, w0, w1, 31);
- ARIA_GSRK(rk + 80, w1, w2, 31);
- ARIA_GSRK(rk + 96, w2, w3, 31);
- ARIA_GSRK(rk + 112, w3, w0, 31);
- ARIA_GSRK(rk + 128, w0, w1, 67);
- ARIA_GSRK(rk + 144, w1, w2, 67);
- ARIA_GSRK(rk + 160, w2, w3, 67);
- ARIA_GSRK(rk + 176, w3, w0, 67);
- ARIA_GSRK(rk + 192, w0, w1, 97);
+ ARIA_GSRK<19>(w0, w1, rk + 0);
+ ARIA_GSRK<19>(w1, w2, rk + 16);
+ ARIA_GSRK<19>(w2, w3, rk + 32);
+ ARIA_GSRK<19>(w3, w0, rk + 48);
+ ARIA_GSRK<31>(w0, w1, rk + 64);
+ ARIA_GSRK<31>(w1, w2, rk + 80);
+ ARIA_GSRK<31>(w2, w3, rk + 96);
+ ARIA_GSRK<31>(w3, w0, rk + 112);
+ ARIA_GSRK<67>(w0, w1, rk + 128);
+ ARIA_GSRK<67>(w1, w2, rk + 144);
+ ARIA_GSRK<67>(w2, w3, rk + 160);
+ ARIA_GSRK<67>(w3, w0, rk + 176);
+ ARIA_GSRK<97>(w0, w1, rk + 192);
if (keyBits > 128)
{
- ARIA_GSRK(rk + 208, w1, w2, 97);
- ARIA_GSRK(rk + 224, w2, w3, 97);
+ ARIA_GSRK<97>(w1, w2, rk + 208);
+ ARIA_GSRK<97>(w2, w3, rk + 224);
if (keyBits > 192)
{
- ARIA_GSRK(rk + 240, w3, w0, 97);
- ARIA_GSRK(rk + 256, w0, w1, 109);
+ ARIA_GSRK< 97>(w3, w0, rk + 240);
+ ARIA_GSRK<109>(w0, w1, rk + 256);
}
}
@@ -383,8 +398,8 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
const byte *rk = reinterpret_cast<const byte*>(m_rk.data());
word32 *t = const_cast<word32*>(m_w.data()+20);
- t[0] = LoadWord(ARIA_WO(i,0)); t[1] = LoadWord(ARIA_WO(i,1));
- t[2] = LoadWord(ARIA_WO(i,2)); t[3] = LoadWord(ARIA_WO(i,3));
+ t[0] = LoadWord<true>(i,0); t[1] = LoadWord<true>(i,1);
+ t[2] = LoadWord<true>(i,2); t[3] = LoadWord<true>(i,3);
if (m_rounds > 12) {
ARIA_KXL rk+= 16; ARIA_FO
@@ -421,6 +436,7 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
o[14] = (byte)(S1[ARIA_BRF(t[3],1)] ) ^ rk[13];
o[15] = (byte)(S2[ARIA_BRF(t[3],0)] ) ^ rk[12];
#else
+ #define ARIA_WORD(X,Y) (((word32 *)(X))[Y])
o[ 0] = (byte)(X1[ARIA_BRF(t[0],3)] );
o[ 1] = (byte)(X2[ARIA_BRF(t[0],2)]>>8);
o[ 2] = (byte)(S1[ARIA_BRF(t[0],1)] );
@@ -437,12 +453,12 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
o[13] = (byte)(X2[ARIA_BRF(t[3],2)]>>8);
o[14] = (byte)(S1[ARIA_BRF(t[3],1)] );
o[15] = (byte)(S2[ARIA_BRF(t[3],0)] );
- ARIA_WO(o,0)^=ARIA_WO(rk,0); ARIA_WO(o,1)^=ARIA_WO(rk,1);
- ARIA_WO(o,2)^=ARIA_WO(rk,2); ARIA_WO(o,3)^=ARIA_WO(rk,3);
+ ARIA_WORD(o,0)^=LoadWord<true>(rk,0); ARIA_WORD(o,1)^=LoadWord<true>(rk,1);
+ ARIA_WORD(o,2)^=LoadWord<true>(rk,2); ARIA_WORD(o,3)^=LoadWord<true>(rk,3);
#endif
if (x)
- for (size_t n=0; n<16; ++n)
+ for (unsigned int n=0; n<16; ++n)
o[n] ^= x[n];
}