summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- rijndael-simd.cpp 57
-rw-r--r-- rijndael.cpp 31
2 files changed, 66 insertions, 22 deletions
diff --git a/rijndael-simd.cpp b/rijndael-simd.cpp
index 32203978..bdea2da4 100644
--- a/rijndael-simd.cpp
+++ b/rijndael-simd.cpp
@@ -700,6 +700,7 @@ void Rijndael_UncheckedSetKey_SSE4_AESNI(const byte *userKey, size_t keyLen, wor
// keySize: m_key allocates 4*(rounds+1) word32's.
const size_t keySize = 4*(rounds+1);
const word32* end = rk + keySize;
+
while (true)
{
CRYPTOPP_ASSERT(rc < ro + COUNTOF(s_rconLE));
@@ -822,7 +823,7 @@ static inline uint8x16_p8 Load8x16(int off, const uint8_t src[16])
#endif
}
-static inline void Store8x16(const uint8x16_p8 src, uint8_t dest[16])
+static inline void Store8x16(const uint8x16_p8& src, uint8_t dest[16])
{
#if defined(CRYPTOPP_XLC_VERSION)
vec_xst_be(src, 0, (uint8_t*)dest);
@@ -861,7 +862,7 @@ static inline uint64x2_p8 Load64x2(int off, const uint8_t src[16])
#endif
}
-static inline void Store64x2(const uint64x2_p8 src, uint8_t dest[16])
+static inline void Store64x2(const uint64x2_p8& src, uint8_t dest[16])
{
#if defined(CRYPTOPP_XLC_VERSION)
vec_xst_be((uint8x16_p8)src, 0, (uint8_t*)dest);
@@ -992,6 +993,58 @@ inline T1 VectorDecryptLast(const T1& state, const T2& key)
//////////////////////////////////////////////////////////////////
+void Rijndael_UncheckedSetKey_POWER8(word32* rk, size_t keyLen, const word32* rc,
+ const byte* Se, unsigned int rounds)
+{
+ word32 *rk_saved = rk, temp;
+ // Expands the round-key table in place. rk[0..keyLen/4) is read before it is written, so the caller must already have stored the key words there. rc is consumed one word per loop pass; Se is a byte-indexed lookup table (presumably the AES S-box - confirm at the definition).
+ // keySize: m_key allocates 4*(rounds+1) word32's; 'end' is one past the last word of that table.
+ const size_t keySize = 4*(rounds+1);
+ const word32* end = rk + keySize;
+ // Each pass: x is temp with every byte mapped through Se and moved to a new position (byte 2 -> <<24, 1 -> <<16, 0 -> <<8, 3 -> <<0); rk[keyLen/4] mixes in x and the next rc word, and the following three words are a running XOR chain.
+ while (true)
+ {
+ temp = rk[keyLen/4-1];
+ word32 x = (word32(Se[GETBYTE(temp, 2)]) << 24) ^ (word32(Se[GETBYTE(temp, 1)]) << 16) ^
+ (word32(Se[GETBYTE(temp, 0)]) << 8) ^ Se[GETBYTE(temp, 3)];
+ rk[keyLen/4] = rk[0] ^ x ^ *(rc++);
+ rk[keyLen/4+1] = rk[1] ^ rk[keyLen/4];
+ rk[keyLen/4+2] = rk[2] ^ rk[keyLen/4+1];
+ rk[keyLen/4+3] = rk[3] ^ rk[keyLen/4+2];
+ // Stop as soon as the four words just written are the final four words of the table.
+ if (rk + keyLen/4 + 4 == end)
+ break;
+ // 24- and 32-byte keys produce more than four words per pass: two extra chained words for 24, four extra for 32 (the first of those applies Se to each byte of rk[11] without moving bytes and without an rc word).
+ if (keyLen == 24)
+ {
+ rk[10] = rk[ 4] ^ rk[ 9];
+ rk[11] = rk[ 5] ^ rk[10];
+ }
+ else if (keyLen == 32)
+ {
+ temp = rk[11];
+ rk[12] = rk[ 4] ^ (word32(Se[GETBYTE(temp, 3)]) << 24) ^ (word32(Se[GETBYTE(temp, 2)]) << 16) ^ (word32(Se[GETBYTE(temp, 1)]) << 8) ^ Se[GETBYTE(temp, 0)];
+ rk[13] = rk[ 5] ^ rk[12];
+ rk[14] = rk[ 6] ^ rk[13];
+ rk[15] = rk[ 7] ^ rk[14];
+ }
+ rk += keyLen/4;
+ }
+ // Rewind to the table start, then pass the first 16 bytes, the last 16 bytes and the (rounds-1)*16 bytes in between to ConditionalByteReverse (defined elsewhere; presumably a per-word32 swap done only when host order is not big-endian - confirm).
+ rk = rk_saved;
+ ConditionalByteReverse(BIG_ENDIAN_ORDER, rk, rk, 16);
+ ConditionalByteReverse(BIG_ENDIAN_ORDER, rk + rounds*4, rk + rounds*4, 16);
+ ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (rounds-1)*16);
+ // Little-endian builds only: ReverseByteArrayLE is additionally called on each of the rounds+1 consecutive 16-byte blocks.
+#if defined(IS_LITTLE_ENDIAN)
+ // VSX registers are big-endian. The entire subkey table must be byte
+ // reversed on little-endian systems to ensure it loads properly.
+ byte * ptr = reinterpret_cast<byte*>(rk);
+ for (unsigned int i=0; i<=rounds; i++)
+ ReverseByteArrayLE(ptr+i*16);
+#endif // IS_LITTLE_ENDIAN
+}
+
inline void POWER8_Enc_Block(VectorType &block, const word32 *subkeys, unsigned int rounds)
{
CRYPTOPP_ASSERT(IsAlignedOn(subkeys, 16));
diff --git a/rijndael.cpp b/rijndael.cpp
index 704779e4..56ea9e77 100644
--- a/rijndael.cpp
+++ b/rijndael.cpp
@@ -253,6 +253,9 @@ extern size_t Rijndael_Dec_AdvancedProcessBlocks_ARMV8(const word32 *subkeys, si
#if (CRYPTOPP_POWER8_AES_AVAILABLE)
extern void ReverseByteArrayLE(byte src[16]);
+extern void Rijndael_UncheckedSetKey_POWER8(word32* rk, size_t keyLen,
+ const word32* rc, const byte* Se, unsigned int rounds);
+
extern size_t Rijndael_Enc_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds,
const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags);
extern size_t Rijndael_Dec_AdvancedProcessBlocks_POWER8(const word32 *subkeys, size_t rounds,
@@ -265,7 +268,6 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c
m_rounds = keyLen/4 + 6;
m_key.New(4*(m_rounds+1));
-
word32 *rk = m_key;
#if (CRYPTOPP_AESNI_AVAILABLE && CRYPTOPP_SSE41_AVAILABLE && (!defined(_MSC_VER) || _MSC_VER >= 1600 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32))
@@ -286,6 +288,14 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c
const word32 *rc = rcon;
word32 temp;
+#if CRYPTOPP_POWER8_AES_AVAILABLE
+ if (HasAES())
+ {
+ Rijndael_UncheckedSetKey_POWER8(rk, keyLen, rc, Se, m_rounds);
+ return;
+ }
+#endif
+
while (true)
{
temp = rk[keyLen/4-1];
@@ -317,25 +327,6 @@ void Rijndael::Base::UncheckedSetKey(const byte *userKey, unsigned int keyLen, c
rk = m_key;
-#if CRYPTOPP_POWER8_AES_AVAILABLE
- if (HasAES())
- {
- ConditionalByteReverse(BIG_ENDIAN_ORDER, rk, rk, 16);
- ConditionalByteReverse(BIG_ENDIAN_ORDER, rk + m_rounds*4, rk + m_rounds*4, 16);
- ConditionalByteReverse(BIG_ENDIAN_ORDER, rk+4, rk+4, (m_rounds-1)*16);
-
-#if defined(IS_LITTLE_ENDIAN)
- // VSX registers are big-endian. The entire subkey table must be byte
- // reversed on little-endian systems to ensure it loads properly.
- byte * ptr = reinterpret_cast<byte*>(rk);
- for (unsigned int i=0; i<=m_rounds; i++)
- ReverseByteArrayLE(ptr+i*16);
-#endif // IS_LITTLE_ENDIAN
-
- return;
- }
-#endif // CRYPTOPP_POWER8_AES_AVAILABLE
-
if (IsForwardTransformation())
{
if (!s_TeFilled)