summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeffrey Walton <noloader@gmail.com>2018-11-09 11:56:47 -0500
committerJeffrey Walton <noloader@gmail.com>2018-11-09 11:56:47 -0500
commit656be82a8fd798d5242e553ced508760b215288b (patch)
tree799c2731f23af998231e9794103c6747777f9630
parentca9d0f10f6baa115b2b974057af39c175fba2cc7 (diff)
downloadcryptopp-git-656be82a8fd798d5242e553ced508760b215288b.tar.gz
Cleanup ARIA SSE and NEON code
-rw-r--r--aria-simd.cpp97
-rw-r--r--aria.cpp31
2 files changed, 82 insertions, 46 deletions
diff --git a/aria-simd.cpp b/aria-simd.cpp
index ce16ada5..9c741d7e 100644
--- a/aria-simd.cpp
+++ b/aria-simd.cpp
@@ -29,10 +29,6 @@
#define M128_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
-// GCC cast warning
-#define UINT32_CAST(x) ((uint32_t *)(void *)(x))
-#define CONST_UINT32_CAST(x) ((const uint32_t *)(const void *)(x))
-
NAMESPACE_BEGIN(CryptoPP)
NAMESPACE_BEGIN(ARIATab)
@@ -45,6 +41,17 @@ extern const word32 KRK[3][4];
NAMESPACE_END
NAMESPACE_END
+ANONYMOUS_NAMESPACE_BEGIN
+
+using CryptoPP::byte;
+using CryptoPP::word32;
+
+inline byte ARIA_BRF(const word32 x, const int y) {
+ return GETBYTE(x, y);
+}
+
+ANONYMOUS_NAMESPACE_END
+
NAMESPACE_BEGIN(CryptoPP)
using CryptoPP::ARIATab::S1;
@@ -58,22 +65,23 @@ using CryptoPP::ARIATab::KRK;
template <unsigned int N>
inline void ARIA_GSRK_NEON(const uint32x4_t X, const uint32x4_t Y, byte RK[16])
{
- static const unsigned int Q1 = (4-(N/32)) % 4;
- static const unsigned int Q2 = (3-(N/32)) % 4;
- static const unsigned int R = N % 32;
+ enum { Q1 = (4-(N/32)) % 4,
+ Q2 = (3-(N/32)) % 4,
+ R = N % 32
+ };
- vst1q_u32(UINT32_CAST(RK),
+ vst1q_u8(RK, vreinterpretq_u8_u32(
veorq_u32(X, veorq_u32(
vshrq_n_u32(vextq_u32(Y, Y, Q1), R),
- vshlq_n_u32(vextq_u32(Y, Y, Q2), 32-R))));
+ vshlq_n_u32(vextq_u32(Y, Y, Q2), 32-R)))));
}
void ARIA_UncheckedSetKey_Schedule_NEON(byte* rk, word32* ws, unsigned int keylen)
{
- const uint32x4_t w0 = vld1q_u32(CONST_UINT32_CAST(ws+ 0));
- const uint32x4_t w1 = vld1q_u32(CONST_UINT32_CAST(ws+ 8));
- const uint32x4_t w2 = vld1q_u32(CONST_UINT32_CAST(ws+12));
- const uint32x4_t w3 = vld1q_u32(CONST_UINT32_CAST(ws+16));
+ const uint32x4_t w0 = vld1q_u32(ws+ 0);
+ const uint32x4_t w1 = vld1q_u32(ws+ 8);
+ const uint32x4_t w2 = vld1q_u32(ws+12);
+ const uint32x4_t w3 = vld1q_u32(ws+16);
ARIA_GSRK_NEON<19>(w0, w1, rk + 0);
ARIA_GSRK_NEON<19>(w1, w2, rk + 16);
@@ -102,22 +110,49 @@ void ARIA_UncheckedSetKey_Schedule_NEON(byte* rk, word32* ws, unsigned int keyle
}
}
-void ARIA_ProcessAndXorBlock_Xor_NEON(const byte* xorBlock, byte* outBlock)
+void ARIA_ProcessAndXorBlock_NEON(const byte* xorBlock, byte* outBlock, const byte *rk, word32 *t)
{
- vst1q_u32(UINT32_CAST(outBlock), veorq_u32(
- vld1q_u32(CONST_UINT32_CAST(outBlock)),
- vld1q_u32(CONST_UINT32_CAST(xorBlock))));
+ outBlock[ 0] = (byte)(X1[ARIA_BRF(t[0],3)] );
+ outBlock[ 1] = (byte)(X2[ARIA_BRF(t[0],2)]>>8);
+ outBlock[ 2] = (byte)(S1[ARIA_BRF(t[0],1)] );
+ outBlock[ 3] = (byte)(S2[ARIA_BRF(t[0],0)] );
+ outBlock[ 4] = (byte)(X1[ARIA_BRF(t[1],3)] );
+ outBlock[ 5] = (byte)(X2[ARIA_BRF(t[1],2)]>>8);
+ outBlock[ 6] = (byte)(S1[ARIA_BRF(t[1],1)] );
+ outBlock[ 7] = (byte)(S2[ARIA_BRF(t[1],0)] );
+ outBlock[ 8] = (byte)(X1[ARIA_BRF(t[2],3)] );
+ outBlock[ 9] = (byte)(X2[ARIA_BRF(t[2],2)]>>8);
+ outBlock[10] = (byte)(S1[ARIA_BRF(t[2],1)] );
+ outBlock[11] = (byte)(S2[ARIA_BRF(t[2],0)] );
+ outBlock[12] = (byte)(X1[ARIA_BRF(t[3],3)] );
+ outBlock[13] = (byte)(X2[ARIA_BRF(t[3],2)]>>8);
+ outBlock[14] = (byte)(S1[ARIA_BRF(t[3],1)] );
+ outBlock[15] = (byte)(S2[ARIA_BRF(t[3],0)] );
+
+ // 'outBlock' and 'xorBlock' may be unaligned.
+ if (xorBlock != NULLPTR)
+ {
+ vst1q_u8(outBlock,
+ veorq_u8(
+ vld1q_u8(xorBlock),
+ veorq_u8(
+ vld1q_u8(outBlock),
+ vrev32q_u8(vld1q_u8((rk))))));
+ }
+ else
+ {
+ vst1q_u8(outBlock,
+ veorq_u8(
+ vld1q_u8(outBlock),
+ vrev32q_u8(vld1q_u8(rk))));
+ }
}
#endif // CRYPTOPP_ARM_NEON_AVAILABLE
#if (CRYPTOPP_SSSE3_AVAILABLE)
-inline byte ARIA_BRF(const word32 x, const int y) {
- return GETBYTE(x, y);
-}
-
-void ARIA_ProcessAndXorBlock_Xor_SSSE3(const byte* xorBlock, byte* outBlock, const byte *rk, word32 *t)
+void ARIA_ProcessAndXorBlock_SSSE3(const byte* xorBlock, byte* outBlock, const byte *rk, word32 *t)
{
const __m128i MASK = _mm_set_epi8(12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3);
@@ -138,18 +173,22 @@ void ARIA_ProcessAndXorBlock_Xor_SSSE3(const byte* xorBlock, byte* outBlock, con
outBlock[14] = (byte)(S1[ARIA_BRF(t[3],1)] );
outBlock[15] = (byte)(S2[ARIA_BRF(t[3],0)] );
- // 'outBlock' may be unaligned.
- _mm_storeu_si128(M128_CAST(outBlock),
- _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(outBlock)),
- _mm_shuffle_epi8(_mm_load_si128(CONST_M128_CAST(rk)), MASK)));
-
// 'outBlock' and 'xorBlock' may be unaligned.
if (xorBlock != NULLPTR)
{
_mm_storeu_si128(M128_CAST(outBlock),
_mm_xor_si128(
- _mm_loadu_si128(CONST_M128_CAST(outBlock)),
- _mm_loadu_si128(CONST_M128_CAST(xorBlock))));
+ _mm_loadu_si128(CONST_M128_CAST(xorBlock)),
+ _mm_xor_si128(
+ _mm_loadu_si128(CONST_M128_CAST(outBlock)),
+ _mm_shuffle_epi8(_mm_load_si128(CONST_M128_CAST(rk)), MASK)))
+ );
+ }
+ else
+ {
+ _mm_storeu_si128(M128_CAST(outBlock),
+ _mm_xor_si128(_mm_loadu_si128(CONST_M128_CAST(outBlock)),
+ _mm_shuffle_epi8(_mm_load_si128(CONST_M128_CAST(rk)), MASK)));
}
}
diff --git a/aria.cpp b/aria.cpp
index 52cc2eb2..8f3f221a 100644
--- a/aria.cpp
+++ b/aria.cpp
@@ -85,11 +85,11 @@ inline byte ARIA_BRF(const word32 x, const int y) {
#if (CRYPTOPP_ARM_NEON_AVAILABLE)
extern void ARIA_UncheckedSetKey_Schedule_NEON(byte* rk, word32* ws, unsigned int keylen);
-extern void ARIA_ProcessAndXorBlock_Xor_NEON(const byte* xorBlock, byte* outblock);
+extern void ARIA_ProcessAndXorBlock_NEON(const byte* xorBlock, byte* outblock, const byte *rk, word32 *t);
#endif
#if (CRYPTOPP_SSSE3_AVAILABLE)
-extern void ARIA_ProcessAndXorBlock_Xor_SSSE3(const byte* xorBlock, byte* outBlock, const byte *rk, word32 *t);
+extern void ARIA_ProcessAndXorBlock_SSSE3(const byte* xorBlock, byte* outBlock, const byte *rk, word32 *t);
#endif
// n-bit right shift of Y XORed to X
@@ -283,12 +283,19 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
#if CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS
if (HasSSSE3())
{
- ARIA_ProcessAndXorBlock_Xor_SSSE3(xorBlock, outBlock, rk, t);
+ ARIA_ProcessAndXorBlock_SSSE3(xorBlock, outBlock, rk, t);
return;
}
else
#endif // CRYPTOPP_ENABLE_ARIA_SSSE3_INTRINSICS
-
+#if (CRYPTOPP_ARM_NEON_AVAILABLE)
+ if (HasNEON())
+ {
+ ARIA_ProcessAndXorBlock_NEON(xorBlock, outBlock, rk, t);
+ return;
+ }
+ else
+#endif // CRYPTOPP_ARM_NEON_AVAILABLE
#if (CRYPTOPP_LITTLE_ENDIAN)
{
outBlock[ 0] = (byte)(X1[ARIA_BRF(t[0],3)] ) ^ rk[ 3];
@@ -329,19 +336,9 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
}
#endif // CRYPTOPP_LITTLE_ENDIAN
-#if CRYPTOPP_ARM_NEON_AVAILABLE
- if (HasNEON())
- {
- if (xorBlock != NULLPTR)
- ARIA_ProcessAndXorBlock_Xor_NEON(xorBlock, outBlock);
- }
- else
-#endif // CRYPTOPP_ARM_NEON_AVAILABLE
- {
- if (xorBlock != NULLPTR)
- for (unsigned int n=0; n<ARIA::BLOCKSIZE; ++n)
- outBlock[n] ^= xorBlock[n];
- }
+ if (xorBlock != NULLPTR)
+ for (unsigned int n=0; n<ARIA::BLOCKSIZE; ++n)
+ outBlock[n] ^= xorBlock[n];
}
NAMESPACE_END