summary | refs | log | tree | commit | diff
path: root/aria.cpp
diff options
context:
space:
mode:
author: Jeffrey Walton <noloader@gmail.com> 2017-04-16 13:00:45 -0400
committer: Jeffrey Walton <noloader@gmail.com> 2017-04-16 13:00:45 -0400
commit: 1d1a1507372b7be7cbccdfec818a9f9985be03c6 (patch)
tree: ce583dc1073d86651e09975751256ac89cccb528 /aria.cpp
parent: ddc0f3a899ff235794108dc9289b689a4508112e (diff)
download: cryptopp-git-1d1a1507372b7be7cbccdfec818a9f9985be03c6.tar.gz
Avoid extra loads of workspace variables
Diffstat (limited to 'aria.cpp')
-rw-r--r-- aria.cpp 24
1 files changed, 11 insertions, 13 deletions
diff --git a/aria.cpp b/aria.cpp
index 10bbb2e5..3f5b9058 100644
--- a/aria.cpp
+++ b/aria.cpp
@@ -248,21 +248,16 @@ inline void ARIA_GSRK(const word32 X[4], const word32 Y[4], byte RK[16])
#if CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
template <unsigned int N>
-inline void ARIA_GSRK_NEON(const word32 X[4], const word32 Y[4], byte RK[16])
+inline void ARIA_GSRK_NEON(const uint32x4_t X, const uint32x4_t Y, byte RK[16])
{
static const unsigned int Q1 = (4-(N/32)) % 4;
static const unsigned int Q2 = (3-(N/32)) % 4;
static const unsigned int R = N % 32;
- const uint32x4_t a = vld1q_u32((const uint32_t*)X);
- const uint32x4_t t = vld1q_u32((const uint32_t*)Y);
- const uint32x4_t b = vextq_u32(t, t, Q1);
- const uint32x4_t c = vextq_u32(t, t, Q2);
-
vst1q_u32(reinterpret_cast<uint32_t*>(RK),
- veorq_u32(a, veorq_u32(
- vshrq_n_u32(b, R),
- vshlq_n_u32(c, 32-R))));
+ veorq_u32(X, veorq_u32(
+ vshrq_n_u32(vextq_u32(Y, Y, Q1), R),
+ vshlq_n_u32(vextq_u32(Y, Y, Q2), 32-R))));
}
#endif
@@ -401,6 +396,11 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
#if CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS
if (HasNEON())
{
+ const uint32x4_t w0 = vld1q_u32((const uint32_t*)(m_w.data()+0));
+ const uint32x4_t w1 = vld1q_u32((const uint32_t*)(m_w.data()+8));
+ const uint32x4_t w2 = vld1q_u32((const uint32_t*)(m_w.data()+12));
+ const uint32x4_t w3 = vld1q_u32((const uint32_t*)(m_w.data()+16));
+
ARIA_GSRK_NEON<19>(w0, w1, rk + 0);
ARIA_GSRK_NEON<19>(w1, w2, rk + 16);
ARIA_GSRK_NEON<19>(w2, w3, rk + 32);
@@ -467,7 +467,7 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
#if CRYPTOPP_ENABLE_ARIA_SSE2_INTRINSICS
if (HasSSE2())
{
- a=reinterpret_cast<word32*>(rk); s=m_w.data()+24; z=a+r*4;
+ a=reinterpret_cast<word32*>(rk); s=m_w.data()+24; z=a+r*4;
_mm_store_si128((__m128i*)t, _mm_load_si128((const __m128i*)a));
_mm_store_si128((__m128i*)a, _mm_load_si128((const __m128i*)z));
_mm_store_si128((__m128i*)z, _mm_load_si128((const __m128i*)t));
@@ -492,7 +492,7 @@ void ARIA::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const Nam
else
#endif
{
- a=reinterpret_cast<word32*>(rk); s=m_w.data()+24; z=a+r*4;
+ a=reinterpret_cast<word32*>(rk); s=m_w.data()+24; z=a+r*4;
::memcpy(t, a, 16); ::memcpy(a, z, 16); ::memcpy(z, t, 16);
a+=4; z-=4;
@@ -585,7 +585,6 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
_mm_loadu_si128((const __m128i*)(outBlock)),
_mm_loadu_si128((const __m128i*)(xorBlock))));
}
-
return;
}
else
@@ -641,7 +640,6 @@ void ARIA::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, b
vld1q_u32((const uint32_t*)outBlock),
vld1q_u32((const uint32_t*)xorBlock)));
}
- return;
}
else
#endif // CRYPTOPP_ENABLE_ARIA_NEON_INTRINSICS