diff options
author | Jeffrey Walton <noloader@gmail.com> | 2019-01-16 00:02:04 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-01-16 00:02:04 -0500 |
commit | df9fa62205f2d341e2b1b26595a3a1b6377c60c5 (patch) | |
tree | d4b2b34861e76a9c7c7ec1d0b0e6fe4faf25c3d6 /adv_simd.h | |
parent | 982655845a784a9a4cfbc92221359a25a74184a3 (diff) | |
download | cryptopp-git-df9fa62205f2d341e2b1b26595a3a1b6377c60c5.tar.gz |
Use carryless multiplies for NIST b233 and k233 curves (GH #783, PR #784)
Use carryless multiplies for NIST b233 and k233 curves.
Diffstat (limited to 'adv_simd.h')
-rw-r--r-- | adv_simd.h | 59 |
1 files changed, 16 insertions, 43 deletions
@@ -59,17 +59,6 @@ # include <arm_acle.h>
#endif
-// Thanks to Peter Cordes, https://stackoverflow.com/q/54016821/608639
-#if (CRYPTOPP_ARM_NEON_AVAILABLE)
-# ifndef PACK32x4
-# if defined(_MSC_VER)
-# define PACK32x4(w,x,y,z) { ((w) + (word64(x) << 32)), ((y) + (word64(z) << 32)) }
-# else
-# define PACK32x4(w,x,y,z) { (w), (x), (y), (z) }
-# endif
-# endif // PACK32x4
-#endif // Microsoft workaround
-
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
# include <emmintrin.h>
# include <xmmintrin.h>
@@ -124,14 +113,10 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6, CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 8);
-#if (CRYPTOPP_LITTLE_ENDIAN)
- const uint32x4_t s_one = PACK32x4(0, 0, 0, 1<<24);
- const uint32x4_t s_two = PACK32x4(0, 2<<24, 0, 2<<24);
-#else
- // TODO: verify these constants on ARM-BE
- //const uint32x4_t s_one = PACK32x4(0, 0, 0, 1);
- //const uint32x4_t s_two = PACK32x4(0, 2, 0, 2);
-#endif
+ const unsigned int w_one[] = {0, 0<<24, 0, 1<<24};
+ const unsigned int w_two[] = {0, 2<<24, 0, 2<<24};
+ const uint32x4_t s_one = vld1q_u32(w_one);
+ const uint32x4_t s_two = vld1q_u32(w_two);
const size_t blockSize = 8;
const size_t neonBlockSize = 16;
@@ -369,14 +354,10 @@ inline size_t AdvancedProcessBlocks128_6x1_NEON(F1 func1, F6 func6, CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
-#if (CRYPTOPP_LITTLE_ENDIAN)
- const uint32x4_t s_one = PACK32x4(0, 0, 0, 1<<24);
- //const uint32x4_t s_two = PACK32x4(0, 2<<24, 0, 2<<24);
-#else
- // TODO: verify these constants on ARM-BE
- //const uint32x4_t s_one = PACK32x4(0, 0, 0, 1);
- //const uint32x4_t s_two = PACK32x4(0, 2, 0, 2);
-#endif
+ const unsigned int w_one[] = {0, 0<<24, 0, 1<<24};
+ const unsigned int w_two[] = {0, 2<<24, 0, 2<<24};
+ const uint32x4_t s_one = vld1q_u32(w_one);
+ const uint32x4_t s_two = vld1q_u32(w_two);
const size_t blockSize = 16;
// const size_t neonBlockSize = 16;
@@ -529,14 +510,10 @@ inline size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4, CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
-#if (CRYPTOPP_LITTLE_ENDIAN)
- const uint32x4_t s_one = PACK32x4(0, 0, 0, 1<<24);
- //const uint32x4_t s_two = PACK32x4(0, 2<<24, 0, 2<<24);
-#else
- // TODO: verify these constants on ARM-BE
- //const uint32x4_t s_one = PACK32x4(0, 0, 0, 1);
- //const uint32x4_t s_two = PACK32x4(0, 2, 0, 2);
-#endif
+ const unsigned int w_one[] = {0, 0<<24, 0, 1<<24};
+ const unsigned int w_two[] = {0, 2<<24, 0, 2<<24};
+ const uint32x4_t s_one = vld1q_u32(w_one);
+ const uint32x4_t s_two = vld1q_u32(w_two);
const size_t blockSize = 16;
// const size_t neonBlockSize = 16;
@@ -669,14 +646,10 @@ inline size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6, CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
-#if (CRYPTOPP_LITTLE_ENDIAN)
- const uint32x4_t s_one = PACK32x4(0, 0, 0, 1<<24);
- //const uint32x4_t s_two = PACK32x4(0, 2<<24, 0, 2<<24);
-#else
- // TODO: verify these constants on ARM-BE
- //const uint32x4_t s_one = PACK32x4(0, 0, 0, 1);
- //const uint32x4_t s_two = PACK32x4(0, 2, 0, 2);
-#endif
+ const unsigned int w_one[] = {0, 0<<24, 0, 1<<24};
+ const unsigned int w_two[] = {0, 2<<24, 0, 2<<24};
+ const uint32x4_t s_one = vld1q_u32(w_one);
+ const uint32x4_t s_two = vld1q_u32(w_two);
const size_t blockSize = 16;
// const size_t neonBlockSize = 16;
|