summary | refs | log | tree | commit | diff
path: root/adv_simd.h
diff options
context:
space:
mode:
author: Jeffrey Walton <noloader@gmail.com> 2019-01-16 00:02:04 -0500
committer: GitHub <noreply@github.com> 2019-01-16 00:02:04 -0500
commit: df9fa62205f2d341e2b1b26595a3a1b6377c60c5 (patch)
tree: d4b2b34861e76a9c7c7ec1d0b0e6fe4faf25c3d6 /adv_simd.h
parent: 982655845a784a9a4cfbc92221359a25a74184a3 (diff)
download: cryptopp-git-df9fa62205f2d341e2b1b26595a3a1b6377c60c5.tar.gz
Use carryless multiplies for NIST b233 and k233 curves (GH #783, PR #784)
Use carryless multiplies for NIST b233 and k233 curves.
Diffstat (limited to 'adv_simd.h')
-rw-r--r-- adv_simd.h 59
1 files changed, 16 insertions, 43 deletions
diff --git a/adv_simd.h b/adv_simd.h
index 517f0c29..37696c2c 100644
--- a/adv_simd.h
+++ b/adv_simd.h
@@ -59,17 +59,6 @@
# include <arm_acle.h>
#endif
-// Thanks to Peter Cordes, https://stackoverflow.com/q/54016821/608639
-#if (CRYPTOPP_ARM_NEON_AVAILABLE)
-# ifndef PACK32x4
-# if defined(_MSC_VER)
-# define PACK32x4(w,x,y,z) { ((w) + (word64(x) << 32)), ((y) + (word64(z) << 32)) }
-# else
-# define PACK32x4(w,x,y,z) { (w), (x), (y), (z) }
-# endif
-# endif // PACK32x4
-#endif // Microsoft workaround
-
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
# include <emmintrin.h>
# include <xmmintrin.h>
@@ -124,14 +113,10 @@ inline size_t AdvancedProcessBlocks64_6x2_NEON(F2 func2, F6 func6,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 8);
-#if (CRYPTOPP_LITTLE_ENDIAN)
- const uint32x4_t s_one = PACK32x4(0, 0, 0, 1<<24);
- const uint32x4_t s_two = PACK32x4(0, 2<<24, 0, 2<<24);
-#else
- // TODO: verify these constants on ARM-BE
- //const uint32x4_t s_one = PACK32x4(0, 0, 0, 1);
- //const uint32x4_t s_two = PACK32x4(0, 2, 0, 2);
-#endif
+ const unsigned int w_one[] = {0, 0<<24, 0, 1<<24};
+ const unsigned int w_two[] = {0, 2<<24, 0, 2<<24};
+ const uint32x4_t s_one = vld1q_u32(w_one);
+ const uint32x4_t s_two = vld1q_u32(w_two);
const size_t blockSize = 8;
const size_t neonBlockSize = 16;
@@ -369,14 +354,10 @@ inline size_t AdvancedProcessBlocks128_6x1_NEON(F1 func1, F6 func6,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
-#if (CRYPTOPP_LITTLE_ENDIAN)
- const uint32x4_t s_one = PACK32x4(0, 0, 0, 1<<24);
- //const uint32x4_t s_two = PACK32x4(0, 2<<24, 0, 2<<24);
-#else
- // TODO: verify these constants on ARM-BE
- //const uint32x4_t s_one = PACK32x4(0, 0, 0, 1);
- //const uint32x4_t s_two = PACK32x4(0, 2, 0, 2);
-#endif
+ const unsigned int w_one[] = {0, 0<<24, 0, 1<<24};
+ const unsigned int w_two[] = {0, 2<<24, 0, 2<<24};
+ const uint32x4_t s_one = vld1q_u32(w_one);
+ const uint32x4_t s_two = vld1q_u32(w_two);
const size_t blockSize = 16;
// const size_t neonBlockSize = 16;
@@ -529,14 +510,10 @@ inline size_t AdvancedProcessBlocks128_4x1_NEON(F1 func1, F4 func4,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
-#if (CRYPTOPP_LITTLE_ENDIAN)
- const uint32x4_t s_one = PACK32x4(0, 0, 0, 1<<24);
- //const uint32x4_t s_two = PACK32x4(0, 2<<24, 0, 2<<24);
-#else
- // TODO: verify these constants on ARM-BE
- //const uint32x4_t s_one = PACK32x4(0, 0, 0, 1);
- //const uint32x4_t s_two = PACK32x4(0, 2, 0, 2);
-#endif
+ const unsigned int w_one[] = {0, 0<<24, 0, 1<<24};
+ const unsigned int w_two[] = {0, 2<<24, 0, 2<<24};
+ const uint32x4_t s_one = vld1q_u32(w_one);
+ const uint32x4_t s_two = vld1q_u32(w_two);
const size_t blockSize = 16;
// const size_t neonBlockSize = 16;
@@ -669,14 +646,10 @@ inline size_t AdvancedProcessBlocks128_6x2_NEON(F2 func2, F6 func6,
CRYPTOPP_ASSERT(outBlocks);
CRYPTOPP_ASSERT(length >= 16);
-#if (CRYPTOPP_LITTLE_ENDIAN)
- const uint32x4_t s_one = PACK32x4(0, 0, 0, 1<<24);
- //const uint32x4_t s_two = PACK32x4(0, 2<<24, 0, 2<<24);
-#else
- // TODO: verify these constants on ARM-BE
- //const uint32x4_t s_one = PACK32x4(0, 0, 0, 1);
- //const uint32x4_t s_two = PACK32x4(0, 2, 0, 2);
-#endif
+ const unsigned int w_one[] = {0, 0<<24, 0, 1<<24};
+ const unsigned int w_two[] = {0, 2<<24, 0, 2<<24};
+ const uint32x4_t s_one = vld1q_u32(w_one);
+ const uint32x4_t s_two = vld1q_u32(w_two);
const size_t blockSize = 16;
// const size_t neonBlockSize = 16;