summary | refs | log | tree | commit | diff
path: root/sha_simd.cpp
diff options
context:
space:
mode:
author Jeffrey Walton <noloader@gmail.com> 2019-01-16 00:02:04 -0500
committer GitHub <noreply@github.com> 2019-01-16 00:02:04 -0500
commit df9fa62205f2d341e2b1b26595a3a1b6377c60c5 (patch)
tree d4b2b34861e76a9c7c7ec1d0b0e6fe4faf25c3d6 /sha_simd.cpp
parent 982655845a784a9a4cfbc92221359a25a74184a3 (diff)
download cryptopp-git-df9fa62205f2d341e2b1b26595a3a1b6377c60c5.tar.gz
Use carryless multiplies for NIST b233 and k233 curves (GH #783, PR #784)
Use carryless multiplies for NIST b233 and k233 curves.
Diffstat (limited to 'sha_simd.cpp')
-rw-r--r-- sha_simd.cpp 35
1 files changed, 16 insertions, 19 deletions
diff --git a/sha_simd.cpp b/sha_simd.cpp
index 7184f3e1..0b31ea37 100644
--- a/sha_simd.cpp
+++ b/sha_simd.cpp
@@ -46,17 +46,6 @@
# define EXCEPTION_EXECUTE_HANDLER 1
#endif
-// Thanks to Peter Cordes, https://stackoverflow.com/q/54016821/608639
-#if (CRYPTOPP_ARM_NEON_AVAILABLE)
-# ifndef PACK32x4
-# if defined(_MSC_VER)
-# define PACK32x4(w,x,y,z) { ((w) + (word64(x) << 32)), ((y) + (word64(z) << 32)) }
-# else
-# define PACK32x4(w,x,y,z) { (w), (x), (y), (z) }
-# endif
-# endif // PACK32x4
-#endif // Microsoft workaround
-
// Clang __m128i casts
#define M128_CAST(x) ((__m128i *)(void *)(x))
#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
@@ -95,9 +84,10 @@ bool CPU_ProbeSHA1()
volatile bool result = true;
__try
{
- uint32x4_t data1 = PACK32x4(1,2,3,4);
- uint32x4_t data2 = PACK32x4(5,6,7,8);
- uint32x4_t data3 = PACK32x4(9,10,11,12);
+ unsigned int w[] = {1,2,3,4, 5,6,7,8, 9,10,11,12};
+ uint32x4_t data1 = vld1q_u32(w+0);
+ uint32x4_t data2 = vld1q_u32(w+4);
+ uint32x4_t data3 = vld1q_u32(w+8);
uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
@@ -130,7 +120,10 @@ bool CPU_ProbeSHA1()
result = false;
else
{
- uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
+ unsigned int w[] = {1,2,3,4, 5,6,7,8, 9,10,11,12};
+ uint32x4_t data1 = vld1q_u32(w+0);
+ uint32x4_t data2 = vld1q_u32(w+4);
+ uint32x4_t data3 = vld1q_u32(w+8);
uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
@@ -159,9 +152,10 @@ bool CPU_ProbeSHA2()
volatile bool result = true;
__try
{
- uint32x4_t data1 = PACK32x4(1,2,3,4);
- uint32x4_t data2 = PACK32x4(5,6,7,8);
- uint32x4_t data3 = PACK32x4(9,10,11,12);
+ unsigned int w[] = {1,2,3,4, 5,6,7,8, 9,10,11,12};
+ uint32x4_t data1 = vld1q_u32(w+0);
+ uint32x4_t data2 = vld1q_u32(w+4);
+ uint32x4_t data3 = vld1q_u32(w+8);
uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
@@ -193,7 +187,10 @@ bool CPU_ProbeSHA2()
result = false;
else
{
- uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};
+ unsigned int w[] = {1,2,3,4, 5,6,7,8, 9,10,11,12};
+ uint32x4_t data1 = vld1q_u32(w+0);
+ uint32x4_t data2 = vld1q_u32(w+4);
+ uint32x4_t data3 = vld1q_u32(w+8);
uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);