summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cpu.cpp 40
1 files changed, 18 insertions, 22 deletions
diff --git a/cpu.cpp b/cpu.cpp
index e8bf611b..e4d3b7d4 100644
--- a/cpu.cpp
+++ b/cpu.cpp
@@ -445,21 +445,19 @@ static bool TryPMULL()
volatile bool result = true;
__try
{
- const poly64_t a1={2}, b1={3};
- const poly64x2_t a2={4,5}, b2={6,7};
- const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};
+ const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
+ const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
+ b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
const poly128_t r1 = vmull_p64(a1, b1);
- const poly128_t r2 = vmull_high_p64(a2, b2);
- const poly128_t r3 = vmull_high_p64(a3, b3);
+ const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
- // Also see https://github.com/weidai11/cryptopp/issues/233.
- const uint64x2_t& t1 = vreinterpretq_u64_p128(r1); // {6,0}
- const uint64x2_t& t2 = vreinterpretq_u64_p128(r2); // {24,0}
- const uint64x2_t& t3 = vreinterpretq_u64_p128(r3); // {bignum,bignum}
+ // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
+ const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
+ const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
- result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
- vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
+ result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
+ vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
}
__except (EXCEPTION_EXECUTE_HANDLER)
{
@@ -483,21 +481,19 @@ static bool TryPMULL()
result = false;
else
{
- const poly64_t a1={2}, b1={3};
- const poly64x2_t a2={4,5}, b2={6,7};
- const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};
+ const poly64_t a1={0x9090909090909090}, b1={0xb0b0b0b0b0b0b0b0};
+ const poly8x16_t a2={0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0,0xa0},
+ b2={0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xc0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0,0xe0};
const poly128_t r1 = vmull_p64(a1, b1);
- const poly128_t r2 = vmull_high_p64(a2, b2);
- const poly128_t r3 = vmull_high_p64(a3, b3);
+ const poly128_t r2 = vmull_high_p64((poly64x2_t)(a2), (poly64x2_t)(b2));
- // Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233.
- const uint64x2_t& t1 = (uint64x2_t)(r1); // {6,0}
- const uint64x2_t& t2 = (uint64x2_t)(r2); // {24,0}
- const uint64x2_t& t3 = (uint64x2_t)(r3); // {bignum,bignum}
+ // Linaro is missing vreinterpretq_u64_p128. Also see http://github.com/weidai11/cryptopp/issues/233.
+ const uint64x2_t& t1 = (uint64x2_t)(r1); // {bignum,bignum}
+ const uint64x2_t& t2 = (uint64x2_t)(r2); // {bignum,bignum}
- result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
- vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
+ result = !!(vgetq_lane_u64(t1,0) == 0x5300530053005300 && vgetq_lane_u64(t1,1) == 0x5300530053005300 &&
+ vgetq_lane_u64(t2,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t2,1) == 0x6c006c006c006c00);
}
sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);