summaryrefslogtreecommitdiff
path: root/sha512_armv4.S
diff options
context:
space:
mode:
author Jeffrey Walton <noloader@gmail.com> 2019-05-25 06:23:19 -0400
committer Jeffrey Walton <noloader@gmail.com> 2019-05-25 06:23:19 -0400
commit fc10a7f1ea56b0c3c3ca7db8fa4de868cf3daad2 (patch)
tree c86d705156d874fe672223c6d81394c3179a6ca8 /sha512_armv4.S
parent 92df2a685f55a4777a897f0a921ed06e3e21401a (diff)
download cryptopp-git-fc10a7f1ea56b0c3c3ca7db8fa4de868cf3daad2.tar.gz
Fix SHA512 on ARM benchmarks
This fixes a mistake made when porting from Cryptogams to Crypto++. The macros VFP_ABI_PUSH and VFP_ABI_POP save and restore SIMD register state (d8-d15), so they must be defined and invoked. In the original port the definitions were missing and the invocations in sha512_block_data_order_neon were commented out. As a result the benchmarks would hang, because the doubles used for benchmarking were clobbered by sha512_block_data_order_neon.
Diffstat (limited to 'sha512_armv4.S')
-rw-r--r-- sha512_armv4.S 17
1 files changed, 8 insertions, 9 deletions
diff --git a/sha512_armv4.S b/sha512_armv4.S
index 4c80608e..a1e8cd44 100644
--- a/sha512_armv4.S
+++ b/sha512_armv4.S
@@ -79,6 +79,9 @@
# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
#endif
+#define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+#define VFP_ABI_POP vldmia sp!,{d8-d15}
+
#if defined(__thumb2__)
.syntax unified
.thumb
@@ -147,19 +150,15 @@ sha512_block_data_order:
#else
adr r3,.Lsha512_block_data_order
#endif
+
#if __ARM_MAX_ARCH__>=7
- @ldr r12,.LCRYPTOGAMS_armcap
ldr r12,=CRYPTOGAMS_armcap_P
-# if !defined(_WIN32)
- @ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P
ldr r12,[r12] @ CRYPTOGAMS_armcap_P
-# endif
-# if defined(__APPLE__) || defined(_WIN32)
- ldr r12,[r12]
-# endif
+
tst r12,#ARMV7_NEON
bne .LNEON
#endif
+
add r2,r1,r2,lsl#7 @ len to point at the end of inp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
sub r14,r3,#672 @ K512
@@ -554,7 +553,7 @@ sha512_block_data_order_neon:
dmb @ errata #451034 on early Cortex A8
add r2,r1,r2,lsl#7 @ len to point at the end of inp
adr r3,K512
- @VFP_ABI_PUSH
+ VFP_ABI_PUSH
vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context
.Loop_neon:
vshr.u64 d24,d20,#14 @ 0
@@ -1868,7 +1867,7 @@ sha512_block_data_order_neon:
teq r1,r2
sub r3,#640 @ rewind K512
bne .Loop_neon
- @VFP_ABI_POP
+ VFP_ABI_POP
bx lr @ .word 0xe12fff1e
.size sha512_block_data_order_neon,.-sha512_block_data_order_neon
#endif