diff options
author | Jeffrey Walton <noloader@gmail.com> | 2019-05-25 06:23:19 -0400 |
---|---|---|
committer | Jeffrey Walton <noloader@gmail.com> | 2019-05-25 06:23:19 -0400 |
commit | fc10a7f1ea56b0c3c3ca7db8fa4de868cf3daad2 (patch) | |
tree | c86d705156d874fe672223c6d81394c3179a6ca8 /sha512_armv4.S | |
parent | 92df2a685f55a4777a897f0a921ed06e3e21401a (diff) | |
download | cryptopp-git-fc10a7f1ea56b0c3c3ca7db8fa4de868cf3daad2.tar.gz |
Fix SHA512 on ARM benchmarks
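This fixes a mistake made when porting the code from Cryptogams to Crypto++. The macros VFP_ABI_PUSH and VFP_ABI_POP save and restore SIMD register state (d8-d15) around the NEON code, but during the port they were left undefined and their uses in sha512_block_data_order_neon were commented out. As a result the function overwrote those registers without restoring them, the doubles held there by the benchmarking code were destroyed, and the benchmarks would hang. This commit defines both macros and re-enables their use.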
Diffstat (limited to 'sha512_armv4.S')
-rw-r--r-- | sha512_armv4.S | 17 |
1 files changed, 8 insertions, 9 deletions
```diff
diff --git a/sha512_armv4.S b/sha512_armv4.S
index 4c80608e..a1e8cd44 100644
--- a/sha512_armv4.S
+++ b/sha512_armv4.S
@@ -79,6 +79,9 @@
 # define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
 #endif
 
+#define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
+#define VFP_ABI_POP vldmia sp!,{d8-d15}
+
 #if defined(__thumb2__)
 .syntax unified
 .thumb
@@ -147,19 +150,15 @@ sha512_block_data_order:
 #else
 	adr r3,.Lsha512_block_data_order
 #endif
+
 #if __ARM_MAX_ARCH__>=7
-	@ldr r12,.LCRYPTOGAMS_armcap
 	ldr r12,=CRYPTOGAMS_armcap_P
-# if !defined(_WIN32)
-	@ldr r12,[r3,r12] @ CRYPTOGAMS_armcap_P
 	ldr r12,[r12] @ CRYPTOGAMS_armcap_P
-# endif
-# if defined(__APPLE__) || defined(_WIN32)
-	ldr r12,[r12]
-# endif
+
 	tst r12,#ARMV7_NEON
 	bne .LNEON
 #endif
+
 	add r2,r1,r2,lsl#7 @ len to point at the end of inp
 	stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
 	sub r14,r3,#672 @ K512
@@ -554,7 +553,7 @@ sha512_block_data_order_neon:
 	dmb @ errata #451034 on early Cortex A8
 	add r2,r1,r2,lsl#7 @ len to point at the end of inp
 	adr r3,K512
-	@VFP_ABI_PUSH
+	VFP_ABI_PUSH
 	vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context
 .Loop_neon:
 	vshr.u64 d24,d20,#14 @ 0
@@ -1868,7 +1867,7 @@ sha512_block_data_order_neon:
 	teq r1,r2
 	sub r3,#640 @ rewind K512
 	bne .Loop_neon
-	@VFP_ABI_POP
+	VFP_ABI_POP
 	bx lr @ .word 0xe12fff1e
 .size sha512_block_data_order_neon,.-sha512_block_data_order_neon
 #endif
```