summaryrefslogtreecommitdiff
path: root/crypto/armcap.c
diff options
context:
space:
mode:
author: XiaokangQian <xiaokang.qian@arm.com> 2021-06-09 06:35:46 +0000
committer: Pauli <pauli@openssl.org> 2022-01-25 14:30:00 +1100
commit: 954f45ba4c504570206ff5bed811e512cf92dc8e (patch)
tree: 6d2521f79615afd4c8b35cb2c6794a57aded5602 /crypto/armcap.c
parent: 44a563dde1584cd9284e80b6e45ee5019be8d36c (diff)
download: openssl-new-954f45ba4c504570206ff5bed811e512cf92dc8e.tar.gz
Optimize AES-GCM for uarchs with unroll and new instructions
Increase the number of blocks processed per iteration to 8. Increase the hash table capacity. Make use of the EOR3 instruction to improve performance. This can improve performance by 25-40% on out-of-order microarchitectures with a large number of fast execution units, such as Neoverse V1. We also see 20-30% performance improvements on other architectures such as the M1. Assembly code reviewed by Tom Cosgrove (ARM). Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de> Reviewed-by: Paul Dale <pauli@openssl.org> (Merged from https://github.com/openssl/openssl/pull/15916)
Diffstat (limited to 'crypto/armcap.c')
-rw-r--r-- crypto/armcap.c 24
1 files changed, 21 insertions, 3 deletions
diff --git a/crypto/armcap.c b/crypto/armcap.c
index 5016987eeb..c50322f504 100644
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -171,6 +171,7 @@ static unsigned long getauxval(unsigned long key)
# define HWCAP_CE_SHA1 (1 << 5)
# define HWCAP_CE_SHA256 (1 << 6)
# define HWCAP_CPUID (1 << 11)
+# define HWCAP_SHA3 (1 << 17)
# define HWCAP_CE_SM3 (1 << 18)
# define HWCAP_CE_SM4 (1 << 19)
# define HWCAP_CE_SHA512 (1 << 21)
@@ -216,11 +217,20 @@ void OPENSSL_cpuid_setup(void)
*/
# else
{
- unsigned int sha512;
- size_t len = sizeof(sha512);
+ unsigned int feature;
+ size_t len = sizeof(feature);
+ char uarch[64];
- if (sysctlbyname("hw.optional.armv8_2_sha512", &sha512, &len, NULL, 0) == 0 && sha512 == 1)
+ if (sysctlbyname("hw.optional.armv8_2_sha512", &feature, &len, NULL, 0) == 0 && feature == 1)
OPENSSL_armcap_P |= ARMV8_SHA512;
+ feature = 0;
+ if (sysctlbyname("hw.optional.armv8_2_sha3", &feature, &len, NULL, 0) == 0 && feature == 1) {
+ OPENSSL_armcap_P |= ARMV8_SHA3;
+ len = sizeof(uarch);
+ if ((sysctlbyname("machdep.cpu.brand_string", uarch, &len, NULL, 0) == 0) &&
+ (strncmp(uarch, "Apple M1", 8) == 0))
+ OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
+ }
}
# endif
# endif
@@ -255,6 +265,8 @@ void OPENSSL_cpuid_setup(void)
if (hwcap & HWCAP_CE_SM3)
OPENSSL_armcap_P |= ARMV8_SM3;
+ if (hwcap & HWCAP_SHA3)
+ OPENSSL_armcap_P |= ARMV8_SHA3;
# endif
}
# ifdef __aarch64__
@@ -311,6 +323,9 @@ void OPENSSL_cpuid_setup(void)
if (sigsetjmp(ill_jmp, 1) == 0) {
_armv8_sm3_probe();
OPENSSL_armcap_P |= ARMV8_SM3;
+ if (sigsetjmp(ill_jmp, 1) == 0) {
+ _armv8_eor3_probe();
+ OPENSSL_armcap_P |= ARMV8_SHA3;
}
# endif
}
@@ -340,6 +355,9 @@ void OPENSSL_cpuid_setup(void)
(OPENSSL_armcap_P & ARMV7_NEON)) {
OPENSSL_armv8_rsa_neonized = 1;
}
+ if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1)) &&
+ (OPENSSL_armcap_P & ARMV8_SHA3))
+ OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
# endif
}
#endif