summary refs log tree commit diff
path: root/lib/accelerated/x86/x86-common.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/accelerated/x86/x86-common.c')
-rw-r--r-- lib/accelerated/x86/x86-common.c 86
1 files changed, 69 insertions, 17 deletions
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
index acd4586e4e..fb3ff90919 100644
--- a/lib/accelerated/x86/x86-common.c
+++ b/lib/accelerated/x86/x86-common.c
@@ -38,6 +38,12 @@
# include <sha-padlock.h>
#endif
#include <aes-padlock.h>
+#ifdef HAVE_CPUID_H /* use the compiler-provided CPUID helpers when the header is available */
+# include <cpuid.h>
+#else /* no <cpuid.h>: stub both queries so they always evaluate to 0 */
+# define __get_cpuid(...) 0 /* callers in this file test the result with '!' and bail out on 0 */
+# define __get_cpuid_count(...) 0 /* same stub for the leaf/subleaf variant */
+#endif
/* ebx, ecx, edx
* This is a format compatible with openssl's CPUID detection.
@@ -49,11 +55,21 @@ __hidden
#endif
unsigned int _gnutls_x86_cpuid_s[4];
+#ifndef bit_SHA /* fallback for when no included header defines it */
+# define bit_SHA (1<<29) /* tested against word [2] of the cpuid array, which read_cpuid_vals() fills from CPUID leaf 7 */
+#endif
+
+/* ecx */
+#ifndef bit_AVX512BITALG /* fallback for when no included header defines it */
+# define bit_AVX512BITALG 0x4000
+#endif
+
#ifndef bit_PCLMUL
# define bit_PCLMUL 0x2
#endif
#ifndef bit_SSSE3
+/* ecx */
# define bit_SSSE3 0x0000200
#endif
@@ -85,10 +101,26 @@ unsigned int _gnutls_x86_cpuid_s[4];
#define INTEL_SSSE3 (1<<2)
#define INTEL_PCLMUL (1<<3)
#define INTEL_AVX (1<<4)
+#define INTEL_SHA (1<<5)
#define VIA_PADLOCK (1<<20)
#define VIA_PADLOCK_PHE (1<<21)
#define VIA_PADLOCK_PHE_SHA512 (1<<22)
+static unsigned read_cpuid_vals(unsigned int vals[4]) /* fills vals[0..2] from CPUID leaves 1 and 7; returns 1 on success, 0 if either query returns 0 */
+{
+ unsigned t1, t2, t3; /* scratch for output registers that are not kept */
+ if (!__get_cpuid(1, &t1, &vals[0], /* leaf 1: the 2nd and 3rd outputs land in vals[0] and vals[1] (ebx/ecx per the <cpuid.h> argument order - TODO confirm) */
+ &vals[1], &t2))
+ return 0;
+ /* suppress AVX512; it works conditionally on certain CPUs on the original code */
+ vals[1] &= 0xfffff7ff; /* clears bit 11 (0x800) of vals[1]; NOTE(review): confirm this is the bit the comment above refers to */
+
+ if (!__get_cpuid_count(7, 0, &t1, &vals[2], &t2, &t3)) /* leaf 7, subleaf 0: only the 2nd output is kept, in vals[2] */
+ return 0; /* NOTE(review): a failure here discards the leaf-1 result as well - confirm this is intended */
+
+ return 1; /* NOTE(review): vals[3] is never assigned in this function; callers passing an uninitialised array must not read it */
+}
+
/* Based on the example in "How to detect New Instruction support in
* the 4th generation Intel Core processor family.
* https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
@@ -111,18 +143,17 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
static void capabilities_to_intel_cpuid(unsigned capabilities)
{
- unsigned a,b,c,t;
-
- memset(_gnutls_x86_cpuid_s, 0, sizeof(_gnutls_x86_cpuid_s));
+ unsigned a[4];
if (capabilities & EMPTY_SET) {
return;
}
- gnutls_cpuid(1, &t, &a, &b, &c);
+ if (!read_cpuid_vals(a))
+ return;
if (capabilities & INTEL_AES_NI) {
- if (b & bit_AES) {
+ if (a[1] & bit_AES) {
_gnutls_x86_cpuid_s[1] |= bit_AES;
} else {
_gnutls_debug_log
@@ -131,7 +162,7 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
}
if (capabilities & INTEL_SSSE3) {
- if (b & bit_SSSE3) {
+ if (a[1] & bit_SSSE3) {
_gnutls_x86_cpuid_s[1] |= bit_SSSE3;
} else {
_gnutls_debug_log
@@ -140,7 +171,7 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
}
if (capabilities & INTEL_AVX) {
- if ((b & bit_AVX) && check_4th_gen_intel_features(b)) {
+ if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
_gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
} else {
_gnutls_debug_log
@@ -149,7 +180,7 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
}
if (capabilities & INTEL_PCLMUL) {
- if (b & bit_PCLMUL) {
+ if (a[1] & bit_PCLMUL) {
_gnutls_x86_cpuid_s[1] |= bit_PCLMUL;
} else {
_gnutls_debug_log
@@ -157,6 +188,14 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
}
}
+ if (capabilities & INTEL_SHA) {
+ if (a[2] & bit_SHA) {
+ _gnutls_x86_cpuid_s[2] |= bit_SHA;
+ } else {
+ _gnutls_debug_log
+ ("SHA acceleration requested but not available\n");
+ }
+ }
}
@@ -170,6 +209,11 @@ static unsigned check_ssse3(void)
return (_gnutls_x86_cpuid_s[1] & bit_SSSE3);
}
+static unsigned check_sha(void) /* returns nonzero iff bit_SHA is set in _gnutls_x86_cpuid_s[2] */
+{
+ return (_gnutls_x86_cpuid_s[2] & bit_SHA); /* the raw masked value is returned, not a normalised 0/1 */
+}
+
#ifdef ASM_X86_64
static unsigned check_avx_movbe(void)
{
@@ -196,8 +240,8 @@ static unsigned capabilities_to_via_edx(unsigned capabilities)
return 0;
}
- gnutls_cpuid(1, &t, &a, &b, &c);
-
+ if (!__get_cpuid(1, &t, &a, &b, &c))
+ return 0;
if (capabilities & VIA_PADLOCK) {
if (c & via_bit_PADLOCK) {
_gnutls_x86_cpuid_s[2] |= via_bit_PADLOCK;
@@ -265,7 +309,9 @@ static int check_phe_partial(void)
static unsigned check_via(void)
{
unsigned int a, b, c, d;
- gnutls_cpuid(0, &a, &b, &c, &d);
+
+ if (!__get_cpuid(0, &a, &b, &c, &d))
+ return 0;
if ((memcmp(&b, "Cent", 4) == 0 &&
memcmp(&d, "aurH", 4) == 0 && memcmp(&c, "auls", 4) == 0)) {
@@ -455,7 +501,9 @@ void register_x86_padlock_crypto(unsigned capabilities)
static unsigned check_intel_or_amd(void)
{
unsigned int a, b, c, d;
- gnutls_cpuid(0, &a, &b, &c, &d);
+
+ if (!__get_cpuid(0, &a, &b, &c, &d))
+ return 0;
if ((memcmp(&b, "Genu", 4) == 0 &&
memcmp(&d, "ineI", 4) == 0 &&
@@ -472,14 +520,15 @@ static
void register_x86_intel_crypto(unsigned capabilities)
{
int ret;
- unsigned t;
+
+ memset(_gnutls_x86_cpuid_s, 0, sizeof(_gnutls_x86_cpuid_s));
if (check_intel_or_amd() == 0)
return;
if (capabilities == 0) {
- gnutls_cpuid(1, &t, &_gnutls_x86_cpuid_s[0],
- &_gnutls_x86_cpuid_s[1], &_gnutls_x86_cpuid_s[2]);
+ if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
+ return;
} else {
capabilities_to_intel_cpuid(capabilities);
}
@@ -523,6 +572,11 @@ void register_x86_intel_crypto(unsigned capabilities)
if (ret < 0) {
gnutls_assert();
}
+ }
+
+ if (check_sha() || check_ssse3()) {
+ if (check_sha())
+ _gnutls_debug_log("Intel SHA was detected\n");
ret =
gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1,
@@ -570,7 +624,6 @@ void register_x86_intel_crypto(unsigned capabilities)
if (ret < 0)
gnutls_assert();
-#ifdef ENABLE_SHA512
ret =
gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA384,
80,
@@ -597,7 +650,6 @@ void register_x86_intel_crypto(unsigned capabilities)
&_gnutls_hmac_sha_x86_ssse3, 0);
if (ret < 0)
gnutls_assert();
-#endif
}
if (check_optimized_aes()) {