summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cpu.cpp49
-rw-r--r--cpu.h30
-rw-r--r--validat1.cpp15
3 files changed, 86 insertions, 8 deletions
diff --git a/cpu.cpp b/cpu.cpp
index ef0ddf69..13e15b36 100644
--- a/cpu.cpp
+++ b/cpu.cpp
@@ -21,6 +21,14 @@
# include <unistd.h>
#endif
+//#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X86
+//# if defined(_MSC_VER)
+//# include <intrin.h>
+//# else
+//# include <immintrin.h>
+//# endif
+//#endif
+
// Capability queries, requires Glibc 2.16, http://lwn.net/Articles/519085/
// CRYPTOPP_GLIBC_VERSION not used because config.h is missing <feature.h>
#if (((__GLIBC__ * 100) + __GLIBC_MINOR__) >= 216)
@@ -187,6 +195,8 @@ bool CRYPTOPP_SECTION_INIT g_hasSSE2 = false;
bool CRYPTOPP_SECTION_INIT g_hasSSSE3 = false;
bool CRYPTOPP_SECTION_INIT g_hasSSE41 = false;
bool CRYPTOPP_SECTION_INIT g_hasSSE42 = false;
+bool CRYPTOPP_SECTION_INIT g_hasAVX = false;
+bool CRYPTOPP_SECTION_INIT g_hasAVX2 = false;
bool CRYPTOPP_SECTION_INIT g_hasAESNI = false;
bool CRYPTOPP_SECTION_INIT g_hasCLMUL = false;
bool CRYPTOPP_SECTION_INIT g_hasADX = false;
@@ -245,12 +255,38 @@ void DetectX86Features()
g_hasAESNI = g_hasSSE2 && ((cpuid1[2] & (1<<25)) != 0);
g_hasCLMUL = g_hasSSE2 && ((cpuid1[2] & (1<< 1)) != 0);
+ // AVX is similar to SSE, but check both bits 27 (SSE) and 28 (AVX).
+ // https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
+ CRYPTOPP_CONSTANT(YMM_FLAG = (3 << 1))
+ CRYPTOPP_CONSTANT(AVX_FLAG = (3 << 27))
+ if ((cpuid1[2] & AVX_FLAG) == AVX_FLAG)
+ {
+#if defined(__GNUC__)
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 and
+ // http://www.agner.org/optimize/vectorclass/read.php?i=65
+ word32 a=0, d=0;
+ __asm __volatile
+ (
+ // GCC 4.1/Binutils 2.17 cannot consume xgetbv
+ // "xgetbv" : "=a"(a), "=d"(d) : "c"(0) :
+ ".byte 0x0f, 0x01, 0xd0" "\n\t"
+ : "=a"(a), "=d"(d) : "c"(0) :
+ );
+ word64 xcr0 = a | static_cast<word64>(d) << 32;
+ g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
+#else
+ word64 xcr0 = _xgetbv(0);
+ g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
+#endif
+ }
+
if (IsIntel(cpuid0))
{
CRYPTOPP_CONSTANT(RDRAND_FLAG = (1 << 30))
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
+ CRYPTOPP_CONSTANT( AVX2_FLAG = (1 << 5))
g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
@@ -263,6 +299,7 @@ void DetectX86Features()
g_hasRDSEED = (cpuid2[1] /*EBX*/ & RDSEED_FLAG) != 0;
g_hasADX = (cpuid2[1] /*EBX*/ & ADX_FLAG) != 0;
g_hasSHA = (cpuid2[1] /*EBX*/ & SHA_FLAG) != 0;
+ g_hasAVX2 = (cpuid2[1] /*EBX*/ & AVX2_FLAG) != 0;
}
}
}
@@ -272,6 +309,7 @@ void DetectX86Features()
CRYPTOPP_CONSTANT(RDSEED_FLAG = (1 << 18))
CRYPTOPP_CONSTANT( ADX_FLAG = (1 << 19))
CRYPTOPP_CONSTANT( SHA_FLAG = (1 << 29))
+ CRYPTOPP_CONSTANT( AVX2_FLAG = (1 << 5))
CpuId(0x80000005, 0, cpuid2);
g_cacheLineSize = GETBYTE(cpuid2[2], 0);
@@ -284,6 +322,7 @@ void DetectX86Features()
g_hasRDSEED = (cpuid2[1] /*EBX*/ & RDSEED_FLAG) != 0;
g_hasADX = (cpuid2[1] /*EBX*/ & ADX_FLAG) != 0;
g_hasSHA = (cpuid2[1] /*EBX*/ & SHA_FLAG) != 0;
+ g_hasAVX2 = (cpuid2[1] /*EBX*/ & AVX2_FLAG) != 0;
}
}
}
@@ -300,11 +339,11 @@ void DetectX86Features()
{
// Extended features available
CpuId(0xC0000001, 0, cpuid2);
- g_hasPadlockRNG = (cpuid2[3] /*EDX*/ & RNG_FLAGS) != 0;
- g_hasPadlockACE = (cpuid2[3] /*EDX*/ & ACE_FLAGS) != 0;
- g_hasPadlockACE2 = (cpuid2[3] /*EDX*/ & ACE2_FLAGS) != 0;
- g_hasPadlockPHE = (cpuid2[3] /*EDX*/ & PHE_FLAGS) != 0;
- g_hasPadlockPMM = (cpuid2[3] /*EDX*/ & PMM_FLAGS) != 0;
+ g_hasPadlockRNG = (cpuid2[3] /*EDX*/ & RNG_FLAGS) == RNG_FLAGS;
+ g_hasPadlockACE = (cpuid2[3] /*EDX*/ & ACE_FLAGS) == ACE_FLAGS;
+ g_hasPadlockACE2 = (cpuid2[3] /*EDX*/ & ACE2_FLAGS) == ACE2_FLAGS;
+ g_hasPadlockPHE = (cpuid2[3] /*EDX*/ & PHE_FLAGS) == PHE_FLAGS;
+ g_hasPadlockPMM = (cpuid2[3] /*EDX*/ & PMM_FLAGS) == PMM_FLAGS;
}
}
diff --git a/cpu.h b/cpu.h
index 68ac804c..0d555b4f 100644
--- a/cpu.h
+++ b/cpu.h
@@ -85,6 +85,8 @@ extern CRYPTOPP_DLL bool g_hasSSE2;
extern CRYPTOPP_DLL bool g_hasSSSE3;
extern CRYPTOPP_DLL bool g_hasSSE41;
extern CRYPTOPP_DLL bool g_hasSSE42;
+extern CRYPTOPP_DLL bool g_hasAVX;
+extern CRYPTOPP_DLL bool g_hasAVX2;
extern CRYPTOPP_DLL bool g_hasAESNI;
extern CRYPTOPP_DLL bool g_hasCLMUL;
extern CRYPTOPP_DLL bool g_hasSHA;
@@ -158,6 +160,7 @@ inline bool HasSSE42()
/// \brief Determines AES-NI availability
/// \returns true if AES-NI is determined to be available, false otherwise
/// \details HasAESNI() is a runtime check performed using CPUID
+/// \since Crypto++ 5.6.1
/// \note This function is only available on Intel IA-32 platforms
inline bool HasAESNI()
{
@@ -169,6 +172,7 @@ inline bool HasAESNI()
/// \brief Determines Carryless Multiply availability
/// \returns true if pclmulqdq is determined to be available, false otherwise
/// \details HasCLMUL() is a runtime check performed using CPUID
+/// \since Crypto++ 5.6.1
/// \note This function is only available on Intel IA-32 platforms
inline bool HasCLMUL()
{
@@ -180,6 +184,7 @@ inline bool HasCLMUL()
/// \brief Determines SHA availability
/// \returns true if SHA is determined to be available, false otherwise
/// \details HasSHA() is a runtime check performed using CPUID
+/// \since Crypto++ 6.0
/// \note This function is only available on Intel IA-32 platforms
inline bool HasSHA()
{
@@ -191,6 +196,7 @@ inline bool HasSHA()
/// \brief Determines ADX availability
/// \returns true if ADX is determined to be available, false otherwise
/// \details HasADX() is a runtime check performed using CPUID
+/// \since Crypto++ 7.0
/// \note This function is only available on Intel IA-32 platforms
inline bool HasADX()
{
@@ -199,6 +205,30 @@ inline bool HasADX()
return g_hasADX;
}
+/// \brief Determines AVX availability
+/// \returns true if AVX is determined to be available, false otherwise
+/// \details HasAVX() is a runtime check performed using CPUID
+/// \since Crypto++ 7.1
+/// \note This function is only available on Intel IA-32 platforms
+inline bool HasAVX()
+{
+ if (!g_x86DetectionDone)
+ DetectX86Features();
+ return g_hasAVX;
+}
+
+/// \brief Determines AVX2 availability
+/// \returns true if AVX2 is determined to be available, false otherwise
+/// \details HasAVX2() is a runtime check performed using CPUID
+/// \since Crypto++ 7.1
+/// \note This function is only available on Intel IA-32 platforms
+inline bool HasAVX2()
+{
+ if (!g_x86DetectionDone)
+ DetectX86Features();
+ return g_hasAVX2;
+}
+
/// \brief Determines if the CPU is an Intel P4
/// \returns true if the CPU is a P4, false otherwise
/// \details IsP4() is a runtime check performed using CPUID
diff --git a/validat1.cpp b/validat1.cpp
index 02e5ef95..c8d58255 100644
--- a/validat1.cpp
+++ b/validat1.cpp
@@ -369,11 +369,20 @@ bool TestSettings()
bool hasSSSE3 = HasSSSE3();
bool hasSSE41 = HasSSE41();
bool hasSSE42 = HasSSE42();
+ bool hasAVX = HasAVX();
+ bool hasAVX2 = HasAVX2();
+ bool hasAESNI = HasAESNI();
+ bool hasCLMUL = HasCLMUL();
+ bool hasRDRAND = HasRDRAND();
+ bool hasRDSEED = HasRDSEED();
+ bool hasSHA = HasSHA();
bool isP4 = IsP4();
- std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41 << ", hasSSE4.2 == " << hasSSE42;
- std::cout << ", hasAESNI == " << HasAESNI() << ", hasCLMUL == " << HasCLMUL() << ", hasRDRAND == " << HasRDRAND() << ", hasRDSEED == " << HasRDSEED();
- std::cout << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4 << "\n";
+ std::cout << "hasSSE2 == " << hasSSE2 << ", hasSSSE3 == " << hasSSSE3 << ", hasSSE4.1 == " << hasSSE41;
+ std::cout << ", hasSSE4.2 == " << hasSSE42 << ", hasAVX == " << hasAVX << ", hasAVX2 == " << hasAVX2;
+ std::cout << ", hasAESNI == " << hasAESNI << ", hasCLMUL == " << hasCLMUL << ", hasRDRAND == " << HasRDRAND;
+ std::cout << ", hasRDSEED == " << HasRDSEED << ", hasSHA == " << HasSHA() << ", isP4 == " << isP4;
+ std::cout << "\n";
#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
bool hasNEON = HasNEON();