summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- cpu.cpp 33
-rw-r--r-- x64dll.asm 14
2 files changed, 46 insertions, 1 deletions
diff --git a/cpu.cpp b/cpu.cpp
index 285b17cd..67d4efba 100644
--- a/cpu.cpp
+++ b/cpu.cpp
@@ -1,3 +1,4 @@
+
// cpu.cpp - originally written and placed in the public domain by Wei Dai
#include "pch.h"
@@ -56,6 +57,11 @@ unsigned long int getauxval(unsigned long int) { return 0; }
# include <setjmp.h>
#endif
+// Visual Studio 2008 and below are missing _xgetbv. See x64dll.asm for the body.
+#if defined(_MSC_VER) && defined(_M_X64)
+extern "C" unsigned long long __fastcall ExtendedControlRegister(unsigned int);
+#endif
+
ANONYMOUS_NAMESPACE_BEGIN
#if defined(__APPLE__)
@@ -309,19 +315,44 @@ void DetectX86Features()
CRYPTOPP_CONSTANT(AVX_FLAG = (3 << 27))
if ((cpuid1[2] & AVX_FLAG) == AVX_FLAG)
{
+// GCC 4.1/Binutils 2.17 cannot consume xgetbv
#if defined(__GNUC__) || defined(__SUNPRO_CC) || defined(__BORLANDC__)
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 and
// http://www.agner.org/optimize/vectorclass/read.php?i=65
word32 a=0, d=0;
__asm __volatile
(
- // GCC 4.1/Binutils 2.17 cannot consume xgetbv
// "xgetbv" : "=a"(a), "=d"(d) : "c"(0) :
".byte 0x0f, 0x01, 0xd0" "\n\t"
: "=a"(a), "=d"(d) : "c"(0) :
);
word64 xcr0 = a | static_cast<word64>(d) << 32;
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
+
+// Visual Studio 2008 and below lack xgetbv
+#elif defined(_MSC_VER) && defined(_M_IX86)
+ word32 a=0, d=0;
+ __asm {
+ push eax
+ push edx
+ push ecx
+ mov ecx, 0
+ _emit 0x0f
+ _emit 0x01
+ _emit 0xd0
+ mov a, eax
+ mov d, edx
+ pop ecx
+ pop edx
+ pop eax
+ }
+ word64 xcr0 = a | static_cast<word64>(d) << 32;
+ g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
+
+// Visual Studio 2008 and below lack xgetbv
+#elif defined(_MSC_VER) && defined(_M_X64)
+ word64 xcr0 = ExtendedControlRegister(0);
+ g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
#else
word64 xcr0 = _xgetbv(0);
g_hasAVX = (xcr0 & YMM_FLAG) == YMM_FLAG;
diff --git a/x64dll.asm b/x64dll.asm
index 6f90239c..7857f5f1 100644
--- a/x64dll.asm
+++ b/x64dll.asm
@@ -1964,5 +1964,19 @@ pop rsi
ret
SHA256_HashMultipleBlocks_SSE2 ENDP
+ ALIGN 8
+ExtendedControlRegister PROC
+;; First parameter is RCX, and xgetbv expects the extended control register (XCR) number in ECX; the DB bytes below are the xgetbv opcode (0F 01 D0)
+;; http://www.agner.org/optimize/vectorclass/read.php?i=65
+DB 0fh
+DB 01h
+DB 0d0h
+;; Build the 64-bit return value in RAX: xcr = (EDX << 32) | EAX
+and rax, 0ffffffffh
+shl rdx, 32
+or rax, rdx
+ret
+ExtendedControlRegister ENDP
+
_TEXT ENDS
END