From 6d6c8c59ebbcab0a2ec3e87912f4daf466da1e54 Mon Sep 17 00:00:00 2001 From: Chun-wei Fan Date: Mon, 12 Apr 2021 16:24:37 +0800 Subject: meson.build: Improve F16C detection on Visual Studio The __builtin_cpu...() intrinsics are strictly for GCC/Clang-based compilers, so don't use them in the checking code on Visual Studio. The test code will still compile without this change, but will certainly fail if we want it to link. --- meson.build | 2 ++ 1 file changed, 2 insertions(+) diff --git a/meson.build b/meson.build index 641208158c..902bd72031 100644 --- a/meson.build +++ b/meson.build @@ -707,8 +707,10 @@ int main () { __m128i i = _mm_cvtps_ph (s, 0); _mm_storel_epi64 ((__m128i*)h, i); +#if defined (__GNUC__) || defined (__clang__) __builtin_cpu_init (); __builtin_cpu_supports ("f16c"); +#endif return 0; }''' -- cgit v1.2.1 From d5ced21264ecf75df01ca11e86d240283cb82276 Mon Sep 17 00:00:00 2001 From: Chun-wei Fan Date: Mon, 12 Apr 2021 16:48:15 +0800 Subject: gsk/ngl/fp16.c: Implement runtime F16C detection on MSVC We need to use __cpuid() to check for the presence of F16C instructions on Visual Studio builds, and call the half_to_float4() or float_to_half4() implementation accordingly, as the __builtin_cpu...() functions are strictly for GCC or Clang only. Also, since __m128i_u is not a standard intrinsics type across the board, just use __m128i on Visual Studio as it is safe to do so there for use with _mm_loadl_epi64(). Like running on Darwin, we cannot use the alias __attribute__ as __attribute__ is also for GCC and Clang only. 
--- gsk/ngl/fp16.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c index e9a8c8fbce..1e11faafd8 100644 --- a/gsk/ngl/fp16.c +++ b/gsk/ngl/fp16.c @@ -80,6 +80,12 @@ half_to_float4_c (const guint16 h[4], #ifdef HAVE_F16C +#if defined(_MSC_VER) && !defined(__clang__) +#define CAST_M128I_P(a) (__m128i const *) a +#else +#define CAST_M128I_P(a) (__m128i_u const *) a +#endif + static void float_to_half4_f16c (const float f[4], guint16 h[4]) @@ -93,11 +99,61 @@ static void half_to_float4_f16c (const guint16 h[4], float f[4]) { - __m128i i = _mm_loadl_epi64 ((__m128i_u const *)h); + __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h)); __m128 s = _mm_cvtph_ps (i); + _mm_store_ps (f, s); } +#undef CAST_M128I_P + +#if defined(_MSC_VER) && !defined(__clang__) +/* Runtime check for F16C support on MSVC, based on info from https://walbourn.github.io/directxmath-f16c-and-fma/; the CPUID probe runs once under g_once_init_enter() and its answer is kept in a static. */ +static gboolean +have_f16c_msvc (void) +{ + static gboolean result = FALSE; + static gsize inited = 0; + + if (g_once_init_enter (&inited)) + { + int cpuinfo[4] = { -1 }; + + __cpuid (cpuinfo, 0); /* function 0; cpuinfo[0] is presumably the highest supported function id -- confirm in the __cpuid docs */ + + if (cpuinfo[0] > 0) + { + __cpuid (cpuinfo, 1); + + if ((cpuinfo[2] & 0x8000000) != 0) /* bit 27 of cpuinfo[2]; presumably the OSXSAVE flag -- confirm */ + result = (cpuinfo[2] & 0x20000000) != 0; /* bit 29 of cpuinfo[2], treated here as the F16C flag */ + } + + g_once_init_leave (&inited, 1); + } + + return result; +} + +void +float_to_half4 (const float f[4], guint16 h[4]) +{ + if (have_f16c_msvc ()) /* pick the F16C variant at run time, otherwise the _c variant */ + float_to_half4_f16c (f, h); + else + float_to_half4_c (f, h); +} + +void +half_to_float4 (const guint16 h[4], float f[4]) +{ + if (have_f16c_msvc ()) /* pick the F16C variant at run time, otherwise the _c variant */ + half_to_float4_f16c (h, f); + else + half_to_float4_c (h, f); +} + +#else void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4"))); void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4"))); @@ -120,11 +176,12 @@ resolve_half_to_float4 (void) else return half_to_float4_c; } +#endif #else -#ifdef __APPLE__ -// turns out aliases 
don't work on Darwin +#if defined(__APPLE__) || (defined(_MSC_VER) && !defined(__clang__)) +// turns out aliases don't work on Darwin nor Visual Studio void float_to_half4 (const float f[4], -- cgit v1.2.1 From 65b35457fdfde69fcc8aaed516a6406855c0dbbc Mon Sep 17 00:00:00 2001 From: Chun-wei Fan Date: Mon, 12 Apr 2021 17:58:46 +0800 Subject: meson.build: Check for F16C on 32-bit Visual Studio It is also possible to enable F16C instructions on 32-bit Visual Studio builds, so also check for the compiler's ability to build F16C code on Visual Studio 32-bit instead of just bailing out. --- meson.build | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/meson.build b/meson.build index 902bd72031..26cda67aba 100644 --- a/meson.build +++ b/meson.build @@ -692,10 +692,8 @@ if get_option('f16c').enabled() # if !defined(__amd64__) && !defined(__x86_64__) # error "F16C intrinsics are only available on x86_64" # endif -#elif defined (_MSC_VER) && !defined (_M_X64) && !defined (_M_AMD64) -# error "F16C intrinsics not supported on x86 MSVC builds" #endif -#if defined(__SSE__) || (_M_X64 > 0) +#if defined(__SSE__) || defined(_MSC_VER) # include <immintrin.h> #else # error "No F16C intrinsics available" -- cgit v1.2.1