From 6d6c8c59ebbcab0a2ec3e87912f4daf466da1e54 Mon Sep 17 00:00:00 2001
From: Chun-wei Fan <fanchunwei@src.gnome.org>
Date: Mon, 12 Apr 2021 16:24:37 +0800
Subject: meson.build: Improve F16C detection on Visual Studio

The __builtin_cpu...() intrinsics are strictly for GCC/Clang-based compilers, so
don't use them in the checking code on Visual Studio.  The test code will still
compile without this change, but will certainly fail if we want it to link.
---
 meson.build | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/meson.build b/meson.build
index 641208158c..902bd72031 100644
--- a/meson.build
+++ b/meson.build
@@ -707,8 +707,10 @@ int main () {
   __m128i i = _mm_cvtps_ph (s, 0);
   _mm_storel_epi64 ((__m128i*)h, i);
 
+#if defined (__GNUC__) || defined (__clang__)
   __builtin_cpu_init ();
   __builtin_cpu_supports ("f16c");
+#endif
 
     return 0;
 }'''
-- 
cgit v1.2.1


From d5ced21264ecf75df01ca11e86d240283cb82276 Mon Sep 17 00:00:00 2001
From: Chun-wei Fan <fanchunwei@src.gnome.org>
Date: Mon, 12 Apr 2021 16:48:15 +0800
Subject: gsk/ngl/fp16.c: Implement runtime F16C detection on MSVC

We need to use __cpuid() to check for the presence of F16C instructions on
Visual Studio builds, and call the half_to_float4() or float_to_half4()
implementation accordingly, as the __builtin_cpu...() functions are strictly
for GCC or Clang only.

Also, since __m128i_u is not a standard intrinsics type across the board, just
use __m128i on Visual Studio, where it is safe to pass to
_mm_loadl_epi64().

As on Darwin, we cannot use the alias __attribute__ here, since __attribute__
is also specific to GCC and Clang.
---
 gsk/ngl/fp16.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 3 deletions(-)

diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c
index e9a8c8fbce..1e11faafd8 100644
--- a/gsk/ngl/fp16.c
+++ b/gsk/ngl/fp16.c
@@ -80,6 +80,12 @@ half_to_float4_c (const guint16 h[4],
 
 #ifdef HAVE_F16C
 
+#if defined(_MSC_VER) && !defined(__clang__)
+#define CAST_M128I_P(a) (__m128i const *) a
+#else
+#define CAST_M128I_P(a) (__m128i_u const *) a
+#endif
+
 static void
 float_to_half4_f16c (const float f[4],
                      guint16     h[4])
@@ -93,11 +99,61 @@ static void
 half_to_float4_f16c (const guint16 h[4],
                      float         f[4])
 {
-  __m128i i = _mm_loadl_epi64 ((__m128i_u const *)h);
+  __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
   __m128 s = _mm_cvtph_ps (i);
+
   _mm_store_ps (f, s);
 }
 
+#undef CAST_M128I_P
+
+#if defined(_MSC_VER) && !defined(__clang__)
+/* based on info from https://walbourn.github.io/directxmath-f16c-and-fma/ */
+static gboolean /* TRUE when CPUID reports F16C; probed once, then served from the static cache */
+have_f16c_msvc (void)
+{
+  static gboolean result = FALSE; /* stays FALSE unless both feature bits below are set */
+  static gsize inited = 0; /* one-time-init guard for g_once_init_enter()/leave() */
+
+  if (g_once_init_enter (&inited)) /* only the first caller runs the probe */
+    {
+      int cpuinfo[4] = { -1 }; /* filled by __cpuid(); index 0 and 2 are read below (EAX, ECX per the __cpuid docs) */
+
+      __cpuid (cpuinfo, 0); /* leaf 0 */
+
+      if (cpuinfo[0] > 0) /* leaf 1 is only queried when leaf 0 reports a maximum leaf above 0 */
+        {
+          __cpuid (cpuinfo, 1); /* leaf 1: feature flags */
+
+          if ((cpuinfo[2] & 0x8000000) != 0) /* ECX bit 27 (OSXSAVE in the CPUID leaf-1 layout) */
+            result = (cpuinfo[2] & 0x20000000) != 0; /* ECX bit 29 (F16C in the CPUID leaf-1 layout) */
+        }
+
+      g_once_init_leave (&inited, 1); /* mark the probe as done; result is final from here on */
+    }
+
+  return result;
+}
+
+void /* MSVC-only entry point: chooses the implementation on every call instead of via an ifunc resolver */
+float_to_half4 (const float f[4], guint16 h[4])
+{
+  if (have_f16c_msvc ()) /* cached CPUID probe */
+    float_to_half4_f16c (f, h); /* F16C-intrinsics variant */
+  else
+    float_to_half4_c (f, h); /* the _c variant, used when the probe reports no F16C */
+}
+
+void /* MSVC-only entry point: chooses the implementation on every call instead of via an ifunc resolver */
+half_to_float4 (const guint16 h[4], float f[4])
+{
+  if (have_f16c_msvc ()) /* cached CPUID probe */
+    half_to_float4_f16c (h, f); /* F16C-intrinsics variant */
+  else
+    half_to_float4_c (h, f); /* the _c variant, used when the probe reports no F16C */
+}
+
+#else
 void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
 void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));
 
@@ -120,11 +176,12 @@ resolve_half_to_float4 (void)
   else
     return half_to_float4_c;
 }
+#endif
 
 #else
 
-#ifdef __APPLE__
-// turns out aliases don't work on Darwin
+#if defined(__APPLE__) || (defined(_MSC_VER) && !defined(__clang__))
+// turns out aliases don't work on Darwin or Visual Studio
 
 void
 float_to_half4 (const float f[4],
-- 
cgit v1.2.1


From 65b35457fdfde69fcc8aaed516a6406855c0dbbc Mon Sep 17 00:00:00 2001
From: Chun-wei Fan <fanchunwei@src.gnome.org>
Date: Mon, 12 Apr 2021 17:58:46 +0800
Subject: meson.build: Check for F16C on 32-bit Visual Studio

It is also possible to enable F16C instructions on 32-bit Visual Studio builds,
so also check for the compiler's ability to build F16C code on Visual Studio
32-bit instead of just bailing out.
---
 meson.build | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/meson.build b/meson.build
index 902bd72031..26cda67aba 100644
--- a/meson.build
+++ b/meson.build
@@ -692,10 +692,8 @@ if get_option('f16c').enabled()
 # if !defined(__amd64__) && !defined(__x86_64__)
 #   error "F16C intrinsics are only available on x86_64"
 # endif
-#elif defined (_MSC_VER) && !defined (_M_X64) && !defined (_M_AMD64)
-# error "F16C intrinsics not supported on x86 MSVC builds"
 #endif
-#if defined(__SSE__) || (_M_X64 > 0)
+#if defined(__SSE__) || defined(_MSC_VER)
 # include <immintrin.h>
 #else
 # error "No F16C intrinsics available"
-- 
cgit v1.2.1