diff options
-rw-r--r-- | gsk/ngl/fp16.c | 96 | ||||
-rw-r--r-- | gsk/ngl/fp16i.c | 84 | ||||
-rw-r--r-- | gsk/ngl/fp16private.h | 24 | ||||
-rw-r--r-- | testsuite/gsk/half-float.c | 58 |
4 files changed, 242 insertions, 20 deletions
diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c
index a1ff1d1af9..2f71820ce4 100644
--- a/gsk/ngl/fp16.c
+++ b/gsk/ngl/fp16.c
@@ -37,7 +37,7 @@ as_float (const guint x)
 // IEEE-754 16-bit floating-point format (without infinity): 1-5-10
 
 static inline float
-half_to_float (const guint16 x)
+half_to_float_one (const guint16 x)
 {
   const guint e = (x&0x7C00)>>10; // exponent
   const guint m = (x&0x03FF)<<13; // mantissa
@@ -46,7 +46,7 @@ half_to_float (const guint16 x)
 }
 
 static inline guint16
-float_to_half (const float x)
+float_to_half_one (const float x)
 {
   const guint b = as_uint(x)+0x00001000; // round-to-nearest-even
   const guint e = (b&0x7F800000)>>23; // exponent
@@ -58,20 +58,38 @@ void
 float_to_half4_c (const float f[4],
                   guint16     h[4])
 {
-  h[0] = float_to_half (f[0]);
-  h[1] = float_to_half (f[1]);
-  h[2] = float_to_half (f[2]);
-  h[3] = float_to_half (f[3]);
+  h[0] = float_to_half_one (f[0]);
+  h[1] = float_to_half_one (f[1]);
+  h[2] = float_to_half_one (f[2]);
+  h[3] = float_to_half_one (f[3]);
 }
 
 void
 half_to_float4_c (const guint16 h[4],
                   float         f[4])
 {
-  f[0] = half_to_float (h[0]);
-  f[1] = half_to_float (h[1]);
-  f[2] = half_to_float (h[2]);
-  f[3] = half_to_float (h[3]);
+  f[0] = half_to_float_one (h[0]);
+  f[1] = half_to_float_one (h[1]);
+  f[2] = half_to_float_one (h[2]);
+  f[3] = half_to_float_one (h[3]);
+}
+
+void
+float_to_half_c (const float *f,
+                 guint16     *h,
+                 int          n)
+{
+  for (int i = 0; i < n; i++)
+    h[i] = float_to_half_one (f[i]);
+}
+
+void
+half_to_float_c (const guint16 *h,
+                 float         *f,
+                 int            n)
+{
+  for (int i = 0; i < n; i++)
+    f[i] = half_to_float_one (h[i]);
 }
 
 #ifdef HAVE_F16C
@@ -122,10 +140,30 @@ half_to_float4 (const guint16 h[4], float f[4])
     half_to_float4_c (h, f);
 }
 
+void
+float_to_half (const float *f, guint16 *h, int n)
+{
+  if (have_f16c_msvc ())
+    float_to_half_f16c (f, h, n);
+  else
+    float_to_half_c (f, h, n);
+}
+
+void
+half_to_float (const guint16 *h, float *f, int n)
+{
+  if (have_f16c_msvc ())
+    half_to_float_f16c (h, f, n);
+  else
+    half_to_float_c (h, f, n);
+}
+
 #else
 
 void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
 void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));
+void float_to_half (const float *f, guint16 *h, int n) __attribute__((ifunc ("resolve_float_to_half")));
+void half_to_float (const guint16 *h, float *f, int n) __attribute__((ifunc ("resolve_half_to_float")));
 
 static void *
 resolve_float_to_half4 (void)
@@ -147,6 +185,26 @@ resolve_half_to_float4 (void)
     return half_to_float4_c;
 }
 
+static void *
+resolve_float_to_half (void)
+{
+  __builtin_cpu_init ();
+  if (__builtin_cpu_supports ("f16c"))
+    return float_to_half_f16c;
+  else
+    return float_to_half_c;
+}
+
+static void *
+resolve_half_to_float (void)
+{
+  __builtin_cpu_init ();
+  if (__builtin_cpu_supports ("f16c"))
+    return half_to_float_f16c;
+  else
+    return half_to_float_c;
+}
+
 #endif
 
 #else /* ! HAVE_F16C */
@@ -168,10 +226,28 @@ half_to_float4 (const guint16 h[4],
   half_to_float4_c (h, f);
 }
 
+void
+float_to_half (const float *f,
+               guint16     *h,
+               int          n)
+{
+  float_to_half_c (f, h, n);
+}
+
+void
+half_to_float (const guint16 *h,
+               float         *f,
+               int            n)
+{
+  half_to_float_c (h, f, n);
+}
+
 #else
 
 void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((alias ("float_to_half4_c")));
 void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half_to_float4_c")));
+void float_to_half (const float *f, guint16 *h, int n) __attribute__((alias ("float_to_half_c")));
+void half_to_float (const guint16 *h, float *f, int n) __attribute__((alias ("half_to_float_c")));
 
 #endif
 
diff --git a/gsk/ngl/fp16i.c b/gsk/ngl/fp16i.c
index 74c5827ff8..aff38411bd 100644
--- a/gsk/ngl/fp16i.c
+++ b/gsk/ngl/fp16i.c
@@ -30,7 +30,6 @@
 #else
 #define CAST_M128I_P(a) (__m128i_u const *) a
 #endif
-
 void
 float_to_half4_f16c (const float f[4],
                      guint16     h[4])
@@ -50,4 +49,87 @@ half_to_float4_f16c (const guint16 h[4],
   _mm_store_ps (f, s);
 }
 
+#define ALIGNED(p, n) (GPOINTER_TO_UINT(p) % (n) == 0)
+
+/* Convert n floats to half floats, using F16C instructions for
+ * groups of four and the C fallback for everything else.
+ */
+void
+float_to_half_f16c (const float *f,
+                    guint16     *h,
+                    int          n)
+{
+  __m128 s;
+  __m128i i;
+  int j;
+  const float *ff = f;
+  guint16 *hh = h;
+
+  /* Find the first index at which both arrays are 16-byte aligned */
+  for (j = 0; j < n; j++)
+    {
+      if (ALIGNED (ff, 16) && ALIGNED (hh, 16))
+        break;
+      ff++;
+      hh++;
+    }
+
+  /* Convert the elements before that index one at a time */
+  float_to_half_c (f, h, j);
+
+  for (; j + 4 <= n; j += 4)
+    {
+      s = _mm_loadu_ps (ff);
+      i = _mm_cvtps_ph (s, 0);
+      _mm_storel_epi64 ((__m128i*)hh, i);
+      ff += 4;
+      hh += 4;
+    }
+
+  /* Convert the leftover elements that don't fill a group of four */
+  if (j < n)
+    float_to_half_c (ff, hh, n - j);
+}
+
+/* Convert n half floats to floats, using F16C instructions for
+ * groups of four and the C fallback for everything else.
+ */
+void
+half_to_float_f16c (const guint16 *h,
+                    float         *f,
+                    int            n)
+{
+  __m128i i;
+  __m128 s;
+  int j;
+  const guint16 *hh = h;
+  float *ff = f;
+
+  /* Find the first index at which both arrays are 16-byte aligned */
+  for (j = 0; j < n; j++)
+    {
+      if (ALIGNED (ff, 16) && ALIGNED (hh, 16))
+        break;
+      ff++;
+      hh++;
+    }
+
+  /* Convert the elements before that index one at a time */
+  half_to_float_c (h, f, j);
+
+  for (; j + 4 <= n; j += 4)
+    {
+      i = _mm_loadl_epi64 (CAST_M128I_P (hh));
+      s = _mm_cvtph_ps (i);
+      _mm_store_ps (ff, s);
+      hh += 4;
+      ff += 4;
+    }
+
+  /* Convert the leftover elements that don't fill a group of four */
+  if (j < n)
+    half_to_float_c (hh, ff, n - j);
+}
+
 #endif /* HAVE_F16C */
+
diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h
index 574d7e4388..fbb95cdc1b 100644
--- a/gsk/ngl/fp16private.h
+++ b/gsk/ngl/fp16private.h
@@ -35,18 +35,42 @@ void float_to_half4 (const float f[4],
 void half_to_float4 (const guint16 h[4],
                      float         f[4]);
 
+void float_to_half (const float *f,
+                    guint16     *h,
+                    int          n);
+
+void half_to_float (const guint16 *h,
+                    float         *f,
+                    int            n);
+
 void float_to_half4_f16c (const float f[4],
                           guint16     h[4]);
 
 void half_to_float4_f16c (const guint16 h[4],
                           float         f[4]);
 
+void float_to_half_f16c (const float *f,
+                         guint16     *h,
+                         int          n);
+
+void half_to_float_f16c (const guint16 *h,
+                         float         *f,
+                         int            n);
+
 void float_to_half4_c (const float f[4],
                        guint16     h[4]);
 
 void half_to_float4_c (const guint16 h[4],
                        float         f[4]);
 
+void float_to_half_c (const float *f,
+                      guint16     *h,
+                      int          n);
+
+void half_to_float_c (const guint16 *h,
+                      float         *f,
+                      int            n);
+
 G_END_DECLS
 
 #endif
diff --git a/testsuite/gsk/half-float.c b/testsuite/gsk/half-float.c
index 35dce9c1c8..ba56992f21 100644
--- a/testsuite/gsk/half-float.c
+++ b/testsuite/gsk/half-float.c
@@ -32,6 +32,23 @@ test_constants (void)
     }
 }
 
+static float
+random_representable_float (void)
+{
+  guint16 h[4];
+  float f[4];
+  do
+    {
+      /* generate a random float that's representable as fp16 */
+      memset (h, 0, sizeof (h));
+      h[0] = g_random_int_range (G_MININT16, G_MAXINT16);
+      half_to_float4 (h, f);
+    }
+  while (!isnormal (f[0])); /* skip nans and infs since they don't compare well */
+
+  return f[0];
+}
+
 static void
 test_roundtrip (void)
 {
@@ -41,15 +58,7 @@ test_roundtrip (void)
       float f2[4];
       guint16 h[4];
 
-      do
-        {
-          /* generate a random float thats representable as fp16 */
-          memset (h, 0, sizeof (h));
-          h[0] = g_random_int_range (G_MININT16, G_MAXINT16);
-          half_to_float4 (h, f2);
-        }
-      while (!isnormal (f2[0])); /* skip nans and infs since they don't compare well */
-
+      f2[0] = random_representable_float ();
       memset (f, 0, sizeof (f));
       f[0] = f2[0];
 
@@ -60,6 +69,36 @@ test_roundtrip (void)
     }
 }
 
+/* Test that the array versions work as expected,
+ * in particular with unaligned boundaries.
+ */
+static void
+test_many (void)
+{
+  for (int i = 0; i < 100; i++)
+    {
+      int size = g_random_int_range (100, 200);
+      int offset = g_random_int_range (0, 20);
+
+      guint16 *h = g_new0 (guint16, size);
+      float *f = g_new0 (float, size);
+      float *f2 = g_new0 (float, size);
+
+      for (int j = offset; j < size; j++)
+        f[j] = random_representable_float ();
+
+      float_to_half (f + offset, h + offset, size - offset);
+      half_to_float (h + offset, f2 + offset, size - offset);
+
+      for (int j = offset; j < size; j++)
+        g_assert_cmpfloat (f[j], ==, f2[j]);
+
+      g_free (h);
+      g_free (f);
+      g_free (f2);
+    }
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -67,6 +106,7 @@ main (int argc, char *argv[])
 
   g_test_add_func ("/half-float/constants", test_constants);
   g_test_add_func ("/half-float/roundtrip", test_roundtrip);
+  g_test_add_func ("/half-float/many", test_many);
 
   return g_test_run ();
 }