diff options
author | Matthias Clasen <mclasen@redhat.com> | 2021-09-10 07:46:24 -0400 |
---|---|---|
committer | Matthias Clasen <mclasen@redhat.com> | 2021-09-10 22:17:31 -0400 |
commit | 155a4fac5cc668e3142ed62803bd38c80c1f38c2 (patch) | |
tree | f61f28f478a9f959a6f5d20a83ac9bfd6e2f8152 /gsk | |
parent | 76f481eb7b5c1bca7f47b2ec244539a8147af5ee (diff) | |
download | gtk+-155a4fac5cc668e3142ed62803bd38c80c1f38c2.tar.gz |
Add vectorized half-float conversion
We can't make the 4-element versions (float_to_half4 and
half_to_float4) inline, since we use ifuncs for them, so add
vectorized versions that convert n values per call.
Test included.
Diffstat (limited to 'gsk')
-rw-r--r-- | gsk/ngl/fp16.c | 96 | ||||
-rw-r--r-- | gsk/ngl/fp16i.c | 71 | ||||
-rw-r--r-- | gsk/ngl/fp16private.h | 24 |
3 files changed, 180 insertions, 11 deletions
diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c index a1ff1d1af9..2f71820ce4 100644 --- a/gsk/ngl/fp16.c +++ b/gsk/ngl/fp16.c @@ -37,7 +37,7 @@ as_float (const guint x) // IEEE-754 16-bit floating-point format (without infinity): 1-5-10 static inline float -half_to_float (const guint16 x) +half_to_float_one (const guint16 x) { const guint e = (x&0x7C00)>>10; // exponent const guint m = (x&0x03FF)<<13; // mantissa @@ -46,7 +46,7 @@ half_to_float (const guint16 x) } static inline guint16 -float_to_half (const float x) +float_to_half_one (const float x) { const guint b = as_uint(x)+0x00001000; // round-to-nearest-even const guint e = (b&0x7F800000)>>23; // exponent @@ -58,20 +58,38 @@ void float_to_half4_c (const float f[4], guint16 h[4]) { - h[0] = float_to_half (f[0]); - h[1] = float_to_half (f[1]); - h[2] = float_to_half (f[2]); - h[3] = float_to_half (f[3]); + h[0] = float_to_half_one (f[0]); + h[1] = float_to_half_one (f[1]); + h[2] = float_to_half_one (f[2]); + h[3] = float_to_half_one (f[3]); } void half_to_float4_c (const guint16 h[4], float f[4]) { - f[0] = half_to_float (h[0]); - f[1] = half_to_float (h[1]); - f[2] = half_to_float (h[2]); - f[3] = half_to_float (h[3]); + f[0] = half_to_float_one (h[0]); + f[1] = half_to_float_one (h[1]); + f[2] = half_to_float_one (h[2]); + f[3] = half_to_float_one (h[3]); +} + +void +float_to_half_c (const float *f, + guint16 *h, + int n) +{ + for (int i = 0; i < n; i++) + h[i] = float_to_half_one (f[i]); +} + +void +half_to_float_c (const guint16 *h, + float *f, + int n) +{ + for (int i = 0; i < n; i++) + f[i] = half_to_float_one (h[i]); } #ifdef HAVE_F16C @@ -122,10 +140,30 @@ half_to_float4 (const guint16 h[4], float f[4]) half_to_float4_c (h, f); } +void +float_to_half (const float *f, guint16 *h, int n) +{ + if (have_f16c_msvc ()) + float_to_half_f16c (f, h, n); + else + float_to_half4_c (f, h, n); +} + +void +half_to_float (const guint16 *h, float *f, int n) +{ + if (have_f16c_msvc ()) + half_to_float_f16c (h, f, n); + 
else + half_to_float_c (h, f, n); +} + #else void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4"))); void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4"))); +void float_to_half (const float *f, guint16 *h, int n) __attribute__((ifunc ("resolve_float_to_half"))); +void half_to_float (const guint16 *h, float *f, int n) __attribute__((ifunc ("resolve_half_to_float"))); static void * resolve_float_to_half4 (void) @@ -147,6 +185,26 @@ resolve_half_to_float4 (void) return half_to_float4_c; } +static void * +resolve_float_to_half (void) +{ + __builtin_cpu_init (); + if (__builtin_cpu_supports ("f16c")) + return float_to_half_f16c; + else + return float_to_half_c; +} + +static void * +resolve_half_to_float (void) +{ + __builtin_cpu_init (); + if (__builtin_cpu_supports ("f16c")) + return half_to_float_f16c; + else + return half_to_float_c; +} + #endif #else /* ! HAVE_F16C */ @@ -168,10 +226,28 @@ half_to_float4 (const guint16 h[4], half_to_float4_c (h, f); } +void +float_to_half (const float *f, + guint16 *h, + int n) +{ + float_to_half_c (f, h, n); +} + +void +half_to_float (const guint16 *h, + float *f, + int n) +{ + half_to_float_c (h, f, n); +} + #else void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((alias ("float_to_half4_c"))); void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half_to_float4_c"))); +void float_to_half (const float *f, guint16 *h, int n) __attribute__((alias ("float_to_half_c"))); +void half_to_float (const guint16 *h, float *f, int n) __attribute__((alias ("half_to_float_c"))); #endif diff --git a/gsk/ngl/fp16i.c b/gsk/ngl/fp16i.c index 74c5827ff8..aff38411bd 100644 --- a/gsk/ngl/fp16i.c +++ b/gsk/ngl/fp16i.c @@ -30,7 +30,6 @@ #else #define CAST_M128I_P(a) (__m128i_u const *) a #endif - void float_to_half4_f16c (const float f[4], guint16 h[4]) @@ -50,4 +49,74 @@ half_to_float4_f16c (const guint16 h[4], 
_mm_store_ps (f, s); } +#define ALIGNED(p, n) (GPOINTER_TO_UINT(p) % n == 0) +void +float_to_half_f16c (const float *f, + guint16 *h, + int n) +{ + __m128 s; + __m128i i; + int j; + const float *ff = f; + guint16 *hh = h; + + for (j = 0; j < n; j++) + { + if (ALIGNED (ff, 16) && ALIGNED (hh, 16)) + break; + ff++; + hh++; + } + + float_to_half_c (f, h, j); + + for (; j + 4 < n; j += 4) + { + s = _mm_loadu_ps (ff); + i = _mm_cvtps_ph (s, 0); + _mm_storel_epi64 ((__m128i*)hh, i); + ff += 4; + hh += 4; + } + + if (j < n) + float_to_half_c (ff, hh, n - j); +} + +void +half_to_float_f16c (const guint16 *h, + float *f, + int n) +{ + __m128i i; + __m128 s; + int j; + const guint16 *hh = h; + float *ff = f; + + for (j = 0; j < n; j++) + { + if (ALIGNED (ff, 16) && ALIGNED (hh, 16)) + break; + ff++; + hh++; + } + + half_to_float_c (h, f, j); + + for (; j + 4 < n; j += 4) + { + i = _mm_loadl_epi64 (CAST_M128I_P (hh)); + s = _mm_cvtph_ps (i); + _mm_store_ps (ff, s); + hh += 4; + ff += 4; + } + + if (j < n) + half_to_float_c (hh, ff, n - j); +} + #endif /* HAVE_F16C */ + diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h index 574d7e4388..fbb95cdc1b 100644 --- a/gsk/ngl/fp16private.h +++ b/gsk/ngl/fp16private.h @@ -35,18 +35,42 @@ void float_to_half4 (const float f[4], void half_to_float4 (const guint16 h[4], float f[4]); +void float_to_half (const float *f, + guint16 *h, + int n); + +void half_to_float (const guint16 *h, + float *f, + int n); + void float_to_half4_f16c (const float f[4], guint16 h[4]); void half_to_float4_f16c (const guint16 h[4], float f[4]); +void float_to_half_f16c (const float *f, + guint16 *h, + int n); + +void half_to_float_f16c (const guint16 *h, + float *f, + int n); + void float_to_half4_c (const float f[4], guint16 h[4]); void half_to_float4_c (const guint16 h[4], float f[4]); +void float_to_half_c (const float *f, + guint16 *h, + int n); + +void half_to_float_c (const guint16 *h, + float *f, + int n); + G_END_DECLS #endif |