summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthias Clasen <mclasen@redhat.com>2021-04-09 03:34:05 +0000
committerMatthias Clasen <mclasen@redhat.com>2021-04-09 03:34:05 +0000
commit5d0a3bd4cb2f8f3f992106b8ce2e13bd1fd50410 (patch)
tree26c5e64197675d301201c548db7c28567dd04347
parent1c2238cc2abbca0486a5fdac5523efc268affb96 (diff)
parent2d7169fd5f2467e6a6159978fcbd89478de85424 (diff)
downloadgtk+-5d0a3bd4cb2f8f3f992106b8ce2e13bd1fd50410.tar.gz
Merge branch 'half-float' into 'master'
ngl: Use fp16 for colors See merge request GNOME/gtk!3405
-rw-r--r--config.h.meson2
-rw-r--r--gsk/meson.build1
-rw-r--r--gsk/ngl/fp16.c150
-rw-r--r--gsk/ngl/fp16private.h40
-rw-r--r--gsk/ngl/gsknglcommandqueue.c4
-rw-r--r--gsk/ngl/gsknglrenderjob.c216
-rw-r--r--gsk/ngl/gskngltypesprivate.h8
-rw-r--r--meson.build40
-rw-r--r--meson_options.txt5
9 files changed, 375 insertions, 91 deletions
diff --git a/config.h.meson b/config.h.meson
index 549a677379..fc2dd0b3d2 100644
--- a/config.h.meson
+++ b/config.h.meson
@@ -279,3 +279,5 @@
/* Define if tracker3 is available */
#mesondefine HAVE_TRACKER3
+
+#mesondefine HAVE_F16C
diff --git a/gsk/meson.build b/gsk/meson.build
index 5c381b51c9..f351941c22 100644
--- a/gsk/meson.build
+++ b/gsk/meson.build
@@ -85,6 +85,7 @@ gsk_private_sources = files([
'ngl/gskngltexturelibrary.c',
'ngl/gskngluniformstate.c',
'ngl/gskngltexturepool.c',
+ 'ngl/fp16.c',
])
gsk_public_headers = files([
diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c
new file mode 100644
index 0000000000..e9a8c8fbce
--- /dev/null
+++ b/gsk/ngl/fp16.c
@@ -0,0 +1,150 @@
+/* fp16.c
+ *
+ * Copyright 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include <config.h>
+
+#include "fp16private.h"
+
+#ifdef HAVE_F16C
+#include <immintrin.h>
+#endif
+
+/* Reinterpret the bits of a float as an unsigned integer.
+ *
+ * The value is punned through a union instead of a casted pointer:
+ * reading a float object through a guint lvalue violates the strict
+ * aliasing rule and is undefined behavior, whereas reading a different
+ * union member is well defined in C.
+ * Assumes guint and float have the same size, as the pointer cast this
+ * replaces also did.
+ */
+static inline guint
+as_uint (const float x)
+{
+  union { float f; guint u; } pun = { .f = x };
+
+  return pun.u;
+}
+
+/* Reinterpret the bits of an unsigned integer as a float.
+ *
+ * The value is punned through a union instead of a casted pointer:
+ * reading a guint object through a float lvalue violates the strict
+ * aliasing rule and is undefined behavior, whereas reading a different
+ * union member is well defined in C.
+ * Assumes guint and float have the same size, as the pointer cast this
+ * replaces also did.
+ */
+static inline float
+as_float (const guint x)
+{
+  union { guint u; float f; } pun = { .u = x };
+
+  return pun.f;
+}
+
+// IEEE-754 16-bit floating-point format (without infinity): 1-5-10
+
+/* Decode one 16-bit half-float bit pattern (1 sign, 5 exponent,
+ * 10 mantissa bits) into a float.
+ *
+ * The three cases are handled by explicit branches. The previous
+ * branch-free expression evaluated every term unconditionally, which
+ * shifted by 150 bits whenever the mantissa was zero (undefined
+ * behavior), and shifted the promoted int (x & 0x8000) into the sign bit.
+ *
+ * An exponent field of 31 is not special-cased, so such patterns decode
+ * to ordinary finite values rather than infinity or NaN.
+ */
+static inline float
+half_to_float (const guint16 x)
+{
+  const guint sign = ((guint) x & 0x8000u) << 16; // sign bit moved to bit 31
+  const guint e = ((guint) x & 0x7C00u) >> 10;    // exponent
+  const guint m = ((guint) x & 0x03FFu) << 13;    // mantissa, in float position
+
+  /* normalized: rebias the exponent by 112 (= 127 - 15) */
+  if (e != 0)
+    return as_float (sign | ((e + 112) << 23) | m);
+
+  /* denormalized: v is the biased float exponent of m, i.e. it locates
+   * the highest set mantissa bit, which becomes the implicit leading one */
+  if (m != 0)
+    {
+      const guint v = as_uint ((float) m) >> 23;
+
+      return as_float (sign | ((v - 37) << 23) | ((m << (150 - v)) & 0x007FE000));
+    }
+
+  /* signed zero */
+  return as_float (sign);
+}
+
+/* Encode a float as a 16-bit half-float bit pattern (1 sign, 5 exponent,
+ * 10 mantissa bits).
+ *
+ * The result is the OR of the sign with whichever of the normalized,
+ * denormalized and saturated terms apply. Each term is now guarded by
+ * its condition: the previous branch-free expression always evaluated
+ * the denormalized term, whose shift count (125 - e) wraps around or
+ * exceeds 31 for most inputs (e.g. 0.0f and 1.0f), which is undefined
+ * behavior.
+ *
+ * Inputs whose biased exponent is 101 or less produce a signed zero;
+ * inputs with a biased exponent above 143 get all exponent and mantissa
+ * bits set (0x7FFF).
+ */
+static inline guint16
+float_to_half (const float x)
+{
+  /* Adding half of the lowest kept mantissa bit (13 bits are dropped
+   * below) makes the truncation round to nearest.
+   * NOTE(review): ties appear to round up in magnitude rather than to
+   * even -- confirm if bit-exact agreement with the F16C path matters. */
+  const guint b = as_uint (x) + 0x00001000;
+  const guint e = (b & 0x7F800000) >> 23; // exponent
+  const guint m = b & 0x007FFFFF;         // mantissa
+  guint h = (b & 0x80000000) >> 16;       // sign
+
+  if (e > 112)                            // normalized
+    h |= (((e - 112) << 10) & 0x7C00) | (m >> 13);
+
+  if (e < 113 && e > 101)                 // denormalized
+    h |= (((0x007FF000 + m) >> (125 - e)) + 1) >> 1;
+
+  if (e > 143)                            // saturate
+    h |= 0x7FFF;
+
+  return h;
+}
+
+/* Portable fallback: encodes the four floats in f as half-float bit
+ * patterns in h, one element at a time.
+ */
+static void
+float_to_half4_c (const float f[4],
+                  guint16     h[4])
+{
+  for (int i = 0; i < 4; i++)
+    h[i] = float_to_half (f[i]);
+}
+
+/* Portable fallback: decodes the four half-float bit patterns in h into
+ * floats in f, one element at a time.
+ */
+static void
+half_to_float4_c (const guint16 h[4],
+                  float         f[4])
+{
+  for (int i = 0; i < 4; i++)
+    f[i] = half_to_float (h[i]);
+}
+
+#ifdef HAVE_F16C
+
+/* F16C variant: loads the four floats with an unaligned load, converts
+ * them with _mm_cvtps_ph (rounding immediate 0) and stores the low
+ * 64 bits of the result, i.e. the four half values, into h.
+ */
+static void
+float_to_half4_f16c (const float f[4],
+                     guint16     h[4])
+{
+  const __m128 floats = _mm_loadu_ps (f);
+  const __m128i halves = _mm_cvtps_ph (floats, 0);
+
+  _mm_storel_epi64 ((__m128i*)h, halves);
+}
+
+/* F16C variant: loads the four half values (64 bits) from h, widens
+ * them with _mm_cvtph_ps and writes the four floats to f.
+ */
+static void
+half_to_float4_f16c (const guint16 h[4],
+                     float         f[4])
+{
+  __m128i i = _mm_loadl_epi64 ((__m128i_u const *)h);
+  __m128 s = _mm_cvtph_ps (i);
+
+  /* f is a plain float[4] with no 16-byte alignment guarantee, so the
+   * unaligned store is required; _mm_store_ps needs an aligned address
+   * and may fault otherwise. float_to_half4_f16c likewise uses the
+   * unaligned load for its float operand. */
+  _mm_storeu_ps (f, s);
+}
+
+/* Public entry points, declared as indirect functions: the named
+ * resolver returns the implementation each symbol is bound to.
+ */
+void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
+void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));
+
+/* Resolver for float_to_half4: returns the F16C implementation when the
+ * running CPU reports the f16c feature, the portable C one otherwise.
+ */
+static void *
+resolve_float_to_half4 (void)
+{
+  /* CPU feature data must be initialized before it is queried */
+  __builtin_cpu_init ();
+
+  if (!__builtin_cpu_supports ("f16c"))
+    return float_to_half4_c;
+
+  return float_to_half4_f16c;
+}
+
+/* Resolver for half_to_float4: returns the F16C implementation when the
+ * running CPU reports the f16c feature, the portable C one otherwise.
+ */
+static void *
+resolve_half_to_float4 (void)
+{
+  /* CPU feature data must be initialized before it is queried */
+  __builtin_cpu_init ();
+
+  if (!__builtin_cpu_supports ("f16c"))
+    return half_to_float4_c;
+
+  return half_to_float4_f16c;
+}
+
+#else
+
+#ifdef __APPLE__
+// turns out aliases don't work on Darwin
+
+/* Apple build of the public entry point: a plain wrapper that forwards
+ * to the portable C implementation, used here in place of the alias
+ * attribute.
+ */
+void
+float_to_half4 (const float f[4],
+ guint16 h[4])
+{
+ float_to_half4_c (f, h);
+}
+
+/* Apple build of the public entry point: a plain wrapper that forwards
+ * to the portable C implementation, used here in place of the alias
+ * attribute.
+ */
+void
+half_to_float4 (const guint16 h[4],
+ float f[4])
+{
+ half_to_float4_c (h, f);
+}
+
+#else
+
+/* Without F16C support compiled in, the public entry points are simply
+ * aliases of the portable C implementations.
+ */
+void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((alias ("float_to_half4_c")));
+void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half_to_float4_c")));
+
+#endif
+
+#endif /* HAVE_F16C */
diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h
new file mode 100644
index 0000000000..d76f18a04f
--- /dev/null
+++ b/gsk/ngl/fp16private.h
@@ -0,0 +1,40 @@
+/* fp16private.h
+ *
+ * Copyright 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#ifndef __FP16_PRIVATE_H__
+#define __FP16_PRIVATE_H__
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+/* Half-float (1 sign, 5 exponent, 10 mantissa bits) bit patterns of
+ * frequently used values, usable without a runtime conversion.
+ */
+#define FP16_ZERO ((guint16)0) /* 0x0000: 0.0 */
+#define FP16_ONE ((guint16)15360) /* 0x3C00: 1.0 */
+#define FP16_MINUS_ONE ((guint16)48128) /* 0xBC00: -1.0 */
+
+/* Converts the four floats in f to half-float bit patterns in h. */
+void float_to_half4 (const float f[4],
+ guint16 h[4]);
+
+/* Converts the four half-float bit patterns in h to floats in f. */
+void half_to_float4 (const guint16 h[4],
+ float f[4]);
+
+G_END_DECLS
+
+#endif
diff --git a/gsk/ngl/gsknglcommandqueue.c b/gsk/ngl/gsknglcommandqueue.c
index e160076c57..bdfef2f35c 100644
--- a/gsk/ngl/gsknglcommandqueue.c
+++ b/gsk/ngl/gsknglcommandqueue.c
@@ -1003,13 +1003,13 @@ gsk_ngl_command_queue_execute (GskNglCommandQueue *self,
/* 2 = color location */
glEnableVertexAttribArray (2);
- glVertexAttribPointer (2, 4, GL_FLOAT, GL_FALSE,
+ glVertexAttribPointer (2, 4, GL_HALF_FLOAT, GL_FALSE,
sizeof (GskNglDrawVertex),
(void *) G_STRUCT_OFFSET (GskNglDrawVertex, color));
/* 3 = color2 location */
glEnableVertexAttribArray (3);
- glVertexAttribPointer (3, 4, GL_FLOAT, GL_FALSE,
+ glVertexAttribPointer (3, 4, GL_HALF_FLOAT, GL_FALSE,
sizeof (GskNglDrawVertex),
(void *) G_STRUCT_OFFSET (GskNglDrawVertex, color2));
diff --git a/gsk/ngl/gsknglrenderjob.c b/gsk/ngl/gsknglrenderjob.c
index 3e823dedab..b1bda44790 100644
--- a/gsk/ngl/gsknglrenderjob.c
+++ b/gsk/ngl/gsknglrenderjob.c
@@ -43,6 +43,7 @@
#include "gsknglshadowlibraryprivate.h"
#include "ninesliceprivate.h"
+#include "fp16private.h"
#define ORTHO_NEAR_PLANE -10000
#define ORTHO_FAR_PLANE 10000
@@ -884,6 +885,13 @@ gsk_ngl_render_job_update_clip (GskNglRenderJob *job,
return TRUE;
}
+/* Converts a GdkRGBA to four half-float values in h by handing the
+ * struct to float_to_half4() as an array of four floats.
+ * NOTE(review): this assumes GdkRGBA is laid out as four consecutive
+ * floats (red, green, blue, alpha) -- confirm against its definition.
+ */
+static inline void
+rgba_to_half (const GdkRGBA *rgba,
+ guint16 h[4])
+{
+ float_to_half4 ((const float *)rgba, h);
+}
+
/* fill_vertex_data */
static void
gsk_ngl_render_job_draw_coords (GskNglRenderJob *job,
@@ -895,16 +903,16 @@ gsk_ngl_render_job_draw_coords (GskNglRenderJob *job,
float min_v,
float max_u,
float max_v,
- const GdkRGBA *color)
+ guint16 c[4])
{
GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue);
- vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { min_u, min_v }, { color->red, color->green, color->blue, color->alpha } };
- vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { min_u, max_v }, { color->red, color->green, color->blue, color->alpha } };
- vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { max_u, min_v }, { color->red, color->green, color->blue, color->alpha } };
- vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { max_u, max_v }, { color->red, color->green, color->blue, color->alpha } };
- vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { min_u, max_v }, { color->red, color->green, color->blue, color->alpha } };
- vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { max_u, min_v }, { color->red, color->green, color->blue, color->alpha } };
+ vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { min_u, min_v }, .color = { c[0], c[1], c[2], c[3] } };
+ vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { min_u, max_v }, .color = { c[0], c[1], c[2], c[3] } };
+ vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { max_u, min_v }, .color = { c[0], c[1], c[2], c[3] } };
+ vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { max_u, max_v }, .color = { c[0], c[1], c[2], c[3] } };
+ vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { min_u, max_v }, .color = { c[0], c[1], c[2], c[3] } };
+ vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { max_u, min_v }, .color = { c[0], c[1], c[2], c[3] } };
}
/* load_vertex_data_with_region */
@@ -912,7 +920,7 @@ static inline void
gsk_ngl_render_job_draw_offscreen_with_color (GskNglRenderJob *job,
const graphene_rect_t *bounds,
const GskNglRenderOffscreen *offscreen,
- const GdkRGBA *color)
+ guint16 color[4])
{
float min_x = job->offset_x + bounds->origin.x;
float min_y = job->offset_y + bounds->origin.y;
@@ -932,7 +940,8 @@ gsk_ngl_render_job_draw_offscreen (GskNglRenderJob *job,
const graphene_rect_t *bounds,
const GskNglRenderOffscreen *offscreen)
{
- gsk_ngl_render_job_draw_offscreen_with_color (job, bounds, offscreen, &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f });
+ gsk_ngl_render_job_draw_offscreen_with_color (job, bounds, offscreen,
+ (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO });
}
/* load_float_vertex_data */
@@ -942,7 +951,7 @@ gsk_ngl_render_job_draw_with_color (GskNglRenderJob *job,
float y,
float width,
float height,
- const GdkRGBA *color)
+ guint16 color[4])
{
float min_x = job->offset_x + x;
float min_y = job->offset_y + y;
@@ -959,14 +968,15 @@ gsk_ngl_render_job_draw (GskNglRenderJob *job,
float width,
float height)
{
- gsk_ngl_render_job_draw_with_color (job, x, y, width, height, &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f });
+  /* All-zero half-float color. This must be FP16_ZERO: FP_ZERO is the
+   * <math.h> fpclassify() category constant, not a half-float bit
+   * pattern, and is not guaranteed to be 0. */
+  gsk_ngl_render_job_draw_with_color (job, x, y, width, height,
+                                      (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO });
}
/* load_vertex_data */
static inline void
gsk_ngl_render_job_draw_rect_with_color (GskNglRenderJob *job,
const graphene_rect_t *bounds,
- const GdkRGBA *color)
+ guint16 color[4])
{
gsk_ngl_render_job_draw_with_color (job,
bounds->origin.x,
@@ -995,11 +1005,12 @@ gsk_ngl_render_job_draw_offscreen_rect (GskNglRenderJob *job,
float min_y = job->offset_y + bounds->origin.y;
float max_x = min_x + bounds->size.width;
float max_y = min_y + bounds->size.height;
+ guint16 color[4] = { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO };
gsk_ngl_render_job_draw_coords (job,
min_x, min_y, max_x, max_y,
0, 1, 1, 0,
- &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f } );
+ color);
}
static inline void
@@ -1253,7 +1264,7 @@ blur_offscreen (GskNglRenderJob *job,
gsk_ngl_render_job_draw_coords (job,
0, 0, texture_to_blur_width, texture_to_blur_height,
0, 1, 1, 0,
- &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f });
+ (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO });
gsk_ngl_render_job_end_draw (job);
/* Bind second pass framebuffer and clear it */
@@ -1280,7 +1291,7 @@ blur_offscreen (GskNglRenderJob *job,
gsk_ngl_render_job_draw_coords (job,
0, 0, texture_to_blur_width, texture_to_blur_height,
0, 1, 1, 0,
- &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f });
+ (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO });
gsk_ngl_render_job_end_draw (job);
gsk_ngl_render_job_pop_modelview (job);
@@ -1353,10 +1364,11 @@ static inline void
gsk_ngl_render_job_visit_color_node (GskNglRenderJob *job,
const GskRenderNode *node)
{
+ guint16 color[4];
+
+ rgba_to_half (gsk_color_node_get_color (node), color);
gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, color));
- gsk_ngl_render_job_draw_rect_with_color (job,
- &node->bounds,
- gsk_color_node_get_color (node));
+ gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, color);
gsk_ngl_render_job_end_draw (job);
}
@@ -1627,28 +1639,41 @@ gsk_ngl_render_job_visit_rect_border_node (GskNglRenderJob *job,
const float *widths = gsk_border_node_get_widths (node);
const graphene_point_t *origin = &node->bounds.origin;
const graphene_size_t *size = &node->bounds.size;
+ guint16 color[4];
gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, color));
if (widths[0] > 0)
- gsk_ngl_render_job_draw_rect_with_color (job,
- &GRAPHENE_RECT_INIT (origin->x, origin->y, size->width - widths[1], widths[0]),
- &colors[0]);
+ {
+ rgba_to_half (&colors[0], color);
+ gsk_ngl_render_job_draw_rect_with_color (job,
+ &GRAPHENE_RECT_INIT (origin->x, origin->y, size->width - widths[1], widths[0]),
+ color);
+ }
if (widths[1] > 0)
- gsk_ngl_render_job_draw_rect_with_color (job,
- &GRAPHENE_RECT_INIT (origin->x + size->width - widths[1], origin->y, widths[1], size->height - widths[2]),
- &colors[0]);
+ {
+ rgba_to_half (&colors[1], color);
+ gsk_ngl_render_job_draw_rect_with_color (job,
+ &GRAPHENE_RECT_INIT (origin->x + size->width - widths[1], origin->y, widths[1], size->height - widths[2]),
+ color);
+ }
if (widths[2] > 0)
- gsk_ngl_render_job_draw_rect_with_color (job,
- &GRAPHENE_RECT_INIT (origin->x + widths[3], origin->y + size->height - widths[2], size->width - widths[1], widths[2]),
- &colors[0]);
+ {
+ rgba_to_half (&colors[2], color);
+ gsk_ngl_render_job_draw_rect_with_color (job,
+ &GRAPHENE_RECT_INIT (origin->x + widths[3], origin->y + size->height - widths[2], size->width - widths[1], widths[2]),
+ color);
+ }
if (widths[3] > 0)
- gsk_ngl_render_job_draw_rect_with_color (job,
- &GRAPHENE_RECT_INIT (origin->x, origin->y + widths[0], widths[3], size->height - widths[0]),
- &colors[0]);
+ {
+ rgba_to_half (&colors[3], color);
+ gsk_ngl_render_job_draw_rect_with_color (job,
+ &GRAPHENE_RECT_INIT (origin->x, origin->y + widths[0], widths[3], size->height - widths[0]),
+ color);
+ }
gsk_ngl_render_job_end_draw (job);
}
@@ -1658,7 +1683,7 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job,
const GskRenderNode *node)
{
const GskRoundedRect *rounded_outline = gsk_border_node_get_outline (node);
- const GdkRGBA *c = gsk_border_node_get_colors (node);
+ const GdkRGBA *colors = gsk_border_node_get_colors (node);
const float *widths = gsk_border_node_get_widths (node);
struct {
float w;
@@ -1669,6 +1694,7 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job,
float max_x = min_x + node->bounds.size.width;
float max_y = min_y + node->bounds.size.height;
GskRoundedRect outline;
+ guint16 color[4];
memset (sizes, 0, sizeof sizes);
@@ -1712,52 +1738,60 @@ gsk_ngl_render_job_visit_border_node (GskNglRenderJob *job,
{
GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue);
- vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 1 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } };
- vertices[1] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } };
- vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } };
+ rgba_to_half (&colors[0], color);
- vertices[3] = (GskNglDrawVertex) { { max_x - sizes[1].w, min_y + sizes[1].h }, { 1, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } };
- vertices[4] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } };
- vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha } };
+ vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { 0, 1 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[1] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } };
+
+ vertices[3] = (GskNglDrawVertex) { .position = { max_x - sizes[1].w, min_y + sizes[1].h }, .uv = { 1, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[4] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } };
}
if (widths[1] > 0)
{
GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue);
- vertices[0] = (GskNglDrawVertex) { { max_x - sizes[1].w, min_y + sizes[1].h }, { 0, 1 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } };
- vertices[1] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 0, 0 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } };
- vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } };
+ rgba_to_half (&colors[1], color);
+
+ vertices[0] = (GskNglDrawVertex) { .position = { max_x - sizes[1].w, min_y + sizes[1].h }, .uv = { 0, 1 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[1] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } };
- vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 1, 0 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } };
- vertices[4] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 0, 0 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } };
- vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 1, 1 }, { c[1].red, c[1].green, c[1].blue, c[1].alpha } };
+ vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { 1, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[4] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } };
}
if (widths[2] > 0)
{
GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue);
- vertices[0] = (GskNglDrawVertex) { { min_x + sizes[3].w, max_y - sizes[3].h }, { 0, 1 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } };
- vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } };
- vertices[2] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 1, 1 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } };
+ rgba_to_half (&colors[2], color);
+
+ vertices[0] = (GskNglDrawVertex) { .position = { min_x + sizes[3].w, max_y - sizes[3].h }, .uv = { 0, 1 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[2] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } };
- vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 1, 0 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } };
- vertices[4] = (GskNglDrawVertex) { { min_x , max_y }, { 0, 0 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } };
- vertices[5] = (GskNglDrawVertex) { { max_x - sizes[2].w, max_y - sizes[2].h }, { 1, 1 }, { c[2].red, c[2].green, c[2].blue, c[2].alpha } };
+ vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .uv = { 1, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[4] = (GskNglDrawVertex) { .position = { min_x , max_y }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[5] = (GskNglDrawVertex) { .position = { max_x - sizes[2].w, max_y - sizes[2].h }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } };
}
if (widths[3] > 0)
{
GskNglDrawVertex *vertices = gsk_ngl_command_queue_add_vertices (job->command_queue);
- vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 1 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } };
- vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } };
- vertices[2] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 1, 1 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } };
+ rgba_to_half (&colors[3], color);
- vertices[3] = (GskNglDrawVertex) { { min_x + sizes[3].w, max_y - sizes[3].h }, { 1, 0 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } };
- vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } };
- vertices[5] = (GskNglDrawVertex) { { min_x + sizes[0].w, min_y + sizes[0].h }, { 1, 1 }, { c[3].red, c[3].green, c[3].blue, c[3].alpha } };
+ vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .uv = { 0, 1 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[2] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } };
+
+ vertices[3] = (GskNglDrawVertex) { .position = { min_x + sizes[3].w, max_y - sizes[3].h }, .uv = { 1, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .uv = { 0, 0 }, .color = { color[0], color[1], color[2], color[3] } };
+ vertices[5] = (GskNglDrawVertex) { .position = { min_x + sizes[0].w, min_y + sizes[0].h }, .uv = { 1, 1 }, .color = { color[0], color[1], color[2], color[3] } };
}
gsk_ngl_render_job_end_draw (job);
@@ -1775,8 +1809,6 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job,
const GskRenderNode *node2)
{
const GskRenderNode *child = gsk_rounded_clip_node_get_child (node);
- const GdkRGBA *c2 = gsk_color_node_get_color (child);
- const GdkRGBA *c = gsk_border_node_get_colors (node2);
const GskRoundedRect *rounded_outline = gsk_border_node_get_outline (node2);
const float *widths = gsk_border_node_get_widths (node2);
float min_x = job->offset_x + node2->bounds.origin.x;
@@ -1785,10 +1817,15 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job,
float max_y = min_y + node2->bounds.size.height;
GskRoundedRect outline;
GskNglDrawVertex *vertices;
+ guint16 color[4];
+ guint16 color2[4];
if (node_is_invisible (node2))
return;
+ rgba_to_half (&gsk_border_node_get_colors (node2)[0], color);
+ rgba_to_half (gsk_color_node_get_color (child), color2);
+
gsk_ngl_render_job_transform_rounded_rect (job, rounded_outline, &outline);
gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, filled_border));
@@ -1803,12 +1840,12 @@ gsk_ngl_render_job_visit_css_background (GskNglRenderJob *job,
vertices = gsk_ngl_command_queue_add_vertices (job->command_queue);
- vertices[0] = (GskNglDrawVertex) { { min_x, min_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } };
- vertices[1] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } };
- vertices[2] = (GskNglDrawVertex) { { max_x, min_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } };
- vertices[3] = (GskNglDrawVertex) { { max_x, max_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } };
- vertices[4] = (GskNglDrawVertex) { { min_x, max_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } };
- vertices[5] = (GskNglDrawVertex) { { max_x, min_y }, { 0, 0 }, { c[0].red, c[0].green, c[0].blue, c[0].alpha }, { c2->red, c2->green, c2->blue, c2->alpha } };
+ vertices[0] = (GskNglDrawVertex) { .position = { min_x, min_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } };
+ vertices[1] = (GskNglDrawVertex) { .position = { min_x, max_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } };
+ vertices[2] = (GskNglDrawVertex) { .position = { max_x, min_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } };
+ vertices[3] = (GskNglDrawVertex) { .position = { max_x, max_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } };
+ vertices[4] = (GskNglDrawVertex) { .position = { min_x, max_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } };
+ vertices[5] = (GskNglDrawVertex) { .position = { max_x, min_y }, .color = { color[0], color[1], color[2], color[3] }, .color2 = { color2[0], color2[1], color2[2], color2[3] } };
gsk_ngl_render_job_end_draw (job);
}
@@ -1933,6 +1970,7 @@ gsk_ngl_render_job_visit_unblurred_inset_shadow_node (GskNglRenderJob *job,
{
const GskRoundedRect *outline = gsk_inset_shadow_node_get_outline (node);
GskRoundedRect transformed_outline;
+ guint16 color[4];
gsk_ngl_render_job_transform_rounded_rect (job, outline, &transformed_outline);
@@ -1947,7 +1985,8 @@ gsk_ngl_render_job_visit_unblurred_inset_shadow_node (GskNglRenderJob *job,
UNIFORM_INSET_SHADOW_OFFSET, 0,
gsk_inset_shadow_node_get_dx (node),
gsk_inset_shadow_node_get_dy (node));
- gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, gsk_inset_shadow_node_get_color (node));
+ rgba_to_half (gsk_inset_shadow_node_get_color (node), color);
+ gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, color);
gsk_ngl_render_job_end_draw (job);
}
@@ -1968,6 +2007,7 @@ gsk_ngl_render_job_visit_blurred_inset_shadow_node (GskNglRenderJob *job,
int blurred_texture_id;
GskTextureKey key;
GskNglRenderOffscreen offscreen = {0};
+ guint16 color[4];
g_assert (blur_radius > 0);
@@ -2047,9 +2087,10 @@ gsk_ngl_render_job_visit_blurred_inset_shadow_node (GskNglRenderJob *job,
UNIFORM_INSET_SHADOW_OFFSET, 0,
offset_x * scale_x,
offset_y * scale_y);
+ rgba_to_half (gsk_inset_shadow_node_get_color (node), color);
gsk_ngl_render_job_draw_with_color (job,
0, 0, texture_width, texture_height,
- gsk_inset_shadow_node_get_color (node));
+ color);
gsk_ngl_render_job_end_draw (job);
gsk_ngl_render_job_pop_modelview (job);
@@ -2132,7 +2173,7 @@ gsk_ngl_render_job_visit_unblurred_outset_shadow_node (GskNglRenderJob *job,
float spread = gsk_outset_shadow_node_get_spread (node);
float dx = gsk_outset_shadow_node_get_dx (node);
float dy = gsk_outset_shadow_node_get_dy (node);
- const GdkRGBA *color = gsk_outset_shadow_node_get_color (node);
+ guint16 color[4];
const float edge_sizes[] = { // Top, right, bottom, left
spread - dy, spread + dx, spread + dy, spread - dx
};
@@ -2143,6 +2184,8 @@ gsk_ngl_render_job_visit_unblurred_outset_shadow_node (GskNglRenderJob *job,
{ outline->corner[3].width + spread - dx, outline->corner[3].height + spread + dy },
};
+ rgba_to_half (gsk_outset_shadow_node_get_color (node), color);
+
gsk_ngl_render_job_transform_rounded_rect (job, outline, &transformed_outline);
gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, unblurred_outset_shadow));
@@ -2205,10 +2248,7 @@ static inline void
gsk_ngl_render_job_visit_blurred_outset_shadow_node (GskNglRenderJob *job,
const GskRenderNode *node)
{
- static const GdkRGBA white = { 1, 1, 1, 1 };
-
const GskRoundedRect *outline = gsk_outset_shadow_node_get_outline (node);
- const GdkRGBA *color = gsk_outset_shadow_node_get_color (node);
float scale_x = job->scale_x;
float scale_y = job->scale_y;
float blur_radius = gsk_outset_shadow_node_get_blur_radius (node);
@@ -2225,6 +2265,9 @@ gsk_ngl_render_job_visit_blurred_outset_shadow_node (GskNglRenderJob *job,
int blurred_texture_id;
int cached_tid;
gboolean do_slicing;
+ guint16 color[4];
+
+ rgba_to_half (gsk_outset_shadow_node_get_color (node), color);
/* scaled_outline is the minimal outline we need to draw the given drop shadow,
* enlarged by the spread and offset by the blur radius. */
@@ -2306,7 +2349,8 @@ gsk_ngl_render_job_visit_blurred_outset_shadow_node (GskNglRenderJob *job,
/* Draw the outline using color program */
gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, color));
- gsk_ngl_render_job_draw_with_color (job, 0, 0, texture_width, texture_height, &white);
+ gsk_ngl_render_job_draw_with_color (job, 0, 0, texture_width, texture_height,
+ (guint16[]){ FP16_ONE, FP16_ONE, FP16_ONE, FP16_ONE });
gsk_ngl_render_job_end_draw (job);
/* Reset state from offscreen */
@@ -2690,7 +2734,7 @@ gsk_ngl_render_job_visit_text_node (GskNglRenderJob *job,
guint last_texture = 0;
GskNglDrawVertex *vertices;
guint used = 0;
- GdkRGBA c;
+ guint16 c[4] = { FP16_MINUS_ONE, FP16_MINUS_ONE, FP16_MINUS_ONE, FP16_MINUS_ONE };
const PangoGlyphInfo *gi;
guint i;
int yshift;
@@ -2702,10 +2746,8 @@ gsk_ngl_render_job_visit_text_node (GskNglRenderJob *job,
/* If the font has color glyphs, we don't need to recolor anything.
* We tell the shader by setting the color to vec4(-1).
*/
- if (!force_color && gsk_text_node_has_color_glyphs (node))
- c = (GdkRGBA) { -1.f, -1.f, -1.f, -1.f };
- else
- c = *color;
+ if (force_color || !gsk_text_node_has_color_glyphs (node))
+ rgba_to_half (color, c);
lookup.font = (PangoFont *)font;
lookup.scale = (guint) (text_scale * 1024);
@@ -2783,13 +2825,13 @@ gsk_ngl_render_job_visit_text_node (GskNglRenderJob *job,
glyph_x2 = glyph_x + glyph->ink_rect.width;
glyph_y2 = glyph_y + glyph->ink_rect.height;
- *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y }, { tx, ty }, { c.red, c.green, c.blue, c.alpha } };
- *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y2 }, { tx, ty2 }, { c.red, c.green, c.blue, c.alpha } };
- *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y }, { tx2, ty }, { c.red, c.green, c.blue, c.alpha } };
+ *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x, glyph_y }, .uv = { tx, ty }, .color = { c[0], c[1], c[2], c[3] } };
+ *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x, glyph_y2 }, .uv = { tx, ty2 }, .color = { c[0], c[1], c[2], c[3] } };
+ *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x2, glyph_y }, .uv = { tx2, ty }, .color = { c[0], c[1], c[2], c[3] } };
- *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y2 }, { tx2, ty2 }, { c.red, c.green, c.blue, c.alpha } };
- *(vertices++) = (GskNglDrawVertex) { { glyph_x, glyph_y2 }, { tx, ty2 }, { c.red, c.green, c.blue, c.alpha } };
- *(vertices++) = (GskNglDrawVertex) { { glyph_x2, glyph_y }, { tx2, ty }, { c.red, c.green, c.blue, c.alpha } };
+ *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x2, glyph_y2 }, .uv = { tx2, ty2 }, .color = { c[0], c[1], c[2], c[3] } };
+ *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x, glyph_y2 }, .uv = { tx, ty2 }, .color = { c[0], c[1], c[2], c[3] } };
+ *(vertices++) = (GskNglDrawVertex) { .position = { glyph_x2, glyph_y }, .uv = { tx2, ty }, .color = { c[0], c[1], c[2], c[3] } };
batch->draw.vbo_count += GSK_NGL_N_VERTICES;
used++;
@@ -2822,6 +2864,7 @@ gsk_ngl_render_job_visit_shadow_node (GskNglRenderJob *job,
const float dy = shadow->dy;
GskNglRenderOffscreen offscreen = {0};
graphene_rect_t bounds;
+ guint16 color[4];
if (shadow->radius == 0 &&
gsk_render_node_get_node_type (shadow_child) == GSK_TEXT_NODE)
@@ -2884,7 +2927,8 @@ gsk_ngl_render_job_visit_shadow_node (GskNglRenderJob *job,
GL_TEXTURE_2D,
GL_TEXTURE0,
offscreen.texture_id);
- gsk_ngl_render_job_draw_offscreen_with_color (job, &bounds, &offscreen, &shadow->color);
+ rgba_to_half (&shadow->color, color);
+ gsk_ngl_render_job_draw_offscreen_with_color (job, &bounds, &offscreen, color);
gsk_ngl_render_job_end_draw (job);
gsk_ngl_render_job_offset (job, -dx, -dy);
}
@@ -2941,7 +2985,7 @@ gsk_ngl_render_job_visit_blur_node (GskNglRenderJob *job,
gsk_ngl_render_job_draw_coords (job,
min_x, min_y, max_x, max_y,
0, 1, 1, 0,
- &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f } );
+ (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO } );
gsk_ngl_render_job_end_draw (job);
}
@@ -3047,10 +3091,10 @@ static inline void
gsk_ngl_render_job_visit_gl_shader_node_fallback (GskNglRenderJob *job,
const GskRenderNode *node)
{
- static const GdkRGBA pink = { 255 / 255., 105 / 255., 180 / 255., 1.0 };
+ guint16 pink[4] = { 15360, 13975, 14758, 15360 }; /* half-float (fp16) bit patterns for opaque pink: r = 255/255, g = 105/255, b = 180/255, a = 1.0 */
gsk_ngl_render_job_begin_draw (job, CHOOSE_PROGRAM (job, color));
- gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, &pink);
+ gsk_ngl_render_job_draw_rect_with_color (job, &node->bounds, pink);
gsk_ngl_render_job_end_draw (job);
}
@@ -3256,7 +3300,7 @@ gsk_ngl_render_job_visit_texture_node (GskNglRenderJob *job,
gsk_ngl_render_job_draw_coords (job,
x1, y1, x2, y2,
0, 0, 1, 1,
- &(GdkRGBA) { 0.f, 0.f, 0.f, 0.f });
+ (guint16[]) { FP16_ZERO, FP16_ZERO, FP16_ZERO, FP16_ZERO });
}
gsk_ngl_render_job_end_draw (job);
diff --git a/gsk/ngl/gskngltypesprivate.h b/gsk/ngl/gskngltypesprivate.h
index a65130c19f..ce2319ef05 100644
--- a/gsk/ngl/gskngltypesprivate.h
+++ b/gsk/ngl/gskngltypesprivate.h
@@ -54,9 +54,11 @@ typedef struct _GskNglDriver GskNglDriver;
struct _GskNglDrawVertex
{
float position[2];
- float uv[2];
- float color[4];
- float color2[4];
+ union { /* anonymous union: uv and color2 share the same storage, so a vertex carries one or the other */
+ float uv[2];
+ guint16 color2[4]; /* presumably half-float encoded like color -- TODO confirm against the users of color2 */
+ };
+ guint16 color[4]; /* four color components stored as 16-bit half floats (see rgba_to_half) instead of 32-bit floats */
};
G_END_DECLS
diff --git a/meson.build b/meson.build
index 38d23ed61c..641208158c 100644
--- a/meson.build
+++ b/meson.build
@@ -685,6 +685,46 @@ if graphene_has_sse2 or graphene_has_gcc
endif
endif
+f16c_cflags = [] # stays empty unless the compile probe below succeeds
+if get_option('f16c').enabled() # a failing probe is not an error: HAVE_F16C is simply left unset
+ f16c_prog = '''
+#if defined(__GNUC__)
+# if !defined(__amd64__) && !defined(__x86_64__)
+# error "F16C intrinsics are only available on x86_64"
+# endif
+#elif defined (_MSC_VER) && !defined (_M_X64) && !defined (_M_AMD64)
+# error "F16C intrinsics not supported on x86 MSVC builds"
+#endif
+#if defined(__SSE__) || (_M_X64 > 0)
+# include <immintrin.h>
+#else
+# error "No F16C intrinsics available"
+#endif
+int main () {
+ float f[4] = { 0, };
+ unsigned short h[4] = { 0, };
+ __m128 s = _mm_loadu_ps (f);
+ __m128i i = _mm_cvtps_ph (s, 0);
+ _mm_storel_epi64 ((__m128i*)h, i);
+
+ __builtin_cpu_init ();
+ __builtin_cpu_supports ("f16c");
+
+ return 0;
+}'''
+ if cc.get_id() != 'msvc' # every compiler except MSVC is probed with -mf16c
+ test_f16c_cflags = [ '-mf16c' ]
+ else
+ test_f16c_cflags = [] # MSVC is probed without any extra flag
+ endif
+
+ if cc.compiles(f16c_prog, args: test_f16c_cflags, name: 'F16C intrinsics')
+ cdata.set('HAVE_F16C', 1) # presumably fills the HAVE_F16C #mesondefine added to config.h.meson
+ f16c_cflags = test_f16c_cflags
+ common_cflags += test_f16c_cflags # NOTE(review): this appears to apply -mf16c to everything built with common_cflags, not only the fp16 code -- confirm this is safe on CPUs without F16C
+ endif
+endif
+
subdir('gtk/css')
subdir('gdk')
subdir('gsk')
diff --git a/meson_options.txt b/meson_options.txt
index 16a1cbd9fe..4193415151 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -86,6 +86,11 @@ option('sassc',
value: 'auto',
description: 'Rebuild themes using sassc')
+option('f16c',
+ type: 'feature',
+ value: 'enabled', # on by default; meson.build leaves HAVE_F16C unset without failing when its compile probe does not pass
+ description: 'Enable F16C fast paths (requires F16C)')
+
# Documentation and introspection
option('gtk_doc',