VPP: clear a surface using media pipeline on GEN8+

Remove the extra sync between BCS and CS rings.

Signed-off-by: Haihao Xiang <haihao.xiang@intel.com>
author: Haihao Xiang <haihao.xiang@intel.com> 2018-05-15 13:02:22 +0800
committer: Xiang, Haihao <haihao.xiang@intel.com> 2018-07-03 14:19:04 +0800
commit: 5bdb6754ae91c6d54290c541fc2aeaf20f6550e5 (patch)
tree: 6e3cfd174aad387c7b47af983e07c4995505cfe0 /src/gen9_post_processing.c
parent: 3ccc26bc0fdb5b12769c5d2feeafa8fa706a78ba (diff)
download: libva-intel-driver-5bdb6754ae91c6d54290c541fc2aeaf20f6550e5.tar.gz
1 files changed, 304 insertions, 0 deletions
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index eede36f7..7e1ccd38 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -124,6 +124,30 @@ static const uint32_t pp_8bit_420_rgb32_scaling_gen9[][4] = {
 #include "shaders/post_processing/gen9/conv_8bit_420_rgb32.g9b"
 };
 
+static const uint32_t pp_clear_yuy2_gen9[][4] = { /* kernel words for the "yuy2" entry of pp_clear_gen9 (selected for VA_FOURCC_YUY2) */
+#include "shaders/post_processing/gen9/clear_yuy2.g9b"
+};
+
+static const uint32_t pp_clear_uyvy_gen9[][4] = { /* kernel words for the "uyvy" entry of pp_clear_gen9 (selected for VA_FOURCC_UYVY) */
+#include "shaders/post_processing/gen9/clear_uyvy.g9b"
+};
+
+static const uint32_t pp_clear_pl2_8bit_gen9[][4] = { /* kernel words for the "pl2 8bit" entry of pp_clear_gen9 (selected for VA_FOURCC_NV12) */
+#include "shaders/post_processing/gen9/clear_pl2_8bit.g9b"
+};
+
+static const uint32_t pp_clear_pl3_8bit_gen9[][4] = { /* kernel words for the "pl3 8bit" entry of pp_clear_gen9 (selected for I420/YV12/IMC1/IMC3) */
+#include "shaders/post_processing/gen9/clear_pl3_8bit.g9b"
+};
+
+static const uint32_t pp_clear_rgbx_gen9[][4] = { /* kernel words for the "rgbx" entry of pp_clear_gen9 (selected for VA_FOURCC_RGBA/RGBX) */
+#include "shaders/post_processing/gen9/clear_rgbx.g9b"
+};
+
+static const uint32_t pp_clear_bgrx_gen9[][4] = { /* kernel words for the "bgrx" entry of pp_clear_gen9 (selected for VA_FOURCC_BGRA/BGRX) */
+#include "shaders/post_processing/gen9/clear_bgrx.g9b"
+};
+
 struct i965_kernel pp_common_scaling_gen9[] = {
     {
         "10bit to 10bit",
@@ -158,6 +182,56 @@ struct i965_kernel pp_common_scaling_gen9[] = {
     },
 };
 
+struct i965_kernel pp_clear_gen9[] = { /* clear kernels; entry order must match the index gen9_clear_surface() picks per fourcc */
+    {
+        "pl2 8bit", /* index 0: VA_FOURCC_NV12 */
+        0,
+        pp_clear_pl2_8bit_gen9,
+        sizeof(pp_clear_pl2_8bit_gen9),
+        NULL,
+    },
+
+    {
+        "pl3 8bit", /* index 1: VA_FOURCC_I420, YV12, IMC1, IMC3 */
+        1,
+        pp_clear_pl3_8bit_gen9,
+        sizeof(pp_clear_pl3_8bit_gen9),
+        NULL,
+    },
+
+    {
+        "yuy2", /* index 2: VA_FOURCC_YUY2 */
+        2,
+        pp_clear_yuy2_gen9,
+        sizeof(pp_clear_yuy2_gen9),
+        NULL,
+    },
+
+    {
+        "uyvy", /* index 3: VA_FOURCC_UYVY */
+        3,
+        pp_clear_uyvy_gen9,
+        sizeof(pp_clear_uyvy_gen9),
+        NULL,
+    },
+
+    {
+        "rgbx", /* index 4: VA_FOURCC_RGBA, RGBX */
+        4,
+        pp_clear_rgbx_gen9,
+        sizeof(pp_clear_rgbx_gen9),
+        NULL,
+    },
+
+    {
+        "bgrx", /* index 5: VA_FOURCC_BGRA, BGRX */
+        5,
+        pp_clear_bgrx_gen9,
+        sizeof(pp_clear_bgrx_gen9),
+        NULL,
+    },
+};
+
 static struct pp_module pp_modules_gen9[] = {
     {
         {
@@ -575,6 +649,36 @@ gen9_post_processing_context_init(VADriverContextP ctx,
     gen8_gpe_context_init(ctx, gpe_context);
     pp_context->scaling_gpe_context_initialized |= (VPPGPE_8BIT_8BIT | VPPGPE_10BIT_10BIT | VPPGPE_10BIT_8BIT | VPPGPE_8BIT_420_RGB32);
 
+    gpe_context = &pp_context->clear_gpe_context;
+    gen8_gpe_load_kernels(ctx, gpe_context, pp_clear_gen9, ARRAY_ELEMS(pp_clear_gen9));
+    gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+    gpe_context->idrt.max_entries = ALIGN(ARRAY_ELEMS(pp_clear_gen9), 2);
+    gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+    gpe_context->sampler.max_entries = 1;
+    gpe_context->curbe.length = ALIGN(sizeof(struct clear_input_parameter), 64);
+
+    gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
+    gpe_context->surface_state_binding_table.binding_table_offset = 0;
+    gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
+    gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
+
+    if (i965->intel.eu_total > 0) {
+        gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
+    } else {
+        if (i965->intel.has_bsd2)
+            gpe_context->vfe_state.max_num_threads = 300;
+        else
+            gpe_context->vfe_state.max_num_threads = 60;
+    }
+
+    gpe_context->vfe_state.curbe_allocation_size = 37;
+    gpe_context->vfe_state.urb_entry_size = 16;
+    gpe_context->vfe_state.num_urb_entries = 127;
+    gpe_context->vfe_state.gpgpu_mode = 0;
+
+    gen8_gpe_context_init(ctx, gpe_context);
+    pp_context->clear_gpe_context_initialized = 1;
+
     return;
 }
 
@@ -1699,3 +1803,203 @@ gen9_8bit_420_rgb32_scaling_post_processing(VADriverContextP   ctx,
 
     return VA_STATUS_SUCCESS;
 }
+
+/* Zero the sampler state located at sampler.offset inside sampler.bo. */
+static void
+gen9_clear_surface_sample_state(VADriverContextP ctx,
+                                struct i965_gpe_context *gpe_context,
+                                const struct object_surface *obj_surface)
+{
+    dri_bo *sampler_bo;
+
+    if (!gpe_context)
+        return;
+
+    sampler_bo = gpe_context->sampler.bo;
+    dri_bo_map(sampler_bo, 1);
+
+    /* Only touch (and afterwards unmap) the buffer when a CPU mapping exists. */
+    if (sampler_bo->virtual) {
+        memset(sampler_bo->virtual + gpe_context->sampler.offset, 0,
+               sizeof(struct gen8_sampler_state));
+        dri_bo_unmap(sampler_bo);
+    }
+}
+
+/* Program the clear kernel's CURBE: all zero except for the clear color. */
+static void
+gen9_clear_surface_curbe(VADriverContextP ctx,
+                         struct i965_gpe_context *gpe_context,
+                         const struct object_surface *obj_surface,
+                         unsigned int color)
+{
+    struct clear_input_parameter *params;
+
+    if (!gpe_context || obj_surface == NULL)
+        return;
+
+    params = i965_gpe_context_map_curbe(gpe_context);
+
+    /* Nothing is written (or unmapped) when the map call returns NULL. */
+    if (params != NULL) {
+        memset(params, 0, sizeof(*params));
+        params->color = color;
+        i965_gpe_context_unmap_curbe(gpe_context);
+    }
+}
+
+/*
+ * Add the plane(s) of obj_surface to gpe_context as R8_UINT 2D surfaces,
+ * starting at index 1 (bti).  RGBA/RGBX/BGRA/BGRX and YUY2/UYVY are bound
+ * as a single plane of width[0] * 4 resp. width[0] * 2; NV12 gets a second
+ * plane of width[1] * 2; any other fourcc is bound as three planes.
+ * Does nothing when gpe_context or obj_surface is NULL.
+ */
+static void
+gen9_clear_surface_state(VADriverContextP ctx,
+                         struct i965_gpe_context *gpe_context,
+                         const struct object_surface *obj_surface)
+{
+    struct i965_surface src_surface;
+    VARectangle rect;
+    dri_bo *bo;
+    unsigned int fourcc;
+    /* Zeroed so an entry the helper leaves unset is never stack garbage. */
+    int width[3] = { 0 }, height[3] = { 0 }, pitch[3] = { 0 }, bo_offset[3] = { 0 };
+    int bti;
+
+    /* Guard like the sampler/CURBE helpers; obj_surface is dereferenced below. */
+    if (gpe_context == NULL || !obj_surface)
+        return;
+
+    src_surface.base  = (struct object_base *)obj_surface;
+    src_surface.type  = I965_SURFACE_TYPE_SURFACE;
+    src_surface.flags = I965_SURFACE_FLAG_FRAME;
+
+    fourcc = obj_surface->fourcc;
+    rect.x = 0;
+    rect.y = 0;
+    rect.width = obj_surface->orig_width;
+    rect.height = obj_surface->orig_height;
+
+    /* Query per-plane geometry for a rect of orig_width x orig_height at (0, 0). */
+    gen9_pp_context_get_surface_conf(ctx, &src_surface, &rect,
+                                     width, height, pitch, bo_offset);
+
+    bti = 1;
+    bo = obj_surface->bo;
+
+    if (fourcc == VA_FOURCC_RGBA ||
+        fourcc == VA_FOURCC_RGBX ||
+        fourcc == VA_FOURCC_BGRA ||
+        fourcc == VA_FOURCC_BGRX) {
+        /* Packed RGB: one plane, width[0] * 4 wide. */
+        gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+                                           bo_offset[0], width[0] * 4, height[0],
+                                           pitch[0], 1,
+                                           I965_SURFACEFORMAT_R8_UINT, bti, 0);
+    } else if (fourcc == VA_FOURCC_YUY2 || fourcc == VA_FOURCC_UYVY) {
+        /* Packed YUY2/UYVY: one plane, width[0] * 2 wide. */
+        gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+                                           bo_offset[0], width[0] * 2, height[0],
+                                           pitch[0], 1,
+                                           I965_SURFACEFORMAT_R8_UINT, bti, 0);
+    } else {
+        /* Everything else is handled as planar; plane 0 is bound first. */
+        gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+                                           bo_offset[0], width[0], height[0],
+                                           pitch[0], 1,
+                                           I965_SURFACEFORMAT_R8_UINT, bti, 0);
+
+        if (fourcc == VA_FOURCC_NV12) {
+            gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+                                               bo_offset[1], width[1] * 2, height[1],
+                                               pitch[1], 1,
+                                               I965_SURFACEFORMAT_R8_UINT, bti + 1, 0);
+        } else {
+            gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+                                               bo_offset[1], width[1], height[1],
+                                               pitch[1], 1,
+                                               I965_SURFACEFORMAT_R8_UINT, bti + 1, 0);
+
+            gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+                                               bo_offset[2], width[2], height[2],
+                                               pitch[2], 1,
+                                               I965_SURFACEFORMAT_R8_UINT, bti + 2, 0);
+        }
+    }
+}
+
+/*
+ * Fill obj_surface with a constant color using the media pipeline.
+ * ctx:         driver context handed through to the GPE helpers
+ * pp_context:  post-processing context providing clear_gpe_context and batch
+ * obj_surface: surface to clear; its fourcc selects the kernel
+ * color:       value written to the kernel's CURBE
+ * Returns without doing anything when pp_context or obj_surface is NULL, the
+ * clear GPE context is not initialized, or the fourcc has no kernel here.
+ */
+void
+gen9_clear_surface(VADriverContextP ctx,
+                   struct i965_post_processing_context *pp_context,
+                   const struct object_surface *obj_surface,
+                   unsigned int color)
+{
+    struct intel_vpp_kernel_walker_parameter walker_conf;
+    struct gpe_media_object_walker_parameter walker;
+    struct i965_gpe_context *gpe_context;
+    int kernel_index = 0;
+
+    if (pp_context == NULL || obj_surface == NULL ||
+        !pp_context->clear_gpe_context_initialized)
+        return;
+
+    /* kernel_index follows the order of the entries in pp_clear_gen9[]. */
+    switch (obj_surface->fourcc) {
+    case VA_FOURCC_NV12:
+        kernel_index = 0;
+        break;
+    case VA_FOURCC_I420:
+    case VA_FOURCC_YV12:
+    case VA_FOURCC_IMC1:
+    case VA_FOURCC_IMC3:
+        kernel_index = 1;
+        break;
+    case VA_FOURCC_YUY2:
+        kernel_index = 2;
+        break;
+    case VA_FOURCC_UYVY:
+        kernel_index = 3;
+        break;
+    case VA_FOURCC_RGBA:
+    case VA_FOURCC_RGBX:
+        kernel_index = 4;
+        break;
+    case VA_FOURCC_BGRA:
+    case VA_FOURCC_BGRX:
+        kernel_index = 5;
+        break;
+    default:
+        /* TODO: add support for other fourccs */
+        return;
+    }
+
+    gpe_context = &pp_context->clear_gpe_context;
+    gen8_gpe_context_init(ctx, gpe_context);
+    gen9_clear_surface_sample_state(ctx, gpe_context, obj_surface);
+    gen9_gpe_reset_binding_table(ctx, gpe_context);
+    gen9_clear_surface_curbe(ctx, gpe_context, obj_surface, color);
+    gen9_clear_surface_state(ctx, gpe_context, obj_surface);
+    gen8_gpe_setup_interface_data(ctx, gpe_context);
+
+    /* Walker resolution: orig size aligned to 16, expressed in units of 16. */
+    memset(&walker_conf, 0, sizeof(walker_conf));
+    walker_conf.resolution_x = ALIGN(obj_surface->orig_width, 16) >> 4;
+    walker_conf.resolution_y = ALIGN(obj_surface->orig_height, 16) >> 4;
+    walker_conf.no_dependency = 1;
+
+    intel_vpp_init_media_object_walker_parameter(&walker_conf, &walker);
+    walker.interface_offset = kernel_index;
+    gen9_run_kernel_media_object_walker(ctx, pp_context->batch,
+                                        gpe_context, &walker);
+}
author	Haihao Xiang <haihao.xiang@intel.com>	2018-05-15 13:02:22 +0800
committer	Xiang, Haihao <haihao.xiang@intel.com>	2018-07-03 14:19:04 +0800
commit	5bdb6754ae91c6d54290c541fc2aeaf20f6550e5 (patch)
tree	6e3cfd174aad387c7b47af983e07c4995505cfe0 /src/gen9_post_processing.c
parent	3ccc26bc0fdb5b12769c5d2feeafa8fa706a78ba (diff)
download	libva-intel-driver-5bdb6754ae91c6d54290c541fc2aeaf20f6550e5.tar.gz