summaryrefslogtreecommitdiff
path: root/src/gen9_post_processing.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gen9_post_processing.c')
-rw-r--r--src/gen9_post_processing.c281
1 files changed, 275 insertions, 6 deletions
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c
index efa82168..88d092b1 100644
--- a/src/gen9_post_processing.c
+++ b/src/gen9_post_processing.c
@@ -38,6 +38,7 @@
#include "intel_media.h"
#include "gen8_post_processing.h"
+#include "gen75_picture_process.h"
#include "intel_gen_vppapi.h"
#include "intel_common_vpp_internal.h"
@@ -113,6 +114,10 @@ static const uint32_t pp_10bit_scaling_gen9[][4] = {
#include "shaders/post_processing/gen9/conv_p010.g9b"
};
+static const uint32_t pp_yuv420p8_scaling_gen9[][4] = {
+#include "shaders/post_processing/gen9/conv_nv12.g9b"
+};
+
static struct pp_module pp_modules_gen9[] = {
{
{
@@ -449,7 +454,7 @@ gen9_post_processing(VADriverContextP ctx,
}
static void
-gen9_p010_scaling_sample_state(VADriverContextP ctx,
+gen9_vpp_scaling_sample_state(VADriverContextP ctx,
struct i965_gpe_context *gpe_context,
VARectangle *src_rect,
VARectangle *dst_rect)
@@ -533,6 +538,45 @@ gen9_post_processing_context_init(VADriverContextP ctx,
gen8_gpe_context_init(ctx, gpe_context);
pp_context->scaling_context_initialized = 1;
+
+ /* initialize the YUV420 8-Bit scaling context. The below is supported.
+ * NV12 ->NV12
+ * NV12 ->I420
+ * I420 ->I420
+ * I420 ->NV12
+ */
+ gpe_context = &pp_context->scaling_yuv420p8_context;
+ memset(&scaling_kernel, 0, sizeof(scaling_kernel));
+ scaling_kernel.bin = pp_yuv420p8_scaling_gen9;
+ scaling_kernel.size = sizeof(pp_yuv420p8_scaling_gen9);
+ gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1);
+ gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64);
+ gpe_context->idrt.max_entries = 1;
+ gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64);
+ gpe_context->sampler.max_entries = 1;
+ gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 32);
+
+ gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES;
+ gpe_context->surface_state_binding_table.binding_table_offset = 0;
+ gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64);
+ gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64);
+
+ if (i965->intel.eu_total > 0) {
+ gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6;
+ } else {
+ if (i965->intel.has_bsd2)
+ gpe_context->vfe_state.max_num_threads = 300;
+ else
+ gpe_context->vfe_state.max_num_threads = 60;
+ }
+
+ gpe_context->vfe_state.curbe_allocation_size = 37;
+ gpe_context->vfe_state.urb_entry_size = 16;
+ gpe_context->vfe_state.num_urb_entries = 127;
+ gpe_context->vfe_state.gpgpu_mode = 0;
+
+ gen8_gpe_context_init(ctx, gpe_context);
+ pp_context->scaling_8bit_initialized = VPPGPE_8BIT_420;
return;
}
@@ -699,13 +743,13 @@ gen9_pp_context_get_surface_conf(VADriverContextP ctx,
pitch[0] = obj_surface->width;
bo_offset[0] = 0;
- if (fourcc == VA_FOURCC_P010) {
+ if (fourcc == VA_FOURCC_P010 || fourcc == VA_FOURCC_NV12) {
width[1] = width[0] / 2;
height[1] = height[0] / 2;
pitch[1] = obj_surface->cb_cr_pitch;
bo_offset[1] = obj_surface->width * obj_surface->y_cb_offset;
} else {
- /* I010 format */
+ /* I010/I420 format */
width[1] = width[0] / 2;
height[1] = height[0] / 2;
pitch[1] = obj_surface->cb_cr_pitch;
@@ -726,13 +770,13 @@ gen9_pp_context_get_surface_conf(VADriverContextP ctx,
pitch[0] = obj_image->image.pitches[0];
bo_offset[0] = obj_image->image.offsets[0];
- if (fourcc == VA_FOURCC_P010) {
+ if (fourcc == VA_FOURCC_P010 || fourcc == VA_FOURCC_NV12) {
width[1] = width[0] / 2;
height[1] = height[0] / 2;
pitch[1] = obj_image->image.pitches[1];
bo_offset[1] = obj_image->image.offsets[1];
} else {
- /* I010 format */
+ /* I010/I420 format */
width[1] = width[0] / 2;
height[1] = height[0] / 2;
pitch[1] = obj_image->image.pitches[1];
@@ -887,7 +931,7 @@ gen9_p010_scaling_post_processing(
gpe_context = &pp_context->scaling_10bit_context;
gen8_gpe_context_init(ctx, gpe_context);
- gen9_p010_scaling_sample_state(ctx, gpe_context, src_rect, dst_rect);
+ gen9_vpp_scaling_sample_state(ctx, gpe_context, src_rect, dst_rect);
gen9_gpe_reset_binding_table(ctx, gpe_context);
gen9_gpe_context_p010_scaling_curbe(ctx, gpe_context,
src_rect, src_surface,
@@ -912,3 +956,228 @@ gen9_p010_scaling_post_processing(
return VA_STATUS_SUCCESS;
}
+
+static void
+gen9_gpe_context_yuv420p8_scaling_curbe(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ VARectangle *src_rect,
+ struct i965_surface *src_surface,
+ VARectangle *dst_rect,
+ struct i965_surface *dst_surface)
+{
+ struct scaling_input_parameter *scaling_curbe;
+ float src_width, src_height;
+ float coeff;
+ unsigned int fourcc;
+
+ if ((gpe_context == NULL) ||
+ (src_rect == NULL) || (src_surface == NULL) ||
+ (dst_rect == NULL) || (dst_surface == NULL))
+ return;
+
+ scaling_curbe = i965_gpe_context_map_curbe(gpe_context);
+
+ if (!scaling_curbe)
+ return;
+
+ memset(scaling_curbe, 0, sizeof(struct scaling_input_parameter));
+
+ scaling_curbe->bti_input = BTI_SCALING_INPUT_Y;
+ scaling_curbe->bti_output = BTI_SCALING_OUTPUT_Y;
+
+ /* As the src_rect/dst_rect is already checked, it is skipped.*/
+ scaling_curbe->x_dst = dst_rect->x;
+ scaling_curbe->y_dst = dst_rect->y;
+
+ src_width = src_rect->x + src_rect->width;
+ src_height = src_rect->y + src_rect->height;
+
+ scaling_curbe->inv_width = 1 / src_width;
+ scaling_curbe->inv_height = 1 / src_height;
+
+ coeff = (float) (src_rect->width) / dst_rect->width;
+ scaling_curbe->x_factor = coeff / src_width;
+ scaling_curbe->x_orig = (float)(src_rect->x) / src_width;
+
+ coeff = (float) (src_rect->height) / dst_rect->height;
+ scaling_curbe->y_factor = coeff / src_height;
+ scaling_curbe->y_orig = (float)(src_rect->y) / src_height;
+
+ fourcc = pp_get_surface_fourcc(ctx, src_surface);
+ if (fourcc == VA_FOURCC_NV12) {
+ scaling_curbe->dw7.src_packed = 1;
+ }
+
+ fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+
+ if (fourcc == VA_FOURCC_NV12) {
+ scaling_curbe->dw7.dst_packed = 1;
+ }
+
+ i965_gpe_context_unmap_curbe(gpe_context);
+}
+
+static void
+gen9_gpe_context_yuv420p8_scaling_surfaces(VADriverContextP ctx,
+ struct i965_gpe_context *gpe_context,
+ VARectangle *src_rect,
+ struct i965_surface *src_surface,
+ VARectangle *dst_rect,
+ struct i965_surface *dst_surface)
+{
+ unsigned int fourcc;
+ int width[3], height[3], pitch[3], bo_offset[3];
+ dri_bo *bo;
+ struct object_surface *obj_surface;
+ struct object_image *obj_image;
+ int bti;
+
+ if ((gpe_context == NULL) ||
+ (src_rect == NULL) || (src_surface == NULL) ||
+ (dst_rect == NULL) || (dst_surface == NULL))
+ return;
+
+ if (src_surface->base == NULL || dst_surface->base == NULL)
+ return;
+
+ fourcc = pp_get_surface_fourcc(ctx, src_surface);
+
+ if (src_surface->type == I965_SURFACE_TYPE_SURFACE) {
+ obj_surface = (struct object_surface *)src_surface->base;
+ bo = obj_surface->bo;
+ } else {
+ obj_image = (struct object_image *)src_surface->base;
+ bo = obj_image->bo;
+ }
+
+ bti = 0;
+ if (gen9_pp_context_get_surface_conf(ctx, src_surface, src_rect,
+ width, height, pitch,
+ bo_offset)) {
+ bti = BTI_SCALING_INPUT_Y;
+ /* Input surface */
+ gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+ bo_offset[0],
+ width[0], height[0],
+ pitch[0], 0,
+ I965_SURFACEFORMAT_R8_UNORM,
+ bti, 0);
+ if (fourcc == VA_FOURCC_NV12) {
+ gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+ bo_offset[1],
+ width[1], height[1],
+ pitch[1], 0,
+ I965_SURFACEFORMAT_R8G8_UNORM,
+ bti + 1, 0);
+ } else {
+ gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+ bo_offset[1],
+ width[1], height[1],
+ pitch[1], 0,
+ I965_SURFACEFORMAT_R8_UNORM,
+ bti + 1, 0);
+
+ gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+ bo_offset[2],
+ width[2], height[2],
+ pitch[2], 0,
+ I965_SURFACEFORMAT_R8_UNORM,
+ bti + 2, 0);
+ }
+ }
+
+ fourcc = pp_get_surface_fourcc(ctx, dst_surface);
+
+ if (dst_surface->type == I965_SURFACE_TYPE_SURFACE) {
+ obj_surface = (struct object_surface *)dst_surface->base;
+ bo = obj_surface->bo;
+ } else {
+ obj_image = (struct object_image *)dst_surface->base;
+ bo = obj_image->bo;
+ }
+
+ if (gen9_pp_context_get_surface_conf(ctx, dst_surface, dst_rect,
+ width, height, pitch,
+ bo_offset)) {
+ bti = BTI_SCALING_OUTPUT_Y;
+ /* Input surface */
+ gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+ bo_offset[0],
+ width[0], height[0],
+ pitch[0], 1,
+ I965_SURFACEFORMAT_R8_UINT,
+ bti, 0);
+ if (fourcc == VA_FOURCC_NV12) {
+ gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+ bo_offset[1],
+ width[1] * 2, height[1],
+ pitch[1], 1,
+ I965_SURFACEFORMAT_R16_UINT,
+ bti + 1, 0);
+ } else {
+ gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+ bo_offset[1],
+ width[1], height[1],
+ pitch[1], 1,
+ I965_SURFACEFORMAT_R8_UINT,
+ bti + 1, 0);
+
+ gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo,
+ bo_offset[2],
+ width[2], height[2],
+ pitch[2], 1,
+ I965_SURFACEFORMAT_R8_UINT,
+ bti + 2, 0);
+ }
+ }
+
+ return;
+}
+
+VAStatus
+gen9_yuv420p8_scaling_post_processing(
+ VADriverContextP ctx,
+ struct i965_post_processing_context *pp_context,
+ struct i965_surface *src_surface,
+ VARectangle *src_rect,
+ struct i965_surface *dst_surface,
+ VARectangle *dst_rect)
+{
+ struct i965_gpe_context *gpe_context;
+ struct gpe_media_object_walker_parameter media_object_walker_param;
+ struct intel_vpp_kernel_walker_parameter kernel_walker_param;
+
+ if (!pp_context || !src_surface || !src_rect || !dst_surface || !dst_rect)
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+
+ if (!(pp_context->scaling_8bit_initialized & VPPGPE_8BIT_420))
+ return VA_STATUS_ERROR_UNIMPLEMENTED;
+
+ gpe_context = &pp_context->scaling_yuv420p8_context;
+
+ gen8_gpe_context_init(ctx, gpe_context);
+ gen9_vpp_scaling_sample_state(ctx, gpe_context, src_rect, dst_rect);
+ gen9_gpe_reset_binding_table(ctx, gpe_context);
+ gen9_gpe_context_yuv420p8_scaling_curbe(ctx, gpe_context,
+ src_rect, src_surface,
+ dst_rect, dst_surface);
+
+ gen9_gpe_context_yuv420p8_scaling_surfaces(ctx, gpe_context,
+ src_rect, src_surface,
+ dst_rect, dst_surface);
+
+ gen8_gpe_setup_interface_data(ctx, gpe_context);
+
+ memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
+ kernel_walker_param.resolution_x = ALIGN(dst_rect->width, 16) >> 4;
+ kernel_walker_param.resolution_y = ALIGN(dst_rect->height, 16) >> 4;
+ kernel_walker_param.no_dependency = 1;
+
+ intel_vpp_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
+
+ gen9_run_kernel_media_object_walker(ctx, pp_context->batch,
+ gpe_context,
+ &media_object_walker_param);
+
+ return VA_STATUS_SUCCESS;
+}