diff options
Diffstat (limited to 'src/gen9_post_processing.c')
-rw-r--r-- | src/gen9_post_processing.c | 281 |
1 file changed, 275 insertions, 6 deletions
diff --git a/src/gen9_post_processing.c b/src/gen9_post_processing.c index efa82168..88d092b1 100644 --- a/src/gen9_post_processing.c +++ b/src/gen9_post_processing.c @@ -38,6 +38,7 @@ #include "intel_media.h" #include "gen8_post_processing.h" +#include "gen75_picture_process.h" #include "intel_gen_vppapi.h" #include "intel_common_vpp_internal.h" @@ -113,6 +114,10 @@ static const uint32_t pp_10bit_scaling_gen9[][4] = { #include "shaders/post_processing/gen9/conv_p010.g9b" }; +static const uint32_t pp_yuv420p8_scaling_gen9[][4] = { +#include "shaders/post_processing/gen9/conv_nv12.g9b" +}; + static struct pp_module pp_modules_gen9[] = { { { @@ -449,7 +454,7 @@ gen9_post_processing(VADriverContextP ctx, } static void -gen9_p010_scaling_sample_state(VADriverContextP ctx, +gen9_vpp_scaling_sample_state(VADriverContextP ctx, struct i965_gpe_context *gpe_context, VARectangle *src_rect, VARectangle *dst_rect) @@ -533,6 +538,45 @@ gen9_post_processing_context_init(VADriverContextP ctx, gen8_gpe_context_init(ctx, gpe_context); pp_context->scaling_context_initialized = 1; + + /* initialize the YUV420 8-Bit scaling context. The below is supported. 
+ * NV12 ->NV12 + * NV12 ->I420 + * I420 ->I420 + * I420 ->NV12 + */ + gpe_context = &pp_context->scaling_yuv420p8_context; + memset(&scaling_kernel, 0, sizeof(scaling_kernel)); + scaling_kernel.bin = pp_yuv420p8_scaling_gen9; + scaling_kernel.size = sizeof(pp_yuv420p8_scaling_gen9); + gen8_gpe_load_kernels(ctx, gpe_context, &scaling_kernel, 1); + gpe_context->idrt.entry_size = ALIGN(sizeof(struct gen8_interface_descriptor_data), 64); + gpe_context->idrt.max_entries = 1; + gpe_context->sampler.entry_size = ALIGN(sizeof(struct gen8_sampler_state), 64); + gpe_context->sampler.max_entries = 1; + gpe_context->curbe.length = ALIGN(sizeof(struct scaling_input_parameter), 32); + + gpe_context->surface_state_binding_table.max_entries = MAX_SCALING_SURFACES; + gpe_context->surface_state_binding_table.binding_table_offset = 0; + gpe_context->surface_state_binding_table.surface_state_offset = ALIGN(MAX_SCALING_SURFACES * 4, 64); + gpe_context->surface_state_binding_table.length = ALIGN(MAX_SCALING_SURFACES * 4, 64) + ALIGN(MAX_SCALING_SURFACES * SURFACE_STATE_PADDED_SIZE_GEN9, 64); + + if (i965->intel.eu_total > 0) { + gpe_context->vfe_state.max_num_threads = i965->intel.eu_total * 6; + } else { + if (i965->intel.has_bsd2) + gpe_context->vfe_state.max_num_threads = 300; + else + gpe_context->vfe_state.max_num_threads = 60; + } + + gpe_context->vfe_state.curbe_allocation_size = 37; + gpe_context->vfe_state.urb_entry_size = 16; + gpe_context->vfe_state.num_urb_entries = 127; + gpe_context->vfe_state.gpgpu_mode = 0; + + gen8_gpe_context_init(ctx, gpe_context); + pp_context->scaling_8bit_initialized = VPPGPE_8BIT_420; return; } @@ -699,13 +743,13 @@ gen9_pp_context_get_surface_conf(VADriverContextP ctx, pitch[0] = obj_surface->width; bo_offset[0] = 0; - if (fourcc == VA_FOURCC_P010) { + if (fourcc == VA_FOURCC_P010 || fourcc == VA_FOURCC_NV12) { width[1] = width[0] / 2; height[1] = height[0] / 2; pitch[1] = obj_surface->cb_cr_pitch; bo_offset[1] = obj_surface->width * 
obj_surface->y_cb_offset; } else { - /* I010 format */ + /* I010/I420 format */ width[1] = width[0] / 2; height[1] = height[0] / 2; pitch[1] = obj_surface->cb_cr_pitch; @@ -726,13 +770,13 @@ gen9_pp_context_get_surface_conf(VADriverContextP ctx, pitch[0] = obj_image->image.pitches[0]; bo_offset[0] = obj_image->image.offsets[0]; - if (fourcc == VA_FOURCC_P010) { + if (fourcc == VA_FOURCC_P010 || fourcc == VA_FOURCC_NV12) { width[1] = width[0] / 2; height[1] = height[0] / 2; pitch[1] = obj_image->image.pitches[1]; bo_offset[1] = obj_image->image.offsets[1]; } else { - /* I010 format */ + /* I010/I420 format */ width[1] = width[0] / 2; height[1] = height[0] / 2; pitch[1] = obj_image->image.pitches[1]; @@ -887,7 +931,7 @@ gen9_p010_scaling_post_processing( gpe_context = &pp_context->scaling_10bit_context; gen8_gpe_context_init(ctx, gpe_context); - gen9_p010_scaling_sample_state(ctx, gpe_context, src_rect, dst_rect); + gen9_vpp_scaling_sample_state(ctx, gpe_context, src_rect, dst_rect); gen9_gpe_reset_binding_table(ctx, gpe_context); gen9_gpe_context_p010_scaling_curbe(ctx, gpe_context, src_rect, src_surface, @@ -912,3 +956,228 @@ gen9_p010_scaling_post_processing( return VA_STATUS_SUCCESS; } + +static void +gen9_gpe_context_yuv420p8_scaling_curbe(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + VARectangle *src_rect, + struct i965_surface *src_surface, + VARectangle *dst_rect, + struct i965_surface *dst_surface) +{ + struct scaling_input_parameter *scaling_curbe; + float src_width, src_height; + float coeff; + unsigned int fourcc; + + if ((gpe_context == NULL) || + (src_rect == NULL) || (src_surface == NULL) || + (dst_rect == NULL) || (dst_surface == NULL)) + return; + + scaling_curbe = i965_gpe_context_map_curbe(gpe_context); + + if (!scaling_curbe) + return; + + memset(scaling_curbe, 0, sizeof(struct scaling_input_parameter)); + + scaling_curbe->bti_input = BTI_SCALING_INPUT_Y; + scaling_curbe->bti_output = BTI_SCALING_OUTPUT_Y; + + /* As the 
src_rect/dst_rect is already checked, it is skipped.*/ + scaling_curbe->x_dst = dst_rect->x; + scaling_curbe->y_dst = dst_rect->y; + + src_width = src_rect->x + src_rect->width; + src_height = src_rect->y + src_rect->height; + + scaling_curbe->inv_width = 1 / src_width; + scaling_curbe->inv_height = 1 / src_height; + + coeff = (float) (src_rect->width) / dst_rect->width; + scaling_curbe->x_factor = coeff / src_width; + scaling_curbe->x_orig = (float)(src_rect->x) / src_width; + + coeff = (float) (src_rect->height) / dst_rect->height; + scaling_curbe->y_factor = coeff / src_height; + scaling_curbe->y_orig = (float)(src_rect->y) / src_height; + + fourcc = pp_get_surface_fourcc(ctx, src_surface); + if (fourcc == VA_FOURCC_NV12) { + scaling_curbe->dw7.src_packed = 1; + } + + fourcc = pp_get_surface_fourcc(ctx, dst_surface); + + if (fourcc == VA_FOURCC_NV12) { + scaling_curbe->dw7.dst_packed = 1; + } + + i965_gpe_context_unmap_curbe(gpe_context); +} + +static void +gen9_gpe_context_yuv420p8_scaling_surfaces(VADriverContextP ctx, + struct i965_gpe_context *gpe_context, + VARectangle *src_rect, + struct i965_surface *src_surface, + VARectangle *dst_rect, + struct i965_surface *dst_surface) +{ + unsigned int fourcc; + int width[3], height[3], pitch[3], bo_offset[3]; + dri_bo *bo; + struct object_surface *obj_surface; + struct object_image *obj_image; + int bti; + + if ((gpe_context == NULL) || + (src_rect == NULL) || (src_surface == NULL) || + (dst_rect == NULL) || (dst_surface == NULL)) + return; + + if (src_surface->base == NULL || dst_surface->base == NULL) + return; + + fourcc = pp_get_surface_fourcc(ctx, src_surface); + + if (src_surface->type == I965_SURFACE_TYPE_SURFACE) { + obj_surface = (struct object_surface *)src_surface->base; + bo = obj_surface->bo; + } else { + obj_image = (struct object_image *)src_surface->base; + bo = obj_image->bo; + } + + bti = 0; + if (gen9_pp_context_get_surface_conf(ctx, src_surface, src_rect, + width, height, pitch, + bo_offset)) { + 
bti = BTI_SCALING_INPUT_Y; + /* Input surface */ + gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo, + bo_offset[0], + width[0], height[0], + pitch[0], 0, + I965_SURFACEFORMAT_R8_UNORM, + bti, 0); + if (fourcc == VA_FOURCC_NV12) { + gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo, + bo_offset[1], + width[1], height[1], + pitch[1], 0, + I965_SURFACEFORMAT_R8G8_UNORM, + bti + 1, 0); + } else { + gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo, + bo_offset[1], + width[1], height[1], + pitch[1], 0, + I965_SURFACEFORMAT_R8_UNORM, + bti + 1, 0); + + gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo, + bo_offset[2], + width[2], height[2], + pitch[2], 0, + I965_SURFACEFORMAT_R8_UNORM, + bti + 2, 0); + } + } + + fourcc = pp_get_surface_fourcc(ctx, dst_surface); + + if (dst_surface->type == I965_SURFACE_TYPE_SURFACE) { + obj_surface = (struct object_surface *)dst_surface->base; + bo = obj_surface->bo; + } else { + obj_image = (struct object_image *)dst_surface->base; + bo = obj_image->bo; + } + + if (gen9_pp_context_get_surface_conf(ctx, dst_surface, dst_rect, + width, height, pitch, + bo_offset)) { + bti = BTI_SCALING_OUTPUT_Y; + /* Output surface */ + gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo, + bo_offset[0], + width[0], height[0], + pitch[0], 1, + I965_SURFACEFORMAT_R8_UINT, + bti, 0); + if (fourcc == VA_FOURCC_NV12) { + gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo, + bo_offset[1], + width[1] * 2, height[1], + pitch[1], 1, + I965_SURFACEFORMAT_R16_UINT, + bti + 1, 0); + } else { + gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo, + bo_offset[1], + width[1], height[1], + pitch[1], 1, + I965_SURFACEFORMAT_R8_UINT, + bti + 1, 0); + + gen9_add_dri_buffer_2d_gpe_surface(ctx, gpe_context, bo, + bo_offset[2], + width[2], height[2], + pitch[2], 1, + I965_SURFACEFORMAT_R8_UINT, + bti + 2, 0); + } + } + + return; +} + +VAStatus +gen9_yuv420p8_scaling_post_processing( + VADriverContextP ctx, + struct
i965_post_processing_context *pp_context, + struct i965_surface *src_surface, + VARectangle *src_rect, + struct i965_surface *dst_surface, + VARectangle *dst_rect) +{ + struct i965_gpe_context *gpe_context; + struct gpe_media_object_walker_parameter media_object_walker_param; + struct intel_vpp_kernel_walker_parameter kernel_walker_param; + + if (!pp_context || !src_surface || !src_rect || !dst_surface || !dst_rect) + return VA_STATUS_ERROR_INVALID_PARAMETER; + + if (!(pp_context->scaling_8bit_initialized & VPPGPE_8BIT_420)) + return VA_STATUS_ERROR_UNIMPLEMENTED; + + gpe_context = &pp_context->scaling_yuv420p8_context; + + gen8_gpe_context_init(ctx, gpe_context); + gen9_vpp_scaling_sample_state(ctx, gpe_context, src_rect, dst_rect); + gen9_gpe_reset_binding_table(ctx, gpe_context); + gen9_gpe_context_yuv420p8_scaling_curbe(ctx, gpe_context, + src_rect, src_surface, + dst_rect, dst_surface); + + gen9_gpe_context_yuv420p8_scaling_surfaces(ctx, gpe_context, + src_rect, src_surface, + dst_rect, dst_surface); + + gen8_gpe_setup_interface_data(ctx, gpe_context); + + memset(&kernel_walker_param, 0, sizeof(kernel_walker_param)); + kernel_walker_param.resolution_x = ALIGN(dst_rect->width, 16) >> 4; + kernel_walker_param.resolution_y = ALIGN(dst_rect->height, 16) >> 4; + kernel_walker_param.no_dependency = 1; + + intel_vpp_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param); + + gen9_run_kernel_media_object_walker(ctx, pp_context->batch, + gpe_context, + &media_object_walker_param); + + return VA_STATUS_SUCCESS; +} |