From 73aed29c640fb911bb88344369cfd80bf7169d6c Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 7 Jun 2016 08:56:41 -0400 Subject: Restrict the VP9 HW encoding for Profile0 Fix the issue that VP9 HW encoding is reported incorrectly for VP9 Profile2. Signed-off-by: Zhao Yakui --- src/i965_drv_video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 0a337f44..6c88be7a 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -735,7 +735,7 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx, if(HAS_VP9_DECODING_PROFILE(i965, profile)) entrypoint_list[n++] = VAEntrypointVLD; - if (HAS_VP9_ENCODING(i965)) + if (HAS_VP9_ENCODING(i965) && (profile == VAProfileVP9Profile0)) entrypoint_list[n++] = VAEntrypointEncSlice; if(profile == VAProfileVP9Profile0) { -- cgit v1.2.1 From 5d528baf61d392d84e846e22bc7c4474f02c4050 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 7 Jun 2016 08:56:42 -0400 Subject: Fix the potential NULL issue Signed-off-by: Zhao Yakui --- src/gen9_vp9_encoder.c | 99 +++++++++++++++++++++++++++++++++++++++++++++----- src/gen9_vp9_encoder.h | 1 + 2 files changed, 90 insertions(+), 10 deletions(-) diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index e6b15433..eecd206a 100644 --- a/src/gen9_vp9_encoder.c +++ b/src/gen9_vp9_encoder.c @@ -272,6 +272,11 @@ gen9_vp9_init_check_surfaces(VADriverContextP ctx, &vp9_surface->scaled_4x_surface_id); vp9_surface->scaled_4x_surface_obj = SURFACE(vp9_surface->scaled_4x_surface_id); + + if (!vp9_surface->scaled_4x_surface_obj) { + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_4x_surface_obj, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); @@ -284,6 +289,11 @@ gen9_vp9_init_check_surfaces(VADriverContextP ctx, 1, &vp9_surface->scaled_16x_surface_id); vp9_surface->scaled_16x_surface_obj = SURFACE(vp9_surface->scaled_16x_surface_id); + + if 
(!vp9_surface->scaled_16x_surface_obj) { + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + i965_check_alloc_surface_bo(ctx, vp9_surface->scaled_16x_surface_obj, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); @@ -344,6 +354,11 @@ gen9_vp9_check_dys_surfaces(VADriverContextP ctx, 1, &vp9_surface->dys_surface_id); vp9_surface->dys_surface_obj = SURFACE(vp9_surface->dys_surface_id); + + if (!vp9_surface->dys_surface_obj) { + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + i965_check_alloc_surface_bo(ctx, vp9_surface->dys_surface_obj, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); @@ -358,6 +373,11 @@ gen9_vp9_check_dys_surfaces(VADriverContextP ctx, &vp9_surface->dys_4x_surface_id); vp9_surface->dys_4x_surface_obj = SURFACE(vp9_surface->dys_4x_surface_id); + + if (!vp9_surface->dys_4x_surface_obj) { + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + i965_check_alloc_surface_bo(ctx, vp9_surface->dys_4x_surface_obj, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); @@ -370,6 +390,11 @@ gen9_vp9_check_dys_surfaces(VADriverContextP ctx, 1, &vp9_surface->dys_16x_surface_id); vp9_surface->dys_16x_surface_obj = SURFACE(vp9_surface->dys_16x_surface_id); + + if (!vp9_surface->dys_16x_surface_obj) { + return VA_STATUS_ERROR_ALLOCATION_FAILED; + } + i965_check_alloc_surface_bo(ctx, vp9_surface->dys_16x_surface_obj, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420); @@ -1135,6 +1160,10 @@ void gen9_vp9_set_curbe_brc(VADriverContextP ctx, segment_param = param->psegment_param; cmd = gen8p_gpe_context_map_curbe(gpe_context); + + if (!cmd) + return; + memset(cmd, 0, sizeof(vp9_brc_curbe_data)); if (!vp9_state->dys_enabled) @@ -1548,7 +1577,7 @@ intel_vp9enc_construct_picstate_batchbuf(VADriverContextP ctx, pdata = i965_map_gpe_resource(gpe_resource); vp9_state = (struct gen9_vp9_state *) encoder_context->enc_priv_state; - if (!vp9_state || !vp9_state->pic_param) + if (!vp9_state || !vp9_state->pic_param || !pdata) return; pic_param = vp9_state->pic_param; @@ -1954,6 
+1983,10 @@ gen9_vp9_brc_update_kernel(VADriverContextP ctx, { char *brc_const_buffer; brc_const_buffer = i965_map_gpe_resource(&vme_context->res_brc_const_data_buffer); + + if (!brc_const_buffer) + return VA_STATUS_ERROR_OPERATION_FAILED; + if (vp9_state->picture_coding_type) memcpy(brc_const_buffer, vp9_brc_const_data_p_g9, sizeof(vp9_brc_const_data_p_g9)); @@ -2025,6 +2058,10 @@ void gen9_vp9_set_curbe_me(VADriverContextP ctx, enc_media_state = VP9_MEDIA_STATE_4X_ME; me_cmd = gen8p_gpe_context_map_curbe(gpe_context); + + if (!me_cmd) + return; + memset(me_cmd, 0, sizeof(vp9_me_curbe_data)); me_cmd->dw1.max_num_mvs = 0x10; @@ -2362,6 +2399,10 @@ gen9_vp9_set_curbe_scaling_cm(VADriverContextP ctx, vp9_scaling4x_curbe_data_cm *curbe_cmd; curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context); + + if (!curbe_cmd) + return; + memset(curbe_cmd, 0, sizeof(vp9_scaling4x_curbe_data_cm)); curbe_cmd->dw0.input_picture_width = curbe_param->input_picture_width; @@ -2549,8 +2590,15 @@ static void gen9_vp9_dys_set_sampler_state(struct i965_gpe_context *gpe_context) { struct gen9_sampler_8x8_avs *sampler_cmd; + + if (!gpe_context) + return; + dri_bo_map(gpe_context->dynamic_state.bo, 1); + if (!gpe_context->dynamic_state.bo->virtual) + return; + sampler_cmd = (struct gen9_sampler_8x8_avs *) (gpe_context->dynamic_state.bo->virtual + gpe_context->sampler_offset); @@ -2623,6 +2671,10 @@ gen9_vp9_set_curbe_dys(VADriverContextP ctx, vp9_dys_curbe_data *curbe_cmd; curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context); + + if (!curbe_cmd) + return; + memset(curbe_cmd, 0, sizeof(vp9_dys_curbe_data)); curbe_cmd->dw0.input_frame_width = curbe_param->input_width; @@ -2780,7 +2832,8 @@ gen9_vp9_run_dys_refframes(VADriverContextP ctx, &dys_kernel_param); } - if (vp9_state->dys_ref_frame_flag & VP9_LAST_REF) { + if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) && + vp9_state->last_ref_obj) { obj_surface = vp9_state->last_ref_obj; vp9_priv_surface = (struct gen9_surface_vp9 
*)(obj_surface->private_data); @@ -2832,7 +2885,8 @@ gen9_vp9_run_dys_refframes(VADriverContextP ctx, } } - if (vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) { + if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) && + vp9_state->golden_ref_obj) { obj_surface = vp9_state->golden_ref_obj; vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data); @@ -2884,7 +2938,8 @@ gen9_vp9_run_dys_refframes(VADriverContextP ctx, } } - if (vp9_state->dys_ref_frame_flag & VP9_ALT_REF) { + if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) && + vp9_state->alt_ref_obj) { obj_surface = vp9_state->alt_ref_obj; vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data); @@ -2969,6 +3024,10 @@ gen9_vp9_set_curbe_mbenc(VADriverContextP ctx, } curbe_cmd = gen8p_gpe_context_map_curbe(gpe_context); + + if (!curbe_cmd) + return; + memset(curbe_cmd, 0, sizeof(vp9_mbenc_curbe_data)); if (vp9_state->dys_in_use) @@ -3843,6 +3902,10 @@ gen9_encode_vp9_check_parameter(VADriverContextP ctx, encode_state->seq_param_ext->buffer) seq_param = (VAEncSequenceParameterBufferVP9 *)encode_state->seq_param_ext->buffer; + if (!seq_param) { + seq_param = &vp9_state->bogus_seq_param; + } + vp9_state->pic_param = pic_param; vp9_state->segment_param = seg_param; vp9_state->seq_param = seq_param; @@ -4051,7 +4114,8 @@ gen9_encode_vp9_check_parameter(VADriverContextP ctx, !pic_param->pic_flags.bits.intra_only) { vp9_state->dys_ref_frame_flag = vp9_state->ref_frame_flag; - if (vp9_state->ref_frame_flag & VP9_LAST_REF) { + if ((vp9_state->ref_frame_flag & VP9_LAST_REF) && + vp9_state->last_ref_obj) { obj_surface = vp9_state->last_ref_obj; vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data); @@ -4059,7 +4123,8 @@ gen9_encode_vp9_check_parameter(VADriverContextP ctx, vp9_state->frame_height == vp9_priv_surface->frame_height) vp9_state->dys_ref_frame_flag &= ~(VP9_LAST_REF); } - if (vp9_state->ref_frame_flag & VP9_GOLDEN_REF) { + if ((vp9_state->ref_frame_flag & 
VP9_GOLDEN_REF) && + vp9_state->golden_ref_obj) { obj_surface = vp9_state->golden_ref_obj; vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data); @@ -4067,7 +4132,8 @@ gen9_encode_vp9_check_parameter(VADriverContextP ctx, vp9_state->frame_height == vp9_priv_surface->frame_height) vp9_state->dys_ref_frame_flag &= ~(VP9_GOLDEN_REF); } - if (vp9_state->ref_frame_flag & VP9_ALT_REF) { + if ((vp9_state->ref_frame_flag & VP9_ALT_REF) && + vp9_state->alt_ref_obj) { obj_surface = vp9_state->alt_ref_obj; vp9_priv_surface = (struct gen9_surface_vp9 *)(obj_surface->private_data); @@ -4214,7 +4280,8 @@ gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx, } if (vp9_state->dys_ref_frame_flag) { - if (vp9_state->dys_ref_frame_flag & VP9_LAST_REF) { + if ((vp9_state->dys_ref_frame_flag & VP9_LAST_REF) && + vp9_state->last_ref_obj) { obj_surface = vp9_state->last_ref_obj; surface_param.frame_width = vp9_state->frame_width; surface_param.frame_height = vp9_state->frame_height; @@ -4225,7 +4292,8 @@ gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx, if (va_status) return va_status; } - if (vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) { + if ((vp9_state->dys_ref_frame_flag & VP9_GOLDEN_REF) && + vp9_state->golden_ref_obj) { obj_surface = vp9_state->golden_ref_obj; surface_param.frame_width = vp9_state->frame_width; surface_param.frame_height = vp9_state->frame_height; @@ -4236,7 +4304,8 @@ gen9_vme_gpe_kernel_prepare_vp9(VADriverContextP ctx, if (va_status) return va_status; } - if (vp9_state->dys_ref_frame_flag & VP9_ALT_REF) { + if ((vp9_state->dys_ref_frame_flag & VP9_ALT_REF) && + vp9_state->alt_ref_obj) { obj_surface = vp9_state->alt_ref_obj; surface_param.frame_width = vp9_state->frame_width; surface_param.frame_height = vp9_state->frame_height; @@ -4843,6 +4912,9 @@ intel_vp9enc_refresh_frame_internal_buffers(VADriverContextP ctx, i965_zero_gpe_resource(&pak_context->res_compressed_input_buffer); buffer = 
i965_map_gpe_resource(&pak_context->res_compressed_input_buffer); + if (!buffer) + return; + /* write tx_size */ if ((pic_param->luma_ac_qindex == 0) && (pic_param->luma_dc_qindex_delta == 0) && @@ -5357,6 +5429,9 @@ intel_vp9enc_construct_pak_insertobj_batchbuffer(VADriverContextP ctx, uncompressed_header_length = vp9_state->header_length; cmd_ptr = i965_map_gpe_resource(obj_batch_buffer); + if (!cmd_ptr) + return; + bits_in_last_dw = uncompressed_header_length % 4; bits_in_last_dw *= 8; @@ -5437,6 +5512,10 @@ gen9_vp9_pak_picture_level(VADriverContextP ctx, uint8_t *prob_ptr; prob_ptr = i965_map_gpe_resource(&pak_context->res_prob_buffer); + + if (!prob_ptr) + return; + /* copy the current fc to vp9_prob buffer */ memcpy(prob_ptr, &vp9_state->vp9_current_fc, sizeof(FRAME_CONTEXT)); if ((pic_param->pic_flags.bits.frame_type == HCP_VP9_KEY_FRAME) || diff --git a/src/gen9_vp9_encoder.h b/src/gen9_vp9_encoder.h index ccc9f80c..8034240c 100644 --- a/src/gen9_vp9_encoder.h +++ b/src/gen9_vp9_encoder.h @@ -1908,6 +1908,7 @@ struct gen9_vp9_state { struct object_surface *alt_ref_obj; VAEncSequenceParameterBufferVP9 *seq_param; + VAEncSequenceParameterBufferVP9 bogus_seq_param; VAEncPictureParameterBufferVP9 *pic_param; VAEncMiscParameterTypeVP9PerSegmantParam *segment_param; double brc_init_current_target_buf_full_in_bits; -- cgit v1.2.1 From 0d8d3bd00e614d0391889c6b3ea265895771f298 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 3 Jun 2016 00:57:58 +0800 Subject: Update NEWS Signed-off-by: Xiang, Haihao --- NEWS | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 1677ee0a..fc2f47f0 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,15 @@ -libva-intel-driver NEWS -- summary of changes. 2016-03-15 -Copyright (C) 2009-2015 Intel Corporation +libva-intel-driver NEWS -- summary of changes. 
2016-06-xx +Copyright (C) 2009-2016 Intel Corporation + +Version 1.7.1 - DD.Jun.2016 +* Add support VP9 8bit encoding on KBL +* Add support for low-power/high-performance H.264 encoder on SKL +* Fix incorrect color space conversion in driver + (https://bugs.freedesktop.org/show_bug.cgi?id=94845) +* Fix FPS calculation for HEVC encoder +* Fix VP9 10bit decoding issue on KBL +* Fix the noise issue when VA_FILTER_SCALING_HQ upscaling from 1280x720 to 1920x1080 with YUY2 format on BDW+ + (https://bugs.freedesktop.org/show_bug.cgi?id=94765) Version 1.7.0 - 15.Mar.2016 * Add support for Kabylake -- cgit v1.2.1 From 396224348ae47f3093e79dc84a7d4f1f1513ba55 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 21 Jun 2016 09:51:33 +0800 Subject: libva-intel-driver 1.7.1 Signed-off-by: Xiang, Haihao --- NEWS | 4 ++-- configure.ac | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index fc2f47f0..6a78f56a 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,7 @@ -libva-intel-driver NEWS -- summary of changes. 2016-06-xx +libva-intel-driver NEWS -- summary of changes. 
2016-06-21 Copyright (C) 2009-2016 Intel Corporation -Version 1.7.1 - DD.Jun.2016 +Version 1.7.1 - 21.Jun.2016 * Add support VP9 8bit encoding on KBL * Add support for low-power/high-performance H.264 encoder on SKL * Fix incorrect color space conversion in driver diff --git a/configure.ac b/configure.ac index 3f6157fb..f56a9f42 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [7]) m4_define([intel_driver_micro_version], [1]) -m4_define([intel_driver_pre_version], [1]) +m4_define([intel_driver_pre_version], [0]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ @@ -11,7 +11,7 @@ m4_append([intel_driver_version], intel_driver_pre_version, [.pre]) # libva minimum version requirement m4_define([va_api_version], [0.39.2]) -m4_define([libva_package_version], [1.7.0]) +m4_define([libva_package_version], [1.7.1]) # libdrm minimum version requirement m4_define([libdrm_version], [2.4.52]) -- cgit v1.2.1 From 19d26a4f93a3e3d8bd1e74d7f7a7c1e8c0ce82ba Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 3 Jun 2016 01:05:47 +0800 Subject: 1.7.2.pre1 for development Signed-off-by: Xiang, Haihao (cherry picked from commit 3b84d9866f6eeaa013c75e6e5e99e8b5bdeb0e94) Conflicts: configure.ac --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index f56a9f42..6e555e37 100644 --- a/configure.ac +++ b/configure.ac @@ -1,8 +1,8 @@ # intel-driver package version number m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [7]) -m4_define([intel_driver_micro_version], [1]) -m4_define([intel_driver_pre_version], [0]) +m4_define([intel_driver_micro_version], [2]) +m4_define([intel_driver_pre_version], [1]) m4_define([intel_driver_version], 
[intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1 From 0e6516c5cbc3a1e7995a7858c5fbd14b7c0411e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Manuel=20J=C3=A1quez=20Leal?= Date: Fri, 3 Jun 2016 12:48:09 +0200 Subject: Fix the alpha mask at getting derive images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The alpha mask is set to 0x0 when getting derived images, regardless of the alpha channel in the RGB format. However, for RGBx, the x means an alpha mask of 0x00000000; for RGBA, the A means an alpha mask of 0xff000000. This patch sets the alpha mask correctly. Signed-off-by: Víctor Manuel Jáquez Leal (cherry picked from commit e656d84dc512dc899dda4b9a8121b48f5148fc4b) --- src/i965_drv_video.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 6c88be7a..efac5a55 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4351,14 +4351,12 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, image->format.red_mask = 0x000000ff; image->format.green_mask = 0x0000ff00; image->format.blue_mask = 0x00ff0000; - image->format.alpha_mask = 0x00000000; break; case VA_FOURCC_BGRA: case VA_FOURCC_BGRX: image->format.red_mask = 0x00ff0000; image->format.green_mask = 0x0000ff00; image->format.blue_mask = 0x000000ff; - image->format.alpha_mask = 0x00000000; break; default: goto error; @@ -4367,10 +4365,12 @@ VAStatus i965_DeriveImage(VADriverContextP ctx, switch (image->format.fourcc) { case VA_FOURCC_RGBA: case VA_FOURCC_BGRA: + image->format.alpha_mask = 0xff000000; image->format.depth = 32; break; case VA_FOURCC_RGBX: case VA_FOURCC_BGRX: + image->format.alpha_mask = 0x00000000; image->format.depth = 24; break; default: -- cgit v1.2.1 From c6f1a176183ffffa4685adffadff7d481b8fa4db Mon Sep 17 00:00:00 2001 From: "U. 
Artie Eoff" Date: Tue, 7 Jun 2016 13:28:02 -0700 Subject: i965_drv: add support for per-codec max resolution Add a functor to hw_codec_info to allow each hw instance to report maximum resolution on a per-codec basis. Signed-off-by: U. Artie Eoff (cherry picked from commit 39ebba0d61657a7c306cc8fd1c5780f3bfc8ba30) --- src/i965_drv_video.c | 32 +++++++++++++++++++++++++++----- src/i965_drv_video.h | 9 +++++++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index efac5a55..66cdb9e5 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2130,6 +2130,20 @@ i965_destroy_context(struct object_heap *heap, struct object_base *obj) object_heap_free(heap, obj); } +static inline void +max_resolution(struct i965_driver_data *i965, + struct object_config *obj_config, + int *w, /* out */ + int *h) /* out */ +{ + if (i965->codec_info->max_resolution) { + i965->codec_info->max_resolution(i965, obj_config, w, h); + } else { + *w = i965->codec_info->max_width; + *h = i965->codec_info->max_height; + } +} + VAStatus i965_CreateContext(VADriverContextP ctx, VAConfigID config_id, @@ -2147,14 +2161,18 @@ i965_CreateContext(VADriverContextP ctx, VAStatus vaStatus = VA_STATUS_SUCCESS; int contextID; int i; + int max_width; + int max_height; if (NULL == obj_config) { vaStatus = VA_STATUS_ERROR_INVALID_CONFIG; return vaStatus; } - if (picture_width > i965->codec_info->max_width || - picture_height > i965->codec_info->max_height) { + max_resolution(i965, obj_config, &max_width, &max_height); + + if (picture_width > max_width || + picture_height > max_height) { vaStatus = VA_STATUS_ERROR_RESOLUTION_NOT_SUPPORTED; return vaStatus; } @@ -5485,7 +5503,9 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, struct object_config *obj_config; int i = 0; VASurfaceAttrib *attribs = NULL; - + int max_width; + int max_height; + if (config == VA_INVALID_ID) return VA_STATUS_ERROR_INVALID_CONFIG; @@ -5873,16 +5893,18 @@ 
i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].value.value.p = NULL; /* ignore */ i++; + max_resolution(i965, obj_config, &max_width, &max_height); + attribs[i].type = VASurfaceAttribMaxWidth; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE; - attribs[i].value.value.i = i965->codec_info->max_width; + attribs[i].value.value.i = max_width; i++; attribs[i].type = VASurfaceAttribMaxHeight; attribs[i].value.type = VAGenericValueTypeInteger; attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE; - attribs[i].value.value.i = i965->codec_info->max_height; + attribs[i].value.value.i = max_height; i++; if (i > *num_attribs) { diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index b8d61a16..47e27d0f 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -354,6 +354,8 @@ struct i965_filter int ring; }; +struct i965_driver_data; + struct hw_codec_info { struct hw_context *(*dec_hw_context_init)(VADriverContextP, struct object_config *); @@ -363,6 +365,13 @@ struct hw_codec_info void (*post_processing_context_init)(VADriverContextP, void *, struct intel_batchbuffer *); void (*preinit_hw_codec)(VADriverContextP, struct hw_codec_info *); + /** + * Allows HW info to support per-codec max resolution. If this functor is + * not initialized, then @max_width and @max_height will be used as the + * default maximum resolution for all codecs on this HW info. + */ + void (*max_resolution)(struct i965_driver_data *, struct object_config *, int *, int *); + int max_width; int max_height; int min_linear_wpitch; -- cgit v1.2.1 From ebeccdfbbc21aa1a6c7e91e7487f36d4466b262e Mon Sep 17 00:00:00 2001 From: "U. Artie Eoff" Date: Tue, 7 Jun 2016 13:28:03 -0700 Subject: jpeg enc/dec gen9: Allow up to 8K JPEG max resolution on gen9 Allow up to 8K * 8K resolution for JPEG encode and decode on gen9 HW (SKL,BXT,KBL). Signed-off-by: U. 
Artie Eoff (cherry picked from commit 8b1e436793360880f54402161a9f469e9b42e143) --- src/gen9_mfd.c | 14 ++++++++++++++ src/i965_device_info.c | 16 ++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/gen9_mfd.c b/src/gen9_mfd.c index f178c03f..5f425149 100644 --- a/src/gen9_mfd.c +++ b/src/gen9_mfd.c @@ -1989,3 +1989,17 @@ gen9_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config) return gen8_dec_hw_context_init(ctx, obj_config); } } + +void gen9_max_resolution(struct i965_driver_data *i965, + struct object_config *obj_config, + int *w, /* out */ + int *h) /* out */ +{ + if (obj_config->profile == VAProfileJPEGBaseline) { + *w = 8192; + *h = 8192; + } else { + *w = i965->codec_info->max_width; + *h = i965->codec_info->max_height; + } +} diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 96ea43c9..239961cd 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -321,15 +321,17 @@ static struct hw_codec_info chv_hw_codec_info = { extern struct hw_context *gen9_enc_hw_context_init(VADriverContextP, struct object_config *); extern void gen9_post_processing_context_init(VADriverContextP, void *, struct intel_batchbuffer *); +extern void gen9_max_resolution(struct i965_driver_data *, struct object_config *, int *, int *); static struct hw_codec_info skl_hw_codec_info = { .dec_hw_context_init = gen9_dec_hw_context_init, .enc_hw_context_init = gen9_enc_hw_context_init, .proc_hw_context_init = gen75_proc_context_init, .render_init = gen9_render_init, .post_processing_context_init = gen9_post_processing_context_init, + .max_resolution = gen9_max_resolution, - .max_width = 4096, - .max_height = 4096, + .max_width = 4096, /* default. See max_resolution */ + .max_height = 4096, /* default. 
See max_resolution */ .min_linear_wpitch = 128, .min_linear_hpitch = 16, @@ -378,9 +380,10 @@ static struct hw_codec_info bxt_hw_codec_info = { .proc_hw_context_init = gen75_proc_context_init, .render_init = gen9_render_init, .post_processing_context_init = gen9_post_processing_context_init, + .max_resolution = gen9_max_resolution, - .max_width = 4096, - .max_height = 4096, + .max_width = 4096, /* default. See max_resolution */ + .max_height = 4096, /* default. See max_resolution */ .min_linear_wpitch = 128, .min_linear_hpitch = 16, @@ -430,9 +433,10 @@ static struct hw_codec_info kbl_hw_codec_info = { .proc_hw_context_init = gen75_proc_context_init, .render_init = gen9_render_init, .post_processing_context_init = gen9_post_processing_context_init, + .max_resolution = gen9_max_resolution, - .max_width = 4096, - .max_height = 4096, + .max_width = 4096, /* default. See max_resolution */ + .max_height = 4096, /* default. See max_resolution */ .min_linear_wpitch = 128, .min_linear_hpitch = 16, -- cgit v1.2.1 From 572cca30765b28c199fd676510dad45d25dda930 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 30 May 2016 09:55:58 -0400 Subject: Encoding: Encoding reuses aux_batchbuffer instead of allocate new buffer Signed-off-by: Zhao Yakui (cherry picked from commit 79f2bd8d76392331a3033ae4b8dc8edb3642b90f) --- src/gen8_mfc.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index c30bad6d..60e6362f 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -1244,23 +1244,12 @@ static void gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) - { - struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = 
pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1; - mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */ - mfc_context->mfc_batchbuffer_surface.pitch = 16; - mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, - "MFC batchbuffer", - mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block, - 0x1000); + assert(mfc_context->aux_batchbuffer_surface.bo); mfc_context->buffer_suface_setup(ctx, &mfc_context->gpe_context, - &mfc_context->mfc_batchbuffer_surface, + &mfc_context->aux_batchbuffer_surface, BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER), SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER)); } @@ -1602,10 +1591,10 @@ gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context); - dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo); - return mfc_context->mfc_batchbuffer_surface.bo; + return mfc_context->aux_batchbuffer_surface.bo; } #endif @@ -3712,8 +3701,10 @@ static void gen8_mfc_vp8_init(VADriverContextP ctx, dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo); mfc_context->aux_batchbuffer_surface.bo = NULL; - if (mfc_context->aux_batchbuffer) + if (mfc_context->aux_batchbuffer) { intel_batchbuffer_free(mfc_context->aux_batchbuffer); + mfc_context->aux_batchbuffer = NULL; + } mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size); mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; -- cgit v1.2.1 From 4b6ec732769ef057bd013062303e951d01182208 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 30 May 2016 
09:55:59 -0400 Subject: Encoding: H264 uses the GPU to construct the PAK obj command on Gen8+ This is helpful to reduce the waiting time when preparing the command buffer of PAK object. Signed-off-by: Zhao Yakui (cherry picked from commit 7b823b8fe7d3d4b166852b8714abe52d4344d0fc) --- src/gen8_mfc.c | 388 +++++++++++++++--------------- src/shaders/utils/Makefile.am | 30 ++- src/shaders/utils/mfc_batchbuffer_hsw.g8a | 28 +++ src/shaders/utils/mfc_batchbuffer_hsw.g8b | 105 ++++++++ src/shaders/utils/mfc_batchbuffer_hsw.g9a | 28 +++ src/shaders/utils/mfc_batchbuffer_hsw.g9b | 105 ++++++++ 6 files changed, 481 insertions(+), 203 deletions(-) create mode 100644 src/shaders/utils/mfc_batchbuffer_hsw.g8a create mode 100644 src/shaders/utils/mfc_batchbuffer_hsw.g8b create mode 100644 src/shaders/utils/mfc_batchbuffer_hsw.g9a create mode 100644 src/shaders/utils/mfc_batchbuffer_hsw.g9b diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 60e6362f..1f8e57b3 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -49,7 +49,7 @@ #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) -#define MFC_SOFTWARE_HASWELL 1 +#define MFC_SOFTWARE_BATCH 0 #define B0_STEP_REV 2 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) @@ -101,32 +101,35 @@ static const int va_to_gen7_jpeg_hufftable[2] = { MFX_HUFFTABLE_ID_UV }; -static const uint32_t gen8_mfc_batchbuffer_avc_intra[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b" +static const uint32_t gen8_mfc_batchbuffer_avc[][4] = { +#include "shaders/utils/mfc_batchbuffer_hsw.g8b" }; -static const uint32_t gen8_mfc_batchbuffer_avc_inter[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b" +static const uint32_t gen9_mfc_batchbuffer_avc[][4] = { +#include "shaders/utils/mfc_batchbuffer_hsw.g9b" }; static struct i965_kernel gen8_mfc_kernels[] = { { "MFC AVC INTRA BATCHBUFFER 
", MFC_BATCHBUFFER_AVC_INTRA, - gen8_mfc_batchbuffer_avc_intra, - sizeof(gen8_mfc_batchbuffer_avc_intra), + gen8_mfc_batchbuffer_avc, + sizeof(gen8_mfc_batchbuffer_avc), NULL }, +}; +static struct i965_kernel gen9_mfc_kernels[] = { { - "MFC AVC INTER BATCHBUFFER ", - MFC_BATCHBUFFER_AVC_INTER, - gen8_mfc_batchbuffer_avc_inter, - sizeof(gen8_mfc_batchbuffer_avc_inter), + "MFC AVC INTRA BATCHBUFFER ", + MFC_BATCHBUFFER_AVC_INTRA, + gen9_mfc_batchbuffer_avc, + sizeof(gen9_mfc_batchbuffer_avc), NULL }, }; + #define INTER_MODE_MASK 0x03 #define INTER_8X8 0x03 #define INTER_16X8 0x01 @@ -570,7 +573,7 @@ static void gen8_mfc_init(VADriverContextP ctx, mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16; mfc_context->aux_batchbuffer_surface.size_block = 16; - i965_gpe_context_init(ctx, &mfc_context->gpe_context); + gen8_gpe_context_init(ctx, &mfc_context->gpe_context); } static void @@ -930,8 +933,13 @@ gen8_mfc_avc_slice_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } +#define AVC_INTRA_RDO_OFFSET 4 +#define AVC_INTER_RDO_OFFSET 10 +#define AVC_INTER_MSG_OFFSET 8 +#define AVC_INTER_MV_OFFSET 48 +#define AVC_RDO_MASK 0xFFFF -#ifdef MFC_SOFTWARE_HASWELL +#if MFC_SOFTWARE_BATCH static int gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, @@ -1082,12 +1090,6 @@ gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in return len_in_dwords; } -#define AVC_INTRA_RDO_OFFSET 4 -#define AVC_INTER_RDO_OFFSET 10 -#define AVC_INTER_MSG_OFFSET 8 -#define AVC_INTER_MV_OFFSET 48 -#define AVC_RDO_MASK 0xFFFF - static void gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, struct encode_state *encode_state, @@ -1221,7 +1223,6 @@ static void gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) - { struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen6_mfc_context 
*mfc_context = encoder_context->mfc_context; @@ -1232,12 +1233,6 @@ gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, &vme_context->vme_output, BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT), SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT)); - assert(mfc_context->aux_batchbuffer_surface.bo); - mfc_context->buffer_suface_setup(ctx, - &mfc_context->gpe_context, - &mfc_context->aux_batchbuffer_surface, - BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER), - SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER)); } static void @@ -1269,41 +1264,39 @@ gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct gen6_interface_descriptor_data *desc; + struct gen8_interface_descriptor_data *desc; int i; dri_bo *bo; + unsigned char *desc_ptr; - bo = mfc_context->gpe_context.idrt.bo; + bo = mfc_context->gpe_context.dynamic_state.bo; dri_bo_map(bo, 1); assert(bo->virtual); - desc = bo->virtual; + desc_ptr = (unsigned char *)bo->virtual + mfc_context->gpe_context.idrt_offset; + + desc = (struct gen8_interface_descriptor_data *)desc_ptr; for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) { struct i965_kernel *kernel; - kernel = &mfc_context->gpe_context.kernels[i]; assert(sizeof(*desc) == 32); - /*Setup the descritor table*/ memset(desc, 0, sizeof(*desc)); - desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6); - desc->desc2.sampler_count = 0; - desc->desc2.sampler_state_pointer = 0; - desc->desc3.binding_table_entry_count = 2; - desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); - desc->desc4.constant_urb_entry_read_offset = 0; - desc->desc4.constant_urb_entry_read_length = 4; + desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6; + desc->desc3.sampler_count = 0; + desc->desc3.sampler_state_pointer = 0; + desc->desc4.binding_table_entry_count = 1; + desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); + 
desc->desc5.constant_urb_entry_read_offset = 0; + desc->desc5.constant_urb_entry_read_length = 4; + - /*kernel start*/ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0), - kernel->bo); desc++; } dri_bo_unmap(bo); + + return; } static void @@ -1316,147 +1309,129 @@ gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx, (void)mfc_context; } +#define AVC_PAK_LEN_IN_BYTE 48 +#define AVC_PAK_LEN_IN_OWORD 3 + static void gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, - int index, - int head_offset, - int batchbuffer_offset, - int head_size, - int tail_size, - int number_mb_cmds, - int first_object, - int last_object, - int last_slice, - int mb_x, - int mb_y, - int width_in_mbs, - int qp) + uint32_t intra_flag, + int head_offset, + int number_mb_cmds, + int slice_end_x, + int slice_end_y, + int mb_x, + int mb_y, + int width_in_mbs, + int qp, + uint32_t fwd_ref, + uint32_t bwd_ref) { - BEGIN_BATCH(batch, 12); + uint32_t temp_value; + BEGIN_BATCH(batch, 14); - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2)); - OUT_BATCH(batch, index); + OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2)); + OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); OUT_BATCH(batch, 0); /*inline data */ - OUT_BATCH(batch, head_offset); - OUT_BATCH(batch, batchbuffer_offset); - OUT_BATCH(batch, - head_size << 16 | - tail_size); - OUT_BATCH(batch, - number_mb_cmds << 16 | - first_object << 2 | - last_object << 1 | - last_slice); - OUT_BATCH(batch, - mb_y << 8 | - mb_x); + OUT_BATCH(batch, head_offset / 16); + OUT_BATCH(batch, (intra_flag) | (qp << 16)); + temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16)); + OUT_BATCH(batch, temp_value); + + OUT_BATCH(batch, number_mb_cmds); + OUT_BATCH(batch, - qp << 16 | - width_in_mbs); + ((slice_end_y << 8) | (slice_end_x))); + OUT_BATCH(batch, fwd_ref); + OUT_BATCH(batch, bwd_ref); + + OUT_BATCH(batch, MI_NOOP); 
ADVANCE_BATCH(batch); } static void gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - VAEncSliceParameterBufferH264 *slice_param, - int head_offset, - unsigned short head_size, - unsigned short tail_size, - int batchbuffer_offset, - int qp, - int last_slice) + struct intel_encoder_context *encoder_context, + VAEncSliceParameterBufferH264 *slice_param, + int head_offset, + int qp, + int last_slice) { struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; int total_mbs = slice_param->num_macroblocks; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); int number_mb_cmds = 128; - int starting_mb = 0; - int last_object = 0; - int first_object = 1; - int i; + int starting_offset = 0; int mb_x, mb_y; - int index = (slice_param->slice_type == SLICE_TYPE_I) ? 
MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER; + int last_mb, slice_end_x, slice_end_y; + int remaining_mb = total_mbs; + uint32_t fwd_ref , bwd_ref, mb_flag; - for (i = 0; i < total_mbs / number_mb_cmds; i++) { - last_object = (total_mbs - starting_mb) == number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); + last_mb = slice_param->macroblock_address + total_mbs - 1; + slice_end_x = last_mb % width_in_mbs; + slice_end_y = last_mb / width_in_mbs; - starting_mb += number_mb_cmds; - - gen8_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); - - if (first_object) { - head_offset += head_size; - batchbuffer_offset += head_size; - } - - if (last_object) { - head_offset += tail_size; - batchbuffer_offset += tail_size; - } - - batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD; + if (slice_type == SLICE_TYPE_I) { + fwd_ref = 0; + bwd_ref = 0; + mb_flag = 1; + } else { + fwd_ref = vme_context->ref_index_in_mb[0]; + bwd_ref = vme_context->ref_index_in_mb[1]; + mb_flag = 0; + } - first_object = 0; + if (width_in_mbs >= 100) { + number_mb_cmds = width_in_mbs / 5; + } else if (width_in_mbs >= 80) { + number_mb_cmds = width_in_mbs / 4; + } else if (width_in_mbs >= 60) { + number_mb_cmds = width_in_mbs / 3; + } else if (width_in_mbs >= 40) { + number_mb_cmds = width_in_mbs / 2; + } else { + number_mb_cmds = width_in_mbs; } - if (!last_object) { - last_object = 1; - number_mb_cmds = total_mbs % number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); - starting_mb += number_mb_cmds; + do { + if (number_mb_cmds >= 
remaining_mb) { + number_mb_cmds = remaining_mb; + } + mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs; + mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs; gen8_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); - } + mb_flag, + head_offset, + number_mb_cmds, + slice_end_x, + slice_end_y, + mb_x, + mb_y, + width_in_mbs, + qp, + fwd_ref, + bwd_ref); + + head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE); + remaining_mb -= number_mb_cmds; + starting_offset += number_mb_cmds; + } while (remaining_mb > 0); } - -/* - * return size in Owords (16bytes) - */ -static int + +static void gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - int batchbuffer_offset) + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + int slice_index) { struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; @@ -1470,8 +1445,6 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, unsigned int rate_control_mode = encoder_context->rate_control_mode; unsigned int tail_data[] = { 0x0, 0x0 }; long head_offset; - int old_used = intel_batchbuffer_used_size(slice_batch), used; - unsigned short head_size, tail_size; int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int qp_slice; @@ -1490,15 +1463,14 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); assert(qp >= 0 && qp < 52); - head_offset = old_used / 16; gen8_mfc_avc_slice_state(ctx, - pPicParameter, - pSliceParameter, - encode_state, - encoder_context, - (rate_control_mode == VA_RC_CBR), - qp_slice, - 
slice_batch); + pPicParameter, + pSliceParameter, + encode_state, + encoder_context, + (rate_control_mode == VA_RC_CBR), + qp_slice, + slice_batch); if (slice_index == 0) intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); @@ -1506,11 +1478,20 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - head_size = (used - old_used) / 16; - old_used = used; + head_offset = intel_batchbuffer_used_size(slice_batch); + + slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE; - /* tail */ + gen8_mfc_avc_batchbuffer_slice_command(ctx, + encoder_context, + pSliceParameter, + head_offset, + qp, + last_slice); + + + /* Aligned for tail */ + intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ if (last_slice) { mfc_context->insert_object(ctx, encoder_context, @@ -1535,22 +1516,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx, slice_batch); } - intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - tail_size = (used - old_used) / 16; - - - gen8_mfc_avc_batchbuffer_slice_command(ctx, - encoder_context, - pSliceParameter, - head_offset, - head_size, - tail_size, - batchbuffer_offset, - qp, - last_slice); - - return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD; + return; } static void @@ -1558,19 +1524,41 @@ gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; struct intel_batchbuffer *batch = encoder_context->base.batch; - int i, size, offset = 0; - 
intel_batchbuffer_start_atomic(batch, 0x4000); - gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); + int i; + + intel_batchbuffer_start_atomic(batch, 0x4000); + + if (IS_GEN9(i965->intel.device_info)) + gen9_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); + else + gen8_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); for ( i = 0; i < encode_state->num_slice_params_ext; i++) { - size = gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset); - offset += size; + gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i); + } + { + struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; + + intel_batchbuffer_align(slice_batch, 8); + BEGIN_BCS_BATCH(slice_batch, 2); + OUT_BCS_BATCH(slice_batch, 0); + OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(slice_batch); + + BEGIN_BATCH(batch, 2); + OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH); + OUT_BATCH(batch, 0); + ADVANCE_BATCH(batch); } intel_batchbuffer_end_atomic(batch); intel_batchbuffer_flush(batch); + + if (IS_GEN9(i965->intel.device_info)) + gen9_gpe_pipeline_end(ctx, &mfc_context->gpe_context, batch); } static void @@ -1613,7 +1601,7 @@ gen8_mfc_avc_pipeline_programing(VADriverContextP ctx, return; } -#ifdef MFC_SOFTWARE_HASWELL +#if MFC_SOFTWARE_BATCH slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); #else slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); @@ -3713,7 +3701,7 @@ static void gen8_mfc_vp8_init(VADriverContextP ctx, mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16; mfc_context->aux_batchbuffer_surface.size_block = 16; - i965_gpe_context_init(ctx, &mfc_context->gpe_context); + gen8_gpe_context_init(ctx, &mfc_context->gpe_context); /* alloc vp8 encoding buffers*/ dri_bo_unreference(mfc_context->vp8_state.frame_header_bo); @@ -4480,7 +4468,7 @@ gen8_mfc_context_destroy(void *context) 
mfc_context->reference_surfaces[i].bo = NULL; } - i965_gpe_context_destroy(&mfc_context->gpe_context); + gen8_gpe_context_destroy(&mfc_context->gpe_context); dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo); mfc_context->mfc_batchbuffer_surface.bo = NULL; @@ -4558,14 +4546,15 @@ static VAStatus gen8_mfc_pipeline(VADriverContextP ctx, Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { + struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context)); + assert(mfc_context); mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; - mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS; - mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); - - mfc_context->gpe_context.curbe.length = 32 * 4; + mfc_context->gpe_context.idrt_size = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6; + mfc_context->gpe_context.curbe_size = 32 * 4; + mfc_context->gpe_context.sampler_size = 0; mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1; mfc_context->gpe_context.vfe_state.num_urb_entries = 16; @@ -4573,10 +4562,17 @@ Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1; - i965_gpe_load_kernels(ctx, + if (IS_GEN9(i965->intel.device_info)) { + gen8_gpe_load_kernels(ctx, + &mfc_context->gpe_context, + gen9_mfc_kernels, + 1); + } else { + gen8_gpe_load_kernels(ctx, &mfc_context->gpe_context, gen8_mfc_kernels, - NUM_MFC_KERNEL); + 1); + } mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select; mfc_context->set_surface_state = gen8_mfc_surface_state; diff --git a/src/shaders/utils/Makefile.am b/src/shaders/utils/Makefile.am index 4a5d89d7..1bffbb8c 
100644 --- a/src/shaders/utils/Makefile.am +++ b/src/shaders/utils/Makefile.am @@ -24,9 +24,14 @@ INTEL_G75A = mfc_batchbuffer_hsw.g75a INTEL_GEN75_INC = mfc_batchbuffer_hsw.inc INTEL_GEN75_ASM = $(INTEL_G75A:%.g75a=%.gen75.asm) -INTEL_G9B = mfc_batchbuffer_avc_intra.g9b mfc_batchbuffer_avc_inter.g9b -INTEL_G9A = mfc_batchbuffer_avc_intra.g9a mfc_batchbuffer_avc_inter.g9a -INTEL_GEN9_INC = mfc_batchbuffer.inc +INTEL_G8B = mfc_batchbuffer_hsw.g8b +INTEL_G8A = mfc_batchbuffer_hsw.g8a +INTEL_GEN8_INC = mfc_batchbuffer_hsw.inc +INTEL_GEN8_ASM = $(INTEL_G9A:%.g9a=%.gen8.asm) + +INTEL_G9B = mfc_batchbuffer_hsw.g9b +INTEL_G9A = mfc_batchbuffer_hsw.g9a +INTEL_GEN9_INC = mfc_batchbuffer_hsw.inc INTEL_GEN9_ASM = $(INTEL_G9A:%.g9a=%.gen9.asm) TARGETS = @@ -34,12 +39,13 @@ if HAVE_GEN4ASM TARGETS += $(INTEL_G6B) TARGETS += $(INTEL_G7B) TARGETS += $(INTEL_G75B) +TARGETS += $(INTEL_G8B) TARGETS += $(INTEL_G9B) endif all-local: $(TARGETS) -SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm .g9a .g9b .gen9.asm +SUFFIXES = .g6a .g6b .g7a .g7b .gen6.asm .gen7.asm .g75a .g75b .gen75.asm .g9a .g9b .gen9.asm .g8a .g8b .gen8.asm if HAVE_GEN4ASM $(INTEL_GEN6_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN6_INC) @@ -66,9 +72,17 @@ $(INTEL_GEN75_ASM): $(MFC_CORE_HSW) $(INTEL_GEN75_INC) .gen75.asm.g75b: $(AM_V_GEN)$(GEN4ASM) -g 7.5 -o $@ $< -$(INTEL_GEN9_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN9_INC) +$(INTEL_GEN8_ASM): $(MFC_CORE_HSW) $(INTEL_GEN8_INC) +.g8a.gen8.asm: + $(AM_V_GEN)cpp -P $< > _mfc0.$@ && \ + m4 _mfc0.$@ > $@ && \ + rm _mfc0.$@ +.gen8.asm.g8b: + $(AM_V_GEN)$(GEN4ASM) -g 8 -o $@ $< + +$(INTEL_GEN9_ASM): $(MFC_CORE_HSW) $(INTEL_GEN8_INC) .g9a.gen9.asm: - $(AM_V_GEN)cpp -P -DDEV_IVB $< > _mfc0.$@ && \ + $(AM_V_GEN)cpp -P $< > _mfc0.$@ && \ m4 _mfc0.$@ > $@ && \ rm _mfc0.$@ .gen9.asm.g9b: @@ -76,7 +90,7 @@ $(INTEL_GEN9_ASM): $(MFC_CORE) $(MFC_CORE_AVC) $(INTEL_GEN9_INC) endif -CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM) 
$(INTEL_GEN9_ASM) +CLEANFILES = $(INTEL_GEN6_ASM) $(INTEL_GEN7_ASM) $(INTEL_GEN75_ASM) $(INTEL_GEN9_ASM) $(INTEL_GEN8_ASM) EXTRA_DIST = \ $(INTEL_G6A) \ @@ -88,6 +102,8 @@ EXTRA_DIST = \ $(INTEL_GEN6_INC) \ $(INTEL_GEN7_INC) \ $(INTEL_GEN75_INC) \ + $(INTEL_G8A) \ + $(INTEL_G8B) \ $(INTEL_G9A) \ $(INTEL_G9B) \ $(INTEL_GEN9_INC) \ diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g8a b/src/shaders/utils/mfc_batchbuffer_hsw.g8a new file mode 100644 index 00000000..dc39253c --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.g8a @@ -0,0 +1,28 @@ +/* + * Copyright © 2010-2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui + */ + +#include "mfc_batchbuffer_hsw.inc" +#include "mfc_batchbuffer_hsw.asm" diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g8b b/src/shaders/utils/mfc_batchbuffer_hsw.g8b new file mode 100644 index 00000000..cca39f50 --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.g8b @@ -0,0 +1,105 @@ + { 0x00800001, 0x23400608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x2ac02248, 0x000000a8, 0x00000000 }, + { 0x00000001, 0x2ac22248, 0x000000a9, 0x00000000 }, + { 0x00000001, 0x2ae02248, 0x000000b0, 0x00000000 }, + { 0x00000001, 0x2ae22248, 0x000000b1, 0x00000000 }, + { 0x00000001, 0x2ae41248, 0x000000ac, 0x00000000 }, + { 0x00000001, 0x2ae80608, 0x00000000, 0x00000002 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x2ae80608, 0x00000000, 0x00000018 }, + { 0x00000001, 0x21e80208, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x21f42288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x2b081208, 0x120000aa, 0x00000ac2 }, + { 0x00000040, 0x2b080208, 0x12000b08, 0x00000ac0 }, + { 0x00000041, 0x2b080208, 0x02000b08, 0x00000ae8 }, + { 0x00000001, 0x2b142288, 0x00000014, 0x00000000 }, + { 0x00000001, 0x23400608, 0x00000000, 0x7149000a }, + { 0x00000001, 0x23540608, 0x00000000, 0x000f000f }, + { 0x00000001, 0x23680608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23582288, 0x000000a6, 0x00000000 }, + { 0x00600001, 0x2b400208, 0x008d0b00, 0x00000000 }, + { 0x00000001, 0x23500608, 0x00000000, 0xffff0000 }, + { 0x00000001, 0x21002288, 0x00000ac0, 0x00000000 }, + { 0x00000001, 0x21012288, 0x00000ac2, 0x00000000 }, + { 0x00000001, 0x23501248, 0x00000100, 0x00000000 }, + { 0x00000001, 0x235a1648, 0x10000000, 0x00000000 }, + { 0x01000010, 0x20001240, 0x12000ac0, 0x00000ae0 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000020 }, + { 0x01000010, 0x20001240, 0x12000ac2, 
0x00000ae2 }, + { 0x00010001, 0x235a1648, 0x10000000, 0x04000400 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02180200 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000240 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000b40, 0x02280300 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000001f0 }, + { 0x00000005, 0x234c0208, 0x06000b80, 0x1f00ffff }, + { 0x00000040, 0x234c0208, 0x0600034c, 0x000e0000 }, + { 0x00000005, 0x21001248, 0x16000b80, 0x00030003 }, + { 0x00000001, 0x23440608, 0x00000000, 0x00000020 }, + { 0x01000010, 0x20001240, 0x16000100, 0x00030003 }, + { 0x00110040, 0x234c0208, 0x0600034c, 0x00400000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000050 }, + { 0x02000005, 0x20001200, 0x16000b84, 0xff00ff00 }, + { 0x00010001, 0x23440608, 0x00000000, 0x00000080 }, + { 0x00010040, 0x234c0208, 0x0600034c, 0x00600000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000040, 0x234c0208, 0x0600034c, 0x00400000 }, + { 0x00000005, 0x21001248, 0x16000b80, 0x00030003 }, + { 0x01000010, 0x20001240, 0x16000100, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x0e000b40, 0x02480400 }, + { 0x00200001, 0x2ba80208, 0x00450bc0, 0x00000000 }, + { 0x00200001, 0x2bb00208, 0x00450be0, 0x00000000 }, + { 0x00200001, 0x2bb80208, 0x00450c00, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0b40, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0ba0, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0200 }, + { 0x00000001, 0x23600208, 0x000000b4, 0x00000000 }, + { 0x00000001, 0x23640208, 0x000000b8, 0x00000000 }, + { 0x00000001, 0x235c0608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x235c2288, 0x00000b85, 
0x00000000 }, + { 0x00000001, 0x235d2288, 0x00000b86, 0x00000000 }, + { 0x00000040, 0x21040208, 0x06000b08, 0x00000003 }, + { 0x00000041, 0x23480208, 0x06000104, 0x00000010 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000000b0 }, + { 0x00200001, 0x23440608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x235c0208, 0x00000b64, 0x00000000 }, + { 0x00000001, 0x23600208, 0x00000b68, 0x00000000 }, + { 0x00000001, 0x23640608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23642288, 0x00000b6c, 0x00000000 }, + { 0x00000005, 0x234c0208, 0x06000b60, 0x0000c0ff }, + { 0x00000040, 0x234c0208, 0x0600034c, 0x000e2000 }, + { 0x00000001, 0x21000608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x21012288, 0x00000b62, 0x00000000 }, + { 0x00000005, 0x21001248, 0x16000100, 0x1f001f00 }, + { 0x00000040, 0x234c0208, 0x0200034c, 0x00000100 }, + { 0x00600001, 0x28000208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0340, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0202 }, + { 0x00000040, 0x28080208, 0x06000808, 0x00000002 }, + { 0x00600001, 0x28200208, 0x008d0360, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x0e000800, 0x040a0002 }, + { 0x00000040, 0x2ac41248, 0x16000ac4, 0x00010001 }, + { 0x01000010, 0x20001240, 0x12000ac4, 0x00000ae4 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000090 }, + { 0x00000040, 0x21e80208, 0x160001e8, 0x00030003 }, + { 0x00000040, 0x2b080208, 0x02000b08, 0x00000ae8 }, + { 0x00000040, 0x2ac01248, 0x16000ac0, 0x00010001 }, + { 0x01000010, 0x20001240, 0x12000ac0, 0x000000aa }, + { 0x00010040, 0x2ac21248, 0x16000ac2, 0x00010001 }, + { 0x00010001, 0x2ac01648, 0x10000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0xfffffb30 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x0e000800, 0x0219e002 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x24000a40, 0x0e000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 
0x00000000 }, diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g9a b/src/shaders/utils/mfc_batchbuffer_hsw.g9a new file mode 100644 index 00000000..dc39253c --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.g9a @@ -0,0 +1,28 @@ +/* + * Copyright © 2010-2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Zhao Yakui + */ + +#include "mfc_batchbuffer_hsw.inc" +#include "mfc_batchbuffer_hsw.asm" diff --git a/src/shaders/utils/mfc_batchbuffer_hsw.g9b b/src/shaders/utils/mfc_batchbuffer_hsw.g9b new file mode 100644 index 00000000..40812fbf --- /dev/null +++ b/src/shaders/utils/mfc_batchbuffer_hsw.g9b @@ -0,0 +1,105 @@ + { 0x00800001, 0x23400608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x21e00608, 0x00000000, 0x00000000 }, + { 0x00600001, 0x2b000608, 0x00000000, 0x00000000 }, + { 0x00800001, 0x2ac00608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x2ac02248, 0x000000a8, 0x00000000 }, + { 0x00000001, 0x2ac22248, 0x000000a9, 0x00000000 }, + { 0x00000001, 0x2ae02248, 0x000000b0, 0x00000000 }, + { 0x00000001, 0x2ae22248, 0x000000b1, 0x00000000 }, + { 0x00000001, 0x2ae41248, 0x000000ac, 0x00000000 }, + { 0x00000001, 0x2ae80608, 0x00000000, 0x00000002 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010001, 0x2ae80608, 0x00000000, 0x00000018 }, + { 0x00000001, 0x21e80208, 0x000000a0, 0x00000000 }, + { 0x00000001, 0x21f42288, 0x00000014, 0x00000000 }, + { 0x00000041, 0x2b081208, 0x120000aa, 0x00000ac2 }, + { 0x00000040, 0x2b080208, 0x12000b08, 0x00000ac0 }, + { 0x00000041, 0x2b080208, 0x02000b08, 0x00000ae8 }, + { 0x00000001, 0x2b142288, 0x00000014, 0x00000000 }, + { 0x00000001, 0x23400608, 0x00000000, 0x7149000a }, + { 0x00000001, 0x23540608, 0x00000000, 0x000f000f }, + { 0x00000001, 0x23680608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23582288, 0x000000a6, 0x00000000 }, + { 0x00600001, 0x2b400208, 0x008d0b00, 0x00000000 }, + { 0x00000001, 0x23500608, 0x00000000, 0xffff0000 }, + { 0x00000001, 0x21002288, 0x00000ac0, 0x00000000 }, + { 0x00000001, 0x21012288, 0x00000ac2, 0x00000000 }, + { 0x00000001, 0x23501248, 0x00000100, 0x00000000 }, + { 0x00000001, 0x235a1648, 0x10000000, 0x00000000 }, + { 0x01000010, 0x20001240, 0x12000ac0, 0x00000ae0 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000020 }, + { 0x01000010, 0x20001240, 0x12000ac2, 
0x00000ae2 }, + { 0x00010001, 0x235a1648, 0x10000000, 0x04000400 }, + { 0x01000005, 0x20002240, 0x160000a4, 0x00010001 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, + { 0x0a800031, 0x2b600a08, 0x06000b40, 0x02180200 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000240 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x06000b40, 0x02280300 }, + { 0x05000010, 0x20001260, 0x12000b70, 0x00000b88 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000001f0 }, + { 0x00000005, 0x234c0208, 0x06000b80, 0x1f00ffff }, + { 0x00000040, 0x234c0208, 0x0600034c, 0x000e0000 }, + { 0x00000005, 0x21001248, 0x16000b80, 0x00030003 }, + { 0x00000001, 0x23440608, 0x00000000, 0x00000020 }, + { 0x01000010, 0x20001240, 0x16000100, 0x00030003 }, + { 0x00110040, 0x234c0208, 0x0600034c, 0x00400000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000050 }, + { 0x02000005, 0x20001200, 0x16000b84, 0xff00ff00 }, + { 0x00010001, 0x23440608, 0x00000000, 0x00000080 }, + { 0x00010040, 0x234c0208, 0x0600034c, 0x00600000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x000000c0 }, + { 0x00000040, 0x234c0208, 0x0600034c, 0x00400000 }, + { 0x00000005, 0x21001248, 0x16000b80, 0x00030003 }, + { 0x01000010, 0x20001240, 0x16000100, 0x00000000 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000080 }, + { 0x00000040, 0x2b480208, 0x06000b48, 0x00000003 }, + { 0x0a800031, 0x2ba00a08, 0x06000b40, 0x02480400 }, + { 0x00200001, 0x2ba80208, 0x00450bc0, 0x00000000 }, + { 0x00200001, 0x2bb00208, 0x00450be0, 0x00000000 }, + { 0x00200001, 0x2bb80208, 0x00450c00, 0x00000000 }, + { 0x00600001, 0x28000208, 0x008d0b40, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0ba0, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0200 }, + { 0x00000001, 0x23600208, 0x000000b4, 0x00000000 }, + { 0x00000001, 0x23640208, 0x000000b8, 0x00000000 }, + { 0x00000001, 0x235c0608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x235c2288, 0x00000b85, 
0x00000000 }, + { 0x00000001, 0x235d2288, 0x00000b86, 0x00000000 }, + { 0x00000040, 0x21040208, 0x06000b08, 0x00000003 }, + { 0x00000041, 0x23480208, 0x06000104, 0x00000010 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x000000b0 }, + { 0x00200001, 0x23440608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x235c0208, 0x00000b64, 0x00000000 }, + { 0x00000001, 0x23600208, 0x00000b68, 0x00000000 }, + { 0x00000001, 0x23640608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x23642288, 0x00000b6c, 0x00000000 }, + { 0x00000005, 0x234c0208, 0x06000b60, 0x0000c0ff }, + { 0x00000040, 0x234c0208, 0x0600034c, 0x000e2000 }, + { 0x00000001, 0x21000608, 0x00000000, 0x00000000 }, + { 0x00000001, 0x21012288, 0x00000b62, 0x00000000 }, + { 0x00000005, 0x21001248, 0x16000100, 0x1f001f00 }, + { 0x00000040, 0x234c0208, 0x0200034c, 0x00000100 }, + { 0x00600001, 0x28000208, 0x008d01e0, 0x00000000 }, + { 0x00600001, 0x28200208, 0x008d0340, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0202 }, + { 0x00000040, 0x28080208, 0x06000808, 0x00000002 }, + { 0x00600001, 0x28200208, 0x008d0360, 0x00000000 }, + { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0002 }, + { 0x00000040, 0x2ac41248, 0x16000ac4, 0x00010001 }, + { 0x01000010, 0x20001240, 0x12000ac4, 0x00000ae4 }, + { 0x00010020, 0x34000000, 0x0e001400, 0x00000090 }, + { 0x00000040, 0x21e80208, 0x160001e8, 0x00030003 }, + { 0x00000040, 0x2b080208, 0x02000b08, 0x00000ae8 }, + { 0x00000040, 0x2ac01248, 0x16000ac0, 0x00010001 }, + { 0x01000010, 0x20001240, 0x12000ac0, 0x000000aa }, + { 0x00010040, 0x2ac21248, 0x16000ac2, 0x00010001 }, + { 0x00010001, 0x2ac01648, 0x10000000, 0x00000000 }, + { 0x00000020, 0x34000000, 0x0e001400, 0xfffffb30 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, + { 0x0a800031, 0x2b600a08, 0x06000800, 0x0219e002 }, + { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 }, + { 0x07000031, 0x24000a40, 0x06000e00, 0x82000010 }, + { 0x0000007e, 0x00000000, 0x00000000, 
0x00000000 }, -- cgit v1.2.1 From 90492086c30bf295ba87325517a07ec3e3afd748 Mon Sep 17 00:00:00 2001 From: "Qu, PengFei" Date: Mon, 30 May 2016 09:56:00 -0400 Subject: Follow the HW spec to set the surface cache attribute for Gen9+ Currently it will use the unoptimized cache attribute for the surface on Gen9+. This is to follow the HW spec to optimize the cache attribute of the surface for gen9+. Signed-off-by: Qu, Pengfei Signed-off-by: Zhao Yakui (cherry picked from commit dd9a0fb7a885f79f6413df0bd1afd5556c919a03) --- src/gen8_post_processing.c | 10 ++++++++++ src/i965_defines.h | 2 ++ src/i965_gpe_utils.c | 16 ++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c index fbf0e579..375bbe08 100644 --- a/src/gen8_post_processing.c +++ b/src/gen8_post_processing.c @@ -380,6 +380,7 @@ gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_cont int width, int height, int pitch, int format, int index, int is_target) { + struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen8_surface_state *ss; dri_bo *ss_bo; unsigned int tiling; @@ -393,6 +394,10 @@ gen8_pp_set_surface_state(VADriverContextP ctx, struct i965_post_processing_cont assert(ss_bo->virtual); ss = (struct gen8_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index)); memset(ss, 0, sizeof(*ss)); + + if (IS_GEN9(i965->intel.device_info)) + ss->ss1.surface_mocs = GEN9_CACHE_PTE; + ss->ss0.surface_type = I965_SURFACE_2D; ss->ss0.surface_format = format; ss->ss8.base_addr = surf_bo->offset + surf_bo_offset; @@ -424,6 +429,7 @@ gen8_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_con int format, int interleave_chroma, int index) { + struct i965_driver_data *i965 = i965_driver_data(ctx); struct gen8_surface_state2 *ss2; dri_bo *ss2_bo; unsigned int tiling; @@ -437,6 +443,10 @@ gen8_pp_set_surface2_state(VADriverContextP ctx, struct i965_post_processing_con assert(ss2_bo->virtual); ss2 
= (struct gen8_surface_state2 *)((char *)ss2_bo->virtual + SURFACE_STATE_OFFSET(index)); memset(ss2, 0, sizeof(*ss2)); + + if (IS_GEN9(i965->intel.device_info)) + ss2->ss5.surface_object_mocs = GEN9_CACHE_PTE; + ss2->ss6.base_addr = surf_bo->offset + surf_bo_offset; ss2->ss1.cbcr_pixel_offset_v_direction = 0; ss2->ss1.width = width - 1; diff --git a/src/i965_defines.h b/src/i965_defines.h index e69f23f2..f86ac8ef 100755 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -978,4 +978,6 @@ #define MFC_BITSTREAM_BYTECOUNT_FRAME_REG 0x128A0 #define MFC_IMAGE_STATUS_CTRL_REG 0x128B8 +#define GEN9_CACHE_PTE 0x02 + #endif /* _I965_DEFINES_H_ */ diff --git a/src/i965_gpe_utils.c b/src/i965_gpe_utils.c index 91d1192a..d911196b 100644 --- a/src/i965_gpe_utils.c +++ b/src/i965_gpe_utils.c @@ -721,6 +721,7 @@ gen8_gpe_set_surface2_state(VADriverContextP ctx, struct object_surface *obj_surface, struct gen8_surface_state2 *ss) { + struct i965_driver_data *i965 = i965_driver_data(ctx); int w, h, w_pitch; unsigned int tiling, swizzle; @@ -734,6 +735,9 @@ gen8_gpe_set_surface2_state(VADriverContextP ctx, memset(ss, 0, sizeof(*ss)); /* ss0 */ + if (IS_GEN9(i965->intel.device_info)) + ss->ss5.surface_object_mocs = GEN9_CACHE_PTE; + ss->ss6.base_addr = (uint32_t)obj_surface->bo->offset64; ss->ss7.base_addr_high = (uint32_t)(obj_surface->bo->offset64 >> 32); /* ss1 */ @@ -782,6 +786,7 @@ gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx, struct object_surface *obj_surface, struct gen8_surface_state *ss) { + struct i965_driver_data *i965 = i965_driver_data(ctx); int w, h, w_pitch; unsigned int tiling, swizzle; @@ -792,6 +797,9 @@ gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx, memset(ss, 0, sizeof(*ss)); /* ss0 */ + if (IS_GEN9(i965->intel.device_info)) + ss->ss1.surface_mocs = GEN9_CACHE_PTE; + ss->ss0.surface_type = I965_SURFACE_2D; ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; /* ss1 */ @@ -810,6 +818,7 @@ 
gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx, struct object_surface *obj_surface, struct gen8_surface_state *ss) { + struct i965_driver_data *i965 = i965_driver_data(ctx); int w, w_pitch; unsigned int tiling, swizzle; int cbcr_offset; @@ -822,6 +831,9 @@ gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx, cbcr_offset = obj_surface->height * obj_surface->width; memset(ss, 0, sizeof(*ss)); /* ss0 */ + if (IS_GEN9(i965->intel.device_info)) + ss->ss1.surface_mocs = GEN9_CACHE_PTE; + ss->ss0.surface_type = I965_SURFACE_2D; ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM; /* ss1 */ @@ -897,6 +909,7 @@ gen8_gpe_set_buffer_surface_state(VADriverContextP ctx, struct i965_buffer_surface *buffer_surface, struct gen8_surface_state *ss) { + struct i965_driver_data *i965 = i965_driver_data(ctx); int num_entries; assert(buffer_surface->bo); @@ -905,6 +918,9 @@ gen8_gpe_set_buffer_surface_state(VADriverContextP ctx, memset(ss, 0, sizeof(*ss)); /* ss0 */ ss->ss0.surface_type = I965_SURFACE_BUFFER; + if (IS_GEN9(i965->intel.device_info)) + ss->ss1.surface_mocs = GEN9_CACHE_PTE; + /* ss1 */ ss->ss8.base_addr = (uint32_t)buffer_surface->bo->offset64; ss->ss9.base_addr_high = (uint32_t)(buffer_surface->bo->offset64 >> 32); -- cgit v1.2.1 From 160084ad832453d52561fb3bc3cbe56aa1632c65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=ADctor=20Manuel=20J=C3=A1quez=20Leal?= Date: Wed, 8 Jun 2016 14:03:16 +0200 Subject: check the result of hsw_veb_post_format_convert() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit is related to previous commit e4996019, which is a simpler approach of this commit, by adding more supported color formats. 
For example, VA_FOURCC_BGRX should be added too, to avoid an assert with this gstreamer pipeline: gst-play-1.0 burosch1.mpg --videosink=ximagesink http://samples.mplayerhq.hu/MPEG2/interlaced/burosch1.mpg Nonetheless, instead of just adding already supported color formats conversion, it is better to rely on what vpp_surface_convert() already checks, by verifying the result operation, and avoid the assert. This patch does it for hsw_veb_post_format_convert(). Signed-off-by: Víctor Manuel Jáquez Leal (cherry picked from commit bf387bbdde4fa1a419d19d7c606e9ba94e07e24e) --- src/gen75_vpp_vebox.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 1b4232d2..8d0569d0 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -1500,16 +1500,18 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, return 0; } -int hsw_veb_post_format_convert(VADriverContextP ctx, +VAStatus +hsw_veb_post_format_convert(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) { struct object_surface *obj_surface = NULL; + VAStatus va_status = VA_STATUS_SUCCESS; obj_surface = proc_ctx->frame_store[proc_ctx->current_output].obj_surface; if (proc_ctx->format_convert_flags & POST_COPY_CONVERT) { /* copy the saved frame in the second call */ - vpp_surface_convert(ctx, obj_surface, proc_ctx->surface_output_object); + va_status = vpp_surface_convert(ctx, obj_surface, proc_ctx->surface_output_object); } else if(!(proc_ctx->format_convert_flags & POST_FORMAT_CONVERT) && !(proc_ctx->format_convert_flags & POST_SCALING_CONVERT)){ /* Output surface format is covered by vebox pipeline and @@ -1518,7 +1520,7 @@ int hsw_veb_post_format_convert(VADriverContextP ctx, } else if ((proc_ctx->format_convert_flags & POST_FORMAT_CONVERT) && !(proc_ctx->format_convert_flags & POST_SCALING_CONVERT)){ /* convert and copy NV12 to YV12/IMC3/IMC2/RGBA output*/ - vpp_surface_convert(ctx, obj_surface, 
proc_ctx->surface_output_object); + va_status = vpp_surface_convert(ctx, obj_surface, proc_ctx->surface_output_object); } else if(proc_ctx->format_convert_flags & POST_SCALING_CONVERT) { VAProcPipelineParameterBuffer * const pipe = proc_ctx->pipeline_param; @@ -1532,20 +1534,10 @@ int hsw_veb_post_format_convert(VADriverContextP ctx, /* second step: color format convert and copy to output */ obj_surface = proc_ctx->surface_output_object; - if(obj_surface->fourcc == VA_FOURCC_NV12 || - obj_surface->fourcc == VA_FOURCC_YV12 || - obj_surface->fourcc == VA_FOURCC_I420 || - obj_surface->fourcc == VA_FOURCC_YUY2 || - obj_surface->fourcc == VA_FOURCC_IMC1 || - obj_surface->fourcc == VA_FOURCC_IMC3 || - obj_surface->fourcc == VA_FOURCC_RGBA) { - vpp_surface_convert(ctx, proc_ctx->surface_output_scaled_object, obj_surface); - }else { - assert(0); - } + va_status = vpp_surface_convert(ctx, proc_ctx->surface_output_scaled_object, obj_surface); } - return 0; + return va_status; } static VAStatus @@ -1714,9 +1706,9 @@ gen75_vebox_process_picture(VADriverContextP ctx, intel_batchbuffer_flush(proc_ctx->batch); } - hsw_veb_post_format_convert(ctx, proc_ctx); + status = hsw_veb_post_format_convert(ctx, proc_ctx); - return VA_STATUS_SUCCESS; + return status; } void gen75_vebox_context_destroy(VADriverContextP ctx, @@ -1944,9 +1936,9 @@ gen8_vebox_process_picture(VADriverContextP ctx, intel_batchbuffer_flush(proc_ctx->batch); } - hsw_veb_post_format_convert(ctx, proc_ctx); + status = hsw_veb_post_format_convert(ctx, proc_ctx); - return VA_STATUS_SUCCESS; + return status; } @@ -2412,7 +2404,7 @@ gen9_vebox_process_picture(VADriverContextP ctx, intel_batchbuffer_flush(proc_ctx->batch); } - hsw_veb_post_format_convert(ctx, proc_ctx); + status = hsw_veb_post_format_convert(ctx, proc_ctx); - return VA_STATUS_SUCCESS; + return status; } -- cgit v1.2.1 From 62b867a81be551e18f0b84c6526d7d3c68c85ef3 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Thu, 16 Jun 2016 13:32:00 +0800 Subject: 
Make sure a right VEBOX_IECP_STATE is used on BDW+ Some features of IECP aren't enabled, and the corresponding fields must be set to 0 in VEBOX_IECP_STATE. Thanks for Peng's finding: The issue disappear when disable libdrm cache This fixes https://bugs.freedesktop.org/show_bug.cgi?id=95349 Cc: peng.chen Signed-off-by: Xiang, Haihao Tested-by: Lim Siew Hoon Tested-by: peng.chen Tested-by: Sreerenj Balachandran (cherry picked from commit 09afaebfb94aad7a01ed5a3f71a77fbb70e7b550) --- src/gen75_vpp_vebox.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 8d0569d0..e124604f 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -923,6 +923,7 @@ void hsw_veb_state_table_setup(VADriverContextP ctx, struct intel_vebox_context dri_bo *iecp_bo = proc_ctx->iecp_state_table.bo; dri_bo_map(iecp_bo, 1); proc_ctx->iecp_state_table.ptr = iecp_bo->virtual; + memset(proc_ctx->iecp_state_table.ptr, 0, 97 * 4); hsw_veb_iecp_std_table(ctx, proc_ctx); hsw_veb_iecp_ace_table(ctx, proc_ctx); @@ -2196,6 +2197,7 @@ void skl_veb_state_table_setup(VADriverContextP ctx, struct intel_vebox_context dri_bo *iecp_bo = proc_ctx->iecp_state_table.bo; dri_bo_map(iecp_bo, 1); proc_ctx->iecp_state_table.ptr = iecp_bo->virtual; + memset(proc_ctx->iecp_state_table.ptr, 0, 90 * 4); hsw_veb_iecp_std_table(ctx, proc_ctx); hsw_veb_iecp_ace_table(ctx, proc_ctx); -- cgit v1.2.1 From e7ba1a336a9603b6d878b556ce8915fe69ad6611 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 24 Jun 2016 10:04:28 +0800 Subject: Update PCI IDs for Kabylake Remove unused PCI IDs and add new PCI IDs for KBL, the IDs are taken directly from intel-gfx patches, which are under review: https://lists.freedesktop.org/archives/intel-gfx/2016-June/099263.html https://lists.freedesktop.org/archives/intel-gfx/2016-June/099264.html Signed-off-by: Xiang, Haihao (cherry picked from commit f47e513dcd8557b84463379d52d7156adef121a9) --- src/i965_pciids.h | 8 +++----- 1 file 
changed, 3 insertions(+), 5 deletions(-) diff --git a/src/i965_pciids.h b/src/i965_pciids.h index fe3f4d0d..1ea3c984 100644 --- a/src/i965_pciids.h +++ b/src/i965_pciids.h @@ -174,14 +174,12 @@ CHIPSET(0x591E, kbl, kbl, "Intel(R) Kabylake") CHIPSET(0x5912, kbl, kbl, "Intel(R) Kabylake") CHIPSET(0x5917, kbl, kbl, "Intel(R) Kabylake") CHIPSET(0x5902, kbl, kbl, "Intel(R) Kabylake") -CHIPSET(0x5932, kbl, kbl, "Intel(R) Kabylake") CHIPSET(0x591B, kbl, kbl, "Intel(R) Kabylake") -CHIPSET(0x592B, kbl, kbl, "Intel(R) Kabylake") CHIPSET(0x593B, kbl, kbl, "Intel(R) Kabylake") CHIPSET(0x590B, kbl, kbl, "Intel(R) Kabylake") CHIPSET(0x591A, kbl, kbl, "Intel(R) Kabylake") -CHIPSET(0x592A, kbl, kbl, "Intel(R) Kabylake") -CHIPSET(0x593A, kbl, kbl, "Intel(R) Kabylake") CHIPSET(0x590A, kbl, kbl, "Intel(R) Kabylake") CHIPSET(0x591D, kbl, kbl, "Intel(R) Kabylake") -CHIPSET(0x593D, kbl, kbl, "Intel(R) Kabylake") \ No newline at end of file +CHIPSET(0x5908, kbl, kbl, "Intel(R) Kabylake") +CHIPSET(0x5923, kbl, kbl, "Intel(R) Kabylake") +CHIPSET(0x5927, kbl, kbl, "Intel(R) Kabylake") -- cgit v1.2.1 From 2b03996d479606e2d2284fb259dd66aa42ce6a21 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 27 Jun 2016 12:08:26 +0800 Subject: Downgrade the alignment requirement for linear surface on BDW+ When sharing the YUY2/UYVY buffer with other driver, the current alignment is too strict, which causes that it is not handled correctly by other driver.(The current alignment is considered based on I420/YV12) https://bugs.freedesktop.org/show_bug.cgi?id=96689 Tested-by: Cheah, Vincent Beng Keat Signed-off-by: Zhao Yakui (cherry picked from commit bcde10dac40cbc4c8502fa519404c9379372184b) --- src/i965_device_info.c | 10 +++++----- src/i965_drv_video.c | 4 ++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/i965_device_info.c b/src/i965_device_info.c index 239961cd..47fd50a9 100644 --- a/src/i965_device_info.c +++ b/src/i965_device_info.c @@ -239,7 +239,7 @@ static struct 
hw_codec_info bdw_hw_codec_info = { .max_width = 4096, .max_height = 4096, - .min_linear_wpitch = 128, + .min_linear_wpitch = 64, .min_linear_hpitch = 16, .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | @@ -282,7 +282,7 @@ static struct hw_codec_info chv_hw_codec_info = { .max_width = 4096, .max_height = 4096, - .min_linear_wpitch = 128, + .min_linear_wpitch = 64, .min_linear_hpitch = 16, .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | @@ -332,7 +332,7 @@ static struct hw_codec_info skl_hw_codec_info = { .max_width = 4096, /* default. See max_resolution */ .max_height = 4096, /* default. See max_resolution */ - .min_linear_wpitch = 128, + .min_linear_wpitch = 64, .min_linear_hpitch = 16, .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | @@ -384,7 +384,7 @@ static struct hw_codec_info bxt_hw_codec_info = { .max_width = 4096, /* default. See max_resolution */ .max_height = 4096, /* default. See max_resolution */ - .min_linear_wpitch = 128, + .min_linear_wpitch = 64, .min_linear_hpitch = 16, .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | @@ -437,7 +437,7 @@ static struct hw_codec_info kbl_hw_codec_info = { .max_width = 4096, /* default. See max_resolution */ .max_height = 4096, /* default. 
See max_resolution */ - .min_linear_wpitch = 128, + .min_linear_wpitch = 64, .min_linear_hpitch = 16, .h264_mvc_dec_profiles = (VA_PROFILE_MASK(H264StereoHigh) | diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 66cdb9e5..ad48f22f 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4162,10 +4162,12 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, case VA_FOURCC_YV16: obj_surface->cb_cr_width = obj_surface->orig_width / 2; + obj_surface->width = ALIGN(obj_surface->cb_cr_width, i965->codec_info->min_linear_wpitch) * 2; obj_surface->cb_cr_height = obj_surface->orig_height; obj_surface->y_cr_offset = obj_surface->height; obj_surface->y_cb_offset = obj_surface->y_cr_offset + ALIGN(obj_surface->cb_cr_height, 32) / 2; obj_surface->cb_cr_pitch = obj_surface->width / 2; + region_width = obj_surface->width; region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32); break; @@ -4180,8 +4182,10 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, } obj_surface->cb_cr_width = obj_surface->orig_width / 2; + obj_surface->width = ALIGN(obj_surface->cb_cr_width, i965->codec_info->min_linear_wpitch) * 2; obj_surface->cb_cr_height = obj_surface->orig_height / 2; obj_surface->cb_cr_pitch = obj_surface->width / 2; + region_width = obj_surface->width; region_height = obj_surface->height + obj_surface->height / 2; break; -- cgit v1.2.1 From 39dce4174c1b15f1bbc2213c44910c43504451d4 Mon Sep 17 00:00:00 2001 From: Scott D Phillips Date: Tue, 19 Jul 2016 10:04:54 -0700 Subject: i965_drv: fix cb_cr_height for YUV422 formats YUV422 has full vertical chroma resolution, not half. 
Signed-off-by: Scott D Phillips (cherry picked from commit a77556ca48f977b89eab9d3f9b965d1a78e6bacb) --- src/i965_drv_video.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index ad48f22f..ec678481 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -4118,7 +4118,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, obj_surface->y_cb_offset = 0; obj_surface->y_cr_offset = 0; obj_surface->cb_cr_width = obj_surface->orig_width / 2; - obj_surface->cb_cr_height = obj_surface->orig_height / 2; + obj_surface->cb_cr_height = obj_surface->orig_height; region_width = obj_surface->width; region_height = obj_surface->height; -- cgit v1.2.1 From 0f670d8d1195d93693fc1d4cfc41f76064f32976 Mon Sep 17 00:00:00 2001 From: "U. Artie Eoff" Date: Wed, 20 Jul 2016 09:33:18 -0700 Subject: shaders/gen9: fix build when no intel-gen4asm available If intel-gen4asm version is < 1.9 or not installed then we shouldn't run the associated make rules. This fixes 'make dist' failure. Signed-off-by: U. 
Artie Eoff Reviewed-by: Zhao Yakui (cherry picked from commit 09d29db2a1dd6880297c49a73fc8be4b872f1eb9) --- src/shaders/post_processing/gen9/Makefile.am | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/shaders/post_processing/gen9/Makefile.am b/src/shaders/post_processing/gen9/Makefile.am index 1e32dffd..3e0c2cec 100644 --- a/src/shaders/post_processing/gen9/Makefile.am +++ b/src/shaders/post_processing/gen9/Makefile.am @@ -56,6 +56,7 @@ all-local: $(TARGETS) SUFFIXES = .g9b .g9s .asm +if HAVE_GEN4ASM $(INTEL_PP_GEN9_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G9A) .asm.g9s: $(AM_V_GEN)cpp $< > _pp0.$@; \ @@ -63,6 +64,7 @@ $(INTEL_PP_GEN9_ASM): $(INTEL_PP_ASM) $(INTEL_PP_G9A) rm _pp0.$@ .g9s.g9b: $(AM_V_GEN)$(GEN4ASM) -a -o $@ -g 9 $< +endif CLEANFILES = $(INTEL_PP_GEN9_ASM) -- cgit v1.2.1 From 74fd17a4b8ee862bb348c306a7fa8f3715cac7f0 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Tue, 19 Jul 2016 15:33:46 +0800 Subject: Export the P010 surface attribute for HEVC/VP9 10-bits decoding Signed-off-by: Zhao Yakui (cherry picked from commit 62c3a0d75434e76c228247786817336430862b29) --- src/i965_drv_video.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index ec678481..657edf34 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -5798,6 +5798,15 @@ i965_QuerySurfaceAttributes(VADriverContextP ctx, attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; attribs[i].value.value.i = VA_FOURCC_NV12; i++; + + if ((obj_config->profile == VAProfileHEVCMain10) || + (obj_config->profile == VAProfileVP9Profile2)) { + attribs[i].type = VASurfaceAttribPixelFormat; + attribs[i].value.type = VAGenericValueTypeInteger; + attribs[i].flags = VA_SURFACE_ATTRIB_GETTABLE | VA_SURFACE_ATTRIB_SETTABLE; + attribs[i].value.value.i = VA_FOURCC_P010; + i++; + } } } else if (obj_config->entrypoint == VAEntrypointEncSlice || /* encode */ obj_config->entrypoint == VAEntrypointVideoProc || -- cgit v1.2.1 From 
974f4a29b843c48128ec4c2e597620f5464ed51e Mon Sep 17 00:00:00 2001 From: Sreerenj Balachandran Date: Fri, 15 Jul 2016 17:38:20 +0300 Subject: encode: h264, h265: Remove unnecessary warning The warning "Input ref list is Wrong" is generated based on the assumption that reference frames provided in VAEncPictureParameterBuffer are aligned with ref_pic_list included in VAEncSliceParameterBuffer. There shouldn't be such constraints, as per VA specification pic_param->reference_frames can have any order based on dpb manipulation. Signed-off-by: Sreerenj Balachandran (cherry picked from commit b21b187c8146f7840bf176f94a3ad62a4c922add) --- src/gen6_mfc_common.c | 3 --- src/gen9_mfc_hevc.c | 4 ---- 2 files changed, 7 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index f961ecdc..2f9f7614 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1176,9 +1176,6 @@ intel_mfc_avc_ref_idx_state(VADriverContextP ctx, fref_entry &= ~(0xFF << ref_idx_l0_shift); fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift); } - if(frame_index == 1){ - WARN_ONCE("Input ref list is Wrong !\n"); - } } if (slice_type == SLICE_TYPE_B) { diff --git a/src/gen9_mfc_hevc.c b/src/gen9_mfc_hevc.c index ad5e9360..7435d2a0 100644 --- a/src/gen9_mfc_hevc.c +++ b/src/gen9_mfc_hevc.c @@ -714,10 +714,6 @@ gen9_hcpe_ref_idx_state_1(struct intel_batchbuffer *batch, WARN_ONCE("RefPicList 0 or 1 is not found in DPB!\n"); } - if(num_ref_minus1 == 0 && frame_index == 1 && list == 0){ - WARN_ONCE("Input ref list is Wrong !\n"); - } - BEGIN_BCS_BATCH(batch, 18); OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2)); -- cgit v1.2.1 From 47d8f231e955a26929feafdc1d6e92122cc49a01 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 13 Jul 2016 16:41:31 +0800 Subject: Fix the condition used in 'else if()' This 'else if()' is used to check output surface format, not input surface format.
Tested-by: Xu, Guangxin Signed-off-by: Xiang, Haihao Reviewed-by: Sean V Kelley (cherry picked from commit e5544460dcb4b325df42e343953e6d5b57f8f544) --- src/gen75_vpp_vebox.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index e124604f..9773c246 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -1450,8 +1450,8 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, proc_ctx->format_convert_flags |= POST_FORMAT_CONVERT; } else if(obj_surf_output->fourcc == VA_FOURCC_AYUV || obj_surf_output->fourcc == VA_FOURCC_YUY2 || - obj_surf_input->fourcc == VA_FOURCC_NV12 || - obj_surf_input->fourcc == VA_FOURCC_P010){ + obj_surf_output->fourcc == VA_FOURCC_NV12 || + obj_surf_output->fourcc == VA_FOURCC_P010) { /* Nothing to do here */ } else { -- cgit v1.2.1 From 04e73fd9351b625e06a5d166be0d1a6921c06768 Mon Sep 17 00:00:00 2001 From: "Ung, Teng En" Date: Wed, 22 Jun 2016 10:49:22 +0800 Subject: Fix to use source and output regions size instead of the input output surfaces original size. 
This fixes https://bugs.freedesktop.org/show_bug.cgi?id=96739 Signed-off-by: Ung, Teng En Reviewed-by: Zhao Yakui (cherry picked from commit 51ad826fcc0d2512f7ef74e807e4b8526663fc28) --- src/gen75_vpp_vebox.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 9773c246..566f3375 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -1379,11 +1379,11 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, proc_ctx->format_convert_flags = 0; - proc_ctx->width_input = obj_surf_input->orig_width; - proc_ctx->height_input = obj_surf_input->orig_height; - proc_ctx->width_output = obj_surf_output->orig_width; - proc_ctx->height_output = obj_surf_output->orig_height; - + proc_ctx->width_input = proc_ctx->pipeline_param->surface_region->width; + proc_ctx->height_input = proc_ctx->pipeline_param->surface_region->height; + proc_ctx->width_output = proc_ctx->pipeline_param->output_region->width; + proc_ctx->height_output = proc_ctx->pipeline_param->output_region->height; + /* only partial frame is not supported to be processed */ /* assert(proc_ctx->width_input == proc_ctx->pipeline_param->surface_region->width); -- cgit v1.2.1 From 396bf01a6181401e14d8d60f34774109239c8fd1 Mon Sep 17 00:00:00 2001 From: Scott D Phillips Date: Mon, 25 Jul 2016 13:19:28 -0700 Subject: dri: return error for unimplemented surface formats Previously packed YUV422 surfaces were allowed to be rendered but were rendered improperly.
Signed-off-by: Scott D Phillips (cherry picked from commit c7c69eb953822bbaf2075f9bea6f52d41212f5d3) --- src/i965_output_dri.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/i965_output_dri.c b/src/i965_output_dri.c index d36fec5b..ae3bcf6d 100644 --- a/src/i965_output_dri.c +++ b/src/i965_output_dri.c @@ -135,6 +135,9 @@ i965_put_surface_dri( */ obj_surface = SURFACE(surface); ASSERT_RET(obj_surface && obj_surface->bo, VA_STATUS_SUCCESS); + ASSERT_RET(obj_surface->fourcc != VA_FOURCC_YUY2 && + obj_surface->fourcc != VA_FOURCC_UYVY, + VA_STATUS_ERROR_UNIMPLEMENTED); _i965LockMutex(&i965->render_mutex); -- cgit v1.2.1 From 4577ded31179da8c9bc0ee66c00e8075fe17efe8 Mon Sep 17 00:00:00 2001 From: Lim Siew Hoon Date: Tue, 26 Jul 2016 16:54:37 +0800 Subject: Missing 'do' in "do ...while" in macro ALLOC_VDENC_BUFFER_RESOURCE Signed-off-by: Lim Siew Hoon Reviewed-by: Zhao Yakui (cherry picked from commit 9fc4b6675c42df9002279fbc85985dcdd7510525) --- src/gen9_vdenc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c index 75268bbf..e19def13 100644 --- a/src/gen9_vdenc.c +++ b/src/gen9_vdenc.c @@ -778,7 +778,7 @@ const int vdenc_hme_cost[8][52] = { OUT_BCS_BATCH(batch, attr); \ } while (0) -#define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) { \ +#define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do { \ buffer.type = I965_GPE_RESOURCE_BUFFER; \ buffer.width = bfsize; \ buffer.height = 1; \ -- cgit v1.2.1 From 3f6a5f6f6c43ea6278cc331442548d6b0633aeee Mon Sep 17 00:00:00 2001 From: Daniel Charles Date: Thu, 28 Jul 2016 17:11:54 -0700 Subject: i965_encoder: return a failing status VAStatus when calling vme_pipeline was silently failing Signed-off-by: Daniel Charles Reviewed-by: Sean V Kelley (cherry picked from commit 4307b27248d67077036b64614515bf7f84273676) --- src/i965_encoder.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 
1088f088..361aa93b 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -721,10 +721,12 @@ intel_encoder_end_picture(VADriverContextP ctx, if((encoder_context->vme_context && encoder_context->vme_pipeline)) { vaStatus = encoder_context->vme_pipeline(ctx, profile, encode_state, encoder_context); + if (vaStatus != VA_STATUS_SUCCESS) + return vaStatus; } - if (vaStatus == VA_STATUS_SUCCESS) - encoder_context->mfc_pipeline(ctx, profile, encode_state, encoder_context); + encoder_context->mfc_pipeline(ctx, profile, encode_state, encoder_context); + return VA_STATUS_SUCCESS; } -- cgit v1.2.1 From 1d0b881400d80e86bc4c6192da0dd28a4867ca11 Mon Sep 17 00:00:00 2001 From: XuGuangxin Date: Mon, 25 Jul 2016 16:53:07 +0800 Subject: Encode: Clear right and bottom border of NV12 surface to avoid run2run issue This fixes some issues mentioned in https://bugs.freedesktop.org/show_bug.cgi?id=96703 Signed-off-by: Xu Guangxin Reviewed-by: Sean V Kelley Tested-by: Mingruo Sun (cherry picked from commit a478779c8c6b296c2069ab0fbc6f71c2e55c6a85) --- src/i965_drv_video.c | 1 + src/i965_drv_video.h | 3 +++ src/i965_encoder.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 657edf34..98395840 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -1576,6 +1576,7 @@ i965_CreateSurfaces2( obj_surface->user_disable_tiling = false; obj_surface->user_h_stride_set = false; obj_surface->user_v_stride_set = false; + obj_surface->border_cleared = false; obj_surface->subpic_render_idx = 0; for(j = 0; j < I965_MAX_SUBPIC_SUM; j++){ diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index 47e27d0f..fea75eeb 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -295,6 +295,9 @@ struct object_surface uint32_t user_disable_tiling : 1; uint32_t user_h_stride_set : 1; uint32_t user_v_stride_set : 1; + /* we need clear right and bottom border for NV12. 
+ * to avoid encode run to run issue*/ + uint32_t border_cleared : 1; VAGenericID wrapper_surface; diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 361aa93b..c83cc7df 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -49,6 +49,58 @@ extern Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_con extern Bool gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); +static VAStatus +clear_border(struct object_surface *obj_surface) +{ + int width[3], height[3], hstride[3], vstride[3]; /* in byte */ + int planes; + unsigned char* p; + int i,j; + + if (obj_surface->border_cleared) + return VA_STATUS_SUCCESS; + + if (obj_surface->fourcc == VA_FOURCC_NV12) { + planes = 2; + width[0] = width[1] = obj_surface->orig_width; + height[0] = obj_surface->orig_height; + height[1] = obj_surface->orig_height / 2; + hstride[0] = hstride[1] = obj_surface->width; + vstride[0]= obj_surface->height; + vstride[1] = obj_surface->height / 2; + + } else { + /* todo add P010 */ + return VA_STATUS_SUCCESS; + } + drm_intel_gem_bo_map_gtt(obj_surface->bo); + + p = (unsigned char*)obj_surface->bo->virtual; + if (!p) + return VA_STATUS_ERROR_INVALID_SURFACE; + + for (i = 0; i < planes; i++) { + int w = width[i]; + int h = height[i]; + int hs = hstride[i]; + int vs = vstride[i]; + /* right */ + for (j = 0; j < h; j++) { + memset(p + w, 0, hs - w); + p += hs; + } + /* bottom */ + for (/* nothing */; j < vs; j++) { + memset(p, 0, hs); + p += hs; + } + + } + drm_intel_gem_bo_unmap_gtt(obj_surface->bo); + obj_surface->border_cleared = true; + return VA_STATUS_SUCCESS; +} + static VAStatus intel_encoder_check_yuv_surface(VADriverContextP ctx, VAProfile profile, @@ -82,7 +134,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, if (tiling == I915_TILING_Y) { encoder_context->input_yuv_surface = 
encode_state->current_render_target; encode_state->input_yuv_object = obj_surface; - return VA_STATUS_SUCCESS; + return clear_border(obj_surface); } } @@ -124,7 +176,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, encoder_context->is_tmp_id = 1; - return VA_STATUS_SUCCESS; + return clear_border(obj_surface); } -- cgit v1.2.1 From 57f7b2d686beebe10786472e6254aa377178518f Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 25 Jul 2016 16:53:08 +0800 Subject: Encode: Set cost for MODE_CHROMA_INTRA/MODE_REFID_COST This fixes the remaining issues mentioned in https://bugs.freedesktop.org/show_bug.cgi?id=96703 after applying commit 3699c14 On GEN75+, driver copies vme_context->vme_state_message to VME kernel curbe buffer and VME kernel uses the data in curbe buffer to initialize VME message payload. vme_context->vme_state_message is set up in intel_vme_update_mbmv_cost(), which doesn't set all costs for used modes in VME kernels. The uninitialized mode cost will result in difference in VME output. Thanks for Elaine's finding that the issue disappears after initializing VME state message buffer with zeros. 
Signed-off-by: Elaine Wang Signed-off-by: Xiang, Haihao Reviewed-by: Sean V Kelley Tested-by: Mingruo Sun (cherry picked from commit 1cd67951974c944b060e2d5d88a76abb34c0c99b) --- src/gen6_mfc_common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 2f9f7614..c3e4d80e 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -790,6 +790,11 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx, assert(qp <= QP_MAX); lambda = intel_lambda_qp(qp); + + m_cost = lambda; + vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f); + vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f); + if (slice_type == SLICE_TYPE_I) { vme_state_message[MODE_INTRA_16X16] = 0; m_cost = lambda * 4; -- cgit v1.2.1 From dad1d3d9e6788cbcacf214c157cbb5dd7c931e4c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 3 Aug 2016 16:43:31 +0800 Subject: Set cost for modes used for VP8 encoding This is similar to what commit 1cd6795 does Signed-off-by: Xiang, Haihao Tested-by: Wang, Fei W Reviewed-by: Sean V Kelley (cherry picked from commit e32ac14feacff2d1b2a082ad54c0a91e9a8735a5) --- src/gen6_mfc_common.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index c3e4d80e..59f7785f 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -906,10 +906,16 @@ void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx, qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX); + + m_cost = lambda; + vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f); + if (is_key_frame) { vme_state_message[MODE_INTRA_16X16] = 0; m_cost = lambda * 16; vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f); + m_cost = lambda * 3; + vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f); } else { m_cost = 0; 
vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f); @@ -934,6 +940,7 @@ void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx, vme_state_message[MODE_INTER_16X8] = 0x4a; vme_state_message[MODE_INTER_8X8] = 0x4a; vme_state_message[MODE_INTER_4X4] = 0x4a; + vme_state_message[MODE_INTER_BWD] = 0; return; } m_costf = lambda * 10; @@ -941,6 +948,10 @@ void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx, m_cost = lambda * 24; vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f); + m_costf = lambda * 3.5; + m_cost = m_costf; + vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f); + m_costf = lambda * 2.5; m_cost = m_costf; vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f); -- cgit v1.2.1 From cdd057735c36b37769d0452e0ff8564a06c635ef Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Fri, 5 Aug 2016 13:25:58 +0800 Subject: decoder/h264: don't assert on invalid parameter Remove redundant checking on input parameters as well. This avoids assertion failure in https://bugs.freedesktop.org/show_bug.cgi?id=94007, but the upper library should check why the invalid parameters are passed to libva.
Signed-off-by: Xiang, Haihao Reviewed-by: Sean V Kelley (cherry picked from commit fb7d6f56ad3000734390279a55b5c7f54ed9e3a3) --- src/i965_decoder_utils.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index df0abe23..640fd145 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -953,17 +953,9 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx, VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param; int j; - assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID)); - assert(pic_param->CurrPic.picture_id != VA_INVALID_SURFACE); - - if (pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID || - pic_param->CurrPic.picture_id == VA_INVALID_SURFACE) - goto error; - - assert(pic_param->CurrPic.picture_id == decode_state->current_render_target); - - if (pic_param->CurrPic.picture_id != decode_state->current_render_target) - goto error; + ASSERT_RET(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID), VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET((pic_param->CurrPic.picture_id != VA_INVALID_SURFACE), VA_STATUS_ERROR_INVALID_PARAMETER); + ASSERT_RET((pic_param->CurrPic.picture_id == decode_state->current_render_target), VA_STATUS_ERROR_INVALID_PARAMETER); if ((h264_profile != VAProfileH264Baseline)) { if (pic_param->num_slice_groups_minus1 || @@ -1002,7 +994,7 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx, } for (j = 0; j < decode_state->num_slice_params; j++) { - assert(decode_state->slice_params && decode_state->slice_params[j]->buffer); + ASSERT_RET((decode_state->slice_params && decode_state->slice_params[j]->buffer), VA_STATUS_ERROR_INVALID_PARAMETER); slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer; if (j == decode_state->num_slice_params - 1) -- cgit v1.2.1 From 8d8120f9baf170618c5203019a58e53e6736be94 Mon Sep 17 00:00:00 2001 From: Hyunjun Ko Date: Wed, 10 Aug 2016 12:24:23 
+0900 Subject: gen8_mfc: fix memory leak during vp8 encoding This fixes https://bugs.freedesktop.org/show_bug.cgi?id=97272 Signed-off-by: Hyunjun Ko (cherry picked from commit 860192924ee81f443c06312dc0fc023822c3f05c) --- src/gen8_mfc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 1f8e57b3..2f0e7aea 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -3585,6 +3585,7 @@ static void vp8_enc_frame_header_binarize(struct encode_state *encode_state, frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual; assert(frame_header_buffer); memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8); + free(mfc_context->vp8_state.vp8_frame_header); dri_bo_unmap(mfc_context->vp8_state.frame_header_bo); } -- cgit v1.2.1 From 47d10f1d7364f6915a40a4b0d763f1cc57b2cec1 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 10 Aug 2016 12:30:12 +0800 Subject: VPP: Check the VPP pipeline_parameter to avoid NULL pointer This is to fix the crash issue caused by the commit 51ad826fcc0d2512f7ef74e807e4b8526663fc28. 
Reported-by: Xu,Guangxin Signed-off-by: Zhao Yakui (cherry picked from commit 4f8d4b211b4f90ef26c356b8028c5435cd685952) --- src/gen75_vpp_vebox.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 566f3375..2bc5cf97 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -1379,10 +1379,29 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, proc_ctx->format_convert_flags = 0; - proc_ctx->width_input = proc_ctx->pipeline_param->surface_region->width; - proc_ctx->height_input = proc_ctx->pipeline_param->surface_region->height; - proc_ctx->width_output = proc_ctx->pipeline_param->output_region->width; - proc_ctx->height_output = proc_ctx->pipeline_param->output_region->height; + if ((obj_surf_input == NULL) && + (proc_ctx->pipeline_param->surface_region == NULL)) + assert(0); + + if ((obj_surf_output == NULL) && + (proc_ctx->pipeline_param->output_region == NULL)) + assert(0); + + if (proc_ctx->pipeline_param->surface_region) { + proc_ctx->width_input = proc_ctx->pipeline_param->surface_region->width; + proc_ctx->height_input = proc_ctx->pipeline_param->surface_region->height; + } else { + proc_ctx->width_input = obj_surf_input->orig_width; + proc_ctx->height_input = obj_surf_input->orig_height; + } + + if (proc_ctx->pipeline_param->output_region) { + proc_ctx->width_output = proc_ctx->pipeline_param->output_region->width; + proc_ctx->height_output = proc_ctx->pipeline_param->output_region->height; + } else { + proc_ctx->width_output = obj_surf_output->orig_width; + proc_ctx->height_output = obj_surf_output->orig_height; + } /* only partial frame is not supported to be processed */ /* -- cgit v1.2.1 From 28c3d41e81579b7c0bcfa471c687f3637e07a862 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Wed, 10 Aug 2016 12:30:13 +0800 Subject: VPP: Check the returned status of hsw_veb_pre_format_convert before VEBOX VPP V1->V2: Use the ASSERT_RET for the debug purpose 
Signed-off-by: Zhao Yakui (cherry picked from commit 5a30370b22f0937f64d5935621e66561adc48ae0) --- src/gen75_vpp_vebox.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/gen75_vpp_vebox.c b/src/gen75_vpp_vebox.c index 2bc5cf97..91715fc5 100644 --- a/src/gen75_vpp_vebox.c +++ b/src/gen75_vpp_vebox.c @@ -1367,7 +1367,7 @@ gen75_vebox_ensure_surfaces(VADriverContextP ctx, return VA_STATUS_SUCCESS; } -int hsw_veb_pre_format_convert(VADriverContextP ctx, +VAStatus hsw_veb_pre_format_convert(VADriverContextP ctx, struct intel_vebox_context *proc_ctx) { VAStatus va_status; @@ -1381,11 +1381,11 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, if ((obj_surf_input == NULL) && (proc_ctx->pipeline_param->surface_region == NULL)) - assert(0); + ASSERT_RET(0, VA_STATUS_ERROR_INVALID_PARAMETER); if ((obj_surf_output == NULL) && (proc_ctx->pipeline_param->output_region == NULL)) - assert(0); + ASSERT_RET(0, VA_STATUS_ERROR_INVALID_PARAMETER); if (proc_ctx->pipeline_param->surface_region) { proc_ctx->width_input = proc_ctx->pipeline_param->surface_region->width; @@ -1434,7 +1434,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, // nothing to do here } else { /* not support other format as input */ - assert(0); + ASSERT_RET(0, VA_STATUS_ERROR_UNIMPLEMENTED); } if (proc_ctx->format_convert_flags & PRE_FORMAT_CONVERT) { @@ -1474,8 +1474,8 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, /* Nothing to do here */ } else { - /* not support other format as input */ - assert(0); + /* not support other format as input */ + ASSERT_RET(0, VA_STATUS_ERROR_UNIMPLEMENTED); } if(proc_ctx->format_convert_flags & POST_FORMAT_CONVERT || @@ -1517,7 +1517,7 @@ int hsw_veb_pre_format_convert(VADriverContextP ctx, } } - return 0; + return VA_STATUS_SUCCESS; } VAStatus @@ -1701,7 +1701,9 @@ gen75_vebox_process_picture(VADriverContextP ctx, if (status != VA_STATUS_SUCCESS) return status; - hsw_veb_pre_format_convert(ctx, proc_ctx); + 
status = hsw_veb_pre_format_convert(ctx, proc_ctx); + if (status != VA_STATUS_SUCCESS) + return status; status = gen75_vebox_ensure_surfaces(ctx, proc_ctx); if (status != VA_STATUS_SUCCESS) @@ -1931,7 +1933,9 @@ gen8_vebox_process_picture(VADriverContextP ctx, if (status != VA_STATUS_SUCCESS) return status; - hsw_veb_pre_format_convert(ctx, proc_ctx); + status = hsw_veb_pre_format_convert(ctx, proc_ctx); + if (status != VA_STATUS_SUCCESS) + return status; status = gen75_vebox_ensure_surfaces(ctx, proc_ctx); if (status != VA_STATUS_SUCCESS) @@ -2400,7 +2404,9 @@ gen9_vebox_process_picture(VADriverContextP ctx, if (status != VA_STATUS_SUCCESS) return status; - hsw_veb_pre_format_convert(ctx, proc_ctx); + status = hsw_veb_pre_format_convert(ctx, proc_ctx); + if (status != VA_STATUS_SUCCESS) + return status; status = gen75_vebox_ensure_surfaces(ctx, proc_ctx); if (status != VA_STATUS_SUCCESS) -- cgit v1.2.1 From 890c274ebdd3f6e4966c4defa4054d93df766402 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Sun, 1 Jan 2012 01:24:19 +0000 Subject: Encoding: mbmv cost table related changes for ROI v3:remove the warning according to haihao's comments v2: merge three mbmv cost table related patches together. Encoding:Abstract the calculation of mbmv cost for qp as one function. Encoding:Add one function that initializes mbmv cost table for supported Qp range. Encoding:Setup one cost_table surface state for VME shader According to haihao's comments, free pointer directly. v1: format/style alignment accordingly to avoid the warning. Currently the length of VME MEDIA_OBJECT command on Ivy can't exceed 8 dwords. If more parameters need to be passed, the buffer length should be enlarged.
Pass the Qp parameter into VME shader Signed-off-by: Zhao Yakui Signed-off-by: pjl Signed-off-by: Pengfei Qu (cherry picked from commit 6e5f956fb4a291b38c3613c32ee20b3a40a831eb) --- src/gen6_mfc_common.c | 160 ++++++++++++++++++++++++++++++++++++++++++-------- src/gen6_vme.h | 19 ++++++ src/gen75_vme.c | 35 +++++++++-- src/gen7_vme.c | 24 ++++++-- src/gen8_vme.c | 21 +++++-- 5 files changed, 219 insertions(+), 40 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 59f7785f..233c2c6a 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -764,30 +764,14 @@ static float intel_lambda_qp(int qp) return lambdaf; } - -void intel_vme_update_mbmv_cost(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) +static +void intel_h264_calc_mbmvcost_qp(int qp, + int slice_type, + uint8_t *vme_state_message) { - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct gen6_vme_context *vme_context = encoder_context->vme_context; - VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - int qp, m_cost, j, mv_count; - uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message); + int m_cost, j, mv_count; float lambda, m_costf; - int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); - - - if (encoder_context->rate_control_mode == VA_RC_CQP) - qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; - else - qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - - if (vme_state_message == NULL) - return; - assert(qp <= QP_MAX); lambda = intel_lambda_qp(qp); @@ -880,6 +864,31 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx, vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f); } } + return; +} + +void 
intel_vme_update_mbmv_cost(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message); + + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; + + if (vme_state_message == NULL) + return; + + intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message); } void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx, @@ -1023,6 +1032,16 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, int mb_row; int s; unsigned int *command_ptr; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; #define USE_SCOREBOARD (1 << 21) @@ -1062,7 +1081,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, } } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); 
*command_ptr++ = kernel; *command_ptr++ = USE_SCOREBOARD; /* Indirect data */ @@ -1073,6 +1092,8 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* QP occupies one byte */ + *command_ptr++ = qp; x_inner -= 2; y_inner += 1; } @@ -1106,7 +1127,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, } } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = USE_SCOREBOARD; /* Indirect data */ @@ -1117,6 +1138,8 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* qp occupies one byte */ + *command_ptr++ = qp; x_inner -= 2; y_inner += 1; @@ -1649,6 +1672,97 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx, return; } +void +intel_h264_initialize_mbmv_cost(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + dri_bo *bo; + uint8_t *cost_table; + + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + + if (slice_type == SLICE_TYPE_I) { + if (vme_context->i_qp_cost_table) + return; + } else if (slice_type == SLICE_TYPE_P) { + if (vme_context->p_qp_cost_table) + return; + } else { + if (vme_context->b_qp_cost_table) + return; + } + + /* It is enough to allocate 32 bytes for each qp. 
*/ + bo = dri_bo_alloc(i965->intel.bufmgr, + "cost_table ", + QP_MAX * 32, + 64); + + dri_bo_map(bo, 1); + assert(bo->virtual); + cost_table = (uint8_t *)(bo->virtual); + for (qp = 0; qp < QP_MAX; qp++) { + intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table); + cost_table += 32; + } + + dri_bo_unmap(bo); + + if (slice_type == SLICE_TYPE_I) { + vme_context->i_qp_cost_table = bo; + } else if (slice_type == SLICE_TYPE_P) { + vme_context->p_qp_cost_table = bo; + } else { + vme_context->b_qp_cost_table = bo; + } + + vme_context->cost_table_size = QP_MAX * 32; + return; +} + +extern void +intel_h264_setup_cost_surface(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned long binding_table_offset, + unsigned long surface_state_offset) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + dri_bo *bo; + + + struct i965_buffer_surface cost_table; + + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + + if (slice_type == SLICE_TYPE_I) { + bo = vme_context->i_qp_cost_table; + } else if (slice_type == SLICE_TYPE_P) { + bo = vme_context->p_qp_cost_table; + } else { + bo = vme_context->b_qp_cost_table; + } + + cost_table.bo = bo; + cost_table.num_blocks = QP_MAX; + cost_table.pitch = 16; + cost_table.size_block = 32; + + vme_context->vme_buffer_suface_setup(ctx, + &vme_context->gpe_context, + &cost_table, + binding_table_offset, + surface_state_offset); +} + /* HEVC */ static int hevc_temporal_find_surface(VAPictureHEVC *curr_pic, diff --git a/src/gen6_vme.h b/src/gen6_vme.h index dc568acf..50313399 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -46,6 +46,8 @@ #define GEN6_VME_KERNEL_NUMBER 3 +#define INTEL_COST_TABLE_OFFSET 8 + struct encode_state; struct intel_encoder_context; @@ -91,6 +93,11 @@ struct gen6_vme_context struct 
object_surface *used_reference_objects[2]; void *used_references[2]; unsigned int ref_index_in_mb[2]; + + dri_bo *i_qp_cost_table; + dri_bo *p_qp_cost_table; + dri_bo *b_qp_cost_table; + int cost_table_size; }; #define MPEG2_PIC_WIDTH_HEIGHT 30 @@ -200,4 +207,16 @@ void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx, extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); extern Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); + +extern void +intel_h264_initialize_mbmv_cost(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); + +extern void +intel_h264_setup_cost_surface(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + unsigned long binding_table_offset, + unsigned long surface_state_offset); #endif /* _GEN6_VME_H_ */ diff --git a/src/gen75_vme.c b/src/gen75_vme.c index a85d6b3e..dcf170ec 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -280,6 +280,9 @@ gen75_vme_surface_setup(VADriverContextP ctx, /* VME output */ gen75_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context); gen75_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + intel_h264_setup_cost_surface(ctx, encode_state, encoder_context, + BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET), + SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET)); return VA_STATUS_SUCCESS; } @@ -488,6 +491,16 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, int mb_x = 0, mb_y = 0; int i, s; unsigned int *command_ptr; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = 
intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; dri_bo_map(vme_context->vme_batchbuffer.bo, 1); command_ptr = vme_context->vme_batchbuffer.bo->virtual; @@ -525,7 +538,7 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, if ((i == mb_width) && slice_mb_x) { mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; @@ -535,6 +548,8 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* qp occupies one byte */ + *command_ptr++ = qp; i += 1; } @@ -647,7 +662,8 @@ static VAStatus gen75_vme_prepare(VADriverContextP ctx, } intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); - + intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + /*Setup all the memory object*/ gen75_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); gen75_vme_interface_setup(ctx, encode_state, encoder_context); @@ -1002,10 +1018,17 @@ gen75_vme_context_destroy(void *context) dri_bo_unreference(vme_context->vme_batchbuffer.bo); vme_context->vme_batchbuffer.bo = NULL; - if (vme_context->vme_state_message) { - free(vme_context->vme_state_message); - vme_context->vme_state_message = NULL; - } + free(vme_context->vme_state_message); + vme_context->vme_state_message = NULL; + + dri_bo_unreference(vme_context->i_qp_cost_table); + vme_context->i_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->p_qp_cost_table); + vme_context->p_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->b_qp_cost_table); + 
vme_context->b_qp_cost_table = NULL; free(vme_context); } diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 9da44d1e..fb6358f3 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -227,7 +227,7 @@ gen7_vme_output_vme_batchbuffer_setup(VADriverContextP ctx, int height_in_mbs = pSequenceParameter->picture_height_in_mbs; vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1; - vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */ + vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */ vme_context->vme_batchbuffer.pitch = 16; vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, "VME batchbuffer", @@ -270,6 +270,9 @@ gen7_vme_surface_setup(VADriverContextP ctx, /* VME output */ gen7_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context); gen7_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + intel_h264_setup_cost_surface(ctx, encode_state, encoder_context, + BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET), + SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET)); return VA_STATUS_SUCCESS; } @@ -669,8 +672,10 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx, (vme_context->h264_level != pSequenceParameter->level_idc)) { vme_context->h264_level = pSequenceParameter->level_idc; } - + intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); + intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + /*Setup all the memory object*/ gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); gen7_vme_interface_setup(ctx, encode_state, encoder_context); @@ -1018,10 +1023,17 @@ gen7_vme_context_destroy(void *context) dri_bo_unreference(vme_context->vme_batchbuffer.bo); vme_context->vme_batchbuffer.bo = NULL; - if (vme_context->vme_state_message) { - free(vme_context->vme_state_message); - vme_context->vme_state_message = NULL; - } + free(vme_context->vme_state_message); + vme_context->vme_state_message = NULL; + + dri_bo_unreference(vme_context->i_qp_cost_table); + 
vme_context->i_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->p_qp_cost_table); + vme_context->p_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->b_qp_cost_table); + vme_context->b_qp_cost_table = NULL; free(vme_context); } diff --git a/src/gen8_vme.c b/src/gen8_vme.c index edf6060c..998f7d65 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -314,6 +314,9 @@ gen8_vme_surface_setup(VADriverContextP ctx, /* VME output */ gen8_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context); gen8_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context); + intel_h264_setup_cost_surface(ctx, encode_state, encoder_context, + BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET), + SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET)); return VA_STATUS_SUCCESS; } @@ -721,7 +724,8 @@ static VAStatus gen8_vme_prepare(VADriverContextP ctx, } intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); - + intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + /*Setup all the memory object*/ gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); gen8_vme_interface_setup(ctx, encode_state, encoder_context); @@ -1287,10 +1291,17 @@ gen8_vme_context_destroy(void *context) dri_bo_unreference(vme_context->vme_batchbuffer.bo); vme_context->vme_batchbuffer.bo = NULL; - if (vme_context->vme_state_message) { - free(vme_context->vme_state_message); - vme_context->vme_state_message = NULL; - } + free(vme_context->vme_state_message); + vme_context->vme_state_message = NULL; + + dri_bo_unreference(vme_context->i_qp_cost_table); + vme_context->i_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->p_qp_cost_table); + vme_context->p_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->b_qp_cost_table); + vme_context->b_qp_cost_table = NULL; free(vme_context); } -- cgit v1.2.1 From 1943adbd697564de463e2f84291e579135e7a915 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Sun, 1 Jan 2012 02:18:44 +0000 Subject: Encoding: 
VME shader reads mbmv_cost from cost_table surface instead of constant buffer on Haswell This is to do the preparation for enhanced features. Signed-off-by: Zhao Yakui Signed-off-by: pjl Signed-off-by: Pengfei Qu (cherry picked from commit 579514daa9456357fd0896332486ea3ca5cb502f) --- src/shaders/vme/inter_bframe_haswell.asm | 24 +++++++++++++++++++++++- src/shaders/vme/inter_bframe_haswell.g75b | 23 ++++++++++++++--------- src/shaders/vme/inter_frame_haswell.asm | 24 ++++++++++++++++++++++-- src/shaders/vme/inter_frame_haswell.g75b | 11 ++++++++--- src/shaders/vme/intra_frame_haswell.asm | 23 ++++++++++++++++++++++- src/shaders/vme/intra_frame_haswell.g75b | 7 ++++++- src/shaders/vme/vme75.inc | 4 ++++ 7 files changed, 99 insertions(+), 17 deletions(-) diff --git a/src/shaders/vme/inter_bframe_haswell.asm b/src/shaders/vme/inter_bframe_haswell.asm index ff914874..dbc15c52 100644 --- a/src/shaders/vme/inter_bframe_haswell.asm +++ b/src/shaders/vme/inter_bframe_haswell.asm @@ -485,9 +485,31 @@ __mb_hwdep_end: asr (4) mb_ref_win.0<1>:w mb_mvp_ref.0<4,4,1>:w 2:w {align1}; add (4) mb_ref_win.8<1>:w mb_ref_win.0<4,4,1>:w 3:w {align1}; and (4) mb_ref_win.16<1>:uw mb_ref_win.8<4,4,1>:uw 0xFFFC:uw {align1}; + +mov (8) msg_reg0.0<1>:ud 0:ud {align1}; +mov (1) msg_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) tmp_reg0.0<1>:UD qp_ub<0,1,0>:ub {align1}; +mul (1) msg_reg0.8<1>:ud tmp_reg0.0<1>:ud 2:ud {align1}; + +send (16) + msg_ind + vme_cost_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + BIND_IDX_COST, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + /* m2, get the MV/Mb cost passed from constant buffer when spawning thread by MEDIA_OBJECT */ -mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; +mov (8) vme_m2<1>:UD vme_cost_wb.0<8,8,1>:UD {align1}; mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; diff --git a/src/shaders/vme/inter_bframe_haswell.g75b
b/src/shaders/vme/inter_bframe_haswell.g75b index cabef201..113eac04 100644 --- a/src/shaders/vme/inter_bframe_haswell.g75b +++ b/src/shaders/vme/inter_bframe_haswell.g75b @@ -64,7 +64,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000001 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000013e0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00001430 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2af001e9, 0x00000000, 0x00010001 }, @@ -100,7 +100,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000011a0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000011f0 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2b1001e9, 0x00000000, 0x00010001 }, @@ -135,7 +135,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000f70 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000fc0 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2b3001e9, 0x00000000, 0x00010001 }, @@ -170,7 +170,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000003 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000d40 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000d90 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2b3201ed, 0x00000000, 0x00010001 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, @@ -211,13 +211,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 
0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000009b0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000a00 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000950 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000009a0 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, { 0x01000010, 0x20003da4, 0x00200af6, 0x00000000 }, @@ -236,18 +236,23 @@ { 0x00000001, 0x2fa401ad, 0x00000b08, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b28, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000820 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000870 }, { 0x00000001, 0x2ac401ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000aea, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b0a, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b2a, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000007c0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000810 }, { 0x00000001, 0x2ac601ad, 0x00000fe4, 0x00000000 }, { 0x0040000c, 0x2a803dad, 0x00690ac0, 0x00020002 }, { 0x00400040, 0x2a883dad, 0x00690a80, 0x00030003 }, { 0x00400005, 0x2a902d29, 0x00690a88, 0xfffcfffc }, - { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28000061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28140231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 }, + { 0x00000041, 0x28080c21, 0x00200400, 0x00000002 }, + { 0x0a800031, 0x25801ca1, 0x00000800, 0x02180208 }, + { 0x00600001, 0x25600021, 0x008d0580, 0x00000000 }, { 
0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, { 0x00000001, 0x23800061, 0x00000000, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_haswell.asm b/src/shaders/vme/inter_frame_haswell.asm index 8a829f62..7f08d3ef 100644 --- a/src/shaders/vme/inter_frame_haswell.asm +++ b/src/shaders/vme/inter_frame_haswell.asm @@ -387,10 +387,30 @@ __mb_hwdep_end: asr (2) mb_ref_win.0<1>:w mb_mvp_ref.0<2,2,1>:w 2:w {align1}; add (2) mb_ref_win.8<1>:w mb_ref_win.0<2,2,1>:w 3:w {align1}; and (2) mb_ref_win.16<1>:uw mb_ref_win.8<2,2,1>:uw 0xFFFC:uw {align1}; - + +mov (8) msg_reg0.0<1>:ud 0:ud {align1}; +mov (1) msg_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) tmp_reg0.0<1>:UD qp_ub<0,1,0>:ub {align1}; +mul (1) msg_reg0.8<1>:ud tmp_reg0.0<1>:ud 2:ud {align1}; + +send (16) + msg_ind + vme_cost_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + BIND_IDX_COST, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; /* m2, get the MV/Mb cost passed from constant buffer when spawning thread by MEDIA_OBJECT */ -mov (8) vme_m2<1>:UD r1.0<8,8,1>:UD {align1}; +mov (8) vme_m2<1>:UD vme_cost_wb.0<8,8,1>:UD {align1}; mov (8) vme_msg_2<1>:UD vme_m2.0<8,8,1>:UD {align1}; diff --git a/src/shaders/vme/inter_frame_haswell.g75b b/src/shaders/vme/inter_frame_haswell.g75b index 1a60c511..5aa520e1 100644 --- a/src/shaders/vme/inter_frame_haswell.g75b +++ b/src/shaders/vme/inter_frame_haswell.g75b @@ -145,18 +145,23 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000930 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000980 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 
0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000008d0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000920 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc }, - { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28000061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28140231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 }, + { 0x00000041, 0x28080c21, 0x00200400, 0x00000002 }, + { 0x0a800031, 0x25801ca1, 0x00000800, 0x02180208 }, + { 0x00600001, 0x25600021, 0x008d0580, 0x00000000 }, { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 }, { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, { 0x00000001, 0x23800061, 0x00000000, 0x00000000 }, diff --git a/src/shaders/vme/intra_frame_haswell.asm b/src/shaders/vme/intra_frame_haswell.asm index c4cb76cf..ac4a6ba2 100644 --- a/src/shaders/vme/intra_frame_haswell.asm +++ b/src/shaders/vme/intra_frame_haswell.asm @@ -89,9 +89,30 @@ mov (1) read1_header.8<1>:UD BLOCK_8X4 {align1}; mov (8) msg_reg0.0<1>:UD read1_header.0<8,8,1>:UD {align1}; send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1}; +mov (8) msg_reg0.0<1>:ud 0:ud {align1}; +mov (1) msg_reg0.20<1>:UB thread_id_ub {align1}; /* dispatch id */ +mov (1) tmp_reg0.0<1>:UD qp_ub<0,1,0>:ub {align1}; +mul (1) msg_reg0.8<1>:ud tmp_reg0.0<1>:ud 2:ud {align1}; + +send (16) + msg_ind + vme_cost_wb.0<1>:ud + null + data_port( + OBR_CACHE_TYPE, + OBR_MESSAGE_TYPE, + OBR_CONTROL_2, + BIND_IDX_COST, + OBR_WRITE_COMMIT_CATEGORY, + OBR_HEADER_PRESENT + ) + mlen 1 + rlen 1 + {align1}; + /* m2, get the MV/Mb cost passed by constant buffer when creating EU thread by MEDIA_OBJECT */ -mov (8) vme_msg_2<1>:UD r1.0<8,8,1>:UD {align1}; +mov (8) vme_msg_2<1>:UD 
vme_cost_wb<8,8,1>:UD {align1}; /* m3 */ mov (8) vme_msg_3<1>:UD 0x0:UD {align1}; diff --git a/src/shaders/vme/intra_frame_haswell.g75b b/src/shaders/vme/intra_frame_haswell.g75b index 4bb8ad92..88866d34 100644 --- a/src/shaders/vme/intra_frame_haswell.g75b +++ b/src/shaders/vme/intra_frame_haswell.g75b @@ -33,7 +33,12 @@ { 0x00000001, 0x242800e1, 0x00000000, 0x00070003 }, { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 }, { 0x04600031, 0x26201cb1, 0x00000800, 0x02190006 }, - { 0x00600001, 0x28400021, 0x008d0020, 0x00000000 }, + { 0x00600001, 0x28000061, 0x00000000, 0x00000000 }, + { 0x00000001, 0x28140231, 0x00000014, 0x00000000 }, + { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 }, + { 0x00000041, 0x28080c21, 0x00200400, 0x00000002 }, + { 0x0a800031, 0x25801ca1, 0x00000800, 0x02180208 }, + { 0x00600001, 0x28400021, 0x008d0580, 0x00000000 }, { 0x00600001, 0x28600061, 0x00000000, 0x00000000 }, { 0x00000001, 0x23800061, 0x00000000, 0x00000000 }, { 0x00000005, 0x23840c21, 0x00000384, 0xff000000 }, diff --git a/src/shaders/vme/vme75.inc b/src/shaders/vme/vme75.inc index 75c461eb..fa7aa7e5 100644 --- a/src/shaders/vme/vme75.inc +++ b/src/shaders/vme/vme75.inc @@ -56,6 +56,7 @@ define(`BIND_IDX_VME_REF0', `1') define(`BIND_IDX_VME_REF1', `2') define(`BIND_IDX_OUTPUT', `3') define(`BIND_IDX_INEP', `4') +define(`BIND_IDX_COST', `8') define(`SUB_PEL_MODE_INTEGER', `0x00000000') define(`SUB_PEL_MODE_HALF', `0x00001000') @@ -153,6 +154,8 @@ define(`input_mb_intra_ub', `inline_reg0.5') define(`num_macroblocks', `inline_reg0.6') define(`quality_level_ub', `inline_reg0.7') +define(`qp_ub', `inline_reg0.8') + /* * GRF 6~11 -- reserved */ @@ -235,6 +238,7 @@ define(`tmp_uw6', `tmp_rega.12') define(`tmp_uw7', `tmp_rega.14') define(`vme_m2', `r43') +define(`vme_cost_wb', `r44') /* * MRF registers */ -- cgit v1.2.1 From ff1ebe3e92734222610c96ec3ba30278ed2bafa0 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Sun, 1 Jan 2012 02:18:48 +0000 Subject: encoding: Send VME instruction 
uses one register as the desc parameter The desc parameter of current VME send instruction is immediate. And it can't be updated based on the input parameter. Signed-off-by: Zhao Yakui Signed-off-by: pjl Signed-off-by: Pengfei Qu (cherry picked from commit 3013a910d05ea36654731b18a361c7385a5de3ab) --- src/shaders/vme/inter_bframe_ivb.asm | 17 +++-------------- src/shaders/vme/inter_bframe_ivb.g7b | 19 ++++++++++--------- src/shaders/vme/inter_frame_ivb.asm | 16 +++------------- src/shaders/vme/inter_frame_ivb.g7b | 7 ++++--- src/shaders/vme/intra_frame_ivb.asm | 17 +++-------------- src/shaders/vme/intra_frame_ivb.g7b | 3 ++- 6 files changed, 25 insertions(+), 54 deletions(-) diff --git a/src/shaders/vme/inter_bframe_ivb.asm b/src/shaders/vme/inter_bframe_ivb.asm index e7be377f..16e31785 100644 --- a/src/shaders/vme/inter_bframe_ivb.asm +++ b/src/shaders/vme/inter_bframe_ivb.asm @@ -542,20 +542,9 @@ mov (1) vme_m1.16<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; mov (1) vme_m1.20<1>:ud mb_mvp_ref.4<0,1,0>:ud {align1}; mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; - -send (8) - vme_msg_ind - vme_wb - null - vme( - BIND_IDX_VME, - 0, - 0, - VME_MESSAGE_TYPE_MIXED - ) - mlen vme_msg_length - rlen vme_inter_wb_length - {align1}; +/* Use one register as the descriptor of send instruction */ +mov (1) a0.0<1>:ud 0x0a686000:ud {align1}; +send (1) vme_wb.0<1>:ud vme_msg_0 0x08 a0.0<0,1,0>:ud {align1}; and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; diff --git a/src/shaders/vme/inter_bframe_ivb.g7b b/src/shaders/vme/inter_bframe_ivb.g7b index adcb3907..7f24b632 100644 --- a/src/shaders/vme/inter_bframe_ivb.g7b +++ b/src/shaders/vme/inter_bframe_ivb.g7b @@ -53,7 +53,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000001 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x00000248 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000024a }, { 0x00000001, 
0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2af001e9, 0x00000000, 0x00010001 }, @@ -91,7 +91,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000001fc }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000001fe }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2b1001e9, 0x00000000, 0x00010001 }, @@ -127,7 +127,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000001b4 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000001b6 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2b3001e9, 0x00000000, 0x00010001 }, @@ -164,7 +164,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000003 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x0000016a }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000016c }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2b3201ed, 0x00000000, 0x00010001 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, @@ -205,13 +205,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000f8 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000fa }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 
0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000ec }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000ee }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, { 0x01000010, 0x20003da4, 0x00200af6, 0x00000000 }, @@ -230,13 +230,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b08, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b28, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000c6 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000c8 }, { 0x00000001, 0x2ac401ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000aea, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b0a, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b2a, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000ba }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000bc }, { 0x00000001, 0x2ac601ad, 0x00000fe4, 0x00000000 }, { 0x0040000c, 0x2a803dad, 0x00690ac0, 0x00020002 }, { 0x00400040, 0x2a883dad, 0x00690a80, 0x00030003 }, @@ -276,7 +276,8 @@ { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 }, { 0x00000001, 0x24740021, 0x00000ac4, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, - { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 }, + { 0x00000001, 0x22000060, 0x00000000, 0x0a686000 }, + { 0x08000031, 0x21800221, 0x00000800, 0x00000200 }, { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x0000002c }, { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, diff --git a/src/shaders/vme/inter_frame_ivb.asm b/src/shaders/vme/inter_frame_ivb.asm index 501257a3..060dae46 100644 --- a/src/shaders/vme/inter_frame_ivb.asm +++ b/src/shaders/vme/inter_frame_ivb.asm @@ -458,19 +458,9 @@ mov (1) vme_m1.20<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; -send (8) - vme_msg_ind - vme_wb - null - vme( - BIND_IDX_VME, - 0, - 0, - 
VME_MESSAGE_TYPE_MIXED - ) - mlen vme_msg_length - rlen vme_inter_wb_length - {align1}; +/* Use one register as the descriptor of send instruction */ +mov (1) a0.0<1>:ud 0x0a686000:ud {align1}; +send (1) vme_wb.0<1>:ud vme_msg_0 0x08 a0.0<0,1,0>:ud {align1}; and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; diff --git a/src/shaders/vme/inter_frame_ivb.g7b b/src/shaders/vme/inter_frame_ivb.g7b index 7ed38c5e..df9572f2 100644 --- a/src/shaders/vme/inter_frame_ivb.g7b +++ b/src/shaders/vme/inter_frame_ivb.g7b @@ -141,13 +141,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000d0 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000d2 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000c4 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000c6 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, @@ -193,7 +193,8 @@ { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 }, { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, - { 0x08600031, 0x21801cbd, 0x00000800, 0x0a686000 }, + { 0x00000001, 0x22000060, 0x00000000, 0x0a686000 }, + { 0x08000031, 0x21800221, 0x00000800, 0x00000200 }, { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x0000002c }, { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, diff --git a/src/shaders/vme/intra_frame_ivb.asm b/src/shaders/vme/intra_frame_ivb.asm index 6a009ccd..718af864 100644 --- 
a/src/shaders/vme/intra_frame_ivb.asm +++ b/src/shaders/vme/intra_frame_ivb.asm @@ -104,20 +104,9 @@ mov (8) vme_msg_4<1>:UD 0x0 {align1}; mov (16) vme_msg_4.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1}; mov (1) vme_msg_4.16<1>:UD INTRA_PREDICTORE_MODE {align1}; -send (8) - vme_msg_ind - vme_wb - null - vme( - BIND_IDX_VME, - 0, - 0, - VME_MESSAGE_TYPE_INTRA - ) - mlen vme_msg_length - rlen vme_intra_wb_length - {align1}; - +/* Use one register as the descriptor of send instruction */ +mov (1) a0.0<1>:ud 0x0a184000:ud {align1}; +send (1) vme_wb.0<1>:ud vme_msg_0 0x08 a0.0<0,1,0>:ud {align1}; /* * Oword Block Write message diff --git a/src/shaders/vme/intra_frame_ivb.g7b b/src/shaders/vme/intra_frame_ivb.g7b index 748cfdf8..7dd16fc9 100644 --- a/src/shaders/vme/intra_frame_ivb.g7b +++ b/src/shaders/vme/intra_frame_ivb.g7b @@ -35,7 +35,8 @@ { 0x00600001, 0x288000e1, 0x00000000, 0x00000000 }, { 0x00800001, 0x28800231, 0x00cf03a3, 0x00000000 }, { 0x00000001, 0x28900061, 0x00000000, 0x11111111 }, - { 0x08600031, 0x21801cbd, 0x00000800, 0x0a184000 }, + { 0x00000001, 0x22000060, 0x00000000, 0x0a184000 }, + { 0x08000031, 0x21800221, 0x00000800, 0x00000200 }, { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, { 0x00000001, 0x28240021, 0x00000190, 0x00000000 }, -- cgit v1.2.1 From 7e9ad8e3d7ab0d0e7cb4eca03ff5d96a72f8e35e Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Sun, 1 Jan 2012 02:18:52 +0000 Subject: Encoding: Pass the qp parameter into VME shader and VME shader select the different cost table based on input Qp on Ivy v1: add assert after bo map In order to support that macroblocks have the different QP to do the motion prediction, different cost tables are provided so that the VME engine can select the different mode/motion-vector cost tables based on the input Qp. 
Signed-off-by: Zhao Yakui Signed-off-by: pjl Signed-off-by: Pengfei Qu (cherry picked from commit ae08e61f0746c5ed34d740ad2ed3731e43b5b456) --- src/gen7_vme.c | 155 +++++++++++++++++++++++++---------- src/shaders/vme/inter_bframe_ivb.asm | 13 ++- src/shaders/vme/inter_bframe_ivb.g7b | 24 ++++-- src/shaders/vme/inter_frame_ivb.asm | 12 ++- src/shaders/vme/inter_frame_ivb.g7b | 12 ++- src/shaders/vme/intra_frame_ivb.asm | 13 ++- src/shaders/vme/intra_frame_ivb.g7b | 8 +- src/shaders/vme/vme7.inc | 1 + 8 files changed, 179 insertions(+), 59 deletions(-) diff --git a/src/gen7_vme.c b/src/gen7_vme.c index fb6358f3..d9898a7b 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -374,58 +374,117 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx, int i; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY); + dri_bo *cost_bo; + int slice_type; + uint8_t *cost_ptr; + int qp; + + slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + if (slice_type == SLICE_TYPE_I) { + cost_bo = vme_context->i_qp_cost_table; + } else if (slice_type == SLICE_TYPE_P) { + cost_bo = vme_context->p_qp_cost_table; + } else { + cost_bo = vme_context->b_qp_cost_table; + } mb_cost_table = (unsigned int *)vme_context->vme_state_message; - //building VME state message dri_bo_map(vme_context->vme_state.bo, 1); + dri_bo_map(cost_bo, 0); assert(vme_context->vme_state.bo->virtual); + assert(cost_bo->virtual); vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual; - if (((slice_param->slice_type == SLICE_TYPE_P) || - (slice_param->slice_type == SLICE_TYPE_SP)) && - !is_low_quality) { - vme_state_message[0] = 0x01010101; - vme_state_message[1] = 0x10010101; - vme_state_message[2] = 0x0F0F0F0F; - vme_state_message[3] = 0x100F0F0F; - vme_state_message[4] = 0x01010101; - vme_state_message[5] = 0x10010101; - 
vme_state_message[6] = 0x0F0F0F0F; - vme_state_message[7] = 0x100F0F0F; - vme_state_message[8] = 0x01010101; - vme_state_message[9] = 0x10010101; - vme_state_message[10] = 0x0F0F0F0F; - vme_state_message[11] = 0x000F0F0F; - vme_state_message[12] = 0x00; - vme_state_message[13] = 0x00; - } else { - vme_state_message[0] = 0x10010101; - vme_state_message[1] = 0x100F0F0F; - vme_state_message[2] = 0x10010101; - vme_state_message[3] = 0x000F0F0F; - vme_state_message[4] = 0; - vme_state_message[5] = 0; - vme_state_message[6] = 0; - vme_state_message[7] = 0; - vme_state_message[8] = 0; - vme_state_message[9] = 0; - vme_state_message[10] = 0; - vme_state_message[11] = 0; - vme_state_message[12] = 0; - vme_state_message[13] = 0; - } + cost_ptr = (uint8_t *)cost_bo->virtual; + + /* up to 8 VME_SEARCH_PATH_LUT is supported */ + /* Two subsequent qp will share the same mode/motion-vector cost table */ + /* the range is from 0-51 */ + for (i = 0; i < 8; i++) { + + vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual + + i * 32; + if ((slice_type == SLICE_TYPE_P) && !is_low_quality) { + vme_state_message[0] = 0x01010101; + vme_state_message[1] = 0x10010101; + vme_state_message[2] = 0x0F0F0F0F; + vme_state_message[3] = 0x100F0F0F; + vme_state_message[4] = 0x01010101; + vme_state_message[5] = 0x10010101; + vme_state_message[6] = 0x0F0F0F0F; + vme_state_message[7] = 0x100F0F0F; + vme_state_message[8] = 0x01010101; + vme_state_message[9] = 0x10010101; + vme_state_message[10] = 0x0F0F0F0F; + vme_state_message[11] = 0x000F0F0F; + vme_state_message[12] = 0x00; + vme_state_message[13] = 0x00; + } else { + vme_state_message[0] = 0x10010101; + vme_state_message[1] = 0x100F0F0F; + vme_state_message[2] = 0x10010101; + vme_state_message[3] = 0x000F0F0F; + vme_state_message[4] = 0; + vme_state_message[5] = 0; + vme_state_message[6] = 0; + vme_state_message[7] = 0; + vme_state_message[8] = 0; + vme_state_message[9] = 0; + vme_state_message[10] = 0; + vme_state_message[11] = 0; 
+ vme_state_message[12] = 0; + vme_state_message[13] = 0; + } - vme_state_message[14] = (mb_cost_table[2] & 0xFFFF); - vme_state_message[15] = 0; - vme_state_message[16] = mb_cost_table[0]; - vme_state_message[17] = mb_cost_table[1]; - vme_state_message[18] = mb_cost_table[3]; - vme_state_message[19] = mb_cost_table[4]; + qp = 8 * i; - for(i = 20; i < 32; i++) { - vme_state_message[i] = 0; + /* when qp is greater than 51, use the cost_table of qp=51 to fulfill */ + if (qp > 51) { + qp = 51; + } + /* Setup the four LUT sets for MbMV cost */ + mb_cost_table = (unsigned int *)(cost_ptr + qp * 32); + vme_state_message[14] = (mb_cost_table[2] & 0xFFFF); + vme_state_message[16] = mb_cost_table[0]; + vme_state_message[17] = mb_cost_table[1]; + vme_state_message[18] = mb_cost_table[3]; + vme_state_message[19] = mb_cost_table[4]; + + qp += 2; + if (qp > 51) { + qp = 51; + } + mb_cost_table = (unsigned int *)(cost_ptr + qp * 32); + vme_state_message[14] |= ((mb_cost_table[2] & 0xFFFF) << 16); + vme_state_message[20] = mb_cost_table[0]; + vme_state_message[21] = mb_cost_table[1]; + vme_state_message[22] = mb_cost_table[3]; + vme_state_message[23] = mb_cost_table[4]; + + qp += 2; + if (qp > 51) { + qp = 51; + } + vme_state_message[15] = (mb_cost_table[2] & 0xFFFF); + vme_state_message[24] = mb_cost_table[0]; + vme_state_message[25] = mb_cost_table[1]; + vme_state_message[26] = mb_cost_table[3]; + vme_state_message[27] = mb_cost_table[4]; + + qp += 2; + if (qp > 51) { + qp = 51; + } + mb_cost_table = (unsigned int *)(cost_ptr + qp * 32); + vme_state_message[15] |= ((mb_cost_table[2] & 0xFFFF) << 16); + vme_state_message[28] = mb_cost_table[0]; + vme_state_message[29] = mb_cost_table[1]; + vme_state_message[30] = mb_cost_table[3]; + vme_state_message[31] = mb_cost_table[4]; } + dri_bo_unmap(cost_bo); dri_bo_unmap( vme_context->vme_state.bo); return VA_STATUS_SUCCESS; } @@ -490,7 +549,16 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, int mb_x = 0, mb_y = 0; int i, s, j; 
unsigned int *command_ptr; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; dri_bo_map(vme_context->vme_batchbuffer.bo, 1); command_ptr = vme_context->vme_batchbuffer.bo->virtual; @@ -540,7 +608,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; @@ -551,6 +619,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + *command_ptr++ = qp; i += 1; } diff --git a/src/shaders/vme/inter_bframe_ivb.asm b/src/shaders/vme/inter_bframe_ivb.asm index 16e31785..8277ae55 100644 --- a/src/shaders/vme/inter_bframe_ivb.asm +++ b/src/shaders/vme/inter_bframe_ivb.asm @@ -542,8 +542,19 @@ mov (1) vme_m1.16<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; mov (1) vme_m1.20<1>:ud mb_mvp_ref.4<0,1,0>:ud {align1}; mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; +mov (1) tmp_reg0.0<1>:ud qp_ub<0,1,0>:ub {align1}; +/* lut_subindex */ +and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x06:ud {align1}; +shl (1) tmp_reg0.4<1>:ud tmp_reg1.0<0,1,0>:ud 10:ud {align1}; + +/* lut_index */ +and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x038:ud {align1}; +shl (1) tmp_reg1.4<1>:ud 
tmp_reg1.0<0,1,0>:ud 5:ud {align1}; + +add (1) tmp_reg0.0<1>:ud tmp_reg0.4<0,1,0>:ud tmp_reg1.4<0,1,0>:ud {align1}; /* Use one register as the descriptor of send instruction */ -mov (1) a0.0<1>:ud 0x0a686000:ud {align1}; + +add (1) a0.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x0a686000:ud {align1}; send (1) vme_wb.0<1>:ud vme_msg_0 0x08 a0.0<0,1,0>:ud {align1}; and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; diff --git a/src/shaders/vme/inter_bframe_ivb.g7b b/src/shaders/vme/inter_bframe_ivb.g7b index 7f24b632..79eb292d 100644 --- a/src/shaders/vme/inter_bframe_ivb.g7b +++ b/src/shaders/vme/inter_bframe_ivb.g7b @@ -53,7 +53,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000001 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x0000024a }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000256 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2af001e9, 0x00000000, 0x00010001 }, @@ -91,7 +91,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000001fe }, + { 0x00000020, 0x34001c00, 0x00001400, 0x0000020a }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2b1001e9, 0x00000000, 0x00010001 }, @@ -127,7 +127,7 @@ { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000001b6 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000001c2 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, { 0x00010001, 0x2b3001e9, 0x00000000, 0x00010001 }, @@ -164,7 +164,7 @@ { 0x00000001, 0x2fa40021, 
0x00000b80, 0x00000000 }, { 0x00000001, 0x2fa80061, 0x00000000, 0x00000003 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x0000016c }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000178 }, { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2b3201ed, 0x00000000, 0x00010001 }, { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 }, @@ -205,13 +205,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000fa }, + { 0x00000020, 0x34001c00, 0x00001400, 0x00000106 }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000ee }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000fa }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x00600001, 0x24000061, 0x00000000, 0x00000000 }, { 0x01000010, 0x20003da4, 0x00200af6, 0x00000000 }, @@ -230,13 +230,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b08, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b28, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000c8 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000d4 }, { 0x00000001, 0x2ac401ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000aea, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b0a, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b2a, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000bc }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000c8 }, { 0x00000001, 0x2ac601ad, 0x00000fe4, 0x00000000 }, { 0x0040000c, 0x2a803dad, 0x00690ac0, 0x00020002 }, { 0x00400040, 0x2a883dad, 0x00690a80, 
0x00030003 }, @@ -276,7 +276,13 @@ { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 }, { 0x00000001, 0x24740021, 0x00000ac4, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, - { 0x00000001, 0x22000060, 0x00000000, 0x0a686000 }, + { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 }, + { 0x00000005, 0x24200c21, 0x00000400, 0x00000006 }, + { 0x00000009, 0x24040c21, 0x00000420, 0x0000000a }, + { 0x00000005, 0x24200c21, 0x00000400, 0x00000038 }, + { 0x00000009, 0x24240c21, 0x00000420, 0x00000005 }, + { 0x00000040, 0x24000421, 0x00000404, 0x00000424 }, + { 0x00000040, 0x22000c20, 0x00000400, 0x0a686000 }, { 0x08000031, 0x21800221, 0x00000800, 0x00000200 }, { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x0000002c }, diff --git a/src/shaders/vme/inter_frame_ivb.asm b/src/shaders/vme/inter_frame_ivb.asm index 060dae46..42a99694 100644 --- a/src/shaders/vme/inter_frame_ivb.asm +++ b/src/shaders/vme/inter_frame_ivb.asm @@ -457,9 +457,19 @@ mov (1) vme_m1.16<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; mov (1) vme_m1.20<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1}; mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1}; +mov (1) tmp_reg0.0<1>:ud qp_ub<0,1,0>:ub {align1}; +/* lut_subindex */ +and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x06:ud {align1}; +shl (1) tmp_reg0.4<1>:ud tmp_reg1.0<0,1,0>:ud 10:ud {align1}; +/* lut_index */ +and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x038:ud {align1}; +shl (1) tmp_reg1.4<1>:ud tmp_reg1.0<0,1,0>:ud 5:ud {align1}; + +add (1) tmp_reg0.0<1>:ud tmp_reg0.4<0,1,0>:ud tmp_reg1.4<0,1,0>:ud {align1}; /* Use one register as the descriptor of send instruction */ -mov (1) a0.0<1>:ud 0x0a686000:ud {align1}; + +add (1) a0.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x0a686000:ud {align1}; send (1) vme_wb.0<1>:ud vme_msg_0 0x08 a0.0<0,1,0>:ud {align1}; and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ; diff --git a/src/shaders/vme/inter_frame_ivb.g7b b/src/shaders/vme/inter_frame_ivb.g7b 
index df9572f2..9f0dfae0 100644 --- a/src/shaders/vme/inter_frame_ivb.g7b +++ b/src/shaders/vme/inter_frame_ivb.g7b @@ -141,13 +141,13 @@ { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000d2 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000de }, { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 }, - { 0x00000020, 0x34001c00, 0x00001400, 0x000000c6 }, + { 0x00000020, 0x34001c00, 0x00001400, 0x000000d2 }, { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 }, { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 }, @@ -193,7 +193,13 @@ { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 }, { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 }, { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 }, - { 0x00000001, 0x22000060, 0x00000000, 0x0a686000 }, + { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 }, + { 0x00000005, 0x24200c21, 0x00000400, 0x00000006 }, + { 0x00000009, 0x24040c21, 0x00000420, 0x0000000a }, + { 0x00000005, 0x24200c21, 0x00000400, 0x00000038 }, + { 0x00000009, 0x24240c21, 0x00000420, 0x00000005 }, + { 0x00000040, 0x24000421, 0x00000404, 0x00000424 }, + { 0x00000040, 0x22000c20, 0x00000400, 0x0a686000 }, { 0x08000031, 0x21800221, 0x00000800, 0x00000200 }, { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 }, { 0x00110020, 0x34001c00, 0x00001400, 0x0000002c }, diff --git a/src/shaders/vme/intra_frame_ivb.asm b/src/shaders/vme/intra_frame_ivb.asm index 718af864..b138e885 100644 --- a/src/shaders/vme/intra_frame_ivb.asm +++ b/src/shaders/vme/intra_frame_ivb.asm @@ -104,8 +104,19 @@ mov (8) vme_msg_4<1>:UD 0x0 {align1}; mov (16) vme_msg_4.0<1>:UB 
INEP_COL0.3<32,8,4>:UB {align1}; mov (1) vme_msg_4.16<1>:UD INTRA_PREDICTORE_MODE {align1}; +mov (1) tmp_reg0.0<1>:ud qp_ub<0,1,0>:ub {align1}; +/* lut_subindex */ +and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x06:ud {align1}; +shl (1) tmp_reg0.4<1>:ud tmp_reg1.0<0,1,0>:ud 10:ud {align1}; + +/* lut_index */ +and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x038:ud {align1}; +shl (1) tmp_reg1.4<1>:ud tmp_reg1.0<0,1,0>:ud 5:ud {align1}; + +add (1) tmp_reg0.0<1>:ud tmp_reg0.4<0,1,0>:ud tmp_reg1.4<0,1,0>:ud {align1}; /* Use one register as the descriptor of send instruction */ -mov (1) a0.0<1>:ud 0x0a184000:ud {align1}; + +add (1) a0.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x0a184000:ud {align1}; send (1) vme_wb.0<1>:ud vme_msg_0 0x08 a0.0<0,1,0>:ud {align1}; /* diff --git a/src/shaders/vme/intra_frame_ivb.g7b b/src/shaders/vme/intra_frame_ivb.g7b index 7dd16fc9..d2aa1ebc 100644 --- a/src/shaders/vme/intra_frame_ivb.g7b +++ b/src/shaders/vme/intra_frame_ivb.g7b @@ -35,7 +35,13 @@ { 0x00600001, 0x288000e1, 0x00000000, 0x00000000 }, { 0x00800001, 0x28800231, 0x00cf03a3, 0x00000000 }, { 0x00000001, 0x28900061, 0x00000000, 0x11111111 }, - { 0x00000001, 0x22000060, 0x00000000, 0x0a184000 }, + { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 }, + { 0x00000005, 0x24200c21, 0x00000400, 0x00000006 }, + { 0x00000009, 0x24040c21, 0x00000420, 0x0000000a }, + { 0x00000005, 0x24200c21, 0x00000400, 0x00000038 }, + { 0x00000009, 0x24240c21, 0x00000420, 0x00000005 }, + { 0x00000040, 0x24000421, 0x00000404, 0x00000424 }, + { 0x00000040, 0x22000c20, 0x00000400, 0x0a184000 }, { 0x08000031, 0x21800221, 0x00000800, 0x00000200 }, { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200021, 0x00000180, 0x00000000 }, diff --git a/src/shaders/vme/vme7.inc b/src/shaders/vme/vme7.inc index acff81f7..8c1731c5 100644 --- a/src/shaders/vme/vme7.inc +++ b/src/shaders/vme/vme7.inc @@ -152,6 +152,7 @@ define(`input_mb_intra_ub', `inline_reg0.5') define(`num_macroblocks', `inline_reg0.6') 
define(`quality_level_ub', `inline_reg0.7') +define(`qp_ub', `inline_reg0.8') /* * GRF 6~11 -- reserved */ -- cgit v1.2.1 From 03424b76bd7f0fb5186d9e2cacd8be790e267ab9 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Sun, 1 Jan 2012 02:18:58 +0000 Subject: Encoding: Dynamically select one mechanism to construct encoding command buffer for each frame on Haswell and Gen7/Gen6 v2:remove the warning when compiling v1: combine the Haswell and Gen7/6 patch together Currently it uses the fixed policy to construct encoding command buffer. (Use CPU or GPU). And it is statically compiled. But sometimes it needs to choose the different mechanism on the fly instead of statically compiled mode. Signed-off-by: Zhao Yakui Signed-off-by: pjl Signed-off-by: Pengfei Qu (cherry picked from commit 89463c30a9ce2740603f36f1000da54b8b5c731b) --- src/gen6_mfc.c | 12 ++++-------- src/gen6_mfc.h | 1 - src/gen75_mfc.c | 16 +++++----------- src/i965_encoder.h | 1 + 4 files changed, 10 insertions(+), 20 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index acefc975..0208ddb5 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -676,7 +676,6 @@ gen6_mfc_stop(VADriverContextP ctx, return VA_STATUS_SUCCESS; } -#if __SOFTWARE__ static int gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg, @@ -899,7 +898,6 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx, return batch_bo; } -#else static void gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, @@ -1299,7 +1297,6 @@ gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, return mfc_context->mfc_batchbuffer_surface.bo; } -#endif static void @@ -1316,11 +1313,10 @@ gen6_mfc_avc_pipeline_programing(VADriverContextP ctx, return; } -#if __SOFTWARE__ - slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); -#else - slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); -#endif + if 
(encoder_context->soft_batch_force) + slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); + else + slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); // begin programing intel_batchbuffer_start_atomic_bcs(batch, 0x4000); diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index 087c6762..fa610d44 100644 --- a/src/gen6_mfc.h +++ b/src/gen6_mfc.h @@ -50,7 +50,6 @@ struct encode_state; /* the space required for slice tail. */ #define SLICE_TAIL 16 -#define __SOFTWARE__ 0 #define MFC_BATCHBUFFER_AVC_INTRA 0 #define MFC_BATCHBUFFER_AVC_INTER 1 diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index b6b18ea9..635be9d0 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -49,8 +49,6 @@ #define AVC_INTER_MV_OFFSET 48 #define AVC_RDO_MASK 0xFFFF -#define MFC_SOFTWARE_HASWELL 0 - #define SURFACE_STATE_PADDED_SIZE MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7) #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) @@ -995,7 +993,6 @@ gen75_mfc_avc_slice_state(VADriverContextP ctx, } -#if MFC_SOFTWARE_HASWELL static int gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, @@ -1252,7 +1249,6 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, struct intel_batchbuffer *batch; dri_bo *batch_bo; int i; - int buffer_size; batch = mfc_context->aux_batchbuffer; batch_bo = batch->buffer; @@ -1275,7 +1271,6 @@ gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx, return batch_bo; } -#else static void gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, @@ -1636,7 +1631,6 @@ gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, return mfc_context->aux_batchbuffer_surface.bo; } -#endif static void gen75_mfc_avc_pipeline_programing(VADriverContextP ctx, @@ -1652,11 +1646,11 @@ gen75_mfc_avc_pipeline_programing(VADriverContextP 
ctx, return; } -#if MFC_SOFTWARE_HASWELL - slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); -#else - slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); -#endif + + if (encoder_context->soft_batch_force) + slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); + else + slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); // begin programing intel_batchbuffer_start_atomic_bcs(batch, 0x4000); diff --git a/src/i965_encoder.h b/src/i965_encoder.h index ddfcf9f1..94d84b5d 100644 --- a/src/i965_encoder.h +++ b/src/i965_encoder.h @@ -65,6 +65,7 @@ struct intel_encoder_context unsigned int is_tmp_id:1; unsigned int low_power_mode:1; + unsigned int soft_batch_force:1; void (*vme_context_destroy)(void *vme_context); VAStatus (*vme_pipeline)(VADriverContextP ctx, -- cgit v1.2.1 From df4b569f76c3ba3bcca097ea2d72630a038cfd59 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Sun, 1 Jan 2012 02:19:05 +0000 Subject: Encoding: Add one ROI flag and ROI buffer v3: free the qp_per_mb for vme context v2: remove unused variable to avoid warning when compiling. 
v1: Add one flag to indicate whether ROI is supported in one encode context Allocate one ROI buffer to hold qp per mb dynamically Signed-off-by: Zhao Yakui Signed-off-by: pjl Signed-off-by: Pengfei Qu (cherry picked from commit 93247612f9e7ebb95c8ef7b6fdcf5dac4d556bb4) --- src/gen6_mfc_common.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/gen6_vme.c | 3 +++ src/gen6_vme.h | 13 ++++++++++ src/gen75_vme.c | 3 +++ src/gen7_vme.c | 3 +++ src/gen8_vme.c | 3 +++ src/gen9_vme.c | 3 +++ src/i965_encoder.c | 7 ++++-- src/i965_encoder.h | 1 + 9 files changed, 103 insertions(+), 2 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 233c2c6a..364401f5 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1763,6 +1763,75 @@ intel_h264_setup_cost_surface(VADriverContextP ctx, surface_state_offset); } +extern void +intel_h264_enc_roi_config(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + VAEncMiscParameterBuffer* pMiscParamROI; + VAEncMiscParameterBufferROI *pParamROI; + struct gen6_vme_context *vme_context = encoder_context->vme_context; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + + vme_context->roi_enabled = 0; + /* Restriction: Disable ROI when multi-slice is enabled */ + if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1)) + return; + + if (encode_state->misc_param[VAEncMiscParameterTypeROI] == NULL) { + return; + } + + pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI]->buffer; + pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data; + + /* check whether number of ROI is correct */ + /* currently 
one region is supported */ + if (pParamROI->num_roi != 1) { + return; + } + + vme_context->roi_enabled = 1; + + if ((vme_context->saved_width_mbs != width_in_mbs) || + (vme_context->saved_height_mbs != height_in_mbs)) { + free(vme_context->qp_per_mb); + vme_context->qp_per_mb = calloc(1, width_in_mbs * height_in_mbs); + + vme_context->saved_width_mbs = width_in_mbs; + vme_context->saved_height_mbs = height_in_mbs; + assert(vme_context->qp_per_mb); + } + if (encoder_context->rate_control_mode == VA_RC_CBR) { + /* + * TODO: More complex Qp adjust needs to be added. + * Currently it is initialized to slice_qp. + */ + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; + memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs); + } else if (encoder_context->rate_control_mode == VA_RC_CQP){ + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs); + } else { + /* + * TODO: Disable it for non CBR-CQP. 
+ */ + vme_context->roi_enabled = 0; + } + return; +} + /* HEVC */ static int hevc_temporal_find_surface(VAPictureHEVC *curr_pic, diff --git a/src/gen6_vme.c b/src/gen6_vme.c index 07e353ea..8c8667e4 100644 --- a/src/gen6_vme.c +++ b/src/gen6_vme.c @@ -622,6 +622,9 @@ gen6_vme_context_destroy(void *context) dri_bo_unreference(vme_context->vme_batchbuffer.bo); vme_context->vme_batchbuffer.bo = NULL; + free(vme_context->qp_per_mb); + vme_context->qp_per_mb = NULL; + free(vme_context); } diff --git a/src/gen6_vme.h b/src/gen6_vme.h index 50313399..6cf35f9e 100644 --- a/src/gen6_vme.h +++ b/src/gen6_vme.h @@ -98,6 +98,13 @@ struct gen6_vme_context dri_bo *p_qp_cost_table; dri_bo *b_qp_cost_table; int cost_table_size; + + /* one buffer define qp per mb. one byte for every mb. + * If it needs to be accessed by GPU, it will be changed to dri_bo. + */ + bool roi_enabled; + char *qp_per_mb; + int saved_width_mbs, saved_height_mbs; }; #define MPEG2_PIC_WIDTH_HEIGHT 30 @@ -219,4 +226,10 @@ intel_h264_setup_cost_surface(VADriverContextP ctx, struct intel_encoder_context *encoder_context, unsigned long binding_table_offset, unsigned long surface_state_offset); + +extern void +intel_h264_enc_roi_config(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context); + #endif /* _GEN6_VME_H_ */ diff --git a/src/gen75_vme.c b/src/gen75_vme.c index dcf170ec..f4b3ab14 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -1030,6 +1030,9 @@ gen75_vme_context_destroy(void *context) dri_bo_unreference(vme_context->b_qp_cost_table); vme_context->b_qp_cost_table = NULL; + free(vme_context->qp_per_mb); + vme_context->qp_per_mb = NULL; + free(vme_context); } diff --git a/src/gen7_vme.c b/src/gen7_vme.c index d9898a7b..7927ce15 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -1104,6 +1104,9 @@ gen7_vme_context_destroy(void *context) dri_bo_unreference(vme_context->b_qp_cost_table); vme_context->b_qp_cost_table = NULL; + 
free(vme_context->qp_per_mb); + vme_context->qp_per_mb = NULL; + free(vme_context); } diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 998f7d65..559accec 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -1303,6 +1303,9 @@ gen8_vme_context_destroy(void *context) dri_bo_unreference(vme_context->b_qp_cost_table); vme_context->b_qp_cost_table = NULL; + free(vme_context->qp_per_mb); + vme_context->qp_per_mb = NULL; + free(vme_context); } diff --git a/src/gen9_vme.c b/src/gen9_vme.c index 5f9b796b..fd89c177 100644 --- a/src/gen9_vme.c +++ b/src/gen9_vme.c @@ -1776,6 +1776,9 @@ gen9_vme_context_destroy(void *context) vme_context->vme_state_message = NULL; } + free(vme_context->qp_per_mb); + vme_context->qp_per_mb = NULL; + free(vme_context); } diff --git a/src/i965_encoder.c b/src/i965_encoder.c index c83cc7df..c2001438 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -882,6 +882,7 @@ intel_enc_hw_context_init(VADriverContextP ctx, break; } + encoder_context->context_roi = 0; for (i = 0; i < obj_config->num_attribs; i++) { if (obj_config->attrib_list[i].type == VAConfigAttribRateControl) { encoder_context->rate_control_mode = obj_config->attrib_list[i].value; @@ -891,8 +892,10 @@ intel_enc_hw_context_init(VADriverContextP ctx, WARN_ONCE("Don't support CBR for MPEG-2 encoding\n"); encoder_context->rate_control_mode &= ~VA_RC_CBR; } - - break; + } + if (obj_config->attrib_list[i].type == VAConfigAttribEncROI) { + if (encoder_context->codec == CODEC_H264) + encoder_context->context_roi = 1; } } diff --git a/src/i965_encoder.h b/src/i965_encoder.h index 94d84b5d..db7e6988 100644 --- a/src/i965_encoder.h +++ b/src/i965_encoder.h @@ -66,6 +66,7 @@ struct intel_encoder_context unsigned int is_tmp_id:1; unsigned int low_power_mode:1; unsigned int soft_batch_force:1; + unsigned int context_roi:1; void (*vme_context_destroy)(void *vme_context); VAStatus (*vme_pipeline)(VADriverContextP ctx, -- cgit v1.2.1 From 3f2bd0d5d7a2b117325236da60c06e0aee0792c4 Mon Sep 17 
00:00:00 2001 From: Zhao Yakui Date: Sun, 1 Jan 2012 02:13:28 +0000 Subject: encoding:use the qp per every macroblock on Ivy and haswell v1: combine the patch together for Ivy and haswell use-CPU-to-construct-the-MFC-pak-command Signed-off-by: Zhao Yakui Signed-off-by: ceciliapeng Signed-off-by: Pengfei Qu (cherry picked from commit 3d12cd730f319c7f0fd23a978721ee2482342a79) --- src/gen6_mfc.c | 15 ++++++++++++--- src/gen6_mfc_common.c | 18 +++++++++++++++--- src/gen75_mfc.c | 16 ++++++++++++---- src/gen75_vme.c | 9 ++++++++- src/gen7_vme.c | 9 ++++++++- 5 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 0208ddb5..fd4c1202 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -791,6 +791,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; int qp_slice; + int qp_mb; qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { @@ -835,15 +836,23 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, x = i % width_in_mbs; y = i / width_in_mbs; + if (vme_context->roi_enabled) { + qp_mb = *(vme_context->qp_per_mb + i); + } else { + qp_mb = qp; + } + if (is_intra) { assert(msg); - gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); msg += INTRA_VME_OUTPUT_IN_DWS; } else { if (msg[0] & INTRA_MB_FLAG_MASK) { - gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { - gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch); + gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, + msg, offset, encoder_context, + 0, 0, slice_type, slice_batch); 
} msg += INTER_VME_OUTPUT_IN_DWS; diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 364401f5..1668e7fb 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1035,7 +1035,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; - int qp; + int qp,qp_mb,qp_index; int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); if (encoder_context->rate_control_mode == VA_RC_CQP) @@ -1093,7 +1093,12 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); /* QP occupies one byte */ - *command_ptr++ = qp; + if (vme_context->roi_enabled) { + qp_index = y_inner * mb_width + x_inner; + qp_mb = *(vme_context->qp_per_mb + qp_index); + } else + qp_mb = qp; + *command_ptr++ = qp_mb; x_inner -= 2; y_inner += 1; } @@ -1139,7 +1144,12 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx, *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); /* qp occupies one byte */ - *command_ptr++ = qp; + if (vme_context->roi_enabled) { + qp_index = y_inner * mb_width + x_inner; + qp_mb = *(vme_context->qp_per_mb + qp_index); + } else + qp_mb = qp; + *command_ptr++ = qp_mb; x_inner -= 2; y_inner += 1; @@ -1829,6 +1839,8 @@ intel_h264_enc_roi_config(VADriverContextP ctx, */ vme_context->roi_enabled = 0; } + if (vme_context->roi_enabled) + encoder_context->soft_batch_force = 1; return; } diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 635be9d0..99b9cf1e 100644 --- a/src/gen75_mfc.c +++ 
b/src/gen75_mfc.c @@ -1167,6 +1167,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; int qp_slice; + int qp_mb; qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { @@ -1210,19 +1211,26 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, y = i / width_in_mbs; msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block); + if (vme_context->roi_enabled) { + qp_mb = *(vme_context->qp_per_mb + i); + } else + qp_mb = qp; + if (is_intra) { assert(msg); - gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { int inter_rdo, intra_rdo; inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET; if (intra_rdo < inter_rdo) { - gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { - msg += AVC_INTER_MSG_OFFSET; - gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch); + msg += AVC_INTER_MSG_OFFSET; + gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, + msg, offset, encoder_context, + 0, 0, slice_type, slice_batch); } } } diff --git a/src/gen75_vme.c b/src/gen75_vme.c index f4b3ab14..a0c3558b 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -496,6 +496,7 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; int qp; int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + int qp_mb, 
qp_index; if (encoder_context->rate_control_mode == VA_RC_CQP) qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; @@ -549,7 +550,12 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx, *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); /* qp occupies one byte */ - *command_ptr++ = qp; + if (vme_context->roi_enabled) { + qp_index = mb_y * mb_width + mb_x; + qp_mb = *(vme_context->qp_per_mb + qp_index); + } else + qp_mb = qp; + *command_ptr++ = qp_mb; i += 1; } @@ -663,6 +669,7 @@ static VAStatus gen75_vme_prepare(VADriverContextP ctx, intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + intel_h264_enc_roi_config(ctx, encode_state, encoder_context); /*Setup all the memory object*/ gen75_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); diff --git a/src/gen7_vme.c b/src/gen7_vme.c index 7927ce15..899acd38 100644 --- a/src/gen7_vme.c +++ b/src/gen7_vme.c @@ -554,6 +554,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; int qp; int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + int qp_mb, qp_index; if (encoder_context->rate_control_mode == VA_RC_CQP) qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; @@ -619,7 +620,13 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx, *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); - *command_ptr++ = qp; + if (vme_context->roi_enabled) { + qp_index = mb_y * mb_width + mb_x; + qp_mb = *(vme_context->qp_per_mb + qp_index); + } else + qp_mb = qp; + *command_ptr++ = qp_mb; + i += 1; } -- cgit v1.2.1 From 
768c7bfb8ecb59432f902a4b7c28bc0c64bc631d Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Sun, 1 Jan 2012 02:54:34 +0000 Subject: Encoding: ROI support for CQP/CBR on Haswell/Ivy v2: remove unused variable set max ROI number to 3 for low power mode, or 8 otherwise v1: merge 3 ROI patches together Encoding: Add the support of ROI under CQP on Haswell/Ivybridge Encoding: Add the support of ROI for CBR Currently it will allocate the different qp for the ROI and non_ROI region based on the ROI ratio. The qp delta is related to the ratio of ROI region. Encoding: Expand to support multiple ROI regions. Encoding: bits.roi_rc_qp_delat_support user guide: The first is that the driver should expose the feature of qp_delta in VAConfigAttribValEncROI. The second is that the user-app can pass the qp_delta flag in VAEncMiscParameterBufferROI and then the driver will use the qp_delta to calculate the corresponding qp for ROI region. For the non-ROI region: I think that currently we can use the following model to predict the qp. (qp_value = intel_qpvalue_from_qp(qp)) Qp_value_roi * ROI_area + qp_value_nonroi * area_nonroi = base_qp * total_area. Signed-off-by: ceciliapeng Signed-off-by: Pengfei Qu (cherry picked from commit dfb64b3220a6df9be1bd6f667cd96f056d1a8cb6) --- src/gen6_mfc_common.c | 202 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/i965_drv_video.c | 34 +++++++-- src/i965_drv_video.h | 2 + src/i965_encoder.c | 1 - 4 files changed, 227 insertions(+), 12 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 1668e7fb..4f51c3e1 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1773,11 +1773,174 @@ intel_h264_setup_cost_surface(VADriverContextP ctx, surface_state_offset); } +/* + * the idea of conversion between qp and qstep comes from scaling process + * of transform coeff for Luma component in H264 spec. + * 2^(Qpy / 6 - 6) + * In order to avoid too small qstep, it is multiplied by 16.
+ */ +static float intel_h264_qp_qstep(int qp) +{ + float value, qstep; + value = qp; + value = value / 6 - 2; + qstep = powf(2, value); + return qstep; +} + +static int intel_h264_qstep_qp(float qstep) +{ + float qp; + + qp = 12.0f + 6.0f * log2f(qstep); + + return floorf(qp); +} + +/* + * Currently it is based on the following assumption: + * SUM(roi_area * 1 / roi_qstep) + non_area * 1 / nonroi_qstep = + * total_aread * 1 / baseqp_qstep + * + * qstep is the linearized quantizer of H264 quantizer + */ +typedef struct { + int row_start_in_mb; + int row_end_in_mb; + int col_start_in_mb; + int col_end_in_mb; + + int width_mbs; + int height_mbs; + + int roi_qp; +} ROIRegionParam; + +static void +intel_h264_enc_roi_cbr(VADriverContextP ctx, + int base_qp, + VAEncMiscParameterBufferROI *pMiscParamROI, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + int nonroi_qp; + VAEncROI *region_roi; + bool quickfill = 0; + + ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS]; + int num_roi; + int i,j; + + float temp; + float qstep_nonroi, qstep_base; + float roi_area, total_area, nonroi_area; + float sum_roi; + + VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + int width_in_mbs = pSequenceParameter->picture_width_in_mbs; + int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int mbs_in_picture = width_in_mbs * height_in_mbs; + + struct gen6_vme_context *vme_context = encoder_context->vme_context; + + num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi; + + /* when the base_qp is lower than 12, the quality is quite good based + * on the H264 test experience. + * In such case it is unnecessary to adjust the quality for ROI region. 
+ */ + if (base_qp <= 12) { + nonroi_qp = base_qp; + quickfill = 1; + goto qp_fill; + } + + /* currently roi_value_is_qp_delta is the only supported mode of priority. + * + * qp_delta set by user is added to base_qp, which is then clapped by + * [base_qp-min_delta, base_qp+max_delta]. + */ + assert (pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta); + + sum_roi = 0.0f; + roi_area = 0; + for (i = 0; i < num_roi; i++) { + int row_start, row_end, col_start, col_end; + int roi_width_mbs, roi_height_mbs; + int mbs_in_roi; + int roi_qp; + float qstep_roi; + + region_roi = (VAEncROI *)pMiscParamROI->roi + i; + + col_start = region_roi->roi_rectangle.x; + col_end = col_start + region_roi->roi_rectangle.width; + row_start = region_roi->roi_rectangle.y; + row_end = row_start + region_roi->roi_rectangle.height; + col_start = col_start / 16; + col_end = (col_end + 15) / 16; + row_start = row_start / 16; + row_end = (row_end + 15) / 16; + + roi_width_mbs = col_end - col_start; + roi_height_mbs = row_end - row_start; + mbs_in_roi = roi_width_mbs * roi_height_mbs; + + param_regions[i].row_start_in_mb = row_start; + param_regions[i].row_end_in_mb = row_end; + param_regions[i].col_start_in_mb = col_start; + param_regions[i].col_end_in_mb = col_end; + param_regions[i].width_mbs = roi_width_mbs; + param_regions[i].height_mbs = roi_height_mbs; + + roi_qp = base_qp + region_roi->roi_value; + BRC_CLIP(roi_qp, 1, 51); + + param_regions[i].roi_qp = roi_qp; + qstep_roi = intel_h264_qp_qstep(roi_qp); + + roi_area += mbs_in_roi; + sum_roi += mbs_in_roi / qstep_roi; + } + + total_area = mbs_in_picture; + nonroi_area = total_area - roi_area; + + qstep_base = intel_h264_qp_qstep(base_qp); + temp = (total_area / qstep_base - sum_roi); + + if (temp < 0) { + nonroi_qp = 51; + } else { + qstep_nonroi = nonroi_area / temp; + nonroi_qp = intel_h264_qstep_qp(qstep_nonroi); + } + + BRC_CLIP(nonroi_qp, 1, 51); + +qp_fill: + memset(vme_context->qp_per_mb, nonroi_qp, mbs_in_picture); + if (!quickfill) 
{ + char *qp_ptr; + + for (i = 0; i < num_roi; i++) { + for (j = param_regions[i].row_start_in_mb; j < param_regions[i].row_end_in_mb; j++) { + qp_ptr = vme_context->qp_per_mb + (j * width_in_mbs) + param_regions[i].col_start_in_mb; + memset(qp_ptr, param_regions[i].roi_qp, param_regions[i].width_mbs); + } + } + } + return ; +} + extern void intel_h264_enc_roi_config(VADriverContextP ctx, struct encode_state *encode_state, struct intel_encoder_context *encoder_context) { + char *qp_ptr; + int i, j; + VAEncROI *region_roi; VAEncMiscParameterBuffer* pMiscParamROI; VAEncMiscParameterBufferROI *pParamROI; struct gen6_vme_context *vme_context = encoder_context->vme_context; @@ -1786,6 +1949,9 @@ intel_h264_enc_roi_config(VADriverContextP ctx, int width_in_mbs = pSequenceParameter->picture_width_in_mbs; int height_in_mbs = pSequenceParameter->picture_height_in_mbs; + int row_start, row_end, col_start, col_end; + int num_roi; + vme_context->roi_enabled = 0; /* Restriction: Disable ROI when multi-slice is enabled */ if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1)) @@ -1799,10 +1965,7 @@ intel_h264_enc_roi_config(VADriverContextP ctx, pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data; /* check whether number of ROI is correct */ - /* currently one region is supported */ - if (pParamROI->num_roi != 1) { - return; - } + num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? 
I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi; vme_context->roi_enabled = 1; @@ -1825,7 +1988,9 @@ intel_h264_enc_roi_config(VADriverContextP ctx, int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs); + intel_h264_enc_roi_cbr(ctx, qp, pParamROI, + encode_state, encoder_context); + } else if (encoder_context->rate_control_mode == VA_RC_CQP){ VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; @@ -1833,6 +1998,33 @@ intel_h264_enc_roi_config(VADriverContextP ctx, qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; memset(vme_context->qp_per_mb, qp, width_in_mbs * height_in_mbs); + + + for (j = num_roi; j ; j--) { + int qp_delta, qp_clip; + + region_roi = (VAEncROI *)pParamROI->roi + j - 1; + + col_start = region_roi->roi_rectangle.x; + col_end = col_start + region_roi->roi_rectangle.width; + row_start = region_roi->roi_rectangle.y; + row_end = row_start + region_roi->roi_rectangle.height; + + col_start = col_start / 16; + col_end = (col_end + 15) / 16; + row_start = row_start / 16; + row_end = (row_end + 15) / 16; + + qp_delta = region_roi->roi_value; + qp_clip = qp + qp_delta; + + BRC_CLIP(qp_clip, 1, 51); + + for (i = row_start; i < row_end; i++) { + qp_ptr = vme_context->qp_per_mb + (i * width_in_mbs) + col_start; + memset(qp_ptr, qp_clip, (col_end - col_start)); + } + } } else { /* * TODO: Disable it for non CBR-CQP. 
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 98395840..000be57e 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -936,6 +936,7 @@ i965_GetConfigAttributes(VADriverContextP ctx, int num_attribs) { VAStatus va_status; + struct i965_driver_data *i965 = i965_driver_data(ctx); int i; va_status = i965_validate_config(ctx, profile, entrypoint); @@ -1048,13 +1049,24 @@ i965_GetConfigAttributes(VADriverContextP ctx, break; case VAConfigAttribEncROI: - if ((entrypoint == VAEntrypointEncSliceLP) && - (profile == VAProfileH264ConstrainedBaseline || - profile == VAProfileH264Main || - profile == VAProfileH264High)) - attrib_list[i].value = 3; - else + if (profile == VAProfileH264ConstrainedBaseline || + profile == VAProfileH264Main || + profile == VAProfileH264High) { + VAConfigAttribValEncROI *roi_config = (VAConfigAttribValEncROI *)&(attrib_list[i].value); + + if(entrypoint == VAEntrypointEncSliceLP) { + roi_config->bits.num_roi_regions = 3; + roi_config->bits.roi_rc_priority_support = 0; + roi_config->bits.roi_rc_qp_delat_support = 0; + } else if (IS_GEN7(i965->intel.device_info)) { + roi_config->bits.num_roi_regions = I965_MAX_NUM_ROI_REGIONS; + roi_config->bits.roi_rc_priority_support = 0; + roi_config->bits.roi_rc_qp_delat_support = 1; + } else + roi_config->bits.num_roi_regions = 0; + }else { attrib_list[i].value = 0; + } break; @@ -2857,6 +2869,16 @@ i965_BeginPicture(VADriverContextP ctx, obj_context->codec_state.encode.num_packed_header_data_ext = 0; obj_context->codec_state.encode.slice_index = 0; obj_context->codec_state.encode.vps_sps_seq_index = 0; + /* + * Based on ROI definition in va/va.h, the ROI set through this + * structure is applicable only to the current frame or field. + * That is to say: it is on-the-fly setting. If it is not set, + * the current frame doesn't use ROI. + * It is uncertain whether the other misc buffer should be released. + * So only release the previous ROI buffer. 
+ */ + i965_release_buffer_store(&obj_context->codec_state.encode.misc_param[VAEncMiscParameterTypeROI]); + i965_release_buffer_store(&obj_context->codec_state.encode.encmb_map); if (obj_config->profile == VAProfileVP9Profile0) { diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index fea75eeb..f67599e9 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -73,6 +73,8 @@ #define ENCODER_HIGH_QUALITY ENCODER_DEFAULT_QUALITY #define ENCODER_LOW_QUALITY 2 +#define I965_MAX_NUM_ROI_REGIONS 8 + #define ENCODER_LP_QUALITY_RANGE 8 struct i965_surface diff --git a/src/i965_encoder.c b/src/i965_encoder.c index c2001438..a9a1189c 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -882,7 +882,6 @@ intel_enc_hw_context_init(VADriverContextP ctx, break; } - encoder_context->context_roi = 0; for (i = 0; i < obj_config->num_attribs; i++) { if (obj_config->attrib_list[i].type == VAConfigAttribRateControl) { encoder_context->rate_control_mode = obj_config->attrib_list[i].value; -- cgit v1.2.1 From 950e65fa5781405908002f9fcd8f4ac9a8d1ab6f Mon Sep 17 00:00:00 2001 From: Pengfei Qu Date: Mon, 18 Jul 2016 12:57:39 +0800 Subject: ROI:enable on gen8 and gen9 v2: use ASSERT_RET instead of assert to check the ROI flag set by the user.
v1: ROI enable on gen8 and gen9 Enable GPU to construct GPU command under ROI scenario fix roi attrib config incorrectly Signed-off-by: Zhao Yakui Signed-off-by: Pengfei Qu (cherry picked from commit f53ff4069d14bcb2bafa95997cad5af9d67f3fbc) --- src/gen6_mfc_common.c | 56 +++++++++++++++++++++++++++++---------------------- src/gen8_mfc.c | 53 +++++++++++++++++++++++++++++++++--------------- src/gen8_vme.c | 21 ++++++++++++++++++- src/gen9_mfc.c | 35 ++++++++++++++++---------------- src/gen9_vme.c | 40 +++++++++++++++++++++++++++++++----- src/i965_drv_video.c | 6 ++---- 6 files changed, 143 insertions(+), 68 deletions(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 4f51c3e1..b0342dab 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -1816,7 +1816,7 @@ typedef struct { int roi_qp; } ROIRegionParam; -static void +static VAStatus intel_h264_enc_roi_cbr(VADriverContextP ctx, int base_qp, VAEncMiscParameterBufferROI *pMiscParamROI, @@ -1828,7 +1828,7 @@ intel_h264_enc_roi_cbr(VADriverContextP ctx, bool quickfill = 0; ROIRegionParam param_regions[I965_MAX_NUM_ROI_REGIONS]; - int num_roi; + int num_roi = 0; int i,j; float temp; @@ -1842,8 +1842,19 @@ intel_h264_enc_roi_cbr(VADriverContextP ctx, int mbs_in_picture = width_in_mbs * height_in_mbs; struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAStatus vaStatus = VA_STATUS_SUCCESS; - num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi; + if(pMiscParamROI != NULL) + { + num_roi = (pMiscParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pMiscParamROI->num_roi; + + /* currently roi_value_is_qp_delta is the only supported mode of priority. + * + * qp_delta set by user is added to base_qp, which is then clapped by + * [base_qp-min_delta, base_qp+max_delta]. 
+ */ + ASSERT_RET(pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta,VA_STATUS_ERROR_INVALID_PARAMETER); + } /* when the base_qp is lower than 12, the quality is quite good based * on the H264 test experience. @@ -1855,13 +1866,6 @@ intel_h264_enc_roi_cbr(VADriverContextP ctx, goto qp_fill; } - /* currently roi_value_is_qp_delta is the only supported mode of priority. - * - * qp_delta set by user is added to base_qp, which is then clapped by - * [base_qp-min_delta, base_qp+max_delta]. - */ - assert (pMiscParamROI->roi_flags.bits.roi_value_is_qp_delta); - sum_roi = 0.0f; roi_area = 0; for (i = 0; i < num_roi; i++) { @@ -1930,7 +1934,7 @@ qp_fill: } } } - return ; + return vaStatus; } extern void @@ -1941,8 +1945,9 @@ intel_h264_enc_roi_config(VADriverContextP ctx, char *qp_ptr; int i, j; VAEncROI *region_roi; + struct i965_driver_data *i965 = i965_driver_data(ctx); VAEncMiscParameterBuffer* pMiscParamROI; - VAEncMiscParameterBufferROI *pParamROI; + VAEncMiscParameterBufferROI *pParamROI = NULL; struct gen6_vme_context *vme_context = encoder_context->vme_context; struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; @@ -1950,24 +1955,26 @@ intel_h264_enc_roi_config(VADriverContextP ctx, int height_in_mbs = pSequenceParameter->picture_height_in_mbs; int row_start, row_end, col_start, col_end; - int num_roi; + int num_roi = 0; vme_context->roi_enabled = 0; /* Restriction: Disable ROI when multi-slice is enabled */ if (!encoder_context->context_roi || (encode_state->num_slice_params_ext > 1)) return; - if (encode_state->misc_param[VAEncMiscParameterTypeROI] == NULL) { - return; - } + if (encode_state->misc_param[VAEncMiscParameterTypeROI] != NULL) { + pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI]->buffer; + pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data; - 
pMiscParamROI = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeROI]->buffer; - pParamROI = (VAEncMiscParameterBufferROI *)pMiscParamROI->data; + /* check whether number of ROI is correct */ + num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi; + } - /* check whether number of ROI is correct */ - num_roi = (pParamROI->num_roi > I965_MAX_NUM_ROI_REGIONS) ? I965_MAX_NUM_ROI_REGIONS : pParamROI->num_roi; + if (num_roi > 0) + vme_context->roi_enabled = 1; - vme_context->roi_enabled = 1; + if (!vme_context->roi_enabled) + return; if ((vme_context->saved_width_mbs != width_in_mbs) || (vme_context->saved_height_mbs != height_in_mbs)) { @@ -1988,8 +1995,7 @@ intel_h264_enc_roi_config(VADriverContextP ctx, int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - intel_h264_enc_roi_cbr(ctx, qp, pParamROI, - encode_state, encoder_context); + intel_h264_enc_roi_cbr(ctx, qp, pParamROI,encode_state, encoder_context); } else if (encoder_context->rate_control_mode == VA_RC_CQP){ VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; @@ -2031,8 +2037,10 @@ intel_h264_enc_roi_config(VADriverContextP ctx, */ vme_context->roi_enabled = 0; } - if (vme_context->roi_enabled) + + if (vme_context->roi_enabled && IS_GEN7(i965->intel.device_info)) encoder_context->soft_batch_force = 1; + return; } diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 2f0e7aea..3e047546 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -939,8 +939,6 @@ gen8_mfc_avc_slice_state(VADriverContextP ctx, #define AVC_INTER_MV_OFFSET 48 #define AVC_RDO_MASK 0xFFFF -#if MFC_SOFTWARE_BATCH - static int gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg, @@ -1114,6 +1112,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int 
slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; int qp_slice; + int qp_mb; qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { @@ -1156,20 +1155,24 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, x = i % width_in_mbs; y = i / width_in_mbs; msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block); + if (vme_context->roi_enabled) { + qp_mb = *(vme_context->qp_per_mb + i); + } else + qp_mb = qp; if (is_intra) { assert(msg); - gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { int inter_rdo, intra_rdo; inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET; if (intra_rdo < inter_rdo) { - gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { msg += AVC_INTER_MSG_OFFSET; - gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); + gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); } } } @@ -1217,7 +1220,6 @@ gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx, return batch_bo; } -#else static void gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, @@ -1374,6 +1376,8 @@ gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, int last_mb, slice_end_x, slice_end_y; int remaining_mb = total_mbs; uint32_t fwd_ref , bwd_ref, mb_flag; + char tmp_qp; + int number_roi_mbs, max_mb_cmds, i; last_mb = slice_param->macroblock_address + total_mbs - 1; slice_end_x = last_mb % width_in_mbs; 
@@ -1401,13 +1405,32 @@ gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, number_mb_cmds = width_in_mbs; } + max_mb_cmds = number_mb_cmds; + do { - if (number_mb_cmds >= remaining_mb) { - number_mb_cmds = remaining_mb; - } mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs; mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs; + number_mb_cmds = max_mb_cmds; + if (vme_context->roi_enabled) { + + number_roi_mbs = 1; + tmp_qp = *(vme_context->qp_per_mb + starting_offset); + for (i = 1; i < max_mb_cmds; i++) { + if (tmp_qp != *(vme_context->qp_per_mb + starting_offset + i)) + break; + + number_roi_mbs++; + } + + number_mb_cmds = number_roi_mbs; + qp = tmp_qp; + } + + if (number_mb_cmds >= remaining_mb) { + number_mb_cmds = remaining_mb; + } + gen8_mfc_batchbuffer_emit_object_command(batch, mb_flag, head_offset, @@ -1585,8 +1608,6 @@ gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, return mfc_context->aux_batchbuffer_surface.bo; } -#endif - static void gen8_mfc_avc_pipeline_programing(VADriverContextP ctx, struct encode_state *encode_state, @@ -1601,11 +1622,11 @@ gen8_mfc_avc_pipeline_programing(VADriverContextP ctx, return; } -#if MFC_SOFTWARE_BATCH - slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); -#else - slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); -#endif + if (encoder_context->soft_batch_force) + slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); + else + slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); + // begin programing intel_batchbuffer_start_atomic_bcs(batch, 0x4000); diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 559accec..8f424eb0 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -562,6 +562,17 @@ gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, int mb_x = 0, mb_y = 0; int i, s; unsigned int *command_ptr; + struct 
gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + int qp_mb, qp_index; + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; dri_bo_map(vme_context->vme_batchbuffer.bo, 1); command_ptr = vme_context->vme_batchbuffer.bo->virtual; @@ -599,7 +610,7 @@ gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, if ((i == mb_width) && slice_mb_x) { mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; @@ -609,6 +620,13 @@ gen8_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* qp occupies one byte */ + if (vme_context->roi_enabled) { + qp_index = mb_y * mb_width + mb_x; + qp_mb = *(vme_context->qp_per_mb + qp_index); + } else + qp_mb = qp; + *command_ptr++ = qp_mb; *command_ptr++ = CMD_MEDIA_STATE_FLUSH; *command_ptr++ = 0; @@ -725,6 +743,7 @@ static VAStatus gen8_vme_prepare(VADriverContextP ctx, intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + intel_h264_enc_roi_config(ctx, encode_state, encoder_context); /*Setup all the memory object*/ gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); diff --git a/src/gen9_mfc.c b/src/gen9_mfc.c index 6cca579a..109e6979 100644 
--- a/src/gen9_mfc.c +++ b/src/gen9_mfc.c @@ -47,8 +47,6 @@ #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) #define BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) -#define MFC_SOFTWARE_HASWELL 1 - #define B0_STEP_REV 2 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) @@ -856,8 +854,6 @@ gen9_mfc_avc_slice_state(VADriverContextP ctx, } -#ifdef MFC_SOFTWARE_HASWELL - static int gen9_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg, @@ -1037,6 +1033,7 @@ gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); int is_intra = slice_type == SLICE_TYPE_I; int qp_slice; + int qp_mb; qp_slice = qp; if (rate_control_mode == VA_RC_CBR) { @@ -1080,19 +1077,24 @@ gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, y = i / width_in_mbs; msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block); + if (vme_context->roi_enabled) { + qp_mb = *(vme_context->qp_per_mb + i); + } else + qp_mb = qp; + if (is_intra) { assert(msg); - gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { int inter_rdo, intra_rdo; inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET; if (intra_rdo < inter_rdo) { - gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch); + gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); } else { msg += AVC_INTER_MSG_OFFSET; - gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); 
+ gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); } } } @@ -1142,8 +1144,6 @@ gen9_mfc_avc_software_batchbuffer(VADriverContextP ctx, return batch_bo; } -#else - static void gen9_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, struct encode_state *encode_state, @@ -1537,7 +1537,6 @@ gen9_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, return mfc_context->mfc_batchbuffer_surface.bo; } -#endif static void gen9_mfc_avc_pipeline_programing(VADriverContextP ctx, @@ -1553,11 +1552,11 @@ gen9_mfc_avc_pipeline_programing(VADriverContextP ctx, return; } -#ifdef MFC_SOFTWARE_HASWELL - slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); -#else - slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); -#endif + if (encoder_context->soft_batch_force) + slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); + else + slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); + // begin programing intel_batchbuffer_start_atomic_bcs(batch, 0x4000); @@ -1705,12 +1704,12 @@ Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e { struct gen6_mfc_context *mfc_context = NULL; -#if MFC_SOFTWARE_HASWELL + if ((encoder_context->codec == CODEC_H264) || (encoder_context->codec == CODEC_H264_MVC)) { - return gen8_mfc_context_init(ctx, encoder_context); + return gen8_mfc_context_init(ctx, encoder_context); } -#endif + if ((encoder_context->codec == CODEC_VP8) || (encoder_context->codec == CODEC_MPEG2)) diff --git a/src/gen9_vme.c b/src/gen9_vme.c index fd89c177..d2eebf4e 100644 --- a/src/gen9_vme.c +++ b/src/gen9_vme.c @@ -352,6 +352,9 @@ gen9_vme_surface_setup(VADriverContextP ctx, /* VME output */ gen9_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context); gen9_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, 
encoder_context); + intel_h264_setup_cost_surface(ctx, encode_state, encoder_context, + BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET), + SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET)); return VA_STATUS_SUCCESS; } @@ -603,6 +606,17 @@ gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx, int mb_x = 0, mb_y = 0; int i, s; unsigned int *command_ptr; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; + int qp; + int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); + int qp_mb, qp_index; + + if (encoder_context->rate_control_mode == VA_RC_CQP) + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; + else + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; dri_bo_map(vme_context->vme_batchbuffer.bo, 1); command_ptr = vme_context->vme_batchbuffer.bo->virtual; @@ -640,7 +654,7 @@ gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx, if ((i == mb_width) && slice_mb_x) { mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); } - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); + *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2)); *command_ptr++ = kernel; *command_ptr++ = 0; *command_ptr++ = 0; @@ -650,6 +664,13 @@ gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx, /*inline data */ *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8)); + /* qp occupies one byte */ + if (vme_context->roi_enabled) { + qp_index = mb_y * mb_width + mb_x; + qp_mb = *(vme_context->qp_per_mb + qp_index); + } else + qp_mb = qp; + *command_ptr++ = qp_mb; *command_ptr++ = CMD_MEDIA_STATE_FLUSH; *command_ptr++ = 0; @@ -767,6 +788,8 @@ static VAStatus gen9_vme_prepare(VADriverContextP ctx, } 
intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context); + intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context); + intel_h264_enc_roi_config(ctx, encode_state, encoder_context); /*Setup all the memory object*/ gen9_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); @@ -1771,10 +1794,17 @@ gen9_vme_context_destroy(void *context) dri_bo_unreference(vme_context->vme_batchbuffer.bo); vme_context->vme_batchbuffer.bo = NULL; - if (vme_context->vme_state_message) { - free(vme_context->vme_state_message); - vme_context->vme_state_message = NULL; - } + free(vme_context->vme_state_message); + vme_context->vme_state_message = NULL; + + dri_bo_unreference(vme_context->i_qp_cost_table); + vme_context->i_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->p_qp_cost_table); + vme_context->p_qp_cost_table = NULL; + + dri_bo_unreference(vme_context->b_qp_cost_table); + vme_context->b_qp_cost_table = NULL; free(vme_context->qp_per_mb); vme_context->qp_per_mb = NULL; diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 000be57e..1c24d003 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -936,7 +936,6 @@ i965_GetConfigAttributes(VADriverContextP ctx, int num_attribs) { VAStatus va_status; - struct i965_driver_data *i965 = i965_driver_data(ctx); int i; va_status = i965_validate_config(ctx, profile, entrypoint); @@ -1058,12 +1057,11 @@ i965_GetConfigAttributes(VADriverContextP ctx, roi_config->bits.num_roi_regions = 3; roi_config->bits.roi_rc_priority_support = 0; roi_config->bits.roi_rc_qp_delat_support = 0; - } else if (IS_GEN7(i965->intel.device_info)) { + } else { roi_config->bits.num_roi_regions = I965_MAX_NUM_ROI_REGIONS; roi_config->bits.roi_rc_priority_support = 0; roi_config->bits.roi_rc_qp_delat_support = 1; - } else - roi_config->bits.num_roi_regions = 0; + } }else { attrib_list[i].value = 0; } -- cgit v1.2.1 From b1cc38fb8857544ee6bddbba6774947b3ec25e70 Mon Sep 17 00:00:00 2001 From: Jia Meng Date: Fri, 11 Oct 
2013 11:22:47 +0800 Subject: Adjust the maximum number of motion vectors for B frame on HSW+ Signed-off-by: Jia Meng Signed-off-by: Pengfei Qu (cherry picked from commit 134995732028a3ca6e55c8ceaa9743cd405c6461) --- src/gen75_vme.c | 13 +++++++------ src/gen8_vme.c | 15 ++++++++------- src/gen9_vme.c | 17 +++++++++-------- 3 files changed, 24 insertions(+), 21 deletions(-) diff --git a/src/gen75_vme.c b/src/gen75_vme.c index a0c3558b..05efff8e 100644 --- a/src/gen75_vme.c +++ b/src/gen75_vme.c @@ -330,7 +330,8 @@ static VAStatus gen75_vme_interface_setup(VADriverContextP ctx, static VAStatus gen75_vme_constant_setup(VADriverContextP ctx, struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context, + int denom) { struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned char *constant_buffer; @@ -342,13 +343,13 @@ static VAStatus gen75_vme_constant_setup(VADriverContextP ctx, if (encoder_context->codec == CODEC_H264 || encoder_context->codec == CODEC_H264_MVC) { if (vme_context->h264_level >= 30) { - mv_num = 16; + mv_num = 16 / denom; if (vme_context->h264_level >= 31) - mv_num = 8; + mv_num = 8 / denom; } } else if (encoder_context->codec == CODEC_MPEG2) { - mv_num = 2; + mv_num = 2 / denom; } vme_state_message[31] = mv_num; @@ -675,7 +676,7 @@ static VAStatus gen75_vme_prepare(VADriverContextP ctx, gen75_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); gen75_vme_interface_setup(ctx, encode_state, encoder_context); //gen75_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context); - gen75_vme_constant_setup(ctx, encode_state, encoder_context); + gen75_vme_constant_setup(ctx, encode_state, encoder_context, (pSliceParameter->slice_type == SLICE_TYPE_B) ? 
2 : 1); /*Programing media pipeline*/ gen75_vme_pipeline_programing(ctx, encode_state, encoder_context); @@ -987,7 +988,7 @@ gen75_vme_mpeg2_prepare(VADriverContextP ctx, gen75_vme_interface_setup(ctx, encode_state, encoder_context); gen75_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context); - gen75_vme_constant_setup(ctx, encode_state, encoder_context); + gen75_vme_constant_setup(ctx, encode_state, encoder_context, 1); /*Programing media pipeline*/ gen75_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context); diff --git a/src/gen8_vme.c b/src/gen8_vme.c index 8f424eb0..d2f68cbd 100644 --- a/src/gen8_vme.c +++ b/src/gen8_vme.c @@ -363,7 +363,8 @@ static VAStatus gen8_vme_interface_setup(VADriverContextP ctx, static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context, + int denom) { struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned char *constant_buffer; @@ -375,13 +376,13 @@ static VAStatus gen8_vme_constant_setup(VADriverContextP ctx, if (encoder_context->codec == CODEC_H264 || encoder_context->codec == CODEC_H264_MVC) { if (vme_context->h264_level >= 30) { - mv_num = 16; + mv_num = 16 / denom; if (vme_context->h264_level >= 31) - mv_num = 8; + mv_num = 8 / denom; } } else if (encoder_context->codec == CODEC_MPEG2) { - mv_num = 2; + mv_num = 2 / denom; } vme_state_message[31] = mv_num; @@ -749,7 +750,7 @@ static VAStatus gen8_vme_prepare(VADriverContextP ctx, gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); gen8_vme_interface_setup(ctx, encode_state, encoder_context); //gen8_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context); - gen8_vme_constant_setup(ctx, encode_state, encoder_context); + gen8_vme_constant_setup(ctx, encode_state, 
encoder_context, (pSliceParameter->slice_type == SLICE_TYPE_B) ? 2 : 1); /*Programing media pipeline*/ gen8_vme_pipeline_programing(ctx, encode_state, encoder_context); @@ -1139,7 +1140,7 @@ gen8_vme_mpeg2_prepare(VADriverContextP ctx, gen8_vme_interface_setup(ctx, encode_state, encoder_context); //gen8_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context); - gen8_vme_constant_setup(ctx, encode_state, encoder_context); + gen8_vme_constant_setup(ctx, encode_state, encoder_context, 1); /*Programing media pipeline*/ gen8_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context); @@ -1272,7 +1273,7 @@ static VAStatus gen8_vme_vp8_prepare(VADriverContextP ctx, /*Setup all the memory object*/ gen8_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context); gen8_vme_interface_setup(ctx, encode_state, encoder_context); - gen8_vme_constant_setup(ctx, encode_state, encoder_context); + gen8_vme_constant_setup(ctx, encode_state, encoder_context, 1); /*Programing media pipeline*/ gen8_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context); diff --git a/src/gen9_vme.c b/src/gen9_vme.c index d2eebf4e..8dde7e71 100644 --- a/src/gen9_vme.c +++ b/src/gen9_vme.c @@ -400,7 +400,8 @@ static VAStatus gen9_vme_interface_setup(VADriverContextP ctx, static VAStatus gen9_vme_constant_setup(VADriverContextP ctx, struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) + struct intel_encoder_context *encoder_context, + int denom) { struct gen6_vme_context *vme_context = encoder_context->vme_context; unsigned char *constant_buffer; @@ -412,13 +413,13 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx, if (encoder_context->codec == CODEC_H264 || encoder_context->codec == CODEC_H264_MVC) { if (vme_context->h264_level >= 30) { - mv_num = 16; + mv_num = 16 / denom; if (vme_context->h264_level >= 31) 
- mv_num = 8; + mv_num = 8 / denom; } } else if (encoder_context->codec == CODEC_MPEG2) { - mv_num = 2; + mv_num = 2 / denom; }else if (encoder_context->codec == CODEC_HEVC) { if (vme_context->hevc_level >= 30*3) { mv_num = 16; @@ -795,7 +796,7 @@ static VAStatus gen9_vme_prepare(VADriverContextP ctx, gen9_vme_surface_setup(ctx, encode_state, is_intra, encoder_context); gen9_vme_interface_setup(ctx, encode_state, encoder_context); //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context); - gen9_vme_constant_setup(ctx, encode_state, encoder_context); + gen9_vme_constant_setup(ctx, encode_state, encoder_context, (pSliceParameter->slice_type == SLICE_TYPE_B) ? 2 : 1); /*Programing media pipeline*/ gen9_vme_pipeline_programing(ctx, encode_state, encoder_context); @@ -1187,7 +1188,7 @@ gen9_vme_mpeg2_prepare(VADriverContextP ctx, gen9_vme_interface_setup(ctx, encode_state, encoder_context); //gen9_vme_vme_state_setup(ctx, encode_state, slice_param->is_intra_slice, encoder_context); intel_vme_mpeg2_state_setup(ctx, encode_state, encoder_context); - gen9_vme_constant_setup(ctx, encode_state, encoder_context); + gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1); /*Programing media pipeline*/ gen9_vme_mpeg2_pipeline_programing(ctx, encode_state, slice_param->is_intra_slice, encoder_context); @@ -1322,7 +1323,7 @@ static VAStatus gen9_vme_vp8_prepare(VADriverContextP ctx, /*Setup all the memory object*/ gen9_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context); gen9_vme_interface_setup(ctx, encode_state, encoder_context); - gen9_vme_constant_setup(ctx, encode_state, encoder_context); + gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1); /*Programing media pipeline*/ gen9_vme_vp8_pipeline_programing(ctx, encode_state, is_intra, encoder_context); @@ -1754,7 +1755,7 @@ static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx, gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context); 
gen9_vme_interface_setup(ctx, encode_state, encoder_context); //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context); - gen9_vme_constant_setup(ctx, encode_state, encoder_context); + gen9_vme_constant_setup(ctx, encode_state, encoder_context, 1); /*Programing media pipeline*/ gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context); -- cgit v1.2.1 From 1c84923ef8fddd7be4341dcbb4c7163fe7daa647 Mon Sep 17 00:00:00 2001 From: Jia Meng Date: Tue, 17 May 2016 10:13:16 +0800 Subject: scaling matrix of h264 encoder on gen7/gen7.5/gen8/gen9 v1: change the title according to yakui's comments. qm is in raster scan order per va api, and fqm is in column wise raster scan order per hardware requirement. Signed-off-by: Jia Meng Signed-off-by: Pengfei Qu (cherry picked from commit 38e3d97d19ee6ff43ec9fa2b568b41a14bafd8e4) --- src/gen6_mfc.c | 12 ++++-- src/gen6_mfc.h | 2 + src/gen75_mfc.c | 12 ++++-- src/gen7_mfc.c | 8 +++- src/gen8_mfc.c | 122 ++++++++++++++++++++++++++++++++++++++++------------- src/gen9_mfc.c | 127 ++++++++++++++++++++++++++++++++++++++++++-------------- 6 files changed, 211 insertions(+), 72 deletions(-) diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index fd4c1202..3c0d4cc1 100644 --- a/src/gen6_mfc.c +++ b/src/gen6_mfc.c @@ -464,7 +464,9 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } -static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +static void gen6_mfc_avc_qm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; int i; @@ -480,7 +482,9 @@ static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_con ADVANCE_BCS_BATCH(batch); } -static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, + struct 
encode_state *encode_state, + struct intel_encoder_context *encoder_context) { struct intel_batchbuffer *batch = encoder_context->base.batch; int i; @@ -639,8 +643,8 @@ static void gen6_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, gen6_mfc_pipe_buf_addr_state(ctx, encoder_context); gen6_mfc_bsp_buf_base_addr_state(ctx, encoder_context); mfc_context->avc_img_state(ctx, encode_state, encoder_context); - mfc_context->avc_qm_state(ctx, encoder_context); - mfc_context->avc_fqm_state(ctx, encoder_context); + mfc_context->avc_qm_state(ctx, encode_state, encoder_context); + mfc_context->avc_fqm_state(ctx, encode_state, encoder_context); gen6_mfc_avc_directmode_state(ctx, encoder_context); intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context); } diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index fa610d44..4561d433 100644 --- a/src/gen6_mfc.h +++ b/src/gen6_mfc.h @@ -319,8 +319,10 @@ struct gen6_mfc_context struct encode_state *encode_state, struct intel_encoder_context *encoder_context); void (*avc_qm_state)(VADriverContextP ctx, + struct encode_state *encode_state, struct intel_encoder_context *encoder_context); void (*avc_fqm_state)(VADriverContextP ctx, + struct encode_state *encode_state, struct intel_encoder_context *encoder_context); void (*insert_object)(VADriverContextP ctx, struct intel_encoder_context *encoder_context, diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c index 99b9cf1e..29aeb661 100644 --- a/src/gen75_mfc.c +++ b/src/gen75_mfc.c @@ -331,7 +331,9 @@ gen75_mfc_qm_state(VADriverContextP ctx, } static void -gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +gen75_mfc_avc_qm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { unsigned int qm[16] = { 0x10101010, 0x10101010, 0x10101010, 0x10101010, @@ -368,7 +370,9 @@ gen75_mfc_fqm_state(VADriverContextP ctx, } static void -gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct 
intel_encoder_context *encoder_context) +gen75_mfc_avc_fqm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { unsigned int qm[32] = { 0x10001000, 0x10001000, 0x10001000, 0x10001000, @@ -827,8 +831,8 @@ static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, gen75_mfc_pipe_buf_addr_state(ctx, encoder_context); gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context); mfc_context->avc_img_state(ctx, encode_state, encoder_context); - mfc_context->avc_qm_state(ctx, encoder_context); - mfc_context->avc_fqm_state(ctx, encoder_context); + mfc_context->avc_qm_state(ctx, encode_state, encoder_context); + mfc_context->avc_fqm_state(ctx, encode_state, encoder_context); gen75_mfc_avc_directmode_state(ctx, encoder_context); intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context); } diff --git a/src/gen7_mfc.c b/src/gen7_mfc.c index 2c17779d..ce43e909 100644 --- a/src/gen7_mfc.c +++ b/src/gen7_mfc.c @@ -287,7 +287,9 @@ gen7_mfc_qm_state(VADriverContextP ctx, } static void -gen7_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +gen7_mfc_avc_qm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { unsigned int qm[16] = { 0x10101010, 0x10101010, 0x10101010, 0x10101010, @@ -324,7 +326,9 @@ gen7_mfc_fqm_state(VADriverContextP ctx, } static void -gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +gen7_mfc_avc_fqm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { unsigned int qm[32] = { 0x10001000, 0x10001000, 0x10001000, 0x10001000, diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 3e047546..2bedcad0 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -129,6 +129,23 @@ static struct i965_kernel gen9_mfc_kernels[] = { }, }; +static const uint32_t qm_flat[16] = { + 0x10101010, 0x10101010, 
0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010 +}; + +static const uint32_t fqm_flat[32] = { + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000 +}; #define INTER_MODE_MASK 0x03 #define INTER_8X8 0x03 @@ -361,7 +378,7 @@ gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, static void gen8_mfc_qm_state(VADriverContextP ctx, int qm_type, - unsigned int *qm, + const uint32_t *qm, int qm_length, struct intel_encoder_context *encoder_context) { @@ -380,25 +397,42 @@ gen8_mfc_qm_state(VADriverContextP ctx, } static void -gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +gen8_mfc_avc_qm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { - unsigned int qm[16] = { - 0x10101010, 0x10101010, 0x10101010, 0x10101010, - 0x10101010, 0x10101010, 0x10101010, 0x10101010, - 0x10101010, 0x10101010, 0x10101010, 0x10101010, - 0x10101010, 0x10101010, 0x10101010, 0x10101010 - }; + const unsigned int *qm_4x4_intra; + const unsigned int *qm_4x4_inter; + const unsigned int *qm_8x8_intra; + const unsigned int *qm_8x8_inter; + VAEncSequenceParameterBufferH264 *pSeqParameter = + (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 *pPicParameter = + (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + + if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag + && 
!pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) { + qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat; + } else { + VAIQMatrixBufferH264 *qm; + assert(encode_state->q_matrix && encode_state->q_matrix->buffer); + qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer; + qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0]; + qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3]; + qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0]; + qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1]; + } - gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context); - gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context); - gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context); - gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context); + gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context); + gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context); + gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context); + gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context); } static void gen8_mfc_fqm_state(VADriverContextP ctx, int fqm_type, - unsigned int *fqm, + const uint32_t *fqm, int fqm_length, struct intel_encoder_context *encoder_context) { @@ -417,23 +451,51 @@ gen8_mfc_fqm_state(VADriverContextP ctx, } static void -gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +gen8_mfc_avc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len) { - unsigned int qm[32] = { - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 
0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000 - }; + int i, j; + for (i = 0; i < len; i++) + for (j = 0; j < len; j++) + fqm[i * len + j] = (1 << 16) / qm[j * len + i]; +} - gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context); - gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context); - gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context); - gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context); +static void +gen8_mfc_avc_fqm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + VAEncSequenceParameterBufferH264 *pSeqParameter = + (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 *pPicParameter = + (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + + if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag + && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) { + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context); + } else { + int i; + uint32_t fqm[32]; + VAIQMatrixBufferH264 *qm; + assert(encode_state->q_matrix && encode_state->q_matrix->buffer); + qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer; + + for (i = 0; i < 3; i++) + gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context); + + for (i = 3; i < 6; i++) + gen8_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context); + + 
gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context); + + gen8_mfc_avc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8); + gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context); + } } static void @@ -768,8 +830,8 @@ static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, gen8_mfc_pipe_buf_addr_state(ctx, encoder_context); gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context); mfc_context->avc_img_state(ctx, encode_state, encoder_context); - mfc_context->avc_qm_state(ctx, encoder_context); - mfc_context->avc_fqm_state(ctx, encoder_context); + mfc_context->avc_qm_state(ctx, encode_state, encoder_context); + mfc_context->avc_fqm_state(ctx, encode_state, encoder_context); gen8_mfc_avc_directmode_state(ctx, encoder_context); intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context); } diff --git a/src/gen9_mfc.c b/src/gen9_mfc.c index 109e6979..87b118fa 100644 --- a/src/gen9_mfc.c +++ b/src/gen9_mfc.c @@ -76,6 +76,24 @@ static struct i965_kernel gen9_mfc_kernels[] = { }, }; +static const uint32_t qm_flat[16] = { + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010, + 0x10101010, 0x10101010, 0x10101010, 0x10101010 +}; + +static const uint32_t fqm_flat[32] = { + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000, + 0x10001000, 0x10001000, 0x10001000, 0x10001000 +}; + #define INTER_MODE_MASK 0x03 #define INTER_8X8 0x03 #define INTER_16X8 0x01 @@ -286,7 +304,7 @@ gen9_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, 
static void gen9_mfc_qm_state(VADriverContextP ctx, int qm_type, - unsigned int *qm, + const uint32_t *qm, int qm_length, struct intel_encoder_context *encoder_context) { @@ -305,25 +323,42 @@ gen9_mfc_qm_state(VADriverContextP ctx, } static void -gen9_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +gen9_mfc_avc_qm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { - unsigned int qm[16] = { - 0x10101010, 0x10101010, 0x10101010, 0x10101010, - 0x10101010, 0x10101010, 0x10101010, 0x10101010, - 0x10101010, 0x10101010, 0x10101010, 0x10101010, - 0x10101010, 0x10101010, 0x10101010, 0x10101010 - }; - - gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context); - gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context); - gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context); - gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context); + const unsigned int *qm_4x4_intra; + const unsigned int *qm_4x4_inter; + const unsigned int *qm_8x8_intra; + const unsigned int *qm_8x8_inter; + VAEncSequenceParameterBufferH264 *pSeqParameter = + (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 *pPicParameter = + (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + + if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag + && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) { + qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat; + } else { + VAIQMatrixBufferH264 *qm; + assert(encode_state->q_matrix && encode_state->q_matrix->buffer); + qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer; + qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0]; + qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3]; + qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0]; + qm_8x8_inter = (unsigned 
int *)qm->ScalingList8x8[1]; + } + + gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context); + gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context); + gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context); + gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context); } static void gen9_mfc_fqm_state(VADriverContextP ctx, int fqm_type, - unsigned int *fqm, + const uint32_t *fqm, int fqm_length, struct intel_encoder_context *encoder_context) { @@ -342,23 +377,51 @@ gen9_mfc_fqm_state(VADriverContextP ctx, } static void -gen9_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +gen9_mfc_avc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len) +{ + int i, j; + for (i = 0; i < len; i++) + for (j = 0; j < len; j++) + fqm[i * len + j] = (1 << 16) / qm[j * len + i]; +} + +static void +gen9_mfc_avc_fqm_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) { - unsigned int qm[32] = { - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000 - }; - - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context); + VAEncSequenceParameterBufferH264 *pSeqParameter = + (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferH264 
*pPicParameter = + (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; + + if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag + && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) { + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context); + } else { + int i; + uint32_t fqm[32]; + VAIQMatrixBufferH264 *qm; + assert(encode_state->q_matrix && encode_state->q_matrix->buffer); + qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer; + + for (i = 0; i < 3; i++) + gen9_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context); + + for (i = 3; i < 6; i++) + gen9_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context); + + gen9_mfc_avc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context); + + gen9_mfc_avc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8); + gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context); + } } static void @@ -688,8 +751,8 @@ static void gen9_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, gen9_mfc_pipe_buf_addr_state(ctx, encoder_context); gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context); mfc_context->avc_img_state(ctx, encode_state, encoder_context); - mfc_context->avc_qm_state(ctx, encoder_context); - mfc_context->avc_fqm_state(ctx, encoder_context); + mfc_context->avc_qm_state(ctx, encode_state, encoder_context); + mfc_context->avc_fqm_state(ctx, encode_state, encoder_context); 
gen9_mfc_avc_directmode_state(ctx, encoder_context); intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context); } -- cgit v1.2.1 From da43121a1f48097e18b3171116402be9b19864c4 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 15 Aug 2016 15:44:48 +0800 Subject: Update the dependency on VA-API version VA-API 0.39.3 is required for ROI delta QP support for CBR Signed-off-by: Xiang, Haihao (cherry picked from commit 3e115315e9ef2df1b3d8f64bced69d0f685c6651) Conflicts: configure.ac --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 6e555e37..9923dfee 100644 --- a/configure.ac +++ b/configure.ac @@ -10,8 +10,8 @@ m4_append([intel_driver_version], intel_driver_pre_version, [.pre]) ]) # libva minimum version requirement -m4_define([va_api_version], [0.39.2]) -m4_define([libva_package_version], [1.7.1]) +m4_define([va_api_version], [0.39.3]) +m4_define([libva_package_version], [1.7.2]) # libdrm minimum version requirement m4_define([libdrm_version], [2.4.52]) -- cgit v1.2.1 From 0e6d9f303c489389b0758549ebcfe34247bf1d51 Mon Sep 17 00:00:00 2001 From: Daniel Charles Date: Fri, 12 Aug 2016 17:30:55 -0700 Subject: vp9encoder: encoder to handle properly CSC on input surface VP9 encoder was not checking for the yuv surface fourcc provided as the input. If the format is I420, the driver creates an underlying surface where the input is converted to NV12. The underlying temporary surface was not used properly by the vme_pipeline_vp9 as intel_encoder_check_yuv_surface will place the underlying surface on the encode_state->input_yuv_object if it needed conversion or it will place the correct current_render_target if the conversion is not needed. 
Signed-off-by: Daniel Charles (cherry picked from commit 98555d27c8291fe2511355d53924d17a5263dc22) --- src/gen9_vp9_encoder.c | 1 - src/i965_encoder.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index eecd206a..0980a77f 100644 --- a/src/gen9_vp9_encoder.c +++ b/src/gen9_vp9_encoder.c @@ -3813,7 +3813,6 @@ gen9_encode_vp9_check_parameter(VADriverContextP ctx, vp9_state->status_buffer.bo = obj_buffer->buffer_store->bo; encode_state->reconstructed_object = SURFACE(pic_param->reconstructed_frame); - encode_state->input_yuv_object = SURFACE(encode_state->current_render_target); if (!encode_state->reconstructed_object || !encode_state->input_yuv_object) diff --git a/src/i965_encoder.c b/src/i965_encoder.c index a9a1189c..beea811b 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -740,6 +740,7 @@ intel_encoder_sanity_check_input(VADriverContextP ctx, vaStatus = intel_encoder_check_vp9_parameter(ctx, encode_state, encoder_context); if (vaStatus != VA_STATUS_SUCCESS) goto out; + vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context); break; } default: -- cgit v1.2.1 From bc688b49e1bb34e456ecbc836e42cecacde98996 Mon Sep 17 00:00:00 2001 From: Daniel Charles Date: Thu, 18 Aug 2016 12:43:46 -0700 Subject: i965_encoder: remove double check for VAStatus result after creating underlying surface there's a double check on the VAStatus result. Replace it with ASSERT_RET. 
Signed-off-by: Daniel Charles Reviewed-by: Sean V Kelley (cherry picked from commit c237791816a1e1067fc5504401a95ae910daa950) --- src/i965_encoder.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/i965_encoder.c b/src/i965_encoder.c index beea811b..47368fbc 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -153,10 +153,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx, VA_RT_FORMAT_YUV420, 1, &encoder_context->input_yuv_surface); - assert(status == VA_STATUS_SUCCESS); - - if (status != VA_STATUS_SUCCESS) - return status; + ASSERT_RET(status == VA_STATUS_SUCCESS, status); obj_surface = SURFACE(encoder_context->input_yuv_surface); encode_state->input_yuv_object = obj_surface; -- cgit v1.2.1 From a057c8120c0d3fd6fcc02927bada412e4c47c10a Mon Sep 17 00:00:00 2001 From: "Ung, Teng En" Date: Thu, 18 Aug 2016 14:06:36 +0800 Subject: Fix the code to avoid plus minus conversion if the result is already zero in the float to integer conversion algorithm. Signed-off-by: Ung, Teng En Reviewed-by: Sean V Kelley (cherry picked from commit 1e98d344014d6e7415008f9ddc4f6cfd982143b0) --- src/intel_media_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel_media_common.c b/src/intel_media_common.c index 99293b2d..8821bc45 100644 --- a/src/intel_media_common.c +++ b/src/intel_media_common.c @@ -76,7 +76,7 @@ int intel_format_convert(float src, int out_int_bits, int out_frac_bits,int out_ if(negative_flag) output_value = (~output_value + 1) & ((1 <<(out_int_bits + out_frac_bits)) -1); - if(out_sign_flag == 1 && negative_flag) + if(output_value != 0 && out_sign_flag == 1 && negative_flag) { output_value |= negative_flag <<(out_int_bits + out_frac_bits); } -- cgit v1.2.1 From 4bbe4b5d3507a7f38a02169757d93a56f8bf376e Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 15 Aug 2016 10:46:10 +0800 Subject: Encode/VP9: turn gen9_vp9_get_coded_status() into a local function Set encoder_context->get_status to this 
local function when initializing, so that we can fetch VP9 encoding status from the underlying context. This patch changes the input parameters and removes redundant assigns Signed-off-by: Xiang, Haihao Reviewed-by: Zhao Yakui (cherry picked from commit 1dea706fe10f7161d880f889f37b7fc86b193e55) --- src/gen9_vp9_encapi.h | 9 --------- src/gen9_vp9_encoder.c | 53 ++++++++++++++++++++++---------------------------- src/i965_drv_video.c | 14 +------------ 3 files changed, 24 insertions(+), 52 deletions(-) diff --git a/src/gen9_vp9_encapi.h b/src/gen9_vp9_encapi.h index 0100a06a..99963fca 100644 --- a/src/gen9_vp9_encapi.h +++ b/src/gen9_vp9_encapi.h @@ -40,13 +40,4 @@ gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en extern Bool gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context); -/* buffer points to the start address of coded_buffer. It is also - * interpreted as i965_coded_buffer_segment. - */ -extern VAStatus -gen9_vp9_get_coded_status(VADriverContextP ctx, - char *buffer, - struct hw_context *hw_context); - - #endif // _INTE_VP9_ENC_API_H_ diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index 0980a77f..6b9f13f5 100644 --- a/src/gen9_vp9_encoder.c +++ b/src/gen9_vp9_encoder.c @@ -5789,6 +5789,7 @@ gen9_vp9_pak_pipeline_prepare(VADriverContextP ctx, coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; coded_buffer_segment->mapped = 0; coded_buffer_segment->codec = encoder_context->codec; + coded_buffer_segment->status_support = 1; dri_bo_unmap(bo); return VA_STATUS_SUCCESS; @@ -6007,58 +6008,50 @@ gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en return true; } -Bool -gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) -{ - /* VME & PAK share the same context */ - struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context; - - if (!pak_context) - return false; - - 
encoder_context->mfc_context = pak_context; - encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy; - encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline; - encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare; - - return true; -} - -VAStatus +static VAStatus gen9_vp9_get_coded_status(VADriverContextP ctx, - char *buffer, - struct hw_context *hw_context) + struct intel_encoder_context *encoder_context, + struct i965_coded_buffer_segment *coded_buf_seg) { - struct intel_encoder_context *encoder_context = - (struct intel_encoder_context *)hw_context; struct gen9_vp9_state *vp9_state = NULL; struct vp9_encode_status_buffer_internal *status_buffer; - struct i965_coded_buffer_segment *coded_buf_seg; unsigned int *buf_ptr; - if (!encoder_context || !buffer) + if (!encoder_context || !coded_buf_seg) return VA_STATUS_ERROR_INVALID_BUFFER; vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state); - coded_buf_seg = (struct i965_coded_buffer_segment *) buffer; if (!vp9_state) return VA_STATUS_ERROR_INVALID_BUFFER; status_buffer = &vp9_state->status_buffer; - buf_ptr = (unsigned int *)(buffer + status_buffer->bs_byte_count_offset); - coded_buf_seg->base.buf = buffer + I965_CODEDBUFFER_HEADER_SIZE; - coded_buf_seg->base.next = NULL; + buf_ptr = (unsigned int *)((char *)coded_buf_seg + status_buffer->bs_byte_count_offset); /* the stream size is writen into the bs_byte_count_offset address of buffer */ coded_buf_seg->base.size = *buf_ptr; - coded_buf_seg->mapped = 1; - /* One VACodedBufferSegment for VP9 will be added later. 
* It will be linked to the next element of coded_buf_seg->base.next */ return VA_STATUS_SUCCESS; } + +Bool +gen9_vp9_pak_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) +{ + /* VME & PAK share the same context */ + struct gen9_encoder_context_vp9 *pak_context = encoder_context->vme_context; + + if (!pak_context) + return false; + + encoder_context->mfc_context = pak_context; + encoder_context->mfc_context_destroy = gen9_vp9_pak_context_destroy; + encoder_context->mfc_pipeline = gen9_vp9_pak_pipeline; + encoder_context->mfc_brc_prepare = gen9_vp9_pak_brc_prepare; + encoder_context->get_status = gen9_vp9_get_coded_status; + return true; +} diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 1c24d003..912896de 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2654,16 +2654,7 @@ i965_MapBuffer(VADriverContextP ctx, coded_buffer_segment->status_support) { vaStatus = obj_context->hw_context->get_status(ctx, obj_context->hw_context, coded_buffer_segment); } else { - - if (coded_buffer_segment->codec == CODEC_VP9) { - - if (obj_context == NULL) - return VA_STATUS_ERROR_ENCODING_ERROR; - - gen9_vp9_get_coded_status(ctx, (char *)coded_buffer_segment, - obj_context->hw_context); - } - else if (coded_buffer_segment->codec == CODEC_H264 || + if (coded_buffer_segment->codec == CODEC_H264 || coded_buffer_segment->codec == CODEC_H264_MVC) { delimiter0 = H264_DELIMITER0; delimiter1 = H264_DELIMITER1; @@ -2690,9 +2681,6 @@ i965_MapBuffer(VADriverContextP ctx, ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE); } - if(coded_buffer_segment->codec == CODEC_VP9) { - /* it is already handled */ - } else if(coded_buffer_segment->codec == CODEC_JPEG) { for(i = 0; i < obj_buffer->size_element - header_offset - 1 - 0x1000; i++) { if( (buffer[i] == 0xFF) && (buffer[i + 1] == 0xD9)) { -- cgit v1.2.1 From 8a61d394782e88c608cfe5d1691e553d901da538 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 15 Aug 2016 10:46:11 +0800 
Subject: Encode/VP9: Don't use hardcoded offsets codec_private_data in struct i965_coded_buffer_segment is used to store codec related data. Add 'struct vp9_encode_status' for the data that will be written into codec_private_data Signed-off-by: Xiang, Haihao Reviewed-by: Zhao Yakui (cherry picked from commit d6cfa6a5a2dd0b7b1a88d73c5451657df57a940a) --- src/gen9_vp9_encoder.c | 26 ++++++++------------------ src/gen9_vp9_encoder.h | 12 ++++++++---- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/src/gen9_vp9_encoder.c b/src/gen9_vp9_encoder.c index 6b9f13f5..34952c97 100644 --- a/src/gen9_vp9_encoder.c +++ b/src/gen9_vp9_encoder.c @@ -5984,15 +5984,16 @@ gen9_vp9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *en /* the definition of status buffer offset for VP9 */ { struct vp9_encode_status_buffer_internal *status_buffer; + uint32_t base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data); status_buffer = &vp9_state->status_buffer; memset(status_buffer, 0, sizeof(struct vp9_encode_status_buffer_internal)); - status_buffer->bs_byte_count_offset = 2048; - status_buffer->image_status_mask_offset = 2052; - status_buffer->image_status_ctrl_offset = 2056; - status_buffer->media_index_offset = 2060; + status_buffer->bs_byte_count_offset = base_offset + offsetof(struct vp9_encode_status, bs_byte_count); + status_buffer->image_status_mask_offset = base_offset + offsetof(struct vp9_encode_status, image_status_mask); + status_buffer->image_status_ctrl_offset = base_offset + offsetof(struct vp9_encode_status, image_status_ctrl); + status_buffer->media_index_offset = base_offset + offsetof(struct vp9_encode_status, media_index); status_buffer->vp9_bs_frame_reg_offset = 0x1E9E0; status_buffer->vp9_image_mask_reg_offset = 0x1E9F0; @@ -6013,24 +6014,13 @@ gen9_vp9_get_coded_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context, struct i965_coded_buffer_segment *coded_buf_seg) { - struct gen9_vp9_state 
*vp9_state = NULL; - struct vp9_encode_status_buffer_internal *status_buffer; - unsigned int *buf_ptr; + struct vp9_encode_status *vp9_encode_status; if (!encoder_context || !coded_buf_seg) return VA_STATUS_ERROR_INVALID_BUFFER; - vp9_state = (struct gen9_vp9_state *)(encoder_context->enc_priv_state); - - if (!vp9_state) - return VA_STATUS_ERROR_INVALID_BUFFER; - - status_buffer = &vp9_state->status_buffer; - - buf_ptr = (unsigned int *)((char *)coded_buf_seg + status_buffer->bs_byte_count_offset); - - /* the stream size is writen into the bs_byte_count_offset address of buffer */ - coded_buf_seg->base.size = *buf_ptr; + vp9_encode_status = (struct vp9_encode_status *)coded_buf_seg->codec_private_data; + coded_buf_seg->base.size = vp9_encode_status->bs_byte_count; /* One VACodedBufferSegment for VP9 will be added later. * It will be linked to the next element of coded_buf_seg->base.next diff --git a/src/gen9_vp9_encoder.h b/src/gen9_vp9_encoder.h index 8034240c..ad2d875b 100644 --- a/src/gen9_vp9_encoder.h +++ b/src/gen9_vp9_encoder.h @@ -1849,15 +1849,19 @@ enum INTEL_ENC_VP9_TU_MODE #define VP9_GOLDEN_REF 0x02 #define VP9_ALT_REF 0x04 -/* the vp9_encode_status_buffer is the shadow - * of vp9_encode_status_buffer_internal. - */ +struct vp9_encode_status +{ + uint32_t bs_byte_count; + uint32_t image_status_mask; + uint32_t image_status_ctrl; + uint32_t media_index; +}; + struct vp9_encode_status_buffer_internal { uint32_t bs_byte_count_offset; uint32_t reserved[15]; - /* the above is shared with the gen9_encode_status_buffer */ uint32_t image_status_mask_offset; uint32_t image_status_ctrl_offset; -- cgit v1.2.1 From e7a99f9846ec8e37e7132a8ae1f19c29dc68f71c Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 16 Aug 2016 12:10:55 +0800 Subject: Set cost to 0 for CHROMA INTRA mode Commit 1cd6795 causes quality drop for U/V plane. 
Reported-by: Wang, Fei W Signed-off-by: Xiang, Haihao Tested-by: Wang, Fei W Reviewed-by: Sean V Kelley (cherry picked from commit 99cd714fc467c141677f842cbcda0523b21d9571) --- src/gen6_mfc_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index b0342dab..9f041d83 100644 --- a/src/gen6_mfc_common.c +++ b/src/gen6_mfc_common.c @@ -776,7 +776,7 @@ void intel_h264_calc_mbmvcost_qp(int qp, lambda = intel_lambda_qp(qp); m_cost = lambda; - vme_state_message[MODE_CHROMA_INTRA] = intel_format_lutvalue(m_cost, 0x8f); + vme_state_message[MODE_CHROMA_INTRA] = 0; vme_state_message[MODE_REFID_COST] = intel_format_lutvalue(m_cost, 0x8f); if (slice_type == SLICE_TYPE_I) { -- cgit v1.2.1 From b3ddfd236b1563c23cba3173b47514f2adf5157e Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 16 Aug 2016 12:33:19 +0800 Subject: Remove unused fields in struct encode_state It also fixes a wrong assert() in gen8_mfc.c v2: Correct condition code when removing unused fields Signed-off-by: Xiang, Haihao Reviewed-by: Sean V Kelley (cherry picked from commit f13307275165d1d3c0f6efaf9ff387574f5c1701) --- src/gen8_mfc.c | 2 +- src/i965_drv_video.c | 29 +++-------------------------- src/i965_drv_video.h | 6 ------ 3 files changed, 4 insertions(+), 33 deletions(-) diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c index 2bedcad0..fc11e631 100644 --- a/src/gen8_mfc.c +++ b/src/gen8_mfc.c @@ -3221,7 +3221,7 @@ gen8_mfc_jpeg_pipeline_picture_programing(VADriverContextP ctx, //I dont think I need this for loop. Just to be consistent with other encoding logic... 
for(i = 0; i < encode_state->num_slice_params_ext; i++) { - assert(encode_state->slice_params && encode_state->slice_params_ext[i]->buffer); + assert(encode_state->slice_params_ext && encode_state->slice_params_ext[i]->buffer); slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[i]->buffer; for(j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) { diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 912896de..77b9f376 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2067,18 +2067,9 @@ i965_destroy_context(struct object_heap *heap, struct object_base *obj) i965_release_buffer_store(&obj_context->codec_state.proc.pipeline_param); } else if (obj_context->codec_type == CODEC_ENC) { - assert(obj_context->codec_state.encode.num_slice_params <= obj_context->codec_state.encode.max_slice_params); - i965_release_buffer_store(&obj_context->codec_state.encode.pic_param); - i965_release_buffer_store(&obj_context->codec_state.encode.seq_param); - i965_release_buffer_store(&obj_context->codec_state.encode.q_matrix); i965_release_buffer_store(&obj_context->codec_state.encode.huffman_table); - for (i = 0; i < obj_context->codec_state.encode.num_slice_params; i++) - i965_release_buffer_store(&obj_context->codec_state.encode.slice_params[i]); - - free(obj_context->codec_state.encode.slice_params); - assert(obj_context->codec_state.encode.num_slice_params_ext <= obj_context->codec_state.encode.max_slice_params_ext); i965_release_buffer_store(&obj_context->codec_state.encode.pic_param_ext); i965_release_buffer_store(&obj_context->codec_state.encode.seq_param_ext); @@ -2236,9 +2227,6 @@ i965_CreateContext(VADriverContextP ctx, obj_context->codec_type = CODEC_ENC; memset(&obj_context->codec_state.encode, 0, sizeof(obj_context->codec_state.encode)); obj_context->codec_state.encode.current_render_target = VA_INVALID_ID; - obj_context->codec_state.encode.max_slice_params = NUM_SLICES; - 
obj_context->codec_state.encode.slice_params = calloc(obj_context->codec_state.encode.max_slice_params, - sizeof(*obj_context->codec_state.encode.slice_params)); obj_context->codec_state.encode.max_packed_header_params_ext = NUM_SLICES; obj_context->codec_state.encode.packed_header_params_ext = calloc(obj_context->codec_state.encode.max_packed_header_params_ext, @@ -2817,14 +2805,6 @@ i965_BeginPicture(VADriverContextP ctx, if (obj_context->codec_type == CODEC_PROC) { obj_context->codec_state.proc.current_render_target = render_target; } else if (obj_context->codec_type == CODEC_ENC) { - i965_release_buffer_store(&obj_context->codec_state.encode.pic_param); - - for (i = 0; i < obj_context->codec_state.encode.num_slice_params; i++) { - i965_release_buffer_store(&obj_context->codec_state.encode.slice_params[i]); - } - - obj_context->codec_state.encode.num_slice_params = 0; - /* ext */ i965_release_buffer_store(&obj_context->codec_state.encode.pic_param_ext); @@ -3522,19 +3502,16 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context) WARN_ONCE("the packed header/data is not paired for encoding!\n"); return VA_STATUS_ERROR_INVALID_PARAMETER; } - if (!(obj_context->codec_state.encode.pic_param || - obj_context->codec_state.encode.pic_param_ext)) { + if (!obj_context->codec_state.encode.pic_param_ext) { return VA_STATUS_ERROR_INVALID_PARAMETER; } - if (!(obj_context->codec_state.encode.seq_param || - obj_context->codec_state.encode.seq_param_ext) && + if (!obj_context->codec_state.encode.seq_param_ext && (VAEntrypointEncPicture != obj_config->entrypoint)) { /* The seq_param is not mandatory for VP9 encoding */ if (obj_config->profile != VAProfileVP9Profile0) return VA_STATUS_ERROR_INVALID_PARAMETER; } - if ((obj_context->codec_state.encode.num_slice_params <=0) && - (obj_context->codec_state.encode.num_slice_params_ext <=0) && + if ((obj_context->codec_state.encode.num_slice_params_ext <=0) && ((obj_config->profile != VAProfileVP8Version0_3) && 
(obj_config->profile != VAProfileVP9Profile0))) { return VA_STATUS_ERROR_INVALID_PARAMETER; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index f67599e9..9fe042f9 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -145,15 +145,9 @@ struct decode_state struct encode_state { struct codec_state_base base; - struct buffer_store *seq_param; - struct buffer_store *pic_param; - struct buffer_store *pic_control; struct buffer_store *iq_matrix; struct buffer_store *q_matrix; - struct buffer_store **slice_params; struct buffer_store *huffman_table; - int max_slice_params; - int num_slice_params; /* for ext */ struct buffer_store *seq_param_ext; -- cgit v1.2.1 From 2feb3637652390ba8b2a9f1c505c189a9e30ba56 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Wed, 17 Aug 2016 09:59:53 +0800 Subject: Set the right transform 8x8 flag for Intra macroblock in VME output on BDW+ VME message doesn't output the transform 8x8 flag, the shader set the right flag accordingly. Signed-off-by: Xiang, Haihao Tested-by: Meng, Jia Reviewed-by: Sean V Kelley (cherry picked from commit b53fad9ba9759a3d26f162be2bcf31113b58a503) --- src/shaders/vme/inter_frame_gen8.asm | 13 +++++++++++++ src/shaders/vme/inter_frame_gen8.g8b | 9 +++++++-- src/shaders/vme/inter_frame_gen9.g9b | 9 +++++++-- src/shaders/vme/intra_frame_gen8.asm | 13 +++++++++++++ src/shaders/vme/intra_frame_gen8.g8b | 5 +++++ src/shaders/vme/intra_frame_gen9.g9b | 5 +++++ src/shaders/vme/vme8.inc | 5 +++++ 7 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/shaders/vme/inter_frame_gen8.asm b/src/shaders/vme/inter_frame_gen8.asm index 17bc412b..6296aa17 100644 --- a/src/shaders/vme/inter_frame_gen8.asm +++ b/src/shaders/vme/inter_frame_gen8.asm @@ -458,6 +458,19 @@ send (8) mlen sic_vme_msg_length rlen vme_wb_length {align1}; + +/* Check whether mb type is 0 */ +and.z.f0.0 (1) null<1>:UD vme_wb.0<0,1,0>:UD W0_INTRA_MB_TYPE_MASK {align1}; +(-f0.0) jmpi (1) __write_intra_output; + +/* Check whether 
intra mb mode is INTRA_8x8 */ +and (1) tmp_reg2<1>:UD vme_wb.0<0,1,0>:UD W0_INTRA_MB_MODE_MASK {align1}; +cmp.z.f0.0 (1) null<1>:UD tmp_reg2<0,1,0>:UD W0_INTRA_8x8 {align1}; + +/* Set transform 8x8 flag */ +(f0.0) or (1) vme_wb.0<1>:UD vme_wb.0<0,1,0>:UD W0_TRANSFORM_8x8_FLAG {align1}; + +__write_intra_output: /* * Oword Block Write message */ diff --git a/src/shaders/vme/inter_frame_gen8.g8b b/src/shaders/vme/inter_frame_gen8.g8b index d0cc25d7..ddc96fc4 100644 --- a/src/shaders/vme/inter_frame_gen8.g8b +++ b/src/shaders/vme/inter_frame_gen8.g8b @@ -146,13 +146,13 @@ { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000a20 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000a70 }, { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x000009c0 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000a10 }, { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 }, { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 }, @@ -183,6 +183,11 @@ { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x01000005, 0x20000200, 0x06000180, 0x001f0000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00000005, 0x24400208, 0x06000180, 0x00000030 }, + { 0x01000010, 0x20000200, 0x06000440, 0x00000010 }, + { 0x00010006, 0x21800208, 0x06000180, 0x00008000 }, { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, diff --git 
a/src/shaders/vme/inter_frame_gen9.g9b b/src/shaders/vme/inter_frame_gen9.g9b index b79042c9..1a7376ce 100644 --- a/src/shaders/vme/inter_frame_gen9.g9b +++ b/src/shaders/vme/inter_frame_gen9.g9b @@ -146,13 +146,13 @@ { 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000a20 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000a70 }, { 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 }, { 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 }, { 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 }, { 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 }, { 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x000009c0 }, + { 0x00000020, 0x34000000, 0x0e001400, 0x00000a10 }, { 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 }, { 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 }, { 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 }, @@ -183,6 +183,11 @@ { 0x00000001, 0x244c0608, 0x00000000, 0x00800000 }, { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, { 0x0d600031, 0x21800a08, 0x06000800, 0x10782000 }, + { 0x01000005, 0x20000200, 0x06000180, 0x001f0000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00000005, 0x24400208, 0x06000180, 0x00000030 }, + { 0x01000010, 0x20000200, 0x06000440, 0x00000010 }, + { 0x00010006, 0x21800208, 0x06000180, 0x00008000 }, { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, diff --git a/src/shaders/vme/intra_frame_gen8.asm b/src/shaders/vme/intra_frame_gen8.asm index 15b260ef..d43ec92f 100644 --- a/src/shaders/vme/intra_frame_gen8.asm +++ b/src/shaders/vme/intra_frame_gen8.asm @@ -157,6 +157,19 @@ send (8) mlen sic_vme_msg_length rlen vme_wb_length {align1}; + +/* Check whether mb type is 0 */ +and.z.f0.0 (1) null<1>:UD vme_wb.0<0,1,0>:UD 
W0_INTRA_MB_TYPE_MASK {align1}; +(-f0.0) jmpi (1) __write_intra_output; + +/* Check whether intra mb mode is INTRA_8x8 */ +and (1) tmp_reg2<1>:UD vme_wb.0<0,1,0>:UD W0_INTRA_MB_MODE_MASK {align1}; +cmp.z.f0.0 (1) null<1>:UD tmp_reg2<0,1,0>:UD W0_INTRA_8x8 {align1}; + +/* Set transform 8x8 flag */ +(f0.0) or (1) vme_wb.0<1>:UD vme_wb.0<0,1,0>:UD W0_TRANSFORM_8x8_FLAG {align1}; + +__write_intra_output: /* * Oword Block Write message */ diff --git a/src/shaders/vme/intra_frame_gen8.g8b b/src/shaders/vme/intra_frame_gen8.g8b index 56c7283b..a4ddf296 100644 --- a/src/shaders/vme/intra_frame_gen8.g8b +++ b/src/shaders/vme/intra_frame_gen8.g8b @@ -58,6 +58,11 @@ { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 }, { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, { 0x0d600031, 0x21800a08, 0x0e000800, 0x10782000 }, + { 0x01000005, 0x20000200, 0x06000180, 0x001f0000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00000005, 0x24400208, 0x06000180, 0x00000030 }, + { 0x01000010, 0x20000200, 0x06000440, 0x00000010 }, + { 0x00010006, 0x21800208, 0x06000180, 0x00008000 }, { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200208, 0x00000180, 0x00000000 }, { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, diff --git a/src/shaders/vme/intra_frame_gen9.g9b b/src/shaders/vme/intra_frame_gen9.g9b index 63d7455c..2d92cf47 100644 --- a/src/shaders/vme/intra_frame_gen9.g9b +++ b/src/shaders/vme/intra_frame_gen9.g9b @@ -58,6 +58,11 @@ { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 }, { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 }, { 0x0d600031, 0x21800a08, 0x06000800, 0x10782000 }, + { 0x01000005, 0x20000200, 0x06000180, 0x001f0000 }, + { 0x00110020, 0x34000000, 0x0e001400, 0x00000030 }, + { 0x00000005, 0x24400208, 0x06000180, 0x00000030 }, + { 0x01000010, 0x20000200, 0x06000440, 0x00000010 }, + { 0x00010006, 0x21800208, 0x06000180, 0x00008000 }, { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 }, { 0x00000001, 0x28200208, 0x00000180, 0x00000000 
}, { 0x00000001, 0x28240208, 0x00000190, 0x00000000 }, diff --git a/src/shaders/vme/vme8.inc b/src/shaders/vme/vme8.inc index 5f05ef20..c2bdaab4 100644 --- a/src/shaders/vme/vme8.inc +++ b/src/shaders/vme/vme8.inc @@ -129,6 +129,11 @@ define(`OBW_WRITE_COMMIT_CATEGORY', `0') /* category on Ivybridge */ define(`OBW_HEADER_PRESENT', `1') +define(`W0_INTRA_MB_TYPE_MASK', `0x1F0000:UD') +define(`W0_INTRA_MB_MODE_MASK', `0x30:UD') +define(`W0_INTRA_8x8', `0x10:UD') +define(`W0_TRANSFORM_8x8_FLAG', `0x8000:UD') + /* GRF registers * r0 header * r1~r4 constant buffer (reserved) -- cgit v1.2.1 From 070491b8d9e61adbde491f9923347f5aa10bf4ce Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 22 Aug 2016 16:52:51 +0800 Subject: Cleanup gen9_mfc.c This patch removes all unneeded functions/variables in gen9_mfc.c for mfc pipeline setting. Especially it doesn't include shaders/utils/mfc_batchbuffer_avc_intra.g9b and shaders/utils/mfc_batchbuffer_avc_inter.g9b in gen9_mfc.c because the two .g9b files are no longer included in the package created by 'make dist' C i965_drv_video_la-gen9_mfc.lo gen9_mfc.c:54:55: fatal error: shaders/utils/mfc_batchbuffer_avc_intra.g9b: No such file or directory #include "shaders/utils/mfc_batchbuffer_avc_intra.g9b" Signed-off-by: Xiang, Haihao Reviewed-by: Zhao Yakui (cherry picked from commit 2c213d3a071c540eaba23eb2ef82db6e33327a4f) --- src/gen9_mfc.c | 1764 +------------------------------------------------------- 1 file changed, 3 insertions(+), 1761 deletions(-) diff --git a/src/gen9_mfc.c b/src/gen9_mfc.c index 87b118fa..b3d6e78c 100644 --- a/src/gen9_mfc.c +++ b/src/gen9_mfc.c @@ -38,1736 +38,10 @@ #include "i965_structs.h" #include "i965_drv_video.h" #include "i965_encoder.h" -#include "i965_encoder_utils.h" #include "gen6_mfc.h" -#include "gen6_vme.h" -#include "intel_media.h" - -#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 -#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) -#define 
BINDING_TABLE_OFFSET(index) (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index) - -#define B0_STEP_REV 2 -#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV) - -static const uint32_t gen9_mfc_batchbuffer_avc_intra[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_intra.g9b" -}; - -static const uint32_t gen9_mfc_batchbuffer_avc_inter[][4] = { -#include "shaders/utils/mfc_batchbuffer_avc_inter.g9b" -}; - -static struct i965_kernel gen9_mfc_kernels[] = { - { - "MFC AVC INTRA BATCHBUFFER ", - MFC_BATCHBUFFER_AVC_INTRA, - gen9_mfc_batchbuffer_avc_intra, - sizeof(gen9_mfc_batchbuffer_avc_intra), - NULL - }, - - { - "MFC AVC INTER BATCHBUFFER ", - MFC_BATCHBUFFER_AVC_INTER, - gen9_mfc_batchbuffer_avc_inter, - sizeof(gen9_mfc_batchbuffer_avc_inter), - NULL - }, -}; - -static const uint32_t qm_flat[16] = { - 0x10101010, 0x10101010, 0x10101010, 0x10101010, - 0x10101010, 0x10101010, 0x10101010, 0x10101010, - 0x10101010, 0x10101010, 0x10101010, 0x10101010, - 0x10101010, 0x10101010, 0x10101010, 0x10101010 -}; - -static const uint32_t fqm_flat[32] = { - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000, - 0x10001000, 0x10001000, 0x10001000, 0x10001000 -}; - -#define INTER_MODE_MASK 0x03 -#define INTER_8X8 0x03 -#define INTER_16X8 0x01 -#define INTER_8X16 0x02 -#define SUBMB_SHAPE_MASK 0x00FF00 -#define INTER_16X16 0x00 - -#define INTER_MV8 (4 << 20) -#define INTER_MV32 (6 << 20) - -static void -gen9_mfc_pipe_mode_select(VADriverContextP ctx, - int standard_select, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct gen6_mfc_context *mfc_context = 
encoder_context->mfc_context; - - assert(standard_select == MFX_FORMAT_MPEG2 || - standard_select == MFX_FORMAT_AVC || - standard_select == MFX_FORMAT_VP8); - - BEGIN_BCS_BATCH(batch, 5); - - OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2)); - OUT_BCS_BATCH(batch, - (MFX_LONG_MODE << 17) | /* Must be long format for encoder */ - (MFD_MODE_VLD << 15) | /* VLD mode */ - (0 << 10) | /* Stream-Out Enable */ - ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */ - ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */ - (0 << 6) | /* frame statistics stream-out enable*/ - (0 << 5) | /* not in stitch mode */ - (1 << 4) | /* encoding mode */ - (standard_select << 0)); /* standard select: avc or mpeg2 */ - OUT_BCS_BATCH(batch, - (0 << 7) | /* expand NOA bus flag */ - (0 << 6) | /* disable slice-level clock gating */ - (0 << 5) | /* disable clock gating for NOA */ - (0 << 4) | /* terminate if AVC motion and POC table error occurs */ - (0 << 3) | /* terminate if AVC mbdata error occurs */ - (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */ - (0 << 1) | - (0 << 0)); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - ADVANCE_BCS_BATCH(batch); -} - -static void -gen9_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - - BEGIN_BCS_BATCH(batch, 6); - - OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, - ((mfc_context->surface_state.height - 1) << 18) | - ((mfc_context->surface_state.width - 1) << 4)); - OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ - (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ - (0 << 22) | /* surface object control state, FIXME??? 
*/ - ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */ - (0 << 2) | /* must be 0 for interleave U/V */ - (1 << 1) | /* must be tiled */ - (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */ - OUT_BCS_BATCH(batch, - (0 << 16) | /* must be 0 for interleave U/V */ - (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */ - OUT_BCS_BATCH(batch, 0); - - ADVANCE_BCS_BATCH(batch); -} - -static void -gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct gen6_vme_context *vme_context = encoder_context->vme_context; - int vme_size; - - BEGIN_BCS_BATCH(batch, 26); - - OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2)); - /* the DW1-3 is for the MFX indirect bistream offset */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks; - - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW6-10 is for MFX Indirect MV Object Base Address */ - OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size); - OUT_BCS_BATCH(batch, 0); - - /* the DW11-15 is for MFX IT-COFF. Not used on encoder */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW16-20 is for MFX indirect DBLK. 
Not used on encoder */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/ - OUT_BCS_RELOC(batch, - mfc_context->mfc_indirect_pak_bse_object.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - OUT_BCS_RELOC(batch, - mfc_context->mfc_indirect_pak_bse_object.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - mfc_context->mfc_indirect_pak_bse_object.end_offset); - OUT_BCS_BATCH(batch, 0); - - ADVANCE_BCS_BATCH(batch); -} - -static void -gen9_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - - int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; - int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; - - BEGIN_BCS_BATCH(batch, 16); - - OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2)); - /*DW1. 
MB setting of frame */ - OUT_BCS_BATCH(batch, - ((width_in_mbs * height_in_mbs - 1) & 0xFFFF)); - OUT_BCS_BATCH(batch, - ((height_in_mbs - 1) << 16) | - ((width_in_mbs - 1) << 0)); - /* DW3 QP setting */ - OUT_BCS_BATCH(batch, - (0 << 24) | /* Second Chroma QP Offset */ - (0 << 16) | /* Chroma QP Offset */ - (0 << 14) | /* Max-bit conformance Intra flag */ - (0 << 13) | /* Max Macroblock size conformance Inter flag */ - (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */ - (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */ - (0 << 8) | /* FIXME: Image Structure */ - (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */ - OUT_BCS_BATCH(batch, - (0 << 16) | /* Mininum Frame size */ - (0 << 15) | /* Disable reading of Macroblock Status Buffer */ - (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */ - (0 << 13) | /* CABAC 0 word insertion test enable */ - (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */ - (1 << 10) | /* Chroma Format IDC, 4:2:0 */ - (0 << 8) | /* FIXME: MbMvFormatFlag */ - (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/ - (0 << 6) | /* Only valid for VLD decoding mode */ - (0 << 5) | /* Constrained Intra Predition Flag, from PPS */ - (0 << 4) | /* Direct 8x8 inference flag */ - (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/ - (1 << 2) | /* Frame MB only flag */ - (0 << 1) | /* MBAFF mode is in active */ - (0 << 0)); /* Field picture flag */ - /* DW5 Trellis quantization */ - OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */ - OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */ - (0xBB8 << 16) | /* InterMbMaxSz */ - (0xEE8) ); /* IntraMbMaxSz */ - OUT_BCS_BATCH(batch, 0); /* Reserved */ - /* DW8. 
QP delta */ - OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ - OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */ - /* DW10. Bit setting for MB */ - OUT_BCS_BATCH(batch, 0x8C000000); - OUT_BCS_BATCH(batch, 0x00010000); - /* DW12. */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0x02010100); - /* DW14. For short format */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - ADVANCE_BCS_BATCH(batch); -} - -static void -gen9_mfc_qm_state(VADriverContextP ctx, - int qm_type, - const uint32_t *qm, - int qm_length, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - unsigned int qm_buffer[16]; - - assert(qm_length <= 16); - assert(sizeof(*qm) == 4); - memcpy(qm_buffer, qm, qm_length * 4); - - BEGIN_BCS_BATCH(batch, 18); - OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2)); - OUT_BCS_BATCH(batch, qm_type << 0); - intel_batchbuffer_data(batch, qm_buffer, 16 * 4); - ADVANCE_BCS_BATCH(batch); -} - -static void -gen9_mfc_avc_qm_state(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - const unsigned int *qm_4x4_intra; - const unsigned int *qm_4x4_inter; - const unsigned int *qm_8x8_intra; - const unsigned int *qm_8x8_inter; - VAEncSequenceParameterBufferH264 *pSeqParameter = - (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - VAEncPictureParameterBufferH264 *pPicParameter = - (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - - if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag - && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) { - qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat; - } else { - VAIQMatrixBufferH264 *qm; - assert(encode_state->q_matrix && encode_state->q_matrix->buffer); - qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer; - qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0]; - 
qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3]; - qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0]; - qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1]; - } - - gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context); - gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context); - gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context); - gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context); -} - -static void -gen9_mfc_fqm_state(VADriverContextP ctx, - int fqm_type, - const uint32_t *fqm, - int fqm_length, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - unsigned int fqm_buffer[32]; - - assert(fqm_length <= 32); - assert(sizeof(*fqm) == 4); - memcpy(fqm_buffer, fqm, fqm_length * 4); - - BEGIN_BCS_BATCH(batch, 34); - OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2)); - OUT_BCS_BATCH(batch, fqm_type << 0); - intel_batchbuffer_data(batch, fqm_buffer, 32 * 4); - ADVANCE_BCS_BATCH(batch); -} - -static void -gen9_mfc_avc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len) -{ - int i, j; - for (i = 0; i < len; i++) - for (j = 0; j < len; j++) - fqm[i * len + j] = (1 << 16) / qm[j * len + i]; -} - -static void -gen9_mfc_avc_fqm_state(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - VAEncSequenceParameterBufferH264 *pSeqParameter = - (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - VAEncPictureParameterBufferH264 *pPicParameter = - (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - - if (!pSeqParameter->seq_fields.bits.seq_scaling_matrix_present_flag - && !pPicParameter->pic_fields.bits.pic_scaling_matrix_present_flag) { - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, 
fqm_flat, 24, encoder_context); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context); - } else { - int i; - uint32_t fqm[32]; - VAIQMatrixBufferH264 *qm; - assert(encode_state->q_matrix && encode_state->q_matrix->buffer); - qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer; - - for (i = 0; i < 3; i++) - gen9_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context); - - for (i = 3; i < 6; i++) - gen9_mfc_avc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context); - - gen9_mfc_avc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context); - - gen9_mfc_avc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8); - gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context); - } -} - -static void -gen9_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context, - unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw, - int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag, - struct intel_batchbuffer *batch) -{ - if (batch == NULL) - batch = encoder_context->base.batch; - - if (data_bits_in_last_dw == 0) - data_bits_in_last_dw = 32; - - BEGIN_BCS_BATCH(batch, lenght_in_dws + 2); - - OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2)); - OUT_BCS_BATCH(batch, - (0 << 16) | /* always start at offset 0 */ - (data_bits_in_last_dw << 8) | - (skip_emul_byte_count << 4) | - (!!emulation_flag << 3) | - ((!!is_last_header) << 2) | - ((!!is_end_of_slice) << 1) | - (0 << 0)); /* FIXME: ??? 
*/ - intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4); - - ADVANCE_BCS_BATCH(batch); -} - - -static void gen9_mfc_init(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - dri_bo *bo; - int i; - int width_in_mbs = 0; - int height_in_mbs = 0; - int slice_batchbuffer_size; - - if (encoder_context->codec == CODEC_H264 || - encoder_context->codec == CODEC_H264_MVC) { - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - width_in_mbs = pSequenceParameter->picture_width_in_mbs; - height_in_mbs = pSequenceParameter->picture_height_in_mbs; - } else { - VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer; - - assert(encoder_context->codec == CODEC_MPEG2); - - width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16; - height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16; - } - - slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + - (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext; - - /*Encode common setup for MFC*/ - dri_bo_unreference(mfc_context->post_deblocking_output.bo); - mfc_context->post_deblocking_output.bo = NULL; - - dri_bo_unreference(mfc_context->pre_deblocking_output.bo); - mfc_context->pre_deblocking_output.bo = NULL; - - dri_bo_unreference(mfc_context->uncompressed_picture_source.bo); - mfc_context->uncompressed_picture_source.bo = NULL; - - dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); - mfc_context->mfc_indirect_pak_bse_object.bo = NULL; - - for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){ - if (mfc_context->direct_mv_buffers[i].bo != NULL) - dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo); - mfc_context->direct_mv_buffers[i].bo = 
NULL; - } - - for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){ - if (mfc_context->reference_surfaces[i].bo != NULL) - dri_bo_unreference(mfc_context->reference_surfaces[i].bo); - mfc_context->reference_surfaces[i].bo = NULL; - } - - dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "Buffer", - width_in_mbs * 64, - 64); - assert(bo); - mfc_context->intra_row_store_scratch_buffer.bo = bo; - - dri_bo_unreference(mfc_context->macroblock_status_buffer.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "Buffer", - width_in_mbs * height_in_mbs * 16, - 64); - assert(bo); - mfc_context->macroblock_status_buffer.bo = bo; - - dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "Buffer", - 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */ - 64); - assert(bo); - mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo; - - dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo); - bo = dri_bo_alloc(i965->intel.bufmgr, - "Buffer", - 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */ - 0x1000); - assert(bo); - mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo; - - dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo); - mfc_context->mfc_batchbuffer_surface.bo = NULL; - - dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo); - mfc_context->aux_batchbuffer_surface.bo = NULL; - - if (mfc_context->aux_batchbuffer) - intel_batchbuffer_free(mfc_context->aux_batchbuffer); - - mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size); - mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; - dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); - mfc_context->aux_batchbuffer_surface.pitch = 16; - mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16; - mfc_context->aux_batchbuffer_surface.size_block = 16; - - 
i965_gpe_context_init(ctx, &mfc_context->gpe_context); -} - -static void -gen9_mfc_pipe_buf_addr_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - int i; - - BEGIN_BCS_BATCH(batch, 61); - - OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2)); - - /* the DW1-3 is for pre_deblocking */ - if (mfc_context->pre_deblocking_output.bo) - OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); - else - OUT_BCS_BATCH(batch, 0); /* pre output addr */ - - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - /* the DW4-6 is for the post_deblocking */ - - /* post output addr */ - if (mfc_context->post_deblocking_output.bo) - OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); - else - OUT_BCS_BATCH(batch, 0); - - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW7-9 is for the uncompressed_picture */ - OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); /* uncompressed data */ - - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW10-12 is for the mb status */ - OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); /* StreamOut data*/ - - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW13-15 is for the intra_row_store_scratch */ - OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); - - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW16-18 is for the deblocking filter */ - OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo, - 
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); - - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW 19-50 is for Reference pictures*/ - for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { - if ( mfc_context->reference_surfaces[i].bo != NULL) { - OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); - } else { - OUT_BCS_BATCH(batch, 0); - } - - OUT_BCS_BATCH(batch, 0); - } - - OUT_BCS_BATCH(batch, 0); - - /* The DW 52-54 is for the MB status buffer */ - OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); - - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW 55-57 is the ILDB buffer */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW 58-60 is the second ILDB buffer */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - ADVANCE_BCS_BATCH(batch); -} - -static void -gen9_mfc_avc_directmode_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - - int i; - - BEGIN_BCS_BATCH(batch, 71); - - OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2)); - - /* Reference frames and Current frames */ - /* the DW1-32 is for the direct MV for reference */ - for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) { - if ( mfc_context->direct_mv_buffers[i].bo != NULL) { - OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); - OUT_BCS_BATCH(batch, 0); - } else { - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - } - } - - OUT_BCS_BATCH(batch, 0); - - /* the DW34-36 is the MV for the current reference */ - OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo, - 
I915_GEM_DOMAIN_INSTRUCTION, 0, - 0); - - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* POL list */ - for(i = 0; i < 32; i++) { - OUT_BCS_BATCH(batch, i/2); - } - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - ADVANCE_BCS_BATCH(batch); -} - - -static void -gen9_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - - BEGIN_BCS_BATCH(batch, 10); - - OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2)); - OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - /* the DW7-9 is for Bitplane Read Buffer Base Address */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - - ADVANCE_BCS_BATCH(batch); -} - - -static void gen9_mfc_avc_pipeline_picture_programing( VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - - mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context); - mfc_context->set_surface_state(ctx, encoder_context); - mfc_context->ind_obj_base_addr_state(ctx, encoder_context); - gen9_mfc_pipe_buf_addr_state(ctx, encoder_context); - gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context); - mfc_context->avc_img_state(ctx, encode_state, encoder_context); - mfc_context->avc_qm_state(ctx, encode_state, encoder_context); - mfc_context->avc_fqm_state(ctx, encode_state, encoder_context); - gen9_mfc_avc_directmode_state(ctx, encoder_context); - intel_mfc_avc_ref_idx_state(ctx, encode_state, 
encoder_context); -} - - -static VAStatus gen9_mfc_run(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - - intel_batchbuffer_flush(batch); //run the pipeline - - return VA_STATUS_SUCCESS; -} - - -static VAStatus -gen9_mfc_stop(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int *encoded_bits_size) -{ - VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN; - VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VACodedBufferSegment *coded_buffer_segment; - - vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment); - assert(vaStatus == VA_STATUS_SUCCESS); - *encoded_bits_size = coded_buffer_segment->size * 8; - i965_UnmapBuffer(ctx, pPicParameter->coded_buf); - - return VA_STATUS_SUCCESS; -} - - -static void -gen9_mfc_avc_slice_state(VADriverContextP ctx, - VAEncPictureParameterBufferH264 *pic_param, - VAEncSliceParameterBufferH264 *slice_param, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int rate_control_enable, - int qp, - struct intel_batchbuffer *batch) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; - int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; - int beginmb = slice_param->macroblock_address; - int endmb = beginmb + slice_param->num_macroblocks; - int beginx = beginmb % width_in_mbs; - int beginy = beginmb / width_in_mbs; - int nextx = endmb % width_in_mbs; - int nexty = endmb / width_in_mbs; - int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type); - int last_slice = (endmb == (width_in_mbs * height_in_mbs)); - int maxQpN, maxQpP; - unsigned char correct[6], grow, shrink; - int i; - int weighted_pred_idc = 0; 
- unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom; - unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom; - int num_ref_l0 = 0, num_ref_l1 = 0; - - if (batch == NULL) - batch = encoder_context->base.batch; - - if (slice_type == SLICE_TYPE_I) { - luma_log2_weight_denom = 0; - chroma_log2_weight_denom = 0; - } else if (slice_type == SLICE_TYPE_P) { - weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag; - num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; - - if (slice_param->num_ref_idx_active_override_flag) - num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; - } else if (slice_type == SLICE_TYPE_B) { - weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc; - num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1; - num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1; - - if (slice_param->num_ref_idx_active_override_flag) { - num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1; - num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1; - } - - if (weighted_pred_idc == 2) { - /* 8.4.3 - Derivation process for prediction weights (8-279) */ - luma_log2_weight_denom = 5; - chroma_log2_weight_denom = 5; - } - } - - maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier; - maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier; - - for (i = 0; i < 6; i++) - correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i]; - - grow = mfc_context->bit_rate_control_context[slice_type].GrowInit + - (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4); - shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit + - (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4); - - BEGIN_BCS_BATCH(batch, 11);; - - OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) ); - OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/ - - OUT_BCS_BATCH(batch, - (num_ref_l0 << 16) 
| - (num_ref_l1 << 24) | - (chroma_log2_weight_denom << 8) | - (luma_log2_weight_denom << 0)); - - OUT_BCS_BATCH(batch, - (weighted_pred_idc << 30) | - (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/ - (slice_param->disable_deblocking_filter_idc << 27) | - (slice_param->cabac_init_idc << 24) | - (qp<<16) | /*Slice Quantization Parameter*/ - ((slice_param->slice_beta_offset_div2 & 0xf) << 8) | - ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0)); - OUT_BCS_BATCH(batch, - (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/ - (beginx << 16) | - slice_param->macroblock_address ); - OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/ - OUT_BCS_BATCH(batch, - (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/ - (1 << 30) | /*ResetRateControlCounter*/ - (0 << 28) | /*RC Triggle Mode = Always Rate Control*/ - (4 << 24) | /*RC Stable Tolerance, middle level*/ - (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/ - (0 << 22) | /*QP mode, don't modfiy CBP*/ - (0 << 21) | /*MB Type Direct Conversion Enabled*/ - (0 << 20) | /*MB Type Skip Conversion Enabled*/ - (last_slice << 19) | /*IsLastSlice*/ - (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/ - (1 << 17) | /*HeaderPresentFlag*/ - (1 << 16) | /*SliceData PresentFlag*/ - (1 << 15) | /*TailPresentFlag*/ - (1 << 13) | /*RBSP NAL TYPE*/ - (0 << 12) ); /*CabacZeroWordInsertionEnable*/ - OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset); - OUT_BCS_BATCH(batch, - (maxQpN << 24) | /*Target QP - 24 is lowest QP*/ - (maxQpP << 16) | /*Target QP + 20 is highest QP*/ - (shrink << 8) | - (grow << 0)); - OUT_BCS_BATCH(batch, - (correct[5] << 20) | - (correct[4] << 16) | - (correct[3] << 12) | - (correct[2] << 8) | - (correct[1] << 4) | - (correct[0] << 0)); - OUT_BCS_BATCH(batch, 0); - - ADVANCE_BCS_BATCH(batch); -} - - -static int 
-gen9_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, - int qp,unsigned int *msg, - struct intel_encoder_context *encoder_context, - unsigned char target_mb_size, unsigned char max_mb_size, - struct intel_batchbuffer *batch) -{ - int len_in_dwords = 12; - unsigned int intra_msg; -#define INTRA_MSG_FLAG (1 << 13) -#define INTRA_MBTYPE_MASK (0x1F0000) - if (batch == NULL) - batch = encoder_context->base.batch; - - BEGIN_BCS_BATCH(batch, len_in_dwords); - - intra_msg = msg[0] & 0xC0FF; - intra_msg |= INTRA_MSG_FLAG; - intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8); - OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, - (0 << 24) | /* PackedMvNum, Debug*/ - (0 << 20) | /* No motion vector */ - (1 << 19) | /* CbpDcY */ - (1 << 18) | /* CbpDcU */ - (1 << 17) | /* CbpDcV */ - intra_msg); - - OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/ - OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */ - - OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */ - - /*Stuff for Intra MB*/ - OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/ - OUT_BCS_BATCH(batch, msg[2]); - OUT_BCS_BATCH(batch, msg[3]&0xFF); - - /*MaxSizeInWord and TargetSzieInWord*/ - OUT_BCS_BATCH(batch, (max_mb_size << 24) | - (target_mb_size << 16) ); - - OUT_BCS_BATCH(batch, 0); - - ADVANCE_BCS_BATCH(batch); - - return len_in_dwords; -} - -static int -gen9_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, - unsigned int *msg, unsigned int offset, - struct intel_encoder_context *encoder_context, - unsigned char target_mb_size,unsigned char max_mb_size, int slice_type, - struct intel_batchbuffer *batch) -{ - struct gen6_vme_context *vme_context = encoder_context->vme_context; - int len_in_dwords = 12; - unsigned int inter_msg = 0; - if (batch == NULL) - batch = encoder_context->base.batch; - { 
-#define MSG_MV_OFFSET 4 - unsigned int *mv_ptr; - mv_ptr = msg + MSG_MV_OFFSET; - /* MV of VME output is based on 16 sub-blocks. So it is necessary - * to convert them to be compatible with the format of AVC_PAK - * command. - */ - if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) { - /* MV[0] and MV[2] are replicated */ - mv_ptr[4] = mv_ptr[0]; - mv_ptr[5] = mv_ptr[1]; - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[6] = mv_ptr[8]; - mv_ptr[7] = mv_ptr[9]; - } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) { - /* MV[0] and MV[1] are replicated */ - mv_ptr[2] = mv_ptr[0]; - mv_ptr[3] = mv_ptr[1]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; - } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - !(msg[1] & SUBMB_SHAPE_MASK)) { - /* Don't touch MV[0] or MV[1] */ - mv_ptr[2] = mv_ptr[8]; - mv_ptr[3] = mv_ptr[9]; - mv_ptr[4] = mv_ptr[16]; - mv_ptr[5] = mv_ptr[17]; - mv_ptr[6] = mv_ptr[24]; - mv_ptr[7] = mv_ptr[25]; - } - } - - BEGIN_BCS_BATCH(batch, len_in_dwords); - - OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2)); - - inter_msg = 32; - /* MV quantity */ - if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) { - if (msg[1] & SUBMB_SHAPE_MASK) - inter_msg = 128; - } - OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/ - OUT_BCS_BATCH(batch, offset); - inter_msg = msg[0] & (0x1F00FFFF); - inter_msg |= INTER_MV8; - inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17)); - if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) && - (msg[1] & SUBMB_SHAPE_MASK)) { - inter_msg |= INTER_MV32; - } - - OUT_BCS_BATCH(batch, inter_msg); - - OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/ - OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */ -#if 0 - if ( slice_type == SLICE_TYPE_B) { - OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */ - } else { - OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */ - } -#else - OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* 
Last MB */ -#endif - - inter_msg = msg[1] >> 8; - /*Stuff for Inter MB*/ - OUT_BCS_BATCH(batch, inter_msg); - OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]); - OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]); - - /*MaxSizeInWord and TargetSzieInWord*/ - OUT_BCS_BATCH(batch, (max_mb_size << 24) | - (target_mb_size << 16) ); - - OUT_BCS_BATCH(batch, 0x0); - ADVANCE_BCS_BATCH(batch); - - return len_in_dwords; -} - -#define AVC_INTRA_RDO_OFFSET 4 -#define AVC_INTER_RDO_OFFSET 10 -#define AVC_INTER_MSG_OFFSET 8 -#define AVC_INTER_MV_OFFSET 48 -#define AVC_RDO_MASK 0xFFFF - -static void -gen9_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - struct intel_batchbuffer *slice_batch) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct gen6_vme_context *vme_context = encoder_context->vme_context; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; - unsigned int *msg = NULL, offset = 0; - unsigned char *msg_ptr = NULL; - int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; - int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; - int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); - int i,x,y; - int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; - unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned int tail_data[] = { 0x0, 0x0 }; - int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); - int is_intra = slice_type == 
SLICE_TYPE_I; - int qp_slice; - int qp_mb; - - qp_slice = qp; - if (rate_control_mode == VA_RC_CBR) { - qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - if (encode_state->slice_header_index[slice_index] == 0) { - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; - qp_slice = qp; - } - } - - /* only support for 8-bit pixel bit-depth */ - assert(pSequenceParameter->bit_depth_luma_minus8 == 0); - assert(pSequenceParameter->bit_depth_chroma_minus8 == 0); - assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); - assert(qp >= 0 && qp < 52); - - gen9_mfc_avc_slice_state(ctx, - pPicParameter, - pSliceParameter, - encode_state, encoder_context, - (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch); - - if ( slice_index == 0) - intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); - - intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); - - dri_bo_map(vme_context->vme_output.bo , 1); - msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; - - if (is_intra) { - msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block); - } else { - msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block); - } - - for (i = pSliceParameter->macroblock_address; - i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) { - int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) ); - x = i % width_in_mbs; - y = i / width_in_mbs; - msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block); - - if (vme_context->roi_enabled) { - qp_mb = *(vme_context->qp_per_mb + i); - } else - qp_mb = qp; - - if (is_intra) { - assert(msg); - gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); - } else { - int inter_rdo, intra_rdo; - 
inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK; - intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK; - offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET; - if (intra_rdo < inter_rdo) { - gen9_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp_mb, msg, encoder_context, 0, 0, slice_batch); - } else { - msg += AVC_INTER_MSG_OFFSET; - gen9_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp_mb, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch); - } - } - } - - dri_bo_unmap(vme_context->vme_output.bo); - - if ( last_slice ) { - mfc_context->insert_object(ctx, encoder_context, - tail_data, 2, 8, - 2, 1, 1, 0, slice_batch); - } else { - mfc_context->insert_object(ctx, encoder_context, - tail_data, 1, 8, - 1, 1, 1, 0, slice_batch); - } - - -} - -static dri_bo * -gen9_mfc_avc_software_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct intel_batchbuffer *batch; - dri_bo *batch_bo; - int i; - - batch = mfc_context->aux_batchbuffer; - batch_bo = batch->buffer; - for (i = 0; i < encode_state->num_slice_params_ext; i++) { - gen9_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch); - } - - intel_batchbuffer_align(batch, 8); - - BEGIN_BCS_BATCH(batch, 2); - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END); - ADVANCE_BCS_BATCH(batch); - - dri_bo_reference(batch_bo); - intel_batchbuffer_free(batch); - mfc_context->aux_batchbuffer = NULL; - - return batch_bo; -} - -static void -gen9_mfc_batchbuffer_surfaces_input(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) - -{ - struct gen6_vme_context *vme_context = encoder_context->vme_context; - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - - assert(vme_context->vme_output.bo); - 
mfc_context->buffer_suface_setup(ctx, - &mfc_context->gpe_context, - &vme_context->vme_output, - BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT), - SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT)); - assert(mfc_context->aux_batchbuffer_surface.bo); - mfc_context->buffer_suface_setup(ctx, - &mfc_context->gpe_context, - &mfc_context->aux_batchbuffer_surface, - BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER), - SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER)); -} - -static void -gen9_mfc_batchbuffer_surfaces_output(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) - -{ - struct i965_driver_data *i965 = i965_driver_data(ctx); - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - int width_in_mbs = pSequenceParameter->picture_width_in_mbs; - int height_in_mbs = pSequenceParameter->picture_height_in_mbs; - mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1; - mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */ - mfc_context->mfc_batchbuffer_surface.pitch = 16; - mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr, - "MFC batchbuffer", - mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block, - 0x1000); - mfc_context->buffer_suface_setup(ctx, - &mfc_context->gpe_context, - &mfc_context->mfc_batchbuffer_surface, - BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER), - SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER)); -} - -static void -gen9_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - gen9_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context); - gen9_mfc_batchbuffer_surfaces_output(ctx, encode_state, 
encoder_context); -} - -static void -gen9_mfc_batchbuffer_idrt_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct gen6_interface_descriptor_data *desc; - int i; - dri_bo *bo; - - bo = mfc_context->gpe_context.idrt.bo; - dri_bo_map(bo, 1); - assert(bo->virtual); - desc = bo->virtual; - - for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) { - struct i965_kernel *kernel; - - kernel = &mfc_context->gpe_context.kernels[i]; - assert(sizeof(*desc) == 32); - - /*Setup the descritor table*/ - memset(desc, 0, sizeof(*desc)); - desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6); - desc->desc2.sampler_count = 0; - desc->desc2.sampler_state_pointer = 0; - desc->desc3.binding_table_entry_count = 2; - desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5); - desc->desc4.constant_urb_entry_read_offset = 0; - desc->desc4.constant_urb_entry_read_length = 4; - - /*kernel start*/ - dri_bo_emit_reloc(bo, - I915_GEM_DOMAIN_INSTRUCTION, 0, - 0, - i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0), - kernel->bo); - desc++; - } - - dri_bo_unmap(bo); -} - -static void -gen9_mfc_batchbuffer_constant_setup(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - - (void)mfc_context; -} - -static void -gen9_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch, - int index, - int head_offset, - int batchbuffer_offset, - int head_size, - int tail_size, - int number_mb_cmds, - int first_object, - int last_object, - int last_slice, - int mb_x, - int mb_y, - int width_in_mbs, - int qp) -{ - BEGIN_BATCH(batch, 12); - - OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2)); - OUT_BATCH(batch, index); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - OUT_BATCH(batch, 0); - 
OUT_BATCH(batch, 0); - - /*inline data */ - OUT_BATCH(batch, head_offset); - OUT_BATCH(batch, batchbuffer_offset); - OUT_BATCH(batch, - head_size << 16 | - tail_size); - OUT_BATCH(batch, - number_mb_cmds << 16 | - first_object << 2 | - last_object << 1 | - last_slice); - OUT_BATCH(batch, - mb_y << 8 | - mb_x); - OUT_BATCH(batch, - qp << 16 | - width_in_mbs); - - ADVANCE_BATCH(batch); -} - -static void -gen9_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx, - struct intel_encoder_context *encoder_context, - VAEncSliceParameterBufferH264 *slice_param, - int head_offset, - unsigned short head_size, - unsigned short tail_size, - int batchbuffer_offset, - int qp, - int last_slice) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; - int total_mbs = slice_param->num_macroblocks; - int number_mb_cmds = 128; - int starting_mb = 0; - int last_object = 0; - int first_object = 1; - int i; - int mb_x, mb_y; - int index = (slice_param->slice_type == SLICE_TYPE_I) ? 
MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER; - - for (i = 0; i < total_mbs / number_mb_cmds; i++) { - last_object = (total_mbs - starting_mb) == number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); - - starting_mb += number_mb_cmds; - - gen9_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); - - if (first_object) { - head_offset += head_size; - batchbuffer_offset += head_size; - } - - if (last_object) { - head_offset += tail_size; - batchbuffer_offset += tail_size; - } - - batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD; - - first_object = 0; - } - - if (!last_object) { - last_object = 1; - number_mb_cmds = total_mbs % number_mb_cmds; - mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs; - mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs; - assert(mb_x <= 255 && mb_y <= 255); - starting_mb += number_mb_cmds; - - gen9_mfc_batchbuffer_emit_object_command(batch, - index, - head_offset, - batchbuffer_offset, - head_size, - tail_size, - number_mb_cmds, - first_object, - last_object, - last_slice, - mb_x, - mb_y, - width_in_mbs, - qp); - } -} - -/* - * return size in Owords (16bytes) - */ -static int -gen9_mfc_avc_batchbuffer_slice(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context, - int slice_index, - int batchbuffer_offset) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer; - VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer; - VAEncPictureParameterBufferH264 *pPicParameter = 
(VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer; - VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; - int width_in_mbs = (mfc_context->surface_state.width + 15) / 16; - int height_in_mbs = (mfc_context->surface_state.height + 15) / 16; - int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs); - int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta; - unsigned int rate_control_mode = encoder_context->rate_control_mode; - unsigned int tail_data[] = { 0x0, 0x0 }; - long head_offset; - int old_used = intel_batchbuffer_used_size(slice_batch), used; - unsigned short head_size, tail_size; - int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type); - int qp_slice; - - qp_slice = qp; - if (rate_control_mode == VA_RC_CBR) { - qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY; - if (encode_state->slice_header_index[slice_index] == 0) { - pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp; - qp_slice = qp; - } - } - - /* only support for 8-bit pixel bit-depth */ - assert(pSequenceParameter->bit_depth_luma_minus8 == 0); - assert(pSequenceParameter->bit_depth_chroma_minus8 == 0); - assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52); - assert(qp >= 0 && qp < 52); - - head_offset = old_used / 16; - gen9_mfc_avc_slice_state(ctx, - pPicParameter, - pSliceParameter, - encode_state, - encoder_context, - (rate_control_mode == VA_RC_CBR), - qp_slice, - slice_batch); - - if (slice_index == 0) - intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch); - - - intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch); - - - intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - head_size = 
(used - old_used) / 16; - old_used = used; - - /* tail */ - if (last_slice) { - mfc_context->insert_object(ctx, - encoder_context, - tail_data, - 2, - 8, - 2, - 1, - 1, - 0, - slice_batch); - } else { - mfc_context->insert_object(ctx, - encoder_context, - tail_data, - 1, - 8, - 1, - 1, - 1, - 0, - slice_batch); - } - - intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */ - used = intel_batchbuffer_used_size(slice_batch); - tail_size = (used - old_used) / 16; - - gen9_mfc_avc_batchbuffer_slice_command(ctx, - encoder_context, - pSliceParameter, - head_offset, - head_size, - tail_size, - batchbuffer_offset, - qp, - last_slice); - - return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD; -} - -static void -gen9_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - struct intel_batchbuffer *batch = encoder_context->base.batch; - int i, size, offset = 0; - - intel_batchbuffer_start_atomic(batch, 0x4000); - gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch); - - for ( i = 0; i < encode_state->num_slice_params_ext; i++) { - size = gen9_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset); - offset += size; - } - - intel_batchbuffer_end_atomic(batch); - intel_batchbuffer_flush(batch); -} - -static void -gen9_mfc_build_avc_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - gen9_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context); - gen9_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context); - gen9_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context); - gen9_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context); -} - -static dri_bo * -gen9_mfc_avc_hardware_batchbuffer(VADriverContextP ctx, - struct encode_state *encode_state, 
- struct intel_encoder_context *encoder_context) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - - gen9_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context); - dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo); - - return mfc_context->mfc_batchbuffer_surface.bo; -} - - -static void -gen9_mfc_avc_pipeline_programing(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct intel_batchbuffer *batch = encoder_context->base.batch; - dri_bo *slice_batch_bo; - - if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) { - fprintf(stderr, "Current VA driver don't support interlace mode!\n"); - assert(0); - return; - } - - if (encoder_context->soft_batch_force) - slice_batch_bo = gen9_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context); - else - slice_batch_bo = gen9_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context); - - - // begin programing - intel_batchbuffer_start_atomic_bcs(batch, 0x4000); - intel_batchbuffer_emit_mi_flush(batch); - - // picture level programing - gen9_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context); - - BEGIN_BCS_BATCH(batch, 3); - OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); - OUT_BCS_RELOC(batch, - slice_batch_bo, - I915_GEM_DOMAIN_COMMAND, 0, - 0); - OUT_BCS_BATCH(batch, 0); - ADVANCE_BCS_BATCH(batch); - - // end programing - intel_batchbuffer_end_atomic(batch); - - dri_bo_unreference(slice_batch_bo); -} - - -static VAStatus -gen9_mfc_avc_encode_picture(VADriverContextP ctx, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; - unsigned int rate_control_mode = encoder_context->rate_control_mode; - int current_frame_bits_size; - int sts; - - for (;;) { - gen9_mfc_init(ctx, encode_state, encoder_context); - intel_mfc_avc_prepare(ctx, encode_state, 
encoder_context); - /*Programing bcs pipeline*/ - gen9_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline - gen9_mfc_run(ctx, encode_state, encoder_context); - if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) { - gen9_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size); - sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size); - if (sts == BRC_NO_HRD_VIOLATION) { - intel_mfc_hrd_context_update(encode_state, mfc_context); - break; - } - else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) { - if (!mfc_context->hrd.violation_noted) { - fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow"); - mfc_context->hrd.violation_noted = 1; - } - return VA_STATUS_SUCCESS; - } - } else { - break; - } - } - - return VA_STATUS_SUCCESS; -} - -static void -gen9_mfc_context_destroy(void *context) -{ - struct gen6_mfc_context *mfc_context = context; - int i; - - dri_bo_unreference(mfc_context->post_deblocking_output.bo); - mfc_context->post_deblocking_output.bo = NULL; - - dri_bo_unreference(mfc_context->pre_deblocking_output.bo); - mfc_context->pre_deblocking_output.bo = NULL; - - dri_bo_unreference(mfc_context->uncompressed_picture_source.bo); - mfc_context->uncompressed_picture_source.bo = NULL; - - dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); - mfc_context->mfc_indirect_pak_bse_object.bo = NULL; - - for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){ - dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo); - mfc_context->direct_mv_buffers[i].bo = NULL; - } - - dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo); - mfc_context->intra_row_store_scratch_buffer.bo = NULL; - - dri_bo_unreference(mfc_context->macroblock_status_buffer.bo); - mfc_context->macroblock_status_buffer.bo = NULL; - - dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo); - 
mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL; - - dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo); - mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL; - - - for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){ - dri_bo_unreference(mfc_context->reference_surfaces[i].bo); - mfc_context->reference_surfaces[i].bo = NULL; - } - - i965_gpe_context_destroy(&mfc_context->gpe_context); - - dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo); - mfc_context->mfc_batchbuffer_surface.bo = NULL; - - dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo); - mfc_context->aux_batchbuffer_surface.bo = NULL; - - if (mfc_context->aux_batchbuffer) - intel_batchbuffer_free(mfc_context->aux_batchbuffer); - - mfc_context->aux_batchbuffer = NULL; - - free(mfc_context); -} - -static VAStatus gen9_mfc_pipeline(VADriverContextP ctx, - VAProfile profile, - struct encode_state *encode_state, - struct intel_encoder_context *encoder_context) -{ - VAStatus vaStatus; - - switch (profile) { - case VAProfileH264ConstrainedBaseline: - case VAProfileH264Main: - case VAProfileH264High: - case VAProfileH264MultiviewHigh: - case VAProfileH264StereoHigh: - vaStatus = gen9_mfc_avc_encode_picture(ctx, encode_state, encoder_context); - break; - - default: - vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; - break; - } - - return vaStatus; -} Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context) { - struct gen6_mfc_context *mfc_context = NULL; - - if ((encoder_context->codec == CODEC_H264) || (encoder_context->codec == CODEC_H264_MVC)) { return gen8_mfc_context_init(ctx, encoder_context); @@ -1778,39 +52,7 @@ Bool gen9_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e (encoder_context->codec == CODEC_MPEG2)) return gen8_mfc_context_init(ctx, encoder_context); - mfc_context = calloc(1, sizeof(struct gen6_mfc_context)); - assert(mfc_context); - 
mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6; - - mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS; - mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data); - - mfc_context->gpe_context.curbe.length = 32 * 4; - - mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1; - mfc_context->gpe_context.vfe_state.num_urb_entries = 16; - mfc_context->gpe_context.vfe_state.gpgpu_mode = 0; - mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1; - mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1; - - i965_gpe_load_kernels(ctx, - &mfc_context->gpe_context, - gen9_mfc_kernels, - NUM_MFC_KERNEL); - - mfc_context->pipe_mode_select = gen9_mfc_pipe_mode_select; - mfc_context->set_surface_state = gen9_mfc_surface_state; - mfc_context->ind_obj_base_addr_state = gen9_mfc_ind_obj_base_addr_state; - mfc_context->avc_img_state = gen9_mfc_avc_img_state; - mfc_context->avc_qm_state = gen9_mfc_avc_qm_state; - mfc_context->avc_fqm_state = gen9_mfc_avc_fqm_state; - mfc_context->insert_object = gen9_mfc_avc_insert_object; - mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup; - - encoder_context->mfc_context = mfc_context; - encoder_context->mfc_context_destroy = gen9_mfc_context_destroy; - encoder_context->mfc_pipeline = gen9_mfc_pipeline; - encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare; - - return True; + /* Other profile/entrypoint pairs never get here, see gen9_enc_hw_context_init() */ + assert(0); + return False; } -- cgit v1.2.1 From f2a5584bc55f28a2de0b896848d6f0c8eb847d83 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Tue, 23 Aug 2016 09:53:02 +0800 Subject: Remove unused .g9a/.g9b files v2: remove unused .g9a files, and change the short commit log as well Signed-off-by: Xiang, Haihao Reviewed-by: Zhao Yakui (cherry picked from commit 455a725ee292601a0a82f7878bc753fca468a826) --- 
src/shaders/utils/mfc_batchbuffer_avc_inter.g9a | 33 --------- src/shaders/utils/mfc_batchbuffer_avc_inter.g9b | 90 ------------------------- src/shaders/utils/mfc_batchbuffer_avc_intra.g9a | 33 --------- src/shaders/utils/mfc_batchbuffer_avc_intra.g9b | 66 ------------------ 4 files changed, 222 deletions(-) delete mode 100644 src/shaders/utils/mfc_batchbuffer_avc_inter.g9a delete mode 100644 src/shaders/utils/mfc_batchbuffer_avc_inter.g9b delete mode 100644 src/shaders/utils/mfc_batchbuffer_avc_intra.g9a delete mode 100644 src/shaders/utils/mfc_batchbuffer_avc_intra.g9b diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g9a b/src/shaders/utils/mfc_batchbuffer_avc_inter.g9a deleted file mode 100644 index ebc884ce..00000000 --- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g9a +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Xiang Haihao - * Li Zhong - */ - -#include "mfc_batchbuffer.inc" -#include "mfc_batchbuffer_head.asm" -#include "mfc_batchbuffer_avc_inter.asm" -#include "mfc_batchbuffer_tail.asm" -#include "end_thread.asm" - diff --git a/src/shaders/utils/mfc_batchbuffer_avc_inter.g9b b/src/shaders/utils/mfc_batchbuffer_avc_inter.g9b deleted file mode 100644 index dfea9163..00000000 --- a/src/shaders/utils/mfc_batchbuffer_avc_inter.g9b +++ /dev/null @@ -1,90 +0,0 @@ - { 0x00800001, 0x21000608, 0x00000000, 0x00000000 }, - { 0x00800001, 0x21400608, 0x00000000, 0x00000000 }, - { 0x00000001, 0x21342288, 0x00000014, 0x00000000 }, - { 0x00000001, 0x21280208, 0x000000a0, 0x00000000 }, - { 0x00000001, 0x21542288, 0x00000014, 0x00000000 }, - { 0x00000001, 0x21480208, 0x000000a4, 0x00000000 }, - { 0x01000005, 0x20001240, 0x160000ac, 0x00040004 }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000090 }, - { 0x00600001, 0x28000208, 0x008d0120, 0x00000000 }, - { 0x0a800031, 0x22000a48, 0x06000800, 0x02180001 }, - { 0x00600001, 0x28000208, 0x008d0140, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d0200, 0x00000000 }, - { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0002 }, - { 0x00000040, 0x21280208, 0x06000128, 0x00000001 }, - { 0x00000040, 0x21480208, 0x06000148, 0x00000001 }, - { 0x01000040, 0x20aa1a68, 0x1e0000aa, 0xffffffff }, - { 0x00110020, 0x34000000, 0x0e001400, 0xffffff70 }, - { 0x00000041, 0x21e01208, 0x220000b4, 0x000000b1 }, - { 0x00000040, 0x21e00208, 0x220001e0, 0x000000b0 }, - { 0x00000041, 0x21080208, 0x060001e0, 0x0000000a }, - { 0x00000040, 0x21080208, 0x16000108, 0x00080008 }, - { 0x00600001, 0x28000208, 0x008d0100, 0x00000000 }, - { 0x0a800031, 0x22000a48, 0x06000800, 0x02180200 }, - { 0x00800001, 0x23400608, 0x00000000, 0x00000000 }, - { 0x01000005, 0x20001241, 0x160000ac, 0x00020002 }, - { 0x01000005, 0x20000200, 0x06000200, 0x00002000 }, - { 0x00110020, 0x34000000, 0x0e001400, 0x00000130 }, - { 0x00000001, 0x23400608, 0x00000000, 0x71490009 }, - { 
0x00000041, 0x23480208, 0x060001e0, 0x000000a0 }, - { 0x00000001, 0x23540608, 0x00000000, 0x000f000f }, - { 0x00000001, 0x23440208, 0x00000208, 0x00000000 }, - { 0x00000001, 0x234c0208, 0x00000200, 0x00000000 }, - { 0x00000040, 0x23501208, 0x060000b0, 0xffff0000 }, - { 0x00000040, 0x20b02288, 0x160000b0, 0x00010001 }, - { 0x01000010, 0x20001240, 0x220000b4, 0x000000b0 }, - { 0x00010001, 0x20b01688, 0x10000000, 0x00000000 }, - { 0x00010040, 0x20b12288, 0x160000b1, 0x00010001 }, - { 0x00000001, 0x23580608, 0x00000000, 0x00000000 }, - { 0x00110001, 0x23580609, 0x00000000, 0x04000000 }, - { 0x01000010, 0x20001240, 0x160000ae, 0x00010001 }, - { 0x00110001, 0x23580608, 0x00000000, 0x00000000 }, - { 0x00000040, 0x23580208, 0x22000358, 0x000000b6 }, - { 0x00000001, 0x235c0208, 0x00000204, 0x00000000 }, - { 0x00000001, 0x23600208, 0x000000b8, 0x00000000 }, - { 0x00000001, 0x23640208, 0x000000bc, 0x00000000 }, - { 0x00000020, 0x34000000, 0x0e001400, 0x00000110 }, - { 0x00000001, 0x23400608, 0x00000000, 0x71490009 }, - { 0x00000001, 0x23540608, 0x00000000, 0x000f000f }, - { 0x00000040, 0x23501208, 0x060000b0, 0xffff0000 }, - { 0x00000040, 0x20b02288, 0x160000b0, 0x00010001 }, - { 0x01000010, 0x20001240, 0x220000b4, 0x000000b0 }, - { 0x00010001, 0x20b01688, 0x10000000, 0x00000000 }, - { 0x00010040, 0x20b12288, 0x160000b1, 0x00010001 }, - { 0x00000001, 0x23580608, 0x00000000, 0x00000000 }, - { 0x00110001, 0x23580609, 0x00000000, 0x04000000 }, - { 0x01000010, 0x20001240, 0x160000ae, 0x00010001 }, - { 0x00110001, 0x23580608, 0x00000000, 0x00000000 }, - { 0x00000040, 0x23580208, 0x22000358, 0x000000b6 }, - { 0x00000005, 0x234c0208, 0x0e000200, 0x0000ffff }, - { 0x00000040, 0x234c0208, 0x0600034c, 0x000e0000 }, - { 0x00000001, 0x235c0208, 0x00000204, 0x00000000 }, - { 0x00000001, 0x23600208, 0x00000208, 0x00000000 }, - { 0x00000005, 0x23640208, 0x0600020c, 0x000000fc }, - { 0x00600001, 0x28000208, 0x008d0140, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d0340, 0x00000000 }, - { 
0x00600001, 0x28400208, 0x008d0360, 0x00000000 }, - { 0x00000040, 0x21080208, 0x06000108, 0x0000000a }, - { 0x0a800031, 0x20000a60, 0x06000800, 0x060a0302 }, - { 0x00000040, 0x21480208, 0x06000148, 0x00000004 }, - { 0x00000040, 0x21e00208, 0x060001e0, 0x00000001 }, - { 0x01000040, 0x20ae1a68, 0x1e0000ae, 0xffffffff }, - { 0x00110020, 0x34000000, 0x0e001400, 0xfffffcd0 }, - { 0x00010020, 0x34000001, 0x0e001400, 0x000000f0 }, - { 0x00600001, 0x28000208, 0x008d0120, 0x00000000 }, - { 0x0a800031, 0x22000a48, 0x06000800, 0x02180001 }, - { 0x00600001, 0x28000208, 0x008d0140, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d0200, 0x00000000 }, - { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0002 }, - { 0x00000040, 0x21280208, 0x06000128, 0x00000001 }, - { 0x00000040, 0x21480208, 0x06000148, 0x00000001 }, - { 0x01000040, 0x20a81a68, 0x1e0000a8, 0xffffffff }, - { 0x00110020, 0x34000000, 0x0e001400, 0xffffff70 }, - { 0x01000005, 0x20001240, 0x160000ac, 0x00010001 }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, - { 0x00600001, 0x28000208, 0x008d0140, 0x00000000 }, - { 0x00400001, 0x28200608, 0x00000000, 0x00000000 }, - { 0x00000001, 0x28240608, 0x00000000, 0x05000000 }, - { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0002 }, - { 0x00600001, 0x28000208, 0x008d0000, 0x00000000 }, - { 0x07800031, 0x24000a00, 0x06000800, 0x82000010 }, diff --git a/src/shaders/utils/mfc_batchbuffer_avc_intra.g9a b/src/shaders/utils/mfc_batchbuffer_avc_intra.g9a deleted file mode 100644 index 22d2ce75..00000000 --- a/src/shaders/utils/mfc_batchbuffer_avc_intra.g9a +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to 
permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Xiang Haihao - * Li Zhong - */ - -#include "mfc_batchbuffer.inc" -#include "mfc_batchbuffer_head.asm" -#include "mfc_batchbuffer_avc_intra.asm" -#include "mfc_batchbuffer_tail.asm" -#include "end_thread.asm" - diff --git a/src/shaders/utils/mfc_batchbuffer_avc_intra.g9b b/src/shaders/utils/mfc_batchbuffer_avc_intra.g9b deleted file mode 100644 index 8f4d9167..00000000 --- a/src/shaders/utils/mfc_batchbuffer_avc_intra.g9b +++ /dev/null @@ -1,66 +0,0 @@ - { 0x00800001, 0x21000608, 0x00000000, 0x00000000 }, - { 0x00800001, 0x21400608, 0x00000000, 0x00000000 }, - { 0x00000001, 0x21342288, 0x00000014, 0x00000000 }, - { 0x00000001, 0x21280208, 0x000000a0, 0x00000000 }, - { 0x00000001, 0x21542288, 0x00000014, 0x00000000 }, - { 0x00000001, 0x21480208, 0x000000a4, 0x00000000 }, - { 0x01000005, 0x20001240, 0x160000ac, 0x00040004 }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000090 }, - { 0x00600001, 0x28000208, 0x008d0120, 0x00000000 }, - { 0x0a800031, 0x22000a48, 0x06000800, 0x02180001 }, - { 0x00600001, 0x28000208, 0x008d0140, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d0200, 0x00000000 }, - { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0002 }, - { 0x00000040, 0x21280208, 0x06000128, 0x00000001 }, 
- { 0x00000040, 0x21480208, 0x06000148, 0x00000001 }, - { 0x01000040, 0x20aa1a68, 0x1e0000aa, 0xffffffff }, - { 0x00110020, 0x34000000, 0x0e001400, 0xffffff70 }, - { 0x00000041, 0x21081208, 0x220000b4, 0x000000b1 }, - { 0x00000040, 0x21080208, 0x22000108, 0x000000b0 }, - { 0x00800001, 0x23400608, 0x00000000, 0x00000000 }, - { 0x00000001, 0x23400608, 0x00000000, 0x71490009 }, - { 0x00000001, 0x23540608, 0x00000000, 0x000f000f }, - { 0x01000005, 0x20001241, 0x160000ac, 0x00020002 }, - { 0x00600001, 0x28000208, 0x008d0100, 0x00000000 }, - { 0x0a800031, 0x22000a48, 0x06000800, 0x02180000 }, - { 0x00000040, 0x23501208, 0x060000b0, 0xffff0000 }, - { 0x00000040, 0x20b02288, 0x160000b0, 0x00010001 }, - { 0x01000010, 0x20001240, 0x220000b4, 0x000000b0 }, - { 0x00010001, 0x20b01688, 0x10000000, 0x00000000 }, - { 0x00010040, 0x20b12288, 0x160000b1, 0x00010001 }, - { 0x00000001, 0x23580608, 0x00000000, 0x00000000 }, - { 0x00110001, 0x23580609, 0x00000000, 0x04000000 }, - { 0x01000010, 0x20001240, 0x160000ae, 0x00010001 }, - { 0x00110001, 0x23580608, 0x00000000, 0x00000000 }, - { 0x00000040, 0x23580208, 0x22000358, 0x000000b6 }, - { 0x00000005, 0x234c0208, 0x0e000200, 0x0000ffff }, - { 0x00000040, 0x234c0208, 0x0600034c, 0x000e0000 }, - { 0x00000001, 0x235c0208, 0x00000204, 0x00000000 }, - { 0x00000001, 0x23600208, 0x00000208, 0x00000000 }, - { 0x00000005, 0x23640208, 0x0600020c, 0x000000fc }, - { 0x00600001, 0x28000208, 0x008d0140, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d0340, 0x00000000 }, - { 0x00600001, 0x28400208, 0x008d0360, 0x00000000 }, - { 0x00000040, 0x21080208, 0x06000108, 0x00000001 }, - { 0x0a800031, 0x20000a60, 0x06000800, 0x060a0302 }, - { 0x00000040, 0x21480208, 0x06000148, 0x00000004 }, - { 0x01000040, 0x20ae1a68, 0x1e0000ae, 0xffffffff }, - { 0x00110020, 0x34000000, 0x0e001400, 0xfffffe70 }, - { 0x00010020, 0x34000001, 0x0e001400, 0x000000f0 }, - { 0x00600001, 0x28000208, 0x008d0120, 0x00000000 }, - { 0x0a800031, 0x22000a48, 0x06000800, 0x02180001 }, - 
{ 0x00600001, 0x28000208, 0x008d0140, 0x00000000 }, - { 0x00600001, 0x28200208, 0x008d0200, 0x00000000 }, - { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0002 }, - { 0x00000040, 0x21280208, 0x06000128, 0x00000001 }, - { 0x00000040, 0x21480208, 0x06000148, 0x00000001 }, - { 0x01000040, 0x20a81a68, 0x1e0000a8, 0xffffffff }, - { 0x00110020, 0x34000000, 0x0e001400, 0xffffff70 }, - { 0x01000005, 0x20001240, 0x160000ac, 0x00010001 }, - { 0x00010020, 0x34000000, 0x0e001400, 0x00000040 }, - { 0x00600001, 0x28000208, 0x008d0140, 0x00000000 }, - { 0x00400001, 0x28200608, 0x00000000, 0x00000000 }, - { 0x00000001, 0x28240608, 0x00000000, 0x05000000 }, - { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0002 }, - { 0x00600001, 0x28000208, 0x008d0000, 0x00000000 }, - { 0x07800031, 0x24000a00, 0x06000800, 0x82000010 }, -- cgit v1.2.1 From ce6fc460d6047633cace3dc43df532708c1eb021 Mon Sep 17 00:00:00 2001 From: Pengfei Qu Date: Tue, 23 Aug 2016 10:06:43 +0800 Subject: HEVC ENC:fill the correct chroma intra mode Signed-off-by: Pengfei Qu (cherry picked from commit 7594542eabea59a92ca6c2aa6c55f8e8c58152d1) --- src/gen9_mfc_hevc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/gen9_mfc_hevc.c b/src/gen9_mfc_hevc.c index 7435d2a0..05682ed5 100644 --- a/src/gen9_mfc_hevc.c +++ b/src/gen9_mfc_hevc.c @@ -1242,15 +1242,16 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx, int cu_size = 1; int tu_size = 0x55; int tu_count = 4; + int chroma_mode_remap[4]={5,4,3,2}; if (!is_inter) inerpred_idc = 0xff; intraMbMode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4; - + intra_chroma_mode = (msg[3] & 0x3); + intra_chroma_mode = chroma_mode_remap[intra_chroma_mode]; if (intraMbMode == AVC_INTRA_16X16) { cu_part_mode = 0; //2Nx2N - intra_chroma_mode = 5; cu_size = 1; tu_size = 0x55; tu_count = 4; @@ -1260,7 +1261,6 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx, intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf]; } else if (intraMbMode == AVC_INTRA_8X8) { 
cu_part_mode = 0; //2Nx2N - intra_chroma_mode = 5; cu_size = 0; tu_size = 0; tu_count = 4; @@ -1271,7 +1271,6 @@ gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx, } else { // for 4x4 to use 8x8 replace cu_part_mode = 3; //NxN - intra_chroma_mode = 0; cu_size = 0; tu_size = 0; tu_count = 4; -- cgit v1.2.1 From be8f4d39d393baca0b19e7b8097079d2e703c8fb Mon Sep 17 00:00:00 2001 From: Pengfei Qu Date: Tue, 23 Aug 2016 10:07:46 +0800 Subject: HEVC ENC:set the initial QP threshold to avoid the low quality in the first GOP Signed-off-by: Pengfei Qu (cherry picked from commit 76bedafc8141224c882281c7bf7c1cd77c3c128c) --- src/gen9_mfc_hevc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gen9_mfc_hevc.c b/src/gen9_mfc_hevc.c index 05682ed5..b3ee327e 100644 --- a/src/gen9_mfc_hevc.c +++ b/src/gen9_mfc_hevc.c @@ -2333,9 +2333,9 @@ static void intel_hcpe_brc_init(struct encode_state *encode_state, mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY; mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY; - BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51); - BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51); - BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51); + BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 36); + BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 40); + BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 45); } int intel_hcpe_update_hrd(struct encode_state *encode_state, -- cgit v1.2.1 From ce444fb412966ca6afbb1331b7cae8ab621c1108 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 22 Aug 2016 12:57:41 +0800 Subject: Update NEWS Signed-off-by: Xiang, Haihao --- NEWS | 20 +++++++++++++++++++- 1 file changed, 19 
insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 6a78f56a..19db5c0e 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,24 @@ -libva-intel-driver NEWS -- summary of changes. 2016-06-21 +libva-intel-driver NEWS -- summary of changes. 2016-09-xx Copyright (C) 2009-2016 Intel Corporation +Version 1.7.2 - DD.Sep.2016 +* Update PCI IDs for KBL +* Allow up to 8K JPEG decoding/encoding on SKL+ +* Add support for ROI on IVB+ +* Support I420/YV12 input surface for VP9 encoding +* Fix assertion failure when decoding stream through VLC + (https://bugs.freedesktop.org/show_bug.cgi?id=94007) +* Fix image corruption in ColorBalance and STDE on BDW+ + (https://bugs.freedesktop.org/show_bug.cgi?id=95349) +* Fix run2run issue in H.264 encoder + (https://bugs.freedesktop.org/show_bug.cgi?id=96703) +* Fix video rendering corruption when using VAAPI postproc denoise on 1080p videos + (https://bugs.freedesktop.org/show_bug.cgi?id=96739) +* Fix image corruption in ColorBalance with hue=-180 + (https://bugs.freedesktop.org/show_bug.cgi?id=96744) +* Fix memory leak in VP8 encoding + (https://bugs.freedesktop.org/show_bug.cgi?id=97272) + Version 1.7.1 - 21.Jun.2016 * Add support VP9 8bit encoding on KBL * Add support for low-power/high-performance H.264 encoder on SKL -- cgit v1.2.1 From 32c6a553cb40105bada83f3695b9f8ed615f2134 Mon Sep 17 00:00:00 2001 From: "Xiang, Haihao" Date: Mon, 5 Sep 2016 10:36:48 +0800 Subject: libva-intel-driver 1.7.2 Signed-off-by: Xiang, Haihao --- NEWS | 4 ++-- configure.ac | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 19db5c0e..65a27232 100644 --- a/NEWS +++ b/NEWS @@ -1,7 +1,7 @@ -libva-intel-driver NEWS -- summary of changes. 2016-09-xx +libva-intel-driver NEWS -- summary of changes. 
2016-09-05 Copyright (C) 2009-2016 Intel Corporation -Version 1.7.2 - DD.Sep.2016 +Version 1.7.2 - 05.Sep.2016 * Update PCI IDs for KBL * Allow up to 8K JPEG decoding/encoding on SKL+ * Add support for ROI on IVB+ diff --git a/configure.ac b/configure.ac index 9923dfee..7df7825f 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ m4_define([intel_driver_major_version], [1]) m4_define([intel_driver_minor_version], [7]) m4_define([intel_driver_micro_version], [2]) -m4_define([intel_driver_pre_version], [1]) +m4_define([intel_driver_pre_version], [0]) m4_define([intel_driver_version], [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version]) m4_if(intel_driver_pre_version, [0], [], [ -- cgit v1.2.1