summaryrefslogtreecommitdiff
path: root/src/gen9_mfc.c
diff options
context:
space:
mode:
authorZhong Li <zhong.li@intel.com>2015-01-15 22:04:54 +0800
committerXiang, Haihao <haihao.xiang@intel.com>2015-03-19 10:01:29 +0800
commitd954c80771a83d511d950aec0a8fbe0bd15f4957 (patch)
treeafd6031361353f789126499f07ccbd14efa834d3 /src/gen9_mfc.c
parent4f0df22066c00528f463a0eb6f5c0b6f95e09b05 (diff)
downloadlibva-intel-driver-d954c80771a83d511d950aec0a8fbe0bd15f4957.tar.gz
VP8 HWEnc: Build VP8 PAK pipeline and enabling I frame
Signed-off-by: Zhong Li <zhong.li@intel.com> (cherry picked from commit a18ce4664113d5b1a9b29ed45bf137df6b7a7898) Conflicts: src/i965_encoder_utils.c
Diffstat (limited to 'src/gen9_mfc.c')
-rw-r--r--src/gen9_mfc.c790
1 files changed, 786 insertions, 4 deletions
diff --git a/src/gen9_mfc.c b/src/gen9_mfc.c
index 532695a2..98a2c2ed 100644
--- a/src/gen9_mfc.c
+++ b/src/gen9_mfc.c
@@ -42,6 +42,7 @@
#include "gen6_mfc.h"
#include "gen6_vme.h"
#include "intel_media.h"
+#include "vp8_probs.h"
#define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8
#define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index)
@@ -87,7 +88,6 @@ static struct i965_kernel gen9_mfc_kernels[] = {
#define INTER_MV8 (4 << 20)
#define INTER_MV32 (6 << 20)
-
static void
gen9_mfc_pipe_mode_select(VADriverContextP ctx,
int standard_select,
@@ -97,7 +97,8 @@ gen9_mfc_pipe_mode_select(VADriverContextP ctx,
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
assert(standard_select == MFX_FORMAT_MPEG2 ||
- standard_select == MFX_FORMAT_AVC);
+ standard_select == MFX_FORMAT_AVC ||
+ standard_select == MFX_FORMAT_VP8);
BEGIN_BCS_BATCH(batch, 5);
@@ -108,6 +109,7 @@ gen9_mfc_pipe_mode_select(VADriverContextP ctx,
(0 << 10) | /* Stream-Out Enable */
((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
+ (0 << 6) | /* frame statistics stream-out enable*/
(0 << 5) | /* not in stitch mode */
(1 << 4) | /* encoding mode */
(standard_select << 0)); /* standard select: avc or mpeg2 */
@@ -171,9 +173,18 @@ gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
+
/* the DW4-5 is the MFX upper bound */
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
+ if (encoder_context->codec == CODEC_VP8) {
+ OUT_BCS_RELOC(batch,
+ mfc_context->mfc_indirect_pak_bse_object.bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ mfc_context->mfc_indirect_pak_bse_object.end_offset);
+ OUT_BCS_BATCH(batch, 0);
+ } else {
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ }
vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
/* the DW6-10 is for MFX Indirect MV Object Base Address */
@@ -2353,6 +2364,752 @@ gen9_mfc_mpeg2_encode_picture(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
+/* Initialize the per-frame VP8 probability state in mfc_context->vp8_state
+ * and upload the default coefficient probabilities into the stream-in BO.
+ * Must run after coeff_probs_stream_in_bo has been allocated (gen9_mfc_vp8_init).
+ * Fix: the former "prob_skip_false = 255" was a dead store, unconditionally
+ * overwritten below from the qindex-derived table; it has been removed. */
+static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
+                               VAEncPictureParameterBufferVP8 *pic_param,
+                               VAQMatrixBufferVP8 *q_matrix)
+{
+
+    /* frame_type == 0 marks a key (intra) frame in the VP8 spec */
+    int is_key_frame = !pic_param->pic_flags.bits.frame_type;
+    unsigned char *coeff_probs_stream_in_buffer;
+
+    /* header update positions are filled in later by the frame-header binarizer */
+    mfc_context->vp8_state.frame_header_lf_update_pos = 0;
+    mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
+    mfc_context->vp8_state.frame_header_token_update_pos = 0;
+    mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
+
+    memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
+    memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
+
+    if (is_key_frame) {
+        memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
+        memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
+
+        /* reference-frame selection probs are not used on key frames; spec defaults */
+        mfc_context->vp8_state.prob_intra = 255;
+        mfc_context->vp8_state.prob_last = 128;
+        mfc_context->vp8_state.prob_gf = 128;
+    } else {
+        memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
+        memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
+
+        mfc_context->vp8_state.prob_intra = 63;
+        mfc_context->vp8_state.prob_last = 128;
+        mfc_context->vp8_state.prob_gf = 128;
+    }
+
+    /* skip-false probability is looked up from the base luma quantizer index */
+    mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
+
+    dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
+    coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
+    assert(coeff_probs_stream_in_buffer);
+    memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
+    dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
+}
+
+/* Placeholder for frame-to-frame probability refresh before PAK.
+ * Currently a no-op: only needed once inter-frame encoding is enabled. */
+static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
+                                 VAQMatrixBufferVP8 *q_matrix)
+{
+
+    /*some other probabilities need to be updated*/
+}
+
+extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
+ VAEncPictureParameterBufferVP8 *pic_param,
+ VAQMatrixBufferVP8 *q_matrix,
+ struct gen6_mfc_context *mfc_context);
+
+/* Run the software frame-header binarizer (i965_encoder_utils.c) and copy the
+ * resulting bit-packed header into frame_header_bo for hardware consumption.
+ * binarize_vp8_frame_header() fills vp8_state.vp8_frame_header and sets
+ * frame_header_bit_count; the copy length rounds the bit count up to bytes. */
+static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
+                                          struct gen6_mfc_context *mfc_context)
+{
+    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
+    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
+    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
+    unsigned char *frame_header_buffer;
+
+    binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context);
+
+    dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
+    frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
+    assert(frame_header_buffer);
+    /* (bit_count + 7) / 8 == header size rounded up to whole bytes */
+    memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
+    dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
+}
+
+#define MAX_VP8_FRAME_HEADER_SIZE 0x2000
+#define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000
+
+/* Per-frame (re)allocation of every MFC buffer the VP8 PAK pass needs:
+ * releases stale per-frame BOs, allocates row-store/status/intermediate
+ * buffers sized from the sequence parameters, then seeds the probability
+ * state and binarizes the frame header.
+ * BUG FIX: width_in_mbs was computed from frame_height (copy/paste);
+ * it must be derived from frame_width. */
+static void gen9_mfc_vp8_init(VADriverContextP ctx,
+                              struct encode_state *encode_state,
+                              struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    dri_bo *bo;
+    int i;
+    int width_in_mbs = 0;
+    int height_in_mbs = 0;
+    int slice_batchbuffer_size;
+
+    VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
+    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
+    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
+
+    width_in_mbs = ALIGN(pSequenceParameter->frame_width, 16) / 16;
+    height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
+
+    /* room for one PAK object per MB plus command overhead and slice framing */
+    slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
+        (SLICE_HEADER + SLICE_TAIL);
+
+    /*Encode common setup for MFC*/
+    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
+    mfc_context->post_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
+    mfc_context->pre_deblocking_output.bo = NULL;
+
+    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
+    mfc_context->uncompressed_picture_source.bo = NULL;
+
+    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
+    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
+
+    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
+        if ( mfc_context->direct_mv_buffers[i].bo != NULL)
+            dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
+        mfc_context->direct_mv_buffers[i].bo = NULL;
+    }
+
+    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
+        if (mfc_context->reference_surfaces[i].bo != NULL)
+            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
+        mfc_context->reference_surfaces[i].bo = NULL;
+    }
+
+    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      width_in_mbs * 64,
+                      64);
+    assert(bo);
+    mfc_context->intra_row_store_scratch_buffer.bo = bo;
+
+    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      width_in_mbs * height_in_mbs * 16,
+                      64);
+    assert(bo);
+    mfc_context->macroblock_status_buffer.bo = bo;
+
+    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
+                      64);
+    assert(bo);
+    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
+
+    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
+                      0x1000);
+    assert(bo);
+    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+
+    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
+    mfc_context->mfc_batchbuffer_surface.bo = NULL;
+
+    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
+    mfc_context->aux_batchbuffer_surface.bo = NULL;
+
+    if (mfc_context->aux_batchbuffer)
+        intel_batchbuffer_free(mfc_context->aux_batchbuffer);
+
+    /* the aux batchbuffer doubles as a VME surface (16-byte blocks) */
+    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
+    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
+    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
+    mfc_context->aux_batchbuffer_surface.pitch = 16;
+    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
+    mfc_context->aux_batchbuffer_surface.size_block = 16;
+
+    i965_gpe_context_init(ctx, &mfc_context->gpe_context);
+
+    /* alloc vp8 encoding buffers*/
+    dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      MAX_VP8_FRAME_HEADER_SIZE,
+                      0x1000);
+    assert(bo);
+    mfc_context->vp8_state.frame_header_bo = bo;
+
+    /* intermediate buffer: 9 equal partitions of 256 bytes per MB;
+     * offsets index the 8 token partitions after the first chunk */
+    mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 256 * 9;
+    for(i = 0; i < 8; i++) {
+        mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 256 * (i + 1);
+    }
+    dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      mfc_context->vp8_state.intermediate_buffer_max_size,
+                      0x1000);
+    assert(bo);
+    mfc_context->vp8_state.intermediate_bo = bo;
+
+    dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      width_in_mbs * height_in_mbs * 16,
+                      0x1000);
+    assert(bo);
+    mfc_context->vp8_state.stream_out_bo = bo;
+
+    dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      sizeof(vp8_default_coef_probs),
+                      0x1000);
+    assert(bo);
+    mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
+
+    dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      VP8_TOKEN_STATISTICS_BUFFER_SIZE,
+                      0x1000);
+    assert(bo);
+    mfc_context->vp8_state.token_statistics_bo = bo;
+
+    dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "Buffer",
+                      width_in_mbs * 16 * 64,
+                      0x1000);
+    assert(bo);
+    mfc_context->vp8_state.mpc_row_store_bo = bo;
+
+    vp8_enc_state_init(mfc_context, pic_param, q_matrix);
+    vp8_enc_frame_header_binarize(encode_state, mfc_context);
+}
+
+/* Per-frame surface wiring for VP8 PAK: binds the reconstructed surface,
+ * reference surfaces, input YUV and coded buffer into mfc_context, taking a
+ * dri_bo reference for every slot it fills.  Always returns VA_STATUS_SUCCESS
+ * at present (no failure paths). */
+static VAStatus
+intel_mfc_vp8_prepare(VADriverContextP ctx,
+                      struct encode_state *encode_state,
+                      struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    struct object_surface *obj_surface;
+    struct object_buffer *obj_buffer;
+    struct i965_coded_buffer_segment *coded_buffer_segment;
+    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
+    VAStatus vaStatus = VA_STATUS_SUCCESS;
+    dri_bo *bo;
+    int i;
+
+    /* reconstructed surface */
+    obj_surface = encode_state->reconstructed_object;
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
+    /* with loop filter off the pre-deblocking output is the final recon;
+     * otherwise the deblocked (post) output is */
+    if (pic_param->loop_filter_level[0] == 0) {
+        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
+        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
+    } else {
+        mfc_context->post_deblocking_output.bo = obj_surface->bo;
+        dri_bo_reference(mfc_context->post_deblocking_output.bo);
+    }
+
+    mfc_context->surface_state.width = obj_surface->orig_width;
+    mfc_context->surface_state.height = obj_surface->orig_height;
+    mfc_context->surface_state.w_pitch = obj_surface->width;
+    mfc_context->surface_state.h_pitch = obj_surface->height;
+
+    /* forward reference */
+    obj_surface = encode_state->reference_objects[0];
+
+    if (obj_surface && obj_surface->bo) {
+        mfc_context->reference_surfaces[0].bo = obj_surface->bo;
+        dri_bo_reference(mfc_context->reference_surfaces[0].bo);
+    } else
+        mfc_context->reference_surfaces[0].bo = NULL;
+
+    /* backward reference */
+    obj_surface = encode_state->reference_objects[1];
+
+    if (obj_surface && obj_surface->bo) {
+        mfc_context->reference_surfaces[1].bo = obj_surface->bo;
+        dri_bo_reference(mfc_context->reference_surfaces[1].bo);
+    } else {
+        /* fall back to the forward reference when no backward ref exists */
+        mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;
+
+        if (mfc_context->reference_surfaces[1].bo)
+            dri_bo_reference(mfc_context->reference_surfaces[1].bo);
+    }
+
+    /* remaining slots alternate between the two real references */
+    for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
+        mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;
+
+        if (mfc_context->reference_surfaces[i].bo)
+            dri_bo_reference(mfc_context->reference_surfaces[i].bo);
+    }
+
+    /* input YUV surface */
+    obj_surface = encode_state->input_yuv_object;
+    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
+    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
+
+    /* coded buffer: payload starts after the driver's header segment */
+    obj_buffer = encode_state->coded_buf_object;
+    bo = obj_buffer->buffer_store->bo;
+    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
+    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
+    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
+    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
+
+    /* the final VP8 frame is written directly into the coded buffer */
+    dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
+    mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
+    mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
+    dri_bo_reference(mfc_context->vp8_state.final_frame_bo);
+
+    /* set the internal flag to 0 to indicate the coded size is unknown */
+    dri_bo_map(bo, 1);
+    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
+    coded_buffer_segment->mapped = 0;
+    coded_buffer_segment->codec = encoder_context->codec;
+    dri_bo_unmap(bo);
+
+    return vaStatus;
+}
+
+/* Emit the 30-DW MFX_VP8_ENCODER_CFG command: statistics-only first pass
+ * (final bitstream output disabled), frame dimensions and the frame-header
+ * bin-buffer update pointers produced by the software binarizer. */
+static void
+gen9_mfc_vp8_encoder_cfg(VADriverContextP ctx,
+                         struct encode_state *encode_state,
+                         struct intel_encoder_context *encoder_context)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
+    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
+
+    BEGIN_BCS_BATCH(batch, 30);
+    OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */
+
+    OUT_BCS_BATCH(batch,
+                  0 << 9 | /* compressed bitstream output disable */
+                  1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
+                  0 << 6 | /* RC initial pass */
+                  0 << 4 | /* update segment feature data flag */
+                  1 << 3 | /* bitstream statistics output enable */
+                  1 << 2 | /* token statistics output enable */
+                  0 << 1 | /* final bitstream output disable */
+                  0 << 0); /*DW1*/
+
+    OUT_BCS_BATCH(batch, 0); /*DW2*/
+
+    OUT_BCS_BATCH(batch,
+                  0xfff << 16 | /* max intra mb bit count limit */
+                  0xfff << 0  /* max inter mb bit count limit */
+                  ); /*DW3*/
+
+    /* DW4-DW21: rate-control state, unused in this single-pass setup */
+    OUT_BCS_BATCH(batch, 0); /*DW4*/
+    OUT_BCS_BATCH(batch, 0); /*DW5*/
+    OUT_BCS_BATCH(batch, 0); /*DW6*/
+    OUT_BCS_BATCH(batch, 0); /*DW7*/
+    OUT_BCS_BATCH(batch, 0); /*DW8*/
+    OUT_BCS_BATCH(batch, 0); /*DW9*/
+    OUT_BCS_BATCH(batch, 0); /*DW10*/
+    OUT_BCS_BATCH(batch, 0); /*DW11*/
+    OUT_BCS_BATCH(batch, 0); /*DW12*/
+    OUT_BCS_BATCH(batch, 0); /*DW13*/
+    OUT_BCS_BATCH(batch, 0); /*DW14*/
+    OUT_BCS_BATCH(batch, 0); /*DW15*/
+    OUT_BCS_BATCH(batch, 0); /*DW16*/
+    OUT_BCS_BATCH(batch, 0); /*DW17*/
+    OUT_BCS_BATCH(batch, 0); /*DW18*/
+    OUT_BCS_BATCH(batch, 0); /*DW19*/
+    OUT_BCS_BATCH(batch, 0); /*DW20*/
+    OUT_BCS_BATCH(batch, 0); /*DW21*/
+
+    OUT_BCS_BATCH(batch,
+                  pic_param->pic_flags.bits.show_frame << 23 |
+                  pic_param->pic_flags.bits.version << 20
+                  ); /*DW22*/
+
+    /* DW23: height in the upper 16 bits, width in the lower, each with its
+     * 2-bit upscale factor packed at bit 14 of the half-word */
+    OUT_BCS_BATCH(batch,
+                  (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
+                  (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
+                  );
+
+    /*DW24*/
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */
+
+    /*DW25*/
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */
+
+    /*DW26*/
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/
+
+    /*DW27*/
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */
+
+    /*DW28*/
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */
+
+    /*DW29*/
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* Emit the 38-DW MFX_VP8_PIC_STATE command: frame geometry, coding-tool
+ * flags, quantizer/loop-filter tables and the probability state seeded by
+ * vp8_enc_state_init().  Refreshes the probabilities first via
+ * vp8_enc_state_update() (currently a no-op). */
+static void
+gen9_mfc_vp8_pic_state(VADriverContextP ctx,
+                       struct encode_state *encode_state,
+                       struct intel_encoder_context *encoder_context)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
+    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
+    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
+    int i, j, log2num;
+
+    /* VP8 allows 1, 2, 4 or 8 token partitions; hardware takes log2 */
+    assert(pic_param->pic_flags.bits.num_token_partitions > 0);
+    assert(pic_param->pic_flags.bits.num_token_partitions < 9);
+    log2num = (int)log2(pic_param->pic_flags.bits.num_token_partitions);
+
+    /*update mode and token probs*/
+    vp8_enc_state_update(mfc_context, q_matrix);
+
+    BEGIN_BCS_BATCH(batch, 38);
+    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
+    /* DW1: frame size in MBs, minus one in each dimension */
+    OUT_BCS_BATCH(batch,
+                  (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
+                  (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);
+
+    /* DW2: coding-tool flags straight from the picture parameters */
+    OUT_BCS_BATCH(batch,
+                  log2num << 24 |
+                  pic_param->sharpness_level << 16 |
+                  pic_param->pic_flags.bits.sign_bias_alternate << 13 |
+                  pic_param->pic_flags.bits.sign_bias_golden << 12 |
+                  pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
+                  pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
+                  pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
+                  pic_param->pic_flags.bits.segmentation_enabled << 8 |
+                  !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
+                  (pic_param->pic_flags.bits.version / 2) << 4 |
+                  (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
+                  !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */
+
+    /* DW3: per-segment loop filter levels */
+    OUT_BCS_BATCH(batch,
+                  pic_param->loop_filter_level[3] << 24 |
+                  pic_param->loop_filter_level[2] << 16 |
+                  pic_param->loop_filter_level[1] << 8 |
+                  pic_param->loop_filter_level[0] << 0);
+
+    /* DW4: per-segment luma AC quantizer indices */
+    OUT_BCS_BATCH(batch,
+                  q_matrix->quantization_index[3] << 24 |
+                  q_matrix->quantization_index[2] << 16 |
+                  q_matrix->quantization_index[1] << 8 |
+                  q_matrix->quantization_index[0] << 0);
+
+    /* DW5-DW6: qindex deltas as sign/magnitude -- sign bit extracted by
+     * shifting the 16-bit value right 15, magnitude via abs() */
+    OUT_BCS_BATCH(batch,
+                  ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
+                  abs(q_matrix->quantization_index_delta[4]) << 24 |
+                  ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
+                  abs(q_matrix->quantization_index_delta[3]) << 16 |
+                  ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
+                  abs(q_matrix->quantization_index_delta[2]) << 8 |
+                  ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
+                  abs(q_matrix->quantization_index_delta[1]) << 0);
+
+    OUT_BCS_BATCH(batch,
+                  ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
+                  abs(q_matrix->quantization_index_delta[0]) << 0);
+
+    /* DW7: quantizer clamp range */
+    OUT_BCS_BATCH(batch,
+                  pic_param->clamp_qindex_high << 8 |
+                  pic_param->clamp_qindex_low << 0);
+
+    /* DW8-DW18: filled with all-ones (segmentation disabled here) */
+    for (i = 8; i < 19; i++) {
+        OUT_BCS_BATCH(batch, 0xffffffff);
+    }
+
+    /* DW19: MB segment tree probabilities */
+    OUT_BCS_BATCH(batch,
+                  mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
+                  mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 |
+                  mfc_context->vp8_state.mb_segment_tree_probs[0] << 0);
+
+    /* DW20: skip/intra/reference-frame probabilities */
+    OUT_BCS_BATCH(batch,
+                  mfc_context->vp8_state.prob_skip_false << 24 |
+                  mfc_context->vp8_state.prob_intra << 16 |
+                  mfc_context->vp8_state.prob_last << 8 |
+                  mfc_context->vp8_state.prob_gf << 0);
+
+    /* DW21-DW22: luma and chroma mode probabilities */
+    OUT_BCS_BATCH(batch,
+                  mfc_context->vp8_state.y_mode_probs[3] << 24 |
+                  mfc_context->vp8_state.y_mode_probs[2] << 16 |
+                  mfc_context->vp8_state.y_mode_probs[1] << 8 |
+                  mfc_context->vp8_state.y_mode_probs[0] << 0);
+
+    OUT_BCS_BATCH(batch,
+                  mfc_context->vp8_state.uv_mode_probs[2] << 16 |
+                  mfc_context->vp8_state.uv_mode_probs[1] << 8 |
+                  mfc_context->vp8_state.uv_mode_probs[0] << 0);
+
+    /* MV update value, DW23-DW32 */
+    for (i = 0; i < 2; i++) {
+        for (j = 0; j < 20; j += 4) {
+            /* the 19th entry per component does not exist; pack 0 instead */
+            OUT_BCS_BATCH(batch,
+                          (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
+                          mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
+                          mfc_context->vp8_state.mv_probs[i][j + 1] << 8 |
+                          mfc_context->vp8_state.mv_probs[i][j + 0] << 0);
+        }
+    }
+
+    /* DW33-DW34: loop filter deltas, 7-bit sign/magnitude fields */
+    OUT_BCS_BATCH(batch,
+                  (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
+                  (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
+                  (pic_param->ref_lf_delta[1] & 0x7f) << 8 |
+                  (pic_param->ref_lf_delta[0] & 0x7f) << 0);
+
+    OUT_BCS_BATCH(batch,
+                  (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
+                  (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
+                  (pic_param->mode_lf_delta[1] & 0x7f) << 8 |
+                  (pic_param->mode_lf_delta[0] & 0x7f) << 0);
+
+    /* DW35-DW37: reserved */
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+#define OUT_VP8_BUFFER(bo, offset) \
+ if (bo) \
+ OUT_BCS_RELOC(batch, \
+ bo, \
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
+ offset); \
+ else \
+ OUT_BCS_BATCH(batch, 0); \
+ OUT_BCS_BATCH(batch, 0); \
+ OUT_BCS_BATCH(batch, 0);
+
+/* Emit the 32-DW MFX_VP8_BSP_BUF_BASE_ADDR_STATE command binding all VP8
+ * bitstream-processing buffers.  Each OUT_VP8_BUFFER expands to 3 DWs
+ * (address + two zero DWs). */
+static void
+gen9_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                     struct encode_state *encode_state,
+                                     struct intel_encoder_context *encoder_context)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+    BEGIN_BCS_BATCH(batch, 32);
+    OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));
+
+    OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);
+
+    /* intermediate buffer plus the eight token-partition offsets and size */
+    OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
+    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);
+
+    /* final frame lands in the coded buffer, past the driver header */
+    OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
+    OUT_BCS_BATCH(batch, 0);
+
+    OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
+    OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
+    OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
+    OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* Emit all picture-level state for one VP8 frame, in the order the MFX
+ * pipeline expects: mode select, surfaces, base addresses, then the VP8
+ * picture state and encoder configuration. */
+static void
+gen9_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
+                                         struct encode_state *encode_state,
+                                         struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
+    mfc_context->set_surface_state(ctx, encoder_context);
+    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
+    gen9_mfc_pipe_buf_addr_state(ctx, encoder_context);
+    gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
+    gen9_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
+    gen9_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
+    gen9_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
+}
+
+/* Emit one 7-DW MFX_VP8_PAK_OBJECT for an intra macroblock at MB position
+ * (x, y).  All mode fields are currently hard-coded to 16x16 DC prediction;
+ * the VME output message (msg) is accepted but not yet consumed. */
+static void
+gen9_mfc_vp8_pak_object_intra(VADriverContextP ctx,
+                              struct intel_encoder_context *encoder_context,
+                              unsigned int *msg,
+                              int x, int y,
+                              struct intel_batchbuffer *batch)
+{
+    if (batch == NULL)
+        batch = encoder_context->base.batch;
+
+    BEGIN_BCS_BATCH(batch, 7);
+
+    OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch,
+                  (0 << 20) |         /* mv format: intra mb  */
+                  (0 << 18) |         /* Segment ID */
+                  (0 << 17) |         /* disable coeff clamp */
+                  (1 << 13) |         /* intra mb flag */
+                  (0 << 11) |         /* refer picture select: last frame */
+                  (0 << 8) |          /* mb type: 16x16 intra mb */
+                  (0 << 4) |          /* mb uv mode: dc_pred */
+                  (0 << 2) |          /* skip mb flag: disable */
+                  0);
+
+    OUT_BCS_BATCH(batch, (y << 16) | x);
+    OUT_BCS_BATCH(batch, 0); /* y_mode: dc_pred */
+    OUT_BCS_BATCH(batch, 0);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+/* Stub: inter-macroblock PAK object emission is not implemented yet --
+ * this commit enables key (I) frames only. */
+static void
+gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx,
+                              struct intel_encoder_context *encoder_context,
+                              unsigned int *msg,
+                              int x, int y,
+                              struct intel_batchbuffer *batch)
+{
+    /* Add it later */
+}
+
+/* Walk every macroblock in raster order and emit a PAK object command for
+ * each into slice_batch, mapping the VME output so per-MB messages are
+ * available (currently unused by the intra path).
+ * Fix: "msg = msg_ptr = (unsigned char *)..." assigned an unsigned char *
+ * to the unsigned int *msg without a cast (incompatible pointer types);
+ * the assignment is now split with an explicit cast. */
+static void
+gen9_mfc_vp8_pak_pipeline(VADriverContextP ctx,
+                          struct encode_state *encode_state,
+                          struct intel_encoder_context *encoder_context,
+                          struct intel_batchbuffer *slice_batch)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
+    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
+    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
+    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
+    unsigned int *msg = NULL;
+    unsigned char *msg_ptr = NULL;
+    unsigned int i, is_intra_frame;
+
+    /* frame_type == 0 is a key (intra) frame in VP8 */
+    is_intra_frame = !pic_param->pic_flags.bits.frame_type;
+
+    dri_bo_map(vme_context->vme_output.bo , 1);
+    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
+    msg = (unsigned int *)msg_ptr;
+
+    for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
+        int h_pos = i % width_in_mbs;
+        int v_pos = i / width_in_mbs;
+
+        if (is_intra_frame) {
+            gen9_mfc_vp8_pak_object_intra(ctx,
+                                          encoder_context,
+                                          msg,
+                                          h_pos, v_pos,
+                                          slice_batch);
+        } else {
+            /* inter path is a stub until P-frame support lands */
+            gen9_mfc_vp8_pak_object_inter(ctx,
+                                          encoder_context,
+                                          msg,
+                                          h_pos, v_pos,
+                                          slice_batch);
+        }
+    }
+
+    dri_bo_unmap(vme_context->vme_output.bo);
+}
+
+/*
+ * A batch buffer for vp8 pak object commands
+ */
+/* Fill the aux batchbuffer with per-MB PAK objects and hand its BO to the
+ * caller.  Ownership note: takes a reference on the BO, frees the
+ * intel_batchbuffer wrapper and clears aux_batchbuffer; the caller must
+ * dri_bo_unreference() the returned BO when done. */
+static dri_bo *
+gen9_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
+                                  struct encode_state *encode_state,
+                                  struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    struct intel_batchbuffer *batch;
+    dri_bo *batch_bo;
+
+    batch = mfc_context->aux_batchbuffer;
+    batch_bo = batch->buffer;
+
+    gen9_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);
+
+    intel_batchbuffer_align(batch, 8);
+
+    /* terminate the second-level batch so MI_BATCH_BUFFER_START can chain it */
+    BEGIN_BCS_BATCH(batch, 2);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
+    ADVANCE_BCS_BATCH(batch);
+
+    dri_bo_reference(batch_bo);
+    intel_batchbuffer_free(batch);
+    mfc_context->aux_batchbuffer = NULL;
+
+    return batch_bo;
+}
+
+/* Build the complete BCS command stream for one VP8 frame: generate the
+ * second-level PAK batch, emit picture-level state, then chain into the
+ * PAK batch with MI_BATCH_BUFFER_START. */
+static void
+gen9_mfc_vp8_pipeline_programing(VADriverContextP ctx,
+                                 struct encode_state *encode_state,
+                                 struct intel_encoder_context *encoder_context)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    dri_bo *slice_batch_bo;
+
+    slice_batch_bo = gen9_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);
+
+    // begin programing
+    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
+    intel_batchbuffer_emit_mi_flush(batch);
+
+    // picture level programing
+    gen9_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);
+
+    /* chain to the second-level (PAK object) batch */
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
+    OUT_BCS_RELOC(batch,
+                  slice_batch_bo,
+                  I915_GEM_DOMAIN_COMMAND, 0,
+                  0);
+    OUT_BCS_BATCH(batch, 0);
+    OUT_BCS_BATCH(batch, 0);
+    ADVANCE_BCS_BATCH(batch);
+
+    // end programing
+    intel_batchbuffer_end_atomic(batch);
+
+    /* drop the reference taken by gen9_mfc_vp8_software_batchbuffer() */
+    dri_bo_unreference(slice_batch_bo);
+}
+
+/* Top-level VP8 encode entry point for the gen9 MFC pipeline: allocate and
+ * seed per-frame state, bind surfaces/buffers, program the BCS command
+ * stream and submit it to hardware.  Always returns VA_STATUS_SUCCESS. */
+static VAStatus
+gen9_mfc_vp8_encode_picture(VADriverContextP ctx,
+                            struct encode_state *encode_state,
+                            struct intel_encoder_context *encoder_context)
+{
+    gen9_mfc_vp8_init(ctx, encode_state, encoder_context);
+    intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
+    /*Programing bcs pipeline*/
+    gen9_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
+    gen9_mfc_run(ctx, encode_state, encoder_context);
+
+    return VA_STATUS_SUCCESS;
+}
+
static void
gen9_mfc_context_destroy(void *context)
{
@@ -2407,6 +3164,27 @@ gen9_mfc_context_destroy(void *context)
mfc_context->aux_batchbuffer = NULL;
+ dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
+ mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
+
+ dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
+ mfc_context->vp8_state.final_frame_bo = NULL;
+
+ dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
+ mfc_context->vp8_state.frame_header_bo = NULL;
+
+ dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
+ mfc_context->vp8_state.intermediate_bo = NULL;
+
+ dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
+ mfc_context->vp8_state.mpc_row_store_bo = NULL;
+
+ dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
+ mfc_context->vp8_state.stream_out_bo = NULL;
+
+ dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
+ mfc_context->vp8_state.token_statistics_bo = NULL;
+
free(mfc_context);
}
@@ -2432,6 +3210,10 @@ static VAStatus gen9_mfc_pipeline(VADriverContextP ctx,
vaStatus = gen9_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
break;
+ case VAProfileVP8Version0_3:
+ vaStatus = gen9_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
+ break;
+
default:
vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
break;