diff options
author | Zhong Li <zhong.li@intel.com> | 2015-01-15 22:04:54 +0800 |
---|---|---|
committer | Xiang, Haihao <haihao.xiang@intel.com> | 2015-03-19 10:01:29 +0800 |
commit | d954c80771a83d511d950aec0a8fbe0bd15f4957 (patch) | |
tree | afd6031361353f789126499f07ccbd14efa834d3 | |
parent | 4f0df22066c00528f463a0eb6f5c0b6f95e09b05 (diff) | |
download | libva-intel-driver-d954c80771a83d511d950aec0a8fbe0bd15f4957.tar.gz |
VP8 HWEnc: Build VP8 PAK pipeline and enable I frame
Signed-off-by: Zhong Li <zhong.li@intel.com>
(cherry picked from commit a18ce4664113d5b1a9b29ed45bf137df6b7a7898)
Conflicts:
src/i965_encoder_utils.c
-rwxr-xr-x | src/Makefile.am | 1 | ||||
-rw-r--r-- | src/gen6_mfc.h | 32 | ||||
-rw-r--r-- | src/gen9_mfc.c | 790 | ||||
-rwxr-xr-x | src/i965_defines.h | 5 | ||||
-rw-r--r-- | src/i965_drv_video.c | 11 | ||||
-rw-r--r-- | src/i965_drv_video.h | 6 | ||||
-rw-r--r-- | src/i965_encoder.c | 73 | ||||
-rw-r--r-- | src/i965_encoder_utils.c | 141 | ||||
-rw-r--r-- | src/vp8_probs.h | 250 |
9 files changed, 1303 insertions, 6 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 7e755dc3..e021474b 100755 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -133,6 +133,7 @@ source_h = \ intel_memman.h \ intel_version.h \ object_heap.h \ + vp8_probs.h \ sysdeps.h \ va_backend_compat.h \ i965_fourcc.h \ diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h index c0798637..191de329 100644 --- a/src/gen6_mfc.h +++ b/src/gen6_mfc.h @@ -34,6 +34,7 @@ #include <intel_bufmgr.h> #include "i965_gpe_utils.h" +#include "i965_encoder.h" struct encode_state; @@ -189,6 +190,37 @@ struct gen6_mfc_context int i_cpb_removal_delay_length; int i_dpb_output_delay_length; }vui_hrd; + + struct { + unsigned char *vp8_frame_header; + unsigned int frame_header_bit_count; + unsigned int frame_header_qindex_update_pos; + unsigned int frame_header_lf_update_pos; + unsigned int frame_header_token_update_pos; + unsigned int frame_header_bin_mv_upate_pos; + + unsigned int intermediate_partition_offset[8]; + unsigned int intermediate_buffer_max_size; + unsigned int final_frame_byte_offset; + + unsigned char mb_segment_tree_probs[3]; + unsigned char y_mode_probs[4]; + unsigned char uv_mode_probs[3]; + unsigned char mv_probs[2][19]; + + unsigned char prob_skip_false; + unsigned char prob_intra; + unsigned char prob_last; + unsigned char prob_gf; + + dri_bo *frame_header_bo; + dri_bo *intermediate_bo; + dri_bo *final_frame_bo; + dri_bo *stream_out_bo; + dri_bo *coeff_probs_stream_in_bo; + dri_bo *token_statistics_bo; + dri_bo *mpc_row_store_bo; + }vp8_state; //"buffered_QMatrix" will be used to buffer the QMatrix if the app sends one. // Or else, we will load a default QMatrix from the driver for JPEG encode. 
diff --git a/src/gen9_mfc.c b/src/gen9_mfc.c index 532695a2..98a2c2ed 100644 --- a/src/gen9_mfc.c +++ b/src/gen9_mfc.c @@ -42,6 +42,7 @@ #include "gen6_mfc.h" #include "gen6_vme.h" #include "intel_media.h" +#include "vp8_probs.h" #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) @@ -87,7 +88,6 @@ static struct i965_kernel gen9_mfc_kernels[] = { #define INTER_MV8 (4 << 20) #define INTER_MV32 (6 << 20) - static void gen9_mfc_pipe_mode_select(VADriverContextP ctx, int standard_select, @@ -97,7 +97,8 @@ gen9_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; assert(standard_select == MFX_FORMAT_MPEG2 || - standard_select == MFX_FORMAT_AVC); + standard_select == MFX_FORMAT_AVC || + standard_select == MFX_FORMAT_VP8); BEGIN_BCS_BATCH(batch, 5); @@ -108,6 +109,7 @@ gen9_mfc_pipe_mode_select(VADriverContextP ctx, (0 << 10) | /* Stream-Out Enable */ ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */ ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */ + (0 << 6) | /* frame statistics stream-out enable*/ (0 << 5) | /* not in stitch mode */ (1 << 4) | /* encoding mode */ (standard_select << 0)); /* standard select: avc or mpeg2 */ @@ -171,9 +173,18 @@ gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); + /* the DW4-5 is the MFX upper bound */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + if (encoder_context->codec == CODEC_VP8) { + OUT_BCS_RELOC(batch, + mfc_context->mfc_indirect_pak_bse_object.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + mfc_context->mfc_indirect_pak_bse_object.end_offset); + OUT_BCS_BATCH(batch, 0); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks; 
/* the DW6-10 is for MFX Indirect MV Object Base Address */ @@ -2353,6 +2364,752 @@ gen9_mfc_mpeg2_encode_picture(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context, + VAEncPictureParameterBufferVP8 *pic_param, + VAQMatrixBufferVP8 *q_matrix) +{ + + int is_key_frame = !pic_param->pic_flags.bits.frame_type; + unsigned char *coeff_probs_stream_in_buffer; + + mfc_context->vp8_state.frame_header_lf_update_pos = 0; + mfc_context->vp8_state.frame_header_qindex_update_pos = 0; + mfc_context->vp8_state.frame_header_token_update_pos = 0; + mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0; + + mfc_context->vp8_state.prob_skip_false = 255; + memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs)); + memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs)); + + if (is_key_frame) { + memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs)); + memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs)); + + mfc_context->vp8_state.prob_intra = 255; + mfc_context->vp8_state.prob_last = 128; + mfc_context->vp8_state.prob_gf = 128; + } else { + memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs)); + memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs)); + + mfc_context->vp8_state.prob_intra = 63; + mfc_context->vp8_state.prob_last = 128; + mfc_context->vp8_state.prob_gf = 128; + } + + mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]]; + + dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1); + coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual; + assert(coeff_probs_stream_in_buffer); + 
memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs)); + dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo); +} + +static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context, + VAQMatrixBufferVP8 *q_matrix) +{ + + /*some other probabilities need to be updated*/ +} + +extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param, + VAEncPictureParameterBufferVP8 *pic_param, + VAQMatrixBufferVP8 *q_matrix, + struct gen6_mfc_context *mfc_context); + +static void vp8_enc_frame_header_binarize(struct encode_state *encode_state, + struct gen6_mfc_context *mfc_context) +{ + VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer; + unsigned char *frame_header_buffer; + + binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context); + + dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1); + frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual; + assert(frame_header_buffer); + memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8); + dri_bo_unmap(mfc_context->vp8_state.frame_header_bo); +} + +#define MAX_VP8_FRAME_HEADER_SIZE 0x2000 +#define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000 + +static void gen9_mfc_vp8_init(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + dri_bo *bo; + int i; + int width_in_mbs = 0; + int height_in_mbs = 0; + int slice_batchbuffer_size; + + VAEncSequenceParameterBufferVP8 *pSequenceParameter = 
(VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer; + + width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16; + height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16; + + slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + + (SLICE_HEADER + SLICE_TAIL); + + /*Encode common setup for MFC*/ + dri_bo_unreference(mfc_context->post_deblocking_output.bo); + mfc_context->post_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->pre_deblocking_output.bo); + mfc_context->pre_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->uncompressed_picture_source.bo); + mfc_context->uncompressed_picture_source.bo = NULL; + + dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); + mfc_context->mfc_indirect_pak_bse_object.bo = NULL; + + for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){ + if ( mfc_context->direct_mv_buffers[i].bo != NULL) + dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo); + mfc_context->direct_mv_buffers[i].bo = NULL; + } + + for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){ + if (mfc_context->reference_surfaces[i].bo != NULL) + dri_bo_unreference(mfc_context->reference_surfaces[i].bo); + mfc_context->reference_surfaces[i].bo = NULL; + } + + dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * 64, + 64); + assert(bo); + mfc_context->intra_row_store_scratch_buffer.bo = bo; + + dri_bo_unreference(mfc_context->macroblock_status_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * height_in_mbs * 16, + 64); + assert(bo); + mfc_context->macroblock_status_buffer.bo = bo; + + dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo); + bo = 
dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */ + 64); + assert(bo); + mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo; + + dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */ + 0x1000); + assert(bo); + mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + + dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo); + mfc_context->mfc_batchbuffer_surface.bo = NULL; + + dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->aux_batchbuffer_surface.bo = NULL; + + if (mfc_context->aux_batchbuffer) + intel_batchbuffer_free(mfc_context->aux_batchbuffer); + + mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size); + mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; + dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->aux_batchbuffer_surface.pitch = 16; + mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16; + mfc_context->aux_batchbuffer_surface.size_block = 16; + + i965_gpe_context_init(ctx, &mfc_context->gpe_context); + + /* alloc vp8 encoding buffers*/ + dri_bo_unreference(mfc_context->vp8_state.frame_header_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + MAX_VP8_FRAME_HEADER_SIZE, + 0x1000); + assert(bo); + mfc_context->vp8_state.frame_header_bo = bo; + + mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 256 * 9; + for(i = 0; i < 8; i++) { + mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 256 * (i + 1); + } + dri_bo_unreference(mfc_context->vp8_state.intermediate_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + mfc_context->vp8_state.intermediate_buffer_max_size, + 0x1000); + assert(bo); + 
mfc_context->vp8_state.intermediate_bo = bo; + + dri_bo_unreference(mfc_context->vp8_state.stream_out_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * height_in_mbs * 16, + 0x1000); + assert(bo); + mfc_context->vp8_state.stream_out_bo = bo; + + dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + sizeof(vp8_default_coef_probs), + 0x1000); + assert(bo); + mfc_context->vp8_state.coeff_probs_stream_in_bo = bo; + + dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + VP8_TOKEN_STATISTICS_BUFFER_SIZE, + 0x1000); + assert(bo); + mfc_context->vp8_state.token_statistics_bo = bo; + + dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * 16 * 64, + 0x1000); + assert(bo); + mfc_context->vp8_state.mpc_row_store_bo = bo; + + vp8_enc_state_init(mfc_context, pic_param, q_matrix); + vp8_enc_frame_header_binarize(encode_state, mfc_context); +} + +static VAStatus +intel_mfc_vp8_prepare(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct object_surface *obj_surface; + struct object_buffer *obj_buffer; + struct i965_coded_buffer_segment *coded_buffer_segment; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + VAStatus vaStatus = VA_STATUS_SUCCESS; + dri_bo *bo; + int i; + + /* reconstructed surface */ + obj_surface = encode_state->reconstructed_object; + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + if (pic_param->loop_filter_level[0] == 0) { + mfc_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(mfc_context->pre_deblocking_output.bo); + } else { + 
mfc_context->post_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(mfc_context->post_deblocking_output.bo); + } + + mfc_context->surface_state.width = obj_surface->orig_width; + mfc_context->surface_state.height = obj_surface->orig_height; + mfc_context->surface_state.w_pitch = obj_surface->width; + mfc_context->surface_state.h_pitch = obj_surface->height; + + /* forward reference */ + obj_surface = encode_state->reference_objects[0]; + + if (obj_surface && obj_surface->bo) { + mfc_context->reference_surfaces[0].bo = obj_surface->bo; + dri_bo_reference(mfc_context->reference_surfaces[0].bo); + } else + mfc_context->reference_surfaces[0].bo = NULL; + + /* backward reference */ + obj_surface = encode_state->reference_objects[1]; + + if (obj_surface && obj_surface->bo) { + mfc_context->reference_surfaces[1].bo = obj_surface->bo; + dri_bo_reference(mfc_context->reference_surfaces[1].bo); + } else { + mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo; + + if (mfc_context->reference_surfaces[1].bo) + dri_bo_reference(mfc_context->reference_surfaces[1].bo); + } + + for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { + mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo; + + if (mfc_context->reference_surfaces[i].bo) + dri_bo_reference(mfc_context->reference_surfaces[i].bo); + } + + /* input YUV surface */ + obj_surface = encode_state->input_yuv_object; + mfc_context->uncompressed_picture_source.bo = obj_surface->bo; + dri_bo_reference(mfc_context->uncompressed_picture_source.bo); + + /* coded buffer */ + obj_buffer = encode_state->coded_buf_object; + bo = obj_buffer->buffer_store->bo; + mfc_context->mfc_indirect_pak_bse_object.bo = bo; + mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE; + mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000); + dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo); + + 
dri_bo_unreference(mfc_context->vp8_state.final_frame_bo); + mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo; + mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE; + dri_bo_reference(mfc_context->vp8_state.final_frame_bo); + + /* set the internal flag to 0 to indicate the coded size is unknown */ + dri_bo_map(bo, 1); + coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; + coded_buffer_segment->mapped = 0; + coded_buffer_segment->codec = encoder_context->codec; + dri_bo_unmap(bo); + + return vaStatus; +} + +static void +gen9_mfc_vp8_encoder_cfg(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + + BEGIN_BCS_BATCH(batch, 30); + OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? 
*/ + + OUT_BCS_BATCH(batch, + 0 << 9 | /* compressed bitstream output disable */ + 1 << 7 | /* disable per-segment delta qindex and loop filter in RC */ + 0 << 6 | /* RC initial pass */ + 0 << 4 | /* upate segment feature date flag */ + 1 << 3 | /* bitstream statistics output enable */ + 1 << 2 | /* token statistics output enable */ + 0 << 1 | /* final bitstream output disable */ + 0 << 0); /*DW1*/ + + OUT_BCS_BATCH(batch, 0); /*DW2*/ + + OUT_BCS_BATCH(batch, + 0xfff << 16 | /* max intra mb bit count limit */ + 0xfff << 0 /* max inter mb bit count limit */ + ); /*DW3*/ + + OUT_BCS_BATCH(batch, 0); /*DW4*/ + OUT_BCS_BATCH(batch, 0); /*DW5*/ + OUT_BCS_BATCH(batch, 0); /*DW6*/ + OUT_BCS_BATCH(batch, 0); /*DW7*/ + OUT_BCS_BATCH(batch, 0); /*DW8*/ + OUT_BCS_BATCH(batch, 0); /*DW9*/ + OUT_BCS_BATCH(batch, 0); /*DW10*/ + OUT_BCS_BATCH(batch, 0); /*DW11*/ + OUT_BCS_BATCH(batch, 0); /*DW12*/ + OUT_BCS_BATCH(batch, 0); /*DW13*/ + OUT_BCS_BATCH(batch, 0); /*DW14*/ + OUT_BCS_BATCH(batch, 0); /*DW15*/ + OUT_BCS_BATCH(batch, 0); /*DW16*/ + OUT_BCS_BATCH(batch, 0); /*DW17*/ + OUT_BCS_BATCH(batch, 0); /*DW18*/ + OUT_BCS_BATCH(batch, 0); /*DW19*/ + OUT_BCS_BATCH(batch, 0); /*DW20*/ + OUT_BCS_BATCH(batch, 0); /*DW21*/ + + OUT_BCS_BATCH(batch, + pic_param->pic_flags.bits.show_frame << 23 | + pic_param->pic_flags.bits.version << 20 + ); /*DW22*/ + + OUT_BCS_BATCH(batch, + (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 | + (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0 + ); + + /*DW24*/ + OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */ + + /*DW25*/ + OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */ + + /*DW26*/ + OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/ + + /*DW27*/ + OUT_BCS_BATCH(batch, 
mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */ + + /*DW28*/ + OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */ + + /*DW29*/ + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_vp8_pic_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer; + int i, j, log2num; + + assert(pic_param->pic_flags.bits.num_token_partitions > 0); + assert(pic_param->pic_flags.bits.num_token_partitions < 9); + log2num = (int)log2(pic_param->pic_flags.bits.num_token_partitions); + + /*update mode and token probs*/ + vp8_enc_state_update(mfc_context, q_matrix); + + BEGIN_BCS_BATCH(batch, 38); + OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2)); + OUT_BCS_BATCH(batch, + (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 | + (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0); + + OUT_BCS_BATCH(batch, + log2num << 24 | + pic_param->sharpness_level << 16 | + pic_param->pic_flags.bits.sign_bias_alternate << 13 | + pic_param->pic_flags.bits.sign_bias_golden << 12 | + pic_param->pic_flags.bits.loop_filter_adj_enable << 11 | + pic_param->pic_flags.bits.mb_no_coeff_skip << 10 | + pic_param->pic_flags.bits.update_mb_segmentation_map << 9 | + pic_param->pic_flags.bits.segmentation_enabled << 8 | + !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/ + 
(pic_param->pic_flags.bits.version / 2) << 4 | + (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */ + !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */ + + OUT_BCS_BATCH(batch, + pic_param->loop_filter_level[3] << 24 | + pic_param->loop_filter_level[2] << 16 | + pic_param->loop_filter_level[1] << 8 | + pic_param->loop_filter_level[0] << 0); + + OUT_BCS_BATCH(batch, + q_matrix->quantization_index[3] << 24 | + q_matrix->quantization_index[2] << 16 | + q_matrix->quantization_index[1] << 8 | + q_matrix->quantization_index[0] << 0); + + OUT_BCS_BATCH(batch, + ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 | + abs(q_matrix->quantization_index_delta[4]) << 24 | + ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 | + abs(q_matrix->quantization_index_delta[3]) << 16 | + ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 | + abs(q_matrix->quantization_index_delta[2]) << 8 | + ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 | + abs(q_matrix->quantization_index_delta[1]) << 0); + + OUT_BCS_BATCH(batch, + ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 | + abs(q_matrix->quantization_index_delta[0]) << 0); + + OUT_BCS_BATCH(batch, + pic_param->clamp_qindex_high << 8 | + pic_param->clamp_qindex_low << 0); + + for (i = 8; i < 19; i++) { + OUT_BCS_BATCH(batch, 0xffffffff); + } + + OUT_BCS_BATCH(batch, + mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 | + mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 | + mfc_context->vp8_state.mb_segment_tree_probs[0] << 0); + + OUT_BCS_BATCH(batch, + mfc_context->vp8_state.prob_skip_false << 24 | + mfc_context->vp8_state.prob_intra << 16 | + mfc_context->vp8_state.prob_last << 8 | + mfc_context->vp8_state.prob_gf << 0); + + OUT_BCS_BATCH(batch, + mfc_context->vp8_state.y_mode_probs[3] << 24 | + mfc_context->vp8_state.y_mode_probs[2] << 16 | + mfc_context->vp8_state.y_mode_probs[1] << 8 
| + mfc_context->vp8_state.y_mode_probs[0] << 0); + + OUT_BCS_BATCH(batch, + mfc_context->vp8_state.uv_mode_probs[2] << 16 | + mfc_context->vp8_state.uv_mode_probs[1] << 8 | + mfc_context->vp8_state.uv_mode_probs[0] << 0); + + /* MV update value, DW23-DW32 */ + for (i = 0; i < 2; i++) { + for (j = 0; j < 20; j += 4) { + OUT_BCS_BATCH(batch, + (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 | + mfc_context->vp8_state.mv_probs[i][j + 2] << 16 | + mfc_context->vp8_state.mv_probs[i][j + 1] << 8 | + mfc_context->vp8_state.mv_probs[i][j + 0] << 0); + } + } + + OUT_BCS_BATCH(batch, + (pic_param->ref_lf_delta[3] & 0x7f) << 24 | + (pic_param->ref_lf_delta[2] & 0x7f) << 16 | + (pic_param->ref_lf_delta[1] & 0x7f) << 8 | + (pic_param->ref_lf_delta[0] & 0x7f) << 0); + + OUT_BCS_BATCH(batch, + (pic_param->mode_lf_delta[3] & 0x7f) << 24 | + (pic_param->mode_lf_delta[2] & 0x7f) << 16 | + (pic_param->mode_lf_delta[1] & 0x7f) << 8 | + (pic_param->mode_lf_delta[0] & 0x7f) << 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +#define OUT_VP8_BUFFER(bo, offset) \ + if (bo) \ + OUT_BCS_RELOC(batch, \ + bo, \ + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \ + offset); \ + else \ + OUT_BCS_BATCH(batch, 0); \ + OUT_BCS_BATCH(batch, 0); \ + OUT_BCS_BATCH(batch, 0); + +static void +gen9_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + BEGIN_BCS_BATCH(batch, 32); + OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2)); + + OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0); + + OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]); + OUT_BCS_BATCH(batch, 
mfc_context->vp8_state.intermediate_partition_offset[1]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size); + + OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE); + OUT_BCS_BATCH(batch, 0); + + OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0); + OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0); + OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0); + OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context); + mfc_context->set_surface_state(ctx, encoder_context); + mfc_context->ind_obj_base_addr_state(ctx, encoder_context); + gen9_mfc_pipe_buf_addr_state(ctx, encoder_context); + gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context); + gen9_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context); + gen9_mfc_vp8_pic_state(ctx, encode_state,encoder_context); + gen9_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context); +} + +static void +gen9_mfc_vp8_pak_object_intra(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + unsigned int *msg, + int x, int y, + struct intel_batchbuffer *batch) +{ + if (batch == NULL) + batch = 
encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, 7); + + OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + (0 << 20) | /* mv format: intra mb */ + (0 << 18) | /* Segment ID */ + (0 << 17) | /* disable coeff clamp */ + (1 << 13) | /* intra mb flag */ + (0 << 11) | /* refer picture select: last frame */ + (0 << 8) | /* mb type: 16x16 intra mb */ + (0 << 4) | /* mb uv mode: dc_pred */ + (0 << 2) | /* skip mb flag: disable */ + 0); + + OUT_BCS_BATCH(batch, (y << 16) | x); + OUT_BCS_BATCH(batch, 0); /* y_mode: dc_pred */ + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + unsigned int *msg, + int x, int y, + struct intel_batchbuffer *batch) +{ + /* Add it later */ +} + +static void +gen9_mfc_vp8_pak_pipeline(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16; + unsigned int *msg = NULL; + unsigned char *msg_ptr = NULL; + unsigned int i, is_intra_frame; + + is_intra_frame = !pic_param->pic_flags.bits.frame_type; + + dri_bo_map(vme_context->vme_output.bo , 1); + msg = msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; + + for( i = 0; i < width_in_mbs * height_in_mbs; i++) { + int h_pos = i % width_in_mbs; + int v_pos = i / width_in_mbs; + + if (is_intra_frame) { + gen9_mfc_vp8_pak_object_intra(ctx, + encoder_context, + msg, + 
h_pos, v_pos, + slice_batch); + } else { + gen9_mfc_vp8_pak_object_inter(ctx, + encoder_context, + msg, + h_pos, v_pos, + slice_batch); + } + } + + dri_bo_unmap(vme_context->vme_output.bo); +} + +/* + * A batch buffer for vp8 pak object commands + */ +static dri_bo * +gen9_mfc_vp8_software_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct intel_batchbuffer *batch; + dri_bo *batch_bo; + + batch = mfc_context->aux_batchbuffer; + batch_bo = batch->buffer; + + gen9_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch); + + intel_batchbuffer_align(batch, 8); + + BEGIN_BCS_BATCH(batch, 2); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(batch); + + dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; + + return batch_bo; +} + +static void +gen9_mfc_vp8_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + dri_bo *slice_batch_bo; + + slice_batch_bo = gen9_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context); + + // begin programing + intel_batchbuffer_start_atomic_bcs(batch, 0x4000); + intel_batchbuffer_emit_mi_flush(batch); + + // picture level programing + gen9_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context); + + BEGIN_BCS_BATCH(batch, 4); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_BCS_RELOC(batch, + slice_batch_bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); + + // end programing + intel_batchbuffer_end_atomic(batch); + + dri_bo_unreference(slice_batch_bo); +} + +static VAStatus +gen9_mfc_vp8_encode_picture(VADriverContextP ctx, + struct 
encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen9_mfc_vp8_init(ctx, encode_state, encoder_context); + intel_mfc_vp8_prepare(ctx, encode_state, encoder_context); + /*Programing bcs pipeline*/ + gen9_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context); + gen9_mfc_run(ctx, encode_state, encoder_context); + + return VA_STATUS_SUCCESS; +} + static void gen9_mfc_context_destroy(void *context) { @@ -2407,6 +3164,27 @@ gen9_mfc_context_destroy(void *context) mfc_context->aux_batchbuffer = NULL; + dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo); + mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.final_frame_bo); + mfc_context->vp8_state.final_frame_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.frame_header_bo); + mfc_context->vp8_state.frame_header_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.intermediate_bo); + mfc_context->vp8_state.intermediate_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo); + mfc_context->vp8_state.mpc_row_store_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.stream_out_bo); + mfc_context->vp8_state.stream_out_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo); + mfc_context->vp8_state.token_statistics_bo = NULL; + free(mfc_context); } @@ -2432,6 +3210,10 @@ static VAStatus gen9_mfc_pipeline(VADriverContextP ctx, vaStatus = gen9_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context); break; + case VAProfileVP8Version0_3: + vaStatus = gen9_mfc_vp8_encode_picture(ctx, encode_state, encoder_context); + break; + default: vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; break; diff --git a/src/i965_defines.h b/src/i965_defines.h index aa5058f2..86a3725d 100755 --- a/src/i965_defines.h +++ b/src/i965_defines.h @@ -399,6 +399,11 @@ #define MFD_VP8_BSD_OBJECT MFX(2, 4, 1, 8) +#define MFX_VP8_ENCODER_CFG MFX(2, 4, 2, 1) + +#define 
MFX_VP8_BSP_BUF_BASE_ADDR_STATE MFX(2, 4, 2, 3) + +#define MFX_VP8_PAK_OBJECT MFX(2, 4, 2, 9) #define VEB(pipeline, op, sub_opa, sub_opb) \ (3 << 29 | \ diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 64f4876e..2c2cc467 100644 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -2173,7 +2173,13 @@ i965_MapBuffer(VADriverContextP ctx, //In JPEG End of Image (EOI = 0xDDF9) marker can be used for delimiter. delimiter0 = 0xFF; delimiter1 = 0xD9; - } else { + } else if (coded_buffer_segment->codec == CODEC_VP8) { + delimiter0 = VP8_DELIMITER0; + delimiter1 = VP8_DELIMITER1; + delimiter2 = VP8_DELIMITER2; + delimiter3 = VP8_DELIMITER3; + delimiter4 = VP8_DELIMITER4; + } else { ASSERT_RET(0, VA_STATUS_ERROR_UNSUPPORTED_PROFILE); } @@ -2868,7 +2874,8 @@ i965_EndPicture(VADriverContextP ctx, VAContextID context) return VA_STATUS_ERROR_INVALID_PARAMETER; } if ((obj_context->codec_state.encode.num_slice_params <=0) && - (obj_context->codec_state.encode.num_slice_params_ext <=0)) { + (obj_context->codec_state.encode.num_slice_params_ext <=0) && + (obj_config->profile != VAProfileVP8Version0_3)) { return VA_STATUS_ERROR_INVALID_PARAMETER; } diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index ec34a3f7..f0584f3e 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -468,6 +468,12 @@ va_enc_packed_type_to_idx(int packed_type); #define MPEG2_DELIMITER3 0x00 #define MPEG2_DELIMITER4 0xb0 +#define VP8_DELIMITER0 0x00 +#define VP8_DELIMITER1 0x00 +#define VP8_DELIMITER2 0x00 +#define VP8_DELIMITER3 0x00 +#define VP8_DELIMITER4 0x00 + struct i965_coded_buffer_segment { VACodedBufferSegment base; diff --git a/src/i965_encoder.c b/src/i965_encoder.c index 86f87577..83187b6a 100644 --- a/src/i965_encoder.c +++ b/src/i965_encoder.c @@ -422,6 +422,71 @@ error: } static VAStatus +intel_encoder_check_vp8_parameter(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data 
*i965 = i965_driver_data(ctx); + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + struct object_surface *obj_surface; + struct object_buffer *obj_buffer; + int i = 0; + int is_key_frame = !pic_param->pic_flags.bits.frame_type; + + obj_surface = SURFACE(pic_param->reconstructed_frame); + assert(obj_surface); /* It is possible the store buffer isn't allocated yet */ + + if (!obj_surface) + goto error; + + encode_state->reconstructed_object = obj_surface; + obj_buffer = BUFFER(pic_param->coded_buf); + assert(obj_buffer && obj_buffer->buffer_store && obj_buffer->buffer_store->bo); + + if (!obj_buffer || !obj_buffer->buffer_store || !obj_buffer->buffer_store->bo) + goto error; + + encode_state->coded_buf_object = obj_buffer; + + if (!is_key_frame) { + assert(pic_param->ref_last_frame != VA_INVALID_SURFACE); + obj_surface = SURFACE(pic_param->ref_last_frame); + assert(obj_surface && obj_surface->bo); + + if (!obj_surface || !obj_surface->bo) + goto error; + + encode_state->reference_objects[i++] = obj_surface; + + assert(pic_param->ref_gf_frame != VA_INVALID_SURFACE); + obj_surface = SURFACE(pic_param->ref_gf_frame); + assert(obj_surface && obj_surface->bo); + + if (!obj_surface || !obj_surface->bo) + goto error; + + encode_state->reference_objects[i++] = obj_surface; + + assert(pic_param->ref_arf_frame != VA_INVALID_SURFACE); + obj_surface = SURFACE(pic_param->ref_arf_frame); + assert(obj_surface && obj_surface->bo); + + if (!obj_surface || !obj_surface->bo) + goto error; + + encode_state->reference_objects[i++] = obj_surface; + } + + for ( ; i < 16; i++) + encode_state->reference_objects[i] = NULL; + + return VA_STATUS_SUCCESS; + +error: + return VA_STATUS_ERROR_INVALID_PARAMETER; +} + +static VAStatus intel_encoder_sanity_check_input(VADriverContextP ctx, VAProfile profile, struct encode_state *encode_state, @@ -458,6 +523,14 @@ intel_encoder_sanity_check_input(VADriverContextP ctx, vaStatus = 
intel_encoder_check_jpeg_yuv_surface(ctx, profile, encode_state, encoder_context); break; } + + case VAProfileVP8Version0_3: { + vaStatus = intel_encoder_check_vp8_parameter(ctx, encode_state, encoder_context); + if (vaStatus != VA_STATUS_SUCCESS) + goto out; + vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context); + break; + } default: vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; diff --git a/src/i965_encoder_utils.c b/src/i965_encoder_utils.c index abd25b41..d9b5ce9a 100644 --- a/src/i965_encoder_utils.c +++ b/src/i965_encoder_utils.c @@ -29,7 +29,9 @@ #include <va/va.h> #include <va/va_enc_h264.h> #include <va/va_enc_mpeg2.h> +#include <va/va_enc_vp8.h> +#include "gen6_mfc.h" #include "i965_encoder_utils.h" #define BITSTREAM_ALLOCATE_STEPPING 4096 @@ -496,3 +498,142 @@ build_mpeg2_slice_header(VAEncSequenceParameterBufferMPEG2 *sps_param, return bs.bit_offset; } + +static void binarize_qindex_delta(avc_bitstream *bs, int qindex_delta) +{ + if (qindex_delta == 0) + avc_bitstream_put_ui(bs, 0, 1); + else { + avc_bitstream_put_ui(bs, 1, 1); + avc_bitstream_put_ui(bs, abs(qindex_delta), 4); + + if (qindex_delta < 0) + avc_bitstream_put_ui(bs, 1, 1); + else + avc_bitstream_put_ui(bs, 0, 1); + } +} + +void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param, + VAEncPictureParameterBufferVP8 *pic_param, + VAQMatrixBufferVP8 *q_matrix, + struct gen6_mfc_context *mfc_context) +{ + avc_bitstream bs; + int i; + int is_intra_frame = !pic_param->pic_flags.bits.frame_type; + int log2num = (int)log2(pic_param->pic_flags.bits.num_token_partitions); + + if (is_intra_frame) { + pic_param->pic_flags.bits.loop_filter_adj_enable = 1; + pic_param->pic_flags.bits.mb_no_coeff_skip = 1; + + pic_param->pic_flags.bits.forced_lf_adjustment = 1; + pic_param->pic_flags.bits.refresh_entropy_probs = 1; + } + + avc_bitstream_start(&bs); + + if (is_intra_frame) { + avc_bitstream_put_ui(&bs, 0, 1); + avc_bitstream_put_ui(&bs, 
pic_param->pic_flags.bits.clamping_type ,1); + } + + avc_bitstream_put_ui(&bs, pic_param->pic_flags.bits.segmentation_enabled, 1); + + if (pic_param->pic_flags.bits.segmentation_enabled) { + avc_bitstream_put_ui(&bs, pic_param->pic_flags.bits.update_mb_segmentation_map, 1); + avc_bitstream_put_ui(&bs, pic_param->pic_flags.bits.update_segment_feature_data, 1); + if (pic_param->pic_flags.bits.update_segment_feature_data) { + /*add it later*/ + assert(0); + } + if (pic_param->pic_flags.bits.update_mb_segmentation_map) { + for (i = 0; i < 3; i++) { + if (mfc_context->vp8_state.mb_segment_tree_probs[i] == 255) + avc_bitstream_put_ui(&bs, 0, 1); + else { + avc_bitstream_put_ui(&bs, 1, 1); + avc_bitstream_put_ui(&bs, mfc_context->vp8_state.mb_segment_tree_probs[i], 8); + } + } + } + } + + avc_bitstream_put_ui(&bs, pic_param->pic_flags.bits.loop_filter_type, 1); + avc_bitstream_put_ui(&bs, pic_param->loop_filter_level[0], 6); + avc_bitstream_put_ui(&bs, pic_param->sharpness_level, 3); + + mfc_context->vp8_state.frame_header_lf_update_pos = bs.bit_offset; + + if (pic_param->pic_flags.bits.forced_lf_adjustment) { + avc_bitstream_put_ui(&bs, 1, 1);//mode_ref_lf_delta_enable = 1 + avc_bitstream_put_ui(&bs, 1, 1);//mode_ref_lf_delta_update = 1 + + for (i =0; i < 4; i++) { + avc_bitstream_put_ui(&bs, 1, 1); + if (pic_param->ref_lf_delta[i] > 0) { + avc_bitstream_put_ui(&bs, (abs(pic_param->ref_lf_delta[i]) & 0x3F), 6); + avc_bitstream_put_ui(&bs, 0, 1); + } else { + avc_bitstream_put_ui(&bs, (abs(pic_param->ref_lf_delta[i]) & 0x3F), 6); + avc_bitstream_put_ui(&bs, 1, 1); + } + } + + for (i =0; i < 4; i++) { + avc_bitstream_put_ui(&bs, 1, 1); + if (pic_param->mode_lf_delta[i] > 0) { + avc_bitstream_put_ui(&bs, (abs(pic_param->mode_lf_delta[i]) & 0x3F), 6); + avc_bitstream_put_ui(&bs, 0, 1); + } else { + avc_bitstream_put_ui(&bs, (abs(pic_param->mode_lf_delta[i]) & 0x3F), 6); + avc_bitstream_put_ui(&bs, 1, 1); + } + } + + } else { + avc_bitstream_put_ui(&bs, 0, 
1);//mode_ref_lf_delta_enable = 0 + } + + avc_bitstream_put_ui(&bs, log2num, 2); + + mfc_context->vp8_state.frame_header_qindex_update_pos = bs.bit_offset; + + avc_bitstream_put_ui(&bs, q_matrix->quantization_index[0], 7); + + for (i = 0; i < 5; i++) + binarize_qindex_delta(&bs, q_matrix->quantization_index_delta[i]); + + if (!is_intra_frame) { + /*put reference frames info*/ + } + + avc_bitstream_put_ui(&bs, pic_param->pic_flags.bits.refresh_entropy_probs, 1); + + if (!is_intra_frame) + avc_bitstream_put_ui(&bs, pic_param->pic_flags.bits.refresh_last, 1); + + mfc_context->vp8_state.frame_header_token_update_pos = bs.bit_offset; + + for (i =0; i < 4 * 8 * 3 * 11; i++) + avc_bitstream_put_ui(&bs, 0, 1); //don't update coeff_probs + + avc_bitstream_put_ui(&bs, pic_param->pic_flags.bits.mb_no_coeff_skip, 1); + if (pic_param->pic_flags.bits.mb_no_coeff_skip) + avc_bitstream_put_ui(&bs, mfc_context->vp8_state.prob_skip_false, 8); + + if (!is_intra_frame) { + avc_bitstream_put_ui(&bs, mfc_context->vp8_state.prob_intra, 8); + avc_bitstream_put_ui(&bs, mfc_context->vp8_state.prob_last, 8); + avc_bitstream_put_ui(&bs, mfc_context->vp8_state.prob_gf, 8); + + mfc_context->vp8_state.frame_header_bin_mv_upate_pos = bs.bit_offset; + /*add mode_probs*/ + } + + avc_bitstream_end(&bs); + + mfc_context->vp8_state.vp8_frame_header = (unsigned char *)bs.buffer; + mfc_context->vp8_state.frame_header_bit_count = bs.bit_offset; +} diff --git a/src/vp8_probs.h b/src/vp8_probs.h new file mode 100644 index 00000000..78f31053 --- /dev/null +++ b/src/vp8_probs.h @@ -0,0 +1,250 @@ +#ifndef VP8_PROBS_H +#define VP8_PROBS_H + +const unsigned char vp8_ymode_prob[4] = +{ + 112, 86, 140, 37 +}; + +const unsigned char vp8_kf_ymode_prob[4] = +{ + 145, 156, 163, 128 +}; + +const unsigned char vp8_uv_mode_prob[3] = +{ + 162, 101, 204 +}; + +static const unsigned char vp8_kf_uv_mode_prob[3] = +{ + 142, 114, 183 +}; + +const unsigned char vp8_base_skip_false_prob[128] = +{ + 255, 255, 255, 255, 255, 255, 
255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, + 251, 248, 244, 240, 236, 232, 229, 225, + 221, 217, 213, 208, 204, 199, 194, 190, + 187, 183, 179, 175, 172, 168, 164, 160, + 157, 153, 149, 145, 142, 138, 134, 130, + 127, 124, 120, 117, 114, 110, 107, 104, + 101, 98, 95, 92, 89, 86, 83, 80, + 77, 74, 71, 68, 65, 62, 59, 56, + 53, 50, 47, 44, 41, 38, 35, 32, + 30, 28, 26, 24, 22, 20, 18, 16, +}; + +const unsigned char vp8_mv_update_probs[2][19] = +{ + { + 237, + 246, + 253, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 250, 250, 252, 254, 254 + }, + { + 231, + 243, + 245, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 251, 251, 254, 254, 254 + } +}; + +const unsigned char vp8_default_mv_context[2][19] = +{ + { + 162, /* is short */ + 128, /* sign */ + 225, 146, 172, 147, 214, 39, 156, /* short tree */ + 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */ + }, + + { + 164, + 128, + 204, 170, 119, 235, 140, 230, 228, + 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 + + } +}; + +const unsigned char vp8_default_coef_probs[4][8][3][11] = +{ + { /* Block Type ( 0 ) */ + { /* Coeff Band ( 0 )*/ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 1 )*/ + { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, + { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, + { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 2 )*/ + { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, + { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, + { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 } + }, + { /* Coeff Band ( 3 )*/ + { 1, 185, 
249, 255, 243, 255, 128, 128, 128, 128, 128 }, + { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, + { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 4 )*/ + { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, + { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, + { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 5 )*/ + { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, + { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, + { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 6 )*/ + { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, + { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, + { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 7 )*/ + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, + { /* Block Type ( 1 ) */ + { /* Coeff Band ( 0 )*/ + { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, + { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, + { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 } + }, + { /* Coeff Band ( 1 )*/ + { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, + { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, + { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 } + }, + { /* Coeff Band ( 2 )*/ + { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, + { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, + { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 } + }, + { /* Coeff Band ( 3 )*/ + { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, + { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, + { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 4 )*/ + { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, + { 94, 136, 225, 251, 218, 190, 255, 
255, 128, 128, 128 }, + { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 } + }, + { /* Coeff Band ( 5 )*/ + { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, + { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, + { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 } + }, + { /* Coeff Band ( 6 )*/ + { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, + { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, + { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 } + }, + { /* Coeff Band ( 7 )*/ + { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 } + } + }, + { /* Block Type ( 2 ) */ + { /* Coeff Band ( 0 )*/ + { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, + { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, + { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 } + }, + { /* Coeff Band ( 1 )*/ + { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, + { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, + { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 2 )*/ + { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, + { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, + { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 } + }, + { /* Coeff Band ( 3 )*/ + { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 4 )*/ + { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 5 )*/ + { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { /* 
Coeff Band ( 6 )*/ + { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + }, + { /* Coeff Band ( 7 )*/ + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } + } + }, + { /* Block Type ( 3 ) */ + { /* Coeff Band ( 0 )*/ + { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, + { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, + { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 } + }, + { /* Coeff Band ( 1 )*/ + { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, + { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, + { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 } + }, + { /* Coeff Band ( 2 )*/ + { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, + { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 } + }, + { /* Coeff Band ( 3 )*/ + { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, + { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, + { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 } + }, + { /* Coeff Band ( 4 )*/ + { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, + { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 } + }, + { /* Coeff Band ( 5 )*/ + { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, + { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 } + }, + { /* Coeff Band ( 6 )*/ + { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, + { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 } + }, + { /* Coeff Band ( 7 )*/ + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 244, 
1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } + } + } +}; + +#endif /* VP8_PROBS_H */ |