diff options
Diffstat (limited to 'src/gen9_mfc.c')
-rw-r--r-- | src/gen9_mfc.c | 790 |
1 file changed, 786 insertions, 4 deletions
diff --git a/src/gen9_mfc.c b/src/gen9_mfc.c index 532695a2..98a2c2ed 100644 --- a/src/gen9_mfc.c +++ b/src/gen9_mfc.c @@ -42,6 +42,7 @@ #include "gen6_mfc.h" #include "gen6_vme.h" #include "intel_media.h" +#include "vp8_probs.h" #define SURFACE_STATE_PADDED_SIZE SURFACE_STATE_PADDED_SIZE_GEN8 #define SURFACE_STATE_OFFSET(index) (SURFACE_STATE_PADDED_SIZE * index) @@ -87,7 +88,6 @@ static struct i965_kernel gen9_mfc_kernels[] = { #define INTER_MV8 (4 << 20) #define INTER_MV32 (6 << 20) - static void gen9_mfc_pipe_mode_select(VADriverContextP ctx, int standard_select, @@ -97,7 +97,8 @@ gen9_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; assert(standard_select == MFX_FORMAT_MPEG2 || - standard_select == MFX_FORMAT_AVC); + standard_select == MFX_FORMAT_AVC || + standard_select == MFX_FORMAT_VP8); BEGIN_BCS_BATCH(batch, 5); @@ -108,6 +109,7 @@ gen9_mfc_pipe_mode_select(VADriverContextP ctx, (0 << 10) | /* Stream-Out Enable */ ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */ ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */ + (0 << 6) | /* frame statistics stream-out enable*/ (0 << 5) | /* not in stitch mode */ (1 << 4) | /* encoding mode */ (standard_select << 0)); /* standard select: avc or mpeg2 */ @@ -171,9 +173,18 @@ gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx, OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); OUT_BCS_BATCH(batch, 0); + /* the DW4-5 is the MFX upper bound */ - OUT_BCS_BATCH(batch, 0); - OUT_BCS_BATCH(batch, 0); + if (encoder_context->codec == CODEC_VP8) { + OUT_BCS_RELOC(batch, + mfc_context->mfc_indirect_pak_bse_object.bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + mfc_context->mfc_indirect_pak_bse_object.end_offset); + OUT_BCS_BATCH(batch, 0); + } else { + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + } vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks; 
/* the DW6-10 is for MFX Indirect MV Object Base Address */ @@ -2353,6 +2364,752 @@ gen9_mfc_mpeg2_encode_picture(VADriverContextP ctx, return VA_STATUS_SUCCESS; } +static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context, + VAEncPictureParameterBufferVP8 *pic_param, + VAQMatrixBufferVP8 *q_matrix) +{ + + int is_key_frame = !pic_param->pic_flags.bits.frame_type; + unsigned char *coeff_probs_stream_in_buffer; + + mfc_context->vp8_state.frame_header_lf_update_pos = 0; + mfc_context->vp8_state.frame_header_qindex_update_pos = 0; + mfc_context->vp8_state.frame_header_token_update_pos = 0; + mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0; + + mfc_context->vp8_state.prob_skip_false = 255; + memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs)); + memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs)); + + if (is_key_frame) { + memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs)); + memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs)); + + mfc_context->vp8_state.prob_intra = 255; + mfc_context->vp8_state.prob_last = 128; + mfc_context->vp8_state.prob_gf = 128; + } else { + memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs)); + memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs)); + + mfc_context->vp8_state.prob_intra = 63; + mfc_context->vp8_state.prob_last = 128; + mfc_context->vp8_state.prob_gf = 128; + } + + mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]]; + + dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1); + coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual; + assert(coeff_probs_stream_in_buffer); + 
memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs)); + dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo); +} + +static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context, + VAQMatrixBufferVP8 *q_matrix) +{ + + /*some other probabilities need to be updated*/ +} + +extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param, + VAEncPictureParameterBufferVP8 *pic_param, + VAQMatrixBufferVP8 *q_matrix, + struct gen6_mfc_context *mfc_context); + +static void vp8_enc_frame_header_binarize(struct encode_state *encode_state, + struct gen6_mfc_context *mfc_context) +{ + VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer; + unsigned char *frame_header_buffer; + + binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context); + + dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1); + frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual; + assert(frame_header_buffer); + memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8); + dri_bo_unmap(mfc_context->vp8_state.frame_header_bo); +} + +#define MAX_VP8_FRAME_HEADER_SIZE 0x2000 +#define VP8_TOKEN_STATISTICS_BUFFER_SIZE 0x2000 + +static void gen9_mfc_vp8_init(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct i965_driver_data *i965 = i965_driver_data(ctx); + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + dri_bo *bo; + int i; + int width_in_mbs = 0; + int height_in_mbs = 0; + int slice_batchbuffer_size; + + VAEncSequenceParameterBufferVP8 *pSequenceParameter = 
(VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer; + + width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16; + height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16; + + slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 + + (SLICE_HEADER + SLICE_TAIL); + + /*Encode common setup for MFC*/ + dri_bo_unreference(mfc_context->post_deblocking_output.bo); + mfc_context->post_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->pre_deblocking_output.bo); + mfc_context->pre_deblocking_output.bo = NULL; + + dri_bo_unreference(mfc_context->uncompressed_picture_source.bo); + mfc_context->uncompressed_picture_source.bo = NULL; + + dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo); + mfc_context->mfc_indirect_pak_bse_object.bo = NULL; + + for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){ + if ( mfc_context->direct_mv_buffers[i].bo != NULL) + dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo); + mfc_context->direct_mv_buffers[i].bo = NULL; + } + + for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){ + if (mfc_context->reference_surfaces[i].bo != NULL) + dri_bo_unreference(mfc_context->reference_surfaces[i].bo); + mfc_context->reference_surfaces[i].bo = NULL; + } + + dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * 64, + 64); + assert(bo); + mfc_context->intra_row_store_scratch_buffer.bo = bo; + + dri_bo_unreference(mfc_context->macroblock_status_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * height_in_mbs * 16, + 64); + assert(bo); + mfc_context->macroblock_status_buffer.bo = bo; + + dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo); + bo = 
dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */ + 64); + assert(bo); + mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo; + + dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */ + 0x1000); + assert(bo); + mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo; + + dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo); + mfc_context->mfc_batchbuffer_surface.bo = NULL; + + dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->aux_batchbuffer_surface.bo = NULL; + + if (mfc_context->aux_batchbuffer) + intel_batchbuffer_free(mfc_context->aux_batchbuffer); + + mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size); + mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer; + dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo); + mfc_context->aux_batchbuffer_surface.pitch = 16; + mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16; + mfc_context->aux_batchbuffer_surface.size_block = 16; + + i965_gpe_context_init(ctx, &mfc_context->gpe_context); + + /* alloc vp8 encoding buffers*/ + dri_bo_unreference(mfc_context->vp8_state.frame_header_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + MAX_VP8_FRAME_HEADER_SIZE, + 0x1000); + assert(bo); + mfc_context->vp8_state.frame_header_bo = bo; + + mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 256 * 9; + for(i = 0; i < 8; i++) { + mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 256 * (i + 1); + } + dri_bo_unreference(mfc_context->vp8_state.intermediate_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + mfc_context->vp8_state.intermediate_buffer_max_size, + 0x1000); + assert(bo); + 
mfc_context->vp8_state.intermediate_bo = bo; + + dri_bo_unreference(mfc_context->vp8_state.stream_out_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * height_in_mbs * 16, + 0x1000); + assert(bo); + mfc_context->vp8_state.stream_out_bo = bo; + + dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + sizeof(vp8_default_coef_probs), + 0x1000); + assert(bo); + mfc_context->vp8_state.coeff_probs_stream_in_bo = bo; + + dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + VP8_TOKEN_STATISTICS_BUFFER_SIZE, + 0x1000); + assert(bo); + mfc_context->vp8_state.token_statistics_bo = bo; + + dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo); + bo = dri_bo_alloc(i965->intel.bufmgr, + "Buffer", + width_in_mbs * 16 * 64, + 0x1000); + assert(bo); + mfc_context->vp8_state.mpc_row_store_bo = bo; + + vp8_enc_state_init(mfc_context, pic_param, q_matrix); + vp8_enc_frame_header_binarize(encode_state, mfc_context); +} + +static VAStatus +intel_mfc_vp8_prepare(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct object_surface *obj_surface; + struct object_buffer *obj_buffer; + struct i965_coded_buffer_segment *coded_buffer_segment; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + VAStatus vaStatus = VA_STATUS_SUCCESS; + dri_bo *bo; + int i; + + /* reconstructed surface */ + obj_surface = encode_state->reconstructed_object; + i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420); + if (pic_param->loop_filter_level[0] == 0) { + mfc_context->pre_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(mfc_context->pre_deblocking_output.bo); + } else { + 
mfc_context->post_deblocking_output.bo = obj_surface->bo; + dri_bo_reference(mfc_context->post_deblocking_output.bo); + } + + mfc_context->surface_state.width = obj_surface->orig_width; + mfc_context->surface_state.height = obj_surface->orig_height; + mfc_context->surface_state.w_pitch = obj_surface->width; + mfc_context->surface_state.h_pitch = obj_surface->height; + + /* forward reference */ + obj_surface = encode_state->reference_objects[0]; + + if (obj_surface && obj_surface->bo) { + mfc_context->reference_surfaces[0].bo = obj_surface->bo; + dri_bo_reference(mfc_context->reference_surfaces[0].bo); + } else + mfc_context->reference_surfaces[0].bo = NULL; + + /* backward reference */ + obj_surface = encode_state->reference_objects[1]; + + if (obj_surface && obj_surface->bo) { + mfc_context->reference_surfaces[1].bo = obj_surface->bo; + dri_bo_reference(mfc_context->reference_surfaces[1].bo); + } else { + mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo; + + if (mfc_context->reference_surfaces[1].bo) + dri_bo_reference(mfc_context->reference_surfaces[1].bo); + } + + for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) { + mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo; + + if (mfc_context->reference_surfaces[i].bo) + dri_bo_reference(mfc_context->reference_surfaces[i].bo); + } + + /* input YUV surface */ + obj_surface = encode_state->input_yuv_object; + mfc_context->uncompressed_picture_source.bo = obj_surface->bo; + dri_bo_reference(mfc_context->uncompressed_picture_source.bo); + + /* coded buffer */ + obj_buffer = encode_state->coded_buf_object; + bo = obj_buffer->buffer_store->bo; + mfc_context->mfc_indirect_pak_bse_object.bo = bo; + mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE; + mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000); + dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo); + + 
dri_bo_unreference(mfc_context->vp8_state.final_frame_bo); + mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo; + mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE; + dri_bo_reference(mfc_context->vp8_state.final_frame_bo); + + /* set the internal flag to 0 to indicate the coded size is unknown */ + dri_bo_map(bo, 1); + coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual; + coded_buffer_segment->mapped = 0; + coded_buffer_segment->codec = encoder_context->codec; + dri_bo_unmap(bo); + + return vaStatus; +} + +static void +gen9_mfc_vp8_encoder_cfg(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + + BEGIN_BCS_BATCH(batch, 30); + OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? 
*/ + + OUT_BCS_BATCH(batch, + 0 << 9 | /* compressed bitstream output disable */ + 1 << 7 | /* disable per-segment delta qindex and loop filter in RC */ + 0 << 6 | /* RC initial pass */ + 0 << 4 | /* upate segment feature date flag */ + 1 << 3 | /* bitstream statistics output enable */ + 1 << 2 | /* token statistics output enable */ + 0 << 1 | /* final bitstream output disable */ + 0 << 0); /*DW1*/ + + OUT_BCS_BATCH(batch, 0); /*DW2*/ + + OUT_BCS_BATCH(batch, + 0xfff << 16 | /* max intra mb bit count limit */ + 0xfff << 0 /* max inter mb bit count limit */ + ); /*DW3*/ + + OUT_BCS_BATCH(batch, 0); /*DW4*/ + OUT_BCS_BATCH(batch, 0); /*DW5*/ + OUT_BCS_BATCH(batch, 0); /*DW6*/ + OUT_BCS_BATCH(batch, 0); /*DW7*/ + OUT_BCS_BATCH(batch, 0); /*DW8*/ + OUT_BCS_BATCH(batch, 0); /*DW9*/ + OUT_BCS_BATCH(batch, 0); /*DW10*/ + OUT_BCS_BATCH(batch, 0); /*DW11*/ + OUT_BCS_BATCH(batch, 0); /*DW12*/ + OUT_BCS_BATCH(batch, 0); /*DW13*/ + OUT_BCS_BATCH(batch, 0); /*DW14*/ + OUT_BCS_BATCH(batch, 0); /*DW15*/ + OUT_BCS_BATCH(batch, 0); /*DW16*/ + OUT_BCS_BATCH(batch, 0); /*DW17*/ + OUT_BCS_BATCH(batch, 0); /*DW18*/ + OUT_BCS_BATCH(batch, 0); /*DW19*/ + OUT_BCS_BATCH(batch, 0); /*DW20*/ + OUT_BCS_BATCH(batch, 0); /*DW21*/ + + OUT_BCS_BATCH(batch, + pic_param->pic_flags.bits.show_frame << 23 | + pic_param->pic_flags.bits.version << 20 + ); /*DW22*/ + + OUT_BCS_BATCH(batch, + (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 | + (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0 + ); + + /*DW24*/ + OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */ + + /*DW25*/ + OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */ + + /*DW26*/ + OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/ + + /*DW27*/ + OUT_BCS_BATCH(batch, 
mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */ + + /*DW28*/ + OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */ + + /*DW29*/ + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_vp8_pic_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer; + int i, j, log2num; + + assert(pic_param->pic_flags.bits.num_token_partitions > 0); + assert(pic_param->pic_flags.bits.num_token_partitions < 9); + log2num = (int)log2(pic_param->pic_flags.bits.num_token_partitions); + + /*update mode and token probs*/ + vp8_enc_state_update(mfc_context, q_matrix); + + BEGIN_BCS_BATCH(batch, 38); + OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2)); + OUT_BCS_BATCH(batch, + (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 | + (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0); + + OUT_BCS_BATCH(batch, + log2num << 24 | + pic_param->sharpness_level << 16 | + pic_param->pic_flags.bits.sign_bias_alternate << 13 | + pic_param->pic_flags.bits.sign_bias_golden << 12 | + pic_param->pic_flags.bits.loop_filter_adj_enable << 11 | + pic_param->pic_flags.bits.mb_no_coeff_skip << 10 | + pic_param->pic_flags.bits.update_mb_segmentation_map << 9 | + pic_param->pic_flags.bits.segmentation_enabled << 8 | + !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/ + 
(pic_param->pic_flags.bits.version / 2) << 4 | + (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */ + !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */ + + OUT_BCS_BATCH(batch, + pic_param->loop_filter_level[3] << 24 | + pic_param->loop_filter_level[2] << 16 | + pic_param->loop_filter_level[1] << 8 | + pic_param->loop_filter_level[0] << 0); + + OUT_BCS_BATCH(batch, + q_matrix->quantization_index[3] << 24 | + q_matrix->quantization_index[2] << 16 | + q_matrix->quantization_index[1] << 8 | + q_matrix->quantization_index[0] << 0); + + OUT_BCS_BATCH(batch, + ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 | + abs(q_matrix->quantization_index_delta[4]) << 24 | + ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 | + abs(q_matrix->quantization_index_delta[3]) << 16 | + ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 | + abs(q_matrix->quantization_index_delta[2]) << 8 | + ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 | + abs(q_matrix->quantization_index_delta[1]) << 0); + + OUT_BCS_BATCH(batch, + ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 | + abs(q_matrix->quantization_index_delta[0]) << 0); + + OUT_BCS_BATCH(batch, + pic_param->clamp_qindex_high << 8 | + pic_param->clamp_qindex_low << 0); + + for (i = 8; i < 19; i++) { + OUT_BCS_BATCH(batch, 0xffffffff); + } + + OUT_BCS_BATCH(batch, + mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 | + mfc_context->vp8_state.mb_segment_tree_probs[1] << 8 | + mfc_context->vp8_state.mb_segment_tree_probs[0] << 0); + + OUT_BCS_BATCH(batch, + mfc_context->vp8_state.prob_skip_false << 24 | + mfc_context->vp8_state.prob_intra << 16 | + mfc_context->vp8_state.prob_last << 8 | + mfc_context->vp8_state.prob_gf << 0); + + OUT_BCS_BATCH(batch, + mfc_context->vp8_state.y_mode_probs[3] << 24 | + mfc_context->vp8_state.y_mode_probs[2] << 16 | + mfc_context->vp8_state.y_mode_probs[1] << 8 
| + mfc_context->vp8_state.y_mode_probs[0] << 0); + + OUT_BCS_BATCH(batch, + mfc_context->vp8_state.uv_mode_probs[2] << 16 | + mfc_context->vp8_state.uv_mode_probs[1] << 8 | + mfc_context->vp8_state.uv_mode_probs[0] << 0); + + /* MV update value, DW23-DW32 */ + for (i = 0; i < 2; i++) { + for (j = 0; j < 20; j += 4) { + OUT_BCS_BATCH(batch, + (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 | + mfc_context->vp8_state.mv_probs[i][j + 2] << 16 | + mfc_context->vp8_state.mv_probs[i][j + 1] << 8 | + mfc_context->vp8_state.mv_probs[i][j + 0] << 0); + } + } + + OUT_BCS_BATCH(batch, + (pic_param->ref_lf_delta[3] & 0x7f) << 24 | + (pic_param->ref_lf_delta[2] & 0x7f) << 16 | + (pic_param->ref_lf_delta[1] & 0x7f) << 8 | + (pic_param->ref_lf_delta[0] & 0x7f) << 0); + + OUT_BCS_BATCH(batch, + (pic_param->mode_lf_delta[3] & 0x7f) << 24 | + (pic_param->mode_lf_delta[2] & 0x7f) << 16 | + (pic_param->mode_lf_delta[1] & 0x7f) << 8 | + (pic_param->mode_lf_delta[0] & 0x7f) << 0); + + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +#define OUT_VP8_BUFFER(bo, offset) \ + if (bo) \ + OUT_BCS_RELOC(batch, \ + bo, \ + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \ + offset); \ + else \ + OUT_BCS_BATCH(batch, 0); \ + OUT_BCS_BATCH(batch, 0); \ + OUT_BCS_BATCH(batch, 0); + +static void +gen9_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + BEGIN_BCS_BATCH(batch, 32); + OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2)); + + OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0); + + OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]); + OUT_BCS_BATCH(batch, 
mfc_context->vp8_state.intermediate_partition_offset[1]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]); + OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size); + + OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE); + OUT_BCS_BATCH(batch, 0); + + OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0); + OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0); + OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0); + OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + + mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context); + mfc_context->set_surface_state(ctx, encoder_context); + mfc_context->ind_obj_base_addr_state(ctx, encoder_context); + gen9_mfc_pipe_buf_addr_state(ctx, encoder_context); + gen9_mfc_bsp_buf_base_addr_state(ctx, encoder_context); + gen9_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context); + gen9_mfc_vp8_pic_state(ctx, encode_state,encoder_context); + gen9_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context); +} + +static void +gen9_mfc_vp8_pak_object_intra(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + unsigned int *msg, + int x, int y, + struct intel_batchbuffer *batch) +{ + if (batch == NULL) + batch = 
encoder_context->base.batch; + + BEGIN_BCS_BATCH(batch, 7); + + OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2)); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, + (0 << 20) | /* mv format: intra mb */ + (0 << 18) | /* Segment ID */ + (0 << 17) | /* disable coeff clamp */ + (1 << 13) | /* intra mb flag */ + (0 << 11) | /* refer picture select: last frame */ + (0 << 8) | /* mb type: 16x16 intra mb */ + (0 << 4) | /* mb uv mode: dc_pred */ + (0 << 2) | /* skip mb flag: disable */ + 0); + + OUT_BCS_BATCH(batch, (y << 16) | x); + OUT_BCS_BATCH(batch, 0); /* y_mode: dc_pred */ + OUT_BCS_BATCH(batch, 0); + + ADVANCE_BCS_BATCH(batch); +} + +static void +gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx, + struct intel_encoder_context *encoder_context, + unsigned int *msg, + int x, int y, + struct intel_batchbuffer *batch) +{ + /* Add it later */ +} + +static void +gen9_mfc_vp8_pak_pipeline(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context, + struct intel_batchbuffer *slice_batch) +{ + struct gen6_vme_context *vme_context = encoder_context->vme_context; + VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer; + VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer; + int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16; + int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16; + unsigned int *msg = NULL; + unsigned char *msg_ptr = NULL; + unsigned int i, is_intra_frame; + + is_intra_frame = !pic_param->pic_flags.bits.frame_type; + + dri_bo_map(vme_context->vme_output.bo , 1); + msg = msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual; + + for( i = 0; i < width_in_mbs * height_in_mbs; i++) { + int h_pos = i % width_in_mbs; + int v_pos = i / width_in_mbs; + + if (is_intra_frame) { + gen9_mfc_vp8_pak_object_intra(ctx, + encoder_context, + msg, + 
h_pos, v_pos, + slice_batch); + } else { + gen9_mfc_vp8_pak_object_inter(ctx, + encoder_context, + msg, + h_pos, v_pos, + slice_batch); + } + } + + dri_bo_unmap(vme_context->vme_output.bo); +} + +/* + * A batch buffer for vp8 pak object commands + */ +static dri_bo * +gen9_mfc_vp8_software_batchbuffer(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; + struct intel_batchbuffer *batch; + dri_bo *batch_bo; + + batch = mfc_context->aux_batchbuffer; + batch_bo = batch->buffer; + + gen9_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch); + + intel_batchbuffer_align(batch, 8); + + BEGIN_BCS_BATCH(batch, 2); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END); + ADVANCE_BCS_BATCH(batch); + + dri_bo_reference(batch_bo); + intel_batchbuffer_free(batch); + mfc_context->aux_batchbuffer = NULL; + + return batch_bo; +} + +static void +gen9_mfc_vp8_pipeline_programing(VADriverContextP ctx, + struct encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + struct intel_batchbuffer *batch = encoder_context->base.batch; + dri_bo *slice_batch_bo; + + slice_batch_bo = gen9_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context); + + // begin programing + intel_batchbuffer_start_atomic_bcs(batch, 0x4000); + intel_batchbuffer_emit_mi_flush(batch); + + // picture level programing + gen9_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context); + + BEGIN_BCS_BATCH(batch, 4); + OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); + OUT_BCS_RELOC(batch, + slice_batch_bo, + I915_GEM_DOMAIN_COMMAND, 0, + 0); + OUT_BCS_BATCH(batch, 0); + OUT_BCS_BATCH(batch, 0); + ADVANCE_BCS_BATCH(batch); + + // end programing + intel_batchbuffer_end_atomic(batch); + + dri_bo_unreference(slice_batch_bo); +} + +static VAStatus +gen9_mfc_vp8_encode_picture(VADriverContextP ctx, + struct 
encode_state *encode_state, + struct intel_encoder_context *encoder_context) +{ + gen9_mfc_vp8_init(ctx, encode_state, encoder_context); + intel_mfc_vp8_prepare(ctx, encode_state, encoder_context); + /*Programing bcs pipeline*/ + gen9_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context); + gen9_mfc_run(ctx, encode_state, encoder_context); + + return VA_STATUS_SUCCESS; +} + static void gen9_mfc_context_destroy(void *context) { @@ -2407,6 +3164,27 @@ gen9_mfc_context_destroy(void *context) mfc_context->aux_batchbuffer = NULL; + dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo); + mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.final_frame_bo); + mfc_context->vp8_state.final_frame_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.frame_header_bo); + mfc_context->vp8_state.frame_header_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.intermediate_bo); + mfc_context->vp8_state.intermediate_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo); + mfc_context->vp8_state.mpc_row_store_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.stream_out_bo); + mfc_context->vp8_state.stream_out_bo = NULL; + + dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo); + mfc_context->vp8_state.token_statistics_bo = NULL; + free(mfc_context); } @@ -2432,6 +3210,10 @@ static VAStatus gen9_mfc_pipeline(VADriverContextP ctx, vaStatus = gen9_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context); break; + case VAProfileVP8Version0_3: + vaStatus = gen9_mfc_vp8_encode_picture(ctx, encode_state, encoder_context); + break; + default: vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; break; |