diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2017-07-12 14:07:37 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2017-07-17 10:29:26 +0000 |
commit | ec02ee4181c49b61fce1c8fb99292dbb8139cc90 (patch) | |
tree | 25cde714b2b71eb639d1cd53f5a22e9ba76e14ef /chromium/third_party/libvpx/source/libvpx/vp9 | |
parent | bb09965444b5bb20b096a291445170876225268d (diff) | |
download | qtwebengine-chromium-ec02ee4181c49b61fce1c8fb99292dbb8139cc90.tar.gz |
BASELINE: Update Chromium to 59.0.3071.134
Change-Id: Id02ef6fb2204c5fd21668a1c3e6911c83b17585a
Reviewed-by: Alexandru Croitor <alexandru.croitor@qt.io>
Diffstat (limited to 'chromium/third_party/libvpx/source/libvpx/vp9')
36 files changed, 1200 insertions, 574 deletions
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_debugmodes.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_debugmodes.c index 7d128c9f7f8..28cd4a19249 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_debugmodes.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_debugmodes.c @@ -34,7 +34,7 @@ static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(file, "%c ", prefix); for (mi_col = 0; mi_col < cols; mi_col++) { - fprintf(file, "%2d ", *((int *)((char *)(mi[0]) + member_offset))); + fprintf(file, "%2d ", *((char *)((char *)(mi[0]) + member_offset))); mi++; } fprintf(file, "\n"); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c index 23cbe9bf9ae..55957414cde 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c @@ -363,6 +363,8 @@ void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, vpx_highbd_idct32x32_1_add(input, dest, stride, bd); } else if (eob <= 34) { vpx_highbd_idct32x32_34_add(input, dest, stride, bd); + } else if (eob <= 135) { + vpx_highbd_idct32x32_135_add(input, dest, stride, bd); } else { vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl index cd5da4f6d38..10c779c01d3 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -197,7 +197,7 @@ $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8; add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int 
search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize qw/vp9_diamond_search_sad avx/; -add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; +add_proto qw/void vp9_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; specialize qw/vp9_temporal_filter_apply sse2 msa/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { @@ -217,7 +217,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - add_proto qw/void vp9_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; + add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; } # End vp9_high encoder functions diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c index 23f7ebace4c..1ab5f36cc59 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c @@ -11,10 +11,11 @@ #include "./vp9_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" -static void temporal_filter_apply_8size_msa(uint8_t 
*frm1_ptr, uint32_t stride, - uint8_t *frm2_ptr, int32_t filt_sth, - int32_t filt_wgt, uint32_t *acc, - uint16_t *cnt) { +static void temporal_filter_apply_8size_msa(const uint8_t *frm1_ptr, + uint32_t stride, + const uint8_t *frm2_ptr, + int32_t filt_sth, int32_t filt_wgt, + uint32_t *acc, uint16_t *cnt) { uint32_t row; uint64_t f0, f1, f2, f3; v16i8 frm2, frm1 = { 0 }; @@ -138,8 +139,9 @@ static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride, } } -static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride, - uint8_t *frm2_ptr, +static void temporal_filter_apply_16size_msa(const uint8_t *frm1_ptr, + uint32_t stride, + const uint8_t *frm2_ptr, int32_t filt_sth, int32_t filt_wgt, uint32_t *acc, uint16_t *cnt) { uint32_t row; @@ -265,8 +267,8 @@ static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride, } } -void vp9_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride, - uint8_t *frame2_ptr, uint32_t blk_w, +void vp9_temporal_filter_apply_msa(const uint8_t *frame1_ptr, uint32_t stride, + const uint8_t *frame2_ptr, uint32_t blk_w, uint32_t blk_h, int32_t strength, int32_t filt_wgt, uint32_t *accu, uint16_t *cnt) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index b6b85926898..b4a0bbe58bd 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -240,24 +240,68 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi, } } -// Update the actual number of blocks that were applied the segment delta q. +// From the just encoded frame: update the actual number of blocks that were +// applied the segment delta q, and the amount of low motion in the frame. 
+// Also check conditions for forcing golden update, or preventing golden +// update if the period is up. void vp9_cyclic_refresh_postencode(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; + MODE_INFO **mi = cm->mi_grid_visible; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + RATE_CONTROL *const rc = &cpi->rc; unsigned char *const seg_map = cpi->segmentation_map; + double fraction_low = 0.0; + int force_gf_refresh = 0; + int low_content_frame = 0; int mi_row, mi_col; cr->actual_num_seg1_blocks = 0; cr->actual_num_seg2_blocks = 0; - for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) + for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { - if (cyclic_refresh_segment_id(seg_map[mi_row * cm->mi_cols + mi_col]) == - CR_SEGMENT_ID_BOOST1) + MV mv = mi[0]->mv[0].as_mv; + int map_index = mi_row * cm->mi_cols + mi_col; + if (cyclic_refresh_segment_id(seg_map[map_index]) == CR_SEGMENT_ID_BOOST1) cr->actual_num_seg1_blocks++; - else if (cyclic_refresh_segment_id( - seg_map[mi_row * cm->mi_cols + mi_col]) == + else if (cyclic_refresh_segment_id(seg_map[map_index]) == CR_SEGMENT_ID_BOOST2) cr->actual_num_seg2_blocks++; + // Accumulate low_content_frame. + if (is_inter_block(mi[0]) && abs(mv.row) < 16 && abs(mv.col) < 16) + low_content_frame++; + mi++; + } + mi += 8; + } + // Check for golden frame update: only for non-SVC and non-golden boost. + if (!cpi->use_svc && cpi->ext_refresh_frame_flags_pending == 0 && + !cpi->oxcf.gf_cbr_boost_pct) { + // Force this frame as a golden update frame if this frame changes the + // resolution (resize_pending != 0). + // TODO(marpan): check on forcing golden update if the background has very + // high motion in current frame. 
+ if (cpi->resize_pending != 0) { + vp9_cyclic_refresh_set_golden_update(cpi); + rc->frames_till_gf_update_due = rc->baseline_gf_interval; + if (rc->frames_till_gf_update_due > rc->frames_to_key) + rc->frames_till_gf_update_due = rc->frames_to_key; + cpi->refresh_golden_frame = 1; + force_gf_refresh = 1; + } + // Update average of low content/motion in the frame. + fraction_low = (double)low_content_frame / (cm->mi_rows * cm->mi_cols); + cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4; + if (!force_gf_refresh && cpi->refresh_golden_frame == 1 && + rc->frames_since_key > rc->frames_since_golden + 1) { + // Don't update golden reference if the amount of low_content for the + // current encoded frame is small, or if the recursive average of the + // low_content over the update interval window falls below threshold. + if (fraction_low < 0.65 || cr->low_content_avg < 0.6) { + cpi->refresh_golden_frame = 0; + } + // Reset for next internal. + cr->low_content_avg = fraction_low; } + } } // Set golden frame update interval, for non-svc 1 pass CBR mode. @@ -274,72 +318,6 @@ void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) { if (cpi->oxcf.rc_mode == VPX_VBR) rc->baseline_gf_interval = 20; } -// Update some encoding stats (from the just encoded frame). If this frame's -// background has high motion, refresh the golden frame. Otherwise, if the -// golden reference is to be updated check if we should NOT update the golden -// ref. 
-void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) { - VP9_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - int mi_row, mi_col; - double fraction_low = 0.0; - int low_content_frame = 0; - MODE_INFO **mi = cm->mi_grid_visible; - RATE_CONTROL *const rc = &cpi->rc; - const int rows = cm->mi_rows, cols = cm->mi_cols; - int cnt1 = 0, cnt2 = 0; - int force_gf_refresh = 0; - int flag_force_gf_high_motion = 0; - for (mi_row = 0; mi_row < rows; mi_row++) { - for (mi_col = 0; mi_col < cols; mi_col++) { - if (flag_force_gf_high_motion == 1) { - int16_t abs_mvr = mi[0]->mv[0].as_mv.row >= 0 - ? mi[0]->mv[0].as_mv.row - : -1 * mi[0]->mv[0].as_mv.row; - int16_t abs_mvc = mi[0]->mv[0].as_mv.col >= 0 - ? mi[0]->mv[0].as_mv.col - : -1 * mi[0]->mv[0].as_mv.col; - // Calculate the motion of the background. - if (abs_mvr <= 16 && abs_mvc <= 16) { - cnt1++; - if (abs_mvr == 0 && abs_mvc == 0) cnt2++; - } - } - mi++; - // Accumulate low_content_frame. - if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++; - } - mi += 8; - } - // For video conference clips, if the background has high motion in current - // frame because of the camera movement, set this frame as the golden frame. - // Use 70% and 5% as the thresholds for golden frame refreshing. - // Also, force this frame as a golden update frame if this frame will change - // the resolution (resize_pending != 0). - if (cpi->resize_pending != 0 || - (cnt1 * 100 > (70 * rows * cols) && cnt2 * 20 < cnt1)) { - vp9_cyclic_refresh_set_golden_update(cpi); - rc->frames_till_gf_update_due = rc->baseline_gf_interval; - - if (rc->frames_till_gf_update_due > rc->frames_to_key) - rc->frames_till_gf_update_due = rc->frames_to_key; - cpi->refresh_golden_frame = 1; - force_gf_refresh = 1; - } - fraction_low = (double)low_content_frame / (rows * cols); - // Update average. 
- cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4; - if (!force_gf_refresh && cpi->refresh_golden_frame == 1) { - // Don't update golden reference if the amount of low_content for the - // current encoded frame is small, or if the recursive average of the - // low_content over the update interval window falls below threshold. - if (fraction_low < 0.8 || cr->low_content_avg < 0.7) - cpi->refresh_golden_frame = 0; - // Reset for next internal. - cr->low_content_avg = fraction_low; - } -} - // Update the segmentation map, and related quantities: cyclic refresh map, // refresh sb_index, and target number of blocks to be refreshed. // The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to @@ -400,13 +378,13 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]); if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium && (xmis <= 2 || ymis <= 2)) - consec_zero_mv_thresh_block = 10; + consec_zero_mv_thresh_block = 4; for (y = 0; y < ymis; y++) { for (x = 0; x < xmis; x++) { const int bl_index2 = bl_index + y * cm->mi_cols + x; // If the block is as a candidate for clean up then mark it // for possible boost/refresh (segment 1). The segment id may get - // reset to 0 later if block gets coded anything other than ZEROMV. + // reset to 0 later depending on the coding mode. 
if (cr->map[bl_index2] == 0) { count_tot++; if (cr->last_coded_q_map[bl_index2] > qindex_thresh || diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h index c14a647b87c..9de5074d9ec 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -105,15 +105,15 @@ void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi, // refresh sb_index, and target number of blocks to be refreshed. void vp9_cyclic_refresh_update__map(struct VP9_COMP *const cpi); -// Update the actual number of blocks that were applied the segment delta q. +// From the just encoded frame: update the actual number of blocks that were +// applied the segment delta q, and the amount of low motion in the frame. +// Also check conditions for forcing golden update, or preventing golden +// update if the period is up. void vp9_cyclic_refresh_postencode(struct VP9_COMP *const cpi); // Set golden frame update interval, for non-svc 1 pass CBR mode. void vp9_cyclic_refresh_set_golden_update(struct VP9_COMP *const cpi); -// Check if we should not update golden reference, based on past refresh stats. -void vp9_cyclic_refresh_check_golden_update(struct VP9_COMP *const cpi); - // Set/update global/frame level refresh parameters. void vp9_cyclic_refresh_update_parameters(struct VP9_COMP *const cpi); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h index c0c69f6b5cf..42dc6830d6c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h @@ -63,6 +63,11 @@ typedef struct { typedef struct macroblock MACROBLOCK; struct macroblock { +// cf. 
https://bugs.chromium.org/p/webm/issues/detail?id=1054 +#if defined(_MSC_VER) && _MSC_VER < 1900 + int64_t bsse[MAX_MB_PLANE << 2]; +#endif + struct macroblock_plane plane[MAX_MB_PLANE]; MACROBLOCKD e_mbd; @@ -123,6 +128,9 @@ struct macroblock { // Set during mode selection. Read during block encoding. uint8_t zcoeff_blk[TX_SIZES][256]; + // Accumulate the tx block eobs in a partition block. + int32_t sum_y_eobs[TX_SIZES]; + int skip; int encode_breakout; @@ -149,7 +157,10 @@ struct macroblock { #define SKIP_TXFM_AC_DC 1 #define SKIP_TXFM_AC_ONLY 2 +// cf. https://bugs.chromium.org/p/webm/issues/detail?id=1054 +#if !defined(_MSC_VER) || _MSC_VER >= 1900 int64_t bsse[MAX_MB_PLANE << 2]; +#endif // Used to store sub partition's choices. MV pred_mv[MAX_REF_FRAMES]; @@ -162,6 +173,12 @@ struct macroblock { uint8_t skip_low_source_sad; + uint8_t last_sb_high_content; + + // For each superblock: saves the content value (e.g., low/high sad/sumdiff) + // based on source sad, prior to encoding the frame. + uint8_t content_state_sb; + // Used to save the status of whether a block has a low variance in // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for // 32x32, 9~24 for 16x16. 
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.h index 86ba03d69ff..9e4cbb360c7 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.h @@ -71,6 +71,9 @@ typedef struct { // search loop MV pred_mv[MAX_REF_FRAMES]; INTERP_FILTER pred_interp_filter; + + // Used for the machine learning-based early termination + int32_t sum_y_eobs; } PICK_MODE_CONTEXT; typedef struct PC_TREE { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c index c16429caf8b..b92557a9c40 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c @@ -21,6 +21,8 @@ #include "vp9/encoder/vp9_denoiser.h" #include "vp9/encoder/vp9_encoder.h" +// OUTPUT_YUV_DENOISED + #ifdef OUTPUT_YUV_DENOISED static void make_grayscale(YV12_BUFFER_CONFIG *yuv); #endif @@ -185,7 +187,7 @@ static uint8_t *block_start(uint8_t *framebuf, int stride, int mi_row, } static VP9_DENOISER_DECISION perform_motion_compensation( - VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs, + VP9_COMMON *const cm, VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs, int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv, int num_spatial_layers, int width) { @@ -197,6 +199,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( int i; struct buf_2d saved_dst[MAX_MB_PLANE]; struct buf_2d saved_pre[MAX_MB_PLANE]; + RefBuffer *saved_block_refs[2]; frame = ctx->best_reference_frame; saved_mi = *mi; @@ -258,6 +261,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( saved_pre[i] = filter_mbd->plane[i].pre[0]; saved_dst[i] = 
filter_mbd->plane[i].dst; } + saved_block_refs[0] = filter_mbd->block_refs[0]; // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser // struct. @@ -287,10 +291,12 @@ static VP9_DENOISER_DECISION perform_motion_compensation( denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col); filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride; + set_ref_ptrs(cm, filter_mbd, frame, NONE); vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs); // Restore everything to its original state *mi = saved_mi; + filter_mbd->block_refs[0] = saved_block_refs[0]; for (i = 0; i < MAX_MB_PLANE; ++i) { filter_mbd->plane[i].pre[0] = saved_pre[i]; filter_mbd->plane[i].dst = saved_dst[i]; @@ -314,6 +320,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col); struct buf_2d src = mb->plane[0].src; int is_skin = 0; + int increase_denoising = 0; int consec_zeromv = 0; mv_col = ctx->best_sse_mv.as_mv.col; mv_row = ctx->best_sse_mv.as_mv.row; @@ -356,22 +363,21 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, mb->plane[0].src.stride, mb->plane[1].src.stride, bs, consec_zeromv, motion_level); } - if (!is_skin && denoiser->denoising_level == kDenHigh) { - denoiser->increase_denoising = 1; - } else { - denoiser->increase_denoising = 0; - } + if (!is_skin && denoiser->denoising_level == kDenHigh) increase_denoising = 1; - if (denoiser->denoising_level >= kDenLow) + // TODO(marpan): There is an issue with denoising for speed 5, + // due to the partitioning scheme based on pickmode. + // Remove this speed constraint when issue is resolved. 
+ if (denoiser->denoising_level >= kDenLow && cpi->oxcf.speed > 5) decision = perform_motion_compensation( - denoiser, mb, bs, denoiser->increase_denoising, mi_row, mi_col, ctx, + &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx, motion_magnitude, is_skin, &zeromv_filter, consec_zeromv, cpi->svc.number_spatial_layers, cpi->Source->y_width); if (decision == FILTER_BLOCK) { - decision = vp9_denoiser_filter( - src.buf, src.stride, mc_avg_start, mc_avg.y_stride, avg_start, - avg.y_stride, denoiser->increase_denoising, bs, motion_magnitude); + decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start, + mc_avg.y_stride, avg_start, avg.y_stride, + increase_denoising, bs, motion_magnitude); } if (decision == FILTER_BLOCK) { @@ -413,15 +419,15 @@ static void swap_frame_buffer(YV12_BUFFER_CONFIG *const dest, src->y_buffer = tmp_buf; } -void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, - YV12_BUFFER_CONFIG src, - FRAME_TYPE frame_type, - int refresh_alt_ref_frame, - int refresh_golden_frame, - int refresh_last_frame, int resized) { +void vp9_denoiser_update_frame_info( + VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, + int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, + int resized, int svc_base_is_key) { // Copy source into denoised reference buffers on KEY_FRAME or - // if the just encoded frame was resized. - if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset) { + // if the just encoded frame was resized. For SVC, copy source if the base + // spatial layer was key frame. 
+ if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset || + svc_base_is_key) { int i; // Start at 1 so as not to overwrite the INTRA_FRAME for (i = 1; i < MAX_REF_FRAMES; ++i) @@ -533,7 +539,6 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, int ssx, #ifdef OUTPUT_YUV_DENOISED make_grayscale(&denoiser->running_avg_y[i]); #endif - denoiser->increase_denoising = 0; denoiser->frame_buffer_initialized = 1; denoiser->denoising_level = kDenLow; denoiser->prev_denoising_level = kDenLow; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.h index 0ec8622416f..9bded21769d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.h @@ -38,7 +38,6 @@ typedef struct vp9_denoiser { YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES]; YV12_BUFFER_CONFIG mc_running_avg_y; YV12_BUFFER_CONFIG last_source; - int increase_denoising; int frame_buffer_initialized; int reset; VP9_DENOISER_LEVEL denoising_level; @@ -59,12 +58,10 @@ typedef struct { struct VP9_COMP; -void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, - YV12_BUFFER_CONFIG src, - FRAME_TYPE frame_type, - int refresh_alt_ref_frame, - int refresh_golden_frame, - int refresh_last_frame, int resized); +void vp9_denoiser_update_frame_info( + VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, + int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, + int resized, int svc_base_is_key); void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c index e09f94b8954..481f5a0fdac 100644 --- 
a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c @@ -52,6 +52,33 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); +// Machine learning-based early termination parameters. +static const double train_mean[24] = { + 303501.697372, 3042630.372158, 24.694696, 1.392182, + 689.413511, 162.027012, 1.478213, 0.0, + 135382.260230, 912738.513263, 28.845217, 1.515230, + 544.158492, 131.807995, 1.436863, 0.0, + 43682.377587, 208131.711766, 28.084737, 1.356677, + 138.254122, 119.522553, 1.252322, 0.0 +}; + +static const double train_stdm[24] = { + 673689.212982, 5996652.516628, 0.024449, 1.989792, + 985.880847, 0.014638, 2.001898, 0.0, + 208798.775332, 1812548.443284, 0.018693, 1.838009, + 396.986910, 0.015657, 1.332541, 0.0, + 55888.847031, 448587.962714, 0.017900, 1.904776, + 98.652832, 0.016598, 1.320992, 0.0 +}; + +// Error tolerance: 0.01%-0.05%-0.1% +static const double classifiers[24] = { + 0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863, + 0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134, + 0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700, + 0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211 +}; + // This is used as a reference when computing the source variance for the // purpose of activity masking. 
// Eventually this should be replaced by custom no-reference routines, @@ -130,7 +157,8 @@ unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse); break; } - return ROUND64_POWER_OF_TWO((int64_t)var, num_pels_log2_lookup[bs]); + return (unsigned int)ROUND64_POWER_OF_TWO((int64_t)var, + num_pels_log2_lookup[bs]); } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -508,7 +536,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, threshold_base = (7 * threshold_base) >> 3; } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && + if (cpi->oxcf.noise_sensitivity > 0 && cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow) threshold_base = vp9_scale_part_thresh( threshold_base, cpi->denoiser.denoising_level, content_state); @@ -918,7 +946,9 @@ static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, unsigned int y_sad, int is_key_frame) { int i; MACROBLOCKD *xd = &x->e_mbd; - if (is_key_frame) return; + // For speed >= 8, avoid the chroma check if y_sad is above threshold. 
+ if (is_key_frame || (cpi->oxcf.speed >= 8 && y_sad > cpi->vbp_thresholds[1])) + return; for (i = 1; i <= 2; ++i) { unsigned int uv_sad = UINT_MAX; @@ -936,6 +966,46 @@ static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, } } +static void avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, + int sb_offset) { + unsigned int tmp_sse; + uint64_t tmp_sad; + unsigned int tmp_variance; + const BLOCK_SIZE bsize = BLOCK_64X64; + uint8_t *src_y = cpi->Source->y_buffer; + int src_ystride = cpi->Source->y_stride; + uint8_t *last_src_y = cpi->Last_Source->y_buffer; + int last_src_ystride = cpi->Last_Source->y_stride; + uint64_t avg_source_sad_threshold = 10000; + uint64_t avg_source_sad_threshold2 = 12000; +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->common.use_highbitdepth) return; +#endif + src_y += shift; + last_src_y += shift; + tmp_sad = + cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride); + tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y, + last_src_ystride, &tmp_sse); + // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12) + if (tmp_sad < avg_source_sad_threshold) + x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff + : kLowSadHighSumdiff; + else + x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff + : kHighSadHighSumdiff; + if (cpi->content_state_sb_fd != NULL) { + if (tmp_sad < avg_source_sad_threshold2) { + // Cap the increment to 255. + if (cpi->content_state_sb_fd[sb_offset] < 255) + cpi->content_state_sb_fd[sb_offset]++; + } else { + cpi->content_state_sb_fd[sb_offset] = 0; + } + } + return; +} + // This function chooses partitioning based on the variance between source and // reconstructed last, where variance is computed for down-sampled inputs. 
static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, @@ -951,6 +1021,8 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, int min_var_32x32 = INT_MAX; int var_32x32; int avg_16x16[4]; + int maxvar_16x16[4]; + int minvar_16x16[4]; int64_t threshold_4x4avg; NOISE_LEVEL noise_level = kLow; int content_state = 0; @@ -983,27 +1055,25 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, segment_id = xd->mi[0]->segment_id; if (cpi->sf.use_source_sad && !is_key_frame) { - // The sb_offset2 is to make it consistent with the index in the function - // vp9_avg_source_sad() in vp9_ratectrl.c. int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); - content_state = cpi->content_state_sb[sb_offset2]; + content_state = x->content_state_sb; x->skip_low_source_sad = (content_state == kLowSadLowSumdiff || content_state == kLowSadHighSumdiff) ? 1 : 0; + if (cpi->content_state_sb_fd != NULL) + x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2]; // If source_sad is low copy the partition without computing the y_sad. 
if (x->skip_low_source_sad && cpi->sf.copy_partition_flag && copy_partitioning(cpi, x, mi_row, mi_col, segment_id, sb_offset)) { - chroma_check(cpi, x, bsize, y_sad, is_key_frame); return 0; } } - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - if (cyclic_refresh_segment_id_boosted(segment_id)) { - int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); - set_vbp_thresholds(cpi, thresholds, q, content_state); - } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && + cyclic_refresh_segment_id_boosted(segment_id)) { + int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); + set_vbp_thresholds(cpi, thresholds, q, content_state); } else { set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state); } @@ -1072,7 +1142,13 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, mi->mv[0].as_int = 0; mi->interp_filter = BILINEAR; - y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); + if (cpi->oxcf.speed >= 8 && !low_res) + y_sad = cpi->fn_ptr[bsize].sdf( + x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, + xd->plane[0].pre[0].stride); + else + y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); + y_sad_last = y_sad; // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad // are close if short_circuit_low_temp_var is on. 
@@ -1143,6 +1219,8 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, const int i2 = i << 2; force_split[i + 1] = 0; avg_16x16[i] = 0; + maxvar_16x16[i] = 0; + minvar_16x16[i] = INT_MAX; for (j = 0; j < 4; j++) { const int x16_idx = x32_idx + ((j & 1) << 4); const int y16_idx = y32_idx + ((j >> 1) << 4); @@ -1159,6 +1237,10 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); get_variance(&vt.split[i].split[j].part_variances.none); avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance; + if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i]) + minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; + if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i]) + maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) { // 16X16 variance is above threshold for split, so force split to 8x8 // for this 16x16 block (this also forces splits for upper levels). @@ -1203,6 +1285,8 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } } } + if (cpi->noise_estimate.enabled) + noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate); // Fill the rest of the variance tree by summing split partition values. 
avg_32x32 = 0; for (i = 0; i < 4; i++) { @@ -1238,6 +1322,11 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) { force_split[i + 1] = 1; force_split[0] = 1; + } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 && + (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) && + maxvar_16x16[i] > thresholds[1]) { + force_split[i + 1] = 1; + force_split[0] = 1; } avg_32x32 += var_32x32; } @@ -1245,8 +1334,6 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, if (!force_split[0]) { fill_variance_tree(&vt, BLOCK_64X64); get_variance(&vt.part_variances.none); - if (cpi->noise_estimate.enabled) - noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate); // If variance of this 64x64 block is above (some threshold of) the average // variance over the sub-32x32 blocks, then force this block to split. // Only checking this for noise level >= medium for now. @@ -2685,6 +2772,74 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv, } #endif +// Calculate the score used in machine-learning based partition search early +// termination. 
+static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd, + PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + const double *clf; + const double *mean; + const double *sd; + const int mag_mv = + abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row); + const int left_in_image = !!xd->left_mi; + const int above_in_image = !!xd->above_mi; + MODE_INFO **prev_mi = + &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row]; + int above_par = 0; // above_partitioning + int left_par = 0; // left_partitioning + int last_par = 0; // last_partitioning + BLOCK_SIZE context_size; + double score; + int offset = 0; + + assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); + + if (above_in_image) { + context_size = xd->above_mi->sb_type; + if (context_size < bsize) + above_par = 2; + else if (context_size == bsize) + above_par = 1; + } + + if (left_in_image) { + context_size = xd->left_mi->sb_type; + if (context_size < bsize) + left_par = 2; + else if (context_size == bsize) + left_par = 1; + } + + if (prev_mi) { + context_size = prev_mi[0]->sb_type; + if (context_size < bsize) + last_par = 2; + else if (context_size == bsize) + last_par = 1; + } + + if (bsize == BLOCK_64X64) + offset = 0; + else if (bsize == BLOCK_32X32) + offset = 8; + else if (bsize == BLOCK_16X16) + offset = 16; + + // early termination score calculation + clf = &classifiers[offset]; + mean = &train_mean[offset]; + sd = &train_stdm[offset]; + score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) + + clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) + + clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) + + clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) * sd[3]) + + clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) + + clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) + + clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7]; + return score; +} + // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are // 
unlikely to be selected depending on previous rate-distortion optimization // results, for encoding speed-up. @@ -2730,8 +2885,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, int partition_vert_allowed = !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; - int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr; - int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr; + int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist; + int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate; (void)*tp_orig; @@ -2861,18 +3016,34 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (this_rdc.rdcost < best_rdc.rdcost) { + MODE_INFO *mi = xd->mi[0]; + best_rdc = this_rdc; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - // If all y, u, v transform blocks in this partition are skippable, and - // the dist & rate are within the thresholds, the partition search is - // terminated for current branch of the partition search tree. - if (!x->e_mbd.lossless && ctx->skippable && - ((best_rdc.dist < (dist_breakout_thr >> 2)) || - (best_rdc.dist < dist_breakout_thr && - best_rdc.rate < rate_breakout_thr))) { - do_split = 0; - do_rect = 0; + if (!cpi->sf.ml_partition_search_early_termination) { + // If all y, u, v transform blocks in this partition are skippable, + // and the dist & rate are within the thresholds, the partition search + // is terminated for current branch of the partition search tree. + if (!x->e_mbd.lossless && ctx->skippable && + ((best_rdc.dist < (dist_breakout_thr >> 2)) || + (best_rdc.dist < dist_breakout_thr && + best_rdc.rate < rate_breakout_thr))) { + do_split = 0; + do_rect = 0; + } + } else { + // Currently, the machine-learning based partition search early + // termination is only used while bsize is 16x16, 32x32 or 64x64, + // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. 
+ if (!x->e_mbd.lossless && + !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) && + ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) { + if (compute_score(cm, xd, ctx, mi_row, mi_col, bsize) < 0.0) { + do_split = 0; + do_rect = 0; + } + } } #if CONFIG_FP_MB_STATS @@ -2985,7 +3156,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->partitioning = PARTITION_SPLIT; // Rate and distortion based partition search termination clause. - if (!x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) || + if (!cpi->sf.ml_partition_search_early_termination && + !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) || (best_rdc.dist < dist_breakout_thr && best_rdc.rate < rate_breakout_thr))) { do_rect = 0; @@ -3151,7 +3323,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, MODE_INFO **mi = cm->mi_grid_visible + idx_str; (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, - sb_col_in_tile - 1); + sb_col_in_tile); if (sf->adaptive_pred_interp_filter) { for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE; @@ -3486,8 +3658,8 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); if (this_rdc.rdcost < best_rdc.rdcost) { - int64_t dist_breakout_thr = sf->partition_search_breakout_dist_thr; - int64_t rate_breakout_thr = sf->partition_search_breakout_rate_thr; + int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist; + int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate; dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); @@ -3908,13 +4080,18 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, const int mi_col_start = tile_info->mi_col_start; const int mi_col_end = tile_info->mi_col_end; int mi_col; + const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; + const int num_sb_cols = + get_num_cols(tile_data->tile_info, 
MI_BLOCK_SIZE_LOG2); + int sb_col_in_tile; // Initialize the left context for the new SB row memset(&xd->left_context, 0, sizeof(xd->left_context)); memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row - for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { + for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; + mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) { const struct segmentation *const seg = &cm->seg; RD_COST dummy_rdc; const int idx_str = cm->mi_stride * mi_row + mi_col; @@ -3922,6 +4099,10 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type; BLOCK_SIZE bsize = BLOCK_64X64; int seg_skip = 0; + + (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, + sb_col_in_tile); + x->source_variance = UINT_MAX; vp9_zero(x->pred_mv); vp9_rd_cost_init(&dummy_rdc); @@ -3929,6 +4110,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, x->color_sensitivity[1] = 0; x->sb_is_skin = 0; x->skip_low_source_sad = 0; + x->content_state_sb = 0; if (seg->enabled) { const uint8_t *const map = @@ -3940,6 +4122,12 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, } } + if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) { + int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3); + int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); + avg_source_sad(cpi, x, shift, sb_offset2); + } + // Set the partition type of the 64X64 block switch (partition_search_type) { case VAR_BASED_PARTITION: @@ -3964,14 +4152,11 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, break; case REFERENCE_PARTITION: set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); - // Use nonrd_pick_partition on scene-cut for VBR, or on qp-segment - // if cyclic_refresh is enabled. + // Use nonrd_pick_partition on scene-cut for VBR mode. 
// nonrd_pick_partition does not support 4x4 partition, so avoid it // on key frame for now. if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad && - cm->frame_type != KEY_FRAME) || - (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && - xd->mi[0]->segment_id)) { + cm->frame_type != KEY_FRAME)) { // Use lower max_partition_size for low resoultions. if (cm->width <= 352 && cm->height <= 288) x->max_partition_size = BLOCK_32X32; @@ -3997,6 +4182,9 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, break; default: assert(0); break; } + + (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, + sb_col_in_tile, num_sb_cols); } } // end RTC play code @@ -4148,13 +4336,14 @@ void vp9_init_tile_data(VP9_COMP *cpi) { int i, j; for (i = 0; i < BLOCK_SIZES; ++i) { for (j = 0; j < MAX_MODES; ++j) { - tile_data->thresh_freq_fact[i][j] = 32; + tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; tile_data->mode_map[i][j] = j; } } #if CONFIG_MULTITHREAD tile_data->search_count_mutex = NULL; tile_data->enc_row_mt_mutex = NULL; + tile_data->row_base_thresh_freq_fact = NULL; #endif } } @@ -4341,7 +4530,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { } #endif - if (!cpi->new_mt) { + if (!cpi->row_mt) { cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy; cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy; // If allowed, encoding tiles in parallel with one thread handling one diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c index 75a682481f8..0940d9a6153 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c @@ -80,6 +80,18 @@ static int trellis_get_coeff_context(const int16_t *scan, const int16_t *nb, return pt; } +static const int16_t band_count_table[TX_SIZES][8] = { + { 1, 2, 3, 4, 3, 16 - 13, 0 }, + { 1, 2, 3, 4, 
11, 64 - 21, 0 }, + { 1, 2, 3, 4, 11, 256 - 21, 0 }, + { 1, 2, 3, 4, 11, 1024 - 21, 0 }, +}; +static const int16_t band_cum_count_table[TX_SIZES][8] = { + { 0, 1, 3, 6, 10, 13, 16, 0 }, + { 0, 1, 3, 6, 10, 21, 64, 0 }, + { 0, 1, 3, 6, 10, 21, 256, 0 }, + { 0, 1, 3, 6, 10, 21, 1024, 0 }, +}; int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int ctx) { MACROBLOCKD *const xd = &mb->e_mbd; @@ -108,15 +120,20 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int rate0, rate1; int64_t error0, error1; int16_t t0, t1; - EXTRABIT e0; - unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = - mb->token_costs[tx_size][type][ref]; - int best, band, pt, i, final_eob; + int best, band = (eob < default_eob) ? band_translate[eob] + : band_translate[eob - 1]; + int pt, i, final_eob; #if CONFIG_VP9_HIGHBITDEPTH - const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd); + const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); #else - const int *cat6_high_cost = vp9_get_high_cost_table(8); + const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); #endif + unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = + mb->token_costs[tx_size][type][ref]; + const int16_t *band_counts = &band_count_table[tx_size][band]; + int16_t band_left = eob - band_cum_count_table[tx_size][band] + 1; + + token_costs += band; assert((!type && !plane) || (type && plane)); assert(eob <= default_eob); @@ -130,8 +147,10 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, tokens[eob][0].qc = 0; tokens[eob][1] = tokens[eob][0]; - for (i = 0; i < eob; i++) - token_cache[scan[i]] = vp9_pt_energy_class[vp9_get_token(qcoeff[scan[i]])]; + for (i = 0; i < eob; i++) { + const int rc = scan[i]; + token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])]; + } for (i = eob; i-- > 0;) { int base_bits, d2, dx; @@ -144,18 +163,16 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE 
tx_size, /* Evaluate the first possibility for this state. */ rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; - vp9_get_token_extra(x, &t0, &e0); + base_bits = vp9_get_token_cost(x, &t0, cat6_high_cost); /* Consider both possible successor states. */ if (next < default_eob) { - band = band_translate[i + 1]; pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); - rate0 += token_costs[band][0][pt][tokens[next][0].token]; - rate1 += token_costs[band][0][pt][tokens[next][1].token]; + rate0 += (*token_costs)[0][pt][tokens[next][0].token]; + rate1 += (*token_costs)[0][pt][tokens[next][1].token]; } UPDATE_RD_COST(); /* And pick the best. */ best = rd_cost1 < rd_cost0; - base_bits = vp9_get_cost(t0, e0, cat6_high_cost); dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -183,6 +200,12 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, } else { tokens[i][1] = tokens[i][0]; next = i; + + if (!(--band_left)) { + --band_counts; + band_left = *band_counts; + --token_costs; + } continue; } @@ -193,27 +216,25 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, */ t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN; - e0 = 0; + base_bits = 0; } else { - vp9_get_token_extra(x, &t0, &e0); + base_bits = vp9_get_token_cost(x, &t0, cat6_high_cost); t1 = t0; } if (next < default_eob) { - band = band_translate[i + 1]; if (t0 != EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache); - rate0 += token_costs[band][!x][pt][tokens[next][0].token]; + rate0 += (*token_costs)[!x][pt][tokens[next][0].token]; } if (t1 != EOB_TOKEN) { pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache); - rate1 += token_costs[band][!x][pt][tokens[next][1].token]; + rate1 += (*token_costs)[!x][pt][tokens[next][1].token]; } } UPDATE_RD_COST(); /* And pick the best. 
*/ best = rd_cost1 < rd_cost0; - base_bits = vp9_get_cost(t0, e0, cat6_high_cost); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -255,34 +276,38 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, /* There's no choice to make for a zero coefficient, so we don't * add a new trellis node, but we do need to update the costs. */ - band = band_translate[i + 1]; pt = get_coef_context(nb, token_cache, i + 1); t0 = tokens[next][0].token; t1 = tokens[next][1].token; /* Update the cost of each path if we're past the EOB token. */ if (t0 != EOB_TOKEN) { - tokens[next][0].rate += token_costs[band][1][pt][t0]; + tokens[next][0].rate += (*token_costs)[1][pt][t0]; tokens[next][0].token = ZERO_TOKEN; } if (t1 != EOB_TOKEN) { - tokens[next][1].rate += token_costs[band][1][pt][t1]; + tokens[next][1].rate += (*token_costs)[1][pt][t1]; tokens[next][1].token = ZERO_TOKEN; } tokens[i][0].best_index = tokens[i][1].best_index = 0; /* Don't update next, because we didn't add a new node. */ } + + if (!(--band_left)) { + --band_counts; + band_left = *band_counts; + --token_costs; + } } /* Now pick the best path through the whole trellis. 
*/ - band = band_translate[i + 1]; rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; error0 = tokens[next][0].error; error1 = tokens[next][1].error; t0 = tokens[next][0].token; t1 = tokens[next][1].token; - rate0 += token_costs[band][0][ctx][t0]; - rate1 += token_costs[band][0][ctx][t1]; + rate0 += (*token_costs)[0][ctx][t0]; + rate1 += (*token_costs)[0][ctx][t1]; UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; final_eob = -1; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c index 5d64b0a10ae..1dc70d2d361 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c @@ -463,8 +463,8 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->copied_frame_cnt); cpi->copied_frame_cnt = NULL; - vpx_free(cpi->content_state_sb); - cpi->content_state_sb = NULL; + vpx_free(cpi->content_state_sb_fd); + cpi->content_state_sb_fd = NULL; vp9_cyclic_refresh_free(cpi->cyclic_refresh); cpi->cyclic_refresh = NULL; @@ -1445,6 +1445,33 @@ static void realloc_segmentation_maps(VP9_COMP *cpi) { vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); } +static void alloc_copy_partition_data(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + if (cpi->prev_partition == NULL) { + CHECK_MEM_ERROR(cm, cpi->prev_partition, + (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows, + sizeof(*cpi->prev_partition))); + } + if (cpi->prev_segment_id == NULL) { + CHECK_MEM_ERROR( + cm, cpi->prev_segment_id, + (int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), + sizeof(*cpi->prev_segment_id))); + } + if (cpi->prev_variance_low == NULL) { + CHECK_MEM_ERROR(cm, cpi->prev_variance_low, + (uint8_t *)vpx_calloc( + (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25, + sizeof(*cpi->prev_variance_low))); + } + if (cpi->copied_frame_cnt == NULL) { + CHECK_MEM_ERROR( + cm, 
cpi->copied_frame_cnt, + (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), + sizeof(*cpi->copied_frame_cnt))); + } +} + void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -1575,7 +1602,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { highbd_set_var_fns(cpi); #endif - vp9_set_new_mt(cpi); + vp9_set_row_mt(cpi); } #ifndef M_LOG2_E @@ -1673,7 +1700,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts))); cpi->use_svc = 0; - cpi->resize_state = 0; + cpi->resize_state = ORIG; cpi->external_resize = 0; cpi->resize_avg_qp = 0; cpi->resize_buffer_underflow = 0; @@ -2408,44 +2435,6 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, } #endif // CONFIG_VP9_HIGHBITDEPTH -void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) { - const int src_w = src->y_crop_width; - const int src_h = src->y_crop_height; - const int dst_w = dst->y_crop_width; - const int dst_h = dst->y_crop_height; - const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer, - src->v_buffer }; - const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; - uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer }; - const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride }; - const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP]; - int x, y, i; - - for (i = 0; i < MAX_MB_PLANE; ++i) { - const int factor = (i == 0 || i == 3 ? 
1 : 2); - const int src_stride = src_strides[i]; - const int dst_stride = dst_strides[i]; - for (y = 0; y < dst_h; y += 16) { - const int y_q4 = y * (16 / factor) * src_h / dst_h; - for (x = 0; x < dst_w; x += 16) { - const int x_q4 = x * (16 / factor) * src_w / dst_w; - const uint8_t *src_ptr = srcs[i] + - (y / factor) * src_h / dst_h * src_stride + - (x / factor) * src_w / dst_w; - uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor); - - vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, - kernel[x_q4 & 0xf], 16 * src_w / dst_w, - kernel[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor, - 16 / factor); - } - } - } - - vpx_extend_frame_borders(dst); -} - static int scale_down(VP9_COMP *cpi, int q) { RATE_CONTROL *const rc = &cpi->rc; GF_GROUP *const gf_group = &cpi->twopass.gf_group; @@ -2593,10 +2582,18 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && cpi->denoiser.denoising_level > kDenLowLow) { + int svc_base_is_key = 0; + if (cpi->use_svc) { + int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, + cpi->svc.temporal_layer_id, + cpi->svc.number_temporal_layers); + LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + svc_base_is_key = lc->is_key_frame; + } vp9_denoiser_update_frame_info( &cpi->denoiser, *cpi->Source, cpi->common.frame_type, cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, - cpi->refresh_last_frame, cpi->resize_pending); + cpi->refresh_last_frame, cpi->resize_pending, svc_base_is_key); } #endif if (is_one_pass_cbr_svc(cpi)) { @@ -3121,6 +3118,11 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest) { VP9_COMMON *const cm = &cpi->common; int q = 0, bottom_index = 0, top_index = 0; // Dummy variables. + // Flag to check if its valid to compute the source sad (used for + // scene detection and for superblock content state in CBR mode). 
+ // The flag may get reset below based on SVC or resizing state. + cpi->compute_source_sad_onepass = + cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cm->show_frame; vpx_clear_system_state(); @@ -3165,8 +3167,20 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, #endif } + if ((cpi->use_svc && + (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1 || + cpi->svc.current_superframe < 1)) || + cpi->resize_pending || cpi->resize_state || cpi->external_resize || + cpi->resize_state != ORIG) { + cpi->compute_source_sad_onepass = 0; + if (cpi->content_state_sb_fd != NULL) + memset(cpi->content_state_sb_fd, 0, + (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * + sizeof(*cpi->content_state_sb_fd)); + } + // Avoid scaling last_source unless its needed. - // Last source is needed if vp9_avg_source_sad() is used, or if + // Last source is needed if avg_source_sad() is used, or if // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise // estimation is enabled. 
if (cpi->unscaled_last_source != NULL && @@ -3174,11 +3188,17 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) || cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION || - cpi->noise_estimate.enabled || cpi->sf.use_source_sad)) + (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) || + cpi->compute_source_sad_onepass)) cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source, &cpi->scaled_last_source, (cpi->oxcf.pass == 0)); + if (cpi->Last_Source == NULL || + cpi->Last_Source->y_width != cpi->Source->y_width || + cpi->Last_Source->y_height != cpi->Source->y_height) + cpi->compute_source_sad_onepass = 0; + if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) { memset(cpi->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv)); @@ -3186,12 +3206,13 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, vp9_update_noise_estimate(cpi); - if (cpi->oxcf.pass == 0 && cpi->oxcf.mode == REALTIME && - cpi->oxcf.speed >= 5 && cpi->resize_state == 0 && - (cpi->oxcf.content == VP9E_CONTENT_SCREEN || - cpi->oxcf.rc_mode == VPX_VBR || cpi->sf.use_source_sad) && - cm->show_frame) - vp9_avg_source_sad(cpi); + // Scene detection is used for VBR mode or screen-content case. + // Make sure compute_source_sad_onepass is set (which handles SVC case + // and dynamic resize). 
+ if (cpi->compute_source_sad_onepass && + (cpi->oxcf.rc_mode == VPX_VBR || + cpi->oxcf.content == VP9E_CONTENT_SCREEN)) + vp9_scene_detection_onepass(cpi); // For 1 pass SVC, since only ZEROMV is allowed for upsampled reference // frame (i.e, svc->force_zero_mode_spatial_ref = 0), we can avoid this @@ -3203,6 +3224,8 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, set_size_independent_vars(cpi); set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); + if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi); + if (cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.content != VP9E_CONTENT_SCREEN && @@ -3240,7 +3263,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // Check if we should drop this frame because of high overshoot. // Only for frames where high temporal-source SAD is detected. if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && - cpi->resize_state == 0 && cm->frame_type != KEY_FRAME && + cpi->resize_state == ORIG && cm->frame_type != KEY_FRAME && cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->rc.high_source_sad == 1) { int frame_size = 0; @@ -3267,13 +3290,10 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } } - // Update some stats from cyclic refresh, and check if we should not update - // golden reference, for non-SVC 1 pass CBR. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME && - !cpi->use_svc && cpi->ext_refresh_frame_flags_pending == 0 && - (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && - !cpi->oxcf.gf_cbr_boost_pct)) - vp9_cyclic_refresh_check_golden_update(cpi); + // Update some stats from cyclic refresh, and check for golden frame update. + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && + cm->frame_type != KEY_FRAME) + vp9_cyclic_refresh_postencode(cpi); // Update the skip mb flag probabilities based on the distribution // seen in the last encoder iteration. 
@@ -4058,7 +4078,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, #if CONFIG_VP9_TEMPORAL_DENOISING #ifdef OUTPUT_YUV_DENOISED - if (oxcf->noise_sensitivity > 0) { + if (oxcf->noise_sensitivity > 0 && denoise_svc(cpi)) { vp9_write_yuv_frame_420(&cpi->denoiser.running_avg_y[INTRA_FRAME], yuv_denoised_file); } @@ -5223,16 +5243,22 @@ void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) { } } -void vp9_set_new_mt(VP9_COMP *cpi) { +void vp9_set_row_mt(VP9_COMP *cpi) { // Enable row based multi-threading for supported modes of encoding - cpi->new_mt = 0; + cpi->row_mt = 0; if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) && cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) && - cpi->oxcf.new_mt && !cpi->use_svc) - cpi->new_mt = 1; + cpi->oxcf.row_mt && !cpi->use_svc) + cpi->row_mt = 1; if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 && - (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.new_mt && + (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt && !cpi->use_svc) - cpi->new_mt = 1; + cpi->row_mt = 1; + + // In realtime mode, enable row based multi-threading for all the speed levels + // where non-rd path is used. 
+ if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) { + cpi->row_mt = 1; + } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h index 2797ff769a0..6c1cb6073e8 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h @@ -267,8 +267,9 @@ typedef struct VP9EncoderConfig { int render_height; VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; - int new_mt; - unsigned int ethread_bit_match; + int row_mt; + unsigned int row_mt_bit_exact; + unsigned int motion_vector_unit_test; } VP9EncoderConfig; static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { @@ -284,6 +285,9 @@ typedef struct TileDataEnc { int ex_search_count; FIRSTPASS_DATA fp_data; VP9RowMTSync row_mt_sync; + + // Used for adaptive_rd_thresh with row multithreading + int *row_base_thresh_freq_fact; #if CONFIG_MULTITHREAD pthread_mutex_t *search_count_mutex; pthread_mutex_t *enc_row_mt_mutex; @@ -651,7 +655,7 @@ typedef struct VP9_COMP { #endif int resize_pending; - int resize_state; + RESIZE_STATE resize_state; int external_resize; int resize_scale_num; int resize_scale_den; @@ -691,7 +695,7 @@ typedef struct VP9_COMP { void (*row_mt_sync_read_ptr)(VP9RowMTSync *const, int, int); void (*row_mt_sync_write_ptr)(VP9RowMTSync *const, int, int, const int); ARNRFilterData arnr_filter_data; - int new_mt; + int row_mt; // Previous Partition Info BLOCK_SIZE *prev_partition; @@ -705,7 +709,11 @@ typedef struct VP9_COMP { uint8_t *copied_frame_cnt; uint8_t max_copied_frame; - uint8_t *content_state_sb; + // For each superblock: keeps track of the last time (in frame distance) the + // the superblock did not have low source sad. 
+ uint8_t *content_state_sb_fd; + + int compute_source_sad_onepass; LevelConstraint level_constraint; } VP9_COMP; @@ -909,7 +917,7 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec); void vp9_new_framerate(VP9_COMP *cpi, double framerate); -void vp9_set_new_mt(VP9_COMP *cpi); +void vp9_set_row_mt(VP9_COMP *cpi); #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c index 7b7e0fd6e77..681e960c8df 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c @@ -77,8 +77,9 @@ static void create_enc_workers(VP9_COMP *cpi, int num_workers) { int allocated_workers = num_workers; // While using SVC, we need to allocate threads according to the highest - // resolution. - if (cpi->use_svc) { + // resolution. When row based multithreading is enabled, it is OK to + // allocate more threads than the number of max tile columns. 
+ if (cpi->use_svc && !cpi->row_mt) { int max_tile_cols = get_max_tile_cols(cpi); allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols); } @@ -319,7 +320,7 @@ void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c) { pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1]; pthread_mutex_lock(mutex); - while (c > row_mt_sync->cur_col[r - 1] - nsync) { + while (c > row_mt_sync->cur_col[r - 1] - nsync + 1) { pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex); } pthread_mutex_unlock(mutex); @@ -348,7 +349,7 @@ void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c, if (c < cols - 1) { cur = c; - if (c % nsync) sig = 0; + if (c % nsync != nsync - 1) sig = 0; } else { cur = cols + nsync; } @@ -615,7 +616,6 @@ void vp9_encode_tiles_row_mt(VP9_COMP *cpi) { for (i = 0; i < num_workers; i++) { EncWorkerData *thread_data; thread_data = &cpi->tile_thr_data[i]; - // Before encoding a frame, copy the thread data from cpi. if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; @@ -625,6 +625,23 @@ void vp9_encode_tiles_row_mt(VP9_COMP *cpi) { memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } + + // Handle use_nonrd_pick_mode case. 
+ if (cpi->sf.use_nonrd_pick_mode) { + MACROBLOCK *const x = &thread_data->td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = x->plane; + struct macroblockd_plane *const pd = xd->plane; + PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; + int j; + + for (j = 0; j < MAX_MB_PLANE; ++j) { + p[j].coeff = ctx->coeff_pbuf[j][0]; + p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; + pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; + p[j].eobs = ctx->eobs_pbuf[j][0]; + } + } } launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c index 280e3d6a614..222e27a9f26 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c @@ -884,7 +884,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, const int mb_index = mb_row * cm->mb_cols + mb_col; #endif - (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c - 1); + (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c); // Adjust to the next column of MBs. 
x->plane[0].src.buf = cpi->Source->y_buffer + @@ -979,12 +979,12 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, if (log_intra < 10.0) { mb_intra_factor = 1.0 + ((10.0 - log_intra) * 0.05); fp_acc_data->intra_factor += mb_intra_factor; - if (cpi->oxcf.ethread_bit_match) + if (cpi->oxcf.row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = mb_intra_factor; } else { fp_acc_data->intra_factor += 1.0; - if (cpi->oxcf.ethread_bit_match) + if (cpi->oxcf.row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0; } @@ -999,12 +999,12 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, if ((level_sample < DARK_THRESH) && (log_intra < 9.0)) { mb_brightness_factor = 1.0 + (0.01 * (DARK_THRESH - level_sample)); fp_acc_data->brightness_factor += mb_brightness_factor; - if (cpi->oxcf.ethread_bit_match) + if (cpi->oxcf.row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = mb_brightness_factor; } else { fp_acc_data->brightness_factor += 1.0; - if (cpi->oxcf.ethread_bit_match) + if (cpi->oxcf.row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = 1.0; } @@ -1166,7 +1166,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, if (((this_error - intrapenalty) * 9 <= motion_error * 10) && (this_error < (2 * intrapenalty))) { fp_acc_data->neutral_count += 1.0; - if (cpi->oxcf.ethread_bit_match) + if (cpi->oxcf.row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = 1.0; // Also track cases where the intra is not much worse than the inter @@ -1176,7 +1176,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, mb_neutral_count = (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error); fp_acc_data->neutral_count += mb_neutral_count; - if (cpi->oxcf.ethread_bit_match) + if (cpi->oxcf.row_mt_bit_exact) 
cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = mb_neutral_count; } @@ -1297,7 +1297,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, recon_uvoffset += uv_mb_height; // Accumulate row level stats to the corresponding tile stats - if (cpi->new_mt && mb_col == (tile.mi_col_end >> 1) - 1) + if (cpi->row_mt && mb_col == (tile.mi_col_end >> 1) - 1) accumulate_fp_mb_row_stat(tile_data, fp_acc_data); (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, mb_row, c, @@ -1424,7 +1424,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { cm->log2_tile_rows = 0; - if (cpi->oxcf.ethread_bit_match && cpi->twopass.fp_mb_float_stats == NULL) + if (cpi->oxcf.row_mt_bit_exact && cpi->twopass.fp_mb_float_stats == NULL) CHECK_MEM_ERROR( cm, cpi->twopass.fp_mb_float_stats, vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1)); @@ -1432,7 +1432,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { { FIRSTPASS_STATS fps; TileDataEnc *first_tile_col; - if (!cpi->new_mt) { + if (!cpi->row_mt) { cm->log2_tile_cols = 0; cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy; cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy; @@ -1441,13 +1441,13 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { } else { cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read; cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write; - if (cpi->oxcf.ethread_bit_match) { + if (cpi->oxcf.row_mt_bit_exact) { cm->log2_tile_cols = 0; vp9_zero_array(cpi->twopass.fp_mb_float_stats, cm->MBs); } vp9_encode_fp_row_mt(cpi); first_tile_col = &cpi->tile_data[0]; - if (cpi->oxcf.ethread_bit_match) + if (cpi->oxcf.row_mt_bit_exact) accumulate_floating_point_stats(cpi, first_tile_col); first_pass_stat_calc(cpi, &fps, &(first_tile_col->fp_data)); } @@ -1546,7 +1546,6 @@ static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err, const RATE_CONTROL *const rc = &cpi->rc; const 
VP9EncoderConfig *const oxcf = &cpi->oxcf; TWO_PASS *const twopass = &cpi->twopass; - double last_group_rate_err; // Clamp the target rate to VBR min / max limts. const int target_rate = @@ -1555,14 +1554,6 @@ static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err, noise_factor = fclamp(noise_factor, NOISE_FACTOR_MIN, NOISE_FACTOR_MAX); inactive_zone = fclamp(inactive_zone, 0.0, 1.0); - // based on recent history adjust expectations of bits per macroblock. - last_group_rate_err = - (double)twopass->rolling_arf_group_actual_bits / - DOUBLE_DIVIDE_CHECK((double)twopass->rolling_arf_group_target_bits); - last_group_rate_err = VPXMAX(0.25, VPXMIN(4.0, last_group_rate_err)); - twopass->bpm_factor *= (3.0 + last_group_rate_err) / 4.0; - twopass->bpm_factor = VPXMAX(0.25, VPXMIN(4.0, twopass->bpm_factor)); - if (target_rate <= 0) { return rc->worst_quality; // Highest value allowed } else { @@ -1572,6 +1563,7 @@ static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err, const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone)); const double av_err_per_mb = section_err / active_mbs; const double speed_term = 1.0 + 0.04 * oxcf->speed; + double last_group_rate_err; const int target_norm_bits_per_mb = (int)(((uint64_t)target_rate << BPER_MB_NORMBITS) / active_mbs); int q; @@ -1580,6 +1572,14 @@ static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err, if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) is_svc_upper_layer = 1; + // based on recent history adjust expectations of bits per macroblock. 
+ last_group_rate_err = + (double)twopass->rolling_arf_group_actual_bits / + DOUBLE_DIVIDE_CHECK((double)twopass->rolling_arf_group_target_bits); + last_group_rate_err = VPXMAX(0.25, VPXMIN(4.0, last_group_rate_err)); + twopass->bpm_factor *= (3.0 + last_group_rate_err) / 4.0; + twopass->bpm_factor = VPXMAX(0.25, VPXMIN(4.0, twopass->bpm_factor)); + // Try and pick a max Q that will be high enough to encode the // content at the given rate. for (q = rc->best_quality; q < rc->worst_quality; ++q) { @@ -2548,10 +2548,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { group_av_noise, vbr_group_bits_per_frame); twopass->active_worst_quality = (tmp_q + (twopass->active_worst_quality * 3)) >> 2; - - // Reset rolling actual and target bits counters for ARF groups. - twopass->rolling_arf_group_target_bits = 0; - twopass->rolling_arf_group_actual_bits = 0; } // Context Adjustment of ARNR filter strength @@ -2586,6 +2582,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Default to starting GF groups at normal frame size. cpi->rc.next_frame_size_selector = UNSCALED; } + + // Reset rolling actual and target bits counters for ARF groups. + twopass->rolling_arf_group_target_bits = 0; + twopass->rolling_arf_group_actual_bits = 0; } // Threshold for use of the lagging second reference frame. High second ref diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_frame_scale.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_frame_scale.c new file mode 100644 index 00000000000..349e7bd41d8 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_frame_scale.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" +#include "vp9/common/vp9_blockd.h" +#include "vpx_dsp/vpx_filter.h" +#include "vpx_scale/yv12config.h" + +void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst) { + const int src_w = src->y_crop_width; + const int src_h = src->y_crop_height; + const int dst_w = dst->y_crop_width; + const int dst_h = dst->y_crop_height; + const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer, + src->v_buffer }; + const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; + uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer }; + const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride }; + const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP]; + int x, y, i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + const int factor = (i == 0 || i == 3 ? 
1 : 2); + const int src_stride = src_strides[i]; + const int dst_stride = dst_strides[i]; + for (y = 0; y < dst_h; y += 16) { + const int y_q4 = y * (16 / factor) * src_h / dst_h; + for (x = 0; x < dst_w; x += 16) { + const int x_q4 = x * (16 / factor) * src_w / dst_w; + const uint8_t *src_ptr = srcs[i] + + (y / factor) * src_h / dst_h * src_stride + + (x / factor) * src_w / dst_w; + uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor); + + vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, + kernel[x_q4 & 0xf], 16 * src_w / dst_w, + kernel[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor, + 16 / factor); + } + } + } + + vpx_extend_frame_borders(dst); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c index e9379f41832..46d626def17 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c @@ -49,6 +49,9 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, cond_cost_list(cpi, cost_list), ref_mv, dst_mv, 0, 0); mv_sf->search_method = old_search_method; + /* restore UMV window */ + x->mv_limits = tmp_mv_limits; + // Try sub-pixel MC // if (bestsme > error_thresh && bestsme < INT_MAX) { @@ -66,9 +69,6 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); - /* restore UMV window */ - x->mv_limits = tmp_mv_limits; - return vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c index 16426b28ebc..a3939a5f85d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c +++ 
b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c @@ -21,6 +21,7 @@ #include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_encoder.h" @@ -52,6 +53,24 @@ void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { if (mv_limits->row_max > row_max) mv_limits->row_max = row_max; } +void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, + const MvLimits *umv_window_limits, + const MV *ref_mv) { + subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8, + ref_mv->col - MAX_FULL_PEL_VAL * 8); + subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8, + ref_mv->col + MAX_FULL_PEL_VAL * 8); + subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8, + ref_mv->row - MAX_FULL_PEL_VAL * 8); + subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8, + ref_mv->row + MAX_FULL_PEL_VAL * 8); + + subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min); + subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max); + subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min); + subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max); +} + int vp9_init_search_range(int size) { int sr = 0; // Minimum search size no matter what the passed in value. @@ -82,10 +101,8 @@ static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, int *mvcost[2], int error_per_bit) { if (mvcost) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; - // This product sits at a 32-bit ceiling right now and any additional - // accuracy in either bit cost or error cost will cause it to overflow. 
- return ROUND_POWER_OF_TWO( - (unsigned)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, + return (int)ROUND64_POWER_OF_TWO( + (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT + PIXEL_TRANSFORM_ERROR_SCALE); } @@ -267,34 +284,38 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { } \ } -#define SETUP_SUBPEL_SEARCH \ - const uint8_t *const z = x->plane[0].src.buf; \ - const int src_stride = x->plane[0].src.stride; \ - const MACROBLOCKD *xd = &x->e_mbd; \ - unsigned int besterr = UINT_MAX; \ - unsigned int sse; \ - unsigned int whichdir; \ - int thismse; \ - const unsigned int halfiters = iters_per_step; \ - const unsigned int quarteriters = iters_per_step; \ - const unsigned int eighthiters = iters_per_step; \ - const int y_stride = xd->plane[0].pre[0].stride; \ - const int offset = bestmv->row * y_stride + bestmv->col; \ - const uint8_t *const y = xd->plane[0].pre[0].buf; \ - \ - int rr = ref_mv->row; \ - int rc = ref_mv->col; \ - int br = bestmv->row * 8; \ - int bc = bestmv->col * 8; \ - int hstep = 4; \ - const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX); \ - const int maxc = VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX); \ - const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX); \ - const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX); \ - int tr = br; \ - int tc = bc; \ - \ - bestmv->row *= 8; \ +#define SETUP_SUBPEL_SEARCH \ + const uint8_t *const z = x->plane[0].src.buf; \ + const int src_stride = x->plane[0].src.stride; \ + const MACROBLOCKD *xd = &x->e_mbd; \ + unsigned int besterr = UINT_MAX; \ + unsigned int sse; \ + unsigned int whichdir; \ + int thismse; \ + const unsigned int halfiters = iters_per_step; \ + const unsigned int quarteriters = iters_per_step; \ + const unsigned int eighthiters = iters_per_step; \ + const int y_stride = xd->plane[0].pre[0].stride; \ + const int offset = bestmv->row * 
y_stride + bestmv->col; \ + const uint8_t *const y = xd->plane[0].pre[0].buf; \ + \ + int rr = ref_mv->row; \ + int rc = ref_mv->col; \ + int br = bestmv->row * 8; \ + int bc = bestmv->col * 8; \ + int hstep = 4; \ + int minc, maxc, minr, maxr; \ + int tr = br; \ + int tc = bc; \ + MvLimits subpel_mv_limits; \ + \ + vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \ + minc = subpel_mv_limits.col_min; \ + maxc = subpel_mv_limits.col_max; \ + minr = subpel_mv_limits.row_min; \ + maxr = subpel_mv_limits.row_max; \ + \ + bestmv->row *= 8; \ bestmv->col *= 8; static unsigned int setup_center_error( @@ -395,10 +416,6 @@ uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, (void)thismse; (void)cost_list; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } @@ -464,10 +481,6 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( bestmv->row = br; bestmv->col = bc; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } @@ -528,10 +541,6 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_more( bestmv->row = br; bestmv->col = bc; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } @@ -614,10 +623,6 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned( bestmv->row = br; bestmv->col = bc; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } @@ -653,16 +658,21 @@ uint32_t vp9_find_best_sub_pixel_tree( int bc = bestmv->col * 8; int hstep = 4; int iter, round = 3 - forced_stop; - const int minc = VPXMAX(x->mv_limits.col_min * 8, ref_mv->col - MV_MAX); - const int maxc = 
VPXMIN(x->mv_limits.col_max * 8, ref_mv->col + MV_MAX); - const int minr = VPXMAX(x->mv_limits.row_min * 8, ref_mv->row - MV_MAX); - const int maxr = VPXMIN(x->mv_limits.row_max * 8, ref_mv->row + MV_MAX); + + int minc, maxc, minr, maxr; int tr = br; int tc = bc; const MV *search_step = search_step_table; int idx, best_idx = -1; unsigned int cost_array[5]; int kr, kc; + MvLimits subpel_mv_limits; + + vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); + minc = subpel_mv_limits.col_min; + maxc = subpel_mv_limits.col_max; + minr = subpel_mv_limits.row_min; + maxr = subpel_mv_limits.row_max; if (!(allow_hp && use_mv_hp(ref_mv))) if (round == 3) round = 2; @@ -763,10 +773,6 @@ uint32_t vp9_find_best_sub_pixel_tree( bestmv->row = br; bestmv->col = bc; - if ((abs(bestmv->col - ref_mv->col) > (MAX_FULL_PEL_VAL << 3)) || - (abs(bestmv->row - ref_mv->row) > (MAX_FULL_PEL_VAL << 3))) - return UINT_MAX; - return besterr; } @@ -2470,3 +2476,85 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, return var; } + +// Note(yunqingwang): The following 2 functions are only used in the motion +// vector unit test, which return extreme motion vectors allowed by the MV +// limits. +#define COMMON_MV_TEST \ + SETUP_SUBPEL_SEARCH; \ + \ + (void)error_per_bit; \ + (void)vfp; \ + (void)z; \ + (void)src_stride; \ + (void)y; \ + (void)y_stride; \ + (void)second_pred; \ + (void)w; \ + (void)h; \ + (void)offset; \ + (void)mvjcost; \ + (void)mvcost; \ + (void)sse1; \ + (void)distortion; \ + \ + (void)halfiters; \ + (void)quarteriters; \ + (void)eighthiters; \ + (void)whichdir; \ + (void)allow_hp; \ + (void)forced_stop; \ + (void)hstep; \ + (void)rr; \ + (void)rc; \ + \ + (void)tr; \ + (void)tc; \ + (void)sse; \ + (void)thismse; \ + (void)cost_list; + +// Return the maximum MV. 
+uint32_t vp9_return_max_sub_pixel_mv( + const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, + int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, + int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], + uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, + int h) { + COMMON_MV_TEST; + + (void)minr; + (void)minc; + + bestmv->row = maxr; + bestmv->col = maxc; + besterr = 0; + + // In the sub-pel motion search, if hp is not used, then the last bit of mv + // has to be 0. + lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); + + return besterr; +} +// Return the minimum MV. +uint32_t vp9_return_min_sub_pixel_mv( + const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, + int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, + int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], + uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, + int h) { + COMMON_MV_TEST; + + (void)maxr; + (void)maxc; + + bestmv->row = minr; + bestmv->col = minc; + besterr = 0; + + // In the sub-pel motion search, if hp is not used, then the last bit of mv + // has to be 0. 
+ lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); + + return besterr; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h index d17b8e9bb9b..b8db2c35368 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h @@ -81,6 +81,8 @@ extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_evenmore; extern fractional_mv_step_fp vp9_skip_sub_pixel_tree; +extern fractional_mv_step_fp vp9_return_max_sub_pixel_mv; +extern fractional_mv_step_fp vp9_return_min_sub_pixel_mv; typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, @@ -109,6 +111,10 @@ int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int error_per_bit, int *cost_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd); +void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, + const MvLimits *umv_window_limits, + const MV *ref_mv); + #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_multi_thread.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_multi_thread.c index e27b1ed3a53..f5d8e430c8a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_multi_thread.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_multi_thread.c @@ -82,6 +82,16 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileDataEnc *this_tile = &cpi->tile_data[tile_col]; vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col); + if (cpi->sf.adaptive_rd_thresh_row_mt) { + const int sb_rows = + (mi_cols_aligned_to_sb(cm->mi_rows) >> 
MI_BLOCK_SIZE_LOG2) + 1; + int i; + this_tile->row_base_thresh_freq_fact = + (int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES, + sizeof(*(this_tile->row_base_thresh_freq_fact))); + for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++) + this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT; + } } // Assign the sync pointer of tile row zero for every tile row > 0 @@ -154,10 +164,15 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) { TileDataEnc *this_tile = &cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols + tile_col]; + if (cpi->sf.adaptive_rd_thresh_row_mt) { + if (this_tile->row_base_thresh_freq_fact != NULL) { + vpx_free(this_tile->row_base_thresh_freq_fact); + this_tile->row_base_thresh_freq_fact = NULL; + } + } pthread_mutex_destroy(this_tile->search_count_mutex); vpx_free(this_tile->search_count_mutex); this_tile->search_count_mutex = NULL; - pthread_mutex_destroy(this_tile->enc_row_mt_mutex); vpx_free(this_tile->enc_row_mt_mutex); this_tile->enc_row_mt_mutex = NULL; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c index a32e5cac585..fc2e32448e8 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c @@ -131,8 +131,11 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { // Force noise estimation to 0 and denoiser off if content has high motion. 
ne->level = kLowLow; #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && + cpi->svc.current_superframe > 1) { vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); + copy_frame(&cpi->denoiser.last_source, cpi->Source); + } #endif return; } else { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c index 08f3f3801e6..db2bbe7c272 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c @@ -354,7 +354,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, *sse_y = sse; #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) + if (cpi->oxcf.noise_sensitivity > 0 && cpi->oxcf.speed > 5) ac_thr = vp9_scale_acskip_thresh(ac_thr, cpi->denoiser.denoising_level, (abs(sum) >> (bw + bh))); else @@ -652,7 +652,7 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, #endif if (cpi->sf.use_simple_block_yrd && cpi->common.frame_type != KEY_FRAME && - !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id)) { + bsize < BLOCK_32X32) { unsigned int var_y, sse_y; (void)tx_size; model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, @@ -1016,6 +1016,32 @@ static int mode_offset(const PREDICTION_MODE mode) { } } +static INLINE int rd_less_than_thresh_row_mt(int64_t best_rd, int thresh, + const int *const thresh_fact) { + int is_rd_less_than_thresh; + is_rd_less_than_thresh = + best_rd < ((int64_t)thresh * (*thresh_fact) >> 5) || thresh == INT_MAX; + return is_rd_less_than_thresh; +} + +static INLINE void update_thresh_freq_fact_row_mt( + VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance, + int thresh_freq_fact_idx, MV_REFERENCE_FRAME ref_frame, + THR_MODES best_mode_idx, PREDICTION_MODE mode) { + 
THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)]; + int freq_fact_idx = thresh_freq_fact_idx + thr_mode_idx; + int *freq_fact = &tile_data->row_base_thresh_freq_fact[freq_fact_idx]; + if (thr_mode_idx == best_mode_idx) + *freq_fact -= (*freq_fact >> 4); + else if (cpi->sf.limit_newmv_early_exit && mode == NEWMV && + ref_frame == LAST_FRAME && source_variance < 5) { + *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, 32); + } else { + *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); + } +} + static INLINE void update_thresh_freq_fact( VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance, BLOCK_SIZE bsize, MV_REFERENCE_FRAME ref_frame, THR_MODES best_mode_idx, @@ -1398,7 +1424,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, intra_cost_penalty, 0); const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize]; - const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; + const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; + int thresh_freq_fact_idx = (sb_row * BLOCK_SIZES + bsize) * MAX_MODES; + const int *const rd_thresh_freq_fact = + (cpi->sf.adaptive_rd_thresh_row_mt) + ? 
&(tile_data->row_base_thresh_freq_fact[thresh_freq_fact_idx]) + : tile_data->thresh_freq_fact[bsize]; + INTERP_FILTER filter_ref; const int bsl = mi_width_log2_lookup[bsize]; const int pred_filter_search = @@ -1436,6 +1468,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #if CONFIG_VP9_TEMPORAL_DENOISING VP9_PICKMODE_CTX_DEN ctx_den; int64_t zero_last_cost_orig = INT64_MAX; + int denoise_svc_pickmode = 1; #endif init_ref_frame_cost(cm, xd, ref_frame_cost); @@ -1495,9 +1528,16 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && - cpi->denoiser.denoising_level > kDenLowLow) { - vp9_denoiser_reset_frame_stats(ctx); + if (cpi->oxcf.noise_sensitivity > 0) { + if (cpi->use_svc) { + int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, + cpi->svc.temporal_layer_id, + cpi->svc.number_temporal_layers); + LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame; + } + if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode) + vp9_denoiser_reset_frame_stats(ctx); } #endif @@ -1547,6 +1587,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var)) use_golden_nonzeromv = 0; + if (cpi->oxcf.speed >= 8 && !cpi->use_svc && + ((cpi->rc.frames_since_golden + 1) < x->last_sb_high_content || + x->last_sb_high_content > 40)) + usable_ref_frame = LAST_FRAME; + for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) { if (!skip_ref_find_pred[ref_frame]) { find_predictors(cpi, x, ref_frame, frame_mv, const_motion, @@ -1666,11 +1711,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, cpi->rc.frames_since_golden > 4) mode_rd_thresh = mode_rd_thresh << 3; - if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, + 
if ((cpi->sf.adaptive_rd_thresh_row_mt && + rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh, + &rd_thresh_freq_fact[mode_index])) || + (!cpi->sf.adaptive_rd_thresh_row_mt && + rd_less_than_thresh( + best_rdc.rdcost, mode_rd_thresh, #if CONFIG_MULTITHREAD - tile_data->enc_row_mt_mutex, + // Synchronization of this function + // is only necessary when + // adaptive_rd_thresh is > 0. + cpi->sf.adaptive_rd_thresh ? tile_data->enc_row_mt_mutex : NULL, #endif - &rd_thresh_freq_fact[mode_index])) + &rd_thresh_freq_fact[mode_index]))) continue; if (this_mode == NEWMV) { @@ -1920,7 +1973,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode && cpi->denoiser.denoising_level > kDenLowLow) { vp9_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx); // Keep track of zero_last cost. @@ -1982,11 +2035,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, perform_intra_pred = 0; // Perform intra prediction search, if the best SAD is above a certain // threshold. 
- if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) && perform_intra_pred && - (best_rdc.rdcost == INT64_MAX || - (!x->skip && best_rdc.rdcost > inter_mode_thresh && - bsize <= cpi->sf.max_intra_bsize)) && - !x->skip_low_source_sad) { + if (best_rdc.rdcost == INT64_MAX || + ((!force_skip_low_temp_var || bsize < BLOCK_32X32) && + perform_intra_pred && !x->skip && best_rdc.rdcost > inter_mode_thresh && + bsize <= cpi->sf.max_intra_bsize && !x->skip_low_source_sad)) { struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 }; int i; TX_SIZE best_intra_tx_size = TX_SIZES; @@ -2030,11 +2082,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize])) continue; - if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, + if ((cpi->sf.adaptive_rd_thresh_row_mt && + rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh, + &rd_thresh_freq_fact[mode_index])) || + (!cpi->sf.adaptive_rd_thresh_row_mt && + rd_less_than_thresh( + best_rdc.rdcost, mode_rd_thresh, #if CONFIG_MULTITHREAD - tile_data->enc_row_mt_mutex, + // Synchronization of this function + // is only necessary when + // adaptive_rd_thresh is > 0. + cpi->sf.adaptive_rd_thresh ? 
tile_data->enc_row_mt_mutex : NULL, #endif - &rd_thresh_freq_fact[mode_index])) + &rd_thresh_freq_fact[mode_index]))) continue; mi->mode = this_mode; @@ -2117,7 +2177,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && cpi->resize_pending == 0 && - denoise_svc(cpi) && cpi->denoiser.denoising_level > kDenLowLow && + denoise_svc_pickmode && cpi->denoiser.denoising_level > kDenLowLow && cpi->denoiser.reset == 0) { VP9_DENOISER_DECISION decision = COPY_BLOCK; vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost, @@ -2142,16 +2202,27 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // TODO(yunqingwang): Check intra mode mask and only update freq_fact // for those valid modes. for (i = 0; i < intra_modes; i++) { - update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize, - INTRA_FRAME, best_mode_idx, intra_mode_list[i]); + if (cpi->sf.adaptive_rd_thresh_row_mt) + update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance, + thresh_freq_fact_idx, INTRA_FRAME, + best_mode_idx, intra_mode_list[i]); + else + update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize, + INTRA_FRAME, best_mode_idx, + intra_mode_list[i]); } } else { for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { PREDICTION_MODE this_mode; if (best_ref_frame != ref_frame) continue; for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { - update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize, - ref_frame, best_mode_idx, this_mode); + if (cpi->sf.adaptive_rd_thresh_row_mt) + update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance, + thresh_freq_fact_idx, ref_frame, + best_mode_idx, this_mode); + else + update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize, + ref_frame, best_mode_idx, this_mode); } } } @@ -2298,7 +2369,7 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, 
MACROBLOCK *x, int mi_row, } vp9_set_mv_search_range(&x->mv_limits, - &mbmi_ext->ref_mvs[0]->as_mv); + &mbmi_ext->ref_mvs[ref_frame][0].as_mv); vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c index 6932c0ccdfc..f79b7c6fc27 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c @@ -688,6 +688,17 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { ? VPXMIN(rc->avg_frame_qindex[INTER_FRAME], rc->avg_frame_qindex[KEY_FRAME]) : rc->avg_frame_qindex[INTER_FRAME]; + // For SVC if the current base spatial layer was key frame, use the QP from + // that base layer for ambient_qp. + if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) { + int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id, + cpi->svc.number_temporal_layers); + const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + if (lc->is_key_frame) { + const RATE_CONTROL *lrc = &lc->rc; + ambient_qp = VPXMIN(ambient_qp, lrc->last_q[KEY_FRAME]); + } + } active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2); if (rc->buffer_level > rc->optimal_buffer_level) { // Adjust down. @@ -1352,10 +1363,6 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { RATE_CONTROL *const rc = &cpi->rc; const int qindex = cm->base_qindex; - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - vp9_cyclic_refresh_postencode(cpi); - } - // Update rate control heuristics rc->projected_frame_size = (int)(bytes_used << 3); @@ -2202,7 +2209,7 @@ void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi, uint64_t avg_sad_current) { // in content and allow rate control to react. 
// This function also handles special case of lag_in_frames, to measure content // level in #future frames set by the lag_in_frames. -void vp9_avg_source_sad(VP9_COMP *cpi) { +void vp9_scene_detection_onepass(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; #if CONFIG_VP9_HIGHBITDEPTH @@ -2273,7 +2280,6 @@ void vp9_avg_source_sad(VP9_COMP *cpi) { int num_samples = 0; int sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; int sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; - uint64_t avg_source_sad_threshold = 10000; if (cpi->oxcf.lag_in_frames > 0) { src_y = frames[frame]->y_buffer; src_ystride = frames[frame]->y_stride; @@ -2283,28 +2289,12 @@ void vp9_avg_source_sad(VP9_COMP *cpi) { for (sbi_row = 0; sbi_row < sb_rows; ++sbi_row) { for (sbi_col = 0; sbi_col < sb_cols; ++sbi_col) { // Checker-board pattern, ignore boundary. - // If the use_source_sad is on, compute for every superblock. - if (cpi->sf.use_source_sad || - ((sbi_row > 0 && sbi_col > 0) && + if (((sbi_row > 0 && sbi_col > 0) && (sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) && ((sbi_row % 2 == 0 && sbi_col % 2 == 0) || (sbi_row % 2 != 0 && sbi_col % 2 != 0)))) { tmp_sad = cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride); - if (cpi->sf.use_source_sad) { - unsigned int tmp_sse; - unsigned int tmp_variance = vpx_variance64x64( - src_y, src_ystride, last_src_y, last_src_ystride, &tmp_sse); - // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12) - if (tmp_sad < avg_source_sad_threshold) - cpi->content_state_sb[num_samples] = - ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff - : kLowSadHighSumdiff; - else - cpi->content_state_sb[num_samples] = - ((tmp_sse - tmp_variance) < 25) ? 
kHighSadLowSumdiff - : kHighSadHighSumdiff; - } avg_sad += tmp_sad; num_samples++; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h index 32353d38e46..9e46231955f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h @@ -283,7 +283,7 @@ void vp9_set_target_rate(struct VP9_COMP *cpi); int vp9_resize_one_pass_cbr(struct VP9_COMP *cpi); -void vp9_avg_source_sad(struct VP9_COMP *cpi); +void vp9_scene_detection_onepass(struct VP9_COMP *cpi); int vp9_encodedframe_overshoot(struct VP9_COMP *cpi, int frame_size, int *q); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c index 21e3b1f6308..3c49fe665d4 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c @@ -312,63 +312,62 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { } } -static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { - // NOTE: The tables below must be of the same size. - - // The functions described below are sampled at the four most significant - // bits of x^2 + 8 / 256. - - // Normalized rate: - // This table models the rate for a Laplacian source with given variance - // when quantized with a uniform quantizer with given stepsize. The - // closed form expression is: - // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)], - // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance), - // and H(x) is the binary entropy function. 
- static const int rate_tab_q10[] = { - 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, - 4044, 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, - 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, - 2290, 2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, - 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963, - 911, 864, 821, 781, 745, 680, 623, 574, 530, 490, 455, 424, - 395, 345, 304, 269, 239, 213, 190, 171, 154, 126, 104, 87, - 73, 61, 52, 44, 38, 28, 21, 16, 12, 10, 8, 6, - 5, 3, 2, 1, 1, 1, 0, 0, - }; +// NOTE: The tables below must be of the same size. + +// The functions described below are sampled at the four most significant +// bits of x^2 + 8 / 256. + +// Normalized rate: +// This table models the rate for a Laplacian source with given variance +// when quantized with a uniform quantizer with given stepsize. The +// closed form expression is: +// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)], +// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance), +// and H(x) is the binary entropy function. +static const int rate_tab_q10[] = { + 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044, + 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037, + 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179, + 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398, + 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963, 911, 864, 821, 781, 745, + 680, 623, 574, 530, 490, 455, 424, 395, 345, 304, 269, 239, 213, + 190, 171, 154, 126, 104, 87, 73, 61, 52, 44, 38, 28, 21, + 16, 12, 10, 8, 6, 5, 3, 2, 1, 1, 1, 0, 0, +}; - // Normalized distortion: - // This table models the normalized distortion for a Laplacian source - // with given variance when quantized with a uniform quantizer - // with given stepsize. 
The closed form expression is: - // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)) - // where x = qpstep / sqrt(variance). - // Note the actual distortion is Dn * variance. - static const int dist_tab_q10[] = { - 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, - 5, 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17, - 18, 21, 24, 26, 29, 31, 34, 36, 39, 44, 49, 54, - 59, 64, 69, 73, 78, 88, 97, 106, 115, 124, 133, 142, - 151, 167, 184, 200, 215, 231, 245, 260, 274, 301, 327, 351, - 375, 397, 418, 439, 458, 495, 528, 559, 587, 613, 637, 659, - 680, 717, 749, 777, 801, 823, 842, 859, 874, 899, 919, 936, - 949, 960, 969, 977, 983, 994, 1001, 1006, 1010, 1013, 1015, 1017, - 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024, - }; - static const int xsq_iq_q10[] = { - 0, 4, 8, 12, 16, 20, 24, 28, 32, - 40, 48, 56, 64, 72, 80, 88, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 256, 288, - 320, 352, 384, 416, 448, 480, 544, 608, 672, - 736, 800, 864, 928, 992, 1120, 1248, 1376, 1504, - 1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296, - 3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136, - 7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328, - 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736, - 36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696, - 81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808, - 180192, 196576, 212960, 229344, 245728, - }; +// Normalized distortion: +// This table models the normalized distortion for a Laplacian source +// with given variance when quantized with a uniform quantizer +// with given stepsize. The closed form expression is: +// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)) +// where x = qpstep / sqrt(variance). +// Note the actual distortion is Dn * variance. 
+static const int dist_tab_q10[] = { + 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 5, + 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21, + 24, 26, 29, 31, 34, 36, 39, 44, 49, 54, 59, 64, 69, + 73, 78, 88, 97, 106, 115, 124, 133, 142, 151, 167, 184, 200, + 215, 231, 245, 260, 274, 301, 327, 351, 375, 397, 418, 439, 458, + 495, 528, 559, 587, 613, 637, 659, 680, 717, 749, 777, 801, 823, + 842, 859, 874, 899, 919, 936, 949, 960, 969, 977, 983, 994, 1001, + 1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024, +}; +static const int xsq_iq_q10[] = { + 0, 4, 8, 12, 16, 20, 24, 28, 32, + 40, 48, 56, 64, 72, 80, 88, 96, 112, + 128, 144, 160, 176, 192, 208, 224, 256, 288, + 320, 352, 384, 416, 448, 480, 544, 608, 672, + 736, 800, 864, 928, 992, 1120, 1248, 1376, 1504, + 1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296, + 3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136, + 7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328, + 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736, + 36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696, + 81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808, + 180192, 196576, 212960, 229344, 245728, +}; + +static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { const int tmp = (xsq_q10 >> 2) + 8; const int k = get_msb(tmp) - 3; const int xq = (k << 3) + ((tmp >> k) & 0x7); @@ -379,6 +378,24 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; } +static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE], + int r_q10[MAX_MB_PLANE], + int d_q10[MAX_MB_PLANE]) { + int i; + const int one_q10 = 1 << 10; + for (i = 0; i < MAX_MB_PLANE; ++i) { + const int tmp = (xsq_q10[i] >> 2) + 8; + const int k = get_msb(tmp) - 3; + const int xq = (k << 3) + ((tmp >> k) & 0x7); + const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k); + const int b_q10 = one_q10 - a_q10; + r_q10[i] = 
(rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10; + d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; + } +} + +static const uint32_t MAX_XSQ_Q10 = 245727; + void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2, unsigned int qstep, int *rate, int64_t *dist) { @@ -393,7 +410,6 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2, *dist = 0; } else { int d_q10, r_q10; - static const uint32_t MAX_XSQ_Q10 = 245727; const uint64_t xsq_q10_64 = (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var; const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10); @@ -403,6 +419,30 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2, } } +// Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where +// vectors are of length MAX_MB_PLANE and all elements of var are non-zero. +void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE], + unsigned int n_log2[MAX_MB_PLANE], + unsigned int qstep[MAX_MB_PLANE], + int64_t *rate_sum, int64_t *dist_sum) { + int i; + int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE]; + for (i = 0; i < MAX_MB_PLANE; ++i) { + const uint64_t xsq_q10_64 = + (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) / + var[i]; + xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10); + } + model_rd_norm_vec(xsq_q10, r_q10, d_q10); + for (i = 0; i < MAX_MB_PLANE; ++i) { + int rate = + ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT); + int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10; + *rate_sum += rate; + *dist_sum += dist; + } +} + void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, ENTROPY_CONTEXT t_above[16], diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h index 74a2f5d9573..aae47dcdda4 100644 --- 
a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h @@ -38,6 +38,7 @@ extern "C" { #define MAX_MODES 30 #define MAX_REFS 6 +#define RD_THRESH_INIT_FACT 32 #define RD_THRESH_MAX_FACT 64 #define RD_THRESH_INC 1 @@ -140,6 +141,11 @@ void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex); void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, unsigned int qstep, int *rate, int64_t *dist); +void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE], + unsigned int n_log2[MAX_MB_PLANE], + unsigned int qstep[MAX_MB_PLANE], + int64_t *rate_sum, int64_t *dist_sum); + int vp9_get_switchable_rate(const struct VP9_COMP *cpi, const MACROBLOCKD *const xd); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c index 1b82b29d473..d23d324466d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c @@ -164,17 +164,19 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, const int ref = xd->mi[0]->ref_frame[0]; unsigned int sse; unsigned int var = 0; - unsigned int sum_sse = 0; int64_t total_sse = 0; int skip_flag = 1; const int shift = 6; - int rate; int64_t dist; const int dequant_shift = #if CONFIG_VP9_HIGHBITDEPTH (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
xd->bd - 5 : #endif // CONFIG_VP9_HIGHBITDEPTH 3; + unsigned int qstep_vec[MAX_MB_PLANE]; + unsigned int nlog2_vec[MAX_MB_PLANE]; + unsigned int sum_sse_vec[MAX_MB_PLANE]; + int any_zero_sum_sse = 0; x->pred_sse[ref] = 0; @@ -186,6 +188,7 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size]; const int64_t dc_thr = p->quant_thred[0] >> shift; const int64_t ac_thr = p->quant_thred[1] >> shift; + unsigned int sum_sse = 0; // The low thresholds are used to measure if the prediction errors are // low enough so that we can skip the mode search. const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2); @@ -196,8 +199,6 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, int lw = b_width_log2_lookup[unit_size] + 2; int lh = b_height_log2_lookup[unit_size] + 2; - sum_sse = 0; - for (idy = 0; idy < bh; ++idy) { for (idx = 0; idx < bw; ++idx) { uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw); @@ -233,12 +234,18 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, } total_sse += sum_sse; + sum_sse_vec[i] = sum_sse; + any_zero_sum_sse = any_zero_sum_sse || (sum_sse == 0); + qstep_vec[i] = pd->dequant[1] >> dequant_shift; + nlog2_vec[i] = num_pels_log2_lookup[bs]; + } - // Fast approximate the modelling function. - if (cpi->sf.simple_model_rd_from_var) { + // Fast approximate the modelling function. 
+ if (cpi->sf.simple_model_rd_from_var) { + for (i = 0; i < MAX_MB_PLANE; ++i) { int64_t rate; - const int64_t square_error = sum_sse; - int quantizer = (pd->dequant[1] >> dequant_shift); + const int64_t square_error = sum_sse_vec[i]; + int quantizer = qstep_vec[i]; if (quantizer < 120) rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT); @@ -247,12 +254,19 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, dist = (square_error * quantizer) >> 8; rate_sum += rate; dist_sum += dist; + } + } else { + if (any_zero_sum_sse) { + for (i = 0; i < MAX_MB_PLANE; ++i) { + int rate; + vp9_model_rd_from_var_lapndz(sum_sse_vec[i], nlog2_vec[i], qstep_vec[i], + &rate, &dist); + rate_sum += rate; + dist_sum += dist; + } } else { - vp9_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs], - pd->dequant[1] >> dequant_shift, &rate, - &dist); - rate_sum += rate; - dist_sum += dist; + vp9_model_rd_from_var_lapndz_vec(sum_sse_vec, nlog2_vec, qstep_vec, + &rate_sum, &dist_sum); } } @@ -350,9 +364,9 @@ static int cost_coeffs(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, uint8_t token_cache[32 * 32]; int cost; #if CONFIG_VP9_HIGHBITDEPTH - const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd); + const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); #else - const int *cat6_high_cost = vp9_get_high_cost_table(8); + const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); #endif // Check for consistency of tx_size with mode info @@ -501,7 +515,8 @@ static int64_t sum_squares_visible(const MACROBLOCKD *xd, pd->subsampling_y, blk_row); if (tx_bsize == BLOCK_4X4 || (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) { - sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, tx_bsize); + assert(tx_4x4_w == tx_4x4_h); + sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, tx_4x4_w << 2); } else { int r, c; int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h); @@ -511,7 +526,8 @@ static 
int64_t sum_squares_visible(const MACROBLOCKD *xd, for (r = 0; r < max_r; ++r) { // Skip visiting the sub blocks that are wholly within the UMV. for (c = 0; c < max_c; ++c) { - sse += (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, BLOCK_4X4); + sse += (int64_t)vpx_sum_squares_2d_i16( + diff + r * diff_stride * 4 + c * 4, diff_stride, 4); } } } @@ -742,9 +758,11 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, // TODO(jingning): temporarily enabled only for luma component rd = VPXMIN(rd1, rd2); - if (plane == 0) + if (plane == 0) { x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless); + x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block]; + } args->this_rate += rate; args->this_dist += dist; @@ -3190,6 +3208,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, ref_frame = vp9_mode_order[mode_index].ref_frame[0]; second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; + vp9_zero(x->sum_y_eobs); + // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. if (midx == mode_skip_start && best_mode_index >= 0) { @@ -3221,6 +3241,9 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, if (best_rd < mode_threshold[mode_index]) continue; + // This is only used in motion vector unit test. 
+ if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; + if (sf->motion_field_mode_search) { const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize], tile_info->mi_col_end - mi_col); @@ -3469,6 +3492,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size], sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); + ctx->sum_y_eobs = x->sum_y_eobs[mi->tx_size]; // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history @@ -3576,7 +3600,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, if (best_mode_index < 0 || best_rd >= best_rd_so_far) { // If adaptive interp filter is enabled, then the current leaf node of 8x8 // data is needed for sub8x8. Hence preserve the context. - if (cpi->new_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; + if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; @@ -3699,6 +3723,8 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data, mi->mv[0].as_int = 0; x->skip = 1; + ctx->sum_y_eobs = 0; + if (cm->interp_filter != BILINEAR) { best_filter = EIGHTTAP; if (cm->interp_filter == SWITCHABLE && @@ -3853,6 +3879,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, ref_frame = vp9_ref_order[ref_index].ref_frame[0]; second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; + vp9_zero(x->sum_y_eobs); + #if CONFIG_BETTER_HW_COMPATIBILITY // forbid 8X4 and 4X8 partitions if any reference frame is scaled. if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) { @@ -3899,6 +3927,9 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, &rd_thresh_freq_fact[ref_index])) continue; + // This is only used in motion vector unit test. 
+ if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; + comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if (!cpi->allow_comp_inter_inter) continue; @@ -4069,6 +4100,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, for (i = 0; i < 4; i++) { tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; + x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i]; } pred_exists = 1; if (switchable_filter_index == 0 && sf->use_rd_breakout && @@ -4233,6 +4265,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); + ctx->sum_y_eobs = x->sum_y_eobs[TX_4X4]; for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i]; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c index e4146cdc4dc..f74b6b0e9e3 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c @@ -67,14 +67,26 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, int speed) { VP9_COMMON *const cm = &cpi->common; + // speed 0 features + sf->partition_search_breakout_thr.dist = (1 << 20); + sf->partition_search_breakout_thr.rate = 80; + + // Currently, the machine-learning based partition search early termination + // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. + if (VPXMIN(cm->width, cm->height) >= 480) { + sf->ml_partition_search_early_termination = 1; + } + if (speed >= 1) { + sf->ml_partition_search_early_termination = 0; + if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - sf->partition_search_breakout_dist_thr = (1 << 23); + sf->partition_search_breakout_thr.dist = (1 << 23); } else { sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - sf->partition_search_breakout_dist_thr = (1 << 21); + sf->partition_search_breakout_thr.dist = (1 << 21); } } @@ -83,12 +95,12 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->adaptive_pred_interp_filter = 0; - sf->partition_search_breakout_dist_thr = (1 << 24); - sf->partition_search_breakout_rate_thr = 120; + sf->partition_search_breakout_thr.dist = (1 << 24); + sf->partition_search_breakout_thr.rate = 120; } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - sf->partition_search_breakout_dist_thr = (1 << 22); - sf->partition_search_breakout_rate_thr = 100; + sf->partition_search_breakout_thr.dist = (1 << 22); + sf->partition_search_breakout_thr.rate = 100; } sf->rd_auto_partition_min_limit = set_partition_min_limit(cm); @@ -108,14 +120,14 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0; - sf->partition_search_breakout_dist_thr = (1 << 25); - sf->partition_search_breakout_rate_thr = 200; + sf->partition_search_breakout_thr.dist = (1 << 25); + sf->partition_search_breakout_thr.rate = 200; } else { sf->max_intra_bsize = BLOCK_32X32; sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; sf->schedule_mode_search = cm->base_qindex < 175 ? 
1 : 0; - sf->partition_search_breakout_dist_thr = (1 << 23); - sf->partition_search_breakout_rate_thr = 120; + sf->partition_search_breakout_thr.dist = (1 << 23); + sf->partition_search_breakout_thr.rate = 120; } } @@ -129,24 +141,29 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 4) { + sf->partition_search_breakout_thr.rate = 300; if (VPXMIN(cm->width, cm->height) >= 720) { - sf->partition_search_breakout_dist_thr = (1 << 26); + sf->partition_search_breakout_thr.dist = (1 << 26); } else { - sf->partition_search_breakout_dist_thr = (1 << 24); + sf->partition_search_breakout_thr.dist = (1 << 24); } sf->disable_split_mask = DISABLE_ALL_SPLIT; } + + if (speed >= 5) { + sf->partition_search_breakout_thr.rate = 500; + } } static double tx_dom_thresholds[6] = { 99.0, 14.0, 12.0, 8.0, 4.0, 0.0 }; static double qopt_thresholds[6] = { 99.0, 12.0, 10.0, 4.0, 2.0, 0.0 }; -static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, - SPEED_FEATURES *sf, int speed) { +static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, + VP9_COMMON *cm, + SPEED_FEATURES *sf, + int speed) { const int boosted = frame_is_boosted(cpi); - sf->partition_search_breakout_dist_thr = (1 << 20); - sf->partition_search_breakout_rate_thr = 80; sf->tx_size_search_breakout = 1; sf->adaptive_rd_thresh = 1; sf->allow_skip_recode = 1; @@ -245,7 +262,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->use_fast_coef_updates = ONE_LOOP_REDUCED; sf->use_fast_coef_costing = 1; sf->motion_field_mode_search = !boosted; - sf->partition_search_breakout_rate_thr = 300; } if (speed >= 5) { @@ -257,7 +273,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->intra_y_mode_mask[i] = INTRA_DC; sf->intra_uv_mode_mask[i] = INTRA_DC; } - sf->partition_search_breakout_rate_thr = 500; sf->mv.reduce_first_step_size = 1; sf->simple_model_rd_from_var = 1; } @@ -287,10 +302,11 @@ static void 
set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 5) { + sf->partition_search_breakout_thr.rate = 200; if (VPXMIN(cm->width, cm->height) >= 720) { - sf->partition_search_breakout_dist_thr = (1 << 25); + sf->partition_search_breakout_thr.dist = (1 << 25); } else { - sf->partition_search_breakout_dist_thr = (1 << 23); + sf->partition_search_breakout_thr.dist = (1 << 23); } } @@ -300,13 +316,14 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, } } -static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, - vp9e_tune_content content) { +static void set_rt_speed_feature_framesize_independent( + VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, vp9e_tune_content content) { VP9_COMMON *const cm = &cpi->common; const int is_keyframe = cm->frame_type == KEY_FRAME; const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key; sf->static_segmentation = 0; sf->adaptive_rd_thresh = 1; + sf->adaptive_rd_thresh_row_mt = 0; sf->use_fast_coef_costing = 1; sf->allow_exhaustive_searches = 0; sf->exhaustive_searches_thresh = INT_MAX; @@ -439,7 +456,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, sf->adaptive_rd_thresh = 2; // This feature is only enabled when partition search is disabled. sf->reuse_inter_pred_sby = 1; - sf->partition_search_breakout_rate_thr = 200; sf->coeff_prob_appx_step = 4; sf->use_fast_coef_updates = is_keyframe ? 
TWO_LOOP : ONE_LOOP_REDUCED; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH; @@ -465,9 +481,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, sf->short_circuit_flat_blocks = 1; } if (cpi->oxcf.rc_mode == VPX_CBR && - cpi->oxcf.content != VP9E_CONTENT_SCREEN && !cpi->use_svc) { + cpi->oxcf.content != VP9E_CONTENT_SCREEN) { sf->limit_newmv_early_exit = 1; - sf->bias_golden = 1; + if (!cpi->use_svc) sf->bias_golden = 1; } } @@ -478,8 +494,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, sf->mv.search_method = NSTEP; sf->mv.reduce_first_step_size = 1; sf->skip_encode_sb = 0; - if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && - content != VP9E_CONTENT_SCREEN) { + if (cpi->oxcf.rc_mode == VPX_CBR && content != VP9E_CONTENT_SCREEN) { // Enable short circuit for low temporal variance. sf->short_circuit_low_temp_var = 1; } @@ -495,12 +510,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, sf->mv.search_method = NSTEP; sf->mv.fullpel_search_step_param = 6; } - if (!cpi->use_svc && !cpi->resize_pending && !cpi->resize_state && - !cpi->external_resize && cpi->oxcf.resize_mode == RESIZE_NONE) - sf->use_source_sad = 1; + if (!cpi->external_resize) sf->use_source_sad = 1; if (sf->use_source_sad) { - if (cpi->content_state_sb == NULL) { - cpi->content_state_sb = (uint8_t *)vpx_calloc( + if (cpi->content_state_sb_fd == NULL && + (!cpi->use_svc || + cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { + cpi->content_state_sb_fd = (uint8_t *)vpx_calloc( (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t)); } } @@ -509,31 +524,15 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, if (speed >= 8) { sf->adaptive_rd_thresh = 4; // Enable partition copy - if (!cpi->use_svc && !cpi->resize_pending && !cpi->resize_state && - !cpi->external_resize && cpi->oxcf.resize_mode == RESIZE_NONE) + if (!cpi->use_svc && 
!cpi->resize_pending && cpi->resize_state == ORIG && + !cpi->external_resize && cpi->oxcf.resize_mode == RESIZE_NONE) { sf->copy_partition_flag = 1; - - if (sf->copy_partition_flag) { - cpi->max_copied_frame = 5; - if (cpi->prev_partition == NULL) { - cpi->prev_partition = (BLOCK_SIZE *)vpx_calloc( - cm->mi_stride * cm->mi_rows, sizeof(BLOCK_SIZE)); - } - if (cpi->prev_segment_id == NULL) { - cpi->prev_segment_id = (int8_t *)vpx_calloc( - (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(int8_t)); - } - if (cpi->prev_variance_low == NULL) { - cpi->prev_variance_low = (uint8_t *)vpx_calloc( - (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25, - sizeof(uint8_t)); - } - if (cpi->copied_frame_cnt == NULL) { - cpi->copied_frame_cnt = (uint8_t *)vpx_calloc( - (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t)); - } + cpi->max_copied_frame = 4; } + if (cpi->row_mt && cpi->oxcf.max_threads > 1) + sf->adaptive_rd_thresh_row_mt = 1; + sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2; if (content == VP9E_CONTENT_SCREEN) sf->lpf_pick = LPF_PICK_MINIMAL_LPF; // Only keep INTRA_DC mode for speed 8. @@ -554,9 +553,20 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, vp9_noise_estimate_extract_level(&cpi->noise_estimate); if (noise_level >= kMedium) sf->short_circuit_low_temp_var = 2; } + // Since the short_circuit_low_temp_var is used, reduce the + // adaptive_rd_thresh level. + if (cm->width > 320 && cm->height > 240) + sf->adaptive_rd_thresh = 1; + else + sf->adaptive_rd_thresh = 2; } sf->limit_newmv_early_exit = 0; - sf->use_simple_block_yrd = 0; + if (cm->width > 320 && cm->height > 240) sf->use_simple_block_yrd = 1; + } + // Turn off adaptive_rd_thresh if row_mt is on for speed 5, 6, 7. 
+ if (speed >= 5 && speed < 8 && cpi->row_mt && cpi->num_workers > 1) { + sf->adaptive_rd_thresh = 0; + sf->adaptive_rd_thresh_row_mt = 0; } } @@ -566,6 +576,12 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { RD_OPT *const rd = &cpi->rd; int i; + // best quality defaults + // Some speed-up features even for best quality as minimal impact on quality. + sf->partition_search_breakout_thr.dist = (1 << 19); + sf->partition_search_breakout_thr.rate = 80; + sf->ml_partition_search_early_termination = 0; + if (oxcf->mode == REALTIME) { set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed); } else if (oxcf->mode == GOOD) { @@ -591,11 +607,17 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { // With row based multi-threading, the following speed features // have to be disabled to guarantee that bitstreams encoded with single thread // and multiple threads match - if (cpi->oxcf.ethread_bit_match) { + if (cpi->oxcf.row_mt_bit_exact) { sf->adaptive_rd_thresh = 0; sf->allow_exhaustive_searches = 0; sf->adaptive_pred_interp_filter = 0; } + + // This is only used in motion vector unit test. + if (cpi->oxcf.motion_vector_unit_test == 1) + cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv; + else if (cpi->oxcf.motion_vector_unit_test == 2) + cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv; } void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { @@ -688,13 +710,12 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { // Some speed-up features even for best quality as minimal impact on quality. 
sf->adaptive_rd_thresh = 1; sf->tx_size_search_breakout = 1; - sf->partition_search_breakout_dist_thr = (1 << 19); - sf->partition_search_breakout_rate_thr = 80; if (oxcf->mode == REALTIME) - set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content); + set_rt_speed_feature_framesize_independent(cpi, sf, oxcf->speed, + oxcf->content); else if (oxcf->mode == GOOD) - set_good_speed_feature(cpi, cm, sf, oxcf->speed); + set_good_speed_feature_framesize_independent(cpi, cm, sf, oxcf->speed); cpi->full_search_sad = vp9_full_search_sad; cpi->diamond_search_sad = vp9_diamond_search_sad; @@ -762,9 +783,15 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { // With row based multi-threading, the following speed features // have to be disabled to guarantee that bitstreams encoded with single thread // and multiple threads match - if (cpi->oxcf.ethread_bit_match) { + if (cpi->oxcf.row_mt_bit_exact) { sf->adaptive_rd_thresh = 0; sf->allow_exhaustive_searches = 0; sf->adaptive_pred_interp_filter = 0; } + + // This is only used in motion vector unit test. 
+ if (cpi->oxcf.motion_vector_unit_test == 1) + cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv; + else if (cpi->oxcf.motion_vector_unit_test == 2) + cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h index a7cc7787273..cbdf8bc3090 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h @@ -193,6 +193,11 @@ typedef struct MV_SPEED_FEATURES { int fullpel_search_step_param; } MV_SPEED_FEATURES; +typedef struct PARTITION_SEARCH_BREAKOUT_THR { + int64_t dist; + int rate; +} PARTITION_SEARCH_BREAKOUT_THR; + #define MAX_MESH_STEP 4 typedef struct MESH_PATTERN { @@ -228,6 +233,9 @@ typedef struct SPEED_FEATURES { // mode to be evaluated. A high value means we will be faster. int adaptive_rd_thresh; + // Flag to use adaptive_rd_thresh when row-mt it enabled. + int adaptive_rd_thresh_row_mt; + // Enables skipping the reconstruction step (idct, recon) in the // intermediate steps assuming the last frame didn't have too many intra // blocks and the q is less than a threshold. @@ -442,8 +450,10 @@ typedef struct SPEED_FEATURES { INTERP_FILTER_MASK interp_filter_search_mask; // Partition search early breakout thresholds. 
- int64_t partition_search_breakout_dist_thr; - int partition_search_breakout_rate_thr; + PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr; + + // Machine-learning based partition search early termination + int ml_partition_search_early_termination; // Allow skipping partition search for still image frame int allow_partition_search_skip; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c index fdaa4814ca6..2b0307f8a11 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -89,8 +89,9 @@ void vp9_temporal_filter_init(void) { for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i; } -void vp9_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, - uint8_t *frame2, unsigned int block_width, +void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride, + const uint8_t *frame2, + unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count) { @@ -152,11 +153,11 @@ void vp9_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_temporal_filter_apply_c( - uint8_t *frame1_8, unsigned int stride, uint8_t *frame2_8, + const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count) { - uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8); - uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8); + const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8); + const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8); unsigned int i, j, k; int modifier; int byte = 0; @@ -225,6 +226,7 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, uint32_t distortion; 
uint32_t sse; int cost_list[5]; + const MvLimits tmp_mv_limits = x->mv_limits; MV best_ref_mv1 = { 0, 0 }; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ @@ -245,10 +247,15 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, step_param = mv_sf->reduce_first_step_size; step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); + vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); + vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param, search_method, sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1, ref_mv, 0, 0); + /* restore UMV window */ + x->mv_limits = tmp_mv_limits; + // Ignore mv costing by sending NULL pointer instead of cost array bestsme = cpi->find_fractional_mv_step( x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, @@ -766,7 +773,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { set_error_per_bit(&cpi->td.mb, rdmult); vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX); - if (!cpi->new_mt) + if (!cpi->row_mt) temporal_filter_iterate_c(cpi); else vp9_temporal_filter_row_mt(cpi); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c index dc2616dbe1d..814d769be38 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c @@ -123,7 +123,7 @@ const int16_t vp9_cat6_low_cost[256] = { 6620, 6632, 6654, 6666, 6677, 6689, 6751, 6763, 6774, 6786, 6808, 6820, 6831, 6843, 6890, 6902, 6913, 6925, 6947, 6959, 6970, 6982 }; -const int vp9_cat6_high_cost[64] = { +const uint16_t vp9_cat6_high_cost[64] = { 88, 2251, 2727, 4890, 3148, 5311, 5787, 7950, 3666, 5829, 6305, 8468, 6726, 8889, 9365, 11528, 3666, 5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 3666, @@ -133,7 +133,7 @@ const int vp9_cat6_high_cost[64] = { }; #if 
CONFIG_VP9_HIGHBITDEPTH -const int vp9_cat6_high10_high_cost[256] = { +const uint16_t vp9_cat6_high10_high_cost[256] = { 94, 2257, 2733, 4896, 3154, 5317, 5793, 7956, 3672, 5835, 6311, 8474, 6732, 8895, 9371, 11534, 3672, 5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 3672, @@ -159,7 +159,7 @@ const int vp9_cat6_high10_high_cost[256] = { 18075, 20238, 18496, 20659, 21135, 23298, 19014, 21177, 21653, 23816, 22074, 24237, 24713, 26876 }; -const int vp9_cat6_high12_high_cost[1024] = { +const uint16_t vp9_cat6_high12_high_cost[1024] = { 100, 2263, 2739, 4902, 3160, 5323, 5799, 7962, 3678, 5841, 6317, 8480, 6738, 8901, 9377, 11540, 3678, 5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 3678, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.h index c905715d7dc..b2f63ffef5a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.h @@ -76,25 +76,18 @@ extern const TOKENVALUE *vp9_dct_value_tokens_ptr; extern const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens; extern const int *vp9_dct_cat_lt_10_value_cost; extern const int16_t vp9_cat6_low_cost[256]; -extern const int vp9_cat6_high_cost[64]; -extern const int vp9_cat6_high10_high_cost[256]; -extern const int vp9_cat6_high12_high_cost[1024]; -static INLINE int vp9_get_cost(int16_t token, EXTRABIT extrabits, - const int *cat6_high_table) { - if (token != CATEGORY6_TOKEN) - return vp9_extra_bits[token].cost[extrabits >> 1]; - return vp9_cat6_low_cost[(extrabits >> 1) & 0xff] + - cat6_high_table[extrabits >> 9]; -} +extern const uint16_t vp9_cat6_high_cost[64]; +extern const uint16_t vp9_cat6_high10_high_cost[256]; +extern const uint16_t vp9_cat6_high12_high_cost[1024]; #if CONFIG_VP9_HIGHBITDEPTH -static INLINE const int 
*vp9_get_high_cost_table(int bit_depth) { +static INLINE const uint16_t *vp9_get_high_cost_table(int bit_depth) { return bit_depth == 8 ? vp9_cat6_high_cost : (bit_depth == 10 ? vp9_cat6_high10_high_cost : vp9_cat6_high12_high_cost); } #else -static INLINE const int *vp9_get_high_cost_table(int bit_depth) { +static INLINE const uint16_t *vp9_get_high_cost_table(int bit_depth) { (void)bit_depth; return vp9_cat6_high_cost; } @@ -118,7 +111,7 @@ static INLINE int16_t vp9_get_token(int v) { } static INLINE int vp9_get_token_cost(int v, int16_t *token, - const int *cat6_high_table) { + const uint16_t *cat6_high_table) { if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) { EXTRABIT extrabits; *token = CATEGORY6_TOKEN; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c index cc946dfd63e..a335a4ab55d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c @@ -51,8 +51,9 @@ struct vp9_extracfg { vpx_color_range_t color_range; int render_width; int render_height; - unsigned int new_mt; - unsigned int ethread_bit_match; + unsigned int row_mt; + unsigned int row_mt_bit_exact; + unsigned int motion_vector_unit_test; }; static struct vp9_extracfg default_extra_cfg = { @@ -84,8 +85,9 @@ static struct vp9_extracfg default_extra_cfg = { 0, // color range 0, // render width 0, // render height - 1, // new_mt - 0, // ethread_bit_match + 0, // row_mt + 0, // row_mt_bit_exact + 0, // motion_vector_unit_test }; struct vpx_codec_alg_priv { @@ -249,8 +251,9 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, "kf_min_dist not supported in auto mode, use 0 " "or kf_max_dist instead."); - RANGE_CHECK(extra_cfg, new_mt, 0, 1); - RANGE_CHECK(extra_cfg, ethread_bit_match, 0, 1); + RANGE_CHECK(extra_cfg, row_mt, 0, 1); + RANGE_CHECK(extra_cfg, row_mt_bit_exact, 0, 1); + RANGE_CHECK(extra_cfg, motion_vector_unit_test, 
0, 2); RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2); RANGE_CHECK(extra_cfg, cpu_used, -8, 8); RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6); @@ -560,8 +563,9 @@ static vpx_codec_err_t set_encoder_config( oxcf->target_level = extra_cfg->target_level; - oxcf->new_mt = extra_cfg->new_mt; - oxcf->ethread_bit_match = extra_cfg->ethread_bit_match; + oxcf->row_mt = extra_cfg->row_mt; + oxcf->row_mt_bit_exact = extra_cfg->row_mt_bit_exact; + oxcf->motion_vector_unit_test = extra_cfg->motion_vector_unit_test; for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { #if CONFIG_SPATIAL_SVC @@ -851,17 +855,25 @@ static vpx_codec_err_t ctrl_set_target_level(vpx_codec_alg_priv_t *ctx, return update_extra_cfg(ctx, &extra_cfg); } -static vpx_codec_err_t ctrl_set_new_mt(vpx_codec_alg_priv_t *ctx, +static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; - extra_cfg.new_mt = CAST(VP9E_SET_NEW_MT, args); + extra_cfg.row_mt = CAST(VP9E_SET_ROW_MT, args); return update_extra_cfg(ctx, &extra_cfg); } -static vpx_codec_err_t ctrl_set_ethread_bit_match(vpx_codec_alg_priv_t *ctx, - va_list args) { +static vpx_codec_err_t ctrl_enable_row_mt_bit_exact(vpx_codec_alg_priv_t *ctx, + va_list args) { + struct vp9_extracfg extra_cfg = ctx->extra_cfg; + extra_cfg.row_mt_bit_exact = CAST(VP9E_ENABLE_ROW_MT_BIT_EXACT, args); + return update_extra_cfg(ctx, &extra_cfg); +} + +static vpx_codec_err_t ctrl_enable_motion_vector_unit_test( + vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; - extra_cfg.ethread_bit_match = CAST(VP9E_ENABLE_THREAD_BIT_MATCH, args); + extra_cfg.motion_vector_unit_test = + CAST(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, args); return update_extra_cfg(ctx, &extra_cfg); } @@ -1460,7 +1472,7 @@ static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) { return VPX_CODEC_INVALID_PARAM; } - vp9_set_new_mt(ctx->cpi); + vp9_set_row_mt(ctx->cpi); return 
VPX_CODEC_OK; } @@ -1620,8 +1632,9 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP9E_SET_SVC_REF_FRAME_CONFIG, ctrl_set_svc_ref_frame_config }, { VP9E_SET_RENDER_SIZE, ctrl_set_render_size }, { VP9E_SET_TARGET_LEVEL, ctrl_set_target_level }, - { VP9E_SET_NEW_MT, ctrl_set_new_mt }, - { VP9E_ENABLE_THREAD_BIT_MATCH, ctrl_set_ethread_bit_match }, + { VP9E_SET_ROW_MT, ctrl_set_row_mt }, + { VP9E_ENABLE_ROW_MT_BIT_EXACT, ctrl_enable_row_mt_bit_exact }, + { VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test }, // Getters { VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer }, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c index 3d8968896cf..1da1794b760 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c @@ -47,12 +47,9 @@ static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, ctx->priv->init_flags = ctx->init_flags; priv->si.sz = sizeof(priv->si); priv->flushed = 0; - // Only do frame parallel decode when threads > 1. - priv->frame_parallel_decode = - (ctx->config.dec && (ctx->config.dec->threads > 1) && - (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING)) - ? 1 - : 0; + // TODO(jzern): remnants of frame-level parallel decoding should be + // removed. 
cf., https://bugs.chromium.org/p/webm/issues/detail?id=1395 + priv->frame_parallel_decode = 0; if (ctx->config.dec) { priv->cfg = *ctx->config.dec; ctx->config.dec = &priv->cfg; @@ -1053,7 +1050,10 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { CODEC_INTERFACE(vpx_codec_vp9_dx) = { "WebM Project VP9 Decoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, - VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | +#if CONFIG_VP9_HIGHBITDEPTH + VPX_CODEC_CAP_HIGHBITDEPTH | +#endif + VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t decoder_init, // vpx_codec_init_fn_t decoder_destroy, // vpx_codec_destroy_fn_t diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk index fe2baf5e830..e0913bea3e6 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk @@ -39,6 +39,7 @@ VP9_CX_SRCS-yes += encoder/vp9_encodemb.h VP9_CX_SRCS-yes += encoder/vp9_encodemv.h VP9_CX_SRCS-yes += encoder/vp9_extend.h VP9_CX_SRCS-yes += encoder/vp9_firstpass.h +VP9_CX_SRCS-yes += encoder/vp9_frame_scale.c VP9_CX_SRCS-yes += encoder/vp9_job_queue.h VP9_CX_SRCS-yes += encoder/vp9_lookahead.c VP9_CX_SRCS-yes += encoder/vp9_lookahead.h |