Diffstat (limited to 'chromium/third_party/libvpx/source/libvpx/vp9')
49 files changed, 1812 insertions, 713 deletions
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.h index dcc8e299899..ee9d37973ff 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.h @@ -25,7 +25,7 @@ struct VP9Common; void vp9_init_mv_probs(struct VP9Common *cm); -void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); +void vp9_adapt_mv_probs(struct VP9Common *cm, int allow_hp); static INLINE int use_mv_hp(const MV *ref) { const int kMvRefThresh = 64; // threshold for use of high-precision 1/8 mv @@ -127,7 +127,7 @@ typedef struct { nmv_component_counts comps[2]; } nmv_context_counts; -void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx); +void vp9_inc_mv(const MV *mv, nmv_context_counts *counts); #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c index 6c43af8ce80..adbda6c825b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c @@ -63,6 +63,20 @@ DECLARE_ALIGNED(256, static const InterpKernel, { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 } }; -const InterpKernel *vp9_filter_kernels[4] = { - sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters +// 4-tap filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_4[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 }, + { 0, 0, -6, 120, 18, -4, 0, 0 }, { 0, 0, -8, 114, 28, -6, 0, 0 }, + { 0, 0, -10, 108, 36, -6, 0, 0 }, { 0, 0, -12, 102, 46, -8, 0, 0 }, + { 0, 0, -12, 94, 56, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 }, + { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 }, + { 0, 0, -10, 56, 94, -12, 0, 0 }, { 0, 0, -8, 46, 102, -12, 0, 0 }, + { 0, 0, -6, 36, 108, -10, 0, 0 }, { 0, 0, -6, 28, 114, -8, 0, 0 }, + { 0, 0, -4, 18, 120, -6, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 } +}; + +const InterpKernel *vp9_filter_kernels[5] = { + sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters, + sub_pel_filters_4 }; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h index b379665b1c1..0382c88e7c0 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h @@ -25,6 +25,7 @@ extern "C" { #define EIGHTTAP_SHARP 2 #define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ #define BILINEAR 3 +#define FOURTAP 4 // The codec can operate in four possible inter prediction filter mode: // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. 
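The new sub_pel_filters_4 table keeps the 8-tap InterpKernel layout with the outer taps zeroed, so it is selected through the same vp9_filter_kernels[] indirection as the existing filters; each row sums to 128. As a rough illustration of what one kernel row does (a plain C sketch of the per-pixel arithmetic, not the library's optimized convolve code, assuming the usual FILTER_BITS of 7):

#include <stdint.h>

#define SUBPEL_TAPS 8
#define FILTER_BITS 7

typedef int16_t InterpKernel[SUBPEL_TAPS];

static uint8_t clip_pixel(int val) {
  return (uint8_t)(val < 0 ? 0 : (val > 255 ? 255 : val));
}

/* Apply one kernel row horizontally; src points at the pixel the filter is
 * centered on (the real convolve offsets src by SUBPEL_TAPS / 2 - 1 = 3). */
static uint8_t apply_kernel(const uint8_t *src, const InterpKernel kernel) {
  int k, sum = 0;
  for (k = 0; k < SUBPEL_TAPS; ++k) sum += kernel[k] * src[k - 3];
  return clip_pixel((sum + (1 << (FILTER_BITS - 1))) >> FILTER_BITS);
}

With a 4-tap row such as { 0, 0, -4, 126, 8, -2, 0, 0 } the two leading and two trailing multiplies are by zero, which is what lets dedicated 4-tap SIMD paths skip those taps and run cheaper than the full 8-tap filters.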
#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) @@ -32,7 +33,7 @@ extern "C" { typedef uint8_t INTERP_FILTER; -extern const InterpKernel *vp9_filter_kernels[4]; +extern const InterpKernel *vp9_filter_kernels[5]; #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c index da9180b71a5..95d6029f3b5 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c @@ -880,12 +880,12 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, - MODE_INFO **mi, const int mode_info_stride, + MODE_INFO **mi8x8, const int mode_info_stride, LOOP_FILTER_MASK *lfm) { int idx_32, idx_16, idx_8; const loop_filter_info_n *const lfi_n = &cm->lf_info; - MODE_INFO **mip = mi; - MODE_INFO **mip2 = mi; + MODE_INFO **mip = mi8x8; + MODE_INFO **mip2 = mi8x8; // These are offsets to the next mi in the 64x64 block. It is what gets // added to the mi ptr as we go through each loop. It helps us to avoid @@ -1087,13 +1087,19 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm, const int row_step_stride = cm->mi_stride * row_step; struct buf_2d *const dst = &plane->dst; uint8_t *const dst0 = dst->buf; - unsigned int mask_16x16[MI_BLOCK_SIZE] = { 0 }; - unsigned int mask_8x8[MI_BLOCK_SIZE] = { 0 }; - unsigned int mask_4x4[MI_BLOCK_SIZE] = { 0 }; - unsigned int mask_4x4_int[MI_BLOCK_SIZE] = { 0 }; + unsigned int mask_16x16[MI_BLOCK_SIZE]; + unsigned int mask_8x8[MI_BLOCK_SIZE]; + unsigned int mask_4x4[MI_BLOCK_SIZE]; + unsigned int mask_4x4_int[MI_BLOCK_SIZE]; uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; int r, c; + vp9_zero(mask_16x16); + vp9_zero(mask_8x8); + vp9_zero(mask_4x4); + vp9_zero(mask_4x4_int); + vp9_zero(lfl); + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { unsigned int mask_16x16_c = 0; unsigned int mask_8x8_c = 0; @@ -1330,6 +1336,8 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; uint16_t mask_4x4_int = lfm->int_4x4_uv; + vp9_zero(lfl_uv); + assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); // Vertical pass: do 2 rows at one time diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.h index daf3b91315e..39648a72c32 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.h @@ -97,7 +97,7 @@ struct VP9LfSyncData; // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. 
void vp9_setup_mask(struct VP9Common *const cm, const int mi_row, - const int mi_col, MODE_INFO **mi_8x8, + const int mi_col, MODE_INFO **mi8x8, const int mode_info_stride, LOOP_FILTER_MASK *lfm); void vp9_filter_block_plane_ss00(struct VP9Common *const cm, @@ -120,7 +120,7 @@ void vp9_loop_filter_init(struct VP9Common *cm); void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm, - struct macroblockd *mbd, int filter_level, + struct macroblockd *xd, int frame_filter_level, int y_only, int partial_frame); // Get the superblock lfm for a given mi_row, mi_col. diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h index 45d3b0f82f3..662b8ef5e12 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h @@ -37,10 +37,9 @@ extern "C" { #define REF_FRAMES_LOG2 3 #define REF_FRAMES (1 << REF_FRAMES_LOG2) -// 1 scratch frame for the new frame, 3 for scaled references on the encoder. -// TODO(jkoleszar): These 3 extra references could probably come from the -// normal reference pool. -#define FRAME_BUFFERS (REF_FRAMES + 4) +// 1 scratch frame for the new frame, REFS_PER_FRAME for scaled references on +// the encoder. +#define FRAME_BUFFERS (REF_FRAMES + 1 + REFS_PER_FRAME) #define FRAME_CONTEXTS_LOG2 2 #define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) @@ -259,6 +258,8 @@ typedef struct VP9Common { PARTITION_CONTEXT *above_seg_context; ENTROPY_CONTEXT *above_context; int above_context_alloc_cols; + + int lf_row; } VP9_COMMON; static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.h index 0aafa72ca8a..67efc1b4e4b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.h @@ -38,7 +38,7 @@ struct VP9Common; #define MFQE_PRECISION 4 int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, - vp9_ppflags_t *flags, int unscaled_width); + vp9_ppflags_t *ppflags, int unscaled_width); void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconinter.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconinter.h index 2c6d6695aba..992e30c344b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconinter.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconinter.h @@ -61,15 +61,15 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, const MV *mv_q3, + int dst_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, - int do_avg, const InterpKernel *kernel, + int ref, const InterpKernel *kernel, enum mv_precision precision, int x, int y); #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_build_inter_predictor( const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, - const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg, + const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, const 
InterpKernel *kernel, enum mv_precision precision, int x, int y, int bd); #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl index 6d7f9526098..d7ad2b693bc 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -62,7 +62,7 @@ add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, i add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; -add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type"; +add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type"; if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { # Note that there are more specializations appended when @@ -100,7 +100,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd"; - add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *output, int pitch, int tx_type, int bd"; + add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd"; if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { specialize qw/vp9_highbd_iht4x4_16_add neon sse4_1/; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scale.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scale.h index 53c6eef7256..aaafdf86719 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scale.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scale.h @@ -42,7 +42,7 @@ MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); #if CONFIG_VP9_HIGHBITDEPTH void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, int other_h, int this_w, int this_h, - int use_high); + int use_highbd); #else void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, int other_h, int this_w, int this_h); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c index d4b076645fb..36530fae677 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c @@ -229,6 +229,28 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm, workers, num_workers, lf_sync); } +void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level, + int num_workers) { + const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + + if (!frame_filter_level) return; + + if (!lf_sync->sync_range || sb_rows != lf_sync->rows || + num_workers > lf_sync->num_workers) { + vp9_loop_filter_dealloc(lf_sync); + vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); + } + + // Initialize cur_sb_col to -1 for all SB rows. + memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); + + lf_sync->corrupted = 0; + + memset(lf_sync->num_tiles_done, 0, + sizeof(*lf_sync->num_tiles_done) * sb_rows); + cm->lf_row = 0; +} + // Set up nsync by width. static INLINE int get_sync_range(int width) { // nsync numbers are picked by testing. 
For example, for 4k @@ -266,6 +288,25 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, pthread_cond_init(&lf_sync->cond[i], NULL); } } + pthread_mutex_init(&lf_sync->lf_mutex, NULL); + + CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex, + vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows)); + if (lf_sync->recon_done_mutex) { + int i; + for (i = 0; i < rows; ++i) { + pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL); + } + } + + CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond, + vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows)); + if (lf_sync->recon_done_cond) { + int i; + for (i = 0; i < rows; ++i) { + pthread_cond_init(&lf_sync->recon_done_cond[i], NULL); + } + } } #endif // CONFIG_MULTITHREAD @@ -276,6 +317,11 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); + CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done, + vpx_malloc(sizeof(*lf_sync->num_tiles_done) * + mi_cols_aligned_to_sb(cm->mi_rows) >> + MI_BLOCK_SIZE_LOG2)); + // Set up nsync. lf_sync->sync_range = get_sync_range(width); } @@ -298,15 +344,126 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { } vpx_free(lf_sync->cond); } + if (lf_sync->recon_done_mutex != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]); + } + vpx_free(lf_sync->recon_done_mutex); + } + + pthread_mutex_destroy(&lf_sync->lf_mutex); + if (lf_sync->recon_done_cond != NULL) { + int i; + for (i = 0; i < lf_sync->rows; ++i) { + pthread_cond_destroy(&lf_sync->recon_done_cond[i]); + } + vpx_free(lf_sync->recon_done_cond); + } #endif // CONFIG_MULTITHREAD + vpx_free(lf_sync->lfdata); vpx_free(lf_sync->cur_sb_col); + vpx_free(lf_sync->num_tiles_done); // clear the structure as the source of this call may be a resize in which // case this call will be followed by an _alloc() which may fail. vp9_zero(*lf_sync); } } +static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { + int return_val = -1; + int cur_row; + const int max_rows = cm->mi_rows; + +#if CONFIG_MULTITHREAD + const int tile_cols = 1 << cm->log2_tile_cols; + + pthread_mutex_lock(&lf_sync->lf_mutex); + if (cm->lf_row < max_rows) { + cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; + return_val = cm->lf_row; + cm->lf_row += MI_BLOCK_SIZE; + if (cm->lf_row < max_rows) { + /* If this is not the last row, make sure the next row is also decoded. + * This is because the intra predict has to happen before loop filter */ + cur_row += 1; + } + } + pthread_mutex_unlock(&lf_sync->lf_mutex); + + if (return_val == -1) return return_val; + + pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]); + if (lf_sync->num_tiles_done[cur_row] < tile_cols) { + pthread_cond_wait(&lf_sync->recon_done_cond[cur_row], + &lf_sync->recon_done_mutex[cur_row]); + } + pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]); + pthread_mutex_lock(&lf_sync->lf_mutex); + if (lf_sync->corrupted) { + return_val = -1; + } + pthread_mutex_unlock(&lf_sync->lf_mutex); +#else + (void)lf_sync; + if (cm->lf_row < max_rows) { + cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; + return_val = cm->lf_row; + cm->lf_row += MI_BLOCK_SIZE; + if (cm->lf_row < max_rows) { + /* If this is not the last row, make sure the next row is also decoded. 
+ * This is because the intra predict has to happen before loop filter */ + cur_row += 1; + } + } +#endif // CONFIG_MULTITHREAD + + return return_val; +} + +void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) { + int mi_row; + VP9_COMMON *cm = lf_data->cm; + + while ((mi_row = get_next_row(cm, lf_sync)) != -1 && mi_row < cm->mi_rows) { + lf_data->start = mi_row; + lf_data->stop = mi_row + MI_BLOCK_SIZE; + + thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only, + lf_sync); + } +} + +void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, + int corrupted) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&lf_sync->lf_mutex); + lf_sync->corrupted |= corrupted; + pthread_mutex_unlock(&lf_sync->lf_mutex); + pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); + lf_sync->num_tiles_done[row] += 1; + if (num_tiles == lf_sync->num_tiles_done[row]) { + if (is_last_row) { + /* The last 2 rows wait on the last row to be done. + * So, we have to broadcast the signal in this case. + */ + pthread_cond_broadcast(&lf_sync->recon_done_cond[row]); + } else { + pthread_cond_signal(&lf_sync->recon_done_cond[row]); + } + } + pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]); +#else + (void)lf_sync; + (void)num_tiles; + (void)row; + (void)is_last_row; + (void)corrupted; +#endif // CONFIG_MULTITHREAD +} + // Accumulate frame counts. void vp9_accumulate_frame_counts(FRAME_COUNTS *accum, const FRAME_COUNTS *counts, int is_dec) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h index f92df5bd62d..b97e9ee134d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h @@ -37,6 +37,14 @@ typedef struct VP9LfSyncData { // Row-based parallel loopfilter data LFWorkerData *lfdata; int num_workers; + +#if CONFIG_MULTITHREAD + pthread_mutex_t lf_mutex; + pthread_mutex_t *recon_done_mutex; + pthread_cond_t *recon_done_cond; +#endif + int *num_tiles_done; + int corrupted; } VP9LfSync; // Allocate memory for loopfilter row synchronization. 
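The new VP9LfSync fields above (lf_mutex, per-row recon_done_mutex/recon_done_cond, num_tiles_done, corrupted) form a per-superblock-row gate between tile reconstruction and in-thread loop filtering: each tile worker marks a row done via vp9_set_row(), and get_next_row() blocks until every tile column has reconstructed that row, plus the row below it, because intra prediction of the next row must finish before the current row may be filtered. A minimal standalone sketch of the same gate, using simplified hypothetical names rather than the actual VP9LfSync layout:

#include <pthread.h>

typedef struct {
  pthread_mutex_t mutex;
  pthread_cond_t cond;
  int tiles_done; /* tile columns that have finished reconstructing this row */
} RowGate;

/* Called by a tile decode worker when it finishes one superblock row. */
static void row_done(RowGate *gate, int num_tile_cols) {
  pthread_mutex_lock(&gate->mutex);
  if (++gate->tiles_done == num_tile_cols) pthread_cond_broadcast(&gate->cond);
  pthread_mutex_unlock(&gate->mutex);
}

/* Called by the loop-filter side before it filters that row. */
static void wait_for_row(RowGate *gate, int num_tile_cols) {
  pthread_mutex_lock(&gate->mutex);
  while (gate->tiles_done < num_tile_cols)
    pthread_cond_wait(&gate->cond, &gate->mutex);
  pthread_mutex_unlock(&gate->mutex);
}

In the diff itself, cm->lf_row (protected by lf_mutex) is the shared cursor that hands the next row to whichever worker reaches vp9_loopfilter_rows() first, and the corrupted flag lets set_rows_after_error() release any waiters when a tile fails to decode.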
@@ -53,6 +61,17 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm, int partial_frame, VPxWorker *workers, int num_workers, VP9LfSync *lf_sync); +// Multi-threaded loopfilter initialisations +void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm, + int frame_filter_level, int num_workers); + +void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync); + +void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, + int corrupted); + +void vp9_set_last_decoded_row(struct VP9Common *cm, int tile_col, int mi_row); + void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, const struct FRAME_COUNTS *counts, int is_dec); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c index 48c49e2f5f3..bc0fc6197e6 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c @@ -1451,6 +1451,25 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, return vpx_reader_find_end(&tile_data->bit_reader); } +static void set_rows_after_error(VP9LfSync *lf_sync, int start_row, int mi_rows, + int num_tiles_left, int total_num_tiles) { + do { + int mi_row; + const int aligned_rows = mi_cols_aligned_to_sb(mi_rows); + const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2); + const int corrupted = 1; + for (mi_row = start_row; mi_row < mi_rows; mi_row += MI_BLOCK_SIZE) { + const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2); + vp9_set_row(lf_sync, total_num_tiles, mi_row >> MI_BLOCK_SIZE_LOG2, + is_last_row, corrupted); + } + /* If there are multiple tiles, the second tile should start marking row + * progress from row 0. + */ + start_row = 0; + } while (num_tiles_left--); +} + // On entry 'tile_data->data_end' points to the end of the input frame, on exit // it is updated to reflect the bitreader position of the final tile column if // present in the tile buffer group or NULL otherwise. @@ -1461,6 +1480,12 @@ static int tile_worker_hook(void *arg1, void *arg2) { TileInfo *volatile tile = &tile_data->xd.tile; const int final_col = (1 << pbi->common.log2_tile_cols) - 1; const uint8_t *volatile bit_reader_end = NULL; + VP9_COMMON *cm = &pbi->common; + + LFWorkerData *lf_data = tile_data->lf_data; + VP9LfSync *lf_sync = tile_data->lf_sync; + + volatile int mi_row = 0; volatile int n = tile_data->buf_start; tile_data->error_info.setjmp = 1; @@ -1468,14 +1493,26 @@ static int tile_worker_hook(void *arg1, void *arg2) { tile_data->error_info.setjmp = 0; tile_data->xd.corrupted = 1; tile_data->data_end = NULL; + if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { + const int num_tiles_left = tile_data->buf_end - n; + const int mi_row_start = mi_row; + set_rows_after_error(lf_sync, mi_row_start, cm->mi_rows, num_tiles_left, + 1 << cm->log2_tile_cols); + } return 0; } tile_data->xd.corrupted = 0; do { - int mi_row, mi_col; + int mi_col; const TileBuffer *const buf = pbi->tile_buffers + n; + + /* Initialize to 0 is safe since we do not deal with streams that have + * more than one row of tiles. 
(So tile->mi_row_start will be 0) + */ + assert(cm->log2_tile_rows == 0); + mi_row = 0; vp9_zero(tile_data->dqcoeff); vp9_tile_init(tile, &pbi->common, 0, buf->col); setup_token_decoder(buf->data, tile_data->data_end, buf->size, @@ -1493,6 +1530,14 @@ static int tile_worker_hook(void *arg1, void *arg2) { mi_col += MI_BLOCK_SIZE) { decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); } + if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { + const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); + const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2); + const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2); + vp9_set_row(lf_sync, 1 << cm->log2_tile_cols, + mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row, + tile_data->xd.corrupted); + } } if (buf->col == final_col) { @@ -1500,6 +1545,21 @@ static int tile_worker_hook(void *arg1, void *arg2) { } } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end); + if (pbi->lpf_mt_opt && n < tile_data->buf_end && cm->lf.filter_level && + !cm->skip_loop_filter) { + /* This was not incremented in the tile loop, so increment before tiles left + * calculation + */ + ++n; + set_rows_after_error(lf_sync, 0, cm->mi_rows, tile_data->buf_end - n, + 1 << cm->log2_tile_cols); + } + + if (pbi->lpf_mt_opt && !tile_data->xd.corrupted && cm->lf.filter_level && + !cm->skip_loop_filter) { + vp9_loopfilter_rows(lf_data, lf_sync); + } + tile_data->data_end = bit_reader_end; return !tile_data->xd.corrupted; } @@ -1516,6 +1576,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, VP9_COMMON *const cm = &pbi->common; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); const uint8_t *bit_reader_end = NULL; + VP9LfSync *lf_row_sync = &pbi->lf_row_sync; + YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; @@ -1542,12 +1604,26 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, } } + // Initialize LPF + if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { + vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level, + pbi->num_tile_workers); + } + // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; TileWorkerData *const tile_data = &pbi->tile_worker_data[n + pbi->total_tiles]; winterface->sync(worker); + + if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { + tile_data->lf_sync = lf_row_sync; + tile_data->lf_data = &tile_data->lf_sync->lfdata[n]; + vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane); + tile_data->lf_data->y_only = 0; + } + tile_data->xd = pbi->mb; tile_data->xd.counts = cm->frame_parallel_decoding_mode ? 
NULL : &tile_data->counts; @@ -1908,6 +1984,28 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, setup_segmentation_dequant(cm); setup_tile_info(cm, rb); + if (pbi->row_mt == 1) { + int num_sbs = 1; + + if (pbi->row_mt_worker_data == NULL) { + CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data, + vpx_calloc(1, sizeof(*pbi->row_mt_worker_data))); + } + + if (pbi->max_threads > 1) { + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; + const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); + const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; + + num_sbs = sb_cols * sb_rows; + } + + if (num_sbs > pbi->row_mt_worker_data->num_sbs) { + vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data); + vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs); + } + } sz = vpx_rb_read_literal(rb, 16); if (sz == 0) @@ -2069,17 +2167,19 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { // Multi-threaded tile decoder *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); - if (!xd->corrupted) { - if (!cm->skip_loop_filter) { - // If multiple threads are used to decode tiles, then we use those - // threads to do parallel loopfiltering. - vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level, - 0, 0, pbi->tile_workers, pbi->num_tile_workers, - &pbi->lf_row_sync); + if (!pbi->lpf_mt_opt) { + if (!xd->corrupted) { + if (!cm->skip_loop_filter) { + // If multiple threads are used to decode tiles, then we use those + // threads to do parallel loopfiltering. + vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, + cm->lf.filter_level, 0, 0, pbi->tile_workers, + pbi->num_tile_workers, &pbi->lf_row_sync); + } + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data is corrupted."); } - } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. 
Frame data is corrupted."); } } else { *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c index 5e41274cc89..1e2a4429347 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c @@ -55,6 +55,43 @@ static void vp9_dec_setup_mi(VP9_COMMON *cm) { cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); } +void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data, + VP9_COMMON *cm, int num_sbs) { + int plane; + const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) * + sizeof(*row_mt_worker_data->dqcoeff[0]); + row_mt_worker_data->num_sbs = num_sbs; + for (plane = 0; plane < 3; ++plane) { + CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane], + vpx_memalign(16, dqcoeff_size)); + memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size); + CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane], + vpx_calloc(num_sbs << EOBS_PER_SB_LOG2, + sizeof(*row_mt_worker_data->eob[plane]))); + } + CHECK_MEM_ERROR(cm, row_mt_worker_data->partition, + vpx_calloc(num_sbs * PARTITIONS_PER_SB, + sizeof(*row_mt_worker_data->partition))); + CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map, + vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map))); +} + +void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) { + if (row_mt_worker_data != NULL) { + int plane; + for (plane = 0; plane < 3; ++plane) { + vpx_free(row_mt_worker_data->eob[plane]); + row_mt_worker_data->eob[plane] = NULL; + vpx_free(row_mt_worker_data->dqcoeff[plane]); + row_mt_worker_data->dqcoeff[plane] = NULL; + } + vpx_free(row_mt_worker_data->partition); + row_mt_worker_data->partition = NULL; + vpx_free(row_mt_worker_data->recon_map); + row_mt_worker_data->recon_map = NULL; + } +} + static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) { cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip)); if (!cm->mip) return 1; @@ -140,6 +177,10 @@ void vp9_decoder_remove(VP9Decoder *pbi) { vp9_loop_filter_dealloc(&pbi->lf_row_sync); } + if (pbi->row_mt == 1) { + vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data); + vpx_free(pbi->row_mt_worker_data); + } vp9_remove_common(&pbi->common); vpx_free(pbi); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h index 1c488961a8d..9a582fffbb8 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h @@ -26,6 +26,10 @@ extern "C" { #endif +#define EOBS_PER_SB_LOG2 8 +#define DQCOEFFS_PER_SB_LOG2 12 +#define PARTITIONS_PER_SB 85 + typedef struct TileBuffer { const uint8_t *data; size_t size; @@ -37,12 +41,22 @@ typedef struct TileWorkerData { int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive vpx_reader bit_reader; FRAME_COUNTS counts; + LFWorkerData *lf_data; + VP9LfSync *lf_sync; DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. 
So planes must be decoded serially */ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); struct vpx_internal_error_info error_info; } TileWorkerData; +typedef struct RowMTWorkerData { + int num_sbs; + int *eob[MAX_MB_PLANE]; + PARTITION_TYPE *partition; + tran_low_t *dqcoeff[MAX_MB_PLANE]; + int8_t *recon_map; +} RowMTWorkerData; + typedef struct VP9Decoder { DECLARE_ALIGNED(16, MACROBLOCKD, mb); @@ -74,10 +88,12 @@ typedef struct VP9Decoder { int hold_ref_buf; // hold the reference buffer. int row_mt; + int lpf_mt_opt; + RowMTWorkerData *row_mt_worker_data; } VP9Decoder; int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, - const uint8_t **dest); + const uint8_t **psource); int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, vp9_ppflags_t *flags); @@ -111,6 +127,10 @@ struct VP9Decoder *vp9_decoder_create(BufferPool *const pool); void vp9_decoder_remove(struct VP9Decoder *pbi); +void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data, + VP9_COMMON *cm, int num_sbs); +void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data); + static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, BufferPool *const pool) { if (idx >= 0 && frame_bufs[idx].ref_count > 0) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c index 513718e7cb1..f8dd0a6f7a9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c @@ -23,13 +23,13 @@ void vp9_fdct8x8_quant_neon(const int16_t *input, int stride, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { tran_low_t temp_buffer[64]; (void)coeff_ptr; vpx_fdct8x8_neon(input, temp_buffer, stride); vp9_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, round_ptr, quant_ptr, - qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan_ptr, - iscan_ptr); + qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, + iscan); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c index 97a09bdff6f..8b62b450cef 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c @@ -97,6 +97,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff); store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff); } +#ifdef __aarch64__ + *eob_ptr = vmaxvq_s16(v_eobmax_76543210); +#else { const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210)); @@ -111,6 +114,7 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0); } +#endif // __aarch64__ } static INLINE int32x4_t extract_sign_bit(int32x4_t a) { @@ -122,7 +126,7 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, 
const int16_t *iscan_ptr) { + const int16_t *scan, const int16_t *iscan) { const int16x8_t one = vdupq_n_s16(1); const int16x8_t neg_one = vdupq_n_s16(-1); @@ -134,8 +138,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, const int16x8_t dequant_thresh = vshrq_n_s16(vld1q_s16(dequant_ptr), 2); // Process dc and the first seven ac coeffs. - const uint16x8_t iscan = - vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one)); + const uint16x8_t v_iscan = + vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); @@ -169,12 +173,12 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, dqcoeff = vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)); - eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), iscan); + eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan); store_s16q_to_tran_low(qcoeff_ptr, qcoeff); store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff); - iscan_ptr += 8; + iscan += 8; coeff_ptr += 8; qcoeff_ptr += 8; dqcoeff_ptr += 8; @@ -188,8 +192,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, // Process the rest of the ac coeffs. for (i = 8; i < 32 * 32; i += 8) { - const uint16x8_t iscan = - vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one)); + const uint16x8_t v_iscan = + vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); @@ -215,17 +219,20 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1)); eob_max = - vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), iscan)); + vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan)); store_s16q_to_tran_low(qcoeff_ptr, qcoeff); store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff); - iscan_ptr += 8; + iscan += 8; coeff_ptr += 8; qcoeff_ptr += 8; dqcoeff_ptr += 8; } +#ifdef __aarch64__ + *eob_ptr = vmaxvq_u16(eob_max); +#else { const uint16x4_t eob_max_0 = vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max)); @@ -233,5 +240,6 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1); vst1_lane_u16(eob_ptr, eob_max_2, 0); } +#endif // __aarch64__ } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c index 3720b0876d8..4f88b8fff6f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c @@ -42,8 +42,8 @@ void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob; bool16x8_t zero_coeff0, zero_coeff1; @@ -52,10 +52,10 @@ void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); int16x8_t coeff0 = 
vec_vsx_ld(0, coeff_ptr); int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); - int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr); - int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr); + int16x8_t scan0 = vec_vsx_ld(0, iscan); + int16x8_t scan1 = vec_vsx_ld(16, iscan); - (void)scan_ptr; + (void)scan; (void)skip_block; assert(!skip_block); @@ -103,9 +103,9 @@ void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, coeff0 = vec_vsx_ld(off0, coeff_ptr); coeff1 = vec_vsx_ld(off1, coeff_ptr); coeff2 = vec_vsx_ld(off2, coeff_ptr); - scan0 = vec_vsx_ld(off0, iscan_ptr); - scan1 = vec_vsx_ld(off1, iscan_ptr); - scan2 = vec_vsx_ld(off2, iscan_ptr); + scan0 = vec_vsx_ld(off0, iscan); + scan1 = vec_vsx_ld(off1, iscan); + scan2 = vec_vsx_ld(off2, iscan); qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant); zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); @@ -169,8 +169,7 @@ void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + const int16_t *scan, const int16_t *iscan) { // In stage 1, we quantize 16 coeffs (DC + 15 AC) // In stage 2, we loop 42 times and quantize 24 coeffs per iteration // (32 * 32 - 16) / 24 = 42 @@ -188,13 +187,13 @@ void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); - int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr); - int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr); + int16x8_t scan0 = vec_vsx_ld(0, iscan); + int16x8_t scan1 = vec_vsx_ld(16, iscan); int16x8_t thres = vec_sra(dequant, vec_splats((uint16_t)2)); int16x8_t abs_coeff0 = vec_abs(coeff0); int16x8_t abs_coeff1 = vec_abs(coeff1); - (void)scan_ptr; + (void)scan; (void)skip_block; (void)n_coeffs; assert(!skip_block); @@ -238,9 +237,9 @@ void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, coeff0 = vec_vsx_ld(off0, coeff_ptr); coeff1 = vec_vsx_ld(off1, coeff_ptr); coeff2 = vec_vsx_ld(off2, coeff_ptr); - scan0 = vec_vsx_ld(off0, iscan_ptr); - scan1 = vec_vsx_ld(off1, iscan_ptr); - scan2 = vec_vsx_ld(off2, iscan_ptr); + scan0 = vec_vsx_ld(off0, iscan); + scan1 = vec_vsx_ld(off1, iscan); + scan2 = vec_vsx_ld(off2, iscan); abs_coeff0 = vec_abs(coeff0); abs_coeff1 = vec_abs(coeff1); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h index 06130584f0f..563fdbbdecd 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h @@ -211,6 +211,8 @@ struct macroblock { #if CONFIG_ML_VAR_PARTITION DECLARE_ALIGNED(16, uint8_t, est_pred[64 * 64]); #endif // CONFIG_ML_VAR_PARTITION + + struct scale_factors *me_sf; }; #ifdef __cplusplus diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c index 8c039b2cb9d..b70890e68a8 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c @@ -360,6 +360,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, int is_skin = 0; int increase_denoising = 0; int consec_zeromv = 0; + int last_is_reference = 
cpi->ref_frame_flags & VP9_LAST_FLAG; mv_col = ctx->best_sse_mv.as_mv.col; mv_row = ctx->best_sse_mv.as_mv.row; motion_magnitude = mv_row * mv_row + mv_col * mv_col; @@ -403,7 +404,12 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, } if (!is_skin && denoiser->denoising_level == kDenHigh) increase_denoising = 1; - if (denoiser->denoising_level >= kDenLow && !ctx->sb_skip_denoising) + // Copy block if LAST_FRAME is not a reference. + // Last doesn't always exist when SVC layers are dynamically changed, e.g. top + // spatial layer doesn't have last reference when it's brought up for the + // first time on the fly. + if (last_is_reference && denoiser->denoising_level >= kDenLow && + !ctx->sb_skip_denoising) decision = perform_motion_compensation( &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx, motion_magnitude, is_skin, &zeromv_filter, consec_zeromv, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c index ad30951afa3..98343f0d243 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c @@ -930,7 +930,9 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, PARTITION_TYPE partition_high; if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0; - if (mi_row >= (cm->mi_rows >> 1) || mi_col >= (cm->mi_cols >> 1)) return 0; + if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] || + mi_col >= svc->mi_cols[svc->spatial_layer_id - 1]) + return 0; // Find corresponding (mi_col/mi_row) block down-scaled by 2x2. start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col; @@ -1378,6 +1380,20 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, x->sb_use_mv_part = 1; x->sb_mvcol_part = mi->mv[0].as_mv.col; x->sb_mvrow_part = mi->mv[0].as_mv.row; + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + cpi->svc.spatial_layer_id == 0 && + cpi->rc.high_num_blocks_with_motion && !x->zero_temp_sad_source && + cm->width > 640 && cm->height > 480) { + // Disable split below 16x16 block size when scroll motion is detected. + // TODO(marpan/jianj): Improve this condition: issue is that search + // range is hard-coded/limited in vp9_int_pro_motion_estimation() so + // scroll motion may not be detected here. + if ((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) || + y_sad < 100000) { + compute_minmax_variance = 0; + thresholds[2] = INT64_MAX; + } + } } y_sad_last = y_sad; @@ -3183,7 +3199,7 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, #define FEATURES 4 // ML-based partition search breakout. -static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize, +static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize, const MACROBLOCK *const x, const RD_COST *const rd_cost) { DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; @@ -3214,14 +3230,29 @@ static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize, if (!linear_weights) return 0; { // Generate feature values. 
+#if CONFIG_VP9_HIGHBITDEPTH + const int ac_q = + vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); +#else const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth); +#endif // CONFIG_VP9_HIGHBITDEPTH const int num_pels_log2 = num_pels_log2_lookup[bsize]; int feature_index = 0; unsigned int var, sse; float rate_f, dist_f; +#if CONFIG_VP9_HIGHBITDEPTH + if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + var = + vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd); + } else { + var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, + vp9_64_zeros, 0, &sse); + } +#else var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, &sse); +#endif var = var >> num_pels_log2; vpx_clear_system_state(); @@ -3288,7 +3319,12 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, { const int64_t none_rdcost = pc_tree->none.rdcost; const VP9_COMMON *const cm = &cpi->common; +#if CONFIG_VP9_HIGHBITDEPTH + const int dc_q = + vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); +#else const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); +#endif // CONFIG_VP9_HIGHBITDEPTH int feature_index = 0; unsigned int block_var = 0; unsigned int sub_block_var[4] = { 0 }; @@ -3404,31 +3440,38 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; const NN_CONFIG *nn_config = NULL; - DECLARE_ALIGNED(16, uint8_t, pred_buf[64 * 64]); +#if CONFIG_VP9_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); + uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + ? (CONVERT_TO_BYTEPTR(pred_buffer)) + : pred_buffer; +#else + DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]); + uint8_t *const pred_buf = pred_buffer; +#endif // CONFIG_VP9_HIGHBITDEPTH + const int speed = cpi->oxcf.speed; int i; - float thresh_low = -1.0f; - float thresh_high = 0.0f; + float thresh = 0.0f; switch (bsize) { case BLOCK_64X64: nn_config = &vp9_var_rd_part_nnconfig_64; - thresh_low = -3.0f; - thresh_high = 3.0f; + thresh = speed > 0 ? 3.5f : 3.0f; break; case BLOCK_32X32: nn_config = &vp9_var_rd_part_nnconfig_32; - thresh_low = -3.0; - thresh_high = 3.0f; + thresh = speed > 0 ? 3.5f : 3.0f; break; case BLOCK_16X16: nn_config = &vp9_var_rd_part_nnconfig_16; - thresh_low = -4.0; - thresh_high = 4.0f; + thresh = speed > 0 ? 3.5f : 4.0f; break; case BLOCK_8X8: nn_config = &vp9_var_rd_part_nnconfig_8; - thresh_low = -2.0; - thresh_high = 2.0f; + if (cm->width >= 720 && cm->height >= 720) + thresh = speed > 0 ? 2.5f : 2.0f; + else + thresh = speed > 0 ? 3.5f : 2.0f; break; default: assert(0 && "Unexpected block size."); return; } @@ -3476,7 +3519,12 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, { float features[FEATURES] = { 0.0f }; +#if CONFIG_VP9_HIGHBITDEPTH + const int dc_q = + vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8); +#else const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); +#endif // CONFIG_VP9_HIGHBITDEPTH int feature_idx = 0; float score; @@ -3520,8 +3568,8 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, // partition is better than the non-split partition. So if the score is // high enough, we skip the none-split partition search; if the score is // low enough, we skip the split partition search. 
- if (score > thresh_high) *none = 0; - if (score < thresh_low) *split = 0; + if (score > thresh) *none = 0; + if (score < -thresh) *split = 0; } } #undef FEATURES @@ -3529,7 +3577,8 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int orig_rdmult) { - TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; + const int gf_group_index = cpi->twopass.gf_group.index; + TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index]; TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; int tpl_stride = tpl_frame->stride; int64_t intra_cost = 0; @@ -3544,9 +3593,9 @@ int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, if (tpl_frame->is_valid == 0) return orig_rdmult; - if (cpi->common.show_frame) return orig_rdmult; + if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult; - if (cpi->twopass.gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult; + if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult; for (row = mi_row; row < mi_row + mi_high; ++row) { for (col = mi_col; col < mi_col + mi_wide; ++col) { @@ -3759,14 +3808,10 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->partitioning = PARTITION_NONE; if (cpi->sf.ml_var_partition_pruning) { - int do_ml_var_partition_pruning = + const int do_ml_var_partition_pruning = !frame_is_intra_only(cm) && partition_none_allowed && do_split && mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows && mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols; -#if CONFIG_VP9_HIGHBITDEPTH - if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - do_ml_var_partition_pruning = 0; -#endif // CONFIG_VP9_HIGHBITDEPTH if (do_ml_var_partition_pruning) { ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col, &partition_none_allowed, &do_split); @@ -3814,13 +3859,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) { - int use_ml_based_breakout = + const int use_ml_based_breakout = cpi->sf.use_ml_partition_search_breakout && cm->base_qindex >= 100; -#if CONFIG_VP9_HIGHBITDEPTH - if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - use_ml_based_breakout = 0; -#endif // CONFIG_VP9_HIGHBITDEPTH if (use_ml_based_breakout) { if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) { do_split = 0; @@ -4019,13 +4060,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } { - int do_ml_rect_partition_pruning = + const int do_ml_rect_partition_pruning = !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split && (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8; -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - do_ml_rect_partition_pruning = 0; -#endif if (do_ml_rect_partition_pruning) { ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed, &partition_vert_allowed, best_rdc.rdcost, mi_row, @@ -4505,15 +4542,9 @@ static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x, int mi_col) { VP9_COMMON *const cm = &cpi->common; const NN_CONFIG *nn_config = NULL; - float thresh_low = -0.2f; - float thresh_high = 0.0f; switch (bsize) { - case BLOCK_64X64: - nn_config = &vp9_var_part_nnconfig_64; - thresh_low = -0.3f; - thresh_high = -0.1f; - break; + case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break; case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break; case BLOCK_16X16: nn_config = 
&vp9_var_part_nnconfig_16; break; case BLOCK_8X8: break; @@ -4525,6 +4556,7 @@ static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x, vpx_clear_system_state(); { + const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f; float features[FEATURES] = { 0.0f }; const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); int feature_idx = 0; @@ -4565,8 +4597,8 @@ static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x, assert(feature_idx == FEATURES); nn_predict(features, nn_config, score); - if (score[0] > thresh_high) return 3; - if (score[0] < thresh_low) return 0; + if (score[0] > thresh) return PARTITION_SPLIT; + if (score[0] < -thresh) return PARTITION_NONE; return -1; } } @@ -4644,8 +4676,8 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (partition_none_allowed && do_split) { const int ml_predicted_partition = ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col); - if (ml_predicted_partition == 0) do_split = 0; - if (ml_predicted_partition == 3) partition_none_allowed = 0; + if (ml_predicted_partition == PARTITION_NONE) do_split = 0; + if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0; } } #endif // CONFIG_ML_VAR_PARTITION @@ -5628,7 +5660,6 @@ static void encode_frame_internal(VP9_COMP *cpi) { xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; - vp9_zero(*td->counts); vp9_zero(cpi->td.rd_counts); @@ -5693,7 +5724,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION) source_var_based_partition_search_method(cpi); - } else if (gf_group_index && gf_group_index < MAX_LAG_BUFFERS && + } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE && cpi->sf.enable_tpl_model) { TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h index 8bbf857872d..2f1be4b233f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h @@ -27,7 +27,7 @@ void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref, unsigned int *const max_mv_magnitude); void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], - const nmv_context *mvctx, int usehp); + const nmv_context *ctx, int usehp); void vp9_update_mv_count(ThreadData *td); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c index edb4cb288c8..33cfd9f75fe 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c @@ -52,6 +52,9 @@ #include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mbgraph.h" +#if CONFIG_NON_GREEDY_MV +#include "vp9/encoder/vp9_mcomp.h" +#endif #include "vp9/encoder/vp9_multi_thread.h" #include "vp9/encoder/vp9_noise_estimate.h" #include "vp9/encoder/vp9_picklpf.h" @@ -2359,10 +2362,21 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, vp9_set_speed_features_framesize_dependent(cpi); if (cpi->sf.enable_tpl_model) { - for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) { - int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); - + const int mi_cols = 
mi_cols_aligned_to_sb(cm->mi_cols); + const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); +#if CONFIG_NON_GREEDY_MV + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_arr, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr))); + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_sort, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort))); + CHECK_MEM_ERROR( + cm, cpi->feature_score_loc_heap, + vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap))); +#endif + // TODO(jingning): Reduce the actual memory use for tpl model build up. + for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr, vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr))); @@ -2373,6 +2387,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, cpi->tpl_stats[frame].mi_rows = cm->mi_rows; cpi->tpl_stats[frame].mi_cols = cm->mi_cols; } + + for (frame = 0; frame < REF_FRAMES; ++frame) { + cpi->enc_frame_buf[frame].mem_valid = 0; + cpi->enc_frame_buf[frame].released = 1; + } } // Allocate memory to store variances for a frame. @@ -2449,6 +2468,17 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, vp9_loop_filter_init(cm); + // Set up the unit scaling factor used during motion search. +#if CONFIG_VP9_HIGHBITDEPTH + vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height, + cm->width, cm->height, + cm->use_highbitdepth); +#else + vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height, + cm->width, cm->height); +#endif // CONFIG_VP9_HIGHBITDEPTH + cpi->td.mb.me_sf = &cpi->me_sf; + cm->error.setjmp = 0; return cpi; @@ -2561,7 +2591,12 @@ void vp9_remove_compressor(VP9_COMP *cpi) { vp9_denoiser_free(&(cpi->denoiser)); #endif - for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) { +#if CONFIG_NON_GREEDY_MV + vpx_free(cpi->feature_score_loc_arr); + vpx_free(cpi->feature_score_loc_sort); + vpx_free(cpi->feature_score_loc_heap); +#endif + for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); cpi->tpl_stats[frame].is_valid = 0; } @@ -3217,8 +3252,8 @@ void vp9_scale_references(VP9_COMP *cpi) { if (cpi->oxcf.pass == 0 && !cpi->use_svc) { // Check for release of scaled reference. buf_idx = cpi->scaled_ref_idx[ref_frame - 1]; - buf = (buf_idx != INVALID_IDX) ? &pool->frame_bufs[buf_idx] : NULL; - if (buf != NULL) { + if (buf_idx != INVALID_IDX) { + buf = &pool->frame_bufs[buf_idx]; --buf->ref_count; cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX; } @@ -3249,22 +3284,21 @@ static void release_scaled_references(VP9_COMP *cpi) { refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0; for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { const int idx = cpi->scaled_ref_idx[i - 1]; - RefCntBuffer *const buf = - idx != INVALID_IDX ? 
&cm->buffer_pool->frame_bufs[idx] : NULL; - const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i); - if (buf != NULL && - (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width && - buf->buf.y_crop_height == ref->y_crop_height))) { - --buf->ref_count; - cpi->scaled_ref_idx[i - 1] = INVALID_IDX; + if (idx != INVALID_IDX) { + RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx]; + const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i); + if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width && + buf->buf.y_crop_height == ref->y_crop_height)) { + --buf->ref_count; + cpi->scaled_ref_idx[i - 1] = INVALID_IDX; + } } } } else { - for (i = 0; i < MAX_REF_FRAMES; ++i) { + for (i = 0; i < REFS_PER_FRAME; ++i) { const int idx = cpi->scaled_ref_idx[i]; - RefCntBuffer *const buf = - idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL; - if (buf != NULL) { + if (idx != INVALID_IDX) { + RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx]; --buf->ref_count; cpi->scaled_ref_idx[i] = INVALID_IDX; } @@ -3457,6 +3491,11 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index, // Decide q and q bounds. *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index); + if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) { + *q = cpi->rc.worst_quality; + cpi->rc.force_max_q = 0; + } + if (!frame_is_intra_only(cm)) { vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH); } @@ -3661,14 +3700,16 @@ static INLINE void set_raw_source_frame(VP9_COMP *cpi) { static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest) { VP9_COMMON *const cm = &cpi->common; + SVC *const svc = &cpi->svc; int q = 0, bottom_index = 0, top_index = 0; + int no_drop_scene_change = 0; const INTERP_FILTER filter_scaler = (is_one_pass_cbr_svc(cpi)) - ? cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] + ? svc->downsample_filter_type[svc->spatial_layer_id] : EIGHTTAP; const int phase_scaler = (is_one_pass_cbr_svc(cpi)) - ? cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] + ? svc->downsample_filter_phase[svc->spatial_layer_id] : 0; if (cm->show_existing_frame) { @@ -3676,6 +3717,8 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, return 1; } + svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe; + // Flag to check if its valid to compute the source sad (used for // scene detection and for superblock content state in CBR mode). // The flag may get reset below based on SVC or resizing state. @@ -3688,25 +3731,25 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (is_one_pass_cbr_svc(cpi) && cpi->un_scaled_source->y_width == cm->width << 2 && cpi->un_scaled_source->y_height == cm->height << 2 && - cpi->svc.scaled_temp.y_width == cm->width << 1 && - cpi->svc.scaled_temp.y_height == cm->height << 1) { + svc->scaled_temp.y_width == cm->width << 1 && + svc->scaled_temp.y_height == cm->height << 1) { // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2 // result will be saved in scaled_temp and might be used later. 
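
/*
 * The comment above describes why the 1/4x1/4 SVC layer is produced by two
 * 1:2 passes: the optimized 1:2 kernel is reused and the intermediate
 * 1/2x1/2 image (scaled_temp) stays around for another spatial layer. The
 * sketch below is only a toy illustration of that staging with a plain box
 * filter; the real path goes through vp9_svc_twostage_scale() and the
 * convolve-based scalers, and every name here is made up for the example.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static void toy_downscale_by_2(const uint8_t *src, int src_w, int src_h,
                               int src_stride, uint8_t *dst, int dst_stride) {
  int r, c;
  for (r = 0; r < src_h / 2; ++r) {
    for (c = 0; c < src_w / 2; ++c) {
      const uint8_t *p = src + 2 * r * src_stride + 2 * c;
      /* 2x2 box average with rounding; the encoder uses phase-aware taps. */
      dst[r * dst_stride + c] =
          (uint8_t)((p[0] + p[1] + p[src_stride] + p[src_stride + 1] + 2) >> 2);
    }
  }
}

int main(void) {
  enum { W = 64, H = 64 };
  uint8_t *full = malloc(W * H);
  uint8_t *half = malloc((W / 2) * (H / 2));    /* kept, like scaled_temp */
  uint8_t *quarter = malloc((W / 4) * (H / 4)); /* the 1/4x1/4 target layer */
  if (!full || !half || !quarter) return 1;
  memset(full, 128, W * H);
  toy_downscale_by_2(full, W, H, W, half, W / 2);                /* stage 1 */
  toy_downscale_by_2(half, W / 2, H / 2, W / 2, quarter, W / 4); /* stage 2 */
  free(full);
  free(half);
  free(quarter);
  return 0;
}
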
- const INTERP_FILTER filter_scaler2 = cpi->svc.downsample_filter_type[1]; - const int phase_scaler2 = cpi->svc.downsample_filter_phase[1]; + const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1]; + const int phase_scaler2 = svc->downsample_filter_phase[1]; cpi->Source = vp9_svc_twostage_scale( - cm, cpi->un_scaled_source, &cpi->scaled_source, &cpi->svc.scaled_temp, + cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp, filter_scaler, phase_scaler, filter_scaler2, phase_scaler2); - cpi->svc.scaled_one_half = 1; + svc->scaled_one_half = 1; } else if (is_one_pass_cbr_svc(cpi) && cpi->un_scaled_source->y_width == cm->width << 1 && cpi->un_scaled_source->y_height == cm->height << 1 && - cpi->svc.scaled_one_half) { + svc->scaled_one_half) { // If the spatial layer is 1/2x1/2 and the scaling is already done in the // two-stage scaling, use the result directly. - cpi->Source = &cpi->svc.scaled_temp; - cpi->svc.scaled_one_half = 0; + cpi->Source = &svc->scaled_temp; + svc->scaled_one_half = 0; } else { cpi->Source = vp9_scale_if_required( cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0), @@ -3714,8 +3757,8 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } #ifdef OUTPUT_YUV_SVC_SRC // Write out at most 3 spatial layers. - if (is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id < 3) { - vpx_write_yuv_frame(yuv_svc_src[cpi->svc.spatial_layer_id], cpi->Source); + if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) { + vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source); } #endif // Unfiltered raw source used in metrics calculation if the source @@ -3735,9 +3778,9 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } if ((cpi->use_svc && - (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1 || - cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 || - cpi->svc.current_superframe < 1)) || + (svc->spatial_layer_id < svc->number_spatial_layers - 1 || + svc->temporal_layer_id < svc->number_temporal_layers - 1 || + svc->current_superframe < 1)) || cpi->resize_pending || cpi->resize_state || cpi->external_resize || cpi->resize_state != ORIG) { cpi->compute_source_sad_onepass = 0; @@ -3786,18 +3829,33 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8))) vp9_scene_detection_onepass(cpi); - if (cpi->svc.spatial_layer_id == 0) - cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad; + if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) { + svc->high_source_sad_superframe = cpi->rc.high_source_sad; + // On scene change reset temporal layer pattern to TL0. + // TODO(marpan/jianj): Fix this to handle case where base + // spatial layers are skipped, in which case we should insert + // and reset to spatial layer 0 on scene change. + // Only do this reset for bypass/flexible mode. + if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 && + svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + // rc->high_source_sad will get reset so copy it to restore it. + int tmp_high_source_sad = cpi->rc.high_source_sad; + vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME); + cpi->rc.high_source_sad = tmp_high_source_sad; + } + } // For 1 pass CBR, check if we are dropping this frame. // Never drop on key frame, if base layer is key for svc, // on scene change, or if superframe has layer sync. 
+ if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) && + !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0])) + no_drop_scene_change = 1; if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && - !frame_is_intra_only(cm) && !cpi->rc.high_source_sad && - !cpi->svc.high_source_sad_superframe && - !cpi->svc.superframe_has_layer_sync && + !frame_is_intra_only(cm) && !no_drop_scene_change && + !svc->superframe_has_layer_sync && (!cpi->use_svc || - !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) { + !svc->layer_context[svc->temporal_layer_id].is_key_frame)) { if (vp9_rc_drop_frame(cpi)) return 0; } @@ -3805,7 +3863,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can // avoid this frame-level upsampling (for non intra_only frames). if (frame_is_intra_only(cm) == 0 && - !(is_one_pass_cbr_svc(cpi) && cpi->svc.force_zero_mode_spatial_ref)) { + !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) { vp9_scale_references(cpi); } @@ -3815,12 +3873,12 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi); if (cpi->sf.svc_use_lowres_part && - cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) { - if (cpi->svc.prev_partition_svc == NULL) { + svc->spatial_layer_id == svc->number_spatial_layers - 2) { + if (svc->prev_partition_svc == NULL) { CHECK_MEM_ERROR( - cm, cpi->svc.prev_partition_svc, + cm, svc->prev_partition_svc, (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows, - sizeof(*cpi->svc.prev_partition_svc))); + sizeof(*svc->prev_partition_svc))); } } @@ -3832,6 +3890,12 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, cpi->use_skin_detection = 1; } + // Enable post encode frame dropping for CBR on non key frame, when + // ext_use_post_encode_drop is specified by user. + cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop && + cpi->oxcf.rc_mode == VPX_CBR && + cm->frame_type != KEY_FRAME; + vp9_set_quantizer(cm, q); vp9_set_variance_partition_thresholds(cpi, q, 0); @@ -3842,16 +3906,24 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (cpi->use_svc) { // On non-zero spatial layer, check for disabling inter-layer // prediction. - if (cpi->svc.spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi); + if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi); vp9_svc_assert_constraints_pattern(cpi); } + if (cpi->rc.last_post_encode_dropped_scene_change) { + cpi->rc.high_source_sad = 1; + svc->high_source_sad_superframe = 1; + // For now disable use_source_sad since Last_Source will not be the previous + // encoded but the dropped one. + cpi->sf.use_source_sad = 0; + cpi->rc.last_post_encode_dropped_scene_change = 0; + } // Check if this high_source_sad (scene/slide change) frame should be // encoded at high/max QP, and if so, set the q and adjust some rate // control parameters. if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ && (cpi->rc.high_source_sad || - (cpi->use_svc && cpi->svc.high_source_sad_superframe))) { + (cpi->use_svc && svc->high_source_sad_superframe))) { if (vp9_encodedframe_overshoot(cpi, -1, &q)) { vp9_set_quantizer(cm, q); vp9_set_variance_partition_thresholds(cpi, q, 0); @@ -3886,7 +3958,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // For SVC: all spatial layers are checked for re-encoding. 
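
/*
 * The FAST_DETECTION_MAXQ branch above reacts to a scene/slide change by
 * asking vp9_encodedframe_overshoot() for a new Q and re-running
 * vp9_set_quantizer() instead of re-encoding. How that Q is derived is not
 * part of this diff; the helper below only sketches the general idea - push
 * Q toward worst_quality when the projected rate at the picked Q blows the
 * frame budget. The thresholds, names, and linear bump are assumptions, not
 * the libvpx logic.
 */
static int toy_overshoot_qindex(int picked_q, int worst_q,
                                double projected_bits, double target_bits) {
  if (projected_bits > 3.0 * target_bits) return worst_q; /* severe overshoot */
  if (projected_bits > 1.5 * target_bits)
    return picked_q + (worst_q - picked_q) / 2; /* meet the budget halfway */
  return picked_q; /* within budget: keep the rate-control choice */
}
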
if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ && (cpi->rc.high_source_sad || - (cpi->use_svc && cpi->svc.high_source_sad_superframe))) { + (cpi->use_svc && svc->high_source_sad_superframe))) { int frame_size = 0; // Get an estimate of the encoded frame size. save_coding_context(cpi); @@ -3960,9 +4032,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, set_size_independent_vars(cpi); - enable_acl = cpi->sf.allow_acl - ? (cm->frame_type == KEY_FRAME) || (cm->show_frame == 0) - : 0; + enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) || + (cpi->twopass.gf_group.index == 1) + : 0; do { vpx_clear_system_state(); @@ -4622,8 +4694,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, TX_SIZE t; // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0. + // If in constrained layer drop mode (svc.framedrop_mode != LAYER_DROP) and + // base spatial layer was dropped, no need to set svc.skip_enhancement_layer, + // as whole superframe will be dropped. if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 && - cpi->oxcf.target_bandwidth == 0) { + cpi->oxcf.target_bandwidth == 0 && + !(cpi->svc.framedrop_mode != LAYER_DROP && + cpi->svc.drop_spatial_layer[0])) { cpi->svc.skip_enhancement_layer = 1; vp9_rc_postencode_update_drop_frame(cpi); cpi->ext_refresh_frame_flags_pending = 0; @@ -4720,19 +4797,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cm->ref_frame_map[cpi->alt_fb_idx]); } - cpi->last_frame_dropped = 0; - cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0; - // Keep track of the frame buffer index updated/refreshed for the - // current encoded TL0 superframe. - if (cpi->svc.temporal_layer_id == 0) { - if (cpi->refresh_last_frame) - cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx; - else if (cpi->refresh_golden_frame) - cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx; - else if (cpi->refresh_alt_ref_frame) - cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx; - } - // Disable segmentation if it decrease rate/distortion ratio if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) vp9_try_disable_lookahead_aq(cpi, size, dest); @@ -4779,9 +4843,34 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, // Pick the loop filter level for the frame. loopfilter_frame(cpi, cm); + if (cpi->rc.use_post_encode_drop) save_coding_context(cpi); + // build the bitstream vp9_pack_bitstream(cpi, dest, size); + if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality && + cpi->svc.spatial_layer_id == 0 && + post_encode_drop_screen_content(cpi, size)) { + restore_coding_context(cpi); + return; + } + + cpi->last_frame_dropped = 0; + cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0; + if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) + cpi->svc.num_encoded_top_layer++; + + // Keep track of the frame buffer index updated/refreshed for the + // current encoded TL0 superframe. 
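
/*
 * The hunk above snapshots the coding context with save_coding_context()
 * before vp9_pack_bitstream(), so that when post-encode dropping is enabled
 * and the check fires, the frame can be thrown away and the encoder rolled
 * back as if it had never been coded. The actual criterion lives in
 * post_encode_drop_screen_content(), which this diff does not show; the toy
 * below only illustrates the shape of such a gate - compare the packed size
 * against the per-frame budget - and the struct, names, and 4x factor are
 * assumptions, not libvpx code.
 */
#include <stddef.h>

typedef struct {
  int use_post_encode_drop;      /* set for CBR, non-key frames */
  int base_qindex;               /* Q actually used for this frame */
  int worst_quality;             /* max Q rate control may pick */
  size_t avg_frame_budget_bytes; /* target size for one frame */
} ToyPostEncodeState;

/* Returns 1 when the just-packed frame should be dropped and retried later
 * at a higher Q, mirroring the restore-and-return flow added above. */
static int toy_should_drop_after_encode(const ToyPostEncodeState *st,
                                        size_t packed_bytes) {
  if (!st->use_post_encode_drop) return 0;
  if (st->base_qindex >= st->worst_quality) return 0; /* nothing left to try */
  return packed_bytes > 4 * st->avg_frame_budget_bytes;
}
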
+ if (cpi->svc.temporal_layer_id == 0) { + if (cpi->refresh_last_frame) + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx; + else if (cpi->refresh_golden_frame) + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx; + else if (cpi->refresh_alt_ref_frame) + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx; + } + if (cm->seg.update_map) update_reference_segmentation_map(cpi); if (frame_is_intra_only(cm) == 0) { @@ -4910,6 +4999,8 @@ static void init_ref_frame_bufs(VP9_COMMON *cm) { cm->new_fb_idx = INVALID_IDX; for (i = 0; i < REF_FRAMES; ++i) { cm->ref_frame_map[i] = INVALID_IDX; + } + for (i = 0; i < FRAME_BUFFERS; ++i) { pool->frame_bufs[i].ref_count = 0; } } @@ -5335,6 +5426,7 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) { typedef struct GF_PICTURE { YV12_BUFFER_CONFIG *frame; int ref_frame[3]; + FRAME_UPDATE_TYPE update_type; } GF_PICTURE; void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture, @@ -5345,16 +5437,22 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture, int gld_index = -1; int alt_index = -1; int lst_index = -1; + int arf_index_stack[MAX_ARF_LAYERS]; + int arf_stack_size = 0; int extend_frame_count = 0; int pframe_qindex = cpi->tpl_stats[2].base_qindex; + int frame_gop_offset = 0; RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs; - int recon_frame_index[REFS_PER_FRAME + 1] = { -1, -1, -1, -1 }; + int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS]; + + memset(recon_frame_index, -1, sizeof(recon_frame_index)); + stack_init(arf_index_stack, MAX_ARF_LAYERS); // TODO(jingning): To be used later for gf frame type parsing. (void)gf_group; - for (i = 0; i < FRAME_BUFFERS && frame_idx < REFS_PER_FRAME + 1; ++i) { + for (i = 0; i < FRAME_BUFFERS; ++i) { if (frame_bufs[i].ref_count == 0) { alloc_frame_mvs(cm, i); if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height, @@ -5369,6 +5467,8 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture, recon_frame_index[frame_idx] = i; ++frame_idx; + + if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break; } } @@ -5382,21 +5482,24 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture, // Initialize Golden reference frame. 
gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME); for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1; + gf_picture[0].update_type = gf_group->update_type[0]; gld_index = 0; ++*tpl_group_frames; - // Initialize ARF frame + // Initialize base layer ARF frame gf_picture[1].frame = cpi->Source; gf_picture[1].ref_frame[0] = gld_index; gf_picture[1].ref_frame[1] = lst_index; gf_picture[1].ref_frame[2] = alt_index; + gf_picture[1].update_type = gf_group->update_type[1]; alt_index = 1; ++*tpl_group_frames; // Initialize P frames - for (frame_idx = 2; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) { - struct lookahead_entry *buf = - vp9_lookahead_peek(cpi->lookahead, frame_idx - 2); + for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) { + struct lookahead_entry *buf; + frame_gop_offset = gf_group->frame_gop_index[frame_idx]; + buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1); if (buf == NULL) break; @@ -5404,25 +5507,44 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture, gf_picture[frame_idx].ref_frame[0] = gld_index; gf_picture[frame_idx].ref_frame[1] = lst_index; gf_picture[frame_idx].ref_frame[2] = alt_index; + gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx]; + + switch (gf_group->update_type[frame_idx]) { + case ARF_UPDATE: + stack_push(arf_index_stack, alt_index, arf_stack_size); + ++arf_stack_size; + alt_index = frame_idx; + break; + case LF_UPDATE: lst_index = frame_idx; break; + case OVERLAY_UPDATE: + gld_index = frame_idx; + alt_index = stack_pop(arf_index_stack, arf_stack_size); + --arf_stack_size; + break; + case USE_BUF_FRAME: + lst_index = alt_index; + alt_index = stack_pop(arf_index_stack, arf_stack_size); + --arf_stack_size; + break; + default: break; + } ++*tpl_group_frames; - lst_index = frame_idx; // The length of group of pictures is baseline_gf_interval, plus the // beginning golden frame from last GOP, plus the last overlay frame in // the same GOP. - if (frame_idx == cpi->rc.baseline_gf_interval + 1) break; + if (frame_idx == gf_group->gf_group_size) break; } - gld_index = frame_idx; - lst_index = VPXMAX(0, frame_idx - 1); alt_index = -1; ++frame_idx; + ++frame_gop_offset; // Extend two frames outside the current gf group. 
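
/*
 * The switch over update_type above keeps the golden/last/alt indices in
 * step with a small stack of ARF slots: a nested ARF pushes the current alt
 * index and takes its place, and the matching OVERLAY/USE_BUF frame pops it
 * back. The toy below replays that bookkeeping with local helpers on an
 * invented two-level sequence; stack_push()/stack_pop() in the encoder are
 * separate helpers and are not reproduced here.
 */
#include <assert.h>

enum { TOY_MAX_ARFS = 8 };

static void toy_push(int *stack, int *size, int value) {
  assert(*size < TOY_MAX_ARFS);
  stack[(*size)++] = value;
}

static int toy_pop(int *stack, int *size) {
  assert(*size > 0);
  return stack[--(*size)];
}

int main(void) {
  int arf_stack[TOY_MAX_ARFS];
  int arf_stack_size = 0;
  int alt_index = 1; /* the base-layer ARF sits at group index 1, as above */

  /* A deeper-layer ARF arrives: remember the outer ARF, then point at it. */
  toy_push(arf_stack, &arf_stack_size, alt_index);
  alt_index = 2;

  /* Its overlay (or buffered show) frame arrives: restore the outer ARF. */
  alt_index = toy_pop(arf_stack, &arf_stack_size);
  assert(alt_index == 1 && arf_stack_size == 0);
  return 0;
}
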
for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) { struct lookahead_entry *buf = - vp9_lookahead_peek(cpi->lookahead, frame_idx - 2); + vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1); if (buf == NULL) break; @@ -5432,16 +5554,25 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture, gf_picture[frame_idx].ref_frame[0] = gld_index; gf_picture[frame_idx].ref_frame[1] = lst_index; gf_picture[frame_idx].ref_frame[2] = alt_index; + gf_picture[frame_idx].update_type = LF_UPDATE; lst_index = frame_idx; ++*tpl_group_frames; ++extend_frame_count; + ++frame_gop_offset; } } void init_tpl_stats(VP9_COMP *cpi) { int frame_idx; - for (frame_idx = 0; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) { + for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) { TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; +#if CONFIG_NON_GREEDY_MV + int rf_idx; + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + tpl_frame->mv_dist_sum[rf_idx] = 0; + tpl_frame->mv_cost_sum[rf_idx] = 0; + } +#endif memset(tpl_frame->tpl_stats_ptr, 0, tpl_frame->height * tpl_frame->width * sizeof(*tpl_frame->tpl_stats_ptr)); @@ -5451,20 +5582,22 @@ void init_tpl_stats(VP9_COMP *cpi) { #if CONFIG_NON_GREEDY_MV static void prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row, - int mi_col, int_mv *nb_full_mvs) { + int mi_col, int rf_idx, BLOCK_SIZE bsize, + int_mv *nb_full_mvs) { + const int mi_unit = num_8x8_blocks_wide_lookup[bsize]; const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; int i; for (i = 0; i < NB_MVS_NUM; ++i) { - int r = dirs[i][0]; - int c = dirs[i][1]; + int r = dirs[i][0] * mi_unit; + int c = dirs[i][1] * mi_unit; if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 && mi_col + c < tpl_frame->mi_cols) { const TplDepStats *tpl_ptr = &tpl_frame ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c]; - if (tpl_ptr->ready) { - nb_full_mvs[i].as_mv.row = tpl_ptr->mv.as_mv.row >> 3; - nb_full_mvs[i].as_mv.col = tpl_ptr->mv.as_mv.col >> 3; + if (tpl_ptr->ready[rf_idx]) { + nb_full_mvs[i].as_mv.row = tpl_ptr->mv_arr[rf_idx].as_mv.row >> 3; + nb_full_mvs[i].as_mv.col = tpl_ptr->mv_arr[rf_idx].as_mv.col >> 3; } else { nb_full_mvs[i].as_int = INVALID_MV; } @@ -5503,7 +5636,7 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, #if CONFIG_NON_GREEDY_MV // lambda is used to adjust the importance of motion vector consitency. // TODO(angiebird): Figure out lambda's proper value. - double lambda = 10000; + double lambda = cpi->tpl_stats[frame_idx].lambda; int_mv nb_full_mvs[NB_MVS_NUM]; #endif @@ -5527,7 +5660,8 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, #if CONFIG_NON_GREEDY_MV (void)search_method; (void)sadpb; - prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, nb_full_mvs); + prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx, bsize, + nb_full_mvs); vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], nb_full_mvs, tpl_stats, @@ -5544,12 +5678,13 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, /* restore UMV window */ x->mv_limits = tmp_mv_limits; + // TODO(yunqing): may use higher tap interp filter than 2 taps. 
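
/*
 * The TODO above and the USE_2_TAPS argument passed to
 * find_fractional_mv_step() in the call that follows select a 2-tap
 * (bilinear) interpolator for the TPL subpel search instead of the full
 * 8-tap kernels. The sketch shows what a 2-tap horizontal interpolation at a
 * 1/8-pel phase looks like, so the speed/accuracy trade-off is concrete; the
 * 3-bit weights and the function name are illustrative, not the libvpx
 * convolve code (which uses 7-bit kernel tables).
 */
#include <stdint.h>

/* src must provide width + 1 samples; subpel_x is the 1/8-pel phase, 0..7. */
static void toy_bilinear_h(const uint8_t *src, uint8_t *dst, int width,
                           int subpel_x) {
  const int w1 = subpel_x;     /* weight of the right-hand pixel */
  const int w0 = 8 - subpel_x; /* weight of the left-hand pixel */
  int i;
  for (i = 0; i < width; ++i)
    dst[i] = (uint8_t)((w0 * src[i] + w1 * src[i + 1] + 4) >> 3);
}
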
// Ignore mv costing by sending NULL pointer instead of cost array bestsme = cpi->find_fractional_mv_step( x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level, - cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, - 0); + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, + USE_2_TAPS); return bestsme; } @@ -5594,42 +5729,21 @@ int round_floor(int ref_pos, int bsize_pix) { } void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col, - BLOCK_SIZE bsize, int stride, - const TplDepStats *src_stats) { + BLOCK_SIZE bsize, int stride) { const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col]; int idx, idy; - int64_t intra_cost = src_stats->intra_cost / (mi_height * mi_width); - int64_t inter_cost = src_stats->inter_cost / (mi_height * mi_width); - - TplDepStats *tpl_ptr; - - intra_cost = VPXMAX(1, intra_cost); - inter_cost = VPXMAX(1, inter_cost); - for (idy = 0; idy < mi_height; ++idy) { - tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col]; for (idx = 0; idx < mi_width; ++idx) { -#if CONFIG_NON_GREEDY_MV - int rf_idx; - for (rf_idx = 0; rf_idx < 3; ++rf_idx) { - tpl_ptr->mv_dist[rf_idx] = src_stats->mv_dist[rf_idx]; - tpl_ptr->mv_cost[rf_idx] = src_stats->mv_cost[rf_idx]; - tpl_ptr->inter_cost_arr[rf_idx] = src_stats->inter_cost; - tpl_ptr->recon_error_arr[rf_idx] = src_stats->recon_error_arr[rf_idx]; - tpl_ptr->sse_arr[rf_idx] = src_stats->sse_arr[rf_idx]; - tpl_ptr->mv_arr[rf_idx].as_int = src_stats->mv_arr[rf_idx].as_int; - } - tpl_ptr->feature_score = src_stats->feature_score; - tpl_ptr->ready = 1; -#endif - tpl_ptr->intra_cost = intra_cost; - tpl_ptr->inter_cost = inter_cost; + TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx]; + const int64_t mc_flow = tpl_ptr->mc_flow; + const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost; + *tpl_ptr = *src_stats; + tpl_ptr->mc_flow = mc_flow; + tpl_ptr->mc_ref_cost = mc_ref_cost; tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow; - tpl_ptr->ref_frame_index = src_stats->ref_frame_index; - tpl_ptr->mv.as_int = src_stats->mv.as_int; - ++tpl_ptr; } } } @@ -5717,9 +5831,21 @@ void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff, int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]; const int shift = tx_size == TX_32X32 ? 
0 : 2; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, + p->quant_fp, qcoeff, dqcoeff, pd->dequant, + &eob, scan_order->scan, scan_order->iscan); + } else { + vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, + p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob, + scan_order->scan, scan_order->iscan); + } +#else vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan, scan_order->iscan); +#endif // CONFIG_VP9_HIGHBITDEPTH *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift; *recon_error = VPXMAX(*recon_error, 1); @@ -5728,6 +5854,19 @@ void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff, *sse = VPXMAX(*sse, 1); } +#if CONFIG_VP9_HIGHBITDEPTH +void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, + TX_SIZE tx_size) { + // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms. + switch (tx_size) { + case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break; + case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break; + case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break; + default: assert(0); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, TX_SIZE tx_size) { switch (tx_size) { @@ -5763,14 +5902,23 @@ double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows, int cols) { } #endif +static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row, + int mi_col) { + x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); + x->mv_limits.row_max = + (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND); + x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); + x->mv_limits.col_max = + ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND); +} + void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, struct scale_factors *sf, GF_PICTURE *gf_picture, - int frame_idx, int16_t *src_diff, tran_low_t *coeff, - tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row, - int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size, + int frame_idx, TplDepFrame *tpl_frame, int16_t *src_diff, + tran_low_t *coeff, tran_low_t *qcoeff, tran_low_t *dqcoeff, + int mi_row, int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size, YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor, - int64_t *recon_error, int64_t *sse, - TplDepStats *tpl_stats) { + int64_t *recon_error, int64_t *sse) { VP9_COMMON *cm = &cpi->common; ThreadData *td = &cpi->td; @@ -5789,8 +5937,10 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, PREDICTION_MODE mode; int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; MODE_INFO mi_above, mi_left; - - memset(tpl_stats, 0, sizeof(*tpl_stats)); + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + TplDepStats *tpl_stats = + &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8; @@ -5816,11 +5966,24 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src, src_stride, dst, dst_stride, 0, 0, 0); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & 
YV12_FLAG_HIGHBITDEPTH) { + vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, + dst_stride, xd->bd); + highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); + // TODO(sdeng): Implement SIMD based high bit-depth satd. + intra_cost = vpx_satd_c(coeff, pix_num); + } else { + vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, + dst_stride); + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + intra_cost = vpx_satd(coeff, pix_num); + } +#else vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride); - wht_fwd_txfm(src_diff, bw, coeff, tx_size); - intra_cost = vpx_satd(coeff, pix_num); +#endif // CONFIG_VP9_HIGHBITDEPTH if (intra_cost < best_intra_cost) best_intra_cost = intra_cost; } @@ -5828,31 +5991,14 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, // Motion compensated prediction best_mv.as_int = 0; - (void)mb_y_offset; - // Motion estimation column boundary - x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); - x->mv_limits.col_max = - ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND); - -#if CONFIG_NON_GREEDY_MV - tpl_stats->feature_score = get_feature_score( - xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh); -#endif + set_mv_limits(cm, x, mi_row, mi_col); for (rf_idx = 0; rf_idx < 3; ++rf_idx) { int_mv mv; - if (ref_frame[rf_idx] == NULL) { -#if CONFIG_NON_GREEDY_MV - tpl_stats->inter_cost_arr[rf_idx] = -1; -#endif - continue; - } + if (ref_frame[rf_idx] == NULL) continue; #if CONFIG_NON_GREEDY_MV - motion_compensated_prediction( - cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset, - ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize, - mi_row, mi_col, tpl_stats, rf_idx); + (void)td; mv.as_int = tpl_stats->mv_arr[rf_idx].as_int; #else motion_compensated_prediction( @@ -5861,8 +6007,6 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, mi_row, mi_col, &mv.as_mv); #endif - // TODO(jingning): Not yet support high bit-depth in the next three - // steps. 
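
/*
 * The intra path above computes its cost as subtract -> Hadamard transform ->
 * sum of absolute transformed differences (SATD), using vpx_hadamard_8x8/
 * 16x16/32x32 and vpx_satd (or the newly added high-bit-depth variants). The
 * toy below spells that metric out for a 4x4 residual with an explicit
 * butterfly, which is small enough to read; the real transforms are larger
 * and SIMD-accelerated, and the helper names here are invented.
 */
#include <stdint.h>
#include <stdlib.h>

/* Unnormalized 4-point Hadamard butterfly. */
static void toy_hadamard4(const int in[4], int out[4]) {
  const int b0 = in[0] + in[1], b1 = in[0] - in[1];
  const int b2 = in[2] + in[3], b3 = in[2] - in[3];
  out[0] = b0 + b2;
  out[1] = b1 + b3;
  out[2] = b0 - b2;
  out[3] = b1 - b3;
}

/* SATD of a 4x4 residual: transform rows, then columns, then sum |coeff|. */
static int toy_satd_4x4(const int16_t residual[16]) {
  int tmp[16], coeff[16], v[4], i, j, satd = 0;
  for (i = 0; i < 4; ++i) {
    int row[4];
    for (j = 0; j < 4; ++j) row[j] = residual[i * 4 + j];
    toy_hadamard4(row, v);
    for (j = 0; j < 4; ++j) tmp[i * 4 + j] = v[j];
  }
  for (j = 0; j < 4; ++j) {
    int col[4];
    for (i = 0; i < 4; ++i) col[i] = tmp[i * 4 + j];
    toy_hadamard4(col, v);
    for (i = 0; i < 4; ++i) coeff[i * 4 + j] = v[i];
  }
  for (i = 0; i < 16; ++i) satd += abs(coeff[i]);
  return satd;
}
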
#if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_build_inter_predictor( @@ -5873,6 +6017,8 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vpx_highbd_subtract_block( bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw, xd->bd); + highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); + inter_cost = vpx_satd_c(coeff, pix_num); } else { vp9_build_inter_predictor( ref_frame[rf_idx]->y_buffer + mb_y_offset, @@ -5881,6 +6027,8 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vpx_subtract_block(bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw); + wht_fwd_txfm(src_diff, bw, coeff, tx_size); + inter_cost = vpx_satd(coeff, pix_num); } #else vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset, @@ -5890,10 +6038,9 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, vpx_subtract_block(bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw); -#endif wht_fwd_txfm(src_diff, bw, coeff, tx_size); - inter_cost = vpx_satd(coeff, pix_num); +#endif #if CONFIG_NON_GREEDY_MV tpl_stats->inter_cost_arr[rf_idx] = inter_cost; @@ -5917,13 +6064,136 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, } best_intra_cost = VPXMAX(best_intra_cost, 1); best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost); - tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2; - tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2; - tpl_stats->mc_dep_cost = tpl_stats->intra_cost + tpl_stats->mc_flow; + tpl_stats->inter_cost = VPXMAX( + 1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width)); + tpl_stats->intra_cost = VPXMAX( + 1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width)); tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx]; tpl_stats->mv.as_int = best_mv.as_int; } +#if CONFIG_NON_GREEDY_MV +static int compare_feature_score(const void *a, const void *b) { + const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a; + const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b; + if (aa->feature_score < bb->feature_score) { + return 1; + } else if (aa->feature_score > bb->feature_score) { + return -1; + } else { + return 0; + } +} + +static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx, + YV12_BUFFER_CONFIG **ref_frame, BLOCK_SIZE bsize, + int mi_row, int mi_col) { + VP9_COMMON *cm = &cpi->common; + MACROBLOCK *x = &td->mb; + MACROBLOCKD *xd = &x->e_mbd; + TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; + TplDepStats *tpl_stats = + &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + const int mb_y_offset = + mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + int rf_idx; + + set_mv_limits(cm, x, mi_row, mi_col); + + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + if (ref_frame[rf_idx] == NULL) { + tpl_stats->ready[rf_idx] = 0; + continue; + } else { + tpl_stats->ready[rf_idx] = 1; + } + motion_compensated_prediction( + cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset, + ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize, + mi_row, mi_col, tpl_stats, rf_idx); + } +} + +#define CHANGE_MV_SEARCH_ORDER 1 +#define USE_PQSORT 1 +#define RE_COMPUTE_MV_INCONSISTENCY 1 + +#if CHANGE_MV_SEARCH_ORDER +#if USE_PQSORT +static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size, + FEATURE_SCORE_LOC 
**output) { + if (*size > 0) { + *output = heap[0]; + --*size; + if (*size > 0) { + int p, l, r; + heap[0] = heap[*size]; + p = 0; + l = 2 * p + 1; + r = 2 * p + 2; + while (l < *size) { + FEATURE_SCORE_LOC *tmp; + int c = l; + if (r < *size && heap[r]->feature_score > heap[l]->feature_score) { + c = r; + } + if (heap[p]->feature_score >= heap[c]->feature_score) { + break; + } + tmp = heap[p]; + heap[p] = heap[c]; + heap[c] = tmp; + p = c; + l = 2 * p + 1; + r = 2 * p + 2; + } + } + } else { + assert(0); + } +} + +static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size, + FEATURE_SCORE_LOC *input) { + int c, p; + FEATURE_SCORE_LOC *tmp; + heap[*size] = input; + ++*size; + c = *size - 1; + p = c >> 1; + while (c > 0 && heap[c]->feature_score > heap[p]->feature_score) { + tmp = heap[p]; + heap[p] = heap[c]; + heap[c] = tmp; + c = p; + p >>= 1; + } +} + +static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int *heap_size) { + const int mi_unit = num_8x8_blocks_wide_lookup[bsize]; + const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; + int i; + for (i = 0; i < NB_MVS_NUM; ++i) { + int r = dirs[i][0] * mi_unit; + int c = dirs[i][1] * mi_unit; + if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 && + mi_col + c < tpl_frame->mi_cols) { + FEATURE_SCORE_LOC *fs_loc = + &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride + + (mi_col + c)]; + if (fs_loc->visited == 0) { + max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc); + } + } + } +} +#endif // USE_PQSORT +#endif // CHANGE_MV_SEARCH_ORDER +#endif // CONFIG_NON_GREEDY_MV + void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, BLOCK_SIZE bsize) { TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; @@ -5954,6 +6224,17 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; int64_t recon_error, sse; +#if CONFIG_NON_GREEDY_MV + int rf_idx; + int fs_loc_sort_size; +#if CHANGE_MV_SEARCH_ORDER +#if USE_PQSORT + int fs_loc_heap_size; +#else + int i; +#endif // USE_PQSORT +#endif // CHANGE_MV_SEARCH_ORDER +#endif // CONFIG_NON_GREEDY_MV // Setup scaling factor #if CONFIG_VP9_HIGHBITDEPTH @@ -5984,9 +6265,7 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, xd->cur_buf = this_frame; // Get rd multiplier set up. 
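
/*
 * The max_heap_push()/max_heap_pop() helpers above keep the block with the
 * highest feature score at the top of the heap so its motion search runs
 * first. The toy below exercises the same sift-up/sift-down idea on plain
 * doubles and checks that pops come out in descending score order; the
 * element type, values, and zero-based parent/child indexing are purely for
 * the example.
 */
#include <assert.h>

static void toy_heap_push(double *heap, int *size, double v) {
  int c = (*size)++, p;
  heap[c] = v;
  for (p = (c - 1) / 2; c > 0 && heap[c] > heap[p]; c = p, p = (c - 1) / 2) {
    const double t = heap[p];
    heap[p] = heap[c];
    heap[c] = t;
  }
}

static double toy_heap_pop(double *heap, int *size) {
  const double top = heap[0];
  int p = 0;
  heap[0] = heap[--(*size)];
  for (;;) {
    const int l = 2 * p + 1, r = 2 * p + 2;
    int c = p;
    if (l < *size && heap[l] > heap[c]) c = l;
    if (r < *size && heap[r] > heap[c]) c = r;
    if (c == p) break;
    {
      const double t = heap[p];
      heap[p] = heap[c];
      heap[c] = t;
      p = c;
    }
  }
  return top;
}

int main(void) {
  double heap[8];
  int size = 0;
  double prev;
  toy_heap_push(heap, &size, 3.5);
  toy_heap_push(heap, &size, 9.0);
  toy_heap_push(heap, &size, 1.25);
  toy_heap_push(heap, &size, 7.75);
  prev = toy_heap_pop(heap, &size); /* 9.0 comes out first */
  while (size > 0) {
    const double next = toy_heap_pop(heap, &size);
    assert(next <= prev); /* scores come out in descending order */
    prev = next;
  }
  return 0;
}
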
- rdmult = - (int)vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex); - if (rdmult < 1) rdmult = 1; + rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex); set_error_per_bit(&cpi->td.mb, rdmult); vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex); @@ -5995,23 +6274,98 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, cm->base_qindex = tpl_frame->base_qindex; vp9_frame_init_quantizer(cpi); +#if CONFIG_NON_GREEDY_MV + tpl_frame->lambda = 250; + fs_loc_sort_size = 0; + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { - // Motion estimation row boundary - x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); - x->mv_limits.row_max = - (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND); for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { - TplDepStats tpl_stats; - mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, src_diff, coeff, - qcoeff, dqcoeff, mi_row, mi_col, bsize, tx_size, - ref_frame, predictor, &recon_error, &sse, &tpl_stats); + const int mb_y_offset = + mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + TplDepStats *tpl_stats = + &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + FEATURE_SCORE_LOC *fs_loc = + &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col]; + tpl_stats->feature_score = get_feature_score( + xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh); + fs_loc->visited = 0; + fs_loc->feature_score = tpl_stats->feature_score; + fs_loc->mi_row = mi_row; + fs_loc->mi_col = mi_col; + cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc; + ++fs_loc_sort_size; + } + } + + qsort(cpi->feature_score_loc_sort, fs_loc_sort_size, + sizeof(*cpi->feature_score_loc_sort), compare_feature_score); +#if CHANGE_MV_SEARCH_ORDER +#if !USE_PQSORT + for (i = 0; i < fs_loc_sort_size; ++i) { + FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i]; + do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, + fs_loc->mi_col); + } +#else // !USE_PQSORT + fs_loc_heap_size = 0; + max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size, + cpi->feature_score_loc_sort[0]); + + while (fs_loc_heap_size > 0) { + FEATURE_SCORE_LOC *fs_loc; + max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc); + + fs_loc->visited = 1; + + do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, + fs_loc->mi_col); + + add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col, + &fs_loc_heap_size); + } +#endif // !USE_PQSORT +#else // CHANGE_MV_SEARCH_ORDER + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { + do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col); + } + } +#endif // CHANGE_MV_SEARCH_ORDER +#endif // CONFIG_NON_GREEDY_MV + for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { + mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame, + src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize, + tx_size, ref_frame, predictor, &recon_error, &sse); // Motion flow dependency dispenser. 
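
/*
 * mc_flow_dispenser() above now scores every block with get_feature_score(),
 * sorts those scores, seeds the heap with the strongest block, and grows
 * outward through add_nb_blocks_to_heap(), so motion search visits
 * high-texture blocks before their neighbours and the non-greedy search can
 * reuse already-estimated neighbour MVs. The toy below reproduces that visit
 * order on a small grid with a naive "best unvisited frontier block" pick
 * instead of a heap; the grid size and scores are invented.
 */
#include <stdio.h>

enum { GW = 4, GH = 4 };

int main(void) {
  /* Hypothetical per-block feature scores (higher = more texture). */
  const int score[GH][GW] = { { 2, 9, 4, 1 },
                              { 7, 8, 3, 2 },
                              { 1, 6, 5, 3 },
                              { 0, 2, 4, 1 } };
  int visited[GH][GW] = { { 0 } };
  int frontier[GH][GW] = { { 0 } };
  const int dr[4] = { -1, 0, 1, 0 }, dc[4] = { 0, -1, 0, 1 };
  int done = 0, r, c, i;

  /* Seed with the globally best block, (0,1) here, like the first push. */
  frontier[0][1] = 1;

  while (done < GW * GH) {
    int br = -1, bc = -1;
    for (r = 0; r < GH; ++r)
      for (c = 0; c < GW; ++c)
        if (frontier[r][c] && !visited[r][c] &&
            (br < 0 || score[r][c] > score[br][bc])) {
          br = r;
          bc = c;
        }
    if (br < 0) break; /* disconnected leftovers would need a re-seed */
    visited[br][bc] = 1;
    ++done;
    printf("search block (%d,%d) score %d\n", br, bc, score[br][bc]);
    for (i = 0; i < 4; ++i) {
      const int nr = br + dr[i], nc = bc + dc[i];
      if (nr >= 0 && nr < GH && nc >= 0 && nc < GW && !visited[nr][nc])
        frontier[nr][nc] = 1;
    }
  }
  return 0;
}
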
tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize, - tpl_frame->stride, &tpl_stats); + tpl_frame->stride); tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize); +#if CONFIG_NON_GREEDY_MV + { + TplDepStats *this_tpl_stats = + &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { +#if RE_COMPUTE_MV_INCONSISTENCY + MV full_mv; + int_mv nb_full_mvs[NB_MVS_NUM]; + prepare_nb_full_mvs(tpl_frame, mi_row, mi_col, rf_idx, bsize, + nb_full_mvs); + full_mv.row = this_tpl_stats->mv_arr[rf_idx].as_mv.row >> 3; + full_mv.col = this_tpl_stats->mv_arr[rf_idx].as_mv.col >> 3; + this_tpl_stats->mv_cost[rf_idx] = + av1_nb_mvs_inconsistency(&full_mv, nb_full_mvs); +#endif // RE_COMPUTE_MV_INCONSISTENCY + tpl_frame->mv_dist_sum[rf_idx] += this_tpl_stats->mv_dist[rf_idx]; + tpl_frame->mv_cost_sum[rf_idx] += this_tpl_stats->mv_cost[rf_idx]; + } + } +#endif // CONFIG_NON_GREEDY_MV } } } @@ -6088,7 +6442,7 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames, #endif // CONFIG_NON_GREEDY_MV static void setup_tpl_stats(VP9_COMP *cpi) { - GF_PICTURE gf_picture[MAX_LAG_BUFFERS]; + GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE]; const GF_GROUP *gf_group = &cpi->twopass.gf_group; int tpl_group_frames = 0; int frame_idx; @@ -6100,6 +6454,7 @@ static void setup_tpl_stats(VP9_COMP *cpi) { // Backward propagation from tpl_group_frames to 1. for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) { + if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue; mc_flow_dispenser(cpi, gf_picture, frame_idx, bsize); } #if CONFIG_NON_GREEDY_MV @@ -6121,6 +6476,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, struct lookahead_entry *last_source = NULL; struct lookahead_entry *source = NULL; int arf_src_index; + const int gf_group_index = cpi->twopass.gf_group.index; int i; if (is_one_pass_cbr_svc(cpi)) { @@ -6168,7 +6524,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Clear arf index stack before group of pictures processing starts. 
- if (cpi->twopass.gf_group.index == 1) { + if (gf_group_index == 1) { stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2); cpi->twopass.gf_group.stack_size = 0; } @@ -6316,10 +6672,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, level_rc_framerate(cpi, arf_src_index); if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) { - for (i = 0; i < MAX_REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX; + for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX; } - if (arf_src_index && cpi->sf.enable_tpl_model) { + if (gf_group_index == 1 && + cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE && + cpi->sf.enable_tpl_model) { vp9_estimate_qp_gop(cpi); setup_tpl_stats(cpi); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h index 75f177fcc16..02814599d03 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h @@ -291,7 +291,7 @@ typedef struct TplDepStats { int_mv mv; #if CONFIG_NON_GREEDY_MV - int ready; + int ready[3]; double mv_dist[3]; double mv_cost[3]; int64_t inter_cost_arr[3]; @@ -311,6 +311,11 @@ typedef struct TplDepFrame { int mi_rows; int mi_cols; int base_qindex; +#if CONFIG_NON_GREEDY_MV + double lambda; + double mv_dist_sum[3]; + double mv_cost_sum[3]; +#endif } TplDepFrame; #define TPL_DEP_COST_SCALE_LOG2 4 @@ -490,6 +495,23 @@ typedef struct ARNRFilterData { struct scale_factors sf; } ARNRFilterData; +typedef struct EncFrameBuf { + int mem_valid; + int released; + YV12_BUFFER_CONFIG frame; +} EncFrameBuf; + +// Maximum operating frame buffer size needed for a GOP using ARF reference. +#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS) +#if CONFIG_NON_GREEDY_MV +typedef struct FEATURE_SCORE_LOC { + int visited; + double feature_score; + int mi_row; + int mi_col; +} FEATURE_SCORE_LOC; +#endif + typedef struct VP9_COMP { QUANTS quants; ThreadData td; @@ -513,8 +535,14 @@ typedef struct VP9_COMP { #endif YV12_BUFFER_CONFIG *raw_source_frame; - TplDepFrame tpl_stats[MAX_LAG_BUFFERS]; - YV12_BUFFER_CONFIG *tpl_recon_frames[REFS_PER_FRAME + 1]; + TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE]; + YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES]; + EncFrameBuf enc_frame_buf[REF_FRAMES]; +#if CONFIG_NON_GREEDY_MV + FEATURE_SCORE_LOC *feature_score_loc_arr; + FEATURE_SCORE_LOC **feature_score_loc_sort; + FEATURE_SCORE_LOC **feature_score_loc_heap; +#endif TileDataEnc *tile_data; int allocated_tiles; // Keep track of memory allocated for tiles. @@ -522,13 +550,12 @@ typedef struct VP9_COMP { // For a still frame, this flag is set to 1 to skip partition search. int partition_search_skippable_frame; - int scaled_ref_idx[MAX_REF_FRAMES]; + int scaled_ref_idx[REFS_PER_FRAME]; int lst_fb_idx; int gld_fb_idx; int alt_fb_idx; int ref_fb_idx[REF_FRAMES]; - int last_show_frame_buf_idx; // last show frame buffer index int refresh_last_frame; int refresh_golden_frame; @@ -600,6 +627,7 @@ typedef struct VP9_COMP { ActiveMap active_map; fractional_mv_step_fp *find_fractional_mv_step; + struct scale_factors me_sf; vp9_diamond_search_fn_t diamond_search_sad; vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES]; uint64_t time_receive_data; @@ -783,7 +811,7 @@ void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf); // frame is made and not just a copy of the pointer.. 
int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time_stamp); + int64_t end_time); int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, @@ -804,9 +832,11 @@ int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, int vp9_update_entropy(VP9_COMP *cpi, int update); -int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); +int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, + int cols); -int vp9_get_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols); +int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, + int cols); int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c index 58c3a435d9f..e29e86576d2 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c @@ -49,9 +49,6 @@ #define MIN_DECAY_FACTOR 0.01 #define NEW_MV_MODE_PENALTY 32 #define DARK_THRESH 64 -#define DEFAULT_GRP_WEIGHT 1.0 -#define RC_FACTOR_MIN 0.75 -#define RC_FACTOR_MAX 1.75 #define SECTION_NOISE_DEF 250.0 #define LOW_I_THRESH 24000 @@ -1828,10 +1825,12 @@ static int detect_flash(const TWO_PASS *twopass, int offset) { // brief break in prediction (such as a flash) but subsequent frames // are reasonably well predicted by an earlier (pre flash) frame. // The recovery after a flash is indicated by a high pcnt_second_ref - // compared to pcnt_inter. + // useage or a second ref coded error notabley lower than the last + // frame coded error. return next_frame != NULL && - next_frame->pcnt_second_ref > next_frame->pcnt_inter && - next_frame->pcnt_second_ref >= 0.5; + ((next_frame->sr_coded_error < next_frame->coded_error) || + ((next_frame->pcnt_second_ref > next_frame->pcnt_inter) && + (next_frame->pcnt_second_ref >= 0.5))); } // Update the motion related elements to the GF arf boost calculation. 
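
/*
 * The reworked detect_flash() comment above says recovery after a flash
 * shows up either as a second-reference coded error clearly below the
 * current frame's coded error, or as heavy second-reference usage. The toy
 * below restates that two-branch test on a stripped-down stats struct with
 * invented numbers; the field names mirror the first-pass stats, but the
 * struct and values are illustrative only.
 */
#include <assert.h>

typedef struct {
  double sr_coded_error;  /* error when predicting from the second ref */
  double coded_error;     /* error when predicting from the last frame */
  double pcnt_second_ref; /* fraction of blocks preferring the second ref */
  double pcnt_inter;      /* fraction of inter-coded blocks */
} ToyStats;

static int toy_flash_recovery(const ToyStats *next_frame) {
  return (next_frame->sr_coded_error < next_frame->coded_error) ||
         ((next_frame->pcnt_second_ref > next_frame->pcnt_inter) &&
          (next_frame->pcnt_second_ref >= 0.5));
}

int main(void) {
  /* Frame right after a flash: the pre-flash (second) reference predicts it
   * far better than the washed-out previous frame. */
  const ToyStats after_flash = { 1000.0, 9000.0, 0.3, 0.6 };
  /* Ordinary frame: the last frame is the better predictor. */
  const ToyStats normal = { 5000.0, 4000.0, 0.1, 0.7 };
  assert(toy_flash_recovery(&after_flash) == 1);
  assert(toy_flash_recovery(&normal) == 0);
  return 0;
}
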
@@ -2113,21 +2112,23 @@ static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group, TWO_PASS *twopass = &cpi->twopass; const FIRSTPASS_STATS *const start_pos = twopass->stats_in; FIRSTPASS_STATS fpf_frame; - const int mid = (start + end) >> 1; - const int min_frame_interval = 3; + const int mid = (start + end + 1) >> 1; + const int min_frame_interval = 2; int idx; // Process regular P frames if ((end - start < min_frame_interval) || - (depth > cpi->oxcf.enable_auto_arf)) { - int idx; - for (idx = start; idx < end; ++idx) { + (depth > gf_group->allowed_max_layer_depth)) { + for (idx = start; idx <= end; ++idx) { gf_group->update_type[*index_counter] = LF_UPDATE; gf_group->arf_src_offset[*index_counter] = 0; + gf_group->frame_gop_index[*index_counter] = idx; gf_group->rf_level[*index_counter] = INTER_NORMAL; gf_group->layer_depth[*index_counter] = depth; + gf_group->gfu_boost[*index_counter] = NORMAL_BOOST; ++(*index_counter); } + gf_group->max_layer_depth = VPXMAX(gf_group->max_layer_depth, depth); return; } @@ -2137,22 +2138,25 @@ static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group, gf_group->layer_depth[*index_counter] = depth; gf_group->update_type[*index_counter] = ARF_UPDATE; gf_group->arf_src_offset[*index_counter] = mid - start; + gf_group->frame_gop_index[*index_counter] = mid; gf_group->rf_level[*index_counter] = GF_ARF_LOW; for (idx = 0; idx <= mid; ++idx) if (EOF == input_stats(twopass, &fpf_frame)) break; - gf_group->gfu_boost[*index_counter] = VPXMAX( - MIN_ARF_GF_BOOST, calc_arf_boost(cpi, end - mid, mid - start) >> depth); + gf_group->gfu_boost[*index_counter] = + VPXMAX(MIN_ARF_GF_BOOST, + calc_arf_boost(cpi, end - mid + 1, mid - start) >> depth); reset_fpf_position(twopass, start_pos); ++(*index_counter); - find_arf_order(cpi, gf_group, index_counter, depth + 1, start, mid); + find_arf_order(cpi, gf_group, index_counter, depth + 1, start, mid - 1); gf_group->update_type[*index_counter] = USE_BUF_FRAME; gf_group->arf_src_offset[*index_counter] = 0; + gf_group->frame_gop_index[*index_counter] = mid; gf_group->rf_level[*index_counter] = INTER_NORMAL; gf_group->layer_depth[*index_counter] = depth; ++(*index_counter); @@ -2167,6 +2171,7 @@ static INLINE void set_gf_overlay_frame_type(GF_GROUP *gf_group, gf_group->update_type[frame_index] = OVERLAY_UPDATE; gf_group->rf_level[frame_index] = INTER_NORMAL; gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1; + gf_group->gfu_boost[frame_index] = NORMAL_BOOST; } else { gf_group->update_type[frame_index] = GF_UPDATE; gf_group->rf_level[frame_index] = GF_ARF_STD; @@ -2174,19 +2179,20 @@ static INLINE void set_gf_overlay_frame_type(GF_GROUP *gf_group, } } -static int define_gf_group_structure(VP9_COMP *cpi) { +static void define_gf_group_structure(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; - int i; int frame_index = 0; - int key_frame; - int normal_frames; - - key_frame = cpi->common.frame_type == KEY_FRAME; + int key_frame = cpi->common.frame_type == KEY_FRAME; + int layer_depth = 1; + int gop_frames = + rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending); gf_group->frame_start = cpi->common.current_video_frame; - gf_group->frame_end = gf_group->frame_start + rc->baseline_gf_interval - 1; + gf_group->frame_end = gf_group->frame_start + rc->baseline_gf_interval; + gf_group->max_layer_depth = 0; + gf_group->allowed_max_layer_depth = 0; // For key frames the frame target rate is already set and it // is also the 
golden frame. @@ -2200,55 +2206,24 @@ static int define_gf_group_structure(VP9_COMP *cpi) { if (rc->source_alt_ref_pending) { gf_group->update_type[frame_index] = ARF_UPDATE; gf_group->rf_level[frame_index] = GF_ARF_STD; - gf_group->layer_depth[frame_index] = 1; + gf_group->layer_depth[frame_index] = layer_depth; gf_group->arf_src_offset[frame_index] = (unsigned char)(rc->baseline_gf_interval - 1); + gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval; + gf_group->max_layer_depth = 1; ++frame_index; + ++layer_depth; + gf_group->allowed_max_layer_depth = cpi->oxcf.enable_auto_arf; } - if (rc->source_alt_ref_pending && cpi->multi_layer_arf) { - find_arf_order(cpi, gf_group, &frame_index, 2, 0, - rc->baseline_gf_interval - 1); - - set_gf_overlay_frame_type(gf_group, frame_index, - rc->source_alt_ref_pending); - - gf_group->arf_src_offset[frame_index] = 0; - - return frame_index; - } - - normal_frames = - rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending); - - for (i = 0; i < normal_frames; ++i) { - if (twopass->stats_in >= twopass->stats_in_end) break; - - gf_group->update_type[frame_index] = LF_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - gf_group->arf_src_offset[frame_index] = 0; - gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1; - - ++frame_index; - } - - // Note: - // We need to configure the frame at the end of the sequence + 1 that will be - // the start frame for the next group. Otherwise prior to the call to - // vp9_rc_get_second_pass_params() the data will be undefined. + find_arf_order(cpi, gf_group, &frame_index, layer_depth, 1, gop_frames); set_gf_overlay_frame_type(gf_group, frame_index, rc->source_alt_ref_pending); - - if (rc->source_alt_ref_pending) { - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - } else { - gf_group->update_type[frame_index] = GF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_STD; - } gf_group->arf_src_offset[frame_index] = 0; + gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval; - return frame_index; + // Set the frame ops number. + gf_group->gf_group_size = frame_index; } static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, @@ -2273,7 +2248,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, double this_frame_score = 1.0; // Define the GF structure and specify - int gop_frames = define_gf_group_structure(cpi); + int gop_frames = gf_group->gf_group_size; key_frame = cpi->common.frame_type == KEY_FRAME; @@ -2326,8 +2301,9 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, for (idx = 2; idx < MAX_ARF_LAYERS; ++idx) { if (arf_depth_boost[idx] == 0) break; - arf_depth_bits[idx] = calculate_boost_bits( - rc->baseline_gf_interval, arf_depth_boost[idx], total_group_bits); + arf_depth_bits[idx] = + calculate_boost_bits(rc->baseline_gf_interval - total_arfs, + arf_depth_boost[idx], total_group_bits); total_group_bits -= arf_depth_bits[idx]; total_arfs += arf_depth_count[idx]; @@ -2570,17 +2546,17 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); + // Monitor for static sections. + if ((rc->frames_since_key + i - 1) > 1) { + zero_motion_accumulator = VPXMIN( + zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); + } + // Accumulate the effect of prediction quality decay. 
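
/*
 * define_gf_group_structure() above now hands everything between the
 * top-level ARF and the trailing overlay to find_arf_order(), which places
 * an ARF at the midpoint of [start, end], recurses into the left half one
 * layer deeper, then re-shows the buffered midpoint (USE_BUF_FRAME), bounded
 * by allowed_max_layer_depth. The sketch prints one plausible coding order
 * for an 8-frame span under those rules; the recursion into the right half
 * after the buffered frame is not visible in this excerpt and is assumed, as
 * are the depth limit and span used in main().
 */
#include <stdio.h>

static void toy_arf_order(int start, int end, int depth, int max_depth) {
  const int mid = (start + end + 1) >> 1;
  int i;
  if (end - start < 2 || depth > max_depth) {
    for (i = start; i <= end; ++i)
      printf("LF       frame %d depth %d\n", i, depth);
    return;
  }
  printf("ARF      frame %d depth %d\n", mid, depth);
  toy_arf_order(start, mid - 1, depth + 1, max_depth);
  printf("SHOW_BUF frame %d depth %d\n", mid, depth);
  toy_arf_order(mid + 1, end, depth + 1, max_depth); /* assumed, see above */
}

int main(void) {
  toy_arf_order(1, 8, 2, 4); /* an 8-frame span below the top-level ARF */
  return 0;
}
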
if (!flash_detected) { last_loop_decay_rate = loop_decay_rate; loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - // Monitor for static sections. - if ((rc->frames_since_key + i - 1) > 1) { - zero_motion_accumulator = VPXMIN( - zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); - } - // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, @@ -2705,6 +2681,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Adjust KF group bits and error remaining. twopass->kf_group_error_left -= gf_group_err; + // Decide GOP structure. + define_gf_group_structure(cpi); + // Allocate bits to each of the frames in the GF group. allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits); @@ -2748,17 +2727,11 @@ static int slide_transition(const FIRSTPASS_STATS *this_frame, (this_frame->coded_error > (next_frame->coded_error * ERROR_SPIKE)); } -// Threshold for use of the lagging second reference frame. High second ref -// usage may point to a transient event like a flash or occlusion rather than -// a real scene cut. -#define SECOND_REF_USEAGE_THRESH 0.1 // Minimum % intra coding observed in first pass (1.0 = 100%) #define MIN_INTRA_LEVEL 0.25 -// Minimum ratio between the % of intra coding and inter coding in the first -// pass after discounting neutral blocks (discounting neutral blocks in this -// way helps catch scene cuts in clips with very flat areas or letter box -// format clips with image padding. -#define INTRA_VS_INTER_THRESH 2.0 +// Threshold for use of the lagging second reference frame. Scene cuts do not +// usually have a high second ref useage. +#define SECOND_REF_USEAGE_THRESH 0.125 // Hard threshold where the first pass chooses intra for almost all blocks. // In such a case even if the frame is not a scene cut coding a key frame // may be a good option. @@ -2766,12 +2739,6 @@ static int slide_transition(const FIRSTPASS_STATS *this_frame, // Maximum threshold for the relative ratio of intra error score vs best // inter error score. #define KF_II_ERR_THRESHOLD 2.5 -// In real scene cuts there is almost always a sharp change in the intra -// or inter error score. -#define ERR_CHANGE_THRESHOLD 0.4 -// For real scene cuts we expect an improvment in the intra inter error -// ratio in the next frame. -#define II_IMPROVEMENT_THRESHOLD 3.5 #define KF_II_MAX 128.0 #define II_FACTOR 12.5 // Test for very low intra complexity which could cause false key frames @@ -2783,30 +2750,21 @@ static int test_candidate_kf(TWO_PASS *twopass, const FIRSTPASS_STATS *next_frame) { int is_viable_kf = 0; double pcnt_intra = 1.0 - this_frame->pcnt_inter; - double modified_pcnt_inter = - this_frame->pcnt_inter - this_frame->pcnt_neutral; // Does the frame satisfy the primary criteria of a key frame? // See above for an explanation of the test criteria. // If so, then examine how well it predicts subsequent frames. 
- if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && - (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && + if (!detect_flash(twopass, -1) && !detect_flash(twopass, 0) && + (this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) || (slide_transition(this_frame, last_frame, next_frame)) || - ((pcnt_intra > MIN_INTRA_LEVEL) && - (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) && + (((this_frame->coded_error > (next_frame->coded_error * 1.1)) && + (this_frame->coded_error > (last_frame->coded_error * 1.1))) && + (pcnt_intra > MIN_INTRA_LEVEL) && + ((pcnt_intra + this_frame->pcnt_neutral) > 0.5) && ((this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < - KF_II_ERR_THRESHOLD) && - ((fabs(last_frame->coded_error - this_frame->coded_error) / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > - ERR_CHANGE_THRESHOLD) || - (fabs(last_frame->intra_error - this_frame->intra_error) / - DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > - ERR_CHANGE_THRESHOLD) || - ((next_frame->intra_error / - DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > - II_IMPROVEMENT_THRESHOLD))))) { + KF_II_ERR_THRESHOLD)))) { int i; const FIRSTPASS_STATS *start_pos = twopass->stats_in; FIRSTPASS_STATS local_next_frame = *next_frame; @@ -3247,9 +3205,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { FILE *fpfile; fpfile = fopen("arf.stt", "a"); ++arf_count; - fprintf(fpfile, "%10d %10ld %10d %10d %10ld\n", cm->current_video_frame, - rc->frames_till_gf_update_due, rc->kf_boost, arf_count, - rc->gfu_boost); + fprintf(fpfile, "%10d %10ld %10d %10d %10ld %10ld\n", + cm->current_video_frame, rc->frames_till_gf_update_due, + rc->kf_boost, arf_count, rc->gfu_boost, cm->frame_type); fclose(fpfile); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h index b5f21eacb97..0807097ac1a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h @@ -43,12 +43,6 @@ typedef struct { #define INVALID_ROW -1 -// Length of the bi-predictive frame group (BFG) -// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain -// number of bi-predictive frames. 
-#define BFG_INTERVAL 2 -#define MAX_EXT_ARFS 2 -#define MIN_EXT_ARF_INTERVAL 4 #define MAX_ARF_LAYERS 6 typedef struct { @@ -135,6 +129,7 @@ typedef struct { FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 2]; unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2]; unsigned char layer_depth[MAX_STATIC_GF_GROUP_LENGTH + 2]; + unsigned char frame_gop_index[MAX_STATIC_GF_GROUP_LENGTH + 2]; int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 2]; int gfu_boost[MAX_STATIC_GF_GROUP_LENGTH + 2]; @@ -144,6 +139,9 @@ typedef struct { int arf_index_stack[MAX_LAG_BUFFERS * 2]; int top_arf_idx; int stack_size; + int gf_group_size; + int max_layer_depth; + int allowed_max_layer_depth; } GF_GROUP; typedef struct { @@ -200,7 +198,6 @@ struct ThreadData; struct TileDataEnc; void vp9_init_first_pass(struct VP9_COMP *cpi); -void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi); void vp9_first_pass(struct VP9_COMP *cpi, const struct lookahead_entry *source); void vp9_end_first_pass(struct VP9_COMP *cpi); @@ -219,17 +216,6 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi); void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width, int *scaled_frame_height); -static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) { - assert(MAX_EXT_ARFS > 0); - if (arf_pending) { - if (interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1)) - return MAX_EXT_ARFS; - else if (interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS) - return MAX_EXT_ARFS - 1; - } - return 0; -} - #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c index 2ec048b5314..831c79c1753 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c @@ -57,11 +57,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, { uint32_t distortion; uint32_t sse; + // TODO(yunqing): may use higher tap interp filter than 2 taps if needed. 
cpi->find_fractional_mv_step( x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0, mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, - 0); + 0, USE_2_TAPS); } xd->mi[0]->mode = NEWMV; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c index 995c54fc74c..a2543035c59 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c @@ -367,14 +367,12 @@ static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { *ir = (int)divide_and_round(x1 * b, y1); } -uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, - const MV *ref_mv, int allow_hp, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], - uint32_t *distortion, uint32_t *sse1, - const uint8_t *second_pred, int w, int h) { +uint32_t vp9_skip_sub_pixel_tree( + const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, + int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, + int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], + uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, @@ -397,6 +395,7 @@ uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, (void)sse; (void)thismse; (void)cost_list; + (void)use_accurate_subpel_search; return besterr; } @@ -406,7 +405,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, @@ -418,6 +417,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( (void)allow_hp; (void)forced_stop; (void)hstep; + (void)use_accurate_subpel_search; if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && @@ -471,8 +471,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_more( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; + (void)use_accurate_subpel_search; + besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); @@ -531,8 +533,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; + (void)use_accurate_subpel_search; + besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, 
second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); @@ -617,12 +621,119 @@ static const MV search_step_table[12] = { }; /* clang-format on */ +static int accurate_sub_pel_search( + const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf, + const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp, + const uint8_t *const src_address, const int src_stride, + const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred, + int w, int h, uint32_t *sse) { +#if CONFIG_VP9_HIGHBITDEPTH + uint64_t besterr; + assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); + assert(w != 0 && h != 0); + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]); + vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride, + pred16, w, this_mv, sf, w, h, 0, kernel, + MV_PRECISION_Q3, 0, 0, xd->bd); + if (second_pred != NULL) { + DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); + vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w, + h, pred16, w); + besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address, + src_stride, sse); + } else { + besterr = + vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse); + } + } else { + DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); + vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, + 0, kernel, MV_PRECISION_Q3, 0, 0); + if (second_pred != NULL) { + DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); + besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); + } else { + besterr = vfp->vf(pred, w, src_address, src_stride, sse); + } + } + if (besterr >= UINT_MAX) return UINT_MAX; + return (int)besterr; +#else + int besterr; + DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); + assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); + assert(w != 0 && h != 0); + (void)xd; + + vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, + 0, kernel, MV_PRECISION_Q3, 0, 0); + if (second_pred != NULL) { + DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); + vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); + besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); + } else { + besterr = vfp->vf(pred, w, src_address, src_stride, sse); + } + return besterr; +#endif // CONFIG_VP9_HIGHBITDEPTH +} + +// TODO(yunqing): this part can be further refactored. 
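accurate_sub_pel_search() above builds an actual inter prediction with the selected interpolation kernel and measures its error against the source block, instead of using the fast vfp->svf()/svaf() filtered-variance approximations; the kernel itself is picked further down from use_accurate_subpel_search (BILINEAR when the feature is off, otherwise FOURTAP, EIGHTTAP or EIGHTTAP_SHARP). A minimal sketch of how one candidate position is costed with it, following the call sites added later in vp9_find_best_sub_pixel_tree() (the wrapper name is hypothetical):

/* Sketch only: distortion of one candidate 1/8-pel MV via the accurate
 * path, combined with the usual motion vector rate cost. */
static int64_t accurate_candidate_cost(
    const MACROBLOCKD *xd, const struct scale_factors *me_sf,
    const MV *this_mv, const MV *ref_mv, const InterpKernel *kernel,
    const vp9_variance_fn_ptr_t *vfp, const uint8_t *src, int src_stride,
    const uint8_t *pre, int pre_stride, const uint8_t *second_pred, int w,
    int h, int *mvjcost, int *mvcost[2], int error_per_bit, uint32_t *sse) {
  const int mse =
      accurate_sub_pel_search(xd, this_mv, me_sf, kernel, vfp, src,
                              src_stride, pre, pre_stride, second_pred, w, h,
                              sse);
  return (int64_t)mse +
         mv_err_cost(this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
}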
+#if CONFIG_VP9_HIGHBITDEPTH +/* checks if (r, c) has better score than previous best */ +#define CHECK_BETTER1(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + int64_t tmpmse; \ + const MV mv = { r, c }; \ + const MV ref_mv = { rr, rc }; \ + thismse = \ + accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ + y, y_stride, second_pred, w, h, &sse); \ + tmpmse = thismse; \ + tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ + if (tmpmse >= INT_MAX) { \ + v = INT_MAX; \ + } else if ((v = (uint32_t)tmpmse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } +#else +/* checks if (r, c) has better score than previous best */ +#define CHECK_BETTER1(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + const MV mv = { r, c }; \ + const MV ref_mv = { rr, rc }; \ + thismse = \ + accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ + y, y_stride, second_pred, w, h, &sse); \ + if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ + thismse) < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } + +#endif + uint32_t vp9_find_best_sub_pixel_tree( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { const uint8_t *const z = x->plane[0].src.buf; const uint8_t *const src_address = z; const int src_stride = x->plane[0].src.stride; @@ -650,6 +761,17 @@ uint32_t vp9_find_best_sub_pixel_tree( int kr, kc; MvLimits subpel_mv_limits; + // TODO(yunqing): need to add 4-tap filter optimization to speed up the + // encoder. + const InterpKernel *kernel = + (use_accurate_subpel_search > 0) + ? ((use_accurate_subpel_search == USE_4_TAPS) + ? vp9_filter_kernels[FOURTAP] + : ((use_accurate_subpel_search == USE_8_TAPS) + ? 
vp9_filter_kernels[EIGHTTAP] + : vp9_filter_kernels[EIGHTTAP_SHARP])) + : vp9_filter_kernels[BILINEAR]; + vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); minc = subpel_mv_limits.col_min; maxc = subpel_mv_limits.col_max; @@ -674,16 +796,25 @@ uint32_t vp9_find_best_sub_pixel_tree( tr = br + search_step[idx].row; tc = bc + search_step[idx].col; if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); MV this_mv; this_mv.row = tr; this_mv.col = tc; - if (second_pred == NULL) - thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, - src_stride, &sse); - else - thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), - src_address, src_stride, &sse, second_pred); + + if (use_accurate_subpel_search) { + thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, + src_address, src_stride, y, + y_stride, second_pred, w, h, &sse); + } else { + const uint8_t *const pre_address = + y + (tr >> 3) * y_stride + (tc >> 3); + if (second_pred == NULL) + thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, &sse); + else + thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, &sse, second_pred); + } + cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -705,14 +836,21 @@ uint32_t vp9_find_best_sub_pixel_tree( tc = bc + kc; tr = br + kr; if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); MV this_mv = { tr, tc }; - if (second_pred == NULL) - thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, - src_stride, &sse); - else - thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, - src_stride, &sse, second_pred); + if (use_accurate_subpel_search) { + thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, + src_address, src_stride, y, y_stride, + second_pred, w, h, &sse); + } else { + const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); + if (second_pred == NULL) + thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, + src_stride, &sse); + else + thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, &sse, second_pred); + } + cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); @@ -743,20 +881,36 @@ uint32_t vp9_find_best_sub_pixel_tree( if (tr == br && tc != bc) { kc = bc - tc; if (iters_per_step == 1) { - CHECK_BETTER(second, br0, bc0 + kc); + if (use_accurate_subpel_search) { + CHECK_BETTER1(second, br0, bc0 + kc); + } else { + CHECK_BETTER(second, br0, bc0 + kc); + } } } else if (tr != br && tc == bc) { kr = br - tr; if (iters_per_step == 1) { - CHECK_BETTER(second, br0 + kr, bc0); + if (use_accurate_subpel_search) { + CHECK_BETTER1(second, br0 + kr, bc0); + } else { + CHECK_BETTER(second, br0 + kr, bc0); + } } } if (iters_per_step > 1) { - CHECK_BETTER(second, br0 + kr, bc0); - CHECK_BETTER(second, br0, bc0 + kc); - if (br0 != br || bc0 != bc) { - CHECK_BETTER(second, br0 + kr, bc0 + kc); + if (use_accurate_subpel_search) { + CHECK_BETTER1(second, br0 + kr, bc0); + CHECK_BETTER1(second, br0, bc0 + kc); + if (br0 != br || bc0 != bc) { + CHECK_BETTER1(second, br0 + kr, bc0 + kc); + } + } else { + CHECK_BETTER(second, br0 + kr, bc0); + CHECK_BETTER(second, br0, bc0 + kc); + if (br0 != br || bc0 != bc) { + CHECK_BETTER(second, br0 + kr, bc0 
+ kc); + } } } } @@ -781,6 +935,7 @@ uint32_t vp9_find_best_sub_pixel_tree( } #undef CHECK_BETTER +#undef CHECK_BETTER1 static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col, int range) { @@ -1578,9 +1733,10 @@ static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv, } #if CONFIG_NON_GREEDY_MV -static double nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs) { +double av1_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs) { int i; - double best_cost = -1; + int update = 0; + double best_cost = 0; vpx_clear_system_state(); for (i = 0; i < NB_MVS_NUM; ++i) { if (nb_mvs[i].as_int != INVALID_MV) { @@ -1589,18 +1745,15 @@ static double nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs) { const double col_diff = mv->col - nb_mv.col; double cost = row_diff * row_diff + col_diff * col_diff; cost = log2(1 + cost); - if (best_cost < 0) { + if (update == 0) { best_cost = cost; + update = 1; } else { best_cost = cost < best_cost ? cost : best_cost; } } } - if (best_cost < 0) { - return 0; - } else { - return best_cost; - } + return best_cost; } double vp9_diamond_search_sad_new(const MACROBLOCK *x, @@ -1646,7 +1799,7 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, // Check the starting position *best_mv_dist = fn_ptr->sdf(what, what_stride, in_what, in_what_stride); - *best_mv_cost = nb_mvs_inconsistency(best_full_mv, nb_full_mvs); + *best_mv_cost = av1_nb_mvs_inconsistency(best_full_mv, nb_full_mvs); bestsad = (*best_mv_dist) + lambda * (*best_mv_cost); i = 0; @@ -1679,7 +1832,8 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, const MV this_mv = { best_full_mv->row + ss_mv[i].row, best_full_mv->col + ss_mv[i].col }; const double mv_dist = sad_array[t]; - const double mv_cost = nb_mvs_inconsistency(&this_mv, nb_full_mvs); + const double mv_cost = + av1_nb_mvs_inconsistency(&this_mv, nb_full_mvs); double thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; @@ -1699,7 +1853,8 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x, const uint8_t *const check_here = ss_os[i] + best_address; const double mv_dist = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); - const double mv_cost = nb_mvs_inconsistency(&this_mv, nb_full_mvs); + const double mv_cost = + av1_nb_mvs_inconsistency(&this_mv, nb_full_mvs); double thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; @@ -2285,7 +2440,7 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, vpx_clear_system_state(); *best_mv_dist = fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride); - *best_mv_cost = nb_mvs_inconsistency(best_full_mv, nb_full_mvs); + *best_mv_cost = av1_nb_mvs_inconsistency(best_full_mv, nb_full_mvs); best_sad = (*best_mv_dist) + lambda * (*best_mv_cost); for (i = 0; i < search_range; i++) { @@ -2307,7 +2462,7 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, const MV mv = { best_full_mv->row + neighbors[j].row, best_full_mv->col + neighbors[j].col }; const double mv_dist = sads[j]; - const double mv_cost = nb_mvs_inconsistency(&mv, nb_full_mvs); + const double mv_cost = av1_nb_mvs_inconsistency(&mv, nb_full_mvs); const double thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { best_sad = thissad; @@ -2325,7 +2480,7 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, const double mv_dist = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride); - const double 
mv_cost = nb_mvs_inconsistency(&mv, nb_full_mvs); + const double mv_cost = av1_nb_mvs_inconsistency(&mv, nb_full_mvs); const double thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { best_sad = thissad; @@ -2587,7 +2742,8 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, (void)tc; \ (void)sse; \ (void)thismse; \ - (void)cost_list; + (void)cost_list; \ + (void)use_accurate_subpel_search; // Return the maximum MV. uint32_t vp9_return_max_sub_pixel_mv( @@ -2595,7 +2751,7 @@ uint32_t vp9_return_max_sub_pixel_mv( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { COMMON_MV_TEST; (void)minr; @@ -2617,7 +2773,7 @@ uint32_t vp9_return_min_sub_pixel_mv( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { COMMON_MV_TEST; (void)maxr; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h index adb02bc1abd..a159cb288ed 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h @@ -59,7 +59,7 @@ struct SPEED_FEATURES; int vp9_init_search_range(int size); int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv, - int sad_per_bit, int distance, + int error_per_bit, int search_range, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); @@ -75,7 +75,7 @@ typedef uint32_t(fractional_mv_step_fp)( int forced_stop, // 0 - full, 1 - qtr only, 2 - half only int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h); + int h, int use_accurate_subpel_search); extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned; @@ -134,6 +134,8 @@ double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x, const vp9_variance_fn_ptr_t *fn_ptr, const int_mv *nb_full_mvs, struct TplDepStats *tpl_stats, int rf_idx); + +double av1_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs); #endif // CONFIG_NON_GREEDY_MV #ifdef __cplusplus } // extern "C" diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c index 249e03760fa..8c9a40f5586 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c @@ -148,7 +148,9 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { ne->last_h = cm->height; } return; - } else if (cm->current_video_frame > 60 && + } else if (frame_counter > 60 && cpi->svc.num_encoded_top_layer > 1 && + cpi->rc.frames_since_key > cpi->svc.number_spatial_layers && + cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && cpi->rc.avg_frame_low_motion < (low_res ? 70 : 50)) { // Force noise estimation to 0 and denoiser off if content has high motion. 
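Condensing the strengthened gate above (a sketch only; all fields as in the hunk): the denoiser is now only forced off once the top spatial layer has been encoded more than once, a full spatial-layer cycle has passed since the last key frame, and the current frame is on the top spatial layer, in addition to the existing frame-count and low-motion checks.

/* Sketch: condition under which noise estimation is forced to 0. */
const int force_off =
    frame_counter > 60 && cpi->svc.num_encoded_top_layer > 1 &&
    cpi->rc.frames_since_key > cpi->svc.number_spatial_layers &&
    cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
    cpi->rc.avg_frame_low_motion < (low_res ? 70 : 50);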
ne->level = kLowLow; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c index 416d437e07d..1324b5bc8aa 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c @@ -247,7 +247,8 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0, + cpi->sf.use_accurate_subpel_search); *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } @@ -1539,7 +1540,8 @@ static int search_new_mv(VP9_COMP *cpi, MACROBLOCK *x, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0); + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0, + cpi->sf.use_accurate_subpel_search); } else if (svc->use_base_mv && svc->spatial_layer_id) { if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) { const int pre_stride = xd->plane[0].pre[0].stride; @@ -1730,11 +1732,21 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (!cpi->use_svc || (svc->use_gf_temporal_ref_current_layer && !svc->layer_context[svc->temporal_layer_id].is_key_frame)) { + struct scale_factors *const sf_last = &cm->frame_refs[LAST_FRAME - 1].sf; + struct scale_factors *const sf_golden = + &cm->frame_refs[GOLDEN_FRAME - 1].sf; gf_temporal_ref = 1; - if (cpi->rc.avg_frame_low_motion > 70) - thresh_svc_skip_golden = 500; - else - thresh_svc_skip_golden = 0; + // For temporal long term prediction, check that the golden reference + // is same scale as last reference, otherwise disable. 
+ if ((sf_last->x_scale_fp != sf_golden->x_scale_fp) || + (sf_last->y_scale_fp != sf_golden->y_scale_fp)) { + gf_temporal_ref = 0; + } else { + if (cpi->rc.avg_frame_low_motion > 70) + thresh_svc_skip_golden = 500; + else + thresh_svc_skip_golden = 0; + } } init_ref_frame_cost(cm, xd, ref_frame_cost); @@ -2758,7 +2770,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dummy_dist, - &x->pred_sse[ref_frame], NULL, 0, 0); + &x->pred_sse[ref_frame], NULL, 0, 0, + cpi->sf.use_accurate_subpel_search); xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv; } else { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c index 76e310ac274..cdd824358cd 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c @@ -247,20 +247,65 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { return target; } +// Update the buffer level before encoding with the per-frame-bandwidth, +static void update_buffer_level_preencode(VP9_COMP *cpi) { + RATE_CONTROL *const rc = &cpi->rc; + rc->bits_off_target += rc->avg_frame_bandwidth; + // Clip the buffer level to the maximum specified buffer size. + rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size); + rc->buffer_level = rc->bits_off_target; +} + +// Update the buffer level before encoding with the per-frame-bandwidth +// for SVC. The current and all upper temporal layers are updated, needed +// for the layered rate control which involves cumulative buffer levels for +// the temporal layers. Allow for using the timestamp(pts) delta for the +// framerate when the set_ref_frame_config is used. +static void update_buffer_level_svc_preencode(VP9_COMP *cpi) { + SVC *const svc = &cpi->svc; + int i; + // Set this to 1 to use timestamp delta for "framerate" under + // ref_frame_config usage. + int use_timestamp = 1; + const int64_t ts_delta = + svc->time_stamp_superframe - svc->time_stamp_prev[svc->spatial_layer_id]; + for (i = svc->temporal_layer_id; i < svc->number_temporal_layers; ++i) { + const int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + if (use_timestamp && cpi->svc.use_set_ref_frame_config && + svc->number_temporal_layers == 1 && ts_delta > 0 && + svc->current_superframe > 0) { + // TODO(marpan): This may need to be modified for temporal layers. + const double framerate_pts = 10000000.0 / ts_delta; + lrc->bits_off_target += (int)(lc->target_bandwidth / framerate_pts); + } else { + lrc->bits_off_target += (int)(lc->target_bandwidth / lc->framerate); + } + // Clip buffer level to maximum buffer size for the layer. + lrc->bits_off_target = + VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); + lrc->buffer_level = lrc->bits_off_target; + if (i == svc->temporal_layer_id) { + cpi->rc.bits_off_target = lrc->bits_off_target; + cpi->rc.buffer_level = lrc->buffer_level; + } + } +} + // Update the buffer level for higher temporal layers, given the encoded current // temporal layer. 
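The pre-encode helpers above and the post-encode update in the next hunk split the usual leaky-bucket model into two steps: before encoding, the per-frame bandwidth is credited to the buffer; after encoding, the bits actually produced are debited; and the level is clipped to the configured maximum at both points. A minimal sketch of one frame's combined update, assuming the RATE_CONTROL fields used here (the helper name is hypothetical):

/* Sketch only: one frame of the leaky-bucket update as now split across
 * update_buffer_level_preencode() and update_buffer_level_postencode(). */
static void leaky_bucket_frame_update(RATE_CONTROL *rc,
                                      int encoded_frame_size) {
  /* Pre-encode: credit one frame's worth of bandwidth. */
  rc->bits_off_target += rc->avg_frame_bandwidth;
  rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
  /* Post-encode: debit the bits actually produced. */
  rc->bits_off_target -= encoded_frame_size;
  rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
  rc->buffer_level = rc->bits_off_target;
}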
-static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { +static void update_layer_buffer_level_postencode(SVC *svc, + int encoded_frame_size) { int i = 0; - int current_temporal_layer = svc->temporal_layer_id; + const int current_temporal_layer = svc->temporal_layer_id; for (i = current_temporal_layer + 1; i < svc->number_temporal_layers; ++i) { const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; - int bits_off_for_this_layer = - (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); - lrc->bits_off_target += bits_off_for_this_layer; - + lrc->bits_off_target -= encoded_frame_size; // Clip buffer level to maximum buffer size for the layer. lrc->bits_off_target = VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); @@ -268,29 +313,13 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { } } -// Update the buffer level: leaky bucket model. -static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { - const VP9_COMMON *const cm = &cpi->common; +// Update the buffer level after encoding with encoded frame size. +static void update_buffer_level_postencode(VP9_COMP *cpi, + int encoded_frame_size) { RATE_CONTROL *const rc = &cpi->rc; - - // On dropped frame, don't update buffer if its currently stable - // (above optimal level). This can cause issues when full superframe - // can drop (!= LAYER_DROP), since QP is adjusted downwards with buffer - // overflow, which can cause more frame drops. - if (cpi->svc.framedrop_mode != LAYER_DROP && encoded_frame_size == 0 && - rc->buffer_level > rc->optimal_buffer_level) - return; - - // Non-viewable frames are a special case and are treated as pure overhead. - if (!cm->show_frame) { - rc->bits_off_target -= encoded_frame_size; - } else { - rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size; - } - + rc->bits_off_target -= encoded_frame_size; // Clip the buffer level to the maximum specified buffer size. rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size); - // For screen-content mode, and if frame-dropper is off, don't let buffer // level go below threshold, given here as -rc->maximum_ buffer_size. 
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && @@ -300,7 +329,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->buffer_level = rc->bits_off_target; if (is_one_pass_cbr_svc(cpi)) { - update_layer_buffer_level(&cpi->svc, encoded_frame_size); + update_layer_buffer_level_postencode(&cpi->svc, encoded_frame_size); } } @@ -363,6 +392,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->high_source_sad = 0; rc->reset_high_source_sad = 0; rc->high_source_sad_lagindex = -1; + rc->high_num_blocks_with_motion = 0; rc->hybrid_intra_scene_change = 0; rc->re_encode_maxq_scene_change = 0; rc->alt_ref_gf_group = 0; @@ -398,6 +428,11 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( oxcf->init_framerate, rc->min_gf_interval); rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2; + + rc->force_max_q = 0; + rc->last_post_encode_dropped_scene_change = 0; + rc->use_post_encode_drop = 0; + rc->ext_use_post_encode_drop = 0; } static int check_buffer_above_thresh(VP9_COMP *cpi, int drop_mark) { @@ -515,6 +550,39 @@ static int drop_frame(VP9_COMP *cpi) { } } +int post_encode_drop_screen_content(VP9_COMP *cpi, size_t *size) { + size_t frame_size = *size << 3; + int64_t new_buffer_level = + cpi->rc.buffer_level + cpi->rc.avg_frame_bandwidth - (int64_t)frame_size; + + // For now we drop if new buffer level (given the encoded frame size) goes + // below 0. + if (new_buffer_level < 0) { + *size = 0; + vp9_rc_postencode_update_drop_frame(cpi); + // Update flag to use for next frame. + if (cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe)) + cpi->rc.last_post_encode_dropped_scene_change = 1; + // Force max_q on next fame. + cpi->rc.force_max_q = 1; + cpi->rc.avg_frame_qindex[INTER_FRAME] = cpi->rc.worst_quality; + cpi->last_frame_dropped = 1; + cpi->ext_refresh_frame_flags_pending = 0; + if (cpi->use_svc) { + cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; + cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; + cpi->svc.drop_count[cpi->svc.spatial_layer_id]++; + cpi->svc.skip_enhancement_layer = 1; + } + return 1; + } + + cpi->rc.force_max_q = 0; + cpi->rc.last_post_encode_dropped_scene_change = 0; + return 0; +} + int vp9_rc_drop_frame(VP9_COMP *cpi) { SVC *svc = &cpi->svc; int svc_prev_layer_dropped = 0; @@ -834,7 +902,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { int active_worst_quality; int ambient_qp; unsigned int num_frames_weight_key = 5 * cpi->svc.number_temporal_layers; - if (frame_is_intra_only(cm) || rc->reset_high_source_sad) + if (frame_is_intra_only(cm) || rc->reset_high_source_sad || rc->force_max_q) return rc->worst_quality; // For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME] // for the first few frames following key frame. These are both initialized @@ -845,6 +913,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { ? VPXMIN(rc->avg_frame_qindex[INTER_FRAME], rc->avg_frame_qindex[KEY_FRAME]) : rc->avg_frame_qindex[INTER_FRAME]; + active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 5) >> 2); // For SVC if the current base spatial layer was key frame, use the QP from // that base layer for ambient_qp. 
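As an illustrative calculation for post_encode_drop_screen_content() above (numbers are hypothetical): with buffer_level = 20000 bits, avg_frame_bandwidth = 40000 bits and an encoded frame of 9000 bytes (72000 bits), the projected level is 20000 + 40000 - 72000 = -12000 < 0, so the frame is dropped, *size is zeroed, max Q is forced for the next frame, and for SVC the enhancement layers are flagged to be skipped (svc.skip_enhancement_layer = 1).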
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) { @@ -854,9 +923,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { if (lc->is_key_frame) { const RATE_CONTROL *lrc = &lc->rc; ambient_qp = VPXMIN(ambient_qp, lrc->last_q[KEY_FRAME]); + active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 9) >> 3); } } - active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2); if (rc->buffer_level > rc->optimal_buffer_level) { // Adjust down. // Maximum limit for down adjustment ~30%; make it lower for screen content. @@ -1216,10 +1285,16 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq); if (frame_is_intra_only(cm)) { - // Handle the special case for key frames forced when we have reached - // the maximum key frame interval. Here force the Q to a range - // based on the ambient Q to reduce the risk of popping. - if (rc->this_key_frame_forced) { + if (rc->frames_to_key == 1 && oxcf->rc_mode == VPX_Q) { + // If the next frame is also a key frame or the current frame is the + // only frame in the sequence in AOM_Q mode, just use the cq_level + // as q. + active_best_quality = cq_level; + active_worst_quality = cq_level; + } else if (rc->this_key_frame_forced) { + // Handle the special case for key frames forced when we have reached + // the maximum key frame interval. Here force the Q to a range + // based on the ambient Q to reduce the risk of popping. double last_boosted_q; int delta_qindex; int qindex; @@ -1289,6 +1364,16 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, // Constrained quality use slightly lower active best. active_best_quality = active_best_quality * 15 / 16; + // Modify best quality for second level arfs. For mode VPX_Q this + // becomes the baseline frame q. + if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) { + const int layer_depth = gf_group->layer_depth[gf_group_index]; + // linearly fit the frame q depending on the layer depth index from + // the base layer ARF. + active_best_quality = + ((layer_depth - 1) * q + active_best_quality + layer_depth / 2) / + layer_depth; + } } else if (oxcf->rc_mode == VPX_Q) { if (!cpi->refresh_alt_ref_frame) { active_best_quality = cq_level; @@ -1297,8 +1382,14 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, // Modify best quality for second level arfs. For mode VPX_Q this // becomes the baseline frame q. - if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) - active_best_quality = (active_best_quality + cq_level + 1) / 2; + if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) { + const int layer_depth = gf_group->layer_depth[gf_group_index]; + // linearly fit the frame q depending on the layer depth index from + // the base layer ARF. 
+ active_best_quality = ((layer_depth - 1) * cq_level + + active_best_quality + layer_depth / 2) / + layer_depth; + } } } else { active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); @@ -1475,12 +1566,14 @@ void vp9_configure_buffer_updates(VP9_COMP *cpi, int gf_group_index) { } void vp9_estimate_qp_gop(VP9_COMP *cpi) { - int gop_length = cpi->rc.baseline_gf_interval; + int gop_length = cpi->twopass.gf_group.gf_group_size; int bottom_index, top_index; int idx; const int gf_index = cpi->twopass.gf_group.index; + const int is_src_frame_alt_ref = cpi->rc.is_src_frame_alt_ref; + const int refresh_frame_context = cpi->common.refresh_frame_context; - for (idx = 1; idx <= gop_length + 1 && idx < MAX_LAG_BUFFERS; ++idx) { + for (idx = 1; idx <= gop_length; ++idx) { TplDepFrame *tpl_frame = &cpi->tpl_stats[idx]; int target_rate = cpi->twopass.gf_group.bit_allocation[idx]; cpi->twopass.gf_group.index = idx; @@ -1492,6 +1585,8 @@ void vp9_estimate_qp_gop(VP9_COMP *cpi) { } // Reset the actual index and frame update cpi->twopass.gf_group.index = gf_index; + cpi->rc.is_src_frame_alt_ref = is_src_frame_alt_ref; + cpi->common.refresh_frame_context = refresh_frame_context; vp9_configure_buffer_updates(cpi, gf_index); } @@ -1672,7 +1767,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex; - update_buffer_level(cpi, rc->projected_frame_size); + update_buffer_level_postencode(cpi, rc->projected_frame_size); // Rolling monitors of whether we are over or underspending used to help // regulate min and Max Q in two pass. @@ -1769,14 +1864,20 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { - // Update buffer level with zero size, update frame counters, and return. - update_buffer_level(cpi, 0); cpi->common.current_video_frame++; cpi->rc.frames_since_key++; cpi->rc.frames_to_key--; cpi->rc.rc_2_frame = 0; cpi->rc.rc_1_frame = 0; cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth; + // For SVC on dropped frame when framedrop_mode != LAYER_DROP: + // in this mode the whole superframe may be dropped if only a single layer + // has buffer underflow (below threshold). Since this can then lead to + // increasing buffer levels/overflow for certain layers even though whole + // superframe is dropped, we cap buffer level if its already stable. + if (cpi->use_svc && cpi->svc.framedrop_mode != LAYER_DROP && + cpi->rc.buffer_level > cpi->rc.optimal_buffer_level) + cpi->rc.buffer_level = cpi->rc.optimal_buffer_level; } static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { @@ -1822,10 +1923,9 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target; - // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic. 
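The layer-depth interpolation used for second-level ARFs in the two hunks above replaces the fixed midpoint average (active_best_quality + cq_level + 1) / 2. As an illustrative calculation with hypothetical values layer_depth = 3, baseline q (or cq_level) = 120 and active_best_quality = 60: ((3 - 1) * 120 + 60 + 3 / 2) / 3 = (240 + 60 + 1) / 3 = 100 in integer arithmetic, so the deeper an ARF sits below the base-layer ARF, the closer its quality floor moves toward the baseline frame q.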
if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || - rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) { + rc->frames_to_key == 0)) { cm->frame_type = KEY_FRAME; rc->this_key_frame_forced = cm->current_video_frame != 0 && rc->frames_to_key == 0; @@ -2031,7 +2131,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { cm->frame_type = KEY_FRAME; rc->source_alt_ref_active = 0; if (is_one_pass_cbr_svc(cpi)) { - if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi); + if (cm->current_video_frame > 0) vp9_svc_reset_temporal_layers(cpi, 1); layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, svc->number_temporal_layers); svc->layer_context[layer].is_key_frame = 1; @@ -2110,15 +2210,15 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { vp9_cyclic_refresh_update_parameters(cpi); vp9_rc_set_frame_target(cpi, target); + if (cm->show_frame) update_buffer_level_svc_preencode(cpi); } void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target; - // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic. if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) || - rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0)) { + rc->frames_to_key == 0) { cm->frame_type = KEY_FRAME; rc->frames_to_key = cpi->oxcf.key_freq; rc->kf_boost = DEFAULT_KF_BOOST; @@ -2151,6 +2251,9 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { target = calc_pframe_target_size_one_pass_cbr(cpi); vp9_rc_set_frame_target(cpi, target); + + if (cm->show_frame) update_buffer_level_preencode(cpi); + if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC) cpi->resize_pending = vp9_resize_one_pass_cbr(cpi); else @@ -2654,8 +2757,11 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { if (cm->use_highbitdepth) return; #endif rc->high_source_sad = 0; - if (cpi->svc.spatial_layer_id == 0 && src_width == last_src_width && - src_height == last_src_height) { + rc->high_num_blocks_with_motion = 0; + // For SVC: scene detection is only checked on first spatial layer of + // the superframe using the original/unscaled resolutions. + if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && + src_width == last_src_width && src_height == last_src_height) { YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL }; int num_mi_cols = cm->mi_cols; int num_mi_rows = cm->mi_rows; @@ -2772,6 +2878,8 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { } else { rc->avg_source_sad[lagframe_idx] = avg_sad; } + if (num_zero_temp_sad < (num_samples >> 1)) + rc->high_num_blocks_with_motion = 1; } } // For CBR non-screen content mode, check if we should reset the rate diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h index 3b441bf1f50..16aa08137ee 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h @@ -175,6 +175,7 @@ typedef struct { uint64_t avg_source_sad[MAX_LAG_BUFFERS]; uint64_t prev_avg_source_sad_lag; int high_source_sad_lagindex; + int high_num_blocks_with_motion; int alt_ref_gf_group; int last_frame_is_src_altref; int high_source_sad; @@ -186,6 +187,14 @@ typedef struct { int force_qpmin; int reset_high_source_sad; double perc_arf_usage; + int force_max_q; + // Last frame was dropped post encode on scene change. 
+ int last_post_encode_dropped_scene_change; + // Enable post encode frame dropping for screen content. Only enabled when + // ext_use_post_encode_drop is enabled by user. + int use_post_encode_drop; + // External flag to enable post encode frame dropping, controlled by user. + int ext_use_post_encode_drop; } RATE_CONTROL; struct VP9_COMP; @@ -194,7 +203,7 @@ struct VP9EncoderConfig; void vp9_rc_init(const struct VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc); -int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs, +int vp9_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, double correction_factor, vpx_bit_depth_t bit_depth); double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth); @@ -205,9 +214,9 @@ void vp9_rc_init_minq_luts(void); int vp9_rc_get_default_min_gf_interval(int width, int height, double framerate); // Note vp9_rc_get_default_max_gf_interval() requires the min_gf_interval to -// be passed in to ensure that the max_gf_interval returned is at least as bis +// be passed in to ensure that the max_gf_interval returned is at least as big // as that. -int vp9_rc_get_default_max_gf_interval(double framerate, int min_frame_rate); +int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval); // Generally at the high level, the following flow is expected // to be enforced for rate control: @@ -247,13 +256,16 @@ void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi); // Changes only the rate correction factors in the rate control structure. void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi); +// Post encode drop for CBR screen-content mode. +int post_encode_drop_screen_content(struct VP9_COMP *cpi, size_t *size); + // Decide if we should drop this frame: For 1-pass CBR. // Changes only the decimation count in the rate control structure int vp9_rc_drop_frame(struct VP9_COMP *cpi); // Computes frame size bounds. void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi, - int this_frame_target, + int frame_target, int *frame_under_shoot_limit, int *frame_over_shoot_limit); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c index 2e4a4fe9fa2..8323f3af4ee 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c @@ -173,69 +173,61 @@ static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12, static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128, 128, 144, 144 }; -int64_t vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) { - const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth); +int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) { + // largest dc_quant is 21387, therefore rdmult should always fit in uint32_t + // i.e. 
21387 * 21387 * 8 = 3659230152 = 0xDA1B6BC8 + const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth); + uint32_t rdmult = q * q; + + if (cpi->common.frame_type != KEY_FRAME) { + rdmult = rdmult * 3 + (rdmult * 2 / 3); + } else { + if (qindex < 64) + rdmult = rdmult * 4; + else if (qindex <= 128) + rdmult = rdmult * 3 + rdmult / 2; + else if (qindex < 190) + rdmult = rdmult * 4 + rdmult / 2; + else + rdmult = rdmult * 7 + rdmult / 2; + } #if CONFIG_VP9_HIGHBITDEPTH - int64_t rdmult = 0; switch (cpi->common.bit_depth) { - case VPX_BITS_8: rdmult = 88 * q * q / 24; break; - case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break; - default: - assert(cpi->common.bit_depth == VPX_BITS_12); - rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); - break; + case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break; + case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break; + default: break; } -#else - int64_t rdmult = 88 * q * q / 24; #endif // CONFIG_VP9_HIGHBITDEPTH - return rdmult; + return rdmult > 0 ? rdmult : 1; } -int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { - int64_t rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex); - +static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) { + int64_t rdmult_64 = rdmult; if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { const GF_GROUP *const gf_group = &cpi->twopass.gf_group; const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index]; - const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100)); + const int gfu_boost = cpi->multi_layer_arf + ? gf_group->gfu_boost[gf_group->index] + : cpi->rc.gfu_boost; + const int boost_index = VPXMIN(15, (gfu_boost / 100)); - rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7; - rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7); + rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7; + rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7); } - if (rdmult < 1) rdmult = 1; - return (int)rdmult; + return (int)rdmult_64; } -int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) { - const VP9_COMMON *cm = &cpi->common; - int64_t q = vp9_dc_quant(cm->base_qindex, 0, cpi->common.bit_depth); - -#if CONFIG_VP9_HIGHBITDEPTH - int64_t rdmult = 0; - switch (cpi->common.bit_depth) { - case VPX_BITS_8: rdmult = (int)((88 * q * q / beta) / 24); break; - case VPX_BITS_10: - rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4); - break; - default: - assert(cpi->common.bit_depth == VPX_BITS_12); - rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 8); - break; - } -#else - int64_t rdmult = (int)((88 * q * q / beta) / 24); -#endif // CONFIG_VP9_HIGHBITDEPTH - - if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index]; - const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100)); +int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { + int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex); + return modulate_rdmult(cpi, rdmult); +} - rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7; - rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7); - } - if (rdmult < 1) rdmult = 1; - return (int)rdmult; +int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) { + int rdmult = + vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex); + rdmult = (int)((double)rdmult / beta); + rdmult = rdmult > 0 ? 
rdmult : 1; + return modulate_rdmult(cpi, rdmult); } static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) { @@ -631,6 +623,7 @@ YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, const VP9_COMMON *const cm = &cpi->common; const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame); + assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME); return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ? &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h index f2fc776a4aa..fa85f2176f5 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h @@ -134,8 +134,7 @@ struct TileDataEnc; struct VP9_COMP; struct macroblock; -int64_t vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, - int qindex); +int vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, int qindex); int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex); @@ -145,7 +144,7 @@ void vp9_initialize_rd_consts(struct VP9_COMP *cpi); void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex); -void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, +void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2, unsigned int qstep, int *rate, int64_t *dist); void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE], @@ -176,8 +175,8 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi); void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi); -void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize, - int best_mode_index); +void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh, + int bsize, int best_mode_index); static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, const int *const thresh_fact) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c index 698faa343bb..9cde479cd6f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c @@ -1821,7 +1821,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, pw, ph); + &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search); } // Restore the pointer to the first (possibly scaled) prediction buffer. 
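On the rdmult rewrite in vp9_rd.c above: for inter frames the new integer form rdmult * 3 + rdmult * 2 / 3 keeps roughly the old 88 * q * q / 24 scaling (88 / 24 is about 3.67), while key frames now get a qindex-dependent multiplier (4, 3.5, 4.5 or 7.5) instead of one shared factor; the frame-type and gfu_boost modulation is factored into modulate_rdmult(), and vp9_get_adaptive_rdmult() now simply divides the same base value by beta before modulating. As an illustrative calculation with a hypothetical q = 100 on an inter frame: rdmult = 100 * 100 = 10000, then 10000 * 3 + 10000 * 2 / 3 = 36666 before modulation; with beta = 2.0 the adaptive variant starts from 18333.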
@@ -1875,6 +1875,8 @@ static int64_t rd_pick_best_sub8x8_mode( const BLOCK_SIZE bsize = mi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int pw = num_4x4_blocks_wide << 2; + const int ph = num_4x4_blocks_high << 2; ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; SPEED_FEATURES *const sf = &cpi->sf; @@ -2011,7 +2013,8 @@ static int64_t rd_pick_best_sub8x8_mode( x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop, sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &distortion, - &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0); + &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph, + cpi->sf.use_accurate_subpel_search); // save motion search result for use in compound prediction seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv; @@ -2330,6 +2333,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, const int best_predmv_idx = x->mv_best_ref_index[ref]; const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); + const int pw = num_4x4_blocks_wide_lookup[bsize] << 2; + const int ph = num_4x4_blocks_high_lookup[bsize] << 2; MV pred_mv[3]; pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv; @@ -2452,7 +2457,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph, + cpi->sf.use_accurate_subpel_search); } *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c index 6ac77aeef28..23a320ae553 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c @@ -424,11 +424,11 @@ void vp9_resize_plane(const uint8_t *const input, int height, int width, int in_stride, uint8_t *output, int height2, int width2, int out_stride) { int i; - uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height); + uint8_t *intbuf = (uint8_t *)calloc(width2 * height, sizeof(*intbuf)); uint8_t *tmpbuf = - (uint8_t *)malloc(sizeof(uint8_t) * (width < height ? height : width)); - uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height); - uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2); + (uint8_t *)calloc(width < height ? 
height : width, sizeof(*tmpbuf)); + uint8_t *arrbuf = (uint8_t *)calloc(height, sizeof(*arrbuf)); + uint8_t *arrbuf2 = (uint8_t *)calloc(height2, sizeof(*arrbuf2)); if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error; assert(width > 0); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c index 44909239d32..9b6c69a73fd 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c @@ -116,17 +116,13 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, sf->ml_partition_search_breakout_thresh[1] = -1.0f; sf->ml_partition_search_breakout_thresh[2] = -1.0f; } - #if CONFIG_VP9_HIGHBITDEPTH if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) { - sf->use_square_only_thresh_high = BLOCK_4X4; - sf->use_square_only_thresh_low = BLOCK_SIZES; - if (is_720p_or_larger) { - sf->partition_search_breakout_thr.dist = (1 << 23); - sf->use_ml_partition_search_breakout = 0; - } + sf->ml_partition_search_breakout_thresh[0] -= 1.0f; + sf->ml_partition_search_breakout_thresh[1] -= 1.0f; + sf->ml_partition_search_breakout_thresh[2] -= 1.0f; } -#endif +#endif // CONFIG_VP9_HIGHBITDEPTH } if (speed >= 2) { @@ -242,14 +238,10 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, if (speed >= 1) { sf->enable_tpl_model = 0; - sf->ml_var_partition_pruning = 0; + sf->ml_var_partition_pruning = !boosted; sf->ml_prune_rect_partition_threhold[1] = 200; sf->ml_prune_rect_partition_threhold[2] = 200; sf->ml_prune_rect_partition_threhold[3] = 200; -#if CONFIG_VP9_HIGHBITDEPTH - if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) - sf->prune_ref_frame_for_rect_partitions = 0; -#endif // CONFIG_VP9_HIGHBITDEPTH if (oxcf->pass == 2) { TWO_PASS *const twopass = &cpi->twopass; @@ -288,9 +280,11 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->exhaustive_searches_thresh = (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23) : INT_MAX; + sf->use_accurate_subpel_search = USE_4_TAPS; } if (speed >= 2) { + sf->ml_var_partition_pruning = 0; if (oxcf->vbr_corpus_complexity) sf->recode_loop = ALLOW_RECODE_FIRST; else @@ -328,6 +322,8 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, good_quality_mesh_patterns[mesh_density_level][i].interval; } } + + sf->use_accurate_subpel_search = USE_2_TAPS; } if (speed >= 3) { @@ -450,6 +446,7 @@ static void set_rt_speed_feature_framesize_independent( sf->disable_golden_ref = 0; sf->enable_tpl_model = 0; sf->enhanced_full_pixel_motion_search = 0; + sf->use_accurate_subpel_search = USE_2_TAPS; if (speed >= 1) { sf->allow_txfm_domain_distortion = 1; @@ -565,6 +562,16 @@ static void set_rt_speed_feature_framesize_independent( (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1); sf->max_delta_qindex = is_keyframe ? 
20 : 15; sf->partition_search_type = REFERENCE_PARTITION; +#if CONFIG_ML_VAR_PARTITION + if (!frame_is_intra_only(cm) && cm->width >= 360 && cm->height >= 360) + sf->partition_search_type = ML_BASED_PARTITION; + else + sf->partition_search_type = REFERENCE_PARTITION; +#if CONFIG_VP9_HIGHBITDEPTH + if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) + sf->partition_search_type = REFERENCE_PARTITION; +#endif // CONFIG_VP9_HIGHBITDEPTH +#endif // CONFIG_ML_VAR_PARTITION if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 && cpi->rc.is_src_frame_alt_ref) { sf->partition_search_type = VAR_BASED_PARTITION; @@ -626,9 +633,7 @@ static void set_rt_speed_feature_framesize_independent( sf->use_compound_nonrd_pickmode = 1; } #if CONFIG_ML_VAR_PARTITION - if (!frame_is_intra_only(cm) && cm->width >= 360 && cm->height >= 360) - sf->partition_search_type = ML_BASED_PARTITION; - else + if (frame_is_intra_only(cm) || cm->width < 360 || cm->height < 360) sf->partition_search_type = VAR_BASED_PARTITION; #if CONFIG_VP9_HIGHBITDEPTH if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) @@ -705,6 +710,7 @@ static void set_rt_speed_feature_framesize_independent( // For SVC: enable use of lower resolution partition for higher resolution, // only for 3 spatial layers and when config/top resolution is above VGA. // Enable only for non-base temporal layer frames. + // TODO(jianj): Investigate webm:1578 if (cpi->use_svc && cpi->svc.use_partition_reuse && cpi->svc.number_spatial_layers == 3 && cpi->svc.temporal_layer_id > 0 && cpi->oxcf.width * cpi->oxcf.height > 640 * 480) @@ -789,6 +795,21 @@ static void set_rt_speed_feature_framesize_independent( sf->partition_search_type = FIXED_PARTITION; sf->always_this_block_size = BLOCK_64X64; } + // Special case for screen content: increase motion search on base spatial + // layer when high motion is detected or previous SL0 frame was dropped. + // Avoid speed 5 for as there is an issue with SVC datarate test. + // TODO(marpan/jianj): Investigate issue at speed 5. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed > 5 && + cpi->svc.spatial_layer_id == 0 && + (cpi->rc.high_num_blocks_with_motion || cpi->svc.last_layer_dropped[0])) { + sf->mv.search_method = NSTEP; + sf->mv.fullpel_search_step_param = 2; + // TODO(marpan/jianj): Investigate issue for lower setting of step_param + // for spatial layers (namely on lower layers). + if (cpi->use_svc && cm->width != cpi->oxcf.width && + cm->height != cpi->oxcf.height) + sf->mv.fullpel_search_step_param = 4; + } } void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { @@ -897,12 +918,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->allow_quant_coeff_opt = sf->optimize_coefficients; sf->quant_opt_thresh = 99.0; sf->allow_acl = 1; -#if CONFIG_VP9_HIGHBITDEPTH - // TODO(jingning): Make the model support high bit-depth route. - sf->enable_tpl_model = !cm->use_highbitdepth && oxcf->enable_tpl_model; -#else sf->enable_tpl_model = oxcf->enable_tpl_model; -#endif sf->prune_ref_frame_for_rect_partitions = 0; for (i = 0; i < TX_SIZES; i++) { @@ -942,6 +958,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->ml_prune_rect_partition_threhold[2] = -1; sf->ml_prune_rect_partition_threhold[3] = -1; sf->ml_var_partition_pruning = 0; + sf->use_accurate_subpel_search = USE_8_TAPS; // Some speed-up features even for best quality as minimal impact on quality. 
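Across the hunks in this file, the new use_accurate_subpel_search setting forms a simple quality/speed ladder, and the screen-content override above is driven by rc->high_num_blocks_with_motion, which the scene-detection hunk in vp9_ratectrl.c sets when fewer than half of the sampled blocks have zero temporal SAD. A summary of the ladder as configured in this change (not new code):

/*   best-quality defaults             -> USE_8_TAPS
 *   good quality, speed >= 1          -> USE_4_TAPS
 *   good quality, speed >= 2, and RT  -> USE_2_TAPS (existing svf/svaf path) */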
sf->adaptive_rd_thresh = 1; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h index a895ed2354b..02673e60200 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h @@ -243,6 +243,13 @@ typedef enum { RE_ENCODE_MAXQ = 2 } OVERSHOOT_DETECTION_CBR_RT; +typedef enum { + USE_2_TAPS = 0, + USE_4_TAPS, + USE_8_TAPS, + USE_8_TAPS_SHARP, +} SUBPEL_SEARCH_TYPE; + typedef struct SPEED_FEATURES { MV_SPEED_FEATURES mv; @@ -586,6 +593,10 @@ typedef struct SPEED_FEATURES { // Allow for disabling golden reference. int disable_golden_ref; + + // Allow sub-pixel search to use interpolation filters with different taps in + // order to achieve accurate motion search result. + SUBPEL_SEARCH_TYPE use_accurate_subpel_search; } SPEED_FEATURES; struct VP9_COMP; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c index 1321c457575..21b920f11ae 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -53,6 +53,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->previous_frame_is_intra_only = 0; svc->superframe_has_layer_sync = 0; svc->use_set_ref_frame_config = 0; + svc->num_encoded_top_layer = 0; for (i = 0; i < REF_FRAMES; ++i) { svc->fb_idx_spatial_layer_id[i] = -1; @@ -329,6 +330,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { LAYER_CONTEXT *const lc = get_layer_context(cpi); const int old_frame_since_key = cpi->rc.frames_since_key; const int old_frame_to_key = cpi->rc.frames_to_key; + const int old_ext_use_post_encode_drop = cpi->rc.ext_use_post_encode_drop; cpi->rc = lc->rc; cpi->twopass = lc->twopass; @@ -346,7 +348,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->rc.frames_since_key = old_frame_since_key; cpi->rc.frames_to_key = old_frame_to_key; } - + cpi->rc.ext_use_post_encode_drop = old_ext_use_post_encode_drop; // For spatial-svc, allow cyclic-refresh to be applied on the spatial layers, // for the base temporal layer. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && @@ -736,6 +738,8 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { } svc->force_zero_mode_spatial_ref = 1; svc->mi_stride[svc->spatial_layer_id] = cpi->common.mi_stride; + svc->mi_rows[svc->spatial_layer_id] = cpi->common.mi_rows; + svc->mi_cols[svc->spatial_layer_id] = cpi->common.mi_cols; if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { set_flags_and_fb_idx_for_temporal_mode3(cpi); @@ -931,7 +935,7 @@ void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) { } // Reset on key frame: reset counters, references and buffer updates. 
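The SUBPEL_SEARCH_TYPE enum added to vp9_speed_features.h above backs the new use_accurate_subpel_search feature wired in earlier in this diff: the encoder defaults to USE_8_TAPS, the good-quality path steps down to USE_4_TAPS at speed 1 and USE_2_TAPS at speed 2, the real-time path starts at USE_2_TAPS, and the temporal filter's motion refinement requests USE_8_TAPS_SHARP. A minimal sketch of the intended trade-off is below; the helper is hypothetical and only shows how the setting could translate into a kernel length for sub-pel interpolation during motion search.

/* Hypothetical helper: map the speed feature to the number of interpolation
 * taps used when building sub-pel predictors for the fractional search. */
static INLINE int subpel_search_num_taps(SUBPEL_SEARCH_TYPE type) {
  switch (type) {
    case USE_2_TAPS: return 2;        /* bilinear-style, fastest */
    case USE_4_TAPS: return 4;        /* short 4-tap kernel */
    case USE_8_TAPS:
    case USE_8_TAPS_SHARP: return 8;  /* full 8-tap (sharp) kernels */
    default: return 8;
  }
}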
-void vp9_svc_reset_key_frame(VP9_COMP *const cpi) { +void vp9_svc_reset_temporal_layers(VP9_COMP *const cpi, int is_key) { int sl, tl; SVC *const svc = &cpi->svc; LAYER_CONTEXT *lc = NULL; @@ -939,7 +943,7 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) { for (tl = 0; tl < svc->number_temporal_layers; ++tl) { lc = &cpi->svc.layer_context[sl * svc->number_temporal_layers + tl]; lc->current_video_frame_in_layer = 0; - lc->frames_from_key_frame = 0; + if (is_key) lc->frames_from_key_frame = 0; } } if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { @@ -1089,13 +1093,16 @@ void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) { } } else if (svc->use_gf_temporal_ref_current_layer && !svc->layer_context[svc->temporal_layer_id].is_key_frame) { - // If the usage of golden as second long term reference is enabled for this - // layer, then temporal_layer_id of that reference must be base temporal - // layer 0, and spatial_layer_id of that reference must be same as current - // spatial_layer_id. - assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] == - svc->spatial_layer_id); - assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == 0); + // For the usage of golden as second long term reference: the + // temporal_layer_id of that reference must be base temporal layer 0, and + // spatial_layer_id of that reference must be same as current + // spatial_layer_id. If not, disable feature. + // TODO(marpan): Investigate when this can happen, and maybe put this check + // and reset in a different place. + if (svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] != + svc->spatial_layer_id || + svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] != 0) + svc->use_gf_temporal_ref_current_layer = 0; } } @@ -1107,7 +1114,8 @@ void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) { if (svc->spatial_layer_id == 0) { // On base spatial layer: if the current superframe has a layer sync then // reset the pattern counters and reset to base temporal layer. - if (svc->superframe_has_layer_sync) vp9_svc_reset_key_frame(cpi); + if (svc->superframe_has_layer_sync) + vp9_svc_reset_temporal_layers(cpi, cpi->common.frame_type == KEY_FRAME); } // If the layer sync is set for this current spatial layer then // disable the temporal reference. diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h index fceab7780bb..94531204497 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -125,6 +125,8 @@ typedef struct SVC { BLOCK_SIZE *prev_partition_svc; int mi_stride[VPX_MAX_LAYERS]; + int mi_rows[VPX_MAX_LAYERS]; + int mi_cols[VPX_MAX_LAYERS]; int first_layer_denoise; @@ -178,9 +180,14 @@ typedef struct SVC { int first_spatial_layer_to_encode; + // Parameters for allowing framerate per spatial layer, and buffer + // update based on timestamps. 
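The comment above announces per-spatial-layer framerate and buffer handling; the hunk that follows adds time_stamp_superframe and a per-layer time_stamp_prev[] next to the existing duration[] array. As a hedged sketch (the helper name is an assumption, and first-frame handling is omitted), the per-layer duration can be derived from those timestamps and handed to that layer's rate control:

/* Sketch only: derive the elapsed time for the current spatial layer from the
 * new timestamp fields, then remember the superframe timestamp for the next
 * pass through this layer. */
static void svc_update_layer_duration(VP9_COMP *const cpi) {
  SVC *const svc = &cpi->svc;
  const int sl = svc->spatial_layer_id;
  /* duration[] is in timebase ticks; per-layer rate control can convert it
   * into an effective framerate when updating that layer's buffer level. */
  svc->duration[sl] = svc->time_stamp_superframe - svc->time_stamp_prev[sl];
  svc->time_stamp_prev[sl] = svc->time_stamp_superframe;
}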
int64_t duration[VPX_SS_MAX_LAYERS]; - int64_t timebase_fac; + int64_t time_stamp_superframe; + int64_t time_stamp_prev[VPX_SS_MAX_LAYERS]; + + int num_encoded_top_layer; } SVC; struct VP9_COMP; @@ -234,7 +241,7 @@ int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi); -void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi); +void vp9_svc_reset_temporal_layers(struct VP9_COMP *const cpi, int is_key); void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c index 51668d01d61..4c1d8894b41 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -119,8 +119,13 @@ static void apply_temporal_filter( unsigned int i, j, k, m; int modifier; const int rounding = (1 << strength) >> 1; - const int uv_block_width = block_width >> ss_x; - const int uv_block_height = block_height >> ss_y; + const unsigned int uv_block_width = block_width >> ss_x; + const unsigned int uv_block_height = block_height >> ss_y; + DECLARE_ALIGNED(16, uint16_t, y_diff_sse[256]); + DECLARE_ALIGNED(16, uint16_t, u_diff_sse[256]); + DECLARE_ALIGNED(16, uint16_t, v_diff_sse[256]); + + int idx = 0, idy; assert(strength >= 0); assert(strength <= 6); @@ -128,19 +133,42 @@ static void apply_temporal_filter( assert(filter_weight >= 0); assert(filter_weight <= 2); + memset(y_diff_sse, 0, 256 * sizeof(uint16_t)); + memset(u_diff_sse, 0, 256 * sizeof(uint16_t)); + memset(v_diff_sse, 0, 256 * sizeof(uint16_t)); + + // Calculate diff^2 for each pixel of the 16x16 block. + // TODO(yunqing): the following code needs to be optimized. 
+ for (i = 0; i < block_height; i++) { + for (j = 0; j < block_width; j++) { + const int16_t diff = + y_frame1[i * (int)y_stride + j] - y_pred[i * (int)block_width + j]; + y_diff_sse[idx++] = diff * diff; + } + } + idx = 0; + for (i = 0; i < uv_block_height; i++) { + for (j = 0; j < uv_block_width; j++) { + const int16_t diffu = + u_frame1[i * uv_stride + j] - u_pred[i * uv_buf_stride + j]; + const int16_t diffv = + v_frame1[i * uv_stride + j] - v_pred[i * uv_buf_stride + j]; + u_diff_sse[idx] = diffu * diffu; + v_diff_sse[idx] = diffv * diffv; + idx++; + } + } + for (i = 0, k = 0, m = 0; i < block_height; i++) { for (j = 0; j < block_width; j++) { const int pixel_value = y_pred[i * y_buf_stride + j]; // non-local mean approach - int diff_sse[9] = { 0 }; - int idx, idy; int y_index = 0; const int uv_r = i >> ss_y; const int uv_c = j >> ss_x; - - int diff; + modifier = 0; for (idy = -1; idy <= 1; ++idy) { for (idx = -1; idx <= 1; ++idx) { @@ -149,9 +177,7 @@ static void apply_temporal_filter( if (row >= 0 && row < (int)block_height && col >= 0 && col < (int)block_width) { - const int diff = y_frame1[row * (int)y_stride + col] - - y_pred[row * (int)block_width + col]; - diff_sse[y_index] = diff * diff; + modifier += y_diff_sse[row * (int)block_width + col]; ++y_index; } } @@ -159,16 +185,8 @@ static void apply_temporal_filter( assert(y_index > 0); - modifier = 0; - for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx]; - - diff = u_frame1[uv_r * uv_stride + uv_c] - - u_pred[uv_r * uv_buf_stride + uv_c]; - modifier += diff * diff; - - diff = v_frame1[uv_r * uv_stride + uv_c] - - v_pred[uv_r * uv_buf_stride + uv_c]; - modifier += diff * diff; + modifier += u_diff_sse[uv_r * uv_block_width + uv_c]; + modifier += v_diff_sse[uv_r * uv_block_width + uv_c]; y_index += 2; @@ -186,9 +204,6 @@ static void apply_temporal_filter( const int v_pixel_value = v_pred[uv_r * uv_buf_stride + uv_c]; // non-local mean approach - int u_diff_sse[9] = { 0 }; - int v_diff_sse[9] = { 0 }; - int idx, idy; int cr_index = 0; int u_mod = 0, v_mod = 0; int y_diff = 0; @@ -198,16 +213,10 @@ static void apply_temporal_filter( const int row = uv_r + idy; const int col = uv_c + idx; - if (row >= 0 && row < uv_block_height && col >= 0 && - col < uv_block_width) { - int diff = u_frame1[row * uv_stride + col] - - u_pred[row * uv_buf_stride + col]; - u_diff_sse[cr_index] = diff * diff; - - diff = v_frame1[row * uv_stride + col] - - v_pred[row * uv_buf_stride + col]; - v_diff_sse[cr_index] = diff * diff; - + if (row >= 0 && row < (int)uv_block_height && col >= 0 && + col < (int)uv_block_width) { + u_mod += u_diff_sse[row * uv_block_width + col]; + v_mod += v_diff_sse[row * uv_block_width + col]; ++cr_index; } } @@ -215,18 +224,11 @@ static void apply_temporal_filter( assert(cr_index > 0); - for (idx = 0; idx < 9; ++idx) { - u_mod += u_diff_sse[idx]; - v_mod += v_diff_sse[idx]; - } - for (idy = 0; idy < 1 + ss_y; ++idy) { for (idx = 0; idx < 1 + ss_x; ++idx) { const int row = (uv_r << ss_y) + idy; const int col = (uv_c << ss_x) + idx; - const int diff = y_frame1[row * (int)y_stride + col] - - y_pred[row * (int)block_width + col]; - y_diff += diff * diff; + y_diff += y_diff_sse[row * (int)block_width + col]; ++cr_index; } } @@ -325,13 +327,23 @@ void vp9_highbd_temporal_filter_apply_c( const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8); unsigned int i, j, k; int modifier; - int byte = 0; const int rounding = strength > 0 ? 
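The apply_temporal_filter rewrite above hoists the per-pixel squared differences into flat y_diff_sse/u_diff_sse/v_diff_sse tables before the main loops, so the 3x3 non-local-means accumulation becomes table lookups instead of recomputing the same diffs for every overlapping neighborhood; the high-bit-depth path below applies the same idea with a single diff_sse[256] table. A self-contained sketch of the pattern on a generic W x H block (names, sizes, and the helper itself are illustrative, not the libvpx code, and assume W and H of at most 64):

#include <stdint.h>

/* Precompute per-pixel squared differences once, then sum each pixel's 3x3
 * neighborhood from the table. */
static void accumulate_3x3_sse(const uint8_t *ref, int ref_stride,
                               const uint8_t *pred, int w, int h,
                               uint32_t *out /* w*h accumulated SSEs */) {
  uint16_t diff_sse[64 * 64]; /* per-pixel squared difference, w*h entries */
  int i, j, dy, dx;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int d = ref[i * ref_stride + j] - pred[i * w + j];
      diff_sse[i * w + j] = (uint16_t)(d * d);
    }
  }
  /* Each output pixel sums the squared differences of its 3x3 neighborhood,
   * reusing the table instead of recomputing shared neighbors. */
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      uint32_t sum = 0;
      for (dy = -1; dy <= 1; ++dy) {
        for (dx = -1; dx <= 1; ++dx) {
          const int r = i + dy, c = j + dx;
          if (r >= 0 && r < h && c >= 0 && c < w) sum += diff_sse[r * w + c];
        }
      }
      out[i * w + j] = sum;
    }
  }
}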
1 << (strength - 1) : 0; + int diff_sse[256] = { 0 }; + int this_idx = 0; + + for (i = 0; i < block_height; i++) { + for (j = 0; j < block_width; j++) { + const int diff = + frame1[i * (int)stride + j] - frame2[i * (int)block_width + j]; + diff_sse[this_idx++] = diff * diff; + } + } + + modifier = 0; for (i = 0, k = 0; i < block_height; i++) { for (j = 0; j < block_width; j++, k++) { - int pixel_value = *frame2; - int diff_sse[9] = { 0 }; + int pixel_value = frame2[i * (int)block_width + j]; int idx, idy, index = 0; for (idy = -1; idy <= 1; ++idy) { @@ -341,22 +353,16 @@ void vp9_highbd_temporal_filter_apply_c( if (row >= 0 && row < (int)block_height && col >= 0 && col < (int)block_width) { - int diff = frame1[byte + idy * (int)stride + idx] - - frame2[idy * (int)block_width + idx]; - diff_sse[index] = diff * diff; + modifier += diff_sse[row * (int)block_width + col]; ++index; } } } assert(index > 0); - modifier = 0; - for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx]; - modifier *= 3; modifier /= index; - ++frame2; modifier += rounding; modifier >>= strength; @@ -367,11 +373,7 @@ void vp9_highbd_temporal_filter_apply_c( count[k] += modifier; accumulator[k] += modifier * pixel_value; - - byte++; } - - byte += stride - block_width; } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -421,12 +423,13 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, /* restore UMV window */ x->mv_limits = tmp_mv_limits; + // TODO(yunqing): may use higher tap interp filter than 2 taps if needed. // Ignore mv costing by sending NULL pointer instead of cost array bestsme = cpi->find_fractional_mv_step( x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_search_level, - cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, - 0); + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 16, + 16, USE_8_TAPS_SHARP); // Restore input state x->plane[0].src = src; @@ -949,8 +952,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { } // Initialize errorperbit and sabperbit. 
- rdmult = (int)vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX); - if (rdmult < 1) rdmult = 1; + rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX); set_error_per_bit(&cpi->td.mb, rdmult); vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c index 293cdcd675a..0cecd654019 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c @@ -185,8 +185,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { __m128i zero; int pass; @@ -215,7 +215,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, __m128i *in[8]; int index = 0; - (void)scan_ptr; + (void)scan; (void)coeff_ptr; // Pre-condition input (shift by two) @@ -449,7 +449,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, in7 = _mm_srai_epi16(in7, 1); } - iscan_ptr += n_coeffs; + iscan += n_coeffs; qcoeff_ptr += n_coeffs; dqcoeff_ptr += n_coeffs; n_coeffs = -n_coeffs; @@ -518,8 +518,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); + iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); @@ -582,8 +582,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); + iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c index bf874a09ec5..99c19389486 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c @@ -18,11 +18,13 @@ #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" -void vp9_fdct8x8_quant_ssse3( - const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, - int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) { 
+void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, + tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *round_ptr, + const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { __m128i zero; int pass; @@ -52,7 +54,7 @@ void vp9_fdct8x8_quant_ssse3( __m128i *in[8]; int index = 0; - (void)scan_ptr; + (void)scan; (void)coeff_ptr; // Pre-condition input (shift by two) @@ -280,7 +282,7 @@ void vp9_fdct8x8_quant_ssse3( in7 = _mm_srai_epi16(in7, 1); } - iscan_ptr += n_coeffs; + iscan += n_coeffs; qcoeff_ptr += n_coeffs; dqcoeff_ptr += n_coeffs; n_coeffs = -n_coeffs; @@ -350,8 +352,8 @@ void vp9_fdct8x8_quant_ssse3( zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); + iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); @@ -427,8 +429,8 @@ void vp9_fdct8x8_quant_ssse3( zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); + iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c index 4bebc34d676..8dfdbd50f6c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c @@ -15,7 +15,7 @@ #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/x86/bitdepth_conversion_avx2.h" -#include "vpx_dsp/x86/quantize_x86.h" +#include "vpx_dsp/x86/quantize_sse2.h" // Zero fill 8 positions in the output buffer. 
static INLINE void store_zero_tran_low(tran_low_t *a) { @@ -50,18 +50,18 @@ void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { __m128i eob; __m256i round256, quant256, dequant256; __m256i eob256, thr256; - (void)scan_ptr; + (void)scan; (void)skip_block; assert(!skip_block); coeff_ptr += n_coeffs; - iscan_ptr += n_coeffs; + iscan += n_coeffs; qcoeff_ptr += n_coeffs; dqcoeff_ptr += n_coeffs; n_coeffs = -n_coeffs; @@ -97,7 +97,7 @@ void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, store_tran_low(coeff256, dqcoeff_ptr + n_coeffs); } - eob256 = scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256); + eob256 = scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256); n_coeffs += 8 * 2; } @@ -124,8 +124,7 @@ void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256); store_tran_low(coeff256, dqcoeff_ptr + n_coeffs); eob256 = _mm256_max_epi16( - eob256, - scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256)); + eob256, scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256)); } else { store_zero_tran_low(qcoeff_ptr + n_coeffs); store_zero_tran_low(dqcoeff_ptr + n_coeffs); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c index ca0ad4407e5..885220a7129 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c @@ -21,20 +21,20 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan) { __m128i zero; __m128i thr; int16_t nzflag; __m128i eob; __m128i round, quant, dequant; - (void)scan_ptr; + (void)scan; (void)skip_block; assert(!skip_block); coeff_ptr += n_coeffs; - iscan_ptr += n_coeffs; + iscan += n_coeffs; qcoeff_ptr += n_coeffs; dqcoeff_ptr += n_coeffs; n_coeffs = -n_coeffs; @@ -100,8 +100,8 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); + iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); @@ -175,8 +175,8 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - iscan1 = 
_mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); + iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); + iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c index 3b2d9a86617..85f83a66249 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c @@ -1151,6 +1151,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, unsigned char *cx_data; cpi->svc.timebase_fac = timebase_units_to_ticks(timebase, 1); + cpi->svc.time_stamp_superframe = dst_time_stamp; // Set up internal flags if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1; @@ -1625,6 +1626,14 @@ static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx, return update_extra_cfg(ctx, &extra_cfg); } +static vpx_codec_err_t ctrl_set_postencode_drop(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + const unsigned int data = va_arg(args, unsigned int); + cpi->rc.ext_use_post_encode_drop = data; + return VPX_CODEC_OK; +} + static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP8_COPY_REFERENCE, ctrl_copy_reference }, @@ -1668,6 +1677,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP9E_SET_RENDER_SIZE, ctrl_set_render_size }, { VP9E_SET_TARGET_LEVEL, ctrl_set_target_level }, { VP9E_SET_ROW_MT, ctrl_set_row_mt }, + { VP9E_SET_POSTENCODE_DROP, ctrl_set_postencode_drop }, { VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test }, { VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred }, { VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer }, @@ -1690,7 +1700,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { { 0, { // NOLINT - 0, // g_usage + 0, // g_usage (unused) 8, // g_threads 0, // g_profile diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c index fdff877682a..6a4cb9acf6f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c @@ -270,6 +270,9 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { RANGE_CHECK(ctx, row_mt, 0, 1); ctx->pbi->row_mt = ctx->row_mt; + RANGE_CHECK(ctx, lpf_opt, 0, 1); + ctx->pbi->lpf_mt_opt = ctx->lpf_opt; + // If postprocessing was enabled by the application and a // configuration has not been provided, default it. 
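The ctrl_set_postencode_drop handler and its VP9E_SET_POSTENCODE_DROP entry registered above expose rc.ext_use_post_encode_drop to the application. A hedged usage sketch, assuming an already-initialized encoder context and that the control is routed through the usual vpx_codec_control entry point:

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

/* Enable post-encode frame dropping; the handler reads an unsigned int,
 * so 0 disables and nonzero enables the feature. */
static vpx_codec_err_t enable_post_encode_drop(vpx_codec_ctx_t *encoder) {
  return vpx_codec_control(encoder, VP9E_SET_POSTENCODE_DROP, 1u);
}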
if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) @@ -658,6 +661,13 @@ static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_enable_lpf_opt(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->lpf_opt = va_arg(args, int); + + return VPX_CODEC_OK; +} + static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { { VP8_COPY_REFERENCE, ctrl_copy_reference }, @@ -670,6 +680,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { { VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter }, { VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc }, { VP9D_SET_ROW_MT, ctrl_set_row_mt }, + { VP9D_SET_LOOP_FILTER_OPT, ctrl_enable_lpf_opt }, // Getters { VPXD_GET_LAST_QUANTIZER, ctrl_get_quantizer }, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h index a1c335278d2..f60688c4db2 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h @@ -46,6 +46,7 @@ struct vpx_codec_alg_priv { int svc_decoding; int svc_spatial_layer; int row_mt; + int lpf_opt; }; #endif // VPX_VP9_VP9_DX_IFACE_H_ |
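Finally, the decoder gains a VP9D_SET_LOOP_FILTER_OPT control: ctrl_enable_lpf_opt stores an int flag in ctx->lpf_opt, which init_decoder copies into pbi->lpf_mt_opt (the name suggests a multithreaded loop-filter path, though the diff itself only shows the plumbing). A hedged usage sketch, assuming an initialized decoder context and the standard vpx_codec_control entry point:

#include "vpx/vpx_decoder.h"
#include "vpx/vp8dx.h"

/* Request the loop-filter optimization before decoding starts; the handler
 * reads an int flag (0 = off, nonzero = on). */
static vpx_codec_err_t enable_loop_filter_opt(vpx_codec_ctx_t *decoder) {
  return vpx_codec_control(decoder, VP9D_SET_LOOP_FILTER_OPT, 1);
}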