summaryrefslogtreecommitdiff
path: root/chromium/third_party/libvpx/source/libvpx/vp9
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/libvpx/source/libvpx/vp9')
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.h4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c18
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c22
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.h4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h9
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconinter.h6
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scale.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c157
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h19
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c122
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c41
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h22
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c8
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c26
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c31
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c8
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c123
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c696
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h46
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c166
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h22
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c254
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h6
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c27
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c202
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h20
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c91
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h9
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c12
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c8
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c59
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h11
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c30
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h11
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c118
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c16
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c24
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c15
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c16
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c12
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c11
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h1
49 files changed, 1812 insertions, 713 deletions
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.h
index dcc8e299899..ee9d37973ff 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.h
@@ -25,7 +25,7 @@ struct VP9Common;
void vp9_init_mv_probs(struct VP9Common *cm);
-void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
+void vp9_adapt_mv_probs(struct VP9Common *cm, int allow_hp);
static INLINE int use_mv_hp(const MV *ref) {
const int kMvRefThresh = 64; // threshold for use of high-precision 1/8 mv
@@ -127,7 +127,7 @@ typedef struct {
nmv_component_counts comps[2];
} nmv_context_counts;
-void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx);
+void vp9_inc_mv(const MV *mv, nmv_context_counts *counts);
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c
index 6c43af8ce80..adbda6c825b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c
@@ -63,6 +63,20 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 }
};
-const InterpKernel *vp9_filter_kernels[4] = {
- sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters
+// 4-tap filter
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_4[SUBPEL_SHIFTS]) = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 },
+ { 0, 0, -6, 120, 18, -4, 0, 0 }, { 0, 0, -8, 114, 28, -6, 0, 0 },
+ { 0, 0, -10, 108, 36, -6, 0, 0 }, { 0, 0, -12, 102, 46, -8, 0, 0 },
+ { 0, 0, -12, 94, 56, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 },
+ { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 },
+ { 0, 0, -10, 56, 94, -12, 0, 0 }, { 0, 0, -8, 46, 102, -12, 0, 0 },
+ { 0, 0, -6, 36, 108, -10, 0, 0 }, { 0, 0, -6, 28, 114, -8, 0, 0 },
+ { 0, 0, -4, 18, 120, -6, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 }
+};
+
+const InterpKernel *vp9_filter_kernels[5] = {
+ sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters,
+ sub_pel_filters_4
};
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h
index b379665b1c1..0382c88e7c0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h
@@ -25,6 +25,7 @@ extern "C" {
#define EIGHTTAP_SHARP 2
#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
#define BILINEAR 3
+#define FOURTAP 4
// The codec can operate in four possible inter prediction filter mode:
// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
@@ -32,7 +33,7 @@ extern "C" {
typedef uint8_t INTERP_FILTER;
-extern const InterpKernel *vp9_filter_kernels[4];
+extern const InterpKernel *vp9_filter_kernels[5];
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c
index da9180b71a5..95d6029f3b5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c
@@ -880,12 +880,12 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
- MODE_INFO **mi, const int mode_info_stride,
+ MODE_INFO **mi8x8, const int mode_info_stride,
LOOP_FILTER_MASK *lfm) {
int idx_32, idx_16, idx_8;
const loop_filter_info_n *const lfi_n = &cm->lf_info;
- MODE_INFO **mip = mi;
- MODE_INFO **mip2 = mi;
+ MODE_INFO **mip = mi8x8;
+ MODE_INFO **mip2 = mi8x8;
// These are offsets to the next mi in the 64x64 block. It is what gets
// added to the mi ptr as we go through each loop. It helps us to avoid
@@ -1087,13 +1087,19 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
const int row_step_stride = cm->mi_stride * row_step;
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
- unsigned int mask_16x16[MI_BLOCK_SIZE] = { 0 };
- unsigned int mask_8x8[MI_BLOCK_SIZE] = { 0 };
- unsigned int mask_4x4[MI_BLOCK_SIZE] = { 0 };
- unsigned int mask_4x4_int[MI_BLOCK_SIZE] = { 0 };
+ unsigned int mask_16x16[MI_BLOCK_SIZE];
+ unsigned int mask_8x8[MI_BLOCK_SIZE];
+ unsigned int mask_4x4[MI_BLOCK_SIZE];
+ unsigned int mask_4x4_int[MI_BLOCK_SIZE];
uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
int r, c;
+ vp9_zero(mask_16x16);
+ vp9_zero(mask_8x8);
+ vp9_zero(mask_4x4);
+ vp9_zero(mask_4x4_int);
+ vp9_zero(lfl);
+
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
unsigned int mask_16x16_c = 0;
unsigned int mask_8x8_c = 0;
@@ -1330,6 +1336,8 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
uint16_t mask_4x4_int = lfm->int_4x4_uv;
+ vp9_zero(lfl_uv);
+
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
// Vertical pass: do 2 rows at one time
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.h
index daf3b91315e..39648a72c32 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.h
@@ -97,7 +97,7 @@ struct VP9LfSyncData;
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
void vp9_setup_mask(struct VP9Common *const cm, const int mi_row,
- const int mi_col, MODE_INFO **mi_8x8,
+ const int mi_col, MODE_INFO **mi8x8,
const int mode_info_stride, LOOP_FILTER_MASK *lfm);
void vp9_filter_block_plane_ss00(struct VP9Common *const cm,
@@ -120,7 +120,7 @@ void vp9_loop_filter_init(struct VP9Common *cm);
void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl);
void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm,
- struct macroblockd *mbd, int filter_level,
+ struct macroblockd *xd, int frame_filter_level,
int y_only, int partial_frame);
// Get the superblock lfm for a given mi_row, mi_col.
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h
index 45d3b0f82f3..662b8ef5e12 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h
@@ -37,10 +37,9 @@ extern "C" {
#define REF_FRAMES_LOG2 3
#define REF_FRAMES (1 << REF_FRAMES_LOG2)
-// 1 scratch frame for the new frame, 3 for scaled references on the encoder.
-// TODO(jkoleszar): These 3 extra references could probably come from the
-// normal reference pool.
-#define FRAME_BUFFERS (REF_FRAMES + 4)
+// 1 scratch frame for the new frame, REFS_PER_FRAME for scaled references on
+// the encoder.
+#define FRAME_BUFFERS (REF_FRAMES + 1 + REFS_PER_FRAME)
#define FRAME_CONTEXTS_LOG2 2
#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)
@@ -259,6 +258,8 @@ typedef struct VP9Common {
PARTITION_CONTEXT *above_seg_context;
ENTROPY_CONTEXT *above_context;
int above_context_alloc_cols;
+
+ int lf_row;
} VP9_COMMON;
static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.h
index 0aafa72ca8a..67efc1b4e4b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.h
@@ -38,7 +38,7 @@ struct VP9Common;
#define MFQE_PRECISION 4
int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest,
- vp9_ppflags_t *flags, int unscaled_width);
+ vp9_ppflags_t *ppflags, int unscaled_width);
void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
uint8_t *limits);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconinter.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconinter.h
index 2c6d6695aba..992e30c344b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconinter.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconinter.h
@@ -61,15 +61,15 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize);
void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, const MV *mv_q3,
+ int dst_stride, const MV *src_mv,
const struct scale_factors *sf, int w, int h,
- int do_avg, const InterpKernel *kernel,
+ int ref, const InterpKernel *kernel,
enum mv_precision precision, int x, int y);
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_build_inter_predictor(
const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
- const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg,
+ const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
const InterpKernel *kernel, enum mv_precision precision, int x, int y,
int bd);
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
index 6d7f9526098..d7ad2b693bc 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -62,7 +62,7 @@ add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, i
add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";
-add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
# Note that there are more specializations appended when
@@ -100,7 +100,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd";
- add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *output, int pitch, int tx_type, int bd";
+ add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd";
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vp9_highbd_iht4x4_16_add neon sse4_1/;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scale.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scale.h
index 53c6eef7256..aaafdf86719 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scale.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scale.h
@@ -42,7 +42,7 @@ MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
int other_h, int this_w, int this_h,
- int use_high);
+ int use_highbd);
#else
void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
int other_h, int this_w, int this_h);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c
index d4b076645fb..36530fae677 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c
@@ -229,6 +229,28 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm,
workers, num_workers, lf_sync);
}
+void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level,
+ int num_workers) {
+ const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+
+ if (!frame_filter_level) return;
+
+ if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
+ num_workers > lf_sync->num_workers) {
+ vp9_loop_filter_dealloc(lf_sync);
+ vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
+ }
+
+ // Initialize cur_sb_col to -1 for all SB rows.
+ memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
+
+ lf_sync->corrupted = 0;
+
+ memset(lf_sync->num_tiles_done, 0,
+ sizeof(*lf_sync->num_tiles_done) * sb_rows);
+ cm->lf_row = 0;
+}
+
// Set up nsync by width.
static INLINE int get_sync_range(int width) {
// nsync numbers are picked by testing. For example, for 4k
@@ -266,6 +288,25 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
pthread_cond_init(&lf_sync->cond[i], NULL);
}
}
+ pthread_mutex_init(&lf_sync->lf_mutex, NULL);
+
+ CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
+ vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
+ if (lf_sync->recon_done_mutex) {
+ int i;
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL);
+ }
+ }
+
+ CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond,
+ vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows));
+ if (lf_sync->recon_done_cond) {
+ int i;
+ for (i = 0; i < rows; ++i) {
+ pthread_cond_init(&lf_sync->recon_done_cond[i], NULL);
+ }
+ }
}
#endif // CONFIG_MULTITHREAD
@@ -276,6 +317,11 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
+ CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done,
+ vpx_malloc(sizeof(*lf_sync->num_tiles_done) *
+ mi_cols_aligned_to_sb(cm->mi_rows) >>
+ MI_BLOCK_SIZE_LOG2));
+
// Set up nsync.
lf_sync->sync_range = get_sync_range(width);
}
@@ -298,15 +344,126 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
}
vpx_free(lf_sync->cond);
}
+ if (lf_sync->recon_done_mutex != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
+ }
+ vpx_free(lf_sync->recon_done_mutex);
+ }
+
+ pthread_mutex_destroy(&lf_sync->lf_mutex);
+ if (lf_sync->recon_done_cond != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
+ }
+ vpx_free(lf_sync->recon_done_cond);
+ }
#endif // CONFIG_MULTITHREAD
+
vpx_free(lf_sync->lfdata);
vpx_free(lf_sync->cur_sb_col);
+ vpx_free(lf_sync->num_tiles_done);
// clear the structure as the source of this call may be a resize in which
// case this call will be followed by an _alloc() which may fail.
vp9_zero(*lf_sync);
}
}
+static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
+ int return_val = -1;
+ int cur_row;
+ const int max_rows = cm->mi_rows;
+
+#if CONFIG_MULTITHREAD
+ const int tile_cols = 1 << cm->log2_tile_cols;
+
+ pthread_mutex_lock(&lf_sync->lf_mutex);
+ if (cm->lf_row < max_rows) {
+ cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
+ return_val = cm->lf_row;
+ cm->lf_row += MI_BLOCK_SIZE;
+ if (cm->lf_row < max_rows) {
+ /* If this is not the last row, make sure the next row is also decoded.
+ * This is because the intra predict has to happen before loop filter */
+ cur_row += 1;
+ }
+ }
+ pthread_mutex_unlock(&lf_sync->lf_mutex);
+
+ if (return_val == -1) return return_val;
+
+ pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]);
+ if (lf_sync->num_tiles_done[cur_row] < tile_cols) {
+ pthread_cond_wait(&lf_sync->recon_done_cond[cur_row],
+ &lf_sync->recon_done_mutex[cur_row]);
+ }
+ pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]);
+ pthread_mutex_lock(&lf_sync->lf_mutex);
+ if (lf_sync->corrupted) {
+ return_val = -1;
+ }
+ pthread_mutex_unlock(&lf_sync->lf_mutex);
+#else
+ (void)lf_sync;
+ if (cm->lf_row < max_rows) {
+ cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
+ return_val = cm->lf_row;
+ cm->lf_row += MI_BLOCK_SIZE;
+ if (cm->lf_row < max_rows) {
+ /* If this is not the last row, make sure the next row is also decoded.
+ * This is because the intra predict has to happen before loop filter */
+ cur_row += 1;
+ }
+ }
+#endif // CONFIG_MULTITHREAD
+
+ return return_val;
+}
+
+void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) {
+ int mi_row;
+ VP9_COMMON *cm = lf_data->cm;
+
+ while ((mi_row = get_next_row(cm, lf_sync)) != -1 && mi_row < cm->mi_rows) {
+ lf_data->start = mi_row;
+ lf_data->stop = mi_row + MI_BLOCK_SIZE;
+
+ thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+ lf_data->start, lf_data->stop, lf_data->y_only,
+ lf_sync);
+ }
+}
+
+void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
+ int corrupted) {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(&lf_sync->lf_mutex);
+ lf_sync->corrupted |= corrupted;
+ pthread_mutex_unlock(&lf_sync->lf_mutex);
+ pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
+ lf_sync->num_tiles_done[row] += 1;
+ if (num_tiles == lf_sync->num_tiles_done[row]) {
+ if (is_last_row) {
+ /* The last 2 rows wait on the last row to be done.
+ * So, we have to broadcast the signal in this case.
+ */
+ pthread_cond_broadcast(&lf_sync->recon_done_cond[row]);
+ } else {
+ pthread_cond_signal(&lf_sync->recon_done_cond[row]);
+ }
+ }
+ pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]);
+#else
+ (void)lf_sync;
+ (void)num_tiles;
+ (void)row;
+ (void)is_last_row;
+ (void)corrupted;
+#endif // CONFIG_MULTITHREAD
+}
+
// Accumulate frame counts.
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
const FRAME_COUNTS *counts, int is_dec) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h
index f92df5bd62d..b97e9ee134d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h
@@ -37,6 +37,14 @@ typedef struct VP9LfSyncData {
// Row-based parallel loopfilter data
LFWorkerData *lfdata;
int num_workers;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t lf_mutex;
+ pthread_mutex_t *recon_done_mutex;
+ pthread_cond_t *recon_done_cond;
+#endif
+ int *num_tiles_done;
+ int corrupted;
} VP9LfSync;
// Allocate memory for loopfilter row synchronization.
@@ -53,6 +61,17 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm,
int partial_frame, VPxWorker *workers,
int num_workers, VP9LfSync *lf_sync);
+// Multi-threaded loopfilter initialisations
+void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm,
+ int frame_filter_level, int num_workers);
+
+void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync);
+
+void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
+ int corrupted);
+
+void vp9_set_last_decoded_row(struct VP9Common *cm, int tile_col, int mi_row);
+
void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
const struct FRAME_COUNTS *counts, int is_dec);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c
index 48c49e2f5f3..bc0fc6197e6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -1451,6 +1451,25 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
return vpx_reader_find_end(&tile_data->bit_reader);
}
+static void set_rows_after_error(VP9LfSync *lf_sync, int start_row, int mi_rows,
+ int num_tiles_left, int total_num_tiles) {
+ do {
+ int mi_row;
+ const int aligned_rows = mi_cols_aligned_to_sb(mi_rows);
+ const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
+ const int corrupted = 1;
+ for (mi_row = start_row; mi_row < mi_rows; mi_row += MI_BLOCK_SIZE) {
+ const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
+ vp9_set_row(lf_sync, total_num_tiles, mi_row >> MI_BLOCK_SIZE_LOG2,
+ is_last_row, corrupted);
+ }
+ /* If there are multiple tiles, the second tile should start marking row
+ * progress from row 0.
+ */
+ start_row = 0;
+ } while (num_tiles_left--);
+}
+
// On entry 'tile_data->data_end' points to the end of the input frame, on exit
// it is updated to reflect the bitreader position of the final tile column if
// present in the tile buffer group or NULL otherwise.
@@ -1461,6 +1480,12 @@ static int tile_worker_hook(void *arg1, void *arg2) {
TileInfo *volatile tile = &tile_data->xd.tile;
const int final_col = (1 << pbi->common.log2_tile_cols) - 1;
const uint8_t *volatile bit_reader_end = NULL;
+ VP9_COMMON *cm = &pbi->common;
+
+ LFWorkerData *lf_data = tile_data->lf_data;
+ VP9LfSync *lf_sync = tile_data->lf_sync;
+
+ volatile int mi_row = 0;
volatile int n = tile_data->buf_start;
tile_data->error_info.setjmp = 1;
@@ -1468,14 +1493,26 @@ static int tile_worker_hook(void *arg1, void *arg2) {
tile_data->error_info.setjmp = 0;
tile_data->xd.corrupted = 1;
tile_data->data_end = NULL;
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ const int num_tiles_left = tile_data->buf_end - n;
+ const int mi_row_start = mi_row;
+ set_rows_after_error(lf_sync, mi_row_start, cm->mi_rows, num_tiles_left,
+ 1 << cm->log2_tile_cols);
+ }
return 0;
}
tile_data->xd.corrupted = 0;
do {
- int mi_row, mi_col;
+ int mi_col;
const TileBuffer *const buf = pbi->tile_buffers + n;
+
+ /* Initialize to 0 is safe since we do not deal with streams that have
+ * more than one row of tiles. (So tile->mi_row_start will be 0)
+ */
+ assert(cm->log2_tile_rows == 0);
+ mi_row = 0;
vp9_zero(tile_data->dqcoeff);
vp9_tile_init(tile, &pbi->common, 0, buf->col);
setup_token_decoder(buf->data, tile_data->data_end, buf->size,
@@ -1493,6 +1530,14 @@ static int tile_worker_hook(void *arg1, void *arg2) {
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
}
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+ const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
+ const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
+ vp9_set_row(lf_sync, 1 << cm->log2_tile_cols,
+ mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row,
+ tile_data->xd.corrupted);
+ }
}
if (buf->col == final_col) {
@@ -1500,6 +1545,21 @@ static int tile_worker_hook(void *arg1, void *arg2) {
}
} while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end);
+ if (pbi->lpf_mt_opt && n < tile_data->buf_end && cm->lf.filter_level &&
+ !cm->skip_loop_filter) {
+ /* This was not incremented in the tile loop, so increment before tiles left
+ * calculation
+ */
+ ++n;
+ set_rows_after_error(lf_sync, 0, cm->mi_rows, tile_data->buf_end - n,
+ 1 << cm->log2_tile_cols);
+ }
+
+ if (pbi->lpf_mt_opt && !tile_data->xd.corrupted && cm->lf.filter_level &&
+ !cm->skip_loop_filter) {
+ vp9_loopfilter_rows(lf_data, lf_sync);
+ }
+
tile_data->data_end = bit_reader_end;
return !tile_data->xd.corrupted;
}
@@ -1516,6 +1576,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data,
VP9_COMMON *const cm = &pbi->common;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
const uint8_t *bit_reader_end = NULL;
+ VP9LfSync *lf_row_sync = &pbi->lf_row_sync;
+ YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
@@ -1542,12 +1604,26 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data,
}
}
+ // Initialize LPF
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level,
+ pbi->num_tile_workers);
+ }
+
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
TileWorkerData *const tile_data =
&pbi->tile_worker_data[n + pbi->total_tiles];
winterface->sync(worker);
+
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ tile_data->lf_sync = lf_row_sync;
+ tile_data->lf_data = &tile_data->lf_sync->lfdata[n];
+ vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane);
+ tile_data->lf_data->y_only = 0;
+ }
+
tile_data->xd = pbi->mb;
tile_data->xd.counts =
cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
@@ -1908,6 +1984,28 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
setup_segmentation_dequant(cm);
setup_tile_info(cm, rb);
+ if (pbi->row_mt == 1) {
+ int num_sbs = 1;
+
+ if (pbi->row_mt_worker_data == NULL) {
+ CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data,
+ vpx_calloc(1, sizeof(*pbi->row_mt_worker_data)));
+ }
+
+ if (pbi->max_threads > 1) {
+ const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+ const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2;
+ const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+ const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2;
+
+ num_sbs = sb_cols * sb_rows;
+ }
+
+ if (num_sbs > pbi->row_mt_worker_data->num_sbs) {
+ vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
+ vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs);
+ }
+ }
sz = vpx_rb_read_literal(rb, 16);
if (sz == 0)
@@ -2069,17 +2167,19 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data,
if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
// Multi-threaded tile decoder
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
- if (!xd->corrupted) {
- if (!cm->skip_loop_filter) {
- // If multiple threads are used to decode tiles, then we use those
- // threads to do parallel loopfiltering.
- vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
- 0, 0, pbi->tile_workers, pbi->num_tile_workers,
- &pbi->lf_row_sync);
+ if (!pbi->lpf_mt_opt) {
+ if (!xd->corrupted) {
+ if (!cm->skip_loop_filter) {
+ // If multiple threads are used to decode tiles, then we use those
+ // threads to do parallel loopfiltering.
+ vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane,
+ cm->lf.filter_level, 0, 0, pbi->tile_workers,
+ pbi->num_tile_workers, &pbi->lf_row_sync);
+ }
+ } else {
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Decode failed. Frame data is corrupted.");
}
- } else {
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Decode failed. Frame data is corrupted.");
}
} else {
*p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c
index 5e41274cc89..1e2a4429347 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c
@@ -55,6 +55,43 @@ static void vp9_dec_setup_mi(VP9_COMMON *cm) {
cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
}
+void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
+ VP9_COMMON *cm, int num_sbs) {
+ int plane;
+ const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) *
+ sizeof(*row_mt_worker_data->dqcoeff[0]);
+ row_mt_worker_data->num_sbs = num_sbs;
+ for (plane = 0; plane < 3; ++plane) {
+ CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane],
+ vpx_memalign(16, dqcoeff_size));
+ memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size);
+ CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane],
+ vpx_calloc(num_sbs << EOBS_PER_SB_LOG2,
+ sizeof(*row_mt_worker_data->eob[plane])));
+ }
+ CHECK_MEM_ERROR(cm, row_mt_worker_data->partition,
+ vpx_calloc(num_sbs * PARTITIONS_PER_SB,
+ sizeof(*row_mt_worker_data->partition)));
+ CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map,
+ vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map)));
+}
+
+void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) {
+ if (row_mt_worker_data != NULL) {
+ int plane;
+ for (plane = 0; plane < 3; ++plane) {
+ vpx_free(row_mt_worker_data->eob[plane]);
+ row_mt_worker_data->eob[plane] = NULL;
+ vpx_free(row_mt_worker_data->dqcoeff[plane]);
+ row_mt_worker_data->dqcoeff[plane] = NULL;
+ }
+ vpx_free(row_mt_worker_data->partition);
+ row_mt_worker_data->partition = NULL;
+ vpx_free(row_mt_worker_data->recon_map);
+ row_mt_worker_data->recon_map = NULL;
+ }
+}
+
static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) {
cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
if (!cm->mip) return 1;
@@ -140,6 +177,10 @@ void vp9_decoder_remove(VP9Decoder *pbi) {
vp9_loop_filter_dealloc(&pbi->lf_row_sync);
}
+ if (pbi->row_mt == 1) {
+ vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
+ vpx_free(pbi->row_mt_worker_data);
+ }
vp9_remove_common(&pbi->common);
vpx_free(pbi);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h
index 1c488961a8d..9a582fffbb8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h
@@ -26,6 +26,10 @@
extern "C" {
#endif
+#define EOBS_PER_SB_LOG2 8
+#define DQCOEFFS_PER_SB_LOG2 12
+#define PARTITIONS_PER_SB 85
+
typedef struct TileBuffer {
const uint8_t *data;
size_t size;
@@ -37,12 +41,22 @@ typedef struct TileWorkerData {
int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive
vpx_reader bit_reader;
FRAME_COUNTS counts;
+ LFWorkerData *lf_data;
+ VP9LfSync *lf_sync;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
struct vpx_internal_error_info error_info;
} TileWorkerData;
+typedef struct RowMTWorkerData {
+ int num_sbs;
+ int *eob[MAX_MB_PLANE];
+ PARTITION_TYPE *partition;
+ tran_low_t *dqcoeff[MAX_MB_PLANE];
+ int8_t *recon_map;
+} RowMTWorkerData;
+
typedef struct VP9Decoder {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
@@ -74,10 +88,12 @@ typedef struct VP9Decoder {
int hold_ref_buf; // hold the reference buffer.
int row_mt;
+ int lpf_mt_opt;
+ RowMTWorkerData *row_mt_worker_data;
} VP9Decoder;
int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size,
- const uint8_t **dest);
+ const uint8_t **psource);
int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
vp9_ppflags_t *flags);
@@ -111,6 +127,10 @@ struct VP9Decoder *vp9_decoder_create(BufferPool *const pool);
void vp9_decoder_remove(struct VP9Decoder *pbi);
+void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
+ VP9_COMMON *cm, int num_sbs);
+void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data);
+
static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
BufferPool *const pool) {
if (idx >= 0 && frame_bufs[idx].ref_count > 0) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c
index 513718e7cb1..f8dd0a6f7a9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c
@@ -23,13 +23,13 @@ void vp9_fdct8x8_quant_neon(const int16_t *input, int stride,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
tran_low_t temp_buffer[64];
(void)coeff_ptr;
vpx_fdct8x8_neon(input, temp_buffer, stride);
vp9_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, round_ptr, quant_ptr,
- qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan_ptr,
- iscan_ptr);
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan,
+ iscan);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
index 97a09bdff6f..8b62b450cef 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -97,6 +97,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff);
store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff);
}
+#ifdef __aarch64__
+ *eob_ptr = vmaxvq_s16(v_eobmax_76543210);
+#else
{
const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
vget_high_s16(v_eobmax_76543210));
@@ -111,6 +114,7 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
*eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0);
}
+#endif // __aarch64__
}
static INLINE int32x4_t extract_sign_bit(int32x4_t a) {
@@ -122,7 +126,7 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16_t *quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan_ptr) {
+ const int16_t *scan, const int16_t *iscan) {
const int16x8_t one = vdupq_n_s16(1);
const int16x8_t neg_one = vdupq_n_s16(-1);
@@ -134,8 +138,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16x8_t dequant_thresh = vshrq_n_s16(vld1q_s16(dequant_ptr), 2);
// Process dc and the first seven ac coeffs.
- const uint16x8_t iscan =
- vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one));
+ const uint16x8_t v_iscan =
+ vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one));
const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr);
const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
const int16x8_t coeff_abs = vabsq_s16(coeff);
@@ -169,12 +173,12 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
dqcoeff = vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
- eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), iscan);
+ eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan);
store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
- iscan_ptr += 8;
+ iscan += 8;
coeff_ptr += 8;
qcoeff_ptr += 8;
dqcoeff_ptr += 8;
@@ -188,8 +192,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
// Process the rest of the ac coeffs.
for (i = 8; i < 32 * 32; i += 8) {
- const uint16x8_t iscan =
- vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one));
+ const uint16x8_t v_iscan =
+ vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one));
const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr);
const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
const int16x8_t coeff_abs = vabsq_s16(coeff);
@@ -215,17 +219,20 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
eob_max =
- vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), iscan));
+ vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan));
store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
- iscan_ptr += 8;
+ iscan += 8;
coeff_ptr += 8;
qcoeff_ptr += 8;
dqcoeff_ptr += 8;
}
+#ifdef __aarch64__
+ *eob_ptr = vmaxvq_u16(eob_max);
+#else
{
const uint16x4_t eob_max_0 =
vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max));
@@ -233,5 +240,6 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
+#endif // __aarch64__
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c
index 3720b0876d8..4f88b8fff6f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c
@@ -42,8 +42,8 @@ void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob;
bool16x8_t zero_coeff0, zero_coeff1;
@@ -52,10 +52,10 @@ void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
- int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr);
- int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr);
+ int16x8_t scan0 = vec_vsx_ld(0, iscan);
+ int16x8_t scan1 = vec_vsx_ld(16, iscan);
- (void)scan_ptr;
+ (void)scan;
(void)skip_block;
assert(!skip_block);
@@ -103,9 +103,9 @@ void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
coeff0 = vec_vsx_ld(off0, coeff_ptr);
coeff1 = vec_vsx_ld(off1, coeff_ptr);
coeff2 = vec_vsx_ld(off2, coeff_ptr);
- scan0 = vec_vsx_ld(off0, iscan_ptr);
- scan1 = vec_vsx_ld(off1, iscan_ptr);
- scan2 = vec_vsx_ld(off2, iscan_ptr);
+ scan0 = vec_vsx_ld(off0, iscan);
+ scan1 = vec_vsx_ld(off1, iscan);
+ scan2 = vec_vsx_ld(off2, iscan);
qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant);
zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
@@ -169,8 +169,7 @@ void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ const int16_t *scan, const int16_t *iscan) {
// In stage 1, we quantize 16 coeffs (DC + 15 AC)
// In stage 2, we loop 42 times and quantize 24 coeffs per iteration
// (32 * 32 - 16) / 24 = 42
@@ -188,13 +187,13 @@ void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
- int16x8_t scan0 = vec_vsx_ld(0, iscan_ptr);
- int16x8_t scan1 = vec_vsx_ld(16, iscan_ptr);
+ int16x8_t scan0 = vec_vsx_ld(0, iscan);
+ int16x8_t scan1 = vec_vsx_ld(16, iscan);
int16x8_t thres = vec_sra(dequant, vec_splats((uint16_t)2));
int16x8_t abs_coeff0 = vec_abs(coeff0);
int16x8_t abs_coeff1 = vec_abs(coeff1);
- (void)scan_ptr;
+ (void)scan;
(void)skip_block;
(void)n_coeffs;
assert(!skip_block);
@@ -238,9 +237,9 @@ void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
coeff0 = vec_vsx_ld(off0, coeff_ptr);
coeff1 = vec_vsx_ld(off1, coeff_ptr);
coeff2 = vec_vsx_ld(off2, coeff_ptr);
- scan0 = vec_vsx_ld(off0, iscan_ptr);
- scan1 = vec_vsx_ld(off1, iscan_ptr);
- scan2 = vec_vsx_ld(off2, iscan_ptr);
+ scan0 = vec_vsx_ld(off0, iscan);
+ scan1 = vec_vsx_ld(off1, iscan);
+ scan2 = vec_vsx_ld(off2, iscan);
abs_coeff0 = vec_abs(coeff0);
abs_coeff1 = vec_abs(coeff1);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h
index 06130584f0f..563fdbbdecd 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h
@@ -211,6 +211,8 @@ struct macroblock {
#if CONFIG_ML_VAR_PARTITION
DECLARE_ALIGNED(16, uint8_t, est_pred[64 * 64]);
#endif // CONFIG_ML_VAR_PARTITION
+
+ struct scale_factors *me_sf;
};
#ifdef __cplusplus
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c
index 8c039b2cb9d..b70890e68a8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c
@@ -360,6 +360,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
int is_skin = 0;
int increase_denoising = 0;
int consec_zeromv = 0;
+ int last_is_reference = cpi->ref_frame_flags & VP9_LAST_FLAG;
mv_col = ctx->best_sse_mv.as_mv.col;
mv_row = ctx->best_sse_mv.as_mv.row;
motion_magnitude = mv_row * mv_row + mv_col * mv_col;
@@ -403,7 +404,12 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
}
if (!is_skin && denoiser->denoising_level == kDenHigh) increase_denoising = 1;
- if (denoiser->denoising_level >= kDenLow && !ctx->sb_skip_denoising)
+ // Copy block if LAST_FRAME is not a reference.
+ // Last doesn't always exist when SVC layers are dynamically changed, e.g. top
+ // spatial layer doesn't have last reference when it's brought up for the
+ // first time on the fly.
+ if (last_is_reference && denoiser->denoising_level >= kDenLow &&
+ !ctx->sb_skip_denoising)
decision = perform_motion_compensation(
&cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
index ad30951afa3..98343f0d243 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -930,7 +930,9 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
PARTITION_TYPE partition_high;
if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0;
- if (mi_row >= (cm->mi_rows >> 1) || mi_col >= (cm->mi_cols >> 1)) return 0;
+ if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] ||
+ mi_col >= svc->mi_cols[svc->spatial_layer_id - 1])
+ return 0;
// Find corresponding (mi_col/mi_row) block down-scaled by 2x2.
start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col;
@@ -1378,6 +1380,20 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
x->sb_use_mv_part = 1;
x->sb_mvcol_part = mi->mv[0].as_mv.col;
x->sb_mvrow_part = mi->mv[0].as_mv.row;
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
+ cpi->svc.spatial_layer_id == 0 &&
+ cpi->rc.high_num_blocks_with_motion && !x->zero_temp_sad_source &&
+ cm->width > 640 && cm->height > 480) {
+ // Disable split below 16x16 block size when scroll motion is detected.
+ // TODO(marpan/jianj): Improve this condition: issue is that search
+ // range is hard-coded/limited in vp9_int_pro_motion_estimation() so
+ // scroll motion may not be detected here.
+ if ((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) ||
+ y_sad < 100000) {
+ compute_minmax_variance = 0;
+ thresholds[2] = INT64_MAX;
+ }
+ }
}
y_sad_last = y_sad;
@@ -3183,7 +3199,7 @@ static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
#define FEATURES 4
// ML-based partition search breakout.
-static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
+static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize,
const MACROBLOCK *const x,
const RD_COST *const rd_cost) {
DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
@@ -3214,14 +3230,29 @@ static int ml_predict_breakout(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
if (!linear_weights) return 0;
{ // Generate feature values.
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int ac_q =
+ vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
+#else
const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
const int num_pels_log2 = num_pels_log2_lookup[bsize];
int feature_index = 0;
unsigned int var, sse;
float rate_f, dist_f;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ var =
+ vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd);
+ } else {
+ var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp9_64_zeros, 0, &sse);
+ }
+#else
var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
vp9_64_zeros, 0, &sse);
+#endif
var = var >> num_pels_log2;
vpx_clear_system_state();
@@ -3288,7 +3319,12 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x,
{
const int64_t none_rdcost = pc_tree->none.rdcost;
const VP9_COMMON *const cm = &cpi->common;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int dc_q =
+ vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
+#else
const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
int feature_index = 0;
unsigned int block_var = 0;
unsigned int sub_block_var[4] = { 0 };
@@ -3404,31 +3440,38 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
MACROBLOCKD *xd = &x->e_mbd;
MODE_INFO *mi = xd->mi[0];
const NN_CONFIG *nn_config = NULL;
- DECLARE_ALIGNED(16, uint8_t, pred_buf[64 * 64]);
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]);
+ uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ ? (CONVERT_TO_BYTEPTR(pred_buffer))
+ : pred_buffer;
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]);
+ uint8_t *const pred_buf = pred_buffer;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ const int speed = cpi->oxcf.speed;
int i;
- float thresh_low = -1.0f;
- float thresh_high = 0.0f;
+ float thresh = 0.0f;
switch (bsize) {
case BLOCK_64X64:
nn_config = &vp9_var_rd_part_nnconfig_64;
- thresh_low = -3.0f;
- thresh_high = 3.0f;
+ thresh = speed > 0 ? 3.5f : 3.0f;
break;
case BLOCK_32X32:
nn_config = &vp9_var_rd_part_nnconfig_32;
- thresh_low = -3.0;
- thresh_high = 3.0f;
+ thresh = speed > 0 ? 3.5f : 3.0f;
break;
case BLOCK_16X16:
nn_config = &vp9_var_rd_part_nnconfig_16;
- thresh_low = -4.0;
- thresh_high = 4.0f;
+ thresh = speed > 0 ? 3.5f : 4.0f;
break;
case BLOCK_8X8:
nn_config = &vp9_var_rd_part_nnconfig_8;
- thresh_low = -2.0;
- thresh_high = 2.0f;
+ if (cm->width >= 720 && cm->height >= 720)
+ thresh = speed > 0 ? 2.5f : 2.0f;
+ else
+ thresh = speed > 0 ? 3.5f : 2.0f;
break;
default: assert(0 && "Unexpected block size."); return;
}
@@ -3476,7 +3519,12 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
{
float features[FEATURES] = { 0.0f };
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int dc_q =
+ vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8);
+#else
const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
int feature_idx = 0;
float score;
@@ -3520,8 +3568,8 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
// partition is better than the non-split partition. So if the score is
// high enough, we skip the none-split partition search; if the score is
// low enough, we skip the split partition search.
- if (score > thresh_high) *none = 0;
- if (score < thresh_low) *split = 0;
+ if (score > thresh) *none = 0;
+ if (score < -thresh) *split = 0;
}
}
#undef FEATURES
@@ -3529,7 +3577,8 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col,
int orig_rdmult) {
- TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+ const int gf_group_index = cpi->twopass.gf_group.index;
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index];
TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
int tpl_stride = tpl_frame->stride;
int64_t intra_cost = 0;
@@ -3544,9 +3593,9 @@ int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col,
if (tpl_frame->is_valid == 0) return orig_rdmult;
- if (cpi->common.show_frame) return orig_rdmult;
+ if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult;
- if (cpi->twopass.gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult;
+ if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult;
for (row = mi_row; row < mi_row + mi_high; ++row) {
for (col = mi_col; col < mi_col + mi_wide; ++col) {
@@ -3759,14 +3808,10 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
pc_tree->partitioning = PARTITION_NONE;
if (cpi->sf.ml_var_partition_pruning) {
- int do_ml_var_partition_pruning =
+ const int do_ml_var_partition_pruning =
!frame_is_intra_only(cm) && partition_none_allowed && do_split &&
mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows &&
mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- do_ml_var_partition_pruning = 0;
-#endif // CONFIG_VP9_HIGHBITDEPTH
if (do_ml_var_partition_pruning) {
ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col,
&partition_none_allowed, &do_split);
@@ -3814,13 +3859,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) {
- int use_ml_based_breakout =
+ const int use_ml_based_breakout =
cpi->sf.use_ml_partition_search_breakout &&
cm->base_qindex >= 100;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- use_ml_based_breakout = 0;
-#endif // CONFIG_VP9_HIGHBITDEPTH
if (use_ml_based_breakout) {
if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) {
do_split = 0;
@@ -4019,13 +4060,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
{
- int do_ml_rect_partition_pruning =
+ const int do_ml_rect_partition_pruning =
!frame_is_intra_only(cm) && !force_horz_split && !force_vert_split &&
(partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
- do_ml_rect_partition_pruning = 0;
-#endif
if (do_ml_rect_partition_pruning) {
ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed,
&partition_vert_allowed, best_rdc.rdcost, mi_row,
@@ -4505,15 +4542,9 @@ static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
int mi_col) {
VP9_COMMON *const cm = &cpi->common;
const NN_CONFIG *nn_config = NULL;
- float thresh_low = -0.2f;
- float thresh_high = 0.0f;
switch (bsize) {
- case BLOCK_64X64:
- nn_config = &vp9_var_part_nnconfig_64;
- thresh_low = -0.3f;
- thresh_high = -0.1f;
- break;
+ case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break;
case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break;
case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break;
case BLOCK_8X8: break;
@@ -4525,6 +4556,7 @@ static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
vpx_clear_system_state();
{
+ const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f;
float features[FEATURES] = { 0.0f };
const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
int feature_idx = 0;
@@ -4565,8 +4597,8 @@ static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
assert(feature_idx == FEATURES);
nn_predict(features, nn_config, score);
- if (score[0] > thresh_high) return 3;
- if (score[0] < thresh_low) return 0;
+ if (score[0] > thresh) return PARTITION_SPLIT;
+ if (score[0] < -thresh) return PARTITION_NONE;
return -1;
}
}
@@ -4644,8 +4676,8 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
if (partition_none_allowed && do_split) {
const int ml_predicted_partition =
ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col);
- if (ml_predicted_partition == 0) do_split = 0;
- if (ml_predicted_partition == 3) partition_none_allowed = 0;
+ if (ml_predicted_partition == PARTITION_NONE) do_split = 0;
+ if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0;
}
}
#endif // CONFIG_ML_VAR_PARTITION
@@ -5628,7 +5660,6 @@ static void encode_frame_internal(VP9_COMP *cpi) {
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
-
vp9_zero(*td->counts);
vp9_zero(cpi->td.rd_counts);
@@ -5693,7 +5724,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
source_var_based_partition_search_method(cpi);
- } else if (gf_group_index && gf_group_index < MAX_LAG_BUFFERS &&
+ } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE &&
cpi->sf.enable_tpl_model) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h
index 8bbf857872d..2f1be4b233f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h
@@ -27,7 +27,7 @@ void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref,
unsigned int *const max_mv_magnitude);
void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
- const nmv_context *mvctx, int usehp);
+ const nmv_context *ctx, int usehp);
void vp9_update_mv_count(ThreadData *td);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
index edb4cb288c8..33cfd9f75fe 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
@@ -52,6 +52,9 @@
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mbgraph.h"
+#if CONFIG_NON_GREEDY_MV
+#include "vp9/encoder/vp9_mcomp.h"
+#endif
#include "vp9/encoder/vp9_multi_thread.h"
#include "vp9/encoder/vp9_noise_estimate.h"
#include "vp9/encoder/vp9_picklpf.h"
@@ -2359,10 +2362,21 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
vp9_set_speed_features_framesize_dependent(cpi);
if (cpi->sf.enable_tpl_model) {
- for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
- int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
- int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
-
+ const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+ const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+#if CONFIG_NON_GREEDY_MV
+ CHECK_MEM_ERROR(
+ cm, cpi->feature_score_loc_arr,
+ vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr)));
+ CHECK_MEM_ERROR(
+ cm, cpi->feature_score_loc_sort,
+ vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
+ CHECK_MEM_ERROR(
+ cm, cpi->feature_score_loc_heap,
+ vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
+#endif
+ // TODO(jingning): Reduce the actual memory use for tpl model build up.
+ for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
vpx_calloc(mi_rows * mi_cols,
sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
@@ -2373,6 +2387,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
}
+
+ for (frame = 0; frame < REF_FRAMES; ++frame) {
+ cpi->enc_frame_buf[frame].mem_valid = 0;
+ cpi->enc_frame_buf[frame].released = 1;
+ }
}
// Allocate memory to store variances for a frame.
@@ -2449,6 +2468,17 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
vp9_loop_filter_init(cm);
+ // Set up the unit scaling factor used during motion search.
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
+ cm->width, cm->height,
+ cm->use_highbitdepth);
+#else
+ vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
+ cm->width, cm->height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ cpi->td.mb.me_sf = &cpi->me_sf;
+
cm->error.setjmp = 0;
return cpi;
@@ -2561,7 +2591,12 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
vp9_denoiser_free(&(cpi->denoiser));
#endif
- for (frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
+#if CONFIG_NON_GREEDY_MV
+ vpx_free(cpi->feature_score_loc_arr);
+ vpx_free(cpi->feature_score_loc_sort);
+ vpx_free(cpi->feature_score_loc_heap);
+#endif
+ for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
cpi->tpl_stats[frame].is_valid = 0;
}
@@ -3217,8 +3252,8 @@ void vp9_scale_references(VP9_COMP *cpi) {
if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
// Check for release of scaled reference.
buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
- buf = (buf_idx != INVALID_IDX) ? &pool->frame_bufs[buf_idx] : NULL;
- if (buf != NULL) {
+ if (buf_idx != INVALID_IDX) {
+ buf = &pool->frame_bufs[buf_idx];
--buf->ref_count;
cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
}
@@ -3249,22 +3284,21 @@ static void release_scaled_references(VP9_COMP *cpi) {
refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
const int idx = cpi->scaled_ref_idx[i - 1];
- RefCntBuffer *const buf =
- idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL;
- const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
- if (buf != NULL &&
- (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
- buf->buf.y_crop_height == ref->y_crop_height))) {
- --buf->ref_count;
- cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
+ if (idx != INVALID_IDX) {
+ RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
+ const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
+ if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
+ buf->buf.y_crop_height == ref->y_crop_height)) {
+ --buf->ref_count;
+ cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
+ }
}
}
} else {
- for (i = 0; i < MAX_REF_FRAMES; ++i) {
+ for (i = 0; i < REFS_PER_FRAME; ++i) {
const int idx = cpi->scaled_ref_idx[i];
- RefCntBuffer *const buf =
- idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL;
- if (buf != NULL) {
+ if (idx != INVALID_IDX) {
+ RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
--buf->ref_count;
cpi->scaled_ref_idx[i] = INVALID_IDX;
}
@@ -3457,6 +3491,11 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
// Decide q and q bounds.
*q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
+ if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) {
+ *q = cpi->rc.worst_quality;
+ cpi->rc.force_max_q = 0;
+ }
+
if (!frame_is_intra_only(cm)) {
vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
}
@@ -3661,14 +3700,16 @@ static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
uint8_t *dest) {
VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
int q = 0, bottom_index = 0, top_index = 0;
+ int no_drop_scene_change = 0;
const INTERP_FILTER filter_scaler =
(is_one_pass_cbr_svc(cpi))
- ? cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id]
+ ? svc->downsample_filter_type[svc->spatial_layer_id]
: EIGHTTAP;
const int phase_scaler =
(is_one_pass_cbr_svc(cpi))
- ? cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id]
+ ? svc->downsample_filter_phase[svc->spatial_layer_id]
: 0;
if (cm->show_existing_frame) {
@@ -3676,6 +3717,8 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
return 1;
}
+ svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
+
// Flag to check if its valid to compute the source sad (used for
// scene detection and for superblock content state in CBR mode).
// The flag may get reset below based on SVC or resizing state.
@@ -3688,25 +3731,25 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
if (is_one_pass_cbr_svc(cpi) &&
cpi->un_scaled_source->y_width == cm->width << 2 &&
cpi->un_scaled_source->y_height == cm->height << 2 &&
- cpi->svc.scaled_temp.y_width == cm->width << 1 &&
- cpi->svc.scaled_temp.y_height == cm->height << 1) {
+ svc->scaled_temp.y_width == cm->width << 1 &&
+ svc->scaled_temp.y_height == cm->height << 1) {
// For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
// advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
// result will be saved in scaled_temp and might be used later.
- const INTERP_FILTER filter_scaler2 = cpi->svc.downsample_filter_type[1];
- const int phase_scaler2 = cpi->svc.downsample_filter_phase[1];
+ const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
+ const int phase_scaler2 = svc->downsample_filter_phase[1];
cpi->Source = vp9_svc_twostage_scale(
- cm, cpi->un_scaled_source, &cpi->scaled_source, &cpi->svc.scaled_temp,
+ cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
- cpi->svc.scaled_one_half = 1;
+ svc->scaled_one_half = 1;
} else if (is_one_pass_cbr_svc(cpi) &&
cpi->un_scaled_source->y_width == cm->width << 1 &&
cpi->un_scaled_source->y_height == cm->height << 1 &&
- cpi->svc.scaled_one_half) {
+ svc->scaled_one_half) {
// If the spatial layer is 1/2x1/2 and the scaling is already done in the
// two-stage scaling, use the result directly.
- cpi->Source = &cpi->svc.scaled_temp;
- cpi->svc.scaled_one_half = 0;
+ cpi->Source = &svc->scaled_temp;
+ svc->scaled_one_half = 0;
} else {
cpi->Source = vp9_scale_if_required(
cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
@@ -3714,8 +3757,8 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
}
#ifdef OUTPUT_YUV_SVC_SRC
// Write out at most 3 spatial layers.
- if (is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id < 3) {
- vpx_write_yuv_frame(yuv_svc_src[cpi->svc.spatial_layer_id], cpi->Source);
+ if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
+ vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
}
#endif
// Unfiltered raw source used in metrics calculation if the source
@@ -3735,9 +3778,9 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
}
if ((cpi->use_svc &&
- (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1 ||
- cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 ||
- cpi->svc.current_superframe < 1)) ||
+ (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
+ svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
+ svc->current_superframe < 1)) ||
cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
cpi->resize_state != ORIG) {
cpi->compute_source_sad_onepass = 0;
@@ -3786,18 +3829,33 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
(cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
vp9_scene_detection_onepass(cpi);
- if (cpi->svc.spatial_layer_id == 0)
- cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad;
+ if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
+ svc->high_source_sad_superframe = cpi->rc.high_source_sad;
+ // On scene change reset temporal layer pattern to TL0.
+ // TODO(marpan/jianj): Fix this to handle case where base
+ // spatial layers are skipped, in which case we should insert
+ // and reset to spatial layer 0 on scene change.
+ // Only do this reset for bypass/flexible mode.
+ if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
+ svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ // rc->high_source_sad will get reset so copy it to restore it.
+ int tmp_high_source_sad = cpi->rc.high_source_sad;
+ vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
+ cpi->rc.high_source_sad = tmp_high_source_sad;
+ }
+ }
// For 1 pass CBR, check if we are dropping this frame.
// Never drop on key frame, if base layer is key for svc,
// on scene change, or if superframe has layer sync.
+ if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
+ !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
+ no_drop_scene_change = 1;
if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
- !frame_is_intra_only(cm) && !cpi->rc.high_source_sad &&
- !cpi->svc.high_source_sad_superframe &&
- !cpi->svc.superframe_has_layer_sync &&
+ !frame_is_intra_only(cm) && !no_drop_scene_change &&
+ !svc->superframe_has_layer_sync &&
(!cpi->use_svc ||
- !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
if (vp9_rc_drop_frame(cpi)) return 0;
}
@@ -3805,7 +3863,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
// avoid this frame-level upsampling (for non intra_only frames).
if (frame_is_intra_only(cm) == 0 &&
- !(is_one_pass_cbr_svc(cpi) && cpi->svc.force_zero_mode_spatial_ref)) {
+ !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) {
vp9_scale_references(cpi);
}
@@ -3815,12 +3873,12 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
if (cpi->sf.svc_use_lowres_part &&
- cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) {
- if (cpi->svc.prev_partition_svc == NULL) {
+ svc->spatial_layer_id == svc->number_spatial_layers - 2) {
+ if (svc->prev_partition_svc == NULL) {
CHECK_MEM_ERROR(
- cm, cpi->svc.prev_partition_svc,
+ cm, svc->prev_partition_svc,
(BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
- sizeof(*cpi->svc.prev_partition_svc)));
+ sizeof(*svc->prev_partition_svc)));
}
}
@@ -3832,6 +3890,12 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
cpi->use_skin_detection = 1;
}
+ // Enable post encode frame dropping for CBR on non key frame, when
+ // ext_use_post_encode_drop is specified by user.
+ cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop &&
+ cpi->oxcf.rc_mode == VPX_CBR &&
+ cm->frame_type != KEY_FRAME;
+
vp9_set_quantizer(cm, q);
vp9_set_variance_partition_thresholds(cpi, q, 0);
@@ -3842,16 +3906,24 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
if (cpi->use_svc) {
// On non-zero spatial layer, check for disabling inter-layer
// prediction.
- if (cpi->svc.spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
+ if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
vp9_svc_assert_constraints_pattern(cpi);
}
+ if (cpi->rc.last_post_encode_dropped_scene_change) {
+ cpi->rc.high_source_sad = 1;
+ svc->high_source_sad_superframe = 1;
+ // For now disable use_source_sad since Last_Source will not be the previous
+ // encoded but the dropped one.
+ cpi->sf.use_source_sad = 0;
+ cpi->rc.last_post_encode_dropped_scene_change = 0;
+ }
// Check if this high_source_sad (scene/slide change) frame should be
// encoded at high/max QP, and if so, set the q and adjust some rate
// control parameters.
if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
(cpi->rc.high_source_sad ||
- (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
+ (cpi->use_svc && svc->high_source_sad_superframe))) {
if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
vp9_set_quantizer(cm, q);
vp9_set_variance_partition_thresholds(cpi, q, 0);
@@ -3886,7 +3958,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// For SVC: all spatial layers are checked for re-encoding.
if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
(cpi->rc.high_source_sad ||
- (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
+ (cpi->use_svc && svc->high_source_sad_superframe))) {
int frame_size = 0;
// Get an estimate of the encoded frame size.
save_coding_context(cpi);
@@ -3960,9 +4032,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
set_size_independent_vars(cpi);
- enable_acl = cpi->sf.allow_acl
- ? (cm->frame_type == KEY_FRAME) || (cm->show_frame == 0)
- : 0;
+ enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) ||
+ (cpi->twopass.gf_group.index == 1)
+ : 0;
do {
vpx_clear_system_state();
@@ -4622,8 +4694,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
TX_SIZE t;
// SVC: skip encoding of enhancement layer if the layer target bandwidth = 0.
+ // If in constrained layer drop mode (svc.framedrop_mode != LAYER_DROP) and
+ // base spatial layer was dropped, no need to set svc.skip_enhancement_layer,
+ // as whole superframe will be dropped.
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
- cpi->oxcf.target_bandwidth == 0) {
+ cpi->oxcf.target_bandwidth == 0 &&
+ !(cpi->svc.framedrop_mode != LAYER_DROP &&
+ cpi->svc.drop_spatial_layer[0])) {
cpi->svc.skip_enhancement_layer = 1;
vp9_rc_postencode_update_drop_frame(cpi);
cpi->ext_refresh_frame_flags_pending = 0;
@@ -4720,19 +4797,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cm->ref_frame_map[cpi->alt_fb_idx]);
}
- cpi->last_frame_dropped = 0;
- cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
- // Keep track of the frame buffer index updated/refreshed for the
- // current encoded TL0 superframe.
- if (cpi->svc.temporal_layer_id == 0) {
- if (cpi->refresh_last_frame)
- cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
- else if (cpi->refresh_golden_frame)
- cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
- else if (cpi->refresh_alt_ref_frame)
- cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
- }
-
// Disable segmentation if it decrease rate/distortion ratio
if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
vp9_try_disable_lookahead_aq(cpi, size, dest);
@@ -4779,9 +4843,34 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
// Pick the loop filter level for the frame.
loopfilter_frame(cpi, cm);
+ if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);
+
// build the bitstream
vp9_pack_bitstream(cpi, dest, size);
+ if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
+ cpi->svc.spatial_layer_id == 0 &&
+ post_encode_drop_screen_content(cpi, size)) {
+ restore_coding_context(cpi);
+ return;
+ }
+
+ cpi->last_frame_dropped = 0;
+ cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
+ if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
+ cpi->svc.num_encoded_top_layer++;
+
+ // Keep track of the frame buffer index updated/refreshed for the
+ // current encoded TL0 superframe.
+ if (cpi->svc.temporal_layer_id == 0) {
+ if (cpi->refresh_last_frame)
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
+ else if (cpi->refresh_golden_frame)
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
+ else if (cpi->refresh_alt_ref_frame)
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
+ }
+
if (cm->seg.update_map) update_reference_segmentation_map(cpi);
if (frame_is_intra_only(cm) == 0) {
@@ -4910,6 +4999,8 @@ static void init_ref_frame_bufs(VP9_COMMON *cm) {
cm->new_fb_idx = INVALID_IDX;
for (i = 0; i < REF_FRAMES; ++i) {
cm->ref_frame_map[i] = INVALID_IDX;
+ }
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
pool->frame_bufs[i].ref_count = 0;
}
}
@@ -5335,6 +5426,7 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
typedef struct GF_PICTURE {
YV12_BUFFER_CONFIG *frame;
int ref_frame[3];
+ FRAME_UPDATE_TYPE update_type;
} GF_PICTURE;
void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
@@ -5345,16 +5437,22 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
int gld_index = -1;
int alt_index = -1;
int lst_index = -1;
+ int arf_index_stack[MAX_ARF_LAYERS];
+ int arf_stack_size = 0;
int extend_frame_count = 0;
int pframe_qindex = cpi->tpl_stats[2].base_qindex;
+ int frame_gop_offset = 0;
RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
- int recon_frame_index[REFS_PER_FRAME + 1] = { -1, -1, -1, -1 };
+ int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];
+
+ memset(recon_frame_index, -1, sizeof(recon_frame_index));
+ stack_init(arf_index_stack, MAX_ARF_LAYERS);
// TODO(jingning): To be used later for gf frame type parsing.
(void)gf_group;
- for (i = 0; i < FRAME_BUFFERS && frame_idx < REFS_PER_FRAME + 1; ++i) {
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
if (frame_bufs[i].ref_count == 0) {
alloc_frame_mvs(cm, i);
if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
@@ -5369,6 +5467,8 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
recon_frame_index[frame_idx] = i;
++frame_idx;
+
+ if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
}
}
@@ -5382,21 +5482,24 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
// Initialize Golden reference frame.
gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
+ gf_picture[0].update_type = gf_group->update_type[0];
gld_index = 0;
++*tpl_group_frames;
- // Initialize ARF frame
+ // Initialize base layer ARF frame
gf_picture[1].frame = cpi->Source;
gf_picture[1].ref_frame[0] = gld_index;
gf_picture[1].ref_frame[1] = lst_index;
gf_picture[1].ref_frame[2] = alt_index;
+ gf_picture[1].update_type = gf_group->update_type[1];
alt_index = 1;
++*tpl_group_frames;
// Initialize P frames
- for (frame_idx = 2; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
- struct lookahead_entry *buf =
- vp9_lookahead_peek(cpi->lookahead, frame_idx - 2);
+ for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
+ struct lookahead_entry *buf;
+ frame_gop_offset = gf_group->frame_gop_index[frame_idx];
+ buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
if (buf == NULL) break;
@@ -5404,25 +5507,44 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
gf_picture[frame_idx].ref_frame[0] = gld_index;
gf_picture[frame_idx].ref_frame[1] = lst_index;
gf_picture[frame_idx].ref_frame[2] = alt_index;
+ gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];
+
+ switch (gf_group->update_type[frame_idx]) {
+ case ARF_UPDATE:
+ stack_push(arf_index_stack, alt_index, arf_stack_size);
+ ++arf_stack_size;
+ alt_index = frame_idx;
+ break;
+ case LF_UPDATE: lst_index = frame_idx; break;
+ case OVERLAY_UPDATE:
+ gld_index = frame_idx;
+ alt_index = stack_pop(arf_index_stack, arf_stack_size);
+ --arf_stack_size;
+ break;
+ case USE_BUF_FRAME:
+ lst_index = alt_index;
+ alt_index = stack_pop(arf_index_stack, arf_stack_size);
+ --arf_stack_size;
+ break;
+ default: break;
+ }
++*tpl_group_frames;
- lst_index = frame_idx;
// The length of group of pictures is baseline_gf_interval, plus the
// beginning golden frame from last GOP, plus the last overlay frame in
// the same GOP.
- if (frame_idx == cpi->rc.baseline_gf_interval + 1) break;
+ if (frame_idx == gf_group->gf_group_size) break;
}
- gld_index = frame_idx;
- lst_index = VPXMAX(0, frame_idx - 1);
alt_index = -1;
++frame_idx;
+ ++frame_gop_offset;
// Extend two frames outside the current gf group.
for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
struct lookahead_entry *buf =
- vp9_lookahead_peek(cpi->lookahead, frame_idx - 2);
+ vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
if (buf == NULL) break;
@@ -5432,16 +5554,25 @@ void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
gf_picture[frame_idx].ref_frame[0] = gld_index;
gf_picture[frame_idx].ref_frame[1] = lst_index;
gf_picture[frame_idx].ref_frame[2] = alt_index;
+ gf_picture[frame_idx].update_type = LF_UPDATE;
lst_index = frame_idx;
++*tpl_group_frames;
++extend_frame_count;
+ ++frame_gop_offset;
}
}
void init_tpl_stats(VP9_COMP *cpi) {
int frame_idx;
- for (frame_idx = 0; frame_idx < MAX_LAG_BUFFERS; ++frame_idx) {
+ for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+#if CONFIG_NON_GREEDY_MV
+ int rf_idx;
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ tpl_frame->mv_dist_sum[rf_idx] = 0;
+ tpl_frame->mv_cost_sum[rf_idx] = 0;
+ }
+#endif
memset(tpl_frame->tpl_stats_ptr, 0,
tpl_frame->height * tpl_frame->width *
sizeof(*tpl_frame->tpl_stats_ptr));
@@ -5451,20 +5582,22 @@ void init_tpl_stats(VP9_COMP *cpi) {
#if CONFIG_NON_GREEDY_MV
static void prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
- int mi_col, int_mv *nb_full_mvs) {
+ int mi_col, int rf_idx, BLOCK_SIZE bsize,
+ int_mv *nb_full_mvs) {
+ const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
int i;
for (i = 0; i < NB_MVS_NUM; ++i) {
- int r = dirs[i][0];
- int c = dirs[i][1];
+ int r = dirs[i][0] * mi_unit;
+ int c = dirs[i][1] * mi_unit;
if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
mi_col + c < tpl_frame->mi_cols) {
const TplDepStats *tpl_ptr =
&tpl_frame
->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
- if (tpl_ptr->ready) {
- nb_full_mvs[i].as_mv.row = tpl_ptr->mv.as_mv.row >> 3;
- nb_full_mvs[i].as_mv.col = tpl_ptr->mv.as_mv.col >> 3;
+ if (tpl_ptr->ready[rf_idx]) {
+ nb_full_mvs[i].as_mv.row = tpl_ptr->mv_arr[rf_idx].as_mv.row >> 3;
+ nb_full_mvs[i].as_mv.col = tpl_ptr->mv_arr[rf_idx].as_mv.col >> 3;
} else {
nb_full_mvs[i].as_int = INVALID_MV;
}
@@ -5503,7 +5636,7 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
#if CONFIG_NON_GREEDY_MV
// lambda is used to adjust the importance of motion vector consitency.
// TODO(angiebird): Figure out lambda's proper value.
- double lambda = 10000;
+ double lambda = cpi->tpl_stats[frame_idx].lambda;
int_mv nb_full_mvs[NB_MVS_NUM];
#endif
@@ -5527,7 +5660,8 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
#if CONFIG_NON_GREEDY_MV
(void)search_method;
(void)sadpb;
- prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, nb_full_mvs);
+ prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx, bsize,
+ nb_full_mvs);
vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda,
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
&cpi->fn_ptr[bsize], nb_full_mvs, tpl_stats,
@@ -5544,12 +5678,13 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
/* restore UMV window */
x->mv_limits = tmp_mv_limits;
+ // TODO(yunqing): may use higher tap interp filter than 2 taps.
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(
x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
- cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
- 0);
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
+ USE_2_TAPS);
return bestsme;
}
@@ -5594,42 +5729,21 @@ int round_floor(int ref_pos, int bsize_pix) {
}
void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
- BLOCK_SIZE bsize, int stride,
- const TplDepStats *src_stats) {
+ BLOCK_SIZE bsize, int stride) {
const int mi_height = num_8x8_blocks_high_lookup[bsize];
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
int idx, idy;
- int64_t intra_cost = src_stats->intra_cost / (mi_height * mi_width);
- int64_t inter_cost = src_stats->inter_cost / (mi_height * mi_width);
-
- TplDepStats *tpl_ptr;
-
- intra_cost = VPXMAX(1, intra_cost);
- inter_cost = VPXMAX(1, inter_cost);
-
for (idy = 0; idy < mi_height; ++idy) {
- tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col];
for (idx = 0; idx < mi_width; ++idx) {
-#if CONFIG_NON_GREEDY_MV
- int rf_idx;
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
- tpl_ptr->mv_dist[rf_idx] = src_stats->mv_dist[rf_idx];
- tpl_ptr->mv_cost[rf_idx] = src_stats->mv_cost[rf_idx];
- tpl_ptr->inter_cost_arr[rf_idx] = src_stats->inter_cost;
- tpl_ptr->recon_error_arr[rf_idx] = src_stats->recon_error_arr[rf_idx];
- tpl_ptr->sse_arr[rf_idx] = src_stats->sse_arr[rf_idx];
- tpl_ptr->mv_arr[rf_idx].as_int = src_stats->mv_arr[rf_idx].as_int;
- }
- tpl_ptr->feature_score = src_stats->feature_score;
- tpl_ptr->ready = 1;
-#endif
- tpl_ptr->intra_cost = intra_cost;
- tpl_ptr->inter_cost = inter_cost;
+ TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
+ const int64_t mc_flow = tpl_ptr->mc_flow;
+ const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
+ *tpl_ptr = *src_stats;
+ tpl_ptr->mc_flow = mc_flow;
+ tpl_ptr->mc_ref_cost = mc_ref_cost;
tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
- tpl_ptr->ref_frame_index = src_stats->ref_frame_index;
- tpl_ptr->mv.as_int = src_stats->mv.as_int;
- ++tpl_ptr;
}
}
}
@@ -5717,9 +5831,21 @@ void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
const int shift = tx_size == TX_32X32 ? 0 : 2;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
+ p->quant_fp, qcoeff, dqcoeff, pd->dequant,
+ &eob, scan_order->scan, scan_order->iscan);
+ } else {
+ vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
+ p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob,
+ scan_order->scan, scan_order->iscan);
+ }
+#else
vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
scan_order->iscan);
+#endif // CONFIG_VP9_HIGHBITDEPTH
*recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
*recon_error = VPXMAX(*recon_error, 1);
@@ -5728,6 +5854,19 @@ void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
*sse = VPXMAX(*sse, 1);
}
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+ TX_SIZE tx_size) {
+ // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
+ switch (tx_size) {
+ case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
+ case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
+ case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
+ default: assert(0);
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
TX_SIZE tx_size) {
switch (tx_size) {
@@ -5763,14 +5902,23 @@ double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows, int cols) {
}
#endif
+static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
+ int mi_col) {
+ x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
+ x->mv_limits.row_max =
+ (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
+ x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
+ x->mv_limits.col_max =
+ ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
+}
+
void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
struct scale_factors *sf, GF_PICTURE *gf_picture,
- int frame_idx, int16_t *src_diff, tran_low_t *coeff,
- tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
- int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
+ int frame_idx, TplDepFrame *tpl_frame, int16_t *src_diff,
+ tran_low_t *coeff, tran_low_t *qcoeff, tran_low_t *dqcoeff,
+ int mi_row, int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
- int64_t *recon_error, int64_t *sse,
- TplDepStats *tpl_stats) {
+ int64_t *recon_error, int64_t *sse) {
VP9_COMMON *cm = &cpi->common;
ThreadData *td = &cpi->td;
@@ -5789,8 +5937,10 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
PREDICTION_MODE mode;
int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
MODE_INFO mi_above, mi_left;
-
- memset(tpl_stats, 0, sizeof(*tpl_stats));
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ TplDepStats *tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
@@ -5816,11 +5966,24 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
src_stride, dst, dst_stride, 0, 0, 0);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
+ dst_stride, xd->bd);
+ highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ // TODO(sdeng): Implement SIMD based high bit-depth satd.
+ intra_cost = vpx_satd_c(coeff, pix_num);
+ } else {
+ vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
+ dst_stride);
+ wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ intra_cost = vpx_satd(coeff, pix_num);
+ }
+#else
vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
-
wht_fwd_txfm(src_diff, bw, coeff, tx_size);
-
intra_cost = vpx_satd(coeff, pix_num);
+#endif // CONFIG_VP9_HIGHBITDEPTH
if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
}
@@ -5828,31 +5991,14 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
// Motion compensated prediction
best_mv.as_int = 0;
- (void)mb_y_offset;
- // Motion estimation column boundary
- x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
- x->mv_limits.col_max =
- ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
-
-#if CONFIG_NON_GREEDY_MV
- tpl_stats->feature_score = get_feature_score(
- xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
-#endif
+ set_mv_limits(cm, x, mi_row, mi_col);
for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
int_mv mv;
- if (ref_frame[rf_idx] == NULL) {
-#if CONFIG_NON_GREEDY_MV
- tpl_stats->inter_cost_arr[rf_idx] = -1;
-#endif
- continue;
- }
+ if (ref_frame[rf_idx] == NULL) continue;
#if CONFIG_NON_GREEDY_MV
- motion_compensated_prediction(
- cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
- ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
- mi_row, mi_col, tpl_stats, rf_idx);
+ (void)td;
mv.as_int = tpl_stats->mv_arr[rf_idx].as_int;
#else
motion_compensated_prediction(
@@ -5861,8 +6007,6 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
mi_row, mi_col, &mv.as_mv);
#endif
- // TODO(jingning): Not yet support high bit-depth in the next three
- // steps.
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_highbd_build_inter_predictor(
@@ -5873,6 +6017,8 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
vpx_highbd_subtract_block(
bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
+ highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ inter_cost = vpx_satd_c(coeff, pix_num);
} else {
vp9_build_inter_predictor(
ref_frame[rf_idx]->y_buffer + mb_y_offset,
@@ -5881,6 +6027,8 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
vpx_subtract_block(bh, bw, src_diff, bw,
xd->cur_buf->y_buffer + mb_y_offset,
xd->cur_buf->y_stride, &predictor[0], bw);
+ wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ inter_cost = vpx_satd(coeff, pix_num);
}
#else
vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
@@ -5890,10 +6038,9 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
vpx_subtract_block(bh, bw, src_diff, bw,
xd->cur_buf->y_buffer + mb_y_offset,
xd->cur_buf->y_stride, &predictor[0], bw);
-#endif
wht_fwd_txfm(src_diff, bw, coeff, tx_size);
-
inter_cost = vpx_satd(coeff, pix_num);
+#endif
#if CONFIG_NON_GREEDY_MV
tpl_stats->inter_cost_arr[rf_idx] = inter_cost;
@@ -5917,13 +6064,136 @@ void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
}
best_intra_cost = VPXMAX(best_intra_cost, 1);
best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
- tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
- tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
- tpl_stats->mc_dep_cost = tpl_stats->intra_cost + tpl_stats->mc_flow;
+ tpl_stats->inter_cost = VPXMAX(
+ 1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
+ tpl_stats->intra_cost = VPXMAX(
+ 1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
tpl_stats->mv.as_int = best_mv.as_int;
}
+#if CONFIG_NON_GREEDY_MV
+static int compare_feature_score(const void *a, const void *b) {
+ const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a;
+ const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b;
+ if (aa->feature_score < bb->feature_score) {
+ return 1;
+ } else if (aa->feature_score > bb->feature_score) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
+
+static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx,
+ YV12_BUFFER_CONFIG **ref_frame, BLOCK_SIZE bsize,
+ int mi_row, int mi_col) {
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCK *x = &td->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+ TplDepStats *tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ const int mb_y_offset =
+ mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+ int rf_idx;
+
+ set_mv_limits(cm, x, mi_row, mi_col);
+
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ if (ref_frame[rf_idx] == NULL) {
+ tpl_stats->ready[rf_idx] = 0;
+ continue;
+ } else {
+ tpl_stats->ready[rf_idx] = 1;
+ }
+ motion_compensated_prediction(
+ cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
+ mi_row, mi_col, tpl_stats, rf_idx);
+ }
+}
+
+#define CHANGE_MV_SEARCH_ORDER 1
+#define USE_PQSORT 1
+#define RE_COMPUTE_MV_INCONSISTENCY 1
+
+#if CHANGE_MV_SEARCH_ORDER
+#if USE_PQSORT
+static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size,
+ FEATURE_SCORE_LOC **output) {
+ if (*size > 0) {
+ *output = heap[0];
+ --*size;
+ if (*size > 0) {
+ int p, l, r;
+ heap[0] = heap[*size];
+ p = 0;
+ l = 2 * p + 1;
+ r = 2 * p + 2;
+ while (l < *size) {
+ FEATURE_SCORE_LOC *tmp;
+ int c = l;
+ if (r < *size && heap[r]->feature_score > heap[l]->feature_score) {
+ c = r;
+ }
+ if (heap[p]->feature_score >= heap[c]->feature_score) {
+ break;
+ }
+ tmp = heap[p];
+ heap[p] = heap[c];
+ heap[c] = tmp;
+ p = c;
+ l = 2 * p + 1;
+ r = 2 * p + 2;
+ }
+ }
+ } else {
+ assert(0);
+ }
+}
+
+static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size,
+ FEATURE_SCORE_LOC *input) {
+ int c, p;
+ FEATURE_SCORE_LOC *tmp;
+ heap[*size] = input;
+ ++*size;
+ c = *size - 1;
+ p = c >> 1;
+ while (c > 0 && heap[c]->feature_score > heap[p]->feature_score) {
+ tmp = heap[p];
+ heap[p] = heap[c];
+ heap[c] = tmp;
+ c = p;
+ p >>= 1;
+ }
+}
+
+static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int *heap_size) {
+ const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
+ const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+ int i;
+ for (i = 0; i < NB_MVS_NUM; ++i) {
+ int r = dirs[i][0] * mi_unit;
+ int c = dirs[i][1] * mi_unit;
+ if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
+ mi_col + c < tpl_frame->mi_cols) {
+ FEATURE_SCORE_LOC *fs_loc =
+ &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride +
+ (mi_col + c)];
+ if (fs_loc->visited == 0) {
+ max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc);
+ }
+ }
+ }
+}
+#endif // USE_PQSORT
+#endif // CHANGE_MV_SEARCH_ORDER
+#endif // CONFIG_NON_GREEDY_MV
+
void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
BLOCK_SIZE bsize) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
@@ -5954,6 +6224,17 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
const int mi_height = num_8x8_blocks_high_lookup[bsize];
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
int64_t recon_error, sse;
+#if CONFIG_NON_GREEDY_MV
+ int rf_idx;
+ int fs_loc_sort_size;
+#if CHANGE_MV_SEARCH_ORDER
+#if USE_PQSORT
+ int fs_loc_heap_size;
+#else
+ int i;
+#endif // USE_PQSORT
+#endif // CHANGE_MV_SEARCH_ORDER
+#endif // CONFIG_NON_GREEDY_MV
// Setup scaling factor
#if CONFIG_VP9_HIGHBITDEPTH
@@ -5984,9 +6265,7 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
xd->cur_buf = this_frame;
// Get rd multiplier set up.
- rdmult =
- (int)vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
- if (rdmult < 1) rdmult = 1;
+ rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
set_error_per_bit(&cpi->td.mb, rdmult);
vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
@@ -5995,23 +6274,98 @@ void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
cm->base_qindex = tpl_frame->base_qindex;
vp9_frame_init_quantizer(cpi);
+#if CONFIG_NON_GREEDY_MV
+ tpl_frame->lambda = 250;
+ fs_loc_sort_size = 0;
+
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
- // Motion estimation row boundary
- x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
- x->mv_limits.row_max =
- (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
- TplDepStats tpl_stats;
- mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, src_diff, coeff,
- qcoeff, dqcoeff, mi_row, mi_col, bsize, tx_size,
- ref_frame, predictor, &recon_error, &sse, &tpl_stats);
+ const int mb_y_offset =
+ mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ TplDepStats *tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ FEATURE_SCORE_LOC *fs_loc =
+ &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col];
+ tpl_stats->feature_score = get_feature_score(
+ xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
+ fs_loc->visited = 0;
+ fs_loc->feature_score = tpl_stats->feature_score;
+ fs_loc->mi_row = mi_row;
+ fs_loc->mi_col = mi_col;
+ cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc;
+ ++fs_loc_sort_size;
+ }
+ }
+
+ qsort(cpi->feature_score_loc_sort, fs_loc_sort_size,
+ sizeof(*cpi->feature_score_loc_sort), compare_feature_score);
+#if CHANGE_MV_SEARCH_ORDER
+#if !USE_PQSORT
+ for (i = 0; i < fs_loc_sort_size; ++i) {
+ FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i];
+ do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+ fs_loc->mi_col);
+ }
+#else // !USE_PQSORT
+ fs_loc_heap_size = 0;
+ max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size,
+ cpi->feature_score_loc_sort[0]);
+
+ while (fs_loc_heap_size > 0) {
+ FEATURE_SCORE_LOC *fs_loc;
+ max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc);
+
+ fs_loc->visited = 1;
+
+ do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+ fs_loc->mi_col);
+
+ add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col,
+ &fs_loc_heap_size);
+ }
+#endif // !USE_PQSORT
+#else // CHANGE_MV_SEARCH_ORDER
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col);
+ }
+ }
+#endif // CHANGE_MV_SEARCH_ORDER
+#endif // CONFIG_NON_GREEDY_MV
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
+ src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
+ tx_size, ref_frame, predictor, &recon_error, &sse);
// Motion flow dependency dispenser.
tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
- tpl_frame->stride, &tpl_stats);
+ tpl_frame->stride);
tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
bsize);
+#if CONFIG_NON_GREEDY_MV
+ {
+ TplDepStats *this_tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+#if RE_COMPUTE_MV_INCONSISTENCY
+ MV full_mv;
+ int_mv nb_full_mvs[NB_MVS_NUM];
+ prepare_nb_full_mvs(tpl_frame, mi_row, mi_col, rf_idx, bsize,
+ nb_full_mvs);
+ full_mv.row = this_tpl_stats->mv_arr[rf_idx].as_mv.row >> 3;
+ full_mv.col = this_tpl_stats->mv_arr[rf_idx].as_mv.col >> 3;
+ this_tpl_stats->mv_cost[rf_idx] =
+ av1_nb_mvs_inconsistency(&full_mv, nb_full_mvs);
+#endif // RE_COMPUTE_MV_INCONSISTENCY
+ tpl_frame->mv_dist_sum[rf_idx] += this_tpl_stats->mv_dist[rf_idx];
+ tpl_frame->mv_cost_sum[rf_idx] += this_tpl_stats->mv_cost[rf_idx];
+ }
+ }
+#endif // CONFIG_NON_GREEDY_MV
}
}
}
@@ -6088,7 +6442,7 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
#endif // CONFIG_NON_GREEDY_MV
static void setup_tpl_stats(VP9_COMP *cpi) {
- GF_PICTURE gf_picture[MAX_LAG_BUFFERS];
+ GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
int tpl_group_frames = 0;
int frame_idx;
@@ -6100,6 +6454,7 @@ static void setup_tpl_stats(VP9_COMP *cpi) {
// Backward propagation from tpl_group_frames to 1.
for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
+ if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
mc_flow_dispenser(cpi, gf_picture, frame_idx, bsize);
}
#if CONFIG_NON_GREEDY_MV
@@ -6121,6 +6476,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
struct lookahead_entry *last_source = NULL;
struct lookahead_entry *source = NULL;
int arf_src_index;
+ const int gf_group_index = cpi->twopass.gf_group.index;
int i;
if (is_one_pass_cbr_svc(cpi)) {
@@ -6168,7 +6524,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
// Clear arf index stack before group of pictures processing starts.
- if (cpi->twopass.gf_group.index == 1) {
+ if (gf_group_index == 1) {
stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2);
cpi->twopass.gf_group.stack_size = 0;
}
@@ -6316,10 +6672,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
level_rc_framerate(cpi, arf_src_index);
if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) {
- for (i = 0; i < MAX_REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
+ for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
}
- if (arf_src_index && cpi->sf.enable_tpl_model) {
+ if (gf_group_index == 1 &&
+ cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
+ cpi->sf.enable_tpl_model) {
vp9_estimate_qp_gop(cpi);
setup_tpl_stats(cpi);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
index 75f177fcc16..02814599d03 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
@@ -291,7 +291,7 @@ typedef struct TplDepStats {
int_mv mv;
#if CONFIG_NON_GREEDY_MV
- int ready;
+ int ready[3];
double mv_dist[3];
double mv_cost[3];
int64_t inter_cost_arr[3];
@@ -311,6 +311,11 @@ typedef struct TplDepFrame {
int mi_rows;
int mi_cols;
int base_qindex;
+#if CONFIG_NON_GREEDY_MV
+ double lambda;
+ double mv_dist_sum[3];
+ double mv_cost_sum[3];
+#endif
} TplDepFrame;
#define TPL_DEP_COST_SCALE_LOG2 4
@@ -490,6 +495,23 @@ typedef struct ARNRFilterData {
struct scale_factors sf;
} ARNRFilterData;
+typedef struct EncFrameBuf {
+ int mem_valid;
+ int released;
+ YV12_BUFFER_CONFIG frame;
+} EncFrameBuf;
+
+// Maximum operating frame buffer size needed for a GOP using ARF reference.
+#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
+#if CONFIG_NON_GREEDY_MV
+typedef struct FEATURE_SCORE_LOC {
+ int visited;
+ double feature_score;
+ int mi_row;
+ int mi_col;
+} FEATURE_SCORE_LOC;
+#endif
+
typedef struct VP9_COMP {
QUANTS quants;
ThreadData td;
@@ -513,8 +535,14 @@ typedef struct VP9_COMP {
#endif
YV12_BUFFER_CONFIG *raw_source_frame;
- TplDepFrame tpl_stats[MAX_LAG_BUFFERS];
- YV12_BUFFER_CONFIG *tpl_recon_frames[REFS_PER_FRAME + 1];
+ TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE];
+ YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
+ EncFrameBuf enc_frame_buf[REF_FRAMES];
+#if CONFIG_NON_GREEDY_MV
+ FEATURE_SCORE_LOC *feature_score_loc_arr;
+ FEATURE_SCORE_LOC **feature_score_loc_sort;
+ FEATURE_SCORE_LOC **feature_score_loc_heap;
+#endif
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
@@ -522,13 +550,12 @@ typedef struct VP9_COMP {
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
- int scaled_ref_idx[MAX_REF_FRAMES];
+ int scaled_ref_idx[REFS_PER_FRAME];
int lst_fb_idx;
int gld_fb_idx;
int alt_fb_idx;
int ref_fb_idx[REF_FRAMES];
- int last_show_frame_buf_idx; // last show frame buffer index
int refresh_last_frame;
int refresh_golden_frame;
@@ -600,6 +627,7 @@ typedef struct VP9_COMP {
ActiveMap active_map;
fractional_mv_step_fp *find_fractional_mv_step;
+ struct scale_factors me_sf;
vp9_diamond_search_fn_t diamond_search_sad;
vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
uint64_t time_receive_data;
@@ -783,7 +811,7 @@ void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf);
// frame is made and not just a copy of the pointer..
int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
- int64_t end_time_stamp);
+ int64_t end_time);
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
@@ -804,9 +832,11 @@ int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
int vp9_update_entropy(VP9_COMP *cpi, int update);
-int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
+int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
+ int cols);
-int vp9_get_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
+int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
+ int cols);
int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
VPX_SCALING vert_mode);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
index 58c3a435d9f..e29e86576d2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
@@ -49,9 +49,6 @@
#define MIN_DECAY_FACTOR 0.01
#define NEW_MV_MODE_PENALTY 32
#define DARK_THRESH 64
-#define DEFAULT_GRP_WEIGHT 1.0
-#define RC_FACTOR_MIN 0.75
-#define RC_FACTOR_MAX 1.75
#define SECTION_NOISE_DEF 250.0
#define LOW_I_THRESH 24000
@@ -1828,10 +1825,12 @@ static int detect_flash(const TWO_PASS *twopass, int offset) {
// brief break in prediction (such as a flash) but subsequent frames
// are reasonably well predicted by an earlier (pre flash) frame.
// The recovery after a flash is indicated by a high pcnt_second_ref
- // compared to pcnt_inter.
+ // usage or a second ref coded error notably lower than the last
+ // frame coded error.
return next_frame != NULL &&
- next_frame->pcnt_second_ref > next_frame->pcnt_inter &&
- next_frame->pcnt_second_ref >= 0.5;
+ ((next_frame->sr_coded_error < next_frame->coded_error) ||
+ ((next_frame->pcnt_second_ref > next_frame->pcnt_inter) &&
+ (next_frame->pcnt_second_ref >= 0.5)));
}
// Update the motion related elements to the GF arf boost calculation.
@@ -2113,21 +2112,23 @@ static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group,
TWO_PASS *twopass = &cpi->twopass;
const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
FIRSTPASS_STATS fpf_frame;
- const int mid = (start + end) >> 1;
- const int min_frame_interval = 3;
+ const int mid = (start + end + 1) >> 1;
+ const int min_frame_interval = 2;
int idx;
// Process regular P frames
if ((end - start < min_frame_interval) ||
- (depth > cpi->oxcf.enable_auto_arf)) {
- int idx;
- for (idx = start; idx < end; ++idx) {
+ (depth > gf_group->allowed_max_layer_depth)) {
+ for (idx = start; idx <= end; ++idx) {
gf_group->update_type[*index_counter] = LF_UPDATE;
gf_group->arf_src_offset[*index_counter] = 0;
+ gf_group->frame_gop_index[*index_counter] = idx;
gf_group->rf_level[*index_counter] = INTER_NORMAL;
gf_group->layer_depth[*index_counter] = depth;
+ gf_group->gfu_boost[*index_counter] = NORMAL_BOOST;
++(*index_counter);
}
+ gf_group->max_layer_depth = VPXMAX(gf_group->max_layer_depth, depth);
return;
}
@@ -2137,22 +2138,25 @@ static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group,
gf_group->layer_depth[*index_counter] = depth;
gf_group->update_type[*index_counter] = ARF_UPDATE;
gf_group->arf_src_offset[*index_counter] = mid - start;
+ gf_group->frame_gop_index[*index_counter] = mid;
gf_group->rf_level[*index_counter] = GF_ARF_LOW;
for (idx = 0; idx <= mid; ++idx)
if (EOF == input_stats(twopass, &fpf_frame)) break;
- gf_group->gfu_boost[*index_counter] = VPXMAX(
- MIN_ARF_GF_BOOST, calc_arf_boost(cpi, end - mid, mid - start) >> depth);
+ gf_group->gfu_boost[*index_counter] =
+ VPXMAX(MIN_ARF_GF_BOOST,
+ calc_arf_boost(cpi, end - mid + 1, mid - start) >> depth);
reset_fpf_position(twopass, start_pos);
++(*index_counter);
- find_arf_order(cpi, gf_group, index_counter, depth + 1, start, mid);
+ find_arf_order(cpi, gf_group, index_counter, depth + 1, start, mid - 1);
gf_group->update_type[*index_counter] = USE_BUF_FRAME;
gf_group->arf_src_offset[*index_counter] = 0;
+ gf_group->frame_gop_index[*index_counter] = mid;
gf_group->rf_level[*index_counter] = INTER_NORMAL;
gf_group->layer_depth[*index_counter] = depth;
++(*index_counter);
@@ -2167,6 +2171,7 @@ static INLINE void set_gf_overlay_frame_type(GF_GROUP *gf_group,
gf_group->update_type[frame_index] = OVERLAY_UPDATE;
gf_group->rf_level[frame_index] = INTER_NORMAL;
gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1;
+ gf_group->gfu_boost[frame_index] = NORMAL_BOOST;
} else {
gf_group->update_type[frame_index] = GF_UPDATE;
gf_group->rf_level[frame_index] = GF_ARF_STD;
@@ -2174,19 +2179,20 @@ static INLINE void set_gf_overlay_frame_type(GF_GROUP *gf_group,
}
}
-static int define_gf_group_structure(VP9_COMP *cpi) {
+static void define_gf_group_structure(VP9_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
TWO_PASS *const twopass = &cpi->twopass;
GF_GROUP *const gf_group = &twopass->gf_group;
- int i;
int frame_index = 0;
- int key_frame;
- int normal_frames;
-
- key_frame = cpi->common.frame_type == KEY_FRAME;
+ int key_frame = cpi->common.frame_type == KEY_FRAME;
+ int layer_depth = 1;
+ int gop_frames =
+ rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
gf_group->frame_start = cpi->common.current_video_frame;
- gf_group->frame_end = gf_group->frame_start + rc->baseline_gf_interval - 1;
+ gf_group->frame_end = gf_group->frame_start + rc->baseline_gf_interval;
+ gf_group->max_layer_depth = 0;
+ gf_group->allowed_max_layer_depth = 0;
// For key frames the frame target rate is already set and it
// is also the golden frame.
@@ -2200,55 +2206,24 @@ static int define_gf_group_structure(VP9_COMP *cpi) {
if (rc->source_alt_ref_pending) {
gf_group->update_type[frame_index] = ARF_UPDATE;
gf_group->rf_level[frame_index] = GF_ARF_STD;
- gf_group->layer_depth[frame_index] = 1;
+ gf_group->layer_depth[frame_index] = layer_depth;
gf_group->arf_src_offset[frame_index] =
(unsigned char)(rc->baseline_gf_interval - 1);
+ gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval;
+ gf_group->max_layer_depth = 1;
++frame_index;
+ ++layer_depth;
+ gf_group->allowed_max_layer_depth = cpi->oxcf.enable_auto_arf;
}
- if (rc->source_alt_ref_pending && cpi->multi_layer_arf) {
- find_arf_order(cpi, gf_group, &frame_index, 2, 0,
- rc->baseline_gf_interval - 1);
-
- set_gf_overlay_frame_type(gf_group, frame_index,
- rc->source_alt_ref_pending);
-
- gf_group->arf_src_offset[frame_index] = 0;
-
- return frame_index;
- }
-
- normal_frames =
- rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
-
- for (i = 0; i < normal_frames; ++i) {
- if (twopass->stats_in >= twopass->stats_in_end) break;
-
- gf_group->update_type[frame_index] = LF_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- gf_group->arf_src_offset[frame_index] = 0;
- gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1;
-
- ++frame_index;
- }
-
- // Note:
- // We need to configure the frame at the end of the sequence + 1 that will be
- // the start frame for the next group. Otherwise prior to the call to
- // vp9_rc_get_second_pass_params() the data will be undefined.
+ find_arf_order(cpi, gf_group, &frame_index, layer_depth, 1, gop_frames);
set_gf_overlay_frame_type(gf_group, frame_index, rc->source_alt_ref_pending);
-
- if (rc->source_alt_ref_pending) {
- gf_group->update_type[frame_index] = OVERLAY_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
- } else {
- gf_group->update_type[frame_index] = GF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_STD;
- }
gf_group->arf_src_offset[frame_index] = 0;
+ gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval;
- return frame_index;
+ // Set the frame ops number.
+ gf_group->gf_group_size = frame_index;
}
static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
@@ -2273,7 +2248,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
double this_frame_score = 1.0;
// Define the GF structure and specify
- int gop_frames = define_gf_group_structure(cpi);
+ int gop_frames = gf_group->gf_group_size;
key_frame = cpi->common.frame_type == KEY_FRAME;
@@ -2326,8 +2301,9 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
for (idx = 2; idx < MAX_ARF_LAYERS; ++idx) {
if (arf_depth_boost[idx] == 0) break;
- arf_depth_bits[idx] = calculate_boost_bits(
- rc->baseline_gf_interval, arf_depth_boost[idx], total_group_bits);
+ arf_depth_bits[idx] =
+ calculate_boost_bits(rc->baseline_gf_interval - total_arfs,
+ arf_depth_boost[idx], total_group_bits);
total_group_bits -= arf_depth_bits[idx];
total_arfs += arf_depth_count[idx];
@@ -2570,17 +2546,17 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
&next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator);
+ // Monitor for static sections.
+ if ((rc->frames_since_key + i - 1) > 1) {
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+ }
+
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
last_loop_decay_rate = loop_decay_rate;
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
- // Monitor for static sections.
- if ((rc->frames_since_key + i - 1) > 1) {
- zero_motion_accumulator = VPXMIN(
- zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
- }
-
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
@@ -2705,6 +2681,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Adjust KF group bits and error remaining.
twopass->kf_group_error_left -= gf_group_err;
+ // Decide GOP structure.
+ define_gf_group_structure(cpi);
+
// Allocate bits to each of the frames in the GF group.
allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits);
@@ -2748,17 +2727,11 @@ static int slide_transition(const FIRSTPASS_STATS *this_frame,
(this_frame->coded_error > (next_frame->coded_error * ERROR_SPIKE));
}
-// Threshold for use of the lagging second reference frame. High second ref
-// usage may point to a transient event like a flash or occlusion rather than
-// a real scene cut.
-#define SECOND_REF_USEAGE_THRESH 0.1
// Minimum % intra coding observed in first pass (1.0 = 100%)
#define MIN_INTRA_LEVEL 0.25
-// Minimum ratio between the % of intra coding and inter coding in the first
-// pass after discounting neutral blocks (discounting neutral blocks in this
-// way helps catch scene cuts in clips with very flat areas or letter box
-// format clips with image padding.
-#define INTRA_VS_INTER_THRESH 2.0
+// Threshold for use of the lagging second reference frame. Scene cuts do not
+// usually have a high second ref usage.
+#define SECOND_REF_USEAGE_THRESH 0.125
// Hard threshold where the first pass chooses intra for almost all blocks.
// In such a case even if the frame is not a scene cut coding a key frame
// may be a good option.
@@ -2766,12 +2739,6 @@ static int slide_transition(const FIRSTPASS_STATS *this_frame,
// Maximum threshold for the relative ratio of intra error score vs best
// inter error score.
#define KF_II_ERR_THRESHOLD 2.5
-// In real scene cuts there is almost always a sharp change in the intra
-// or inter error score.
-#define ERR_CHANGE_THRESHOLD 0.4
-// For real scene cuts we expect an improvment in the intra inter error
-// ratio in the next frame.
-#define II_IMPROVEMENT_THRESHOLD 3.5
#define KF_II_MAX 128.0
#define II_FACTOR 12.5
// Test for very low intra complexity which could cause false key frames
@@ -2783,30 +2750,21 @@ static int test_candidate_kf(TWO_PASS *twopass,
const FIRSTPASS_STATS *next_frame) {
int is_viable_kf = 0;
double pcnt_intra = 1.0 - this_frame->pcnt_inter;
- double modified_pcnt_inter =
- this_frame->pcnt_inter - this_frame->pcnt_neutral;
// Does the frame satisfy the primary criteria of a key frame?
// See above for an explanation of the test criteria.
// If so, then examine how well it predicts subsequent frames.
- if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
- (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
+ if (!detect_flash(twopass, -1) && !detect_flash(twopass, 0) &&
+ (this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
(slide_transition(this_frame, last_frame, next_frame)) ||
- ((pcnt_intra > MIN_INTRA_LEVEL) &&
- (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) &&
+ (((this_frame->coded_error > (next_frame->coded_error * 1.1)) &&
+ (this_frame->coded_error > (last_frame->coded_error * 1.1))) &&
+ (pcnt_intra > MIN_INTRA_LEVEL) &&
+ ((pcnt_intra + this_frame->pcnt_neutral) > 0.5) &&
((this_frame->intra_error /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) <
- KF_II_ERR_THRESHOLD) &&
- ((fabs(last_frame->coded_error - this_frame->coded_error) /
- DOUBLE_DIVIDE_CHECK(this_frame->coded_error) >
- ERR_CHANGE_THRESHOLD) ||
- (fabs(last_frame->intra_error - this_frame->intra_error) /
- DOUBLE_DIVIDE_CHECK(this_frame->intra_error) >
- ERR_CHANGE_THRESHOLD) ||
- ((next_frame->intra_error /
- DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) >
- II_IMPROVEMENT_THRESHOLD))))) {
+ KF_II_ERR_THRESHOLD)))) {
int i;
const FIRSTPASS_STATS *start_pos = twopass->stats_in;
FIRSTPASS_STATS local_next_frame = *next_frame;
@@ -3247,9 +3205,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
FILE *fpfile;
fpfile = fopen("arf.stt", "a");
++arf_count;
- fprintf(fpfile, "%10d %10ld %10d %10d %10ld\n", cm->current_video_frame,
- rc->frames_till_gf_update_due, rc->kf_boost, arf_count,
- rc->gfu_boost);
+ fprintf(fpfile, "%10d %10ld %10d %10d %10ld %10ld\n",
+ cm->current_video_frame, rc->frames_till_gf_update_due,
+ rc->kf_boost, arf_count, rc->gfu_boost, cm->frame_type);
fclose(fpfile);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h
index b5f21eacb97..0807097ac1a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h
@@ -43,12 +43,6 @@ typedef struct {
#define INVALID_ROW -1
-// Length of the bi-predictive frame group (BFG)
-// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain
-// number of bi-predictive frames.
-#define BFG_INTERVAL 2
-#define MAX_EXT_ARFS 2
-#define MIN_EXT_ARF_INTERVAL 4
#define MAX_ARF_LAYERS 6
typedef struct {
@@ -135,6 +129,7 @@ typedef struct {
FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 2];
unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2];
unsigned char layer_depth[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char frame_gop_index[MAX_STATIC_GF_GROUP_LENGTH + 2];
int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 2];
int gfu_boost[MAX_STATIC_GF_GROUP_LENGTH + 2];
@@ -144,6 +139,9 @@ typedef struct {
int arf_index_stack[MAX_LAG_BUFFERS * 2];
int top_arf_idx;
int stack_size;
+ int gf_group_size;
+ int max_layer_depth;
+ int allowed_max_layer_depth;
} GF_GROUP;
typedef struct {
@@ -200,7 +198,6 @@ struct ThreadData;
struct TileDataEnc;
void vp9_init_first_pass(struct VP9_COMP *cpi);
-void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi);
void vp9_first_pass(struct VP9_COMP *cpi, const struct lookahead_entry *source);
void vp9_end_first_pass(struct VP9_COMP *cpi);
@@ -219,17 +216,6 @@ void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width,
int *scaled_frame_height);
-static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) {
- assert(MAX_EXT_ARFS > 0);
- if (arf_pending) {
- if (interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1))
- return MAX_EXT_ARFS;
- else if (interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS)
- return MAX_EXT_ARFS - 1;
- }
- return 0;
-}
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c
index 2ec048b5314..831c79c1753 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.c
@@ -57,11 +57,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv,
{
uint32_t distortion;
uint32_t sse;
+ // TODO(yunqing): may use higher tap interp filter than 2 taps if needed.
cpi->find_fractional_mv_step(
x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
&v_fn_ptr, 0, mv_sf->subpel_search_level,
cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
- 0);
+ 0, USE_2_TAPS);
}
xd->mi[0]->mode = NEWMV;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
index 995c54fc74c..a2543035c59 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
@@ -367,14 +367,12 @@ static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
*ir = (int)divide_and_round(x1 * b, y1);
}
-uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
- const MV *ref_mv, int allow_hp,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- uint32_t *distortion, uint32_t *sse1,
- const uint8_t *second_pred, int w, int h) {
+uint32_t vp9_skip_sub_pixel_tree(
+ const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
+ int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
+ int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
+ uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
@@ -397,6 +395,7 @@ uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
(void)sse;
(void)thismse;
(void)cost_list;
+ (void)use_accurate_subpel_search;
return besterr;
}
@@ -406,7 +405,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
@@ -418,6 +417,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
(void)allow_hp;
(void)forced_stop;
(void)hstep;
+ (void)use_accurate_subpel_search;
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
@@ -471,8 +471,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
+ (void)use_accurate_subpel_search;
+
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
offset, mvjcost, mvcost, sse1, distortion);
@@ -531,8 +533,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
+ (void)use_accurate_subpel_search;
+
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
offset, mvjcost, mvcost, sse1, distortion);
@@ -617,12 +621,119 @@ static const MV search_step_table[12] = {
};
/* clang-format on */
+static int accurate_sub_pel_search(
+ const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf,
+ const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp,
+ const uint8_t *const src_address, const int src_stride,
+ const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred,
+ int w, int h, uint32_t *sse) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint64_t besterr;
+ assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
+ assert(w != 0 && h != 0);
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
+ vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride,
+ pred16, w, this_mv, sf, w, h, 0, kernel,
+ MV_PRECISION_Q3, 0, 0, xd->bd);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
+ vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
+ h, pred16, w);
+ besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address,
+ src_stride, sse);
+ } else {
+ besterr =
+ vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse);
+ }
+ } else {
+ DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+ vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
+ 0, kernel, MV_PRECISION_Q3, 0, 0);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
+ besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
+ } else {
+ besterr = vfp->vf(pred, w, src_address, src_stride, sse);
+ }
+ }
+ if (besterr >= UINT_MAX) return UINT_MAX;
+ return (int)besterr;
+#else
+ int besterr;
+ DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+ assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
+ assert(w != 0 && h != 0);
+ (void)xd;
+
+ vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
+ 0, kernel, MV_PRECISION_Q3, 0, 0);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
+ besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
+ } else {
+ besterr = vfp->vf(pred, w, src_address, src_stride, sse);
+ }
+ return besterr;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+
+// TODO(yunqing): this part can be further refactored.
+#if CONFIG_VP9_HIGHBITDEPTH
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ int64_t tmpmse; \
+ const MV mv = { r, c }; \
+ const MV ref_mv = { rr, rc }; \
+ thismse = \
+ accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
+ y, y_stride, second_pred, w, h, &sse); \
+ tmpmse = thismse; \
+ tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \
+ if (tmpmse >= INT_MAX) { \
+ v = INT_MAX; \
+ } else if ((v = (uint32_t)tmpmse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#else
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ const MV mv = { r, c }; \
+ const MV ref_mv = { rr, rc }; \
+ thismse = \
+ accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
+ y, y_stride, second_pred, w, h, &sse); \
+ if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \
+ thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+
+#endif
+
uint32_t vp9_find_best_sub_pixel_tree(
const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
const uint8_t *const z = x->plane[0].src.buf;
const uint8_t *const src_address = z;
const int src_stride = x->plane[0].src.stride;
@@ -650,6 +761,17 @@ uint32_t vp9_find_best_sub_pixel_tree(
int kr, kc;
MvLimits subpel_mv_limits;
+ // TODO(yunqing): need to add 4-tap filter optimization to speed up the
+ // encoder.
+ const InterpKernel *kernel =
+ (use_accurate_subpel_search > 0)
+ ? ((use_accurate_subpel_search == USE_4_TAPS)
+ ? vp9_filter_kernels[FOURTAP]
+ : ((use_accurate_subpel_search == USE_8_TAPS)
+ ? vp9_filter_kernels[EIGHTTAP]
+ : vp9_filter_kernels[EIGHTTAP_SHARP]))
+ : vp9_filter_kernels[BILINEAR];
+
vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
minc = subpel_mv_limits.col_min;
maxc = subpel_mv_limits.col_max;
@@ -674,16 +796,25 @@ uint32_t vp9_find_best_sub_pixel_tree(
tr = br + search_step[idx].row;
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
MV this_mv;
this_mv.row = tr;
this_mv.col = tc;
- if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse, second_pred);
+
+ if (use_accurate_subpel_search) {
+ thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
+ src_address, src_stride, y,
+ y_stride, second_pred, w, h, &sse);
+ } else {
+ const uint8_t *const pre_address =
+ y + (tr >> 3) * y_stride + (tc >> 3);
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
+
cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
mvcost, error_per_bit);
@@ -705,14 +836,21 @@ uint32_t vp9_find_best_sub_pixel_tree(
tc = bc + kc;
tr = br + kr;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
MV this_mv = { tr, tc };
- if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, &sse, second_pred);
+ if (use_accurate_subpel_search) {
+ thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
+ src_address, src_stride, y, y_stride,
+ second_pred, w, h, &sse);
+ } else {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+ src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
+
cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
error_per_bit);
@@ -743,20 +881,36 @@ uint32_t vp9_find_best_sub_pixel_tree(
if (tr == br && tc != bc) {
kc = bc - tc;
if (iters_per_step == 1) {
- CHECK_BETTER(second, br0, bc0 + kc);
+ if (use_accurate_subpel_search) {
+ CHECK_BETTER1(second, br0, bc0 + kc);
+ } else {
+ CHECK_BETTER(second, br0, bc0 + kc);
+ }
}
} else if (tr != br && tc == bc) {
kr = br - tr;
if (iters_per_step == 1) {
- CHECK_BETTER(second, br0 + kr, bc0);
+ if (use_accurate_subpel_search) {
+ CHECK_BETTER1(second, br0 + kr, bc0);
+ } else {
+ CHECK_BETTER(second, br0 + kr, bc0);
+ }
}
}
if (iters_per_step > 1) {
- CHECK_BETTER(second, br0 + kr, bc0);
- CHECK_BETTER(second, br0, bc0 + kc);
- if (br0 != br || bc0 != bc) {
- CHECK_BETTER(second, br0 + kr, bc0 + kc);
+ if (use_accurate_subpel_search) {
+ CHECK_BETTER1(second, br0 + kr, bc0);
+ CHECK_BETTER1(second, br0, bc0 + kc);
+ if (br0 != br || bc0 != bc) {
+ CHECK_BETTER1(second, br0 + kr, bc0 + kc);
+ }
+ } else {
+ CHECK_BETTER(second, br0 + kr, bc0);
+ CHECK_BETTER(second, br0, bc0 + kc);
+ if (br0 != br || bc0 != bc) {
+ CHECK_BETTER(second, br0 + kr, bc0 + kc);
+ }
}
}
}
@@ -781,6 +935,7 @@ uint32_t vp9_find_best_sub_pixel_tree(
}
#undef CHECK_BETTER
+#undef CHECK_BETTER1
static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
int range) {
@@ -1578,9 +1733,10 @@ static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
}
#if CONFIG_NON_GREEDY_MV
-static double nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs) {
+double av1_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs) {
int i;
- double best_cost = -1;
+ int update = 0;
+ double best_cost = 0;
vpx_clear_system_state();
for (i = 0; i < NB_MVS_NUM; ++i) {
if (nb_mvs[i].as_int != INVALID_MV) {
@@ -1589,18 +1745,15 @@ static double nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs) {
const double col_diff = mv->col - nb_mv.col;
double cost = row_diff * row_diff + col_diff * col_diff;
cost = log2(1 + cost);
- if (best_cost < 0) {
+ if (update == 0) {
best_cost = cost;
+ update = 1;
} else {
best_cost = cost < best_cost ? cost : best_cost;
}
}
}
- if (best_cost < 0) {
- return 0;
- } else {
- return best_cost;
- }
+ return best_cost;
}
double vp9_diamond_search_sad_new(const MACROBLOCK *x,
@@ -1646,7 +1799,7 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x,
// Check the starting position
*best_mv_dist = fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
- *best_mv_cost = nb_mvs_inconsistency(best_full_mv, nb_full_mvs);
+ *best_mv_cost = av1_nb_mvs_inconsistency(best_full_mv, nb_full_mvs);
bestsad = (*best_mv_dist) + lambda * (*best_mv_cost);
i = 0;
@@ -1679,7 +1832,8 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x,
const MV this_mv = { best_full_mv->row + ss_mv[i].row,
best_full_mv->col + ss_mv[i].col };
const double mv_dist = sad_array[t];
- const double mv_cost = nb_mvs_inconsistency(&this_mv, nb_full_mvs);
+ const double mv_cost =
+ av1_nb_mvs_inconsistency(&this_mv, nb_full_mvs);
double thissad = mv_dist + lambda * mv_cost;
if (thissad < bestsad) {
bestsad = thissad;
@@ -1699,7 +1853,8 @@ double vp9_diamond_search_sad_new(const MACROBLOCK *x,
const uint8_t *const check_here = ss_os[i] + best_address;
const double mv_dist =
fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
- const double mv_cost = nb_mvs_inconsistency(&this_mv, nb_full_mvs);
+ const double mv_cost =
+ av1_nb_mvs_inconsistency(&this_mv, nb_full_mvs);
double thissad = mv_dist + lambda * mv_cost;
if (thissad < bestsad) {
bestsad = thissad;
@@ -2285,7 +2440,7 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
vpx_clear_system_state();
*best_mv_dist =
fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride);
- *best_mv_cost = nb_mvs_inconsistency(best_full_mv, nb_full_mvs);
+ *best_mv_cost = av1_nb_mvs_inconsistency(best_full_mv, nb_full_mvs);
best_sad = (*best_mv_dist) + lambda * (*best_mv_cost);
for (i = 0; i < search_range; i++) {
@@ -2307,7 +2462,7 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
const MV mv = { best_full_mv->row + neighbors[j].row,
best_full_mv->col + neighbors[j].col };
const double mv_dist = sads[j];
- const double mv_cost = nb_mvs_inconsistency(&mv, nb_full_mvs);
+ const double mv_cost = av1_nb_mvs_inconsistency(&mv, nb_full_mvs);
const double thissad = mv_dist + lambda * mv_cost;
if (thissad < best_sad) {
best_sad = thissad;
@@ -2325,7 +2480,7 @@ double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
const double mv_dist =
fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride);
- const double mv_cost = nb_mvs_inconsistency(&mv, nb_full_mvs);
+ const double mv_cost = av1_nb_mvs_inconsistency(&mv, nb_full_mvs);
const double thissad = mv_dist + lambda * mv_cost;
if (thissad < best_sad) {
best_sad = thissad;
@@ -2587,7 +2742,8 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
(void)tc; \
(void)sse; \
(void)thismse; \
- (void)cost_list;
+ (void)cost_list; \
+ (void)use_accurate_subpel_search;
// Return the maximum MV.
uint32_t vp9_return_max_sub_pixel_mv(
@@ -2595,7 +2751,7 @@ uint32_t vp9_return_max_sub_pixel_mv(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
COMMON_MV_TEST;
(void)minr;
@@ -2617,7 +2773,7 @@ uint32_t vp9_return_min_sub_pixel_mv(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
COMMON_MV_TEST;
(void)maxr;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h
index adb02bc1abd..a159cb288ed 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h
@@ -59,7 +59,7 @@ struct SPEED_FEATURES;
int vp9_init_search_range(int size);
int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv,
- int sad_per_bit, int distance,
+ int error_per_bit, int search_range,
const struct vp9_variance_vtable *fn_ptr,
const struct mv *center_mv);
@@ -75,7 +75,7 @@ typedef uint32_t(fractional_mv_step_fp)(
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h);
+ int h, int use_accurate_subpel_search);
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
@@ -134,6 +134,8 @@ double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
const vp9_variance_fn_ptr_t *fn_ptr,
const int_mv *nb_full_mvs,
struct TplDepStats *tpl_stats, int rf_idx);
+
+double av1_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs);
#endif // CONFIG_NON_GREEDY_MV
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c
index 249e03760fa..8c9a40f5586 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c
@@ -148,7 +148,9 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
ne->last_h = cm->height;
}
return;
- } else if (cm->current_video_frame > 60 &&
+ } else if (frame_counter > 60 && cpi->svc.num_encoded_top_layer > 1 &&
+ cpi->rc.frames_since_key > cpi->svc.number_spatial_layers &&
+ cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
cpi->rc.avg_frame_low_motion < (low_res ? 70 : 50)) {
// Force noise estimation to 0 and denoiser off if content has high motion.
ne->level = kLowLow;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
index 416d437e07d..1324b5bc8aa 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
@@ -247,7 +247,8 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0,
+ cpi->sf.use_accurate_subpel_search);
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT);
}
@@ -1539,7 +1540,8 @@ static int search_new_mv(VP9_COMP *cpi, MACROBLOCK *x,
cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0,
+ cpi->sf.use_accurate_subpel_search);
} else if (svc->use_base_mv && svc->spatial_layer_id) {
if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) {
const int pre_stride = xd->plane[0].pre[0].stride;
@@ -1730,11 +1732,21 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (!cpi->use_svc ||
(svc->use_gf_temporal_ref_current_layer &&
!svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
+ struct scale_factors *const sf_last = &cm->frame_refs[LAST_FRAME - 1].sf;
+ struct scale_factors *const sf_golden =
+ &cm->frame_refs[GOLDEN_FRAME - 1].sf;
gf_temporal_ref = 1;
- if (cpi->rc.avg_frame_low_motion > 70)
- thresh_svc_skip_golden = 500;
- else
- thresh_svc_skip_golden = 0;
+ // For temporal long term prediction, check that the golden reference
+ // is same scale as last reference, otherwise disable.
+ if ((sf_last->x_scale_fp != sf_golden->x_scale_fp) ||
+ (sf_last->y_scale_fp != sf_golden->y_scale_fp)) {
+ gf_temporal_ref = 0;
+ } else {
+ if (cpi->rc.avg_frame_low_motion > 70)
+ thresh_svc_skip_golden = 500;
+ else
+ thresh_svc_skip_golden = 0;
+ }
}
init_ref_frame_cost(cm, xd, ref_frame_cost);
@@ -2758,7 +2770,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &dummy_dist,
- &x->pred_sse[ref_frame], NULL, 0, 0);
+ &x->pred_sse[ref_frame], NULL, 0, 0,
+ cpi->sf.use_accurate_subpel_search);
xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv;
} else {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
index 76e310ac274..cdd824358cd 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -247,20 +247,65 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) {
return target;
}
+// Update the buffer level before encoding with the per-frame-bandwidth,
+static void update_buffer_level_preencode(VP9_COMP *cpi) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ rc->bits_off_target += rc->avg_frame_bandwidth;
+ // Clip the buffer level to the maximum specified buffer size.
+ rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
+ rc->buffer_level = rc->bits_off_target;
+}
+
+// Update the buffer level before encoding with the per-frame-bandwidth
+// for SVC. The current and all upper temporal layers are updated, needed
+// for the layered rate control which involves cumulative buffer levels for
+// the temporal layers. Allow for using the timestamp(pts) delta for the
+// framerate when the set_ref_frame_config is used.
+static void update_buffer_level_svc_preencode(VP9_COMP *cpi) {
+ SVC *const svc = &cpi->svc;
+ int i;
+ // Set this to 1 to use timestamp delta for "framerate" under
+ // ref_frame_config usage.
+ int use_timestamp = 1;
+ const int64_t ts_delta =
+ svc->time_stamp_superframe - svc->time_stamp_prev[svc->spatial_layer_id];
+ for (i = svc->temporal_layer_id; i < svc->number_temporal_layers; ++i) {
+ const int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ if (use_timestamp && cpi->svc.use_set_ref_frame_config &&
+ svc->number_temporal_layers == 1 && ts_delta > 0 &&
+ svc->current_superframe > 0) {
+ // TODO(marpan): This may need to be modified for temporal layers.
+ const double framerate_pts = 10000000.0 / ts_delta;
+ lrc->bits_off_target += (int)(lc->target_bandwidth / framerate_pts);
+ } else {
+ lrc->bits_off_target += (int)(lc->target_bandwidth / lc->framerate);
+ }
+ // Clip buffer level to maximum buffer size for the layer.
+ lrc->bits_off_target =
+ VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
+ lrc->buffer_level = lrc->bits_off_target;
+ if (i == svc->temporal_layer_id) {
+ cpi->rc.bits_off_target = lrc->bits_off_target;
+ cpi->rc.buffer_level = lrc->buffer_level;
+ }
+ }
+}
+
// Update the buffer level for higher temporal layers, given the encoded current
// temporal layer.
-static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
+static void update_layer_buffer_level_postencode(SVC *svc,
+ int encoded_frame_size) {
int i = 0;
- int current_temporal_layer = svc->temporal_layer_id;
+ const int current_temporal_layer = svc->temporal_layer_id;
for (i = current_temporal_layer + 1; i < svc->number_temporal_layers; ++i) {
const int layer =
LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
- int bits_off_for_this_layer =
- (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size);
- lrc->bits_off_target += bits_off_for_this_layer;
-
+ lrc->bits_off_target -= encoded_frame_size;
// Clip buffer level to maximum buffer size for the layer.
lrc->bits_off_target =
VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
@@ -268,29 +313,13 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
}
}
-// Update the buffer level: leaky bucket model.
-static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
- const VP9_COMMON *const cm = &cpi->common;
+// Update the buffer level after encoding with encoded frame size.
+static void update_buffer_level_postencode(VP9_COMP *cpi,
+ int encoded_frame_size) {
RATE_CONTROL *const rc = &cpi->rc;
-
- // On dropped frame, don't update buffer if its currently stable
- // (above optimal level). This can cause issues when full superframe
- // can drop (!= LAYER_DROP), since QP is adjusted downwards with buffer
- // overflow, which can cause more frame drops.
- if (cpi->svc.framedrop_mode != LAYER_DROP && encoded_frame_size == 0 &&
- rc->buffer_level > rc->optimal_buffer_level)
- return;
-
- // Non-viewable frames are a special case and are treated as pure overhead.
- if (!cm->show_frame) {
- rc->bits_off_target -= encoded_frame_size;
- } else {
- rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size;
- }
-
+ rc->bits_off_target -= encoded_frame_size;
// Clip the buffer level to the maximum specified buffer size.
rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
-
// For screen-content mode, and if frame-dropper is off, don't let buffer
// level go below threshold, given here as -rc->maximum_ buffer_size.
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
@@ -300,7 +329,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
rc->buffer_level = rc->bits_off_target;
if (is_one_pass_cbr_svc(cpi)) {
- update_layer_buffer_level(&cpi->svc, encoded_frame_size);
+ update_layer_buffer_level_postencode(&cpi->svc, encoded_frame_size);
}
}
@@ -363,6 +392,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->high_source_sad = 0;
rc->reset_high_source_sad = 0;
rc->high_source_sad_lagindex = -1;
+ rc->high_num_blocks_with_motion = 0;
rc->hybrid_intra_scene_change = 0;
rc->re_encode_maxq_scene_change = 0;
rc->alt_ref_gf_group = 0;
@@ -398,6 +428,11 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
oxcf->init_framerate, rc->min_gf_interval);
rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
+
+ rc->force_max_q = 0;
+ rc->last_post_encode_dropped_scene_change = 0;
+ rc->use_post_encode_drop = 0;
+ rc->ext_use_post_encode_drop = 0;
}
static int check_buffer_above_thresh(VP9_COMP *cpi, int drop_mark) {
@@ -515,6 +550,39 @@ static int drop_frame(VP9_COMP *cpi) {
}
}
+int post_encode_drop_screen_content(VP9_COMP *cpi, size_t *size) {
+ size_t frame_size = *size << 3;
+ int64_t new_buffer_level =
+ cpi->rc.buffer_level + cpi->rc.avg_frame_bandwidth - (int64_t)frame_size;
+
+ // For now we drop if new buffer level (given the encoded frame size) goes
+ // below 0.
+ if (new_buffer_level < 0) {
+ *size = 0;
+ vp9_rc_postencode_update_drop_frame(cpi);
+ // Update flag to use for next frame.
+ if (cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe))
+ cpi->rc.last_post_encode_dropped_scene_change = 1;
+ // Force max_q on next frame.
+ cpi->rc.force_max_q = 1;
+ cpi->rc.avg_frame_qindex[INTER_FRAME] = cpi->rc.worst_quality;
+ cpi->last_frame_dropped = 1;
+ cpi->ext_refresh_frame_flags_pending = 0;
+ if (cpi->use_svc) {
+ cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
+ cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
+ cpi->svc.drop_count[cpi->svc.spatial_layer_id]++;
+ cpi->svc.skip_enhancement_layer = 1;
+ }
+ return 1;
+ }
+
+ cpi->rc.force_max_q = 0;
+ cpi->rc.last_post_encode_dropped_scene_change = 0;
+ return 0;
+}
+
int vp9_rc_drop_frame(VP9_COMP *cpi) {
SVC *svc = &cpi->svc;
int svc_prev_layer_dropped = 0;
@@ -834,7 +902,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
int active_worst_quality;
int ambient_qp;
unsigned int num_frames_weight_key = 5 * cpi->svc.number_temporal_layers;
- if (frame_is_intra_only(cm) || rc->reset_high_source_sad)
+ if (frame_is_intra_only(cm) || rc->reset_high_source_sad || rc->force_max_q)
return rc->worst_quality;
// For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME]
// for the first few frames following key frame. These are both initialized
@@ -845,6 +913,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
? VPXMIN(rc->avg_frame_qindex[INTER_FRAME],
rc->avg_frame_qindex[KEY_FRAME])
: rc->avg_frame_qindex[INTER_FRAME];
+ active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 5) >> 2);
// For SVC if the current base spatial layer was key frame, use the QP from
// that base layer for ambient_qp.
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) {
@@ -854,9 +923,9 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
if (lc->is_key_frame) {
const RATE_CONTROL *lrc = &lc->rc;
ambient_qp = VPXMIN(ambient_qp, lrc->last_q[KEY_FRAME]);
+ active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 9) >> 3);
}
}
- active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2);
if (rc->buffer_level > rc->optimal_buffer_level) {
// Adjust down.
// Maximum limit for down adjustment ~30%; make it lower for screen content.
@@ -1216,10 +1285,16 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
if (frame_is_intra_only(cm)) {
- // Handle the special case for key frames forced when we have reached
- // the maximum key frame interval. Here force the Q to a range
- // based on the ambient Q to reduce the risk of popping.
- if (rc->this_key_frame_forced) {
+ if (rc->frames_to_key == 1 && oxcf->rc_mode == VPX_Q) {
+ // If the next frame is also a key frame or the current frame is the
+ // only frame in the sequence in VPX_Q mode, just use the cq_level
+ // as q.
+ active_best_quality = cq_level;
+ active_worst_quality = cq_level;
+ } else if (rc->this_key_frame_forced) {
+ // Handle the special case for key frames forced when we have reached
+ // the maximum key frame interval. Here force the Q to a range
+ // based on the ambient Q to reduce the risk of popping.
double last_boosted_q;
int delta_qindex;
int qindex;
@@ -1289,6 +1364,16 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
+ // Modify best quality for second level arfs. For mode VPX_Q this
+ // becomes the baseline frame q.
+ if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) {
+ const int layer_depth = gf_group->layer_depth[gf_group_index];
+ // linearly fit the frame q depending on the layer depth index from
+ // the base layer ARF.
+ active_best_quality =
+ ((layer_depth - 1) * q + active_best_quality + layer_depth / 2) /
+ layer_depth;
+ }
} else if (oxcf->rc_mode == VPX_Q) {
if (!cpi->refresh_alt_ref_frame) {
active_best_quality = cq_level;
@@ -1297,8 +1382,14 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
// Modify best quality for second level arfs. For mode VPX_Q this
// becomes the baseline frame q.
- if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW)
- active_best_quality = (active_best_quality + cq_level + 1) / 2;
+ if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) {
+ const int layer_depth = gf_group->layer_depth[gf_group_index];
+ // linearly fit the frame q depending on the layer depth index from
+ // the base layer ARF.
+ active_best_quality = ((layer_depth - 1) * cq_level +
+ active_best_quality + layer_depth / 2) /
+ layer_depth;
+ }
}
} else {
active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
@@ -1475,12 +1566,14 @@ void vp9_configure_buffer_updates(VP9_COMP *cpi, int gf_group_index) {
}
void vp9_estimate_qp_gop(VP9_COMP *cpi) {
- int gop_length = cpi->rc.baseline_gf_interval;
+ int gop_length = cpi->twopass.gf_group.gf_group_size;
int bottom_index, top_index;
int idx;
const int gf_index = cpi->twopass.gf_group.index;
+ const int is_src_frame_alt_ref = cpi->rc.is_src_frame_alt_ref;
+ const int refresh_frame_context = cpi->common.refresh_frame_context;
- for (idx = 1; idx <= gop_length + 1 && idx < MAX_LAG_BUFFERS; ++idx) {
+ for (idx = 1; idx <= gop_length; ++idx) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[idx];
int target_rate = cpi->twopass.gf_group.bit_allocation[idx];
cpi->twopass.gf_group.index = idx;
@@ -1492,6 +1585,8 @@ void vp9_estimate_qp_gop(VP9_COMP *cpi) {
}
// Reset the actual index and frame update
cpi->twopass.gf_group.index = gf_index;
+ cpi->rc.is_src_frame_alt_ref = is_src_frame_alt_ref;
+ cpi->common.refresh_frame_context = refresh_frame_context;
vp9_configure_buffer_updates(cpi, gf_index);
}
@@ -1672,7 +1767,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
}
if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex;
- update_buffer_level(cpi, rc->projected_frame_size);
+ update_buffer_level_postencode(cpi, rc->projected_frame_size);
// Rolling monitors of whether we are over or underspending used to help
// regulate min and Max Q in two pass.
@@ -1769,14 +1864,20 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
- // Update buffer level with zero size, update frame counters, and return.
- update_buffer_level(cpi, 0);
cpi->common.current_video_frame++;
cpi->rc.frames_since_key++;
cpi->rc.frames_to_key--;
cpi->rc.rc_2_frame = 0;
cpi->rc.rc_1_frame = 0;
cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth;
+ // For SVC on dropped frame when framedrop_mode != LAYER_DROP:
+ // in this mode the whole superframe may be dropped if only a single layer
+ // has buffer underflow (below threshold). Since this can then lead to
+ // increasing buffer levels/overflow for certain layers even though whole
+ // superframe is dropped, we cap buffer level if its already stable.
+ if (cpi->use_svc && cpi->svc.framedrop_mode != LAYER_DROP &&
+ cpi->rc.buffer_level > cpi->rc.optimal_buffer_level)
+ cpi->rc.buffer_level = cpi->rc.optimal_buffer_level;
}
static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
@@ -1822,10 +1923,9 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
int target;
- // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
if (!cpi->refresh_alt_ref_frame &&
(cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
- rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
+ rc->frames_to_key == 0)) {
cm->frame_type = KEY_FRAME;
rc->this_key_frame_forced =
cm->current_video_frame != 0 && rc->frames_to_key == 0;
@@ -2031,7 +2131,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
cm->frame_type = KEY_FRAME;
rc->source_alt_ref_active = 0;
if (is_one_pass_cbr_svc(cpi)) {
- if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi);
+ if (cm->current_video_frame > 0) vp9_svc_reset_temporal_layers(cpi, 1);
layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
svc->number_temporal_layers);
svc->layer_context[layer].is_key_frame = 1;
@@ -2110,15 +2210,15 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
vp9_cyclic_refresh_update_parameters(cpi);
vp9_rc_set_frame_target(cpi, target);
+ if (cm->show_frame) update_buffer_level_svc_preencode(cpi);
}
void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
int target;
- // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
- rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0)) {
+ rc->frames_to_key == 0) {
cm->frame_type = KEY_FRAME;
rc->frames_to_key = cpi->oxcf.key_freq;
rc->kf_boost = DEFAULT_KF_BOOST;
@@ -2151,6 +2251,9 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
target = calc_pframe_target_size_one_pass_cbr(cpi);
vp9_rc_set_frame_target(cpi, target);
+
+ if (cm->show_frame) update_buffer_level_preencode(cpi);
+
if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC)
cpi->resize_pending = vp9_resize_one_pass_cbr(cpi);
else
@@ -2654,8 +2757,11 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
if (cm->use_highbitdepth) return;
#endif
rc->high_source_sad = 0;
- if (cpi->svc.spatial_layer_id == 0 && src_width == last_src_width &&
- src_height == last_src_height) {
+ rc->high_num_blocks_with_motion = 0;
+ // For SVC: scene detection is only checked on first spatial layer of
+ // the superframe using the original/unscaled resolutions.
+ if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode &&
+ src_width == last_src_width && src_height == last_src_height) {
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
int num_mi_cols = cm->mi_cols;
int num_mi_rows = cm->mi_rows;
@@ -2772,6 +2878,8 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
} else {
rc->avg_source_sad[lagframe_idx] = avg_sad;
}
+ if (num_zero_temp_sad < (num_samples >> 1))
+ rc->high_num_blocks_with_motion = 1;
}
}
// For CBR non-screen content mode, check if we should reset the rate
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
index 3b441bf1f50..16aa08137ee 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -175,6 +175,7 @@ typedef struct {
uint64_t avg_source_sad[MAX_LAG_BUFFERS];
uint64_t prev_avg_source_sad_lag;
int high_source_sad_lagindex;
+ int high_num_blocks_with_motion;
int alt_ref_gf_group;
int last_frame_is_src_altref;
int high_source_sad;
@@ -186,6 +187,14 @@ typedef struct {
int force_qpmin;
int reset_high_source_sad;
double perc_arf_usage;
+ int force_max_q;
+ // Last frame was dropped post encode on scene change.
+ int last_post_encode_dropped_scene_change;
+ // Enable post encode frame dropping for screen content. Only enabled when
+ // ext_use_post_encode_drop is enabled by user.
+ int use_post_encode_drop;
+ // External flag to enable post encode frame dropping, controlled by user.
+ int ext_use_post_encode_drop;
} RATE_CONTROL;
struct VP9_COMP;
@@ -194,7 +203,7 @@ struct VP9EncoderConfig;
void vp9_rc_init(const struct VP9EncoderConfig *oxcf, int pass,
RATE_CONTROL *rc);
-int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
+int vp9_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
double correction_factor, vpx_bit_depth_t bit_depth);
double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth);
@@ -205,9 +214,9 @@ void vp9_rc_init_minq_luts(void);
int vp9_rc_get_default_min_gf_interval(int width, int height, double framerate);
// Note vp9_rc_get_default_max_gf_interval() requires the min_gf_interval to
-// be passed in to ensure that the max_gf_interval returned is at least as bis
+// be passed in to ensure that the max_gf_interval returned is at least as big
// as that.
-int vp9_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
+int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval);
// Generally at the high level, the following flow is expected
// to be enforced for rate control:
@@ -247,13 +256,16 @@ void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi);
// Changes only the rate correction factors in the rate control structure.
void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi);
+// Post encode drop for CBR screen-content mode.
+int post_encode_drop_screen_content(struct VP9_COMP *cpi, size_t *size);
+
// Decide if we should drop this frame: For 1-pass CBR.
// Changes only the decimation count in the rate control structure
int vp9_rc_drop_frame(struct VP9_COMP *cpi);
// Computes frame size bounds.
void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi,
- int this_frame_target,
+ int frame_target,
int *frame_under_shoot_limit,
int *frame_over_shoot_limit);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c
index 2e4a4fe9fa2..8323f3af4ee 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c
@@ -173,69 +173,61 @@ static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
128, 144, 144 };
-int64_t vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
- const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
+int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
+ // largest dc_quant is 21387, therefore rdmult should always fit in uint32_t
+ // i.e. 21387 * 21387 * 8 = 3659230152 = 0xDA1B6BC8
+ const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
+ uint32_t rdmult = q * q;
+
+ if (cpi->common.frame_type != KEY_FRAME) {
+ rdmult = rdmult * 3 + (rdmult * 2 / 3);
+ } else {
+ if (qindex < 64)
+ rdmult = rdmult * 4;
+ else if (qindex <= 128)
+ rdmult = rdmult * 3 + rdmult / 2;
+ else if (qindex < 190)
+ rdmult = rdmult * 4 + rdmult / 2;
+ else
+ rdmult = rdmult * 7 + rdmult / 2;
+ }
#if CONFIG_VP9_HIGHBITDEPTH
- int64_t rdmult = 0;
switch (cpi->common.bit_depth) {
- case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
- case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
- default:
- assert(cpi->common.bit_depth == VPX_BITS_12);
- rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
- break;
+ case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
+ case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
+ default: break;
}
-#else
- int64_t rdmult = 88 * q * q / 24;
#endif // CONFIG_VP9_HIGHBITDEPTH
- return rdmult;
+ return rdmult > 0 ? rdmult : 1;
}
-int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
- int64_t rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
-
+static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
+ int64_t rdmult_64 = rdmult;
if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
- const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
+ const int gfu_boost = cpi->multi_layer_arf
+ ? gf_group->gfu_boost[gf_group->index]
+ : cpi->rc.gfu_boost;
+ const int boost_index = VPXMIN(15, (gfu_boost / 100));
- rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
- rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
+ rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
+ rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
}
- if (rdmult < 1) rdmult = 1;
- return (int)rdmult;
+ return (int)rdmult_64;
}
-int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
- const VP9_COMMON *cm = &cpi->common;
- int64_t q = vp9_dc_quant(cm->base_qindex, 0, cpi->common.bit_depth);
-
-#if CONFIG_VP9_HIGHBITDEPTH
- int64_t rdmult = 0;
- switch (cpi->common.bit_depth) {
- case VPX_BITS_8: rdmult = (int)((88 * q * q / beta) / 24); break;
- case VPX_BITS_10:
- rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4);
- break;
- default:
- assert(cpi->common.bit_depth == VPX_BITS_12);
- rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 8);
- break;
- }
-#else
- int64_t rdmult = (int)((88 * q * q / beta) / 24);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
- if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
- const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
+int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
+ int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
+ return modulate_rdmult(cpi, rdmult);
+}
- rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
- rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
- }
- if (rdmult < 1) rdmult = 1;
- return (int)rdmult;
+int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
+ int rdmult =
+ vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
+ rdmult = (int)((double)rdmult / beta);
+ rdmult = rdmult > 0 ? rdmult : 1;
+ return modulate_rdmult(cpi, rdmult);
}
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
@@ -631,6 +623,7 @@ YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
const VP9_COMMON *const cm = &cpi->common;
const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
? &cm->buffer_pool->frame_bufs[scaled_idx].buf
: NULL;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h
index f2fc776a4aa..fa85f2176f5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h
@@ -134,8 +134,7 @@ struct TileDataEnc;
struct VP9_COMP;
struct macroblock;
-int64_t vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi,
- int qindex);
+int vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, int qindex);
int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex);
@@ -145,7 +144,7 @@ void vp9_initialize_rd_consts(struct VP9_COMP *cpi);
void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex);
-void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
unsigned int qstep, int *rate, int64_t *dist);
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
@@ -176,8 +175,8 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);
void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
-void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize,
- int best_mode_index);
+void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
+ int bsize, int best_mode_index);
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
const int *const thresh_fact) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
index 698faa343bb..9cde479cd6f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
@@ -1821,7 +1821,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0,
cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred, pw, ph);
+ &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
@@ -1875,6 +1875,8 @@ static int64_t rd_pick_best_sub8x8_mode(
const BLOCK_SIZE bsize = mi->sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int pw = num_4x4_blocks_wide << 2;
+ const int ph = num_4x4_blocks_high << 2;
ENTROPY_CONTEXT t_above[2], t_left[2];
int subpelmv = 1, have_ref = 0;
SPEED_FEATURES *const sf = &cpi->sf;
@@ -2011,7 +2013,8 @@ static int64_t rd_pick_best_sub8x8_mode(
x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop,
sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &distortion,
- &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0);
+ &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph,
+ cpi->sf.use_accurate_subpel_search);
// save motion search result for use in compound prediction
seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv;
@@ -2330,6 +2333,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
const int best_predmv_idx = x->mv_best_ref_index[ref];
const YV12_BUFFER_CONFIG *scaled_ref_frame =
vp9_get_scaled_ref_frame(cpi, ref);
+ const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
+ const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
MV pred_mv[3];
pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
@@ -2452,7 +2457,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph,
+ cpi->sf.use_accurate_subpel_search);
}
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c
index 6ac77aeef28..23a320ae553 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c
@@ -424,11 +424,11 @@ void vp9_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2, int width2,
int out_stride) {
int i;
- uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height);
+ uint8_t *intbuf = (uint8_t *)calloc(width2 * height, sizeof(*intbuf));
uint8_t *tmpbuf =
- (uint8_t *)malloc(sizeof(uint8_t) * (width < height ? height : width));
- uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height);
- uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2);
+ (uint8_t *)calloc(width < height ? height : width, sizeof(*tmpbuf));
+ uint8_t *arrbuf = (uint8_t *)calloc(height, sizeof(*arrbuf));
+ uint8_t *arrbuf2 = (uint8_t *)calloc(height2, sizeof(*arrbuf2));
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
goto Error;
assert(width > 0);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
index 44909239d32..9b6c69a73fd 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
@@ -116,17 +116,13 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
sf->ml_partition_search_breakout_thresh[1] = -1.0f;
sf->ml_partition_search_breakout_thresh[2] = -1.0f;
}
-
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) {
- sf->use_square_only_thresh_high = BLOCK_4X4;
- sf->use_square_only_thresh_low = BLOCK_SIZES;
- if (is_720p_or_larger) {
- sf->partition_search_breakout_thr.dist = (1 << 23);
- sf->use_ml_partition_search_breakout = 0;
- }
+ sf->ml_partition_search_breakout_thresh[0] -= 1.0f;
+ sf->ml_partition_search_breakout_thresh[1] -= 1.0f;
+ sf->ml_partition_search_breakout_thresh[2] -= 1.0f;
}
-#endif
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
if (speed >= 2) {
@@ -242,14 +238,10 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
if (speed >= 1) {
sf->enable_tpl_model = 0;
- sf->ml_var_partition_pruning = 0;
+ sf->ml_var_partition_pruning = !boosted;
sf->ml_prune_rect_partition_threhold[1] = 200;
sf->ml_prune_rect_partition_threhold[2] = 200;
sf->ml_prune_rect_partition_threhold[3] = 200;
-#if CONFIG_VP9_HIGHBITDEPTH
- if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
- sf->prune_ref_frame_for_rect_partitions = 0;
-#endif // CONFIG_VP9_HIGHBITDEPTH
if (oxcf->pass == 2) {
TWO_PASS *const twopass = &cpi->twopass;
@@ -288,9 +280,11 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
: INT_MAX;
+ sf->use_accurate_subpel_search = USE_4_TAPS;
}
if (speed >= 2) {
+ sf->ml_var_partition_pruning = 0;
if (oxcf->vbr_corpus_complexity)
sf->recode_loop = ALLOW_RECODE_FIRST;
else
@@ -328,6 +322,8 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
}
+
+ sf->use_accurate_subpel_search = USE_2_TAPS;
}
if (speed >= 3) {
@@ -450,6 +446,7 @@ static void set_rt_speed_feature_framesize_independent(
sf->disable_golden_ref = 0;
sf->enable_tpl_model = 0;
sf->enhanced_full_pixel_motion_search = 0;
+ sf->use_accurate_subpel_search = USE_2_TAPS;
if (speed >= 1) {
sf->allow_txfm_domain_distortion = 1;
@@ -565,6 +562,16 @@ static void set_rt_speed_feature_framesize_independent(
(frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = is_keyframe ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
+#if CONFIG_ML_VAR_PARTITION
+ if (!frame_is_intra_only(cm) && cm->width >= 360 && cm->height >= 360)
+ sf->partition_search_type = ML_BASED_PARTITION;
+ else
+ sf->partition_search_type = REFERENCE_PARTITION;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
+ sf->partition_search_type = REFERENCE_PARTITION;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_ML_VAR_PARTITION
if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
cpi->rc.is_src_frame_alt_ref) {
sf->partition_search_type = VAR_BASED_PARTITION;
@@ -626,9 +633,7 @@ static void set_rt_speed_feature_framesize_independent(
sf->use_compound_nonrd_pickmode = 1;
}
#if CONFIG_ML_VAR_PARTITION
- if (!frame_is_intra_only(cm) && cm->width >= 360 && cm->height >= 360)
- sf->partition_search_type = ML_BASED_PARTITION;
- else
+ if (frame_is_intra_only(cm) || cm->width < 360 || cm->height < 360)
sf->partition_search_type = VAR_BASED_PARTITION;
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
@@ -705,6 +710,7 @@ static void set_rt_speed_feature_framesize_independent(
// For SVC: enable use of lower resolution partition for higher resolution,
// only for 3 spatial layers and when config/top resolution is above VGA.
// Enable only for non-base temporal layer frames.
+ // TODO(jianj): Investigate webm:1578
if (cpi->use_svc && cpi->svc.use_partition_reuse &&
cpi->svc.number_spatial_layers == 3 && cpi->svc.temporal_layer_id > 0 &&
cpi->oxcf.width * cpi->oxcf.height > 640 * 480)
@@ -789,6 +795,21 @@ static void set_rt_speed_feature_framesize_independent(
sf->partition_search_type = FIXED_PARTITION;
sf->always_this_block_size = BLOCK_64X64;
}
+ // Special case for screen content: increase motion search on base spatial
+ // layer when high motion is detected or previous SL0 frame was dropped.
+ // Avoid speed 5 for as there is an issue with SVC datarate test.
+ // TODO(marpan/jianj): Investigate issue at speed 5.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed > 5 &&
+ cpi->svc.spatial_layer_id == 0 &&
+ (cpi->rc.high_num_blocks_with_motion || cpi->svc.last_layer_dropped[0])) {
+ sf->mv.search_method = NSTEP;
+ sf->mv.fullpel_search_step_param = 2;
+ // TODO(marpan/jianj): Investigate issue for lower setting of step_param
+ // for spatial layers (namely on lower layers).
+ if (cpi->use_svc && cm->width != cpi->oxcf.width &&
+ cm->height != cpi->oxcf.height)
+ sf->mv.fullpel_search_step_param = 4;
+ }
}
void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
@@ -897,12 +918,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->allow_quant_coeff_opt = sf->optimize_coefficients;
sf->quant_opt_thresh = 99.0;
sf->allow_acl = 1;
-#if CONFIG_VP9_HIGHBITDEPTH
- // TODO(jingning): Make the model support high bit-depth route.
- sf->enable_tpl_model = !cm->use_highbitdepth && oxcf->enable_tpl_model;
-#else
sf->enable_tpl_model = oxcf->enable_tpl_model;
-#endif
sf->prune_ref_frame_for_rect_partitions = 0;
for (i = 0; i < TX_SIZES; i++) {
@@ -942,6 +958,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->ml_prune_rect_partition_threhold[2] = -1;
sf->ml_prune_rect_partition_threhold[3] = -1;
sf->ml_var_partition_pruning = 0;
+ sf->use_accurate_subpel_search = USE_8_TAPS;
// Some speed-up features even for best quality as minimal impact on quality.
sf->adaptive_rd_thresh = 1;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h
index a895ed2354b..02673e60200 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h
@@ -243,6 +243,13 @@ typedef enum {
RE_ENCODE_MAXQ = 2
} OVERSHOOT_DETECTION_CBR_RT;
+typedef enum {
+ USE_2_TAPS = 0,
+ USE_4_TAPS,
+ USE_8_TAPS,
+ USE_8_TAPS_SHARP,
+} SUBPEL_SEARCH_TYPE;
+
typedef struct SPEED_FEATURES {
MV_SPEED_FEATURES mv;
@@ -586,6 +593,10 @@ typedef struct SPEED_FEATURES {
// Allow for disabling golden reference.
int disable_golden_ref;
+
+ // Allow sub-pixel search to use interpolation filters with different taps in
+ // order to achieve accurate motion search result.
+ SUBPEL_SEARCH_TYPE use_accurate_subpel_search;
} SPEED_FEATURES;
struct VP9_COMP;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
index 1321c457575..21b920f11ae 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -53,6 +53,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->previous_frame_is_intra_only = 0;
svc->superframe_has_layer_sync = 0;
svc->use_set_ref_frame_config = 0;
+ svc->num_encoded_top_layer = 0;
for (i = 0; i < REF_FRAMES; ++i) {
svc->fb_idx_spatial_layer_id[i] = -1;
@@ -329,6 +330,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
LAYER_CONTEXT *const lc = get_layer_context(cpi);
const int old_frame_since_key = cpi->rc.frames_since_key;
const int old_frame_to_key = cpi->rc.frames_to_key;
+ const int old_ext_use_post_encode_drop = cpi->rc.ext_use_post_encode_drop;
cpi->rc = lc->rc;
cpi->twopass = lc->twopass;
@@ -346,7 +348,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
cpi->rc.frames_since_key = old_frame_since_key;
cpi->rc.frames_to_key = old_frame_to_key;
}
-
+ cpi->rc.ext_use_post_encode_drop = old_ext_use_post_encode_drop;
// For spatial-svc, allow cyclic-refresh to be applied on the spatial layers,
// for the base temporal layer.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
@@ -736,6 +738,8 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
}
svc->force_zero_mode_spatial_ref = 1;
svc->mi_stride[svc->spatial_layer_id] = cpi->common.mi_stride;
+ svc->mi_rows[svc->spatial_layer_id] = cpi->common.mi_rows;
+ svc->mi_cols[svc->spatial_layer_id] = cpi->common.mi_cols;
if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
set_flags_and_fb_idx_for_temporal_mode3(cpi);
@@ -931,7 +935,7 @@ void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) {
}
// Reset on key frame: reset counters, references and buffer updates.
-void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
+void vp9_svc_reset_temporal_layers(VP9_COMP *const cpi, int is_key) {
int sl, tl;
SVC *const svc = &cpi->svc;
LAYER_CONTEXT *lc = NULL;
@@ -939,7 +943,7 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
lc = &cpi->svc.layer_context[sl * svc->number_temporal_layers + tl];
lc->current_video_frame_in_layer = 0;
- lc->frames_from_key_frame = 0;
+ if (is_key) lc->frames_from_key_frame = 0;
}
}
if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
@@ -1089,13 +1093,16 @@ void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) {
}
} else if (svc->use_gf_temporal_ref_current_layer &&
!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
- // If the usage of golden as second long term reference is enabled for this
- // layer, then temporal_layer_id of that reference must be base temporal
- // layer 0, and spatial_layer_id of that reference must be same as current
- // spatial_layer_id.
- assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
- svc->spatial_layer_id);
- assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == 0);
+ // For the usage of golden as second long term reference: the
+ // temporal_layer_id of that reference must be base temporal layer 0, and
+ // spatial_layer_id of that reference must be same as current
+ // spatial_layer_id. If not, disable feature.
+ // TODO(marpan): Investigate when this can happen, and maybe put this check
+ // and reset in a different place.
+ if (svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] !=
+ svc->spatial_layer_id ||
+ svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] != 0)
+ svc->use_gf_temporal_ref_current_layer = 0;
}
}
@@ -1107,7 +1114,8 @@ void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) {
if (svc->spatial_layer_id == 0) {
// On base spatial layer: if the current superframe has a layer sync then
// reset the pattern counters and reset to base temporal layer.
- if (svc->superframe_has_layer_sync) vp9_svc_reset_key_frame(cpi);
+ if (svc->superframe_has_layer_sync)
+ vp9_svc_reset_temporal_layers(cpi, cpi->common.frame_type == KEY_FRAME);
}
// If the layer sync is set for this current spatial layer then
// disable the temporal reference.
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
index fceab7780bb..94531204497 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -125,6 +125,8 @@ typedef struct SVC {
BLOCK_SIZE *prev_partition_svc;
int mi_stride[VPX_MAX_LAYERS];
+ int mi_rows[VPX_MAX_LAYERS];
+ int mi_cols[VPX_MAX_LAYERS];
int first_layer_denoise;
@@ -178,9 +180,14 @@ typedef struct SVC {
int first_spatial_layer_to_encode;
+ // Parameters for allowing framerate per spatial layer, and buffer
+ // update based on timestamps.
int64_t duration[VPX_SS_MAX_LAYERS];
-
int64_t timebase_fac;
+ int64_t time_stamp_superframe;
+ int64_t time_stamp_prev[VPX_SS_MAX_LAYERS];
+
+ int num_encoded_top_layer;
} SVC;
struct VP9_COMP;
@@ -234,7 +241,7 @@ int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi);
void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
-void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi);
+void vp9_svc_reset_temporal_layers(struct VP9_COMP *const cpi, int is_key);
void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c
index 51668d01d61..4c1d8894b41 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c
@@ -119,8 +119,13 @@ static void apply_temporal_filter(
unsigned int i, j, k, m;
int modifier;
const int rounding = (1 << strength) >> 1;
- const int uv_block_width = block_width >> ss_x;
- const int uv_block_height = block_height >> ss_y;
+ const unsigned int uv_block_width = block_width >> ss_x;
+ const unsigned int uv_block_height = block_height >> ss_y;
+ DECLARE_ALIGNED(16, uint16_t, y_diff_sse[256]);
+ DECLARE_ALIGNED(16, uint16_t, u_diff_sse[256]);
+ DECLARE_ALIGNED(16, uint16_t, v_diff_sse[256]);
+
+ int idx = 0, idy;
assert(strength >= 0);
assert(strength <= 6);
@@ -128,19 +133,42 @@ static void apply_temporal_filter(
assert(filter_weight >= 0);
assert(filter_weight <= 2);
+ memset(y_diff_sse, 0, 256 * sizeof(uint16_t));
+ memset(u_diff_sse, 0, 256 * sizeof(uint16_t));
+ memset(v_diff_sse, 0, 256 * sizeof(uint16_t));
+
+ // Calculate diff^2 for each pixel of the 16x16 block.
+ // TODO(yunqing): the following code needs to be optimized.
+ for (i = 0; i < block_height; i++) {
+ for (j = 0; j < block_width; j++) {
+ const int16_t diff =
+ y_frame1[i * (int)y_stride + j] - y_pred[i * (int)block_width + j];
+ y_diff_sse[idx++] = diff * diff;
+ }
+ }
+ idx = 0;
+ for (i = 0; i < uv_block_height; i++) {
+ for (j = 0; j < uv_block_width; j++) {
+ const int16_t diffu =
+ u_frame1[i * uv_stride + j] - u_pred[i * uv_buf_stride + j];
+ const int16_t diffv =
+ v_frame1[i * uv_stride + j] - v_pred[i * uv_buf_stride + j];
+ u_diff_sse[idx] = diffu * diffu;
+ v_diff_sse[idx] = diffv * diffv;
+ idx++;
+ }
+ }
+
for (i = 0, k = 0, m = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++) {
const int pixel_value = y_pred[i * y_buf_stride + j];
// non-local mean approach
- int diff_sse[9] = { 0 };
- int idx, idy;
int y_index = 0;
const int uv_r = i >> ss_y;
const int uv_c = j >> ss_x;
-
- int diff;
+ modifier = 0;
for (idy = -1; idy <= 1; ++idy) {
for (idx = -1; idx <= 1; ++idx) {
@@ -149,9 +177,7 @@ static void apply_temporal_filter(
if (row >= 0 && row < (int)block_height && col >= 0 &&
col < (int)block_width) {
- const int diff = y_frame1[row * (int)y_stride + col] -
- y_pred[row * (int)block_width + col];
- diff_sse[y_index] = diff * diff;
+ modifier += y_diff_sse[row * (int)block_width + col];
++y_index;
}
}
@@ -159,16 +185,8 @@ static void apply_temporal_filter(
assert(y_index > 0);
- modifier = 0;
- for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
-
- diff = u_frame1[uv_r * uv_stride + uv_c] -
- u_pred[uv_r * uv_buf_stride + uv_c];
- modifier += diff * diff;
-
- diff = v_frame1[uv_r * uv_stride + uv_c] -
- v_pred[uv_r * uv_buf_stride + uv_c];
- modifier += diff * diff;
+ modifier += u_diff_sse[uv_r * uv_block_width + uv_c];
+ modifier += v_diff_sse[uv_r * uv_block_width + uv_c];
y_index += 2;
@@ -186,9 +204,6 @@ static void apply_temporal_filter(
const int v_pixel_value = v_pred[uv_r * uv_buf_stride + uv_c];
// non-local mean approach
- int u_diff_sse[9] = { 0 };
- int v_diff_sse[9] = { 0 };
- int idx, idy;
int cr_index = 0;
int u_mod = 0, v_mod = 0;
int y_diff = 0;
@@ -198,16 +213,10 @@ static void apply_temporal_filter(
const int row = uv_r + idy;
const int col = uv_c + idx;
- if (row >= 0 && row < uv_block_height && col >= 0 &&
- col < uv_block_width) {
- int diff = u_frame1[row * uv_stride + col] -
- u_pred[row * uv_buf_stride + col];
- u_diff_sse[cr_index] = diff * diff;
-
- diff = v_frame1[row * uv_stride + col] -
- v_pred[row * uv_buf_stride + col];
- v_diff_sse[cr_index] = diff * diff;
-
+ if (row >= 0 && row < (int)uv_block_height && col >= 0 &&
+ col < (int)uv_block_width) {
+ u_mod += u_diff_sse[row * uv_block_width + col];
+ v_mod += v_diff_sse[row * uv_block_width + col];
++cr_index;
}
}
@@ -215,18 +224,11 @@ static void apply_temporal_filter(
assert(cr_index > 0);
- for (idx = 0; idx < 9; ++idx) {
- u_mod += u_diff_sse[idx];
- v_mod += v_diff_sse[idx];
- }
-
for (idy = 0; idy < 1 + ss_y; ++idy) {
for (idx = 0; idx < 1 + ss_x; ++idx) {
const int row = (uv_r << ss_y) + idy;
const int col = (uv_c << ss_x) + idx;
- const int diff = y_frame1[row * (int)y_stride + col] -
- y_pred[row * (int)block_width + col];
- y_diff += diff * diff;
+ y_diff += y_diff_sse[row * (int)block_width + col];
++cr_index;
}
}
@@ -325,13 +327,23 @@ void vp9_highbd_temporal_filter_apply_c(
const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
unsigned int i, j, k;
int modifier;
- int byte = 0;
const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
+ int diff_sse[256] = { 0 };
+ int this_idx = 0;
+
+ for (i = 0; i < block_height; i++) {
+ for (j = 0; j < block_width; j++) {
+ const int diff =
+ frame1[i * (int)stride + j] - frame2[i * (int)block_width + j];
+ diff_sse[this_idx++] = diff * diff;
+ }
+ }
+
+ modifier = 0;
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int pixel_value = *frame2;
- int diff_sse[9] = { 0 };
+ int pixel_value = frame2[i * (int)block_width + j];
int idx, idy, index = 0;
for (idy = -1; idy <= 1; ++idy) {
@@ -341,22 +353,16 @@ void vp9_highbd_temporal_filter_apply_c(
if (row >= 0 && row < (int)block_height && col >= 0 &&
col < (int)block_width) {
- int diff = frame1[byte + idy * (int)stride + idx] -
- frame2[idy * (int)block_width + idx];
- diff_sse[index] = diff * diff;
+ modifier += diff_sse[row * (int)block_width + col];
++index;
}
}
}
assert(index > 0);
- modifier = 0;
- for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
-
modifier *= 3;
modifier /= index;
- ++frame2;
modifier += rounding;
modifier >>= strength;
@@ -367,11 +373,7 @@ void vp9_highbd_temporal_filter_apply_c(
count[k] += modifier;
accumulator[k] += modifier * pixel_value;
-
- byte++;
}
-
- byte += stride - block_width;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -421,12 +423,13 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
/* restore UMV window */
x->mv_limits = tmp_mv_limits;
+ // TODO(yunqing): may use higher tap interp filter than 2 taps if needed.
// Ignore mv costing by sending NULL pointer instead of cost array
bestsme = cpi->find_fractional_mv_step(
x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_search_level,
- cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
- 0);
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 16,
+ 16, USE_8_TAPS_SHARP);
// Restore input state
x->plane[0].src = src;
@@ -949,8 +952,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
}
// Initialize errorperbit and sabperbit.
- rdmult = (int)vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX);
- if (rdmult < 1) rdmult = 1;
+ rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX);
set_error_per_bit(&cpi->td.mb, rdmult);
vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
index 293cdcd675a..0cecd654019 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
@@ -185,8 +185,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, int16_t *qcoeff_ptr,
int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
__m128i zero;
int pass;
@@ -215,7 +215,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
__m128i *in[8];
int index = 0;
- (void)scan_ptr;
+ (void)scan;
(void)coeff_ptr;
// Pre-condition input (shift by two)
@@ -449,7 +449,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
in7 = _mm_srai_epi16(in7, 1);
}
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -518,8 +518,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -582,8 +582,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
index bf874a09ec5..99c19389486 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -18,11 +18,13 @@
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
-void vp9_fdct8x8_quant_ssse3(
- const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
+ tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
__m128i zero;
int pass;
@@ -52,7 +54,7 @@ void vp9_fdct8x8_quant_ssse3(
__m128i *in[8];
int index = 0;
- (void)scan_ptr;
+ (void)scan;
(void)coeff_ptr;
// Pre-condition input (shift by two)
@@ -280,7 +282,7 @@ void vp9_fdct8x8_quant_ssse3(
in7 = _mm_srai_epi16(in7, 1);
}
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -350,8 +352,8 @@ void vp9_fdct8x8_quant_ssse3(
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -427,8 +429,8 @@ void vp9_fdct8x8_quant_ssse3(
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c
index 4bebc34d676..8dfdbd50f6c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c
@@ -15,7 +15,7 @@
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
-#include "vpx_dsp/x86/quantize_x86.h"
+#include "vpx_dsp/x86/quantize_sse2.h"
// Zero fill 8 positions in the output buffer.
static INLINE void store_zero_tran_low(tran_low_t *a) {
@@ -50,18 +50,18 @@ void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
__m128i eob;
__m256i round256, quant256, dequant256;
__m256i eob256, thr256;
- (void)scan_ptr;
+ (void)scan;
(void)skip_block;
assert(!skip_block);
coeff_ptr += n_coeffs;
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -97,7 +97,7 @@ void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
}
- eob256 = scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256);
+ eob256 = scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256);
n_coeffs += 8 * 2;
}
@@ -124,8 +124,7 @@ void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
eob256 = _mm256_max_epi16(
- eob256,
- scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256));
+ eob256, scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256));
} else {
store_zero_tran_low(qcoeff_ptr + n_coeffs);
store_zero_tran_low(dqcoeff_ptr + n_coeffs);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
index ca0ad4407e5..885220a7129 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -21,20 +21,20 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
__m128i zero;
__m128i thr;
int16_t nzflag;
__m128i eob;
__m128i round, quant, dequant;
- (void)scan_ptr;
+ (void)scan;
(void)skip_block;
assert(!skip_block);
coeff_ptr += n_coeffs;
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -100,8 +100,8 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -175,8 +175,8 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
index 3b2d9a86617..85f83a66249 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
@@ -1151,6 +1151,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
unsigned char *cx_data;
cpi->svc.timebase_fac = timebase_units_to_ticks(timebase, 1);
+ cpi->svc.time_stamp_superframe = dst_time_stamp;
// Set up internal flags
if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1;
@@ -1625,6 +1626,14 @@ static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_postencode_drop(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ const unsigned int data = va_arg(args, unsigned int);
+ cpi->rc.ext_use_post_encode_drop = data;
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP8_COPY_REFERENCE, ctrl_copy_reference },
@@ -1668,6 +1677,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP9E_SET_RENDER_SIZE, ctrl_set_render_size },
{ VP9E_SET_TARGET_LEVEL, ctrl_set_target_level },
{ VP9E_SET_ROW_MT, ctrl_set_row_mt },
+ { VP9E_SET_POSTENCODE_DROP, ctrl_set_postencode_drop },
{ VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test },
{ VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred },
{ VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer },
@@ -1690,7 +1700,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
{ 0,
{
// NOLINT
- 0, // g_usage
+ 0, // g_usage (unused)
8, // g_threads
0, // g_profile
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c
index fdff877682a..6a4cb9acf6f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c
@@ -270,6 +270,9 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
RANGE_CHECK(ctx, row_mt, 0, 1);
ctx->pbi->row_mt = ctx->row_mt;
+ RANGE_CHECK(ctx, lpf_opt, 0, 1);
+ ctx->pbi->lpf_mt_opt = ctx->lpf_opt;
+
// If postprocessing was enabled by the application and a
// configuration has not been provided, default it.
if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
@@ -658,6 +661,13 @@ static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_enable_lpf_opt(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->lpf_opt = va_arg(args, int);
+
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP8_COPY_REFERENCE, ctrl_copy_reference },
@@ -670,6 +680,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter },
{ VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc },
{ VP9D_SET_ROW_MT, ctrl_set_row_mt },
+ { VP9D_SET_LOOP_FILTER_OPT, ctrl_enable_lpf_opt },
// Getters
{ VPXD_GET_LAST_QUANTIZER, ctrl_get_quantizer },
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h
index a1c335278d2..f60688c4db2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h
@@ -46,6 +46,7 @@ struct vpx_codec_alg_priv {
int svc_decoding;
int svc_spatial_layer;
int row_mt;
+ int lpf_opt;
};
#endif // VPX_VP9_VP9_DX_IFACE_H_