author    Allan Sandfeld Jensen <allan.jensen@qt.io>  2018-08-24 12:15:48 +0200
committer Allan Sandfeld Jensen <allan.jensen@qt.io>  2018-08-28 13:30:04 +0000
commit    b014812705fc80bff0a5c120dfcef88f349816dc (patch)
tree      25a2e2d9fa285f1add86aa333389a839f81a39ae /chromium/third_party/libvpx
parent    9f4560b1027ae06fdb497023cdcaf91b8511fa74 (diff)
download  qtwebengine-chromium-b014812705fc80bff0a5c120dfcef88f349816dc.tar.gz
BASELINE: Update Chromium to 68.0.3440.125
Change-Id: I23f19369e01f688e496f5bf179abb521ad73874f
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/third_party/libvpx')
-rw-r--r--  chromium/third_party/libvpx/README.chromium | 4
-rwxr-xr-x  chromium/third_party/libvpx/generate_gni.sh | 9
-rw-r--r--  chromium/third_party/libvpx/libvpx_srcs.gni | 3
-rw-r--r--  chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_dsp_rtcd.h | 3
-rw-r--r--  chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/ios/arm64/vpx_dsp_rtcd.h | 3
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h | 8
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h | 3
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h | 3
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h | 28
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h | 56
-rw-r--r--  chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h | 28
-rw-r--r--  chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h | 56
-rw-r--r--  chromium/third_party/libvpx/source/config/nacl/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/vpx_version.h | 6
-rw-r--r--  chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h | 28
-rw-r--r--  chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/win/x64/vpx_config.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h | 56
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/README | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/build/make/configure.sh | 21
-rwxr-xr-x  chromium/third_party/libvpx/source/libvpx/build/make/iosbuild.sh | 3
-rwxr-xr-x  chromium/third_party/libvpx/source/libvpx/configure | 27
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/ivfdec.c | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/Android.mk | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx | 14
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc | 15
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.h | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h | 10
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc | 25
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc | 35
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.h | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c | 3
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c | 30
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c | 106
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c | 40
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c | 278
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h | 13
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c | 319
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c | 11
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h | 3
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c | 88
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c | 129
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c | 48
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c | 66
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c | 31
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h | 29
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c | 223
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h | 44
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c | 47
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_image.c | 21
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h | 26
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/vpx_image.h | 19
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/avg_pred_neon.c | 42
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c | 380
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c | 83
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h | 9
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c | 85
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c | 217
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c | 86
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c | 307
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/types_vsx.h | 21
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/variance_vsx.c | 174
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/ssim.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/sum_squares.c | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl | 50
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/mem_sse2.h | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sum_squares_sse2.c | 188
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c | 363
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c | 412
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/y4menc.c | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/y4minput.c | 24
109 files changed, 3042 insertions, 1549 deletions
diff --git a/chromium/third_party/libvpx/README.chromium b/chromium/third_party/libvpx/README.chromium
index ef97e284423..3fb61e4b51a 100644
--- a/chromium/third_party/libvpx/README.chromium
+++ b/chromium/third_party/libvpx/README.chromium
@@ -5,9 +5,9 @@ License: BSD
License File: source/libvpx/LICENSE
Security Critical: yes
-Date: Monday April 09 2018
+Date: Monday May 21 2018
Branch: master
-Commit: be5df6080154e58db88fa3640e127efd18c04bde
+Commit: e27a331778c4c99ec37262ea786a3b4cc2a491ac
Description:
Contains the sources used to compile libvpx binaries used by Google Chrome and
diff --git a/chromium/third_party/libvpx/generate_gni.sh b/chromium/third_party/libvpx/generate_gni.sh
index 5704e76062f..2c94f6f685a 100755
--- a/chromium/third_party/libvpx/generate_gni.sh
+++ b/chromium/third_party/libvpx/generate_gni.sh
@@ -226,6 +226,7 @@ function print_config_basic {
# $3 - Optional - any additional arguments to pass through.
function gen_rtcd_header {
echo "Generate $LIBVPX_CONFIG_DIR/$1/*_rtcd.h files."
+ format="clang-format -i -style=Chromium"
rm -rf $BASE_DIR/$TEMP_DIR/libvpx.config
if [[ "$2" == "mipsel" || "$2" == "mips64el" || "$2" == nacl ]]; then
@@ -244,7 +245,7 @@ function gen_rtcd_header {
$BASE_DIR/$LIBVPX_SRC_DIR/vp8/common/rtcd_defs.pl \
> $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp8_rtcd.h
- clang-format -i $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp8_rtcd.h
+ ${format} $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp8_rtcd.h
$BASE_DIR/$LIBVPX_SRC_DIR/build/make/rtcd.pl \
--arch=$2 \
@@ -253,7 +254,7 @@ function gen_rtcd_header {
$BASE_DIR/$LIBVPX_SRC_DIR/vp9/common/vp9_rtcd_defs.pl \
> $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp9_rtcd.h
- clang-format -i $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp9_rtcd.h
+ ${format} $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp9_rtcd.h
$BASE_DIR/$LIBVPX_SRC_DIR/build/make/rtcd.pl \
--arch=$2 \
@@ -262,7 +263,7 @@ function gen_rtcd_header {
$BASE_DIR/$LIBVPX_SRC_DIR/vpx_scale/vpx_scale_rtcd.pl \
> $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_scale_rtcd.h
- clang-format -i $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_scale_rtcd.h
+ ${format} $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_scale_rtcd.h
$BASE_DIR/$LIBVPX_SRC_DIR/build/make/rtcd.pl \
--arch=$2 \
@@ -271,7 +272,7 @@ function gen_rtcd_header {
$BASE_DIR/$LIBVPX_SRC_DIR/vpx_dsp/vpx_dsp_rtcd_defs.pl \
> $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_dsp_rtcd.h
- clang-format -i $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_dsp_rtcd.h
+ ${format} $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_dsp_rtcd.h
rm -rf $BASE_DIR/$TEMP_DIR/libvpx.config
}
diff --git a/chromium/third_party/libvpx/libvpx_srcs.gni b/chromium/third_party/libvpx/libvpx_srcs.gni
index a59ffeb95f6..24e6a8959ae 100644
--- a/chromium/third_party/libvpx/libvpx_srcs.gni
+++ b/chromium/third_party/libvpx/libvpx_srcs.gni
@@ -1632,6 +1632,7 @@ libvpx_srcs_arm_neon = [
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/subpel_variance_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/transpose_neon.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.h",
@@ -2128,6 +2129,7 @@ libvpx_srcs_arm_neon_cpu_detect_neon = [
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/subpel_variance_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_scaled_convolve8_neon.c",
@@ -2467,6 +2469,7 @@ libvpx_srcs_arm64 = [
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/subpel_variance_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/transpose_neon.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c",
diff --git a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm
index d459dae055b..aeaea997f54 100644
--- a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm
@@ -78,6 +78,7 @@
.set CONFIG_MULTI_RES_ENCODING , 1
.set CONFIG_TEMPORAL_DENOISING , 1
.set CONFIG_VP9_TEMPORAL_DENOISING , 1
+.set CONFIG_CONSISTENT_RECODE , 0
.set CONFIG_COEFFICIENT_RANGE_CHECKING , 0
.set CONFIG_VP9_HIGHBITDEPTH , 0
.set CONFIG_BETTER_HW_COMPATIBILITY , 0
diff --git a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h
index b80461bdae0..365206fe64a 100644
--- a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 0
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_dsp_rtcd.h
index cc0b382fb76..0056935cbd0 100644
--- a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_dsp_rtcd.h
@@ -2210,7 +2210,8 @@ void vpx_subtract_block_neon(int rows,
#define vpx_subtract_block vpx_subtract_block_neon
uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size);
-#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c
+uint64_t vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size);
+#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_neon
void vpx_tm_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
diff --git a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm
index 63979ef8f30..296266dedc3 100644
--- a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm
@@ -78,6 +78,7 @@
.set CONFIG_MULTI_RES_ENCODING , 1
.set CONFIG_TEMPORAL_DENOISING , 1
.set CONFIG_VP9_TEMPORAL_DENOISING , 1
+.set CONFIG_CONSISTENT_RECODE , 0
.set CONFIG_COEFFICIENT_RANGE_CHECKING , 0
.set CONFIG_VP9_HIGHBITDEPTH , 0
.set CONFIG_BETTER_HW_COMPATIBILITY , 0
diff --git a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h
index 1ab268cab88..13e7637569b 100644
--- a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 0
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_dsp_rtcd.h
index cc0b382fb76..0056935cbd0 100644
--- a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_dsp_rtcd.h
@@ -2210,7 +2210,8 @@ void vpx_subtract_block_neon(int rows,
#define vpx_subtract_block vpx_subtract_block_neon
uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size);
-#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c
+uint64_t vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size);
+#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_neon
void vpx_tm_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
index ccf2d701a4e..e6fa07b327e 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
@@ -75,6 +75,7 @@
.equ CONFIG_MULTI_RES_ENCODING , 1
.equ CONFIG_TEMPORAL_DENOISING , 1
.equ CONFIG_VP9_TEMPORAL_DENOISING , 1
+.equ CONFIG_CONSISTENT_RECODE , 0
.equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0
.equ CONFIG_VP9_HIGHBITDEPTH , 0
.equ CONFIG_BETTER_HW_COMPATIBILITY , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h
index 9cb0939b234..5b8efaedac0 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 0
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h
index c518d2b8fbe..3308094509a 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h
@@ -2891,7 +2891,10 @@ RTCD_EXTERN void (*vpx_subtract_block)(int rows,
ptrdiff_t pred_stride);
uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size);
-#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c
+uint64_t vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size);
+RTCD_EXTERN uint64_t (*vpx_sum_squares_2d_i16)(const int16_t* src,
+ int stride,
+ int size);
void vpx_tm_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
@@ -3694,6 +3697,9 @@ static void setup_rtcd_internal(void) {
vpx_subtract_block = vpx_subtract_block_c;
if (flags & HAS_NEON)
vpx_subtract_block = vpx_subtract_block_neon;
+ vpx_sum_squares_2d_i16 = vpx_sum_squares_2d_i16_c;
+ if (flags & HAS_NEON)
+ vpx_sum_squares_2d_i16 = vpx_sum_squares_2d_i16_neon;
vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c;
if (flags & HAS_NEON)
vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_neon;
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm
index 87cfb6acbb9..1137b0ff007 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm
@@ -75,6 +75,7 @@
.equ CONFIG_MULTI_RES_ENCODING , 1
.equ CONFIG_TEMPORAL_DENOISING , 1
.equ CONFIG_VP9_TEMPORAL_DENOISING , 1
+.equ CONFIG_CONSISTENT_RECODE , 0
.equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0
.equ CONFIG_VP9_HIGHBITDEPTH , 0
.equ CONFIG_BETTER_HW_COMPATIBILITY , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h
index b80461bdae0..365206fe64a 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 0
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h
index cc0b382fb76..0056935cbd0 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h
@@ -2210,7 +2210,8 @@ void vpx_subtract_block_neon(int rows,
#define vpx_subtract_block vpx_subtract_block_neon
uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size);
-#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c
+uint64_t vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size);
+#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_neon
void vpx_tm_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm
index 6cc4a695c68..51d4f390c17 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm
@@ -75,6 +75,7 @@
.equ CONFIG_MULTI_RES_ENCODING , 1
.equ CONFIG_TEMPORAL_DENOISING , 1
.equ CONFIG_VP9_TEMPORAL_DENOISING , 1
+.equ CONFIG_CONSISTENT_RECODE , 0
.equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0
.equ CONFIG_VP9_HIGHBITDEPTH , 0
.equ CONFIG_BETTER_HW_COMPATIBILITY , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h
index 826ac8d1897..fc57694a68d 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 0
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm
index c5284f8017b..54efd55c5bf 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm
@@ -75,6 +75,7 @@
.equ CONFIG_MULTI_RES_ENCODING , 1
.equ CONFIG_TEMPORAL_DENOISING , 1
.equ CONFIG_VP9_TEMPORAL_DENOISING , 1
+.equ CONFIG_CONSISTENT_RECODE , 0
.equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0
.equ CONFIG_VP9_HIGHBITDEPTH , 0
.equ CONFIG_BETTER_HW_COMPATIBILITY , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h
index 1ab268cab88..13e7637569b 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 0
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h
index cc0b382fb76..0056935cbd0 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h
@@ -2210,7 +2210,8 @@ void vpx_subtract_block_neon(int rows,
#define vpx_subtract_block vpx_subtract_block_neon
uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size);
-#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c
+uint64_t vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size);
+#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_neon
void vpx_tm_predictor_16x16_c(uint8_t* dst,
ptrdiff_t y_stride,
diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm
index fc694fbc78e..650636a78b1 100644
--- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm
@@ -75,6 +75,7 @@
.equ CONFIG_MULTI_RES_ENCODING , 1
.equ CONFIG_TEMPORAL_DENOISING , 1
.equ CONFIG_VP9_TEMPORAL_DENOISING , 1
+.equ CONFIG_CONSISTENT_RECODE , 0
.equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0
.equ CONFIG_VP9_HIGHBITDEPTH , 1
.equ CONFIG_BETTER_HW_COMPATIBILITY , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h
index cc20ff9ec8c..df5f6f87029 100644
--- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 1
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm
index b1c79dd9ab8..6aa13d720aa 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm
@@ -72,6 +72,7 @@
%define CONFIG_MULTI_RES_ENCODING 1
%define CONFIG_TEMPORAL_DENOISING 1
%define CONFIG_VP9_TEMPORAL_DENOISING 1
+%define CONFIG_CONSISTENT_RECODE 0
%define CONFIG_COEFFICIENT_RANGE_CHECKING 0
%define CONFIG_VP9_HIGHBITDEPTH 1
%define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h
index dbd8c1d3933..7749f38f9be 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 1
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h
index 4e665ccf81e..ecd6c94b1f6 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h
@@ -6458,6 +6458,11 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
+unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -8588,6 +8593,11 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
+unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -8604,6 +8614,11 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
+unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -8662,6 +8677,11 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
+unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -9957,6 +9977,8 @@ static void setup_rtcd_internal(void) {
vpx_mse16x8 = vpx_mse16x8_c;
if (flags & HAS_SSE2)
vpx_mse16x8 = vpx_mse16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_mse16x8 = vpx_mse16x8_avx2;
vpx_mse8x16 = vpx_mse8x16_c;
if (flags & HAS_SSE2)
vpx_mse8x16 = vpx_mse8x16_sse2;
@@ -10341,9 +10363,13 @@ static void setup_rtcd_internal(void) {
vpx_variance16x32 = vpx_variance16x32_c;
if (flags & HAS_SSE2)
vpx_variance16x32 = vpx_variance16x32_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x32 = vpx_variance16x32_avx2;
vpx_variance16x8 = vpx_variance16x8_c;
if (flags & HAS_SSE2)
vpx_variance16x8 = vpx_variance16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x8 = vpx_variance16x8_avx2;
vpx_variance32x16 = vpx_variance32x16_c;
if (flags & HAS_SSE2)
vpx_variance32x16 = vpx_variance32x16_sse2;
@@ -10357,6 +10383,8 @@ static void setup_rtcd_internal(void) {
vpx_variance32x64 = vpx_variance32x64_c;
if (flags & HAS_SSE2)
vpx_variance32x64 = vpx_variance32x64_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance32x64 = vpx_variance32x64_avx2;
vpx_variance4x4 = vpx_variance4x4_c;
if (flags & HAS_SSE2)
vpx_variance4x4 = vpx_variance4x4_sse2;
diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h
index d876f25ba4f..98374b198c0 100644
--- a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 0
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h
index 967e5443fb9..1a8a71b75f4 100644
--- a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 0
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm
index 84f986d3fcc..ffaf2d94f9d 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm
@@ -72,6 +72,7 @@
%define CONFIG_MULTI_RES_ENCODING 1
%define CONFIG_TEMPORAL_DENOISING 1
%define CONFIG_VP9_TEMPORAL_DENOISING 1
+%define CONFIG_CONSISTENT_RECODE 0
%define CONFIG_COEFFICIENT_RANGE_CHECKING 0
%define CONFIG_VP9_HIGHBITDEPTH 1
%define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h
index aedc47647fd..c6e9e82ee32 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 1
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h
index 49a73711abe..258994f0076 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h
@@ -5376,7 +5376,16 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define vpx_mse16x8 vpx_mse16x8_sse2
+unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
unsigned int vpx_mse8x16_c(const uint8_t* src_ptr,
int source_stride,
@@ -7350,7 +7359,16 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
-#define vpx_variance16x32 vpx_variance16x32_sse2
+unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
unsigned int vpx_variance16x8_c(const uint8_t* src_ptr,
int source_stride,
@@ -7362,7 +7380,16 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
-#define vpx_variance16x8 vpx_variance16x8_sse2
+unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
unsigned int vpx_variance32x16_c(const uint8_t* src_ptr,
int source_stride,
@@ -7416,7 +7443,16 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
-#define vpx_variance32x64 vpx_variance32x64_sse2
+unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
unsigned int vpx_variance4x4_c(const uint8_t* src_ptr,
int source_stride,
@@ -7743,6 +7779,9 @@ static void setup_rtcd_internal(void) {
vpx_mse16x16 = vpx_mse16x16_sse2;
if (flags & HAS_AVX2)
vpx_mse16x16 = vpx_mse16x16_avx2;
+ vpx_mse16x8 = vpx_mse16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_mse16x8 = vpx_mse16x8_avx2;
vpx_quantize_b = vpx_quantize_b_sse2;
if (flags & HAS_SSSE3)
vpx_quantize_b = vpx_quantize_b_ssse3;
@@ -7918,12 +7957,21 @@ static void setup_rtcd_internal(void) {
vpx_variance16x16 = vpx_variance16x16_sse2;
if (flags & HAS_AVX2)
vpx_variance16x16 = vpx_variance16x16_avx2;
+ vpx_variance16x32 = vpx_variance16x32_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x32 = vpx_variance16x32_avx2;
+ vpx_variance16x8 = vpx_variance16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x8 = vpx_variance16x8_avx2;
vpx_variance32x16 = vpx_variance32x16_sse2;
if (flags & HAS_AVX2)
vpx_variance32x16 = vpx_variance32x16_avx2;
vpx_variance32x32 = vpx_variance32x32_sse2;
if (flags & HAS_AVX2)
vpx_variance32x32 = vpx_variance32x32_avx2;
+ vpx_variance32x64 = vpx_variance32x64_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance32x64 = vpx_variance32x64_avx2;
vpx_variance64x32 = vpx_variance64x32_sse2;
if (flags & HAS_AVX2)
vpx_variance64x32 = vpx_variance64x32_avx2;
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm
index b1c79dd9ab8..6aa13d720aa 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm
@@ -72,6 +72,7 @@
%define CONFIG_MULTI_RES_ENCODING 1
%define CONFIG_TEMPORAL_DENOISING 1
%define CONFIG_VP9_TEMPORAL_DENOISING 1
+%define CONFIG_CONSISTENT_RECODE 0
%define CONFIG_COEFFICIENT_RANGE_CHECKING 0
%define CONFIG_VP9_HIGHBITDEPTH 1
%define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h
index dbd8c1d3933..7749f38f9be 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 1
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h
index 4e665ccf81e..ecd6c94b1f6 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h
@@ -6458,6 +6458,11 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
+unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -8588,6 +8593,11 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
+unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -8604,6 +8614,11 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
+unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -8662,6 +8677,11 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
+unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -9957,6 +9977,8 @@ static void setup_rtcd_internal(void) {
vpx_mse16x8 = vpx_mse16x8_c;
if (flags & HAS_SSE2)
vpx_mse16x8 = vpx_mse16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_mse16x8 = vpx_mse16x8_avx2;
vpx_mse8x16 = vpx_mse8x16_c;
if (flags & HAS_SSE2)
vpx_mse8x16 = vpx_mse8x16_sse2;
@@ -10341,9 +10363,13 @@ static void setup_rtcd_internal(void) {
vpx_variance16x32 = vpx_variance16x32_c;
if (flags & HAS_SSE2)
vpx_variance16x32 = vpx_variance16x32_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x32 = vpx_variance16x32_avx2;
vpx_variance16x8 = vpx_variance16x8_c;
if (flags & HAS_SSE2)
vpx_variance16x8 = vpx_variance16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x8 = vpx_variance16x8_avx2;
vpx_variance32x16 = vpx_variance32x16_c;
if (flags & HAS_SSE2)
vpx_variance32x16 = vpx_variance32x16_sse2;
@@ -10357,6 +10383,8 @@ static void setup_rtcd_internal(void) {
vpx_variance32x64 = vpx_variance32x64_c;
if (flags & HAS_SSE2)
vpx_variance32x64 = vpx_variance32x64_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance32x64 = vpx_variance32x64_avx2;
vpx_variance4x4 = vpx_variance4x4_c;
if (flags & HAS_SSE2)
vpx_variance4x4 = vpx_variance4x4_sse2;
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm
index 84f986d3fcc..ffaf2d94f9d 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm
@@ -72,6 +72,7 @@
%define CONFIG_MULTI_RES_ENCODING 1
%define CONFIG_TEMPORAL_DENOISING 1
%define CONFIG_VP9_TEMPORAL_DENOISING 1
+%define CONFIG_CONSISTENT_RECODE 0
%define CONFIG_COEFFICIENT_RANGE_CHECKING 0
%define CONFIG_VP9_HIGHBITDEPTH 1
%define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h
index aedc47647fd..c6e9e82ee32 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 1
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h
index 49a73711abe..258994f0076 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h
@@ -5376,7 +5376,16 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define vpx_mse16x8 vpx_mse16x8_sse2
+unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
unsigned int vpx_mse8x16_c(const uint8_t* src_ptr,
int source_stride,
@@ -7350,7 +7359,16 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
-#define vpx_variance16x32 vpx_variance16x32_sse2
+unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
unsigned int vpx_variance16x8_c(const uint8_t* src_ptr,
int source_stride,
@@ -7362,7 +7380,16 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
-#define vpx_variance16x8 vpx_variance16x8_sse2
+unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
unsigned int vpx_variance32x16_c(const uint8_t* src_ptr,
int source_stride,
@@ -7416,7 +7443,16 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
-#define vpx_variance32x64 vpx_variance32x64_sse2
+unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
unsigned int vpx_variance4x4_c(const uint8_t* src_ptr,
int source_stride,
@@ -7743,6 +7779,9 @@ static void setup_rtcd_internal(void) {
vpx_mse16x16 = vpx_mse16x16_sse2;
if (flags & HAS_AVX2)
vpx_mse16x16 = vpx_mse16x16_avx2;
+ vpx_mse16x8 = vpx_mse16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_mse16x8 = vpx_mse16x8_avx2;
vpx_quantize_b = vpx_quantize_b_sse2;
if (flags & HAS_SSSE3)
vpx_quantize_b = vpx_quantize_b_ssse3;
@@ -7918,12 +7957,21 @@ static void setup_rtcd_internal(void) {
vpx_variance16x16 = vpx_variance16x16_sse2;
if (flags & HAS_AVX2)
vpx_variance16x16 = vpx_variance16x16_avx2;
+ vpx_variance16x32 = vpx_variance16x32_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x32 = vpx_variance16x32_avx2;
+ vpx_variance16x8 = vpx_variance16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x8 = vpx_variance16x8_avx2;
vpx_variance32x16 = vpx_variance32x16_sse2;
if (flags & HAS_AVX2)
vpx_variance32x16 = vpx_variance32x16_avx2;
vpx_variance32x32 = vpx_variance32x32_sse2;
if (flags & HAS_AVX2)
vpx_variance32x32 = vpx_variance32x32_avx2;
+ vpx_variance32x64 = vpx_variance32x64_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance32x64 = vpx_variance32x64_avx2;
vpx_variance64x32 = vpx_variance64x32_sse2;
if (flags & HAS_AVX2)
vpx_variance64x32 = vpx_variance64x32_avx2;
diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h
index cc20ff9ec8c..df5f6f87029 100644
--- a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 1
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/vpx_version.h b/chromium/third_party/libvpx/source/config/vpx_version.h
index cf24c88e12e..d208d662f33 100644
--- a/chromium/third_party/libvpx/source/config/vpx_version.h
+++ b/chromium/third_party/libvpx/source/config/vpx_version.h
@@ -2,7 +2,7 @@
#define VERSION_MAJOR 1
#define VERSION_MINOR 7
#define VERSION_PATCH 0
-#define VERSION_EXTRA "262-gbe5df6080"
+#define VERSION_EXTRA "387-ge27a33177"
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.7.0-262-gbe5df6080"
-#define VERSION_STRING " v1.7.0-262-gbe5df6080"
+#define VERSION_STRING_NOSP "v1.7.0-387-ge27a33177"
+#define VERSION_STRING " v1.7.0-387-ge27a33177"
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm
index 0592279663f..4e7f6863eb2 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm
@@ -72,6 +72,7 @@
%define CONFIG_MULTI_RES_ENCODING 1
%define CONFIG_TEMPORAL_DENOISING 1
%define CONFIG_VP9_TEMPORAL_DENOISING 1
+%define CONFIG_CONSISTENT_RECODE 0
%define CONFIG_COEFFICIENT_RANGE_CHECKING 0
%define CONFIG_VP9_HIGHBITDEPTH 1
%define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h
index 0725ed34f04..2cf19145915 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 1
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h
index 4e665ccf81e..ecd6c94b1f6 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h
@@ -6458,6 +6458,11 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
+unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -8588,6 +8593,11 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
+unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -8604,6 +8614,11 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
+unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -8662,6 +8677,11 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
+unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr,
int source_stride,
const uint8_t* ref_ptr,
@@ -9957,6 +9977,8 @@ static void setup_rtcd_internal(void) {
vpx_mse16x8 = vpx_mse16x8_c;
if (flags & HAS_SSE2)
vpx_mse16x8 = vpx_mse16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_mse16x8 = vpx_mse16x8_avx2;
vpx_mse8x16 = vpx_mse8x16_c;
if (flags & HAS_SSE2)
vpx_mse8x16 = vpx_mse8x16_sse2;
@@ -10341,9 +10363,13 @@ static void setup_rtcd_internal(void) {
vpx_variance16x32 = vpx_variance16x32_c;
if (flags & HAS_SSE2)
vpx_variance16x32 = vpx_variance16x32_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x32 = vpx_variance16x32_avx2;
vpx_variance16x8 = vpx_variance16x8_c;
if (flags & HAS_SSE2)
vpx_variance16x8 = vpx_variance16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x8 = vpx_variance16x8_avx2;
vpx_variance32x16 = vpx_variance32x16_c;
if (flags & HAS_SSE2)
vpx_variance32x16 = vpx_variance32x16_sse2;
@@ -10357,6 +10383,8 @@ static void setup_rtcd_internal(void) {
vpx_variance32x64 = vpx_variance32x64_c;
if (flags & HAS_SSE2)
vpx_variance32x64 = vpx_variance32x64_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance32x64 = vpx_variance32x64_avx2;
vpx_variance4x4 = vpx_variance4x4_c;
if (flags & HAS_SSE2)
vpx_variance4x4 = vpx_variance4x4_sse2;
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm
index c17b1e336d6..ef886a31f30 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm
@@ -72,6 +72,7 @@
%define CONFIG_MULTI_RES_ENCODING 1
%define CONFIG_TEMPORAL_DENOISING 1
%define CONFIG_VP9_TEMPORAL_DENOISING 1
+%define CONFIG_CONSISTENT_RECODE 0
%define CONFIG_COEFFICIENT_RANGE_CHECKING 0
%define CONFIG_VP9_HIGHBITDEPTH 1
%define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h
index f0fbf897849..6d539498521 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h
@@ -84,6 +84,7 @@
#define CONFIG_MULTI_RES_ENCODING 1
#define CONFIG_TEMPORAL_DENOISING 1
#define CONFIG_VP9_TEMPORAL_DENOISING 1
+#define CONFIG_CONSISTENT_RECODE 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 1
#define CONFIG_BETTER_HW_COMPATIBILITY 0
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h
index 49a73711abe..258994f0076 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h
@@ -5376,7 +5376,16 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int recon_stride,
unsigned int* sse);
-#define vpx_mse16x8 vpx_mse16x8_sse2
+unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int recon_stride,
+ unsigned int* sse);
unsigned int vpx_mse8x16_c(const uint8_t* src_ptr,
int source_stride,
@@ -7350,7 +7359,16 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
-#define vpx_variance16x32 vpx_variance16x32_sse2
+unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
unsigned int vpx_variance16x8_c(const uint8_t* src_ptr,
int source_stride,
@@ -7362,7 +7380,16 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
-#define vpx_variance16x8 vpx_variance16x8_sse2
+unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
unsigned int vpx_variance32x16_c(const uint8_t* src_ptr,
int source_stride,
@@ -7416,7 +7443,16 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr,
const uint8_t* ref_ptr,
int ref_stride,
unsigned int* sse);
-#define vpx_variance32x64 vpx_variance32x64_sse2
+unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr,
+ int source_stride,
+ const uint8_t* ref_ptr,
+ int ref_stride,
+ unsigned int* sse);
unsigned int vpx_variance4x4_c(const uint8_t* src_ptr,
int source_stride,
@@ -7743,6 +7779,9 @@ static void setup_rtcd_internal(void) {
vpx_mse16x16 = vpx_mse16x16_sse2;
if (flags & HAS_AVX2)
vpx_mse16x16 = vpx_mse16x16_avx2;
+ vpx_mse16x8 = vpx_mse16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_mse16x8 = vpx_mse16x8_avx2;
vpx_quantize_b = vpx_quantize_b_sse2;
if (flags & HAS_SSSE3)
vpx_quantize_b = vpx_quantize_b_ssse3;
@@ -7918,12 +7957,21 @@ static void setup_rtcd_internal(void) {
vpx_variance16x16 = vpx_variance16x16_sse2;
if (flags & HAS_AVX2)
vpx_variance16x16 = vpx_variance16x16_avx2;
+ vpx_variance16x32 = vpx_variance16x32_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x32 = vpx_variance16x32_avx2;
+ vpx_variance16x8 = vpx_variance16x8_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance16x8 = vpx_variance16x8_avx2;
vpx_variance32x16 = vpx_variance32x16_sse2;
if (flags & HAS_AVX2)
vpx_variance32x16 = vpx_variance32x16_avx2;
vpx_variance32x32 = vpx_variance32x32_sse2;
if (flags & HAS_AVX2)
vpx_variance32x32 = vpx_variance32x32_avx2;
+ vpx_variance32x64 = vpx_variance32x64_sse2;
+ if (flags & HAS_AVX2)
+ vpx_variance32x64 = vpx_variance32x64_avx2;
vpx_variance64x32 = vpx_variance64x32_sse2;
if (flags & HAS_AVX2)
vpx_variance64x32 = vpx_variance64x32_avx2;
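
The vpx_dsp_rtcd.h hunks above follow libvpx's run-time CPU detection (RTCD) pattern: while a function has a single x86-64 specialization it is bound at compile time with a #define, and as soon as a second one exists (here the new AVX2 variants) the header switches to an RTCD_EXTERN function pointer that setup_rtcd_internal() resolves once, at init, to the best variant the running CPU supports. A minimal self-contained sketch of the idea, where mse_sse2/mse_avx2/setup_rtcd and the HAS_AVX2 bit value are illustrative stand-ins:

  #include <stdint.h>

  #define HAS_AVX2 0x08 /* stand-in; libvpx defines the real flag bits */

  /* Stand-ins for the vpx_mse16x8_sse2/_avx2 pair declared above. */
  static unsigned int mse_sse2(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               unsigned int *sse) {
    (void)src; (void)src_stride; (void)ref; (void)ref_stride;
    *sse = 0;
    return 0; /* the real body is hand-written SSE2 */
  }

  static unsigned int mse_avx2(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               unsigned int *sse) {
    return mse_sse2(src, src_stride, ref, ref_stride, sse); /* placeholder */
  }

  /* The RTCD_EXTERN pointer: all callers go through this indirection. */
  static unsigned int (*mse16x8)(const uint8_t *, int, const uint8_t *, int,
                                 unsigned int *);

  /* Mirrors setup_rtcd_internal(): baseline first, then upgrade by flag. */
  static void setup_rtcd(int cpu_flags) {
    mse16x8 = mse_sse2; /* SSE2 is the guaranteed x86-64 baseline */
    if (cpu_flags & HAS_AVX2) mse16x8 = mse_avx2;
  }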
diff --git a/chromium/third_party/libvpx/source/libvpx/README b/chromium/third_party/libvpx/source/libvpx/README
index a900c807787..49407ed9ff3 100644
--- a/chromium/third_party/libvpx/source/libvpx/README
+++ b/chromium/third_party/libvpx/source/libvpx/README
@@ -76,7 +76,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv8-linux-gcc
mips32-linux-gcc
mips64-linux-gcc
- ppc64-linux-gcc
ppc64le-linux-gcc
sparc-solaris-gcc
x86-android-gcc
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh
index a6c76612fcf..f1d0e34c3f6 100644
--- a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh
@@ -319,6 +319,12 @@ check_ld() {
&& check_cmd ${LD} ${LDFLAGS} "$@" -o ${TMP_X} ${TMP_O} ${extralibs}
}
+check_lib() {
+ log check_lib "$@"
+ check_cc $@ \
+ && check_cmd ${LD} ${LDFLAGS} -o ${TMP_X} ${TMP_O} "$@" ${extralibs}
+}
+
check_header(){
log check_header "$@"
header=$1
@@ -713,11 +719,8 @@ process_common_toolchain() {
*sparc*)
tgt_isa=sparc
;;
- power*64*-*)
- tgt_isa=ppc64
- ;;
- power*)
- tgt_isa=ppc
+ power*64le*-*)
+ tgt_isa=ppc64le
;;
*mips64el*)
tgt_isa=mips64
@@ -1215,7 +1218,7 @@ EOF
check_add_asflags -march=${tgt_isa}
check_add_asflags -KPIC
;;
- ppc*)
+ ppc64le*)
link_with_cc=gcc
setup_gnu_toolchain
check_gcc_machine_option "vsx"
@@ -1485,7 +1488,11 @@ EOF
# bionic includes basic pthread functionality, obviating -lpthread.
;;
*)
- check_header pthread.h && add_extralibs -lpthread
+ check_header pthread.h && check_lib -lpthread <<EOF && add_extralibs -lpthread || disable_feature pthread_h
+#include <pthread.h>
+#include <stddef.h>
+int main(void) { return pthread_create(NULL, NULL, NULL, NULL); }
+EOF
;;
esac
fi
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/iosbuild.sh b/chromium/third_party/libvpx/source/libvpx/build/make/iosbuild.sh
index 365a8c01306..e102442bd8e 100755
--- a/chromium/third_party/libvpx/source/libvpx/build/make/iosbuild.sh
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/iosbuild.sh
@@ -132,7 +132,8 @@ create_vpx_framework_config_shim() {
done
# Consume the last line of output from the loop: We don't want it.
- sed -i '' -e '$d' "${config_file}"
+ sed -i.bak -e '$d' "${config_file}"
+ rm "${config_file}.bak"
printf "#endif\n\n" >> "${config_file}"
printf "#endif // ${include_guard}" >> "${config_file}"
diff --git a/chromium/third_party/libvpx/source/libvpx/configure b/chromium/third_party/libvpx/source/libvpx/configure
index 2f198e9a61e..8be95d60236 100755
--- a/chromium/third_party/libvpx/source/libvpx/configure
+++ b/chromium/third_party/libvpx/source/libvpx/configure
@@ -116,7 +116,6 @@ all_platforms="${all_platforms} armv7s-darwin-gcc"
all_platforms="${all_platforms} armv8-linux-gcc"
all_platforms="${all_platforms} mips32-linux-gcc"
all_platforms="${all_platforms} mips64-linux-gcc"
-all_platforms="${all_platforms} ppc64-linux-gcc"
all_platforms="${all_platforms} ppc64le-linux-gcc"
all_platforms="${all_platforms} sparc-solaris-gcc"
all_platforms="${all_platforms} x86-android-gcc"
@@ -328,6 +327,7 @@ CONFIG_LIST="
multi_res_encoding
temporal_denoising
vp9_temporal_denoising
+ consistent_recode
coefficient_range_checking
vp9_highbitdepth
better_hw_compatibility
@@ -389,6 +389,7 @@ CMDLINE_SELECT="
multi_res_encoding
temporal_denoising
vp9_temporal_denoising
+ consistent_recode
coefficient_range_checking
better_hw_compatibility
vp9_highbitdepth
@@ -573,16 +574,30 @@ process_detect() {
check_ld() {
true
}
+ check_lib() {
+ true
+ }
fi
check_header stdio.h || die "Unable to invoke compiler: ${CC} ${CFLAGS}"
check_ld <<EOF || die "Toolchain is unable to link executables"
int main(void) {return 0;}
EOF
# check system headers
- check_header pthread.h
+
+ # Use both check_header and check_lib here, since check_lib
+ # could be a stub that always returns true.
+ check_header pthread.h && check_lib -lpthread <<EOF || disable_feature pthread_h
+#include <pthread.h>
+#include <stddef.h>
+int main(void) { return pthread_create(NULL, NULL, NULL, NULL); }
+EOF
check_header unistd.h # for sysconf(3) and friends.
check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
+
+ if enabled neon && ! enabled external_build; then
+ check_header arm_neon.h || die "Unable to find arm_neon.h"
+ fi
}
process_toolchain() {
@@ -708,9 +723,7 @@ process_toolchain() {
check_cxx "$@" <<EOF && soft_enable unit_tests
int z;
EOF
- check_cxx "$@" <<EOF && soft_enable webm_io
-int z;
-EOF
+ check_add_cxxflags -std=c++11 && soft_enable webm_io
check_cxx "$@" <<EOF && soft_enable libyuv
int z;
EOF
@@ -719,9 +732,7 @@ EOF
enabled pthread_h && check_cxx "$@" <<EOF && soft_enable unit_tests
int z;
EOF
- check_cxx "$@" <<EOF && soft_enable webm_io
-int z;
-EOF
+ check_add_cxxflags -std=c++11 && soft_enable webm_io
check_cxx "$@" <<EOF && soft_enable libyuv
int z;
EOF
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
index 747f79ffba1..091c6954d12 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
@@ -730,6 +730,8 @@ int main(int argc, const char **argv) {
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
+ vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
+
// Encode frames
while (!end_of_stream) {
vpx_codec_iter_t iter = NULL;
diff --git a/chromium/third_party/libvpx/source/libvpx/ivfdec.c b/chromium/third_party/libvpx/source/libvpx/ivfdec.c
index f64e594ab0e..3e179bc6ed2 100644
--- a/chromium/third_party/libvpx/source/libvpx/ivfdec.c
+++ b/chromium/third_party/libvpx/source/libvpx/ivfdec.c
@@ -76,12 +76,12 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
size_t frame_size = 0;
if (fread(raw_header, IVF_FRAME_HDR_SZ, 1, infile) != 1) {
- if (!feof(infile)) warn("Failed to read frame size\n");
+ if (!feof(infile)) warn("Failed to read frame size");
} else {
frame_size = mem_get_le32(raw_header);
if (frame_size > 256 * 1024 * 1024) {
- warn("Read invalid frame size (%u)\n", (unsigned int)frame_size);
+ warn("Read invalid frame size (%u)", (unsigned int)frame_size);
frame_size = 0;
}
@@ -92,7 +92,7 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
*buffer = new_buffer;
*buffer_size = 2 * frame_size;
} else {
- warn("Failed to allocate compressed data buffer\n");
+ warn("Failed to allocate compressed data buffer");
frame_size = 0;
}
}
@@ -100,7 +100,7 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read,
if (!feof(infile)) {
if (fread(*buffer, 1, frame_size, infile) != frame_size) {
- warn("Failed to read full frame\n");
+ warn("Failed to read full frame");
return 1;
}
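
The trailing \n is dropped from these messages because warn() appends its own newline, so the old strings produced double line breaks. For context, ivf_read_frame() follows a common container-reading pattern: read the fixed 12-byte frame header, take the little-endian payload size, reject implausible sizes, grow the caller's buffer geometrically, then read the payload. A simplified sketch, with get_le32() and read_one_ivf_frame() as illustrative stand-ins for the file's mem_get_le32() and ivf_read_frame():

  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>

  /* Little-endian 32-bit read, equivalent to mem_get_le32(). */
  static unsigned int get_le32(const uint8_t *p) {
    return (unsigned int)p[0] | ((unsigned int)p[1] << 8) |
           ((unsigned int)p[2] << 16) | ((unsigned int)p[3] << 24);
  }

  /* Returns 0 on success, 1 on EOF/error; grows *buf as needed. */
  static int read_one_ivf_frame(FILE *f, uint8_t **buf, size_t *buf_sz,
                                size_t *frame_sz) {
    uint8_t hdr[12]; /* 4-byte payload size + 8-byte timestamp */
    if (fread(hdr, sizeof(hdr), 1, f) != 1) return 1;
    *frame_sz = get_le32(hdr);
    if (*frame_sz > 256 * 1024 * 1024) return 1; /* same sanity bound */
    if (*frame_sz > *buf_sz) {
      uint8_t *p = realloc(*buf, 2 * *frame_sz); /* geometric growth */
      if (!p) return 1;
      *buf = p;
      *buf_sz = 2 * *frame_sz;
    }
    return fread(*buf, 1, *frame_sz, f) != *frame_sz;
  }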
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/Android.mk b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/Android.mk
index 8149a083f4f..b46ba101d42 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/Android.mk
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/Android.mk
@@ -3,7 +3,7 @@ LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE:= libwebm
LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -Wno-extern-c-compat
+LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -std=c++11
LOCAL_C_INCLUDES:= $(LOCAL_PATH)
LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH)
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx
index ebb5ff2f4d7..6d8b0b4ccc6 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx
@@ -1,5 +1,5 @@
URL: https://chromium.googlesource.com/webm/libwebm
-Version: 0ae757087f5e6eb01dfea16cc09205b2425cfb74
+Version: af81f26025b7435fa9a14ad07c58b44cf9280430
License: BSD
License File: LICENSE.txt
@@ -7,4 +7,14 @@ Description:
libwebm is used to handle WebM container I/O.
Local Changes:
-* <none>
+Only keep:
+ - Android.mk
+ - AUTHORS.TXT
+ - common/
+ file_util.cc/h
+ hdr_util.cc/h
+ webmids.h
+ - LICENSE.TXT
+ - mkvmuxer/
+ - mkvparser/
+ - PATENTS.TXT
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc
index 6dab146dd98..618ffc087fd 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc
@@ -17,6 +17,7 @@
#include <cstring>
#include <fstream>
#include <ios>
+#include <string>
namespace libwebm {
@@ -41,7 +42,12 @@ std::string GetTempFileName() {
return temp_file_name;
#else
char tmp_file_name[_MAX_PATH];
+#if defined _MSC_VER || defined MINGW_HAS_SECURE_API
errno_t err = tmpnam_s(tmp_file_name);
+#else
+ char* fname_pointer = tmpnam(tmp_file_name);
+ errno_t err = (fname_pointer == &tmp_file_name[0]) ? 0 : -1;
+#endif
if (err == 0) {
return std::string(tmp_file_name);
}
@@ -65,6 +71,15 @@ uint64_t GetFileSize(const std::string& file_name) {
return file_size;
}
+bool GetFileContents(const std::string& file_name, std::string* contents) {
+ std::ifstream file(file_name.c_str());
+ *contents = std::string(static_cast<size_t>(GetFileSize(file_name)), 0);
+ if (file.good() && contents->size()) {
+ file.read(&(*contents)[0], contents->size());
+ }
+ return !file.fail();
+}
+
TempFileDeleter::TempFileDeleter() { file_name_ = GetTempFileName(); }
TempFileDeleter::~TempFileDeleter() {
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.h b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.h
index 0e71eac11e4..a8737346418 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.h
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.h
@@ -22,6 +22,9 @@ std::string GetTempFileName();
// Returns size of file specified by |file_name|, or 0 upon failure.
uint64_t GetFileSize(const std::string& file_name);
+// Gets the contents of |file_name| as a string. Returns false on error.
+bool GetFileContents(const std::string& file_name, std::string* contents);
+
// Manages life of temporary file specified at time of construction. Deletes
// file upon destruction.
class TempFileDeleter {
@@ -38,4 +41,4 @@ class TempFileDeleter {
} // namespace libwebm
-#endif // LIBWEBM_COMMON_FILE_UTIL_H_
\ No newline at end of file
+#endif // LIBWEBM_COMMON_FILE_UTIL_H_
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc
index e1618ce75a7..916f7170b67 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc
@@ -36,10 +36,10 @@ bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm,
if (MasteringMetadataValuePresent(parser_mm.luminance_min))
muxer_mm->set_luminance_min(parser_mm.luminance_min);
- PrimaryChromaticityPtr r_ptr(NULL);
- PrimaryChromaticityPtr g_ptr(NULL);
- PrimaryChromaticityPtr b_ptr(NULL);
- PrimaryChromaticityPtr wp_ptr(NULL);
+ PrimaryChromaticityPtr r_ptr(nullptr);
+ PrimaryChromaticityPtr g_ptr(nullptr);
+ PrimaryChromaticityPtr b_ptr(nullptr);
+ PrimaryChromaticityPtr wp_ptr(nullptr);
if (parser_mm.r) {
if (!CopyPrimaryChromaticity(*parser_mm.r, &r_ptr))
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h
index 3ef5388fd03..78e2eeb7058 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h
@@ -47,15 +47,7 @@ struct Vp9CodecFeatures {
int chroma_subsampling;
};
-// disable deprecation warnings for auto_ptr
-#if defined(__GNUC__) && __GNUC__ >= 5
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
-typedef std::auto_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr;
-#if defined(__GNUC__) && __GNUC__ >= 5
-#pragma GCC diagnostic pop
-#endif
+typedef std::unique_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr;
bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
PrimaryChromaticityPtr* muxer_pc);
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc
index 15b9a908d8a..481771db297 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc
@@ -8,6 +8,8 @@
#include "mkvmuxer/mkvmuxer.h"
+#include <stdint.h>
+
#include <cfloat>
#include <climits>
#include <cstdio>
@@ -24,11 +26,6 @@
#include "mkvmuxer/mkvwriter.h"
#include "mkvparser/mkvparser.h"
-// disable deprecation warnings for auto_ptr
-#if defined(__GNUC__) && __GNUC__ >= 5
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
-
namespace mkvmuxer {
const float PrimaryChromaticity::kChromaticityMin = 0.0f;
@@ -72,7 +69,7 @@ bool StrCpy(const char* src, char** dst_ptr) {
return true;
}
-typedef std::auto_ptr<PrimaryChromaticity> PrimaryChromaticityPtr;
+typedef std::unique_ptr<PrimaryChromaticity> PrimaryChromaticityPtr;
bool CopyChromaticity(const PrimaryChromaticity* src,
PrimaryChromaticityPtr* dst) {
if (!dst)
@@ -1057,22 +1054,22 @@ bool MasteringMetadata::Write(IMkvWriter* writer) const {
bool MasteringMetadata::SetChromaticity(
const PrimaryChromaticity* r, const PrimaryChromaticity* g,
const PrimaryChromaticity* b, const PrimaryChromaticity* white_point) {
- PrimaryChromaticityPtr r_ptr(NULL);
+ PrimaryChromaticityPtr r_ptr(nullptr);
if (r) {
if (!CopyChromaticity(r, &r_ptr))
return false;
}
- PrimaryChromaticityPtr g_ptr(NULL);
+ PrimaryChromaticityPtr g_ptr(nullptr);
if (g) {
if (!CopyChromaticity(g, &g_ptr))
return false;
}
- PrimaryChromaticityPtr b_ptr(NULL);
+ PrimaryChromaticityPtr b_ptr(nullptr);
if (b) {
if (!CopyChromaticity(b, &b_ptr))
return false;
}
- PrimaryChromaticityPtr wp_ptr(NULL);
+ PrimaryChromaticityPtr wp_ptr(nullptr);
if (white_point) {
if (!CopyChromaticity(white_point, &wp_ptr))
return false;
@@ -1238,7 +1235,7 @@ bool Colour::Write(IMkvWriter* writer) const {
}
bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) {
- std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
+ std::unique_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
if (!mm_ptr.get())
return false;
@@ -1546,7 +1543,7 @@ bool VideoTrack::Write(IMkvWriter* writer) const {
}
bool VideoTrack::SetColour(const Colour& colour) {
- std::auto_ptr<Colour> colour_ptr(new Colour());
+ std::unique_ptr<Colour> colour_ptr(new Colour());
if (!colour_ptr.get())
return false;
@@ -1574,7 +1571,7 @@ bool VideoTrack::SetColour(const Colour& colour) {
}
bool VideoTrack::SetProjection(const Projection& projection) {
- std::auto_ptr<Projection> projection_ptr(new Projection());
+ std::unique_ptr<Projection> projection_ptr(new Projection());
if (!projection_ptr.get())
return false;
@@ -2666,7 +2663,7 @@ bool Cluster::QueueOrWriteFrame(const Frame* const frame) {
// and write it if it is okay to do so (i.e., no other track has a held-back
// frame with timestamp <= the timestamp of the frame in question).
std::vector<std::list<Frame*>::iterator> frames_to_erase;
- for (std::list<Frame *>::iterator
+ for (std::list<Frame*>::iterator
current_track_iterator = stored_frames_[track_number].begin(),
end = --stored_frames_[track_number].end();
current_track_iterator != end; ++current_track_iterator) {
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc
index 37f230d0a95..e7b76f7da11 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc
@@ -22,12 +22,8 @@
#include "common/webmids.h"
-// disable deprecation warnings for auto_ptr
-#if defined(__GNUC__) && __GNUC__ >= 5
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-#endif
-
namespace mkvparser {
+const long long kStringElementSizeLimit = 20 * 1000 * 1000;
const float MasteringMetadata::kValueNotPresent = FLT_MAX;
const long long Colour::kValueNotPresent = LLONG_MAX;
const float Projection::kValueNotPresent = FLT_MAX;
@@ -330,7 +326,7 @@ long UnserializeString(IMkvReader* pReader, long long pos, long long size,
delete[] str;
str = NULL;
- if (size >= LONG_MAX || size < 0)
+ if (size >= LONG_MAX || size < 0 || size > kStringElementSizeLimit)
return E_FILE_FORMAT_INVALID;
// +1 for '\0' terminator
@@ -5015,7 +5011,7 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
if (!reader || *mm)
return false;
- std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
+ std::unique_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata());
if (!mm_ptr.get())
return false;
@@ -5035,6 +5031,10 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
double value = 0;
const long long value_parse_status =
UnserializeFloat(reader, read_pos, child_size, value);
+ if (value < -FLT_MAX || value > FLT_MAX ||
+ (value > 0.0 && value < FLT_MIN)) {
+ return false;
+ }
mm_ptr->luminance_max = static_cast<float>(value);
if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 ||
mm_ptr->luminance_max > 9999.99) {
@@ -5044,6 +5044,10 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start,
double value = 0;
const long long value_parse_status =
UnserializeFloat(reader, read_pos, child_size, value);
+ if (value < -FLT_MAX || value > FLT_MAX ||
+ (value > 0.0 && value < FLT_MIN)) {
+ return false;
+ }
mm_ptr->luminance_min = static_cast<float>(value);
if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 ||
mm_ptr->luminance_min > 999.9999) {
@@ -5096,7 +5100,7 @@ bool Colour::Parse(IMkvReader* reader, long long colour_start,
if (!reader || *colour)
return false;
- std::auto_ptr<Colour> colour_ptr(new Colour());
+ std::unique_ptr<Colour> colour_ptr(new Colour());
if (!colour_ptr.get())
return false;
@@ -5194,7 +5198,7 @@ bool Projection::Parse(IMkvReader* reader, long long start, long long size,
if (!reader || *projection)
return false;
- std::auto_ptr<Projection> projection_ptr(new Projection());
+ std::unique_ptr<Projection> projection_ptr(new Projection());
if (!projection_ptr.get())
return false;
@@ -7903,6 +7907,10 @@ long Block::Parse(const Cluster* pCluster) {
return E_FILE_FORMAT_INVALID;
curr.len = static_cast<long>(frame_size);
+ // Check if size + curr.len could overflow.
+ if (size > LLONG_MAX - curr.len) {
+ return E_FILE_FORMAT_INVALID;
+ }
size += curr.len; // contribution of this frame
--frame_count;
@@ -7964,6 +7972,11 @@ long long Block::GetTimeCode(const Cluster* pCluster) const {
const long long tc0 = pCluster->GetTimeCode();
assert(tc0 >= 0);
+ // Check if tc0 + m_timecode would overflow.
+ if (tc0 < 0 || LLONG_MAX - tc0 < m_timecode) {
+ return -1;
+ }
+
const long long tc = tc0 + m_timecode;
return tc; // unscaled timecode units
@@ -7981,6 +7994,10 @@ long long Block::GetTime(const Cluster* pCluster) const {
const long long scale = pInfo->GetTimeCodeScale();
assert(scale >= 1);
+ // Check if tc * scale could overflow.
+ if (tc != 0 && scale > LLONG_MAX / tc) {
+ return -1;
+ }
const long long ns = tc * scale;
return ns;
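
The three checks added to Block::Parse(), Block::GetTimeCode() and Block::GetTime() above are instances of the standard pre-condition idiom for signed arithmetic: test against LLONG_MAX before computing, since signed overflow is undefined behavior and cannot be detected afterwards. Restated generically (safe_add/safe_mul are illustrative names):

  #include <limits.h>

  /* a + b with a, b >= 0 overflows exactly when a > LLONG_MAX - b. */
  static int safe_add(long long a, long long b, long long *out) {
    if (a < 0 || b < 0 || a > LLONG_MAX - b) return 0; /* would overflow */
    *out = a + b;
    return 1;
  }

  /* a * b with a >= 0, b >= 1 overflows exactly when b > LLONG_MAX / a. */
  static int safe_mul(long long a, long long b, long long *out) {
    if (a < 0 || b < 1) return 0;
    if (a != 0 && b > LLONG_MAX / a) return 0; /* would overflow */
    *out = a * b;
    return 1;
  }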
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c
index d67ee8a57d8..8c292d6161d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c
@@ -65,7 +65,7 @@ void vp8_deblock(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source,
double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
int ppl = (int)(level + .5);
- const MODE_INFO *mode_info_context = cm->show_frame_mi;
+ const MODE_INFO *mode_info_context = cm->mi;
int mbr, mbc;
/* The pixel thresholds are adjusted according to whether or not the macroblock
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c
index 8bfd3cea3dc..0d54a9442b9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c
@@ -686,6 +686,12 @@ static unsigned int read_available_partition_size(
const unsigned char *partition_size_ptr = token_part_sizes + i * 3;
unsigned int partition_size = 0;
ptrdiff_t bytes_left = fragment_end - fragment_start;
+ if (bytes_left < 0) {
+ vpx_internal_error(
+ &pc->error, VPX_CODEC_CORRUPT_FRAME,
+ "Truncated packet or corrupt partition. No bytes left %d.",
+ (int)bytes_left);
+ }
/* Calculate the length of this partition. The last partition
* size is implicit. If the partition size can't be read, then
* either use the remaining data in the buffer (for EC mode)
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
index 2bdc2b34a7b..e1c31341bc1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
@@ -258,9 +258,7 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx,
const vpx_image_t *img) {
switch (img->fmt) {
case VPX_IMG_FMT_YV12:
- case VPX_IMG_FMT_I420:
- case VPX_IMG_FMT_VPXI420:
- case VPX_IMG_FMT_VPXYV12: break;
+ case VPX_IMG_FMT_I420: break;
default:
ERROR("Invalid image format. Only YV12 and I420 images are supported");
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.h
index 8400bd70f1d..005290980e6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.h
@@ -176,12 +176,6 @@ static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
}
}
-static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size,
- const MACROBLOCKD *xd,
- const struct tx_probs *tx_probs) {
- return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs);
-}
-
static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
struct tx_counts *tx_counts) {
switch (max_tx_size) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
index ef8484f988b..b47840795e1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -413,7 +413,8 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
} while (cr->target_num_seg_blocks < block_count && i != cr->sb_index);
cr->sb_index = i;
cr->reduce_refresh = 0;
-  if (count_sel < ((3 * count_tot) >> 2)) cr->reduce_refresh = 1;
+  if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+    if (count_sel < ((3 * count_tot) >> 2)) cr->reduce_refresh = 1;
}
// Set cyclic refresh parameters.
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c
index d346cd57aa0..4e7d99f505a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c
@@ -86,7 +86,7 @@ static void write_selected_tx_size(const VP9_COMMON *cm,
BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
const vpx_prob *const tx_probs =
- get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+ get_tx_probs(max_tx_size, get_tx_size_context(xd), &cm->fc->tx_probs);
vpx_write(w, tx_size != TX_4X4, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
vpx_write(w, tx_size != TX_8X8, tx_probs[1]);
@@ -459,7 +459,8 @@ static void write_modes_sb(
write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs,
max_mv_magnitude, interp_filter_selected);
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, subsize,
max_mv_magnitude, interp_filter_selected);
write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs,
@@ -469,7 +470,6 @@ static void write_modes_sb(
write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
subsize, max_mv_magnitude, interp_filter_selected);
break;
- default: assert(0);
}
}
@@ -618,9 +618,10 @@ static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi,
return;
}
- case ONE_LOOP_REDUCED: {
+ default: {
int updates = 0;
int noupdates_before_first = 0;
+ assert(cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED);
for (i = 0; i < PLANE_TYPES; ++i) {
for (j = 0; j < REF_TYPES; ++j) {
for (k = 0; k < COEF_BANDS; ++k) {
@@ -670,7 +671,6 @@ static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi,
}
return;
}
- default: assert(0);
}
}
@@ -1117,11 +1117,7 @@ static void write_frame_size_with_refs(VP9_COMP *cpi,
((cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ||
(cpi->svc.number_spatial_layers > 1 &&
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) ||
- (is_two_pass_svc(cpi) &&
- cpi->svc.encode_empty_frame_state == ENCODING &&
- cpi->svc.layer_context[0].frames_from_key_frame <
- cpi->svc.number_temporal_layers + 1))) {
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) {
found = 0;
} else if (cfg != NULL) {
found =
@@ -1153,8 +1149,10 @@ static void write_profile(BITSTREAM_PROFILE profile,
case PROFILE_0: vpx_wb_write_literal(wb, 0, 2); break;
case PROFILE_1: vpx_wb_write_literal(wb, 2, 2); break;
case PROFILE_2: vpx_wb_write_literal(wb, 1, 2); break;
- case PROFILE_3: vpx_wb_write_literal(wb, 6, 3); break;
- default: assert(0);
+ default:
+ assert(profile == PROFILE_3);
+ vpx_wb_write_literal(wb, 6, 3);
+ break;
}
}
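
This hunk shows a refactor applied throughout the patch (here for PROFILE_3, elsewhere for PARTITION_SPLIT, TX_4X4, VPX_BITS_12, and so on): the last handled enum value moves from its own case into default, guarded by an assert of the expected value. Release builds then have no unreachable default arm, and every path produces a value, while debug builds keep the invariant check that assert(0) used to provide. Schematically, with illustrative names:

  #include <assert.h>

  typedef enum { PROFILE_A, PROFILE_B, PROFILE_LAST } profile_t;

  static int profile_bits(profile_t p) {
    switch (p) {
      case PROFILE_A: return 0;
      case PROFILE_B: return 2;
      default:
        /* Previously: case PROFILE_LAST: return 6; default: assert(0);
         * Folding the last case into default keeps the invariant checked
         * in debug builds without an unreachable release-build path. */
        assert(p == PROFILE_LAST);
        return 6;
    }
  }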
@@ -1201,14 +1199,6 @@ static void write_uncompressed_header(VP9_COMP *cpi,
write_bitdepth_colorspace_sampling(cm, wb);
write_frame_size(cm, wb);
} else {
- // In spatial svc if it's not error_resilient_mode then we need to code all
- // visible frames as invisible. But we need to keep the show_frame flag so
- // that the publisher could know whether it is supposed to be visible.
- // So we will code the show_frame flag as it is. Then code the intra_only
- // bit here. This will make the bitstream incompatible. In the player we
- // will change to show_frame flag to 0, then add an one byte frame with
- // show_existing_frame flag which tells the decoder which frame we want to
- // show.
if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only);
if (!cm->error_resilient_mode)
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h
index 339c3fecb13..b296560b922 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h
@@ -39,11 +39,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
- cpi->rc.is_src_frame_alt_ref &&
- (!cpi->use_svc || // Add spatial svc base layer case here
- (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id == 0 &&
- cpi->svc.layer_context[0].gold_ref_idx >= 0 &&
- cpi->oxcf.ss_enable_auto_arf[0]));
+ cpi->rc.is_src_frame_alt_ref && !cpi->use_svc;
}
#ifdef __cplusplus
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
index a283d92a889..c7e9f9b2a02 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -385,16 +385,13 @@ static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
node->split[i] = &vt->split[i].part_variances.none;
break;
}
- case BLOCK_4X4: {
+ default: {
v4x4 *vt = (v4x4 *)data;
+ assert(bsize == BLOCK_4X4);
node->part_variances = &vt->part_variances;
for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
break;
}
- default: {
- assert(0);
- break;
- }
}
}
@@ -586,6 +583,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
} else {
thresholds[1] = (5 * threshold_base) >> 1;
}
+ if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX;
}
}
@@ -885,13 +883,13 @@ static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x,
set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize);
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col);
copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col);
copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs);
copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs);
break;
- default: assert(0);
}
}
}
@@ -1004,7 +1002,8 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high,
subsize_high);
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition_high == PARTITION_SPLIT);
if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col,
mi_row_high, mi_col_high))
return 1;
@@ -1020,7 +1019,6 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
mi_col_high + bs_high))
return 1;
break;
- default: assert(0);
}
}
@@ -1067,13 +1065,13 @@ static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
prev_part[start_pos] = subsize;
if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
update_partition_svc(cpi, subsize, mi_row, mi_col);
update_partition_svc(cpi, subsize, mi_row + bs, mi_col);
update_partition_svc(cpi, subsize, mi_row, mi_col + bs);
update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs);
break;
- default: assert(0);
}
}
}
@@ -1108,13 +1106,13 @@ static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize,
prev_part[start_pos] = subsize;
if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
update_prev_partition_helper(cpi, subsize, mi_row, mi_col);
update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col);
update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs);
update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs);
break;
- default: assert(0);
}
}
}
@@ -1387,7 +1385,9 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
xd->plane[0].pre[0].stride);
} else {
- y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
+ const MV dummy_mv = { 0, 0 };
+ y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
+ &dummy_mv);
x->sb_use_mv_part = 1;
x->sb_mvcol_part = mi->mv[0].as_mv.col;
x->sb_mvrow_part = mi->mv[0].as_mv.row;
@@ -2181,7 +2181,8 @@ static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
subsize, &pc_tree->horizontal[1]);
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
if (bsize == BLOCK_8X8) {
encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
pc_tree->leaf_split[0]);
@@ -2196,7 +2197,6 @@ static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
subsize, pc_tree->split[3]);
}
break;
- default: assert(0 && "Invalid partition type."); break;
}
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
@@ -2522,7 +2522,8 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
subsize, &pc_tree->horizontal[1]);
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
subsize = get_subsize(bsize, PARTITION_SPLIT);
encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
pc_tree->split[0]);
@@ -2533,7 +2534,6 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs,
output_enabled, subsize, pc_tree->split[3]);
break;
- default: assert(0 && "Invalid partition type."); break;
}
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
@@ -2672,7 +2672,8 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
last_part_rdc.rdcost += tmp_rdc.rdcost;
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
if (bsize == BLOCK_8X8) {
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
subsize, pc_tree->leaf_split[0], INT64_MAX);
@@ -2702,7 +2703,6 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
last_part_rdc.dist += tmp_rdc.dist;
}
break;
- default: assert(0); break;
}
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -4208,7 +4208,8 @@ static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
}
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
subsize = get_subsize(bsize, PARTITION_SPLIT);
nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
subsize, output_enabled, rd_cost,
@@ -4238,7 +4239,6 @@ static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
rd_cost->dist += this_rdc.dist;
}
break;
- default: assert(0 && "Invalid partition type."); break;
}
}
@@ -4327,7 +4327,8 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
output_enabled, subsize, &pc_tree->horizontal[1]);
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
subsize = get_subsize(bsize, PARTITION_SPLIT);
if (bsize == BLOCK_8X8) {
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
@@ -4348,7 +4349,6 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
dummy_cost, pc_tree->split[3]);
}
break;
- default: assert(0 && "Invalid partition type."); break;
}
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
@@ -4452,7 +4452,8 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
break;
- case REFERENCE_PARTITION:
+ default:
+ assert(partition_search_type == REFERENCE_PARTITION);
x->sb_pickmode_part = 1;
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
// Use nonrd_pick_partition on scene-cut for VBR mode.
@@ -4484,7 +4485,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
}
break;
- default: assert(0); break;
}
// Update ref_frame usage for inter frame if this group is ARF group.
@@ -4551,16 +4551,12 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
&var16->sse, &var16->sum);
var16->var = variance_highbd(var16);
break;
- case VPX_BITS_12:
+ default:
+ assert(cm->bit_depth == VPX_BITS_12);
vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
var16->var = variance_highbd(var16);
break;
- default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
- " or VPX_BITS_12");
- return -1;
}
} else {
vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
@@ -4668,6 +4664,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
for (i = 0; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MAX_MODES; ++j) {
tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
+#if CONFIG_CONSISTENT_RECODE
+ tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;
+#endif
tile_data->mode_map[i][j] = j;
}
}
@@ -4792,7 +4791,9 @@ static void encode_frame_internal(VP9_COMP *cpi) {
x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
#endif // CONFIG_VP9_HIGHBITDEPTH
x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
-
+#if CONFIG_CONSISTENT_RECODE
+ x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1;
+#endif
if (xd->lossless) x->optimize = 0;
cm->tx_mode = select_tx_mode(cpi, xd);
@@ -4917,9 +4918,48 @@ static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
return sum_delta / (cm->mi_rows * cm->mi_cols);
}
+#if CONFIG_CONSISTENT_RECODE
+static void restore_encode_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ int tile_col, tile_row;
+ int i, j;
+ RD_OPT *rd_opt = &cpi->rd;
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ for (j = 0; j < REFERENCE_MODES; j++)
+ rd_opt->prediction_type_threshes[i][j] =
+ rd_opt->prediction_type_threshes_prev[i][j];
+
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
+ rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j];
+ }
+
+ if (cpi->tile_data != NULL) {
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (j = 0; j < MAX_MODES; ++j) {
+ tile_data->thresh_freq_fact[i][j] =
+ tile_data->thresh_freq_fact_prev[i][j];
+ }
+ }
+ }
+ }
+
+ cm->interp_filter = cpi->sf.default_interp_filter;
+}
+#endif
+
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+#if CONFIG_CONSISTENT_RECODE
+ restore_encode_params(cpi);
+#endif
+
// In the longer term the encoder should be generalized to match the
// decoder such that we allow compound where one of the 3 buffers has a
// different sign bias and that buffer is then the fixed ref. However, this
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c
index 970077d8943..bc276572882 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c
@@ -358,13 +358,13 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp,
p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
- default: assert(0);
}
return;
}
@@ -388,13 +388,13 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant,
eob, scan_order->scan, scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- default: assert(0); break;
}
}
@@ -434,13 +434,13 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
eob);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
eob);
break;
- default: assert(0);
}
return;
}
@@ -462,12 +462,12 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
qcoeff, dqcoeff, pd->dequant[0], eob);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
qcoeff, dqcoeff, pd->dequant[0], eob);
break;
- default: assert(0); break;
}
}
@@ -511,14 +511,14 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- default: assert(0);
}
return;
}
@@ -544,13 +544,13 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
- default: assert(0); break;
}
}
@@ -634,14 +634,14 @@ static void encode_block(int plane, int block, int row, int col,
vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
xd->bd);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
xd->bd);
break;
- default: assert(0 && "Invalid transform size");
}
return;
}
@@ -657,13 +657,13 @@ static void encode_block(int plane, int block, int row, int col,
case TX_8X8:
vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
- default: assert(0 && "Invalid transform size"); break;
}
}
@@ -848,7 +848,8 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
xd->bd);
}
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
if (!x->skip_recode) {
vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
src_stride, dst, dst_stride, xd->bd);
@@ -876,7 +877,6 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
}
}
break;
- default: assert(0); return;
}
if (*eob) *(args->skip) = 0;
return;
@@ -930,7 +930,8 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
if (!x->skip_encode && *eob)
vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
if (!x->skip_recode) {
vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
dst_stride);
@@ -955,7 +956,6 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
}
break;
- default: assert(0); break;
}
if (*eob) *(args->skip) = 0;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
index aa9c3bf46cb..e41768a8f13 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
@@ -483,14 +483,10 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
*hr = 3;
*hs = 5;
break;
- case ONETWO:
- *hr = 1;
- *hs = 2;
- break;
default:
+ assert(mode == ONETWO);
*hr = 1;
- *hs = 1;
- assert(0);
+ *hs = 2;
break;
}
}
@@ -791,7 +787,7 @@ static void setup_frame(VP9_COMP *cpi) {
}
if (cm->frame_type == KEY_FRAME) {
- if (!is_two_pass_svc(cpi)) cpi->refresh_golden_frame = 1;
+ cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
vp9_zero(cpi->interp_filter_selected);
} else {
@@ -1347,15 +1343,9 @@ static void set_tile_limits(VP9_COMP *cpi) {
int min_log2_tile_cols, max_log2_tile_cols;
vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
- if (is_two_pass_svc(cpi) && (cpi->svc.encode_empty_frame_state == ENCODING ||
- cpi->svc.number_spatial_layers > 1)) {
- cm->log2_tile_cols = 0;
- cm->log2_tile_rows = 0;
- } else {
- cm->log2_tile_cols =
- clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
- cm->log2_tile_rows = cpi->oxcf.tile_rows;
- }
+ cm->log2_tile_cols =
+ clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
+ cm->log2_tile_rows = cpi->oxcf.tile_rows;
if (cpi->oxcf.target_level == LEVEL_AUTO) {
const int level_tile_cols =
@@ -1378,18 +1368,6 @@ static void update_frame_size(VP9_COMP *cpi) {
cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
set_tile_limits(cpi);
-
- if (is_two_pass_svc(cpi)) {
- if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
- NULL, NULL, NULL))
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to reallocate alt_ref_buffer");
- }
}
static void init_buffer_indices(VP9_COMP *cpi) {
@@ -1744,7 +1722,8 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
vpx_highbd_sad4x4x4d_bits10)
break;
- case VPX_BITS_12:
+ default:
+ assert(cm->bit_depth == VPX_BITS_12);
HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
vpx_highbd_12_sub_pixel_variance32x16,
@@ -1823,11 +1802,6 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
vpx_highbd_12_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x4d_bits12)
break;
-
- default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
}
}
}
@@ -2971,11 +2945,6 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
tmp = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->gld_fb_idx;
cpi->gld_fb_idx = tmp;
-
- if (is_two_pass_svc(cpi)) {
- cpi->svc.layer_context[0].gold_ref_idx = cpi->gld_fb_idx;
- cpi->svc.layer_context[0].alt_ref_idx = cpi->alt_fb_idx;
- }
} else { /* For non key/golden frames */
if (cpi->refresh_alt_ref_frame) {
int arf_idx = cpi->alt_fb_idx;
@@ -3054,17 +3023,32 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
// Keep track of frame index for each reference frame.
SVC *const svc = &cpi->svc;
if (cm->frame_type == KEY_FRAME) {
- svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
- svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
- svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
+ int i;
+      // On a key frame, update all reference frame slots.
+ for (i = 0; i < REF_FRAMES; i++) {
+ svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id;
+        // LAST/GOLDEN/ALTREF are already updated above.
+ if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx &&
+ i != cpi->alt_fb_idx)
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
+ }
} else {
- if (cpi->refresh_last_frame)
- svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
- if (cpi->refresh_golden_frame)
- svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
- if (cpi->refresh_alt_ref_frame)
- svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
+ if (cpi->refresh_last_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id;
+ }
+ if (cpi->refresh_golden_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] = svc->temporal_layer_id;
+ }
+ if (cpi->refresh_alt_ref_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->alt_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->alt_fb_idx] = svc->temporal_layer_id;
+ }
}
+ // Copy flags from encoder to SVC struct.
+ vp9_copy_flags_ref_update_idx(cpi);
}
}
@@ -3307,11 +3291,9 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
case VPX_BITS_10:
dc_quant_devisor = 16.0;
break;
- case VPX_BITS_12:
- dc_quant_devisor = 64.0;
- break;
default:
- assert(0 && "bit_depth must be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ assert(cm->bit_depth == VPX_BITS_12);
+ dc_quant_devisor = 64.0;
break;
}
#else
@@ -3550,9 +3532,7 @@ static void set_frame_size(VP9_COMP *cpi) {
#endif
}
- if ((oxcf->pass == 2) &&
- (!cpi->use_svc || (is_two_pass_svc(cpi) &&
- cpi->svc.encode_empty_frame_state != ENCODING))) {
+ if ((oxcf->pass == 2) && !cpi->use_svc) {
vp9_set_target_rate(cpi);
}
@@ -3599,6 +3579,39 @@ static void set_frame_size(VP9_COMP *cpi) {
set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
}
+#if CONFIG_CONSISTENT_RECODE
+static void save_encode_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ int tile_col, tile_row;
+ int i, j;
+ RD_OPT *rd_opt = &cpi->rd;
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ for (j = 0; j < REFERENCE_MODES; j++)
+ rd_opt->prediction_type_threshes_prev[i][j] =
+ rd_opt->prediction_type_threshes[i][j];
+
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
+ rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
+ }
+
+ if (cpi->tile_data != NULL) {
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (j = 0; j < MAX_MODES; ++j) {
+ tile_data->thresh_freq_fact_prev[i][j] =
+ tile_data->thresh_freq_fact[i][j];
+ }
+ }
+ }
+ }
+}
+#endif
+
static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
uint8_t *dest) {
VP9_COMMON *const cm = &cpi->common;
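
save_encode_params() here in vp9_encoder.c mirrors the restore_encode_params() added to vp9_encodeframe.c above: together they snapshot the encoder's adaptive state (RD prediction/filter thresholds and the per-tile thresh_freq_fact tables) before a frame is encoded and rewind it at the start of each encode, so that when the recode loop re-encodes a frame it starts from identical state and makes consistent decisions. The discipline, roughly (encode_pass() and needs_recode() are illustrative stand-ins for the real recode loop):

  /* Sketch only; the real call sites are encode_frame_to_data_rate()
   * (save, once per frame) and vp9_encode_frame() (restore, per attempt). */
  static void encode_with_consistent_recode(VP9_COMP *cpi, size_t *size,
                                            uint8_t *dest) {
  #if CONFIG_CONSISTENT_RECODE
    save_encode_params(cpi); /* snapshot adaptive thresholds */
  #endif
    for (;;) {
  #if CONFIG_CONSISTENT_RECODE
      restore_encode_params(cpi); /* each attempt starts from the snapshot */
  #endif
      encode_pass(cpi, size, dest);   /* illustrative */
      if (!needs_recode(cpi)) break;  /* illustrative */
    }
  }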
@@ -3708,12 +3721,15 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
// (need to check encoding time cost for doing this for speed 8).
cpi->rc.high_source_sad = 0;
- if (cpi->compute_source_sad_onepass && cm->show_frame &&
+ if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
(cpi->oxcf.rc_mode == VPX_VBR ||
cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
- (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8 && !cpi->use_svc)))
+ (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
vp9_scene_detection_onepass(cpi);
+ if (cpi->svc.spatial_layer_id == 0)
+ cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad;
+
// For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
// when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
// avoid this frame-level upsampling (for non intra_only frames).
@@ -3751,28 +3767,11 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
suppress_active_map(cpi);
- // For SVC on non-zero spatial layer: check for disabling inter-layer
- // (spatial) prediction, if svc.disable_inter_layer_pred is set.
- // if the previous spatial layer was dropped then disable the prediction from
- // this (scaled) reference.
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) {
- if ((cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY &&
- !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||
- cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF ||
- cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) {
- MV_REFERENCE_FRAME ref_frame;
- static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
- VP9_ALT_FLAG };
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
- const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
- if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) {
- const struct scale_factors *const scale_fac =
- &cm->frame_refs[ref_frame - 1].sf;
- if (vp9_is_scaled(scale_fac))
- cpi->ref_frame_flags &= (~flag_list[ref_frame]);
- }
- }
- }
+ if (cpi->use_svc) {
+ // On non-zero spatial layer, check for disabling inter-layer
+ // prediction.
+ if (cpi->svc.spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
+ vp9_svc_assert_constraints_pattern(cpi);
}
// Variance adaptive and in frame q adjustment experiments are mutually
@@ -3799,10 +3798,10 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// Check if we should drop this frame because of high overshoot.
// Only for frames where high temporal-source SAD is detected.
- if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
- cpi->resize_state == ORIG && cm->frame_type != KEY_FRAME &&
- cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
- cpi->rc.high_source_sad == 1) {
+ // For SVC: all spatial layers are checked for re-encoding.
+ if (cpi->sf.re_encode_overshoot_rt &&
+ (cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe))) {
int frame_size = 0;
// Get an estimate of the encoded frame size.
save_coding_context(cpi);
@@ -4526,11 +4525,21 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cpi->oxcf.target_bandwidth == 0) {
cpi->svc.skip_enhancement_layer = 1;
vp9_rc_postencode_update_drop_frame(cpi);
- vp9_inc_frame_in_layer(cpi);
cpi->ext_refresh_frame_flags_pending = 0;
cpi->last_frame_dropped = 1;
cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
+ if (cpi->svc.framedrop_mode != CONSTRAINED_LAYER_DROP ||
+ cpi->svc.drop_spatial_layer[0] == 0) {
+      // For the case of CONSTRAINED_LAYER_DROP where the base layer is
+      // dropped (drop_spatial_layer[0] == 1), meaning the full superframe is
+      // dropped, we don't increment the svc frame counters. In particular the
+      // temporal layer counter (incremented in vp9_inc_frame_in_layer())
+      // won't be incremented, so on a dropped frame we retry the same
+      // temporal_layer_id on the next incoming frame. This avoids an issue
+      // with temporal alignment when the full superframe is dropped.
+ vp9_inc_frame_in_layer(cpi);
+ }
return;
}
@@ -4578,44 +4587,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cm->reset_frame_context = 2;
}
}
- if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) {
- // Use context 0 for intra only empty frame, but the last frame context
- // for other empty frames.
- if (cpi->svc.encode_empty_frame_state == ENCODING) {
- if (cpi->svc.encode_intra_empty_frame != 0)
- cm->frame_context_idx = 0;
- else
- cm->frame_context_idx = FRAME_CONTEXTS - 1;
- } else {
- cm->frame_context_idx =
- cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id;
- }
-
- cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
-
- // The probs will be updated based on the frame type of its previous
- // frame if frame_parallel_decoding_mode is 0. The type may vary for
- // the frame after a key frame in base layer since we may drop enhancement
- // layers. So set frame_parallel_decoding_mode to 1 in this case.
- if (cm->frame_parallel_decoding_mode == 0) {
- if (cpi->svc.number_temporal_layers == 1) {
- if (cpi->svc.spatial_layer_id == 0 &&
- cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)
- cm->frame_parallel_decoding_mode = 1;
- } else if (cpi->svc.spatial_layer_id == 0) {
- // Find the 2nd frame in temporal base layer and 1st frame in temporal
- // enhancement layers from the key frame.
- int i;
- for (i = 0; i < cpi->svc.number_temporal_layers; ++i) {
- if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) {
- cm->frame_parallel_decoding_mode = 1;
- break;
- }
- }
- }
- }
- }
// For 1 pass CBR, check if we are dropping this frame.
// Never drop on key frame, or if base layer is key for svc.
@@ -4639,8 +4610,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
if (cpi->use_svc) {
cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
- vp9_inc_frame_in_layer(cpi);
cpi->svc.skip_enhancement_layer = 1;
+ if (cpi->svc.framedrop_mode != CONSTRAINED_LAYER_DROP ||
+ cpi->svc.drop_spatial_layer[0] == 0) {
+      // For the case of CONSTRAINED_LAYER_DROP where the base is dropped
+      // (drop_spatial_layer[0] == 1), meaning the full superframe is
+      // dropped, we don't increment the svc frame counters. In particular,
+      // the temporal layer counter (which is incremented in
+      // vp9_inc_frame_in_layer()) won't be incremented, so on a dropped
+      // frame we retry the same temporal_layer_id on the next incoming
+      // frame. This avoids an issue with temporal alignment under full
+      // superframe dropping.
+ vp9_inc_frame_in_layer(cpi);
+ }
if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) {
int i;
int all_layers_drop = 1;
@@ -4663,6 +4644,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
memset(cpi->mode_chosen_counts, 0,
MAX_MODES * sizeof(*cpi->mode_chosen_counts));
#endif
+#if CONFIG_CONSISTENT_RECODE
+  // Back up encode parameters to ensure consistency between recodes.
+ save_encode_params(cpi);
+#endif
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
encode_without_recode_loop(cpi, size, dest);
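The CONFIG_CONSISTENT_RECODE hook snapshots mode-decision state before the (possibly repeated) encode so each recode pass starts from identical parameters. A minimal sketch under that assumption; EncParams and the *_sketch helpers are illustrative, not the real save_encode_params():

typedef struct {
  int thresh_freq_fact;      /* live value, mutated while encoding */
  int thresh_freq_fact_prev; /* snapshot taken before the recode loop */
} EncParams;

static void save_encode_params_sketch(EncParams *p) {
  p->thresh_freq_fact_prev = p->thresh_freq_fact;
}

static void restore_encode_params_sketch(EncParams *p) {
  p->thresh_freq_fact = p->thresh_freq_fact_prev;
}

static void encode_with_recode_sketch(EncParams *p, int max_passes) {
  int pass;
  save_encode_params_sketch(p);
  for (pass = 0; pass < max_passes; ++pass) {
    restore_encode_params_sketch(p); /* every pass starts from one state */
    /* ... encode the frame; rate control may request another pass ... */
  }
}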
@@ -4672,6 +4657,16 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cpi->last_frame_dropped = 0;
cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
+  // Keep track of the frame buffer index updated/refreshed for the
+  // currently encoded TL0 superframe.
+ if (cpi->svc.temporal_layer_id == 0) {
+ if (cpi->refresh_last_frame)
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
+ else if (cpi->refresh_golden_frame)
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
+ else if (cpi->refresh_alt_ref_frame)
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
+ }
  // Disable segmentation if it decreases the rate/distortion ratio
if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
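The TL0 bookkeeping above records which single reference slot the temporal-base-layer frame refreshed, per spatial layer, so later layers can locate the TL0 reconstruction. A hedged stand-alone sketch; the slot names and -1 sentinel are assumptions:

enum { LST_SLOT = 0, GLD_SLOT = 1, ALT_SLOT = 2 };

static int tl0_refreshed_fb_idx(int refresh_last, int refresh_golden,
                                int refresh_alt, const int fb_idx[3]) {
  if (refresh_last) return fb_idx[LST_SLOT];
  if (refresh_golden) return fb_idx[GLD_SLOT];
  if (refresh_alt) return fb_idx[ALT_SLOT];
  return -1; /* nothing refreshed this frame */
}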
@@ -4759,8 +4754,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cm->last_frame_type = cm->frame_type;
- if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
- vp9_rc_postencode_update(cpi, *size);
+ vp9_rc_postencode_update(cpi, *size);
#if 0
output_frame_level_debug_stats(cpi);
@@ -4830,8 +4824,7 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
- if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
- vp9_twopass_postencode_update(cpi);
+ vp9_twopass_postencode_update(cpi);
}
#endif // !CONFIG_REALTIME_ONLY
@@ -5271,9 +5264,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
int arf_src_index;
int i;
- if (is_two_pass_svc(cpi)) {
- if (oxcf->pass == 2) vp9_restore_layer_context(cpi);
- } else if (is_one_pass_cbr_svc(cpi)) {
+ if (is_one_pass_cbr_svc(cpi)) {
vp9_one_pass_cbr_svc_start_layer(cpi);
}
@@ -5301,9 +5292,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// Should we encode an arf frame.
arf_src_index = get_arf_src_index(cpi);
- // Skip alt frame if we encode the empty frame
- if (is_two_pass_svc(cpi) && source != NULL) arf_src_index = 0;
-
if (arf_src_index) {
for (i = 0; i <= arf_src_index; ++i) {
struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
@@ -5456,9 +5444,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->frame_flags = *frame_flags;
#if !CONFIG_REALTIME_ONLY
- if ((oxcf->pass == 2) &&
- (!cpi->use_svc || (is_two_pass_svc(cpi) &&
- cpi->svc.encode_empty_frame_state != ENCODING))) {
+ if ((oxcf->pass == 2) && !cpi->use_svc) {
vp9_rc_get_second_pass_params(cpi);
} else if (oxcf->pass == 1) {
set_frame_size(cpi);
@@ -5482,7 +5468,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
Pass0Encode(cpi, size, dest, frame_flags);
}
#else // !CONFIG_REALTIME_ONLY
- if (oxcf->pass == 1 && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ if (oxcf->pass == 1 && !cpi->use_svc) {
const int lossless = is_lossless_requested(oxcf);
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->oxcf.use_highbitdepth)
@@ -5497,7 +5483,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif // CONFIG_VP9_HIGHBITDEPTH
cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
vp9_first_pass(cpi, source);
- } else if (oxcf->pass == 2 && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ } else if (oxcf->pass == 2 && !cpi->use_svc) {
Pass2Encode(cpi, size, dest, frame_flags);
} else if (cpi->use_svc) {
SvcEncode(cpi, size, dest, frame_flags);
@@ -5698,21 +5684,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif
- if (is_two_pass_svc(cpi)) {
- if (cpi->svc.encode_empty_frame_state == ENCODING) {
- cpi->svc.encode_empty_frame_state = ENCODED;
- cpi->svc.encode_intra_empty_frame = 0;
- }
-
- if (cm->show_frame) {
- ++cpi->svc.spatial_layer_to_encode;
- if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
- cpi->svc.spatial_layer_to_encode = 0;
-
- // May need the empty frame after an visible frame.
- cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE;
- }
- } else if (is_one_pass_cbr_svc(cpi)) {
+ if (is_one_pass_cbr_svc(cpi)) {
if (cm->show_frame) {
++cpi->svc.spatial_layer_to_encode;
if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
index 05bfd6930d5..f66c13046a9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
@@ -282,6 +282,9 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
typedef struct TileDataEnc {
TileInfo tile_info;
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
+#if CONFIG_CONSISTENT_RECODE
+ int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES];
+#endif
int8_t mode_map[BLOCK_SIZES][MAX_MODES];
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
@@ -645,6 +648,8 @@ typedef struct VP9_COMP {
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
+ // Indices are: max_tx_size-1, tx_size_ctx, tx_size
+ int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
int multi_arf_allowed;
int multi_arf_enabled;
@@ -860,10 +865,6 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(
void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
-static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) {
- return cpi->use_svc && cpi->oxcf.pass != 0;
-}
-
static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
return (cpi->use_svc && cpi->oxcf.pass == 0);
}
@@ -879,9 +880,7 @@ static INLINE int denoise_svc(const struct VP9_COMP *const cpi) {
static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
return !(cpi->oxcf.mode == REALTIME && cpi->oxcf.rc_mode == VPX_CBR) &&
cpi->oxcf.lag_in_frames >= MIN_LOOKAHEAD_FOR_ARFS &&
- (cpi->oxcf.enable_auto_arf &&
- (!is_two_pass_svc(cpi) ||
- cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]));
+ cpi->oxcf.enable_auto_arf;
}
static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
index e102b493960..453879fb834 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
@@ -316,16 +316,7 @@ void vp9_init_first_pass(VP9_COMP *cpi) {
}
void vp9_end_first_pass(VP9_COMP *cpi) {
- if (is_two_pass_svc(cpi)) {
- int i;
- for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
- output_stats(&cpi->svc.layer_context[i].twopass.total_stats,
- cpi->output_pkt_list);
- }
- } else {
- output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
- }
-
+ output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
vpx_free(cpi->twopass.fp_mb_float_stats);
cpi->twopass.fp_mb_float_stats = NULL;
}
@@ -503,11 +494,10 @@ static int scale_sse_threshold(VP9_COMMON *cm, int thresh) {
switch (cm->bit_depth) {
case VPX_BITS_8: ret_val = thresh; break;
case VPX_BITS_10: ret_val = thresh << 4; break;
- case VPX_BITS_12: ret_val = thresh << 8; break;
default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
+ assert(cm->bit_depth == VPX_BITS_12);
+ ret_val = thresh << 8;
+ break;
}
}
#else
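This hunk is the first of several applying the same refactor: the unreachable `default: assert(0)` arm is removed by folding the highest bit depth into `default:` with an assert, so every path returns a value while debug builds still trap bad inputs. A self-contained illustration of the pattern; the enum is redeclared locally just for the sketch:

#include <assert.h>

typedef enum { VPX_BITS_8 = 8, VPX_BITS_10 = 10, VPX_BITS_12 = 12 } vpx_bit_depth_t;

/* Scale an 8-bit-depth threshold to the coding bit depth. Squared-error
 * quantities grow by 2^(2 * (bd - 8)), hence the << 4 and << 8. */
static int scale_thresh(int thresh, vpx_bit_depth_t bit_depth) {
  switch (bit_depth) {
    case VPX_BITS_8: return thresh;
    case VPX_BITS_10: return thresh << 4;
    default:
      assert(bit_depth == VPX_BITS_12);
      return thresh << 8;
  }
}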
@@ -529,11 +519,10 @@ static int get_ul_intra_threshold(VP9_COMMON *cm) {
switch (cm->bit_depth) {
case VPX_BITS_8: ret_val = UL_INTRA_THRESH; break;
case VPX_BITS_10: ret_val = UL_INTRA_THRESH << 2; break;
- case VPX_BITS_12: ret_val = UL_INTRA_THRESH << 4; break;
default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
+ assert(cm->bit_depth == VPX_BITS_12);
+ ret_val = UL_INTRA_THRESH << 4;
+ break;
}
}
#else
@@ -550,11 +539,10 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) {
switch (cm->bit_depth) {
case VPX_BITS_8: ret_val = SMOOTH_INTRA_THRESH; break;
case VPX_BITS_10: ret_val = SMOOTH_INTRA_THRESH << 4; break;
- case VPX_BITS_12: ret_val = SMOOTH_INTRA_THRESH << 8; break;
default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
+ assert(cm->bit_depth == VPX_BITS_12);
+ ret_val = SMOOTH_INTRA_THRESH << 8;
+ break;
}
}
#else
@@ -849,9 +837,6 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
- LAYER_CONTEXT *const lc =
- is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id]
- : NULL;
MODE_INFO mi_above, mi_left;
double mb_intra_factor;
@@ -860,25 +845,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
- assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL));
-
- if (lc != NULL) {
- // Use either last frame or alt frame for motion search.
- if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);
- if (first_ref_buf == NULL)
- first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME);
- }
-
- if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
- if (gld_yv12 == NULL) {
- gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
- }
- } else {
- gld_yv12 = NULL;
- }
- }
+ assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
xd->mi = cm->mi_grid_visible + xd->mi_stride * (mb_row << 1) +
(tile.mi_col_start >> 1);
@@ -1001,12 +968,10 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
switch (cm->bit_depth) {
case VPX_BITS_8: break;
case VPX_BITS_10: this_error >>= 4; break;
- case VPX_BITS_12: this_error >>= 8; break;
default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
- return;
+ assert(cm->bit_depth == VPX_BITS_12);
+ this_error >>= 8;
+ break;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -1072,8 +1037,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
// Other than for the first frame do a motion search.
- if ((lc == NULL && cm->current_video_frame > 0) ||
- (lc != NULL && lc->current_video_frame_in_layer > 0)) {
+ if (cm->current_video_frame > 0) {
int tmp_err, motion_error, raw_motion_error;
// Assume 0,0 motion with no mv overhead.
MV mv = { 0, 0 }, tmp_mv = { 0, 0 };
@@ -1113,7 +1077,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
#endif // CONFIG_VP9_HIGHBITDEPTH
// TODO(pengchong): Replace the hard-coded threshold
- if (raw_motion_error > 25 || lc != NULL) {
+ if (raw_motion_error > 25) {
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error);
@@ -1131,9 +1095,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
}
// Search in an older reference frame.
- if (((lc == NULL && cm->current_video_frame > 1) ||
- (lc != NULL && lc->current_video_frame_in_layer > 1)) &&
- gld_yv12 != NULL) {
+ if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {
// Assume 0,0 motion with no mv overhead.
int gf_motion_error;
@@ -1371,9 +1333,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
- LAYER_CONTEXT *const lc =
- is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id]
- : NULL;
BufferPool *const pool = cm->buffer_pool;
FIRSTPASS_DATA fp_temp_data;
@@ -1385,7 +1344,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
- assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL));
+ assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -1396,50 +1355,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
set_first_pass_params(cpi);
vp9_set_quantizer(cm, find_fp_qindex(cm->bit_depth));
- if (lc != NULL) {
- twopass = &lc->twopass;
-
- cpi->lst_fb_idx = cpi->svc.spatial_layer_id;
- cpi->ref_frame_flags = VP9_LAST_FLAG;
-
- if (cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id <
- REF_FRAMES) {
- cpi->gld_fb_idx =
- cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id;
- cpi->ref_frame_flags |= VP9_GOLD_FLAG;
- cpi->refresh_golden_frame = (lc->current_video_frame_in_layer == 0);
- } else {
- cpi->refresh_golden_frame = 0;
- }
-
- if (lc->current_video_frame_in_layer == 0) cpi->ref_frame_flags = 0;
-
- vp9_scale_references(cpi);
-
- // Use either last frame or alt frame for motion search.
- if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);
- if (first_ref_buf == NULL)
- first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME);
- }
-
- if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
- if (gld_yv12 == NULL) {
- gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
- }
- } else {
- gld_yv12 = NULL;
- }
-
- set_ref_ptrs(cm, xd,
- (cpi->ref_frame_flags & VP9_LAST_FLAG) ? LAST_FRAME : NONE,
- (cpi->ref_frame_flags & VP9_GOLD_FLAG) ? GOLDEN_FRAME : NONE);
-
- cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
- &cpi->scaled_source, 0, EIGHTTAP, 0);
- }
-
vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
vp9_setup_src_planes(x, cpi->Source, 0, 0);
@@ -1523,18 +1438,13 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
vpx_extend_frame_borders(new_yv12);
- if (lc != NULL) {
- vp9_update_reference_frames(cpi);
- } else {
- // The frame we just compressed now becomes the last frame.
- ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
- cm->new_fb_idx);
- }
+ // The frame we just compressed now becomes the last frame.
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
+ cm->new_fb_idx);
// Special case for the first frame. Copy into the GF buffer as a second
// reference.
- if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX &&
- lc == NULL) {
+ if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX) {
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
cm->ref_frame_map[cpi->lst_fb_idx]);
}
@@ -1588,7 +1498,9 @@ static double wq_err_divisor(VP9_COMP *cpi) {
// Use a different error per mb factor for calculating boost for
// different formats.
- if (screen_area < 1280 * 720) {
+ if (screen_area <= 640 * 360) {
+ return 115.0;
+ } else if (screen_area < 1280 * 720) {
return 125.0;
} else if (screen_area <= 1920 * 1080) {
return 130.0;
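The change adds a sub-360p tier to the error-per-mb divisor. Collected as a stand-alone function for readability; the value for areas above 1080p lies outside this hunk, so it is illustrative only:

static double wq_err_divisor_sketch(int screen_area) {
  if (screen_area <= 640 * 360) return 115.0;     /* new sub-360p tier */
  if (screen_area < 1280 * 720) return 125.0;
  if (screen_area <= 1920 * 1080) return 130.0;
  return 135.0; /* >1080p tier: value assumed, not taken from the hunk */
}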
@@ -1706,14 +1618,9 @@ void calculate_coded_size(VP9_COMP *cpi, int *scaled_frame_width,
}
void vp9_init_second_pass(VP9_COMP *cpi) {
- SVC *const svc = &cpi->svc;
VP9EncoderConfig *const oxcf = &cpi->oxcf;
- const int is_two_pass_svc =
- (svc->number_spatial_layers > 1) || (svc->number_temporal_layers > 1);
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass =
- is_two_pass_svc ? &svc->layer_context[svc->spatial_layer_id].twopass
- : &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->twopass;
double frame_rate;
FIRSTPASS_STATS *stats;
@@ -1790,18 +1697,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
// encoded in the second pass is a guess. However, the sum duration is not.
// It is calculated based on the actual durations of all frames from the
// first pass.
-
- if (is_two_pass_svc) {
- vp9_update_spatial_layer_framerate(cpi, frame_rate);
- twopass->bits_left =
- (int64_t)(stats->duration *
- svc->layer_context[svc->spatial_layer_id].target_bandwidth /
- 10000000.0);
- } else {
- vp9_new_framerate(cpi, frame_rate);
- twopass->bits_left =
- (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
- }
+ vp9_new_framerate(cpi, frame_rate);
+ twopass->bits_left =
+ (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
// This variable monitors how far behind the second ref update is lagging.
twopass->sr_update_lag = 1;
@@ -2239,8 +2137,6 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
int mid_frame_idx;
unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
int alt_frame_index = frame_index;
- int has_temporal_layers =
- is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1;
int normal_frames;
int normal_frame_bits;
int last_frame_reduction = 0;
@@ -2248,11 +2144,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
double tot_norm_frame_score = 1.0;
double this_frame_score = 1.0;
- // Only encode alt reference frame in temporal base layer.
- if (has_temporal_layers) alt_frame_index = cpi->svc.number_temporal_layers;
-
- key_frame =
- cpi->common.frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi);
+ key_frame = cpi->common.frame_type == KEY_FRAME;
get_arf_buffer_indices(arf_buffer_indices);
@@ -2282,19 +2174,14 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
gf_group->rf_level[alt_frame_index] = GF_ARF_STD;
gf_group->bit_allocation[alt_frame_index] = gf_arf_bits;
- if (has_temporal_layers)
- gf_group->arf_src_offset[alt_frame_index] =
- (unsigned char)(rc->baseline_gf_interval -
- cpi->svc.number_temporal_layers);
- else
- gf_group->arf_src_offset[alt_frame_index] =
- (unsigned char)(rc->baseline_gf_interval - 1);
+ gf_group->arf_src_offset[alt_frame_index] =
+ (unsigned char)(rc->baseline_gf_interval - 1);
gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0];
gf_group->arf_ref_idx[alt_frame_index] =
arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
rc->source_alt_ref_active];
- if (!has_temporal_layers) ++frame_index;
+ ++frame_index;
if (cpi->multi_arf_enabled) {
// Set aside a slot for a level 1 arf.
@@ -2330,11 +2217,6 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
for (i = 0; i < normal_frames; ++i) {
int arf_idx = 0;
if (EOF == input_stats(twopass, &frame_stats)) break;
-
- if (has_temporal_layers && frame_index == alt_frame_index) {
- ++frame_index;
- }
-
if (oxcf->vbr_corpus_complexity) {
this_frame_score = calculate_norm_frame_score(cpi, twopass, oxcf,
&frame_stats, av_score);
@@ -2559,8 +2441,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
// Monitor for static sections.
- zero_motion_accumulator = VPXMIN(
- zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+ if ((rc->frames_since_key + i - 1) > 1) {
+ zero_motion_accumulator *= get_zero_motion_factor(cpi, &next_frame);
+ }
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
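The static-section monitor switches from a running minimum to a running product of per-frame zero-motion factors, skipping the frame right after a key frame. A simplified sketch of the accumulation rule; the frames_since_key gate is condensed from the fuller expression above:

/* Each frame's zero-motion factor (in [0,1]) now multiplies in, so the
 * accumulator only stays near 1.0 for sections that are static
 * throughout, not merely at their quietest frame. */
static double accumulate_zero_motion(double acc, double frame_factor,
                                     int frames_since_key) {
  if (frames_since_key > 1) /* skip the frame right after a key frame */
    acc *= frame_factor;
  return acc;
}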
@@ -2582,8 +2465,17 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Break out conditions.
// Break at maximum of active_max_gf_interval unless almost totally static.
- if (((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) &&
- (i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
+ //
+ // Note that the addition of a test of rc->source_alt_ref_active is
+  // deliberate. The effect is that after a normal altref group, even if
+  // the material is static, there will be one normal-length GF group
+  // before longer GF groups are allowed. The reason is that in cases
+ // such as slide shows where slides are separated by a complex transition
+ // such as a fade, the arf group spanning the transition may not be coded
+ // at a very high quality and hence this frame (with its overlay) is a
+ // poor golden frame to use for an extended group.
+ if (((i >= active_max_gf_interval) &&
+ ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) ||
(
// Don't break out with a very short interval.
(i >= active_min_gf_interval) &&
@@ -2603,7 +2495,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
// Should we use the alternate reference frame.
- if ((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && allow_alt_ref &&
+ if ((zero_motion_accumulator < 0.995) && allow_alt_ref &&
(i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) {
const int forward_frames = (rc->frames_to_key - i >= i - 1)
? i - 1
@@ -2631,32 +2523,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 200);
#endif
- // Set the interval until the next gf.
rc->baseline_gf_interval =
- (twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH)
- ? (i - (is_key_frame || rc->source_alt_ref_pending))
- : i;
-
- // Only encode alt reference frame in temporal base layer. So
- // baseline_gf_interval should be multiple of a temporal layer group
- // (typically the frame distance between two base layer frames)
- if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) {
- int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1;
- int new_gf_interval = (rc->baseline_gf_interval + count) & (~count);
- int j;
- for (j = 0; j < new_gf_interval - rc->baseline_gf_interval; ++j) {
- if (EOF == input_stats(twopass, this_frame)) break;
- gf_group_err +=
- calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
- gf_group_raw_error += this_frame->coded_error;
- gf_group_noise += this_frame->frame_noise_energy;
- gf_group_skip_pct += this_frame->intra_skip_pct;
- gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
- gf_group_inter += this_frame->pcnt_inter;
- gf_group_motion += this_frame->pcnt_motion;
- }
- rc->baseline_gf_interval = new_gf_interval;
- }
+ ((twopass->kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) &&
+ (i >= rc->frames_to_key))
+ ? i
+ : (i - (is_key_frame || rc->source_alt_ref_pending));
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
@@ -2903,6 +2774,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
vp9_zero(next_frame);
cpi->common.frame_type = KEY_FRAME;
+ rc->frames_since_key = 0;
// Reset the GF group data structures.
vp9_zero(*gf_group);
@@ -3008,18 +2880,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->next_key_frame_forced = 0;
}
- if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) {
- int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1;
- int new_frame_to_key = (rc->frames_to_key + count) & (~count);
- int j;
- for (j = 0; j < new_frame_to_key - rc->frames_to_key; ++j) {
- if (EOF == input_stats(twopass, this_frame)) break;
- kf_group_err +=
- calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
- }
- rc->frames_to_key = new_frame_to_key;
- }
-
// Special case for the last key frame of the file.
if (twopass->stats_in >= twopass->stats_in_end) {
// Accumulate kf group error.
@@ -3059,7 +2919,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
for (i = 0; i < (rc->frames_to_key - 1); ++i) {
if (EOF == input_stats(twopass, &next_frame)) break;
- if (i <= KF_BOOST_SCAN_MAX_FRAMES) {
+    // The zero motion test here ensures that if we mark a kf group as
+    // static, it is static throughout, not just for the first
+    // KF_BOOST_SCAN_MAX_FRAMES.
+ // It also allows for a larger boost on long static groups.
+ if ((i <= KF_BOOST_SCAN_MAX_FRAMES) || (zero_motion_accumulator >= 0.99)) {
double frame_boost;
double zm_factor;
@@ -3171,21 +3034,13 @@ static void configure_buffer_updates(VP9_COMP *cpi) {
cpi->refresh_alt_ref_frame = 0;
cpi->rc.is_src_frame_alt_ref = 1;
break;
- case ARF_UPDATE:
+ default:
+ assert(twopass->gf_group.update_type[twopass->gf_group.index] ==
+ ARF_UPDATE);
cpi->refresh_last_frame = 0;
cpi->refresh_golden_frame = 0;
cpi->refresh_alt_ref_frame = 1;
break;
- default: assert(0); break;
- }
- if (is_two_pass_svc(cpi)) {
- if (cpi->svc.temporal_layer_id > 0) {
- cpi->refresh_last_frame = 0;
- cpi->refresh_golden_frame = 0;
- }
- if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].gold_ref_idx < 0)
- cpi->refresh_golden_frame = 0;
- if (cpi->alt_ref_source == NULL) cpi->refresh_alt_ref_frame = 0;
}
}
@@ -3194,10 +3049,7 @@ static int is_skippable_frame(const VP9_COMP *cpi) {
// first pass, and so do its previous and forward frames, then this frame
// can be skipped for partition check, and the partition size is assigned
// according to the variance
- const SVC *const svc = &cpi->svc;
- const TWO_PASS *const twopass =
- is_two_pass_svc(cpi) ? &svc->layer_context[svc->spatial_layer_id].twopass
- : &cpi->twopass;
+ const TWO_PASS *const twopass = &cpi->twopass;
return (!frame_is_intra_only(&cpi->common) &&
twopass->stats_in - 2 > twopass->stats_in_start &&
@@ -3219,9 +3071,6 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
FIRSTPASS_STATS this_frame;
int target_rate;
- LAYER_CONTEXT *const lc =
- is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id]
- : 0;
if (!twopass->stats_in) return;
@@ -3236,20 +3085,10 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
cm->frame_type = INTER_FRAME;
- if (lc != NULL) {
- if (cpi->svc.spatial_layer_id == 0) {
- lc->is_key_frame = 0;
- } else {
- lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
-
- if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
- }
- }
-
// Do the firstpass stats indicate that this frame is skippable for the
// partition search?
if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&
- (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ !cpi->use_svc) {
cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
}
@@ -3260,12 +3099,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
if (cpi->oxcf.rc_mode == VPX_Q) {
twopass->active_worst_quality = cpi->oxcf.cq_level;
- } else if (cm->current_video_frame == 0 ||
- (lc != NULL && lc->current_video_frame_in_layer == 0)) {
+ } else if (cm->current_video_frame == 0) {
const int frames_left =
- (int)(twopass->total_stats.count -
- ((lc != NULL) ? lc->current_video_frame_in_layer
- : cm->current_video_frame));
+ (int)(twopass->total_stats.count - cm->current_video_frame);
// Special case code for first frame.
const int section_target_bandwidth =
(int)(twopass->bits_left / frames_left);
@@ -3314,33 +3150,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
cm->frame_type = INTER_FRAME;
}
- if (lc != NULL) {
- if (cpi->svc.spatial_layer_id == 0) {
- lc->is_key_frame = (cm->frame_type == KEY_FRAME);
- if (lc->is_key_frame) {
- cpi->ref_frame_flags &=
- (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
- lc->frames_from_key_frame = 0;
- // Encode an intra only empty frame since we have a key frame.
- cpi->svc.encode_intra_empty_frame = 1;
- }
- } else {
- cm->frame_type = INTER_FRAME;
- lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
-
- if (lc->is_key_frame) {
- cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
- lc->frames_from_key_frame = 0;
- }
- }
- }
-
// Define a new GF/ARF group. (Should always enter here for key frames).
if (rc->frames_till_gf_update_due == 0) {
define_gf_group(cpi, &this_frame);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- if (lc != NULL) cpi->refresh_golden_frame = 1;
#if ARF_STATS_OUTPUT
{
@@ -3361,7 +3175,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
// Do the firstpass stats indicate that this frame is skippable for the
// partition search?
if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&
- (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ !cpi->use_svc) {
cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
}
@@ -3407,8 +3221,7 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
rc->rate_error_estimate = 0;
}
- if (cpi->common.frame_type != KEY_FRAME &&
- !vp9_is_upper_layer_key_frame(cpi)) {
+ if (cpi->common.frame_type != KEY_FRAME) {
twopass->kf_group_bits -= bits_used;
twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
index 1cb978667b5..ba72c0be5ed 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
@@ -1793,7 +1793,7 @@ static const MV search_pos[4] = {
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row,
- int mi_col) {
+ int mi_col, const MV *ref_mv) {
MACROBLOCKD *xd = &x->e_mbd;
MODE_INFO *mi = xd->mi[0];
struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
@@ -1815,6 +1815,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
const int norm_factor = 3 + (bw >> 5);
const YV12_BUFFER_CONFIG *scaled_ref_frame =
vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
+ MvLimits subpel_mv_limits;
if (scaled_ref_frame) {
int i;
@@ -1917,6 +1918,10 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
tmp_mv->row *= 8;
tmp_mv->col *= 8;
+ vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
+ clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max,
+ subpel_mv_limits.row_min, subpel_mv_limits.row_max);
+
if (scaled_ref_frame) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
@@ -2210,7 +2215,8 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list,
fn_ptr, 1, ref_mv, tmp_mv);
break;
- case NSTEP:
+ default:
+ assert(method == NSTEP);
var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
cost_list, fn_ptr, ref_mv, tmp_mv);
@@ -2236,7 +2242,6 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
}
}
break;
- default: assert(0 && "Invalid search method.");
}
if (method != NSTEP && rd && var < var_max)
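Relatedly, vp9_int_pro_motion_estimation() above now clamps its result, freshly converted to 1/8-pel units, into the subpel window around ref_mv so fractional refinement never starts out of range. A rough sketch with simplified MV/MvLimits types; the SUBPEL_SPAN constant and the exact window derivation are assumptions, not the real vp9_set_subpel_mv_search_range():

typedef struct { int row, col; } MV;
typedef struct { int row_min, row_max, col_min, col_max; } MvLimits;

static int clampi(int v, int lo, int hi) {
  return v < lo ? lo : (v > hi ? hi : v);
}

#define SUBPEL_SPAN (31 * 8) /* assumed +/- span (1/8 pel) around ref_mv */

static void clamp_to_subpel_range(MV *mv, const MvLimits *full,
                                  const MV *ref_mv) {
  MvLimits sub;
  sub.col_min = clampi(ref_mv->col - SUBPEL_SPAN, full->col_min * 8,
                       full->col_max * 8);
  sub.col_max = clampi(ref_mv->col + SUBPEL_SPAN, full->col_min * 8,
                       full->col_max * 8);
  sub.row_min = clampi(ref_mv->row - SUBPEL_SPAN, full->row_min * 8,
                       full->row_max * 8);
  sub.row_max = clampi(ref_mv->row + SUBPEL_SPAN, full->row_min * 8,
                       full->row_max * 8);
  mv->col = clampi(mv->col, sub.col_min, sub.col_max);
  mv->row = clampi(mv->row, sub.row_min, sub.row_max);
}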
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h
index b8db2c35368..b4787fe1fc5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h
@@ -66,7 +66,8 @@ int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv,
// Perform integral projection based motion estimation.
unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi,
MACROBLOCK *x, BLOCK_SIZE bsize,
- int mi_row, int mi_col);
+ int mi_row, int mi_col,
+ const MV *ref_mv);
typedef uint32_t(fractional_mv_step_fp)(
const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c
index 1c2c55b9e4b..4e96490658b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c
@@ -169,14 +169,10 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
case VPX_BITS_10:
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
break;
- case VPX_BITS_12:
+ default:
+ assert(cm->bit_depth == VPX_BITS_12);
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
break;
- default:
- assert(0 &&
- "bit_depth should be VPX_BITS_8, VPX_BITS_10 "
- "or VPX_BITS_12");
- return;
}
#else
int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
index a9c7c7d3d19..b61a89471a9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
@@ -224,6 +224,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
if (rv && search_subpel) {
int subpel_force_stop = cpi->sf.mv.subpel_force_stop;
if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2;
+ if (cpi->sf.mv.enable_adaptive_subpel_force_stop) {
+ int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh;
+ if (abs(tmp_mv->as_mv.row) >= mv_thresh ||
+ abs(tmp_mv->as_mv.col) >= mv_thresh)
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above;
+ else
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below;
+ }
cpi->find_fractional_mv_step(
x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
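The adaptive rule added above picks a coarser fractional-search stop for large motion vectors and a finer one for small motion. A stand-alone sketch; the struct is a stand-in for the sf->mv.adapt_subpel_force_stop block:

typedef struct {
  int mv_thresh;        /* full-pel magnitude threshold */
  int force_stop_above; /* stop level when |mv| >= mv_thresh */
  int force_stop_below; /* stop level when |mv| <  mv_thresh */
} AdaptSubpelForceStop;

static int adaptive_force_stop(const AdaptSubpelForceStop *cfg,
                               int mv_row, int mv_col) {
  const int r = mv_row < 0 ? -mv_row : mv_row;
  const int c = mv_col < 0 ? -mv_col : mv_col;
  return (r >= cfg->mv_thresh || c >= cfg->mv_thresh)
             ? cfg->force_stop_above
             : cfg->force_stop_below;
}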
@@ -726,13 +734,13 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- default: assert(0); break;
}
*skippable &= (*eob == 0);
eob_cost += 1;
@@ -1421,7 +1429,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
- const SVC *const svc = &cpi->svc;
+ SVC *const svc = &cpi->svc;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
struct macroblockd_plane *const pd = &xd->plane[0];
@@ -1495,27 +1503,37 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#endif
INTERP_FILTER filter_gf_svc = EIGHTTAP;
MV_REFERENCE_FRAME best_second_ref_frame = NONE;
- MV_REFERENCE_FRAME spatial_ref = GOLDEN_FRAME;
+ MV_REFERENCE_FRAME inter_layer_ref = GOLDEN_FRAME;
const struct segmentation *const seg = &cm->seg;
int comp_modes = 0;
int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES;
int flag_svc_subpel = 0;
int svc_mv_col = 0;
int svc_mv_row = 0;
+ int no_scaling = 0;
unsigned int thresh_svc_skip_golden = 500;
+ if (cpi->use_svc && svc->spatial_layer_id > 0) {
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id - 1, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ if (lc->scaling_factor_num == lc->scaling_factor_den) no_scaling = 1;
+ }
+ if (svc->spatial_layer_id > 0 &&
+ (svc->high_source_sad_superframe || no_scaling))
+ thresh_svc_skip_golden = 0;
// Lower the skip threshold if lower spatial layer is better quality relative
// to current layer.
- if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 &&
- cm->base_qindex > cpi->svc.lower_layer_qindex + 15)
+ else if (svc->spatial_layer_id > 0 && cm->base_qindex > 150 &&
+ cm->base_qindex > svc->lower_layer_qindex + 15)
thresh_svc_skip_golden = 100;
// Increase skip threshold if lower spatial layer is lower quality relative
// to current layer.
- else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex < 140 &&
- cm->base_qindex < cpi->svc.lower_layer_qindex - 20)
+ else if (svc->spatial_layer_id > 0 && cm->base_qindex < 140 &&
+ cm->base_qindex < svc->lower_layer_qindex - 20)
thresh_svc_skip_golden = 1000;
init_ref_frame_cost(cm, xd, ref_frame_cost);
-
memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES);
if (reuse_inter_pred) {
@@ -1575,10 +1593,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
if (cpi->use_svc) {
- int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame;
}
if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode)
@@ -1613,19 +1631,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// For svc mode, on spatial_layer_id > 0: if the reference has different scale
// constrain the inter mode to only test zero motion.
if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
- cpi->svc.spatial_layer_id > 0) {
+ svc->spatial_layer_id > 0) {
if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) {
struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
if (vp9_is_scaled(sf)) {
svc_force_zero_mode[LAST_FRAME - 1] = 1;
- spatial_ref = LAST_FRAME;
+ inter_layer_ref = LAST_FRAME;
}
}
if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) {
struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf;
if (vp9_is_scaled(sf)) {
svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
- spatial_ref = GOLDEN_FRAME;
+ inter_layer_ref = GOLDEN_FRAME;
}
}
}
@@ -1642,6 +1660,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
}
+ if (sf->disable_golden_ref && (x->content_state_sb != kVeryHighSad ||
+ cpi->rc.avg_frame_low_motion < 60))
+ usable_ref_frame = LAST_FRAME;
+
if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
!svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
use_golden_nonzeromv = 0;
@@ -1667,6 +1689,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
+    // Skip find_predictors() if the reference frame is not in
+    // ref_frame_flags (i.e., not used as a reference for this frame).
+ skip_ref_find_pred[ref_frame] =
+ !(cpi->ref_frame_flags & flag_list[ref_frame]);
if (!skip_ref_find_pred[ref_frame]) {
find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
&ref_frame_skip_mask, flag_list, tile_data, mi_row,
@@ -1682,9 +1708,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// an averaging filter for downsampling (phase = 8). If so, we will test
// a nonzero motion mode on the spatial reference.
// The nonzero motion is half pixel shifted to left and top (-4, -4).
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
- svc_force_zero_mode[spatial_ref - 1] &&
- cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) {
+ if (cpi->use_svc && svc->spatial_layer_id > 0 &&
+ svc_force_zero_mode[inter_layer_ref - 1] &&
+ svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8) {
svc_mv_col = -4;
svc_mv_row = -4;
flag_svc_subpel = 1;
@@ -1733,7 +1759,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
- if (flag_svc_subpel && ref_frame == spatial_ref) {
+ if (flag_svc_subpel && ref_frame == inter_layer_ref) {
force_gf_mv = 1;
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
// otherwise set NEWMV to (svc_mv_col, svc_mv_row).
@@ -1761,8 +1787,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
sse_zeromv_normalized < thresh_svc_skip_golden)
continue;
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
+
if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
- this_mode != NEARESTMV) {
+ frame_mv[this_mode][ref_frame].as_int != 0) {
continue;
}
@@ -1792,8 +1820,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
continue;
}
- if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
-
if (const_motion[ref_frame] && this_mode == NEARMV) continue;
// Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
@@ -1873,7 +1899,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
(!cpi->sf.adaptive_rd_thresh_row_mt &&
rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
&rd_thresh_freq_fact[mode_index])))
- continue;
+ if (frame_mv[this_mode][ref_frame].as_int != 0) continue;
if (this_mode == NEWMV && !force_gf_mv) {
if (ref_frame > LAST_FRAME && !cpi->use_svc &&
@@ -1884,7 +1910,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (bsize < BLOCK_16X16) continue;
- tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
+ tmp_sad = vp9_int_pro_motion_estimation(
+ cpi, x, bsize, mi_row, mi_col,
+ &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv);
if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) continue;
if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad)
@@ -1919,7 +1947,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Exit NEWMV search if base_mv is (0,0) && bsize < BLOCK_16x16,
// for SVC encoding.
- if (cpi->use_svc && cpi->svc.use_base_mv && bsize < BLOCK_16X16 &&
+ if (cpi->use_svc && svc->use_base_mv && bsize < BLOCK_16X16 &&
frame_mv[NEWMV][ref_frame].as_mv.row == 0 &&
frame_mv[NEWMV][ref_frame].as_mv.col == 0)
continue;
@@ -2242,12 +2270,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// layer is chosen as the reference. Always perform intra prediction if
// LAST is the only reference, or is_key_frame is set, or on base
// temporal layer.
- if (cpi->svc.spatial_layer_id) {
+ if (svc->spatial_layer_id) {
perform_intra_pred =
- cpi->svc.temporal_layer_id == 0 ||
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
+ svc->temporal_layer_id == 0 ||
+ svc->layer_context[svc->temporal_layer_id].is_key_frame ||
!(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
- (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ (!svc->layer_context[svc->temporal_layer_id].is_key_frame &&
svc_force_zero_mode[best_ref_frame - 1]);
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c
index 09f61ead263..276022a56b8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c
@@ -204,10 +204,9 @@ static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) {
switch (bit_depth) {
case VPX_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80);
case VPX_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80);
- case VPX_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
- return -1;
+ assert(bit_depth == VPX_BITS_12);
+ return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
}
#else
(void)bit_depth;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
index b5f1a5c5c71..c349a807aa2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -48,18 +48,16 @@
#define MAX_BPB_FACTOR 50
#if CONFIG_VP9_HIGHBITDEPTH
-#define ASSIGN_MINQ_TABLE(bit_depth, name) \
- do { \
- switch (bit_depth) { \
- case VPX_BITS_8: name = name##_8; break; \
- case VPX_BITS_10: name = name##_10; break; \
- case VPX_BITS_12: name = name##_12; break; \
- default: \
- assert(0 && \
- "bit_depth should be VPX_BITS_8, VPX_BITS_10" \
- " or VPX_BITS_12"); \
- name = NULL; \
- } \
+#define ASSIGN_MINQ_TABLE(bit_depth, name) \
+ do { \
+ switch (bit_depth) { \
+ case VPX_BITS_8: name = name##_8; break; \
+ case VPX_BITS_10: name = name##_10; break; \
+ default: \
+ assert(bit_depth == VPX_BITS_12); \
+ name = name##_12; \
+ break; \
+ } \
} while (0)
#else
#define ASSIGN_MINQ_TABLE(bit_depth, name) \
@@ -167,10 +165,9 @@ double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth) {
switch (bit_depth) {
case VPX_BITS_8: return vp9_ac_quant(qindex, 0, bit_depth) / 4.0;
case VPX_BITS_10: return vp9_ac_quant(qindex, 0, bit_depth) / 16.0;
- case VPX_BITS_12: return vp9_ac_quant(qindex, 0, bit_depth) / 64.0;
default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
- return -1.0;
+ assert(bit_depth == VPX_BITS_12);
+ return vp9_ac_quant(qindex, 0, bit_depth) / 64.0;
}
#else
return vp9_ac_quant(qindex, 0, bit_depth) / 4.0;
@@ -620,8 +617,14 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
!(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) &&
(cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
- q = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
- VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
+ int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
+ VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
+ // If the previous had overshoot and the current q needs to increase above
+ // the clamped value, reduce the clamp for faster reaction to overshoot.
+ if (cpi->rc.rc_1_frame == -1 && q > qclamp)
+ q = (q + qclamp) >> 1;
+ else
+ q = qclamp;
}
return q;
}
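The clamp on q between the last two frames' q values is now relaxed after an overshoot: if the previous frame overshot (rc_1_frame == -1) and the raw q wants to rise above the clamp, move halfway toward it for a faster reaction. Sketched in isolation, with the rc fields passed as plain parameters:

static int regulate_q_sketch(int q, int q_1_frame, int q_2_frame,
                             int prev_frame_overshot) {
  const int lo = q_1_frame < q_2_frame ? q_1_frame : q_2_frame;
  const int hi = q_1_frame < q_2_frame ? q_2_frame : q_1_frame;
  const int qclamp = q < lo ? lo : (q > hi ? hi : q);
  if (prev_frame_overshot && q > qclamp) return (q + qclamp) >> 1;
  return qclamp;
}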
@@ -832,19 +835,6 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
*top_index = active_worst_quality;
*bottom_index = active_best_quality;
-#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
- // Limit Q range for the adaptive loop.
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
- !(cm->current_video_frame == 0)) {
- int qdelta = 0;
- vpx_clear_system_state();
- qdelta = vp9_compute_qdelta_by_rate(
- &cpi->rc, cm->frame_type, active_worst_quality, 2.0, cm->bit_depth);
- *top_index = active_worst_quality + qdelta;
- *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
- }
-#endif
-
// Special case code to try and match quality with forced key frames
if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
q = rc->last_boosted_qindex;
@@ -1097,7 +1087,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
int *inter_minq;
ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
- if (frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) {
+ if (frame_is_intra_only(cm)) {
// Handle the special case for key frames forced when we have reached
// the maximum key frame interval. Here force the Q to a range
// based on the ambient Q to reduce the risk of popping.
@@ -1213,9 +1203,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
vpx_clear_system_state();
// Static forced key frames Q restrictions dealt with elsewhere.
- if (!((frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi))) ||
- !rc->this_key_frame_forced ||
- (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
+ if (!frame_is_intra_only(cm) || !rc->this_key_frame_forced ||
+ cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH) {
int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index],
active_worst_quality);
active_worst_quality =
@@ -1239,8 +1228,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
if (oxcf->rc_mode == VPX_Q) {
q = active_best_quality;
// Special case code to try and match quality with forced key frames.
- } else if ((frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) &&
- rc->this_key_frame_forced) {
+ } else if (frame_is_intra_only(cm) && rc->this_key_frame_forced) {
// If static since last kf use better of last boosted and last kf q.
if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
q = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
@@ -1488,7 +1476,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
- if (!cpi->use_svc || is_two_pass_svc(cpi)) {
+ if (!cpi->use_svc) {
if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
(cm->frame_type != KEY_FRAME))
// Update the alternate reference frame stats as appropriate.
@@ -1734,10 +1722,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
cpi->svc.spatial_layer_id == 0)) {
cm->frame_type = KEY_FRAME;
rc->source_alt_ref_active = 0;
- if (is_two_pass_svc(cpi)) {
- cpi->svc.layer_context[layer].is_key_frame = 1;
- cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
- } else if (is_one_pass_cbr_svc(cpi)) {
+ if (is_one_pass_cbr_svc(cpi)) {
if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi);
layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
cpi->svc.temporal_layer_id,
@@ -1750,17 +1735,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
}
} else {
cm->frame_type = INTER_FRAME;
- if (is_two_pass_svc(cpi)) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
- if (cpi->svc.spatial_layer_id == 0) {
- lc->is_key_frame = 0;
- } else {
- lc->is_key_frame =
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
- if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
- }
- cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
- } else if (is_one_pass_cbr_svc(cpi)) {
+ if (is_one_pass_cbr_svc(cpi)) {
LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) {
lc->is_key_frame = 0;
@@ -1790,8 +1765,6 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
cm->frame_type = KEY_FRAME;
- rc->this_key_frame_forced =
- cm->current_video_frame != 0 && rc->frames_to_key == 0;
rc->frames_to_key = cpi->oxcf.key_freq;
rc->kf_boost = DEFAULT_KF_BOOST;
rc->source_alt_ref_active = 0;
@@ -2301,18 +2274,34 @@ static void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi,
void vp9_scene_detection_onepass(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ YV12_BUFFER_CONFIG const *unscaled_src = cpi->un_scaled_source;
+ YV12_BUFFER_CONFIG const *unscaled_last_src = cpi->unscaled_last_source;
+ uint8_t *src_y;
+ int src_ystride;
+ int src_width;
+ int src_height;
+ uint8_t *last_src_y;
+ int last_src_ystride;
+ int last_src_width;
+ int last_src_height;
+ if (cpi->un_scaled_source == NULL || cpi->unscaled_last_source == NULL ||
+ (cpi->use_svc && cpi->svc.current_superframe == 0))
+ return;
+ src_y = unscaled_src->y_buffer;
+ src_ystride = unscaled_src->y_stride;
+ src_width = unscaled_src->y_width;
+ src_height = unscaled_src->y_height;
+ last_src_y = unscaled_last_src->y_buffer;
+ last_src_ystride = unscaled_last_src->y_stride;
+ last_src_width = unscaled_last_src->y_width;
+ last_src_height = unscaled_last_src->y_height;
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) return;
#endif
rc->high_source_sad = 0;
- if (cpi->Last_Source != NULL &&
- cpi->Last_Source->y_width == cpi->Source->y_width &&
- cpi->Last_Source->y_height == cpi->Source->y_height) {
+ if (cpi->svc.spatial_layer_id == 0 && src_width == last_src_width &&
+ src_height == last_src_height) {
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
- uint8_t *src_y = cpi->Source->y_buffer;
- int src_ystride = cpi->Source->y_stride;
- uint8_t *last_src_y = cpi->Last_Source->y_buffer;
- int last_src_ystride = cpi->Last_Source->y_stride;
int start_frame = 0;
int frames_to_buffer = 1;
int frame = 0;
@@ -2437,6 +2426,19 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
if (cm->frame_type != KEY_FRAME && rc->reset_high_source_sad)
rc->this_frame_target = rc->avg_frame_bandwidth;
}
+  // For SVC, the new (updated) avg_source_sad[0] for the current superframe
+  // updates the setting for all layers.
+ if (cpi->use_svc) {
+ int sl, tl;
+ SVC *const svc = &cpi->svc;
+ for (sl = 0; sl < svc->number_spatial_layers; ++sl)
+ for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+ int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->avg_source_sad[0] = rc->avg_source_sad[0];
+ }
+ }
// For VBR, under scene change/high content change, force golden refresh.
if (cpi->oxcf.rc_mode == VPX_VBR && cm->frame_type != KEY_FRAME &&
rc->high_source_sad && rc->frames_to_key > 3 &&
@@ -2471,7 +2473,10 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
int thresh_qp = 3 * (rc->worst_quality >> 2);
- int thresh_rate = rc->avg_frame_bandwidth * 10;
+ int thresh_rate = rc->avg_frame_bandwidth << 3;
+  // Use a lower rate threshold for regular (non-screen) video content.
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+ thresh_rate = rc->avg_frame_bandwidth << 2;
if (cm->base_qindex < thresh_qp && frame_size > thresh_rate) {
double rate_correction_factor =
cpi->rc.rate_correction_factors[INTER_NORMAL];
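After this change the re-encode trigger requires the qp to be well below worst quality and the encoded size well above the per-frame budget, with screen content keeping the looser 8x threshold and regular video dropping to 4x. A sketch of the predicate, with parameter names standing in for the cpi/rc fields:

static int overshoot_detected(int base_qindex, int worst_quality,
                              int frame_size, int avg_frame_bandwidth,
                              int is_screen_content) {
  const int thresh_qp = 3 * (worst_quality >> 2);
  const int thresh_rate = is_screen_content ? avg_frame_bandwidth << 3
                                            : avg_frame_bandwidth << 2;
  return base_qindex < thresh_qp && frame_size > thresh_rate;
}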
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c
index 6b2306ce9b0..3407e74c64f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c
@@ -69,10 +69,12 @@ static void fill_mode_costs(VP9_COMP *cpi) {
const FRAME_CONTEXT *const fc = cpi->common.fc;
int i, j;
- for (i = 0; i < INTRA_MODES; ++i)
- for (j = 0; j < INTRA_MODES; ++j)
+ for (i = 0; i < INTRA_MODES; ++i) {
+ for (j = 0; j < INTRA_MODES; ++j) {
vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
vp9_intra_mode_tree);
+ }
+ }
vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
for (i = 0; i < INTRA_MODES; ++i) {
@@ -82,9 +84,28 @@ static void fill_mode_costs(VP9_COMP *cpi) {
fc->uv_mode_prob[i], vp9_intra_mode_tree);
}
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
vp9_cost_tokens(cpi->switchable_interp_costs[i],
fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
+ }
+
+ for (i = TX_8X8; i < TX_SIZES; ++i) {
+ for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
+ const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs);
+ int k;
+ for (k = 0; k <= i; ++k) {
+ int cost = 0;
+ int m;
+ for (m = 0; m <= k - (k == i); ++m) {
+ if (m == k)
+ cost += vp9_cost_zero(tx_probs[m]);
+ else
+ cost += vp9_cost_one(tx_probs[m]);
+ }
+ cpi->tx_size_cost[i - 1][j][k] = cost;
+ }
+ }
+ }
}
static void fill_token_costs(vp9_coeff_cost *c,
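The new tx_size_cost table above caches the bit cost of signalling each transform size. Selection is coded as a unary walk down the levels: one "continue" bit per level below the chosen size, then a "stop" bit that is implicit at the maximum size; that is what the `m <= k - (k == i)` loop bound encodes. An equivalent stand-alone form, where cost_zero/cost_one stand in for vp9_cost_zero/vp9_cost_one:

/* Bit cost of choosing tx size k when the maximum allowed size is max_k.
 * probs[m] is the probability of stopping at level m. */
static int tx_size_bit_cost(const unsigned char *probs, int k, int max_k,
                            int (*cost_zero)(unsigned char),
                            int (*cost_one)(unsigned char)) {
  int cost = 0, m;
  for (m = 0; m < k; ++m) cost += cost_one(probs[m]); /* keep descending */
  if (k < max_k) cost += cost_zero(probs[k]);         /* explicit stop */
  return cost;
}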
@@ -153,10 +174,10 @@ int64_t vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
switch (cpi->common.bit_depth) {
case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
- case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
- return -1;
+ assert(cpi->common.bit_depth == VPX_BITS_12);
+ rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
+ break;
}
#else
int64_t rdmult = 88 * q * q / 24;
@@ -185,10 +206,10 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
switch (bit_depth) {
case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
- case VPX_BITS_12: q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; break;
default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
- return -1;
+ assert(bit_depth == VPX_BITS_12);
+ q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
+ break;
}
#else
(void)bit_depth;
@@ -209,12 +230,11 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
x->sadperbit16 = sad_per_bit16lut_10[qindex];
x->sadperbit4 = sad_per_bit4lut_10[qindex];
break;
- case VPX_BITS_12:
+ default:
+ assert(cpi->common.bit_depth == VPX_BITS_12);
x->sadperbit16 = sad_per_bit16lut_12[qindex];
x->sadperbit4 = sad_per_bit4lut_12[qindex];
break;
- default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
}
#else
(void)cpi;
@@ -471,13 +491,13 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
for (i = 0; i < num_4x4_h; i += 4)
t_left[i] = !!*(const uint32_t *)&left[i];
break;
- case TX_32X32:
+ default:
+ assert(tx_size == TX_32X32);
for (i = 0; i < num_4x4_w; i += 8)
t_above[i] = !!*(const uint64_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
- default: assert(0 && "Invalid transform size."); break;
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h
index 59022c106e2..8201bba7039 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h
@@ -108,7 +108,11 @@ typedef struct RD_OPT {
int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+#if CONFIG_CONSISTENT_RECODE
+ int64_t prediction_type_threshes_prev[MAX_REF_FRAMES][REFERENCE_MODES];
+ int64_t filter_threshes_prev[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+#endif
int RDMULT;
int RDDIV;
} RD_OPT;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
index b6541b0f735..e3672edf529 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
@@ -543,8 +543,9 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int eob = p->eobs[block];
- if (x->block_tx_domain) {
+ if (x->block_tx_domain && eob) {
const int ss_txfrm_size = tx_size << 1;
int64_t this_sse;
const int shift = tx_size == TX_32X32 ? 0 : 2;
@@ -584,14 +585,13 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
const uint8_t *src = &p->src.buf[src_idx];
const uint8_t *dst = &pd->dst.buf[dst_idx];
const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- const uint16_t *eob = &p->eobs[block];
unsigned int tmp;
tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row,
blk_col, plane_bsize, tx_bsize);
*out_sse = (int64_t)tmp * 16;
- if (*eob) {
+ if (eob) {
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, recon16[1024]);
uint8_t *recon = (uint8_t *)recon16;
@@ -604,22 +604,22 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
32, NULL, 0, 0, 0, 0, bs, bs, xd->bd);
if (xd->lossless) {
- vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, eob, xd->bd);
} else {
switch (tx_size) {
case TX_4X4:
- vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, eob, xd->bd);
break;
case TX_8X8:
- vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, eob, xd->bd);
break;
case TX_16X16:
- vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, eob, xd->bd);
break;
- case TX_32X32:
- vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ default:
+ assert(tx_size == TX_32X32);
+ vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, eob, xd->bd);
break;
- default: assert(0 && "Invalid transform size");
}
}
recon = CONVERT_TO_BYTEPTR(recon16);
@@ -627,16 +627,16 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
#endif // CONFIG_VP9_HIGHBITDEPTH
vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs);
switch (tx_size) {
- case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break;
- case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break;
- case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, *eob); break;
- case TX_4X4:
+ case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, eob); break;
+ case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, eob); break;
+ case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, eob); break;
+ default:
+ assert(tx_size == TX_4X4);
// this is like vp9_short_idct4x4 but has a special case around
// eob<=1, which is significant (not just an optimization) for
// the lossless case.
- x->inv_txfm_add(dqcoeff, recon, 32, *eob);
+ x->inv_txfm_add(dqcoeff, recon, 32, eob);
break;
- default: assert(0 && "Invalid transform size"); break;
}
#if CONFIG_VP9_HIGHBITDEPTH
}
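
The dist_block changes above hoist eob into a scalar and route zero-eob blocks away from the transform-domain path. The assumed rationale: with no nonzero dequantized coefficients the reconstruction equals the prediction, so distortion reduces to the prediction SSE and the inverse transform can be skipped. A toy scalar sketch of that degenerate case:

    #include <stdint.h>

    /* When eob == 0, both out_sse and out_dist collapse to the SSE between
       source and unmodified prediction (hypothetical standalone helper). */
    static int64_t sse_when_all_coeffs_zero(const uint8_t *src, int src_stride,
                                            const uint8_t *dst, int dst_stride,
                                            int w, int h) {
      int64_t sse = 0;
      int x, y;
      for (y = 0; y < h; ++y)
        for (x = 0; x < w; ++x) {
          const int d = src[y * src_stride + x] - dst[y * dst_stride + x];
          sse += (int64_t)d * d;
        }
      return sse;
    }
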
@@ -845,20 +845,20 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
{ INT64_MAX, INT64_MAX },
{ INT64_MAX, INT64_MAX },
{ INT64_MAX, INT64_MAX } };
- int n, m;
+ int n;
int s0, s1;
- int64_t best_rd = INT64_MAX;
+ int64_t best_rd = ref_best_rd;
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
-
- const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+ const int tx_size_ctx = get_tx_size_context(xd);
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
if (cm->tx_mode == TX_MODE_SELECT) {
start_tx = max_tx_size;
- end_tx = 0;
+ end_tx = VPXMAX(start_tx - cpi->sf.tx_size_search_depth, 0);
+ if (bs > BLOCK_32X32) end_tx = VPXMIN(end_tx + 1, start_tx);
} else {
TX_SIZE chosen_tx_size =
VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
@@ -867,15 +867,9 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
}
for (n = start_tx; n >= end_tx; n--) {
- int r_tx_size = 0;
- for (m = 0; m <= n - (n == (int)max_tx_size); m++) {
- if (m == n)
- r_tx_size += vp9_cost_zero(tx_probs[m]);
- else
- r_tx_size += vp9_cost_one(tx_probs[m]);
- }
- txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0,
- bs, n, cpi->sf.use_fast_coef_costing);
+ const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n];
+ txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n,
+ cpi->sf.use_fast_coef_costing);
r[n][1] = r[n][0];
if (r[n][0] < INT_MAX) {
r[n][1] += r_tx_size;
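
Two effects of the hunk above: the per-block unary-cost loop is replaced by a lookup into the cpi->tx_size_cost table built in vp9_rd.c, and the new tx_size_search_depth speed feature (default 2, set in vp9_speed_features.c below) stops the search before TX_4X4. A sketch of the implied range, assuming the usual TX_4X4..TX_32X32 enum values 0..3:

    /* start_tx/end_tx as computed in the patched choose_tx_size_from_rd. */
    static void tx_search_range(int max_tx_size, int depth, int is_above_32x32,
                                int *start_tx, int *end_tx) {
      *start_tx = max_tx_size;
      *end_tx = max_tx_size - depth;
      if (*end_tx < 0) *end_tx = 0;
      if (is_above_32x32 && *end_tx < *start_tx) ++*end_tx; /* bs > 32x32 */
    }

With depth 2 and max TX_32X32, blocks above 32x32 search {TX_32X32, TX_16X16} and smaller blocks additionally try TX_8X8.
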
@@ -1468,11 +1462,11 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
if (is_compound)
this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int;
break;
- case ZEROMV:
+ default:
+ assert(mode == ZEROMV);
this_mv[0].as_int = 0;
if (is_compound) this_mv[1].as_int = 0;
break;
- default: break;
}
mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
@@ -3618,9 +3612,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
}
if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
- // If adaptive interp filter is enabled, then the current leaf node of 8x8
- // data is needed for sub8x8. Hence preserve the context.
+// If adaptive interp filter is enabled, then the current leaf node of 8x8
+// data is needed for sub8x8. Hence preserve the context.
+#if CONFIG_CONSISTENT_RECODE
+ if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
+#else
if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
+#endif
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
return;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
index 9a46e98839b..d2842697dae 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
@@ -32,7 +32,7 @@ static MESH_PATTERN
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
static int frame_is_boosted(const VP9_COMP *cpi) {
- return frame_is_kf_gf_arf(cpi) || vp9_is_upper_layer_key_frame(cpi);
+ return frame_is_kf_gf_arf(cpi);
}
// Sets a partition size down to which the auto partition code will always
@@ -374,6 +374,9 @@ static void set_rt_speed_feature_framesize_independent(
sf->use_compound_nonrd_pickmode = 0;
sf->nonrd_keyframe = 0;
sf->svc_use_lowres_part = 0;
+ sf->re_encode_overshoot_rt = 0;
+ sf->disable_16x16part_nonkey = 0;
+ sf->disable_golden_ref = 0;
if (speed >= 1) {
sf->allow_txfm_domain_distortion = 1;
@@ -534,6 +537,16 @@ static void set_rt_speed_feature_framesize_independent(
// Keep nonrd_keyframe = 1 for non-base spatial layers to prevent
// increase in encoding time.
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1;
+ if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
+ cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
+ (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) {
+ sf->re_encode_overshoot_rt = 1;
+ }
+ if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
+ cm->width <= 1280 && cm->height <= 720) {
+ sf->use_altref_onepass = 1;
+ sf->use_compound_nonrd_pickmode = 1;
+ }
}
if (speed >= 6) {
@@ -656,6 +669,21 @@ static void set_rt_speed_feature_framesize_independent(
sf->limit_newmv_early_exit = 0;
sf->use_simple_block_yrd = 1;
}
+
+ if (speed >= 9) {
+ sf->mv.enable_adaptive_subpel_force_stop = 1;
+ sf->mv.adapt_subpel_force_stop.mv_thresh = 2;
+ if (cpi->rc.avg_frame_low_motion < 40)
+ sf->mv.adapt_subpel_force_stop.mv_thresh = 1;
+ sf->mv.adapt_subpel_force_stop.force_stop_below = 1;
+ sf->mv.adapt_subpel_force_stop.force_stop_above = 2;
+    // Disable partitioning below 16x16, except at low resolutions.
+ if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240)
+ sf->disable_16x16part_nonkey = 1;
+ // Allow for disabling GOLDEN reference, for CBR mode.
+ if (cpi->oxcf.rc_mode == VPX_CBR) sf->disable_golden_ref = 1;
+ }
+
if (sf->use_altref_onepass) {
if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) {
sf->partition_search_type = FIXED_PARTITION;
@@ -812,6 +840,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
// Some speed-up features even for best quality as minimal impact on quality.
sf->adaptive_rd_thresh = 1;
sf->tx_size_search_breakout = 1;
+ sf->tx_size_search_depth = 2;
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h
index 50d52bc23a4..251cfdbcdf1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h
@@ -161,6 +161,17 @@ typedef enum {
ONE_LOOP_REDUCED = 1
} FAST_COEFF_UPDATE;
+typedef struct ADAPT_SUBPEL_FORCE_STOP {
+  // Threshold on the full-pel motion vector.
+ int mv_thresh;
+
+  // subpel_force_stop to use when the full-pel MV is below the threshold.
+ int force_stop_below;
+
+  // subpel_force_stop to use when the full-pel MV is at or above the threshold.
+ int force_stop_above;
+} ADAPT_SUBPEL_FORCE_STOP;
+
typedef struct MV_SPEED_FEATURES {
// Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
SEARCH_METHODS search_method;
@@ -189,6 +200,11 @@ typedef struct MV_SPEED_FEATURES {
// 3: Stop at full pixel.
int subpel_force_stop;
+  // If enabled, subpel_force_stop is chosen adaptively per motion vector
+  // (see the sketch after this struct).
+ int enable_adaptive_subpel_force_stop;
+
+ ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop;
+
// This variable sets the step_param used in full pel motion search.
int fullpel_search_step_param;
} MV_SPEED_FEATURES;
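
A minimal sketch of how the adaptive fields above might be consulted; the actual decision sits in the sub-pixel motion search (vp9_mcomp.c), so this helper and its exact comparison are assumptions, not that code:

    /* Pick a subpel_force_stop level from the full-pel motion vector. */
    static int pick_subpel_force_stop(int mv_row, int mv_col,
                                      const ADAPT_SUBPEL_FORCE_STOP *cfg) {
      const int abs_row = mv_row >= 0 ? mv_row : -mv_row;
      const int abs_col = mv_col >= 0 ? mv_col : -mv_col;
      if (abs_row < cfg->mv_thresh && abs_col < cfg->mv_thresh)
        return cfg->force_stop_below; /* small motion: refine further */
      return cfg->force_stop_above;   /* large motion: stop earlier */
    }
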
@@ -272,6 +288,9 @@ typedef struct SPEED_FEATURES {
// for intra and model coefs for the rest.
TX_SIZE_SEARCH_METHOD tx_size_search_method;
+ // How many levels of tx size to search, starting from the largest.
+ int tx_size_search_depth;
+
// Low precision 32x32 fdct keeps everything in 16 bits and thus is less
// precise but significantly faster than the non lp version.
int use_lp32x32fdct;
@@ -508,6 +527,16 @@ typedef struct SPEED_FEATURES {
// For SVC: enables use of partition from lower spatial resolution.
int svc_use_lowres_part;
+
+ // Enable re-encoding on scene change with potential high overshoot,
+ // for real-time encoding flow.
+ int re_encode_overshoot_rt;
+
+  // Disable partitioning below 16x16 on non-key frames.
+ int disable_16x16part_nonkey;
+
+ // Allow for disabling golden reference.
+ int disable_golden_ref;
} SPEED_FEATURES;
struct VP9_COMP;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
index 4dfdc65b727..fec0fa8930d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -41,17 +41,21 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->disable_inter_layer_pred = INTER_LAYER_PRED_ON;
svc->framedrop_mode = CONSTRAINED_LAYER_DROP;
- for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
+ for (i = 0; i < REF_FRAMES; ++i) {
+ svc->fb_idx_spatial_layer_id[i] = -1;
+ svc->fb_idx_temporal_layer_id[i] = -1;
+ }
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
svc->last_layer_dropped[sl] = 0;
svc->drop_spatial_layer[sl] = 0;
svc->ext_frame_flags[sl] = 0;
- svc->ext_lst_fb_idx[sl] = 0;
- svc->ext_gld_fb_idx[sl] = 1;
- svc->ext_alt_fb_idx[sl] = 2;
+ svc->lst_fb_idx[sl] = 0;
+ svc->gld_fb_idx[sl] = 1;
+ svc->alt_fb_idx[sl] = 2;
svc->downsample_filter_type[sl] = BILINEAR;
svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter.
svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark;
+ svc->fb_idx_upd_tl0[sl] = -1;
}
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
@@ -311,7 +315,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
// Reset the frames_since_key and frames_to_key counters to their values
// before the layer restore. Keep these defined for the stream (not layer).
if (cpi->svc.number_temporal_layers > 1 ||
- (cpi->svc.number_spatial_layers > 1 && !is_two_pass_svc(cpi))) {
+ cpi->svc.number_spatial_layers > 1) {
cpi->rc.frames_since_key = old_frame_since_key;
cpi->rc.frames_to_key = old_frame_to_key;
}
@@ -389,15 +393,6 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) {
++cpi->svc.current_superframe;
}
-int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
- return is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0 &&
- cpi->svc
- .layer_context[cpi->svc.spatial_layer_id *
- cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id]
- .is_key_frame;
-}
-
void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out) {
@@ -416,6 +411,40 @@ void get_layer_resolution(const int width_org, const int height_org,
*height_out = h;
}
+void reset_fb_idx_unused(VP9_COMP *const cpi) {
+  // If a reference frame is neither referenced nor refreshed, point its
+  // fb_idx at the first reference that is used, so that no buffer slot is
+  // assigned to a reference that is not actually needed (worked example
+  // after this function).
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
+ MV_REFERENCE_FRAME ref_frame;
+ MV_REFERENCE_FRAME first_ref = 0;
+ int first_fb_idx = 0;
+ int fb_idx[3] = { cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx };
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+ if (cpi->ref_frame_flags & flag_list[ref_frame]) {
+ first_ref = ref_frame;
+ first_fb_idx = fb_idx[ref_frame - 1];
+ break;
+ }
+ }
+ if (first_ref > 0) {
+ if (first_ref != LAST_FRAME &&
+ !(cpi->ref_frame_flags & flag_list[LAST_FRAME]) &&
+ !cpi->ext_refresh_last_frame)
+ cpi->lst_fb_idx = first_fb_idx;
+ else if (first_ref != GOLDEN_FRAME &&
+ !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
+ !cpi->ext_refresh_golden_frame)
+ cpi->gld_fb_idx = first_fb_idx;
+ else if (first_ref != ALTREF_FRAME &&
+ !(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]) &&
+ !cpi->ext_refresh_alt_ref_frame)
+ cpi->alt_fb_idx = first_fb_idx;
+ }
+}
+
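
Worked example with assumed values: take lst_fb_idx = 0, gld_fb_idx = 1, alt_fb_idx = 2, with only GOLDEN referenced and neither LAST nor ALTREF refreshed. The scan sets first_ref = GOLDEN_FRAME and first_fb_idx = 1, so lst_fb_idx is remapped to 1; since the remapping is an else-if chain, at most one unused reference is remapped per call, and alt_fb_idx keeps its slot.
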
// The function sets proper ref_frame_flags, buffer indices, and buffer update
// variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering
// scheme.
@@ -519,6 +548,8 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
}
+
+ reset_fb_idx_unused(cpi);
}
// The function sets proper ref_frame_flags, buffer indices, and buffer update
@@ -578,6 +609,8 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
}
+
+ reset_fb_idx_unused(cpi);
}
// The function sets proper ref_frame_flags, buffer indices, and buffer update
@@ -610,6 +643,28 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
} else {
cpi->gld_fb_idx = 0;
}
+
+ reset_fb_idx_unused(cpi);
+}
+
+void vp9_copy_flags_ref_update_idx(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
+ int sl = svc->spatial_layer_id;
+ svc->lst_fb_idx[sl] = cpi->lst_fb_idx;
+ svc->gld_fb_idx[sl] = cpi->gld_fb_idx;
+ svc->alt_fb_idx[sl] = cpi->alt_fb_idx;
+
+ svc->update_last[sl] = (uint8_t)cpi->refresh_last_frame;
+ svc->update_golden[sl] = (uint8_t)cpi->refresh_golden_frame;
+ svc->update_altref[sl] = (uint8_t)cpi->refresh_alt_ref_frame;
+ svc->reference_last[sl] =
+ (uint8_t)(cpi->ref_frame_flags & flag_list[LAST_FRAME]);
+ svc->reference_golden[sl] =
+ (uint8_t)(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]);
+ svc->reference_altref[sl] =
+ (uint8_t)(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
}
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
@@ -646,18 +701,30 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
sl = cpi->svc.spatial_layer_id;
vp9_apply_encoding_flags(cpi, cpi->svc.ext_frame_flags[sl]);
- cpi->lst_fb_idx = cpi->svc.ext_lst_fb_idx[sl];
- cpi->gld_fb_idx = cpi->svc.ext_gld_fb_idx[sl];
- cpi->alt_fb_idx = cpi->svc.ext_alt_fb_idx[sl];
+ cpi->lst_fb_idx = cpi->svc.lst_fb_idx[sl];
+ cpi->gld_fb_idx = cpi->svc.gld_fb_idx[sl];
+ cpi->alt_fb_idx = cpi->svc.alt_fb_idx[sl];
}
}
// Reset the drop flags for all spatial layers, on the base layer.
if (cpi->svc.spatial_layer_id == 0) {
- int i;
- for (i = 0; i < cpi->svc.number_spatial_layers; i++) {
- cpi->svc.drop_spatial_layer[i] = 0;
+ vp9_zero(cpi->svc.drop_spatial_layer);
+ // TODO(jianj/marpan): Investigate why setting cpi->svc.lst/gld/alt_fb_idx
+ // causes an issue with frame dropping and temporal layers, when the frame
+ // flags are passed via the encode call (bypass mode). Issue is that we're
+ // resetting ext_refresh_frame_flags_pending to 0 on frame drops.
+ if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ memset(&cpi->svc.lst_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx));
+      memset(&cpi->svc.gld_fb_idx, -1, sizeof(cpi->svc.gld_fb_idx));
+      memset(&cpi->svc.alt_fb_idx, -1, sizeof(cpi->svc.alt_fb_idx));
}
+ vp9_zero(cpi->svc.update_last);
+ vp9_zero(cpi->svc.update_golden);
+ vp9_zero(cpi->svc.update_altref);
+ vp9_zero(cpi->svc.reference_last);
+ vp9_zero(cpi->svc.reference_golden);
+ vp9_zero(cpi->svc.reference_altref);
}
lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
@@ -721,6 +788,19 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
cpi->svc.non_reference_frame = 1;
}
+ if (cpi->svc.spatial_layer_id == 0) cpi->svc.high_source_sad_superframe = 0;
+
+ if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] &&
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] != -1 &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
+ // For fixed/non-flexible mode, if the previous frame (same spatial layer
+ // from previous superframe) was dropped, make sure the lst_fb_idx
+ // for this frame corresponds to the buffer index updated on (last) encoded
+ // TL0 frame (with same spatial layer).
+ cpi->lst_fb_idx = cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id];
+ }
+
if (vp9_set_size_literal(cpi, width, height) != 0)
return VPX_CODEC_INVALID_PARAM;
@@ -806,3 +886,106 @@ void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) {
}
}
}
+
+void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+  // Check for disabling inter-layer (spatial) prediction if
+ // svc.disable_inter_layer_pred is set. If the previous spatial layer was
+ // dropped then disable the prediction from this (scaled) reference.
+ if ((cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||
+ cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF ||
+ cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) {
+ MV_REFERENCE_FRAME ref_frame;
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+ if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) {
+ const struct scale_factors *const scale_fac =
+ &cm->frame_refs[ref_frame - 1].sf;
+ if (vp9_is_scaled(scale_fac))
+ cpi->ref_frame_flags &= (~flag_list[ref_frame]);
+ }
+ }
+ }
+  // Disable inter-layer prediction if the reference used for it (the
+  // reference that is scaled) is not the previous spatial layer from the
+  // same superframe. Only needs checking when inter-layer prediction is
+  // not set to OFF mode.
+ if (cpi->svc.disable_inter_layer_pred != INTER_LAYER_PRED_OFF) {
+ // We only use LAST and GOLDEN for prediction in real-time mode, so we
+ // check both here.
+ MV_REFERENCE_FRAME ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ref_frame++) {
+ struct scale_factors *scale_fac = &cm->frame_refs[ref_frame - 1].sf;
+ if (vp9_is_scaled(scale_fac)) {
+ // If this reference was updated on the previous spatial layer of the
+ // current superframe, then we keep this reference (don't disable).
+ // Otherwise we disable the inter-layer prediction.
+ // This condition is verified by checking if the current frame buffer
+ // index is equal to any of the slots for the previous spatial layer,
+ // and if so, check if that slot was updated/refreshed. If that is the
+ // case, then this reference is valid for inter-layer prediction under
+ // the mode INTER_LAYER_PRED_ON_CONSTRAINED.
+ int fb_idx =
+ ref_frame == LAST_FRAME ? cpi->lst_fb_idx : cpi->gld_fb_idx;
+ int ref_flag = ref_frame == LAST_FRAME ? VP9_LAST_FLAG : VP9_GOLD_FLAG;
+ int sl = cpi->svc.spatial_layer_id;
+ int disable = 1;
+ if ((fb_idx == cpi->svc.lst_fb_idx[sl - 1] &&
+ cpi->svc.update_last[sl - 1]) ||
+ (fb_idx == cpi->svc.gld_fb_idx[sl - 1] &&
+ cpi->svc.update_golden[sl - 1]) ||
+ (fb_idx == cpi->svc.alt_fb_idx[sl - 1] &&
+ cpi->svc.update_altref[sl - 1]))
+ disable = 0;
+ if (disable) cpi->ref_frame_flags &= (~ref_flag);
+ }
+ }
+ }
+}
+
+void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // For fixed/non-flexible mode, and with CONSTRAINED frame drop
+  // mode (default), the following constraints are expected, when
+  // inter-layer prediction is on (default).
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON &&
+ svc->framedrop_mode == CONSTRAINED_LAYER_DROP) {
+ if (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) {
+ // On non-key frames: LAST is always temporal reference, GOLDEN is
+ // spatial reference.
+ if (svc->temporal_layer_id == 0)
+ // Base temporal only predicts from base temporal.
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == 0);
+ else
+ // Non-base temporal only predicts from lower temporal layer.
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] <
+ svc->temporal_layer_id);
+ if (svc->spatial_layer_id > 0) {
+ // Non-base spatial only predicts from lower spatial layer with same
+ // temporal_id.
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+ svc->temporal_layer_id);
+ }
+ } else if (svc->spatial_layer_id > 0) {
+    // Only one reference for a frame whose base layer is key; the
+    // reference may be LAST or GOLDEN, so check both.
+ if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+ assert(svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] ==
+ svc->temporal_layer_id);
+ } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+ svc->temporal_layer_id);
+ }
+ }
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
index a7fa26924f3..367c93a2f60 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -20,9 +20,16 @@ extern "C" {
#endif
typedef enum {
+  // Inter-layer prediction is enabled on all frames.
INTER_LAYER_PRED_ON,
+  // Inter-layer prediction is disabled on all frames.
INTER_LAYER_PRED_OFF,
- INTER_LAYER_PRED_OFF_NONKEY
+  // Inter-layer prediction is disabled on non-key frames.
+ INTER_LAYER_PRED_OFF_NONKEY,
+  // Inter-layer prediction is enabled on all frames, but constrained so
+  // that any spatial layer S (> 0) may only predict from the previous
+  // spatial layer S-1 within the same superframe.
+ INTER_LAYER_PRED_ON_CONSTRAINED
} INTER_LAYER_PRED;
typedef struct {
@@ -86,10 +93,9 @@ typedef struct SVC {
// Frame flags and buffer indexes for each spatial layer, set by the
// application (external settings).
int ext_frame_flags[VPX_MAX_LAYERS];
- int ext_lst_fb_idx[VPX_MAX_LAYERS];
- int ext_gld_fb_idx[VPX_MAX_LAYERS];
- int ext_alt_fb_idx[VPX_MAX_LAYERS];
- int ref_frame_index[REF_FRAMES];
+ int lst_fb_idx[VPX_MAX_LAYERS];
+ int gld_fb_idx[VPX_MAX_LAYERS];
+ int alt_fb_idx[VPX_MAX_LAYERS];
int force_zero_mode_spatial_ref;
int current_superframe;
int non_reference_frame;
@@ -118,6 +124,28 @@ typedef struct SVC {
SVC_LAYER_DROP_MODE framedrop_mode;
INTER_LAYER_PRED disable_inter_layer_pred;
+
+  // Flag to indicate a scene change at the current superframe. Scene
+  // detection is currently run on the full-resolution source for each
+  // superframe prior to encoding.
+ int high_source_sad_superframe;
+
+ // Flags used to get SVC pattern info.
+ uint8_t update_last[VPX_SS_MAX_LAYERS];
+ uint8_t update_golden[VPX_SS_MAX_LAYERS];
+ uint8_t update_altref[VPX_SS_MAX_LAYERS];
+ uint8_t reference_last[VPX_SS_MAX_LAYERS];
+ uint8_t reference_golden[VPX_SS_MAX_LAYERS];
+ uint8_t reference_altref[VPX_SS_MAX_LAYERS];
+
+  // Keep track of the frame buffer index updated/refreshed by the last
+  // encoded base temporal layer (TL0) frame.
+ int fb_idx_upd_tl0[VPX_SS_MAX_LAYERS];
+
+ // Keep track of the spatial and temporal layer id of the frame that last
+ // updated the frame buffer index.
+ uint8_t fb_idx_spatial_layer_id[REF_FRAMES];
+ uint8_t fb_idx_temporal_layer_id[REF_FRAMES];
} SVC;
struct VP9_COMP;
@@ -165,6 +193,8 @@ struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi,
// Start a frame and initialize svc parameters
int vp9_svc_start_frame(struct VP9_COMP *const cpi);
+void vp9_copy_flags_ref_update_idx(struct VP9_COMP *const cpi);
+
int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi);
void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
@@ -173,6 +203,10 @@ void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi);
void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);
+void vp9_svc_constrain_inter_layer_pred(struct VP9_COMP *const cpi);
+
+void vp9_svc_assert_constraints_pattern(struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
index c84e9fc1a2e..d6c6ece9168 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
@@ -248,7 +248,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(extra_cfg, row_mt, 0, 1);
RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2);
RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
- RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
+ RANGE_CHECK(extra_cfg, cpu_used, -9, 9);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
RANGE_CHECK(extra_cfg, tile_rows, 0, 2);
@@ -1074,23 +1074,11 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
if (cpi->oxcf.pass == 2 && cpi->level_constraint.level_index >= 0 &&
!cpi->level_constraint.rc_config_updated) {
- SVC *const svc = &cpi->svc;
- const int is_two_pass_svc =
- (svc->number_spatial_layers > 1) || (svc->number_temporal_layers > 1);
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
TWO_PASS *const twopass = &cpi->twopass;
FIRSTPASS_STATS *stats = &twopass->total_stats;
- if (is_two_pass_svc) {
- const double frame_rate = 10000000.0 * stats->count / stats->duration;
- vp9_update_spatial_layer_framerate(cpi, frame_rate);
- twopass->bits_left =
- (int64_t)(stats->duration *
- svc->layer_context[svc->spatial_layer_id].target_bandwidth /
- 10000000.0);
- } else {
- twopass->bits_left =
- (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
- }
+ twopass->bits_left =
+ (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
cpi->level_constraint.rc_config_updated = 1;
}
@@ -1460,9 +1448,6 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
svc->first_spatial_layer_to_encode >= (int)ctx->cfg.ss_number_layers) {
return VPX_CODEC_INVALID_PARAM;
}
- // First spatial layer to encode not implemented for two-pass.
- if (is_two_pass_svc(cpi) && svc->first_spatial_layer_to_encode > 0)
- return VPX_CODEC_INVALID_PARAM;
return VPX_CODEC_OK;
}
@@ -1502,6 +1487,25 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_get_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *);
+ int sl;
+ for (sl = 0; sl <= cpi->svc.spatial_layer_id; sl++) {
+ data->update_last[sl] = cpi->svc.update_last[sl];
+ data->update_golden[sl] = cpi->svc.update_golden[sl];
+ data->update_alt_ref[sl] = cpi->svc.update_altref[sl];
+ data->reference_last[sl] = cpi->svc.reference_last[sl];
+ data->reference_golden[sl] = cpi->svc.reference_golden[sl];
+ data->reference_alt_ref[sl] = cpi->svc.reference_altref[sl];
+ data->lst_fb_idx[sl] = cpi->svc.lst_fb_idx[sl];
+ data->gld_fb_idx[sl] = cpi->svc.gld_fb_idx[sl];
+ data->alt_fb_idx[sl] = cpi->svc.alt_fb_idx[sl];
+ }
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx,
va_list args) {
VP9_COMP *const cpi = ctx->cpi;
@@ -1509,9 +1513,9 @@ static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx,
int sl;
for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
cpi->svc.ext_frame_flags[sl] = data->frame_flags[sl];
- cpi->svc.ext_lst_fb_idx[sl] = data->lst_fb_idx[sl];
- cpi->svc.ext_gld_fb_idx[sl] = data->gld_fb_idx[sl];
- cpi->svc.ext_alt_fb_idx[sl] = data->alt_fb_idx[sl];
+ cpi->svc.lst_fb_idx[sl] = data->lst_fb_idx[sl];
+ cpi->svc.gld_fb_idx[sl] = data->gld_fb_idx[sl];
+ cpi->svc.alt_fb_idx[sl] = data->alt_fb_idx[sl];
}
return VPX_CODEC_OK;
}
@@ -1628,6 +1632,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id },
{ VP9E_GET_ACTIVEMAP, ctrl_get_active_map },
{ VP9E_GET_LEVEL, ctrl_get_level },
+ { VP9E_GET_SVC_REF_FRAME_CONFIG, ctrl_get_svc_ref_frame_config },
{ -1, NULL },
};
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_image.c b/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_image.c
index af7c529a7ba..a7c6ec0ceab 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_image.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_image.c
@@ -38,23 +38,8 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
/* Get sample size for this format */
switch (fmt) {
- case VPX_IMG_FMT_RGB32:
- case VPX_IMG_FMT_RGB32_LE:
- case VPX_IMG_FMT_ARGB:
- case VPX_IMG_FMT_ARGB_LE: bps = 32; break;
- case VPX_IMG_FMT_RGB24:
- case VPX_IMG_FMT_BGR24: bps = 24; break;
- case VPX_IMG_FMT_RGB565:
- case VPX_IMG_FMT_RGB565_LE:
- case VPX_IMG_FMT_RGB555:
- case VPX_IMG_FMT_RGB555_LE:
- case VPX_IMG_FMT_UYVY:
- case VPX_IMG_FMT_YUY2:
- case VPX_IMG_FMT_YVYU: bps = 16; break;
case VPX_IMG_FMT_I420:
- case VPX_IMG_FMT_YV12:
- case VPX_IMG_FMT_VPXI420:
- case VPX_IMG_FMT_VPXYV12: bps = 12; break;
+ case VPX_IMG_FMT_YV12: bps = 12; break;
case VPX_IMG_FMT_I422:
case VPX_IMG_FMT_I440: bps = 16; break;
case VPX_IMG_FMT_I444: bps = 24; break;
@@ -69,8 +54,6 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
switch (fmt) {
case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_YV12:
- case VPX_IMG_FMT_VPXI420:
- case VPX_IMG_FMT_VPXYV12:
case VPX_IMG_FMT_I422:
case VPX_IMG_FMT_I42016:
case VPX_IMG_FMT_I42216: xcs = 1; break;
@@ -81,8 +64,6 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
case VPX_IMG_FMT_I420:
case VPX_IMG_FMT_I440:
case VPX_IMG_FMT_YV12:
- case VPX_IMG_FMT_VPXI420:
- case VPX_IMG_FMT_VPXYV12:
case VPX_IMG_FMT_I42016:
case VPX_IMG_FMT_I44016: ycs = 1; break;
default: ycs = 0; break;
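
On the bps values that survive the trim: bps is bits per pixel of the layout. The retained 4:2:0 formats (I420/YV12) carry one luma plus two quarter-resolution chroma samples per pixel, so 8 * (1 + 1/4 + 1/4) = 12 bits; 4:2:2 and 4:4:0 give 8 * (1 + 1/2 + 1/2) = 16, and 4:4:4 gives 8 * 3 = 24, matching the cases kept above.
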
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
index f409844b590..b201d96f4fa 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
@@ -620,6 +620,13 @@ enum vp8e_enc_control_id {
* Supported in codecs: VP9
*/
VP9E_SET_SVC_FRAME_DROP_LAYER,
+
+ /*!\brief Codec control function to get the refresh and reference flags and
+ * the buffer indices, up to the last encoded spatial layer.
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_GET_SVC_REF_FRAME_CONFIG,
};
/*!\brief vpx 1-D scaling mode
@@ -757,10 +764,18 @@ typedef struct vpx_svc_layer_id {
*
*/
typedef struct vpx_svc_ref_frame_config {
- int frame_flags[VPX_TS_MAX_LAYERS]; /**< Frame flags. */
- int lst_fb_idx[VPX_TS_MAX_LAYERS]; /**< Last buffer index. */
- int gld_fb_idx[VPX_TS_MAX_LAYERS]; /**< Golden buffer index. */
- int alt_fb_idx[VPX_TS_MAX_LAYERS]; /**< Altref buffer index. */
+ // TODO(jianj/marpan): Remove the usage of frame_flags, instead use the
+ // update and reference flags.
+ int frame_flags[VPX_SS_MAX_LAYERS]; /**< Frame flags. */
+ int lst_fb_idx[VPX_SS_MAX_LAYERS]; /**< Last buffer index. */
+ int gld_fb_idx[VPX_SS_MAX_LAYERS]; /**< Golden buffer index. */
+ int alt_fb_idx[VPX_SS_MAX_LAYERS]; /**< Altref buffer index. */
+ int update_last[VPX_SS_MAX_LAYERS]; /**< Update last. */
+ int update_golden[VPX_SS_MAX_LAYERS]; /**< Update golden. */
+ int update_alt_ref[VPX_SS_MAX_LAYERS]; /**< Update altref. */
+  int reference_last[VPX_SS_MAX_LAYERS];   /**< Last as reference. */
+ int reference_golden[VPX_SS_MAX_LAYERS]; /**< Golden as reference. */
+ int reference_alt_ref[VPX_SS_MAX_LAYERS]; /**< Altref as reference. */
} vpx_svc_ref_frame_config_t;
/*!\brief VP9 svc frame dropping mode.
@@ -927,6 +942,9 @@ VPX_CTRL_USE_TYPE(VP9E_SET_SVC_INTER_LAYER_PRED, unsigned int)
VPX_CTRL_USE_TYPE(VP9E_SET_SVC_FRAME_DROP_LAYER, vpx_svc_frame_drop_t *)
#define VPX_CTRL_VP9E_SET_SVC_FRAME_DROP_LAYER
+VPX_CTRL_USE_TYPE(VP9E_GET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *)
+#define VPX_CTRL_VP9E_GET_SVC_REF_FRAME_CONFIG
+
/*!\endcond */
/*! @} - end defgroup vp8_encoder */
#ifdef __cplusplus
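
A minimal usage sketch for the new GET control; the encoder context ctx and the spatial-layer count num_sl are assumed to come from the caller (hypothetical names):

    #include <stdio.h>
    #include <string.h>
    #include "vpx/vp8cx.h"
    #include "vpx/vpx_encoder.h"

    static void dump_svc_ref_config(vpx_codec_ctx_t *ctx, int num_sl) {
      vpx_svc_ref_frame_config_t cfg;
      int sl;
      memset(&cfg, 0, sizeof(cfg));
      if (vpx_codec_control(ctx, VP9E_GET_SVC_REF_FRAME_CONFIG, &cfg) !=
          VPX_CODEC_OK)
        return;
      /* Filled up to the last encoded spatial layer. */
      for (sl = 0; sl < num_sl; ++sl)
        printf("SL%d: fb_idx L/G/A=%d/%d/%d update L/G/A=%d/%d/%d\n", sl,
               cfg.lst_fb_idx[sl], cfg.gld_fb_idx[sl], cfg.alt_fb_idx[sl],
               cfg.update_last[sl], cfg.update_golden[sl],
               cfg.update_alt_ref[sl]);
    }
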
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h
index 4017e5719a5..8c08017b6ee 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h
@@ -63,7 +63,7 @@ extern "C" {
* fields to structures
*/
#define VPX_ENCODER_ABI_VERSION \
- (11 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
+ (12 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
/*! \brief Encoder capabilities bitfield
*
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_image.h b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_image.h
index d6d3166d2ff..0c9cac73678 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_image.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_image.h
@@ -27,7 +27,7 @@ extern "C" {
* types, removing or reassigning enums, adding/removing/rearranging
* fields to structures
*/
-#define VPX_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/
+#define VPX_IMAGE_ABI_VERSION (5) /**<\hideinitializer*/
#define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */
#define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U in memory. */
@@ -37,29 +37,12 @@ extern "C" {
/*!\brief List of supported image formats */
typedef enum vpx_img_fmt {
VPX_IMG_FMT_NONE,
- VPX_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */
- VPX_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */
- VPX_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */
- VPX_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */
- VPX_IMG_FMT_UYVY, /**< UYVY packed YUV */
- VPX_IMG_FMT_YUY2, /**< YUYV packed YUV */
- VPX_IMG_FMT_YVYU, /**< YVYU packed YUV */
- VPX_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */
- VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */
- VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */
- VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */
- VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */
- VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
VPX_IMG_FMT_YV12 =
VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2,
- VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP |
- 3, /** < planar 4:2:0 format with vpx color space */
- VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4,
VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5,
VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6,
VPX_IMG_FMT_I440 = VPX_IMG_FMT_PLANAR | 7,
- VPX_IMG_FMT_444A = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 6,
VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGHBITDEPTH,
VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGHBITDEPTH,
VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGHBITDEPTH,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/avg_pred_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/avg_pred_neon.c
index 1370ec2d2ea..5afdece0aba 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/avg_pred_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/avg_pred_neon.c
@@ -17,8 +17,8 @@
void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride) {
if (width > 8) {
- int x, y;
- for (y = 0; y < height; ++y) {
+ int x, y = height;
+ do {
for (x = 0; x < width; x += 16) {
const uint8x16_t p = vld1q_u8(pred + x);
const uint8x16_t r = vld1q_u8(ref + x);
@@ -28,28 +28,38 @@ void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width,
comp += width;
pred += width;
ref += ref_stride;
- }
+ } while (--y);
+ } else if (width == 8) {
+ int i = width * height;
+ do {
+ const uint8x16_t p = vld1q_u8(pred);
+ uint8x16_t r;
+ const uint8x8_t r_0 = vld1_u8(ref);
+ const uint8x8_t r_1 = vld1_u8(ref + ref_stride);
+ r = vcombine_u8(r_0, r_1);
+ ref += 2 * ref_stride;
+ r = vrhaddq_u8(r, p);
+ vst1q_u8(comp, r);
+
+ pred += 16;
+ comp += 16;
+ i -= 16;
+ } while (i);
} else {
- int i;
- for (i = 0; i < width * height; i += 16) {
+ int i = width * height;
+ assert(width == 4);
+ do {
const uint8x16_t p = vld1q_u8(pred);
uint8x16_t r;
- if (width == 4) {
- r = load_unaligned_u8q(ref, ref_stride);
- ref += 4 * ref_stride;
- } else {
- const uint8x8_t r_0 = vld1_u8(ref);
- const uint8x8_t r_1 = vld1_u8(ref + ref_stride);
- assert(width == 8);
- r = vcombine_u8(r_0, r_1);
- ref += 2 * ref_stride;
- }
+ r = load_unaligned_u8q(ref, ref_stride);
+ ref += 4 * ref_stride;
r = vrhaddq_u8(r, p);
vst1q_u8(comp, r);
pred += 16;
comp += 16;
- }
+ i -= 16;
+ } while (i);
}
}
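
For reference, a scalar sketch of the operation the NEON code above vectorizes with vrhaddq_u8 (rounding halving add): a rounded average of prediction and reference. This mirrors the semantics, not libvpx's C fallback verbatim:

    #include <stdint.h>

    static void comp_avg_pred_sketch(uint8_t *comp, const uint8_t *pred,
                                     int width, int height, const uint8_t *ref,
                                     int ref_stride) {
      int x, y;
      for (y = 0; y < height; ++y) {
        for (x = 0; x < width; ++x) comp[x] = (pred[x] + ref[x] + 1) >> 1;
        comp += width;
        pred += width;
        ref += ref_stride;
      }
    }
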
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h
index 12c0a54c899..6745464d738 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h
@@ -101,9 +101,9 @@ static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) {
if (stride == 4) return vld1_u8(buf);
memcpy(&a, buf, 4);
buf += stride;
- a_u32 = vld1_lane_u32(&a, a_u32, 0);
+ a_u32 = vset_lane_u32(a, a_u32, 0);
memcpy(&a, buf, 4);
- a_u32 = vld1_lane_u32(&a, a_u32, 1);
+ a_u32 = vset_lane_u32(a, a_u32, 1);
return vreinterpret_u8_u32(a_u32);
}
@@ -127,16 +127,16 @@ static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) {
if (stride == 4) return vld1q_u8(buf);
memcpy(&a, buf, 4);
buf += stride;
- a_u32 = vld1q_lane_u32(&a, a_u32, 0);
+ a_u32 = vsetq_lane_u32(a, a_u32, 0);
memcpy(&a, buf, 4);
buf += stride;
- a_u32 = vld1q_lane_u32(&a, a_u32, 1);
+ a_u32 = vsetq_lane_u32(a, a_u32, 1);
memcpy(&a, buf, 4);
buf += stride;
- a_u32 = vld1q_lane_u32(&a, a_u32, 2);
+ a_u32 = vsetq_lane_u32(a, a_u32, 2);
memcpy(&a, buf, 4);
buf += stride;
- a_u32 = vld1q_lane_u32(&a, a_u32, 3);
+ a_u32 = vsetq_lane_u32(a, a_u32, 3);
return vreinterpretq_u8_u32(a_u32);
}
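
The swap from vld1_lane/vld1q_lane to vset_lane/vsetq_lane inserts the 32-bit value already fetched with memcpy instead of re-reading it through a lane load; the assumed motivation is to avoid the alignment and aliasing assumptions a lane load allows the compiler to make. A scalar view of what load_unaligned_u8q gathers:

    #include <stdint.h>
    #include <string.h>

    /* Gather four unaligned 32-bit rows into one 16-byte block. */
    static void load_unaligned_16_sketch(uint8_t out[16], const uint8_t *buf,
                                         int stride) {
      int row;
      for (row = 0; row < 4; ++row) memcpy(out + 4 * row, buf + row * stride, 4);
    }
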
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c
index b04de3aff26..535ec0f0d6d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c
@@ -10,64 +10,152 @@
#include <arm_neon.h>
+#include <assert.h>
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/sum_neon.h"
+static INLINE uint8x8_t load_unaligned_2_buffers(const void *const buf0,
+ const void *const buf1) {
+ uint32_t a;
+ uint32x2_t aa = vdup_n_u32(0);
+ memcpy(&a, buf0, 4);
+ aa = vset_lane_u32(a, aa, 0);
+ memcpy(&a, buf1, 4);
+ aa = vset_lane_u32(a, aa, 1);
+ return vreinterpret_u8_u32(aa);
+}
+
+static INLINE void sad4x_4d(const uint8_t *const src, const int src_stride,
+ const uint8_t *const ref[4], const int ref_stride,
+ const int height, uint32_t *const res) {
+ int i;
+ uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) };
+ uint16x4_t a[2];
+ uint32x4_t r;
+
+ assert(!((intptr_t)src % sizeof(uint32_t)));
+ assert(!(src_stride % sizeof(uint32_t)));
+
+ for (i = 0; i < height; ++i) {
+ const uint8x8_t s = vreinterpret_u8_u32(
+ vld1_dup_u32((const uint32_t *)(src + i * src_stride)));
+ const uint8x8_t ref01 = load_unaligned_2_buffers(ref[0] + i * ref_stride,
+ ref[1] + i * ref_stride);
+ const uint8x8_t ref23 = load_unaligned_2_buffers(ref[2] + i * ref_stride,
+ ref[3] + i * ref_stride);
+ abs[0] = vabal_u8(abs[0], s, ref01);
+ abs[1] = vabal_u8(abs[1], s, ref23);
+ }
+
+ a[0] = vpadd_u16(vget_low_u16(abs[0]), vget_high_u16(abs[0]));
+ a[1] = vpadd_u16(vget_low_u16(abs[1]), vget_high_u16(abs[1]));
+ r = vpaddlq_u16(vcombine_u16(a[0], a[1]));
+ vst1q_u32(res, r);
+}
+
void vpx_sad4x4x4d_neon(const uint8_t *src, int src_stride,
const uint8_t *const ref[4], int ref_stride,
uint32_t *res) {
- int i;
- const uint8x16_t src_u8 = load_unaligned_u8q(src, src_stride);
- for (i = 0; i < 4; ++i) {
- const uint8x16_t ref_u8 = load_unaligned_u8q(ref[i], ref_stride);
- uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(ref_u8));
- abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8));
- res[i] = vget_lane_u32(horizontal_add_uint16x8(abs), 0);
- }
+ sad4x_4d(src, src_stride, ref, ref_stride, 4, res);
}
void vpx_sad4x8x4d_neon(const uint8_t *src, int src_stride,
const uint8_t *const ref[4], int ref_stride,
uint32_t *res) {
- int i;
- const uint8x16_t src_0 = load_unaligned_u8q(src, src_stride);
- const uint8x16_t src_1 = load_unaligned_u8q(src + 4 * src_stride, src_stride);
- for (i = 0; i < 4; ++i) {
- const uint8x16_t ref_0 = load_unaligned_u8q(ref[i], ref_stride);
- const uint8x16_t ref_1 =
- load_unaligned_u8q(ref[i] + 4 * ref_stride, ref_stride);
- uint16x8_t abs = vabdl_u8(vget_low_u8(src_0), vget_low_u8(ref_0));
- abs = vabal_u8(abs, vget_high_u8(src_0), vget_high_u8(ref_0));
- abs = vabal_u8(abs, vget_low_u8(src_1), vget_low_u8(ref_1));
- abs = vabal_u8(abs, vget_high_u8(src_1), vget_high_u8(ref_1));
- res[i] = vget_lane_u32(horizontal_add_uint16x8(abs), 0);
- }
+ sad4x_4d(src, src_stride, ref, ref_stride, 8, res);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Can handle a SAD sum over up to 512 pixels (e.g., 16x32 or 32x16).
+static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/,
+ uint32_t *const res) {
+ const uint16x4_t a0 = vadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0]));
+ const uint16x4_t a1 = vadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1]));
+ const uint16x4_t a2 = vadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2]));
+ const uint16x4_t a3 = vadd_u16(vget_low_u16(sum[3]), vget_high_u16(sum[3]));
+ const uint16x4_t b0 = vpadd_u16(a0, a1);
+ const uint16x4_t b1 = vpadd_u16(a2, a3);
+ const uint32x4_t r = vpaddlq_u16(vcombine_u16(b0, b1));
+ vst1q_u32(res, r);
}
-static INLINE void sad8x_4d(const uint8_t *a, int a_stride,
- const uint8_t *const b[4], int b_stride,
- uint32_t *result, const int height) {
+// Can handle a SAD sum over up to 1024 pixels (e.g., 32x32).
+static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/,
+ uint32_t *const res) {
+ const uint16x4_t a0 = vpadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0]));
+ const uint16x4_t a1 = vpadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1]));
+ const uint16x4_t a2 = vpadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2]));
+ const uint16x4_t a3 = vpadd_u16(vget_low_u16(sum[3]), vget_high_u16(sum[3]));
+ const uint32x4_t b0 = vpaddlq_u16(vcombine_u16(a0, a1));
+ const uint32x4_t b1 = vpaddlq_u16(vcombine_u16(a2, a3));
+ const uint32x2_t c0 = vpadd_u32(vget_low_u32(b0), vget_high_u32(b0));
+ const uint32x2_t c1 = vpadd_u32(vget_low_u32(b1), vget_high_u32(b1));
+ vst1q_u32(res, vcombine_u32(c0, c1));
+}
+
+// Can handle a SAD sum over up to 2048 pixels (e.g., 32x64 or 64x32).
+static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/,
+ uint32_t *const res) {
+ const uint32x4_t a0 = vpaddlq_u16(sum[0]);
+ const uint32x4_t a1 = vpaddlq_u16(sum[1]);
+ const uint32x4_t a2 = vpaddlq_u16(sum[2]);
+ const uint32x4_t a3 = vpaddlq_u16(sum[3]);
+ const uint32x2_t b0 = vadd_u32(vget_low_u32(a0), vget_high_u32(a0));
+ const uint32x2_t b1 = vadd_u32(vget_low_u32(a1), vget_high_u32(a1));
+ const uint32x2_t b2 = vadd_u32(vget_low_u32(a2), vget_high_u32(a2));
+ const uint32x2_t b3 = vadd_u32(vget_low_u32(a3), vget_high_u32(a3));
+ const uint32x2_t c0 = vpadd_u32(b0, b1);
+ const uint32x2_t c1 = vpadd_u32(b2, b3);
+ vst1q_u32(res, vcombine_u32(c0, c1));
+}
+
+// Can handle a SAD sum over up to 4096 pixels (e.g., 64x64).
+static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
+ uint32_t *const res) {
+ const uint32x4_t a0 = vpaddlq_u16(sum[0]);
+ const uint32x4_t a1 = vpaddlq_u16(sum[1]);
+ const uint32x4_t a2 = vpaddlq_u16(sum[2]);
+ const uint32x4_t a3 = vpaddlq_u16(sum[3]);
+ const uint32x4_t a4 = vpaddlq_u16(sum[4]);
+ const uint32x4_t a5 = vpaddlq_u16(sum[5]);
+ const uint32x4_t a6 = vpaddlq_u16(sum[6]);
+ const uint32x4_t a7 = vpaddlq_u16(sum[7]);
+ const uint32x4_t b0 = vaddq_u32(a0, a1);
+ const uint32x4_t b1 = vaddq_u32(a2, a3);
+ const uint32x4_t b2 = vaddq_u32(a4, a5);
+ const uint32x4_t b3 = vaddq_u32(a6, a7);
+ const uint32x2_t c0 = vadd_u32(vget_low_u32(b0), vget_high_u32(b0));
+ const uint32x2_t c1 = vadd_u32(vget_low_u32(b1), vget_high_u32(b1));
+ const uint32x2_t c2 = vadd_u32(vget_low_u32(b2), vget_high_u32(b2));
+ const uint32x2_t c3 = vadd_u32(vget_low_u32(b3), vget_high_u32(b3));
+ const uint32x2_t d0 = vpadd_u32(c0, c1);
+ const uint32x2_t d1 = vpadd_u32(c2, c3);
+ vst1q_u32(res, vcombine_u32(d0, d1));
+}
+
+static INLINE void sad8x_4d(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t *res, const int height) {
int i, j;
+ const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
vdupq_n_u16(0) };
- const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] };
for (i = 0; i < height; ++i) {
- const uint8x8_t a_u8 = vld1_u8(a);
- a += a_stride;
+ const uint8x8_t s = vld1_u8(src);
+ src += src_stride;
for (j = 0; j < 4; ++j) {
- const uint8x8_t b_u8 = vld1_u8(b_loop[j]);
- b_loop[j] += b_stride;
- sum[j] = vabal_u8(sum[j], a_u8, b_u8);
+ const uint8x8_t b_u8 = vld1_u8(ref_loop[j]);
+ ref_loop[j] += ref_stride;
+ sum[j] = vabal_u8(sum[j], s, b_u8);
}
}
- for (j = 0; j < 4; ++j) {
- result[j] = vget_lane_u32(horizontal_add_uint16x8(sum[j]), 0);
- }
+ sad_512_pel_final_neon(sum, res);
}
void vpx_sad8x4x4d_neon(const uint8_t *src, int src_stride,
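
The *x4d kernels score one source block against four candidate references in a single pass. A scalar sketch of the semantics (not libvpx's C reference code verbatim):

    #include <stdint.h>

    static void sad_4d_sketch(const uint8_t *src, int src_stride,
                              const uint8_t *const ref[4], int ref_stride,
                              int width, int height, uint32_t res[4]) {
      int i, x, y;
      for (i = 0; i < 4; ++i) {
        uint32_t sad = 0;
        for (y = 0; y < height; ++y)
          for (x = 0; x < width; ++x) {
            const int d = src[y * src_stride + x] - ref[i][y * ref_stride + x];
            sad += d < 0 ? -d : d;
          }
        res[i] = sad;
      }
    }
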
@@ -88,28 +176,33 @@ void vpx_sad8x16x4d_neon(const uint8_t *src, int src_stride,
sad8x_4d(src, src_stride, ref, ref_stride, res, 16);
}
-static INLINE void sad16x_4d(const uint8_t *a, int a_stride,
- const uint8_t *const b[4], int b_stride,
- uint32_t *result, const int height) {
+////////////////////////////////////////////////////////////////////////////////
+
+static INLINE void sad16_neon(const uint8_t *ref, const uint8x16_t src,
+ uint16x8_t *const sum) {
+ const uint8x16_t r = vld1q_u8(ref);
+ *sum = vabal_u8(*sum, vget_low_u8(src), vget_low_u8(r));
+ *sum = vabal_u8(*sum, vget_high_u8(src), vget_high_u8(r));
+}
+
+static INLINE void sad16x_4d(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t *res, const int height) {
int i, j;
+ const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
vdupq_n_u16(0) };
- const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] };
for (i = 0; i < height; ++i) {
- const uint8x16_t a_u8 = vld1q_u8(a);
- a += a_stride;
+ const uint8x16_t s = vld1q_u8(src);
+ src += src_stride;
for (j = 0; j < 4; ++j) {
- const uint8x16_t b_u8 = vld1q_u8(b_loop[j]);
- b_loop[j] += b_stride;
- sum[j] = vabal_u8(sum[j], vget_low_u8(a_u8), vget_low_u8(b_u8));
- sum[j] = vabal_u8(sum[j], vget_high_u8(a_u8), vget_high_u8(b_u8));
+ sad16_neon(ref_loop[j], s, &sum[j]);
+ ref_loop[j] += ref_stride;
}
}
- for (j = 0; j < 4; ++j) {
- result[j] = vget_lane_u32(horizontal_add_uint16x8(sum[j]), 0);
- }
+ sad_512_pel_final_neon(sum, res);
}
void vpx_sad16x8x4d_neon(const uint8_t *src, int src_stride,
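
On why the final reduction is split by block size: each uint16x8 accumulator lane sums absolute differences of at most 255 per pixel, and a 16-bit lane overflows past 65535, so no more than 65535 / 255 = 257 pixels' worth of worst-case differences may ever share one 16-bit lane. The 512/1024/2048/4096 variants differ only in how soon they widen to 32 bits: for a 512-pixel block each lane initially holds 64 pixels (worst case 16320), and even after the 16-bit pairwise adds the maximum is 256 * 255 = 65280, still in range; the larger variants must call vpaddlq_u16 earlier.
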
@@ -130,113 +223,152 @@ void vpx_sad16x32x4d_neon(const uint8_t *src, int src_stride,
sad16x_4d(src, src_stride, ref, ref_stride, res, 32);
}
-static INLINE void sad32x_4d(const uint8_t *a, int a_stride,
- const uint8_t *const b[4], int b_stride,
- uint32_t *result, const int height) {
- int i, j;
- uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
- vdupq_n_u16(0) };
- const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] };
+////////////////////////////////////////////////////////////////////////////////
+
+static INLINE void sad32x_4d(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ const int height, uint16x8_t *const sum) {
+ int i;
+ const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
+
+ sum[0] = sum[1] = sum[2] = sum[3] = vdupq_n_u16(0);
for (i = 0; i < height; ++i) {
- const uint8x16_t a_0 = vld1q_u8(a);
- const uint8x16_t a_1 = vld1q_u8(a + 16);
- a += a_stride;
- for (j = 0; j < 4; ++j) {
- const uint8x16_t b_0 = vld1q_u8(b_loop[j]);
- const uint8x16_t b_1 = vld1q_u8(b_loop[j] + 16);
- b_loop[j] += b_stride;
- sum[j] = vabal_u8(sum[j], vget_low_u8(a_0), vget_low_u8(b_0));
- sum[j] = vabal_u8(sum[j], vget_high_u8(a_0), vget_high_u8(b_0));
- sum[j] = vabal_u8(sum[j], vget_low_u8(a_1), vget_low_u8(b_1));
- sum[j] = vabal_u8(sum[j], vget_high_u8(a_1), vget_high_u8(b_1));
- }
- }
+ uint8x16_t s;
- for (j = 0; j < 4; ++j) {
- result[j] = vget_lane_u32(horizontal_add_uint16x8(sum[j]), 0);
+ s = vld1q_u8(src + 0 * 16);
+ sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]);
+
+ s = vld1q_u8(src + 1 * 16);
+ sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]);
+
+ src += src_stride;
+ ref_loop[0] += ref_stride;
+ ref_loop[1] += ref_stride;
+ ref_loop[2] += ref_stride;
+ ref_loop[3] += ref_stride;
}
}
void vpx_sad32x16x4d_neon(const uint8_t *src, int src_stride,
const uint8_t *const ref[4], int ref_stride,
uint32_t *res) {
- sad32x_4d(src, src_stride, ref, ref_stride, res, 16);
+ uint16x8_t sum[4];
+ sad32x_4d(src, src_stride, ref, ref_stride, 16, sum);
+ sad_512_pel_final_neon(sum, res);
}
void vpx_sad32x32x4d_neon(const uint8_t *src, int src_stride,
const uint8_t *const ref[4], int ref_stride,
uint32_t *res) {
- sad32x_4d(src, src_stride, ref, ref_stride, res, 32);
+ uint16x8_t sum[4];
+ sad32x_4d(src, src_stride, ref, ref_stride, 32, sum);
+ sad_1024_pel_final_neon(sum, res);
}
void vpx_sad32x64x4d_neon(const uint8_t *src, int src_stride,
const uint8_t *const ref[4], int ref_stride,
uint32_t *res) {
- sad32x_4d(src, src_stride, ref, ref_stride, res, 64);
+ uint16x8_t sum[4];
+ sad32x_4d(src, src_stride, ref, ref_stride, 64, sum);
+ sad_2048_pel_final_neon(sum, res);
}
-static INLINE void sum64x(const uint8x16_t a_0, const uint8x16_t a_1,
- const uint8x16_t b_0, const uint8x16_t b_1,
- uint16x8_t *sum) {
- *sum = vabal_u8(*sum, vget_low_u8(a_0), vget_low_u8(b_0));
- *sum = vabal_u8(*sum, vget_high_u8(a_0), vget_high_u8(b_0));
- *sum = vabal_u8(*sum, vget_low_u8(a_1), vget_low_u8(b_1));
- *sum = vabal_u8(*sum, vget_high_u8(a_1), vget_high_u8(b_1));
-}
+////////////////////////////////////////////////////////////////////////////////
-static INLINE void sad64x_4d(const uint8_t *a, int a_stride,
- const uint8_t *const b[4], int b_stride,
- uint32_t *result, const int height) {
+void vpx_sad64x32x4d_neon(const uint8_t *src, int src_stride,
+ const uint8_t *const ref[4], int ref_stride,
+ uint32_t *res) {
int i;
- uint16x8_t sum_0 = vdupq_n_u16(0);
- uint16x8_t sum_1 = vdupq_n_u16(0);
- uint16x8_t sum_2 = vdupq_n_u16(0);
- uint16x8_t sum_3 = vdupq_n_u16(0);
- uint16x8_t sum_4 = vdupq_n_u16(0);
- uint16x8_t sum_5 = vdupq_n_u16(0);
- uint16x8_t sum_6 = vdupq_n_u16(0);
- uint16x8_t sum_7 = vdupq_n_u16(0);
- const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] };
+ const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
+ uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0) };
- for (i = 0; i < height; ++i) {
- const uint8x16_t a_0 = vld1q_u8(a);
- const uint8x16_t a_1 = vld1q_u8(a + 16);
- const uint8x16_t a_2 = vld1q_u8(a + 32);
- const uint8x16_t a_3 = vld1q_u8(a + 48);
- a += a_stride;
- sum64x(a_0, a_1, vld1q_u8(b_loop[0]), vld1q_u8(b_loop[0] + 16), &sum_0);
- sum64x(a_2, a_3, vld1q_u8(b_loop[0] + 32), vld1q_u8(b_loop[0] + 48),
- &sum_1);
- b_loop[0] += b_stride;
- sum64x(a_0, a_1, vld1q_u8(b_loop[1]), vld1q_u8(b_loop[1] + 16), &sum_2);
- sum64x(a_2, a_3, vld1q_u8(b_loop[1] + 32), vld1q_u8(b_loop[1] + 48),
- &sum_3);
- b_loop[1] += b_stride;
- sum64x(a_0, a_1, vld1q_u8(b_loop[2]), vld1q_u8(b_loop[2] + 16), &sum_4);
- sum64x(a_2, a_3, vld1q_u8(b_loop[2] + 32), vld1q_u8(b_loop[2] + 48),
- &sum_5);
- b_loop[2] += b_stride;
- sum64x(a_0, a_1, vld1q_u8(b_loop[3]), vld1q_u8(b_loop[3] + 16), &sum_6);
- sum64x(a_2, a_3, vld1q_u8(b_loop[3] + 32), vld1q_u8(b_loop[3] + 48),
- &sum_7);
- b_loop[3] += b_stride;
- }
+ for (i = 0; i < 32; ++i) {
+ uint8x16_t s;
- result[0] = vget_lane_u32(horizontal_add_long_uint16x8(sum_0, sum_1), 0);
- result[1] = vget_lane_u32(horizontal_add_long_uint16x8(sum_2, sum_3), 0);
- result[2] = vget_lane_u32(horizontal_add_long_uint16x8(sum_4, sum_5), 0);
- result[3] = vget_lane_u32(horizontal_add_long_uint16x8(sum_6, sum_7), 0);
-}
+ s = vld1q_u8(src + 0 * 16);
+ sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]);
-void vpx_sad64x32x4d_neon(const uint8_t *src, int src_stride,
- const uint8_t *const ref[4], int ref_stride,
- uint32_t *res) {
- sad64x_4d(src, src_stride, ref, ref_stride, res, 32);
+ s = vld1q_u8(src + 1 * 16);
+ sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]);
+
+ s = vld1q_u8(src + 2 * 16);
+ sad16_neon(ref_loop[0] + 2 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 2 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 2 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 2 * 16, s, &sum[3]);
+
+ s = vld1q_u8(src + 3 * 16);
+ sad16_neon(ref_loop[0] + 3 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 3 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 3 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 3 * 16, s, &sum[3]);
+
+ src += src_stride;
+ ref_loop[0] += ref_stride;
+ ref_loop[1] += ref_stride;
+ ref_loop[2] += ref_stride;
+ ref_loop[3] += ref_stride;
+ }
+
+ sad_2048_pel_final_neon(sum, res);
}
void vpx_sad64x64x4d_neon(const uint8_t *src, int src_stride,
const uint8_t *const ref[4], int ref_stride,
uint32_t *res) {
- sad64x_4d(src, src_stride, ref, ref_stride, res, 64);
+ int i;
+ const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] };
+ uint16x8_t sum[8] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
+ vdupq_n_u16(0), vdupq_n_u16(0) };
+
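+  // Two uint16x8 accumulators per ref: each lane of a sum vector collects
+  // two absolute differences per sad16_neon call, so with two accumulators a
+  // lane sees at most 64 rows * 4 diffs = 256 values of up to 255 (65280),
+  // which just fits in 16 bits. A single accumulator per ref would overflow.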
+ for (i = 0; i < 64; ++i) {
+ uint8x16_t s;
+
+ s = vld1q_u8(src + 0 * 16);
+ sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 0 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[2] + 0 * 16, s, &sum[4]);
+ sad16_neon(ref_loop[3] + 0 * 16, s, &sum[6]);
+
+ s = vld1q_u8(src + 1 * 16);
+ sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 1 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[2] + 1 * 16, s, &sum[4]);
+ sad16_neon(ref_loop[3] + 1 * 16, s, &sum[6]);
+
+ s = vld1q_u8(src + 2 * 16);
+ sad16_neon(ref_loop[0] + 2 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[1] + 2 * 16, s, &sum[3]);
+ sad16_neon(ref_loop[2] + 2 * 16, s, &sum[5]);
+ sad16_neon(ref_loop[3] + 2 * 16, s, &sum[7]);
+
+ s = vld1q_u8(src + 3 * 16);
+ sad16_neon(ref_loop[0] + 3 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[1] + 3 * 16, s, &sum[3]);
+ sad16_neon(ref_loop[2] + 3 * 16, s, &sum[5]);
+ sad16_neon(ref_loop[3] + 3 * 16, s, &sum[7]);
+
+ src += src_stride;
+ ref_loop[0] += ref_stride;
+ ref_loop[1] += ref_stride;
+ ref_loop[2] += ref_stride;
+ ref_loop[3] += ref_stride;
+ }
+
+ sad_4096_pel_final_neon(sum, res);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c
index ce81fb630f2..eef123368d0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c
@@ -9,71 +9,72 @@
*/
#include <arm_neon.h>
+#include <assert.h>
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#include "vpx_dsp/arm/mem_neon.h"
void vpx_subtract_block_neon(int rows, int cols, int16_t *diff,
ptrdiff_t diff_stride, const uint8_t *src,
ptrdiff_t src_stride, const uint8_t *pred,
ptrdiff_t pred_stride) {
- int r, c;
+ int r = rows, c;
if (cols > 16) {
- for (r = 0; r < rows; ++r) {
+ do {
for (c = 0; c < cols; c += 32) {
- const uint8x16_t v_src_00 = vld1q_u8(&src[c + 0]);
- const uint8x16_t v_src_16 = vld1q_u8(&src[c + 16]);
- const uint8x16_t v_pred_00 = vld1q_u8(&pred[c + 0]);
- const uint8x16_t v_pred_16 = vld1q_u8(&pred[c + 16]);
- const uint16x8_t v_diff_lo_00 =
- vsubl_u8(vget_low_u8(v_src_00), vget_low_u8(v_pred_00));
- const uint16x8_t v_diff_hi_00 =
- vsubl_u8(vget_high_u8(v_src_00), vget_high_u8(v_pred_00));
- const uint16x8_t v_diff_lo_16 =
- vsubl_u8(vget_low_u8(v_src_16), vget_low_u8(v_pred_16));
- const uint16x8_t v_diff_hi_16 =
- vsubl_u8(vget_high_u8(v_src_16), vget_high_u8(v_pred_16));
- vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(v_diff_lo_00));
- vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(v_diff_hi_00));
- vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(v_diff_lo_16));
- vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(v_diff_hi_16));
+ const uint8x16_t s0 = vld1q_u8(&src[c + 0]);
+ const uint8x16_t s1 = vld1q_u8(&src[c + 16]);
+ const uint8x16_t p0 = vld1q_u8(&pred[c + 0]);
+ const uint8x16_t p1 = vld1q_u8(&pred[c + 16]);
+ const uint16x8_t d0 = vsubl_u8(vget_low_u8(s0), vget_low_u8(p0));
+ const uint16x8_t d1 = vsubl_u8(vget_high_u8(s0), vget_high_u8(p0));
+ const uint16x8_t d2 = vsubl_u8(vget_low_u8(s1), vget_low_u8(p1));
+ const uint16x8_t d3 = vsubl_u8(vget_high_u8(s1), vget_high_u8(p1));
+ vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(d0));
+ vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(d1));
+ vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(d2));
+ vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(d3));
}
diff += diff_stride;
pred += pred_stride;
src += src_stride;
- }
+ } while (--r);
} else if (cols > 8) {
- for (r = 0; r < rows; ++r) {
- const uint8x16_t v_src = vld1q_u8(&src[0]);
- const uint8x16_t v_pred = vld1q_u8(&pred[0]);
- const uint16x8_t v_diff_lo =
- vsubl_u8(vget_low_u8(v_src), vget_low_u8(v_pred));
- const uint16x8_t v_diff_hi =
- vsubl_u8(vget_high_u8(v_src), vget_high_u8(v_pred));
- vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff_lo));
- vst1q_s16(&diff[8], vreinterpretq_s16_u16(v_diff_hi));
+ do {
+ const uint8x16_t s = vld1q_u8(&src[0]);
+ const uint8x16_t p = vld1q_u8(&pred[0]);
+ const uint16x8_t d0 = vsubl_u8(vget_low_u8(s), vget_low_u8(p));
+ const uint16x8_t d1 = vsubl_u8(vget_high_u8(s), vget_high_u8(p));
+ vst1q_s16(&diff[0], vreinterpretq_s16_u16(d0));
+ vst1q_s16(&diff[8], vreinterpretq_s16_u16(d1));
diff += diff_stride;
pred += pred_stride;
src += src_stride;
- }
+ } while (--r);
} else if (cols > 4) {
- for (r = 0; r < rows; ++r) {
- const uint8x8_t v_src = vld1_u8(&src[0]);
- const uint8x8_t v_pred = vld1_u8(&pred[0]);
- const uint16x8_t v_diff = vsubl_u8(v_src, v_pred);
+ do {
+ const uint8x8_t s = vld1_u8(&src[0]);
+ const uint8x8_t p = vld1_u8(&pred[0]);
+ const uint16x8_t v_diff = vsubl_u8(s, p);
vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff));
diff += diff_stride;
pred += pred_stride;
src += src_stride;
- }
+ } while (--r);
} else {
- for (r = 0; r < rows; ++r) {
- for (c = 0; c < cols; ++c) diff[c] = src[c] - pred[c];
-
- diff += diff_stride;
- pred += pred_stride;
- src += src_stride;
- }
+ assert(cols == 4);
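+      // Each iteration handles two rows: load_unaligned_u8 packs 4 bytes
+      // from each of two consecutive rows into one 8-byte vector, so r is
+      // decremented by 2 (block heights here are even).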
+ do {
+ const uint8x8_t s = load_unaligned_u8(src, (int)src_stride);
+ const uint8x8_t p = load_unaligned_u8(pred, (int)pred_stride);
+ const uint16x8_t d = vsubl_u8(s, p);
+ vst1_s16(diff + 0 * diff_stride, vreinterpret_s16_u16(vget_low_u16(d)));
+ vst1_s16(diff + 1 * diff_stride, vreinterpret_s16_u16(vget_high_u16(d)));
+ diff += 2 * diff_stride;
+ pred += 2 * pred_stride;
+ src += 2 * src_stride;
+ r -= 2;
+ } while (r);
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h
index d74fe0cde42..c09841223c8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h
@@ -30,15 +30,6 @@ static INLINE uint32x2_t horizontal_add_uint16x8(const uint16x8_t a) {
vreinterpret_u32_u64(vget_high_u64(c)));
}
-static INLINE uint32x2_t horizontal_add_long_uint16x8(const uint16x8_t a,
- const uint16x8_t b) {
- const uint32x4_t c = vpaddlq_u16(a);
- const uint32x4_t d = vpadalq_u16(c, b);
- const uint64x2_t e = vpaddlq_u32(d);
- return vadd_u32(vreinterpret_u32_u64(vget_low_u64(e)),
- vreinterpret_u32_u64(vget_high_u64(e)));
-}
-
static INLINE uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
const uint64x2_t b = vpaddlq_u32(a);
return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c
new file mode 100644
index 00000000000..8942ba83bc2
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include <assert.h>
+#include "./vpx_dsp_rtcd.h"
+
+uint64_t vpx_sum_squares_2d_i16_neon(const int16_t *src, int stride, int size) {
+ int64x1_t s2;
+
+ if (size == 4) {
+ int16x4_t s[4];
+ int32x4_t s0;
+ uint32x2_t s1;
+
+ s[0] = vld1_s16(src + 0 * stride);
+ s[1] = vld1_s16(src + 1 * stride);
+ s[2] = vld1_s16(src + 2 * stride);
+ s[3] = vld1_s16(src + 3 * stride);
+ s0 = vmull_s16(s[0], s[0]);
+ s0 = vmlal_s16(s0, s[1], s[1]);
+ s0 = vmlal_s16(s0, s[2], s[2]);
+ s0 = vmlal_s16(s0, s[3], s[3]);
+ s1 = vpadd_u32(vget_low_u32(vreinterpretq_u32_s32(s0)),
+ vget_high_u32(vreinterpretq_u32_s32(s0)));
+ s2 = vpaddl_u32(s1);
+ } else {
+ int r = size;
+ uint64x2_t s1 = vdupq_n_u64(0);
+
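+    // Accumulate the squares of one 8-row strip in 32-bit lanes, then widen
+    // into the 64-bit accumulator between strips so the 32-bit lanes cannot
+    // overflow on tall blocks.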
+ do {
+ int c = size;
+ int32x4_t s0 = vdupq_n_s32(0);
+ const int16_t *src_t = src;
+
+ do {
+ int16x8_t s[8];
+
+ s[0] = vld1q_s16(src_t + 0 * stride);
+ s[1] = vld1q_s16(src_t + 1 * stride);
+ s[2] = vld1q_s16(src_t + 2 * stride);
+ s[3] = vld1q_s16(src_t + 3 * stride);
+ s[4] = vld1q_s16(src_t + 4 * stride);
+ s[5] = vld1q_s16(src_t + 5 * stride);
+ s[6] = vld1q_s16(src_t + 6 * stride);
+ s[7] = vld1q_s16(src_t + 7 * stride);
+ s0 = vmlal_s16(s0, vget_low_s16(s[0]), vget_low_s16(s[0]));
+ s0 = vmlal_s16(s0, vget_low_s16(s[1]), vget_low_s16(s[1]));
+ s0 = vmlal_s16(s0, vget_low_s16(s[2]), vget_low_s16(s[2]));
+ s0 = vmlal_s16(s0, vget_low_s16(s[3]), vget_low_s16(s[3]));
+ s0 = vmlal_s16(s0, vget_low_s16(s[4]), vget_low_s16(s[4]));
+ s0 = vmlal_s16(s0, vget_low_s16(s[5]), vget_low_s16(s[5]));
+ s0 = vmlal_s16(s0, vget_low_s16(s[6]), vget_low_s16(s[6]));
+ s0 = vmlal_s16(s0, vget_low_s16(s[7]), vget_low_s16(s[7]));
+ s0 = vmlal_s16(s0, vget_high_s16(s[0]), vget_high_s16(s[0]));
+ s0 = vmlal_s16(s0, vget_high_s16(s[1]), vget_high_s16(s[1]));
+ s0 = vmlal_s16(s0, vget_high_s16(s[2]), vget_high_s16(s[2]));
+ s0 = vmlal_s16(s0, vget_high_s16(s[3]), vget_high_s16(s[3]));
+ s0 = vmlal_s16(s0, vget_high_s16(s[4]), vget_high_s16(s[4]));
+ s0 = vmlal_s16(s0, vget_high_s16(s[5]), vget_high_s16(s[5]));
+ s0 = vmlal_s16(s0, vget_high_s16(s[6]), vget_high_s16(s[6]));
+ s0 = vmlal_s16(s0, vget_high_s16(s[7]), vget_high_s16(s[7]));
+ src_t += 8;
+ c -= 8;
+ } while (c);
+
+ s1 = vaddw_u32(s1, vget_low_u32(vreinterpretq_u32_s32(s0)));
+ s1 = vaddw_u32(s1, vget_high_u32(vreinterpretq_u32_s32(s0)));
+ src += 8 * stride;
+ r -= 8;
+ } while (r);
+
+ s2 = vadd_u64(vget_low_u64(s1), vget_high_u64(s1));
+ }
+
+ return vget_lane_u64(s2, 0);
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c
index 0cfb81e4df1..ba9ceb86658 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c
@@ -254,6 +254,89 @@ static void convolve_vert_mmi(const uint8_t *src, ptrdiff_t src_stride,
);
}
+static void convolve_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int32_t w, int32_t h) {
+ const int16_t *filter_x = filter[x0_q4];
+ double ftmp[14];
+ uint32_t tmp[2];
+ uint32_t para[2];
+ para[0] = (1 << ((FILTER_BITS)-1));
+ para[1] = FILTER_BITS;
+ src -= SUBPEL_TAPS / 2 - 1;
+ src_stride -= w;
+ dst_stride -= w;
+ (void)x_step_q4;
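+  // Each iteration filters 4 output pixels, then averages with dst: the
+  // 0x10001 constant splatted across the 16-bit lanes provides both the
+  // rounding bias for paddh and the shift amount of 1 for psrah,
+  // implementing ROUND_POWER_OF_TWO(dst + filtered, 1).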
+
+ __asm__ volatile(
+ "move %[tmp1], %[width] \n\t"
+ "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ "gsldlc1 %[filter1], 0x03(%[filter]) \n\t"
+ "gsldrc1 %[filter1], 0x00(%[filter]) \n\t"
+ "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t"
+ "gsldrc1 %[filter2], 0x08(%[filter]) \n\t"
+ "1: \n\t"
+      /* Load 8 source pixels per row */
+ "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t"
+ "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t"
+ "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t"
+ "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t"
+ "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t"
+ "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t"
+ "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t"
+ "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t"
+ "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
+ "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
+ "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
+ "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t"
+ "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t"
+ MMI_ADDIU(%[width], %[width], -0x04)
+      /* Compute the raw filtered values */
+ GET_DATA_H_MMI
+ ROUND_POWER_OF_TWO_MMI
+ CLIP_PIXEL_MMI
+ "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
+ "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t"
+ "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t"
+ "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
+ "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t"
+ "li %[tmp0], 0x10001 \n\t"
+ MMI_MTC1(%[tmp0], %[ftmp5])
+ "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
+ "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
+ "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t"
+ "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t"
+ "swc1 %[ftmp12], 0x00(%[dst]) \n\t"
+ MMI_ADDIU(%[dst], %[dst], 0x04)
+ MMI_ADDIU(%[src], %[src], 0x04)
+ /* Loop count */
+ "bnez %[width], 1b \n\t"
+ "move %[width], %[tmp1] \n\t"
+ MMI_ADDU(%[src], %[src], %[src_stride])
+ MMI_ADDU(%[dst], %[dst], %[dst_stride])
+ MMI_ADDIU(%[height], %[height], -0x01)
+ "bnez %[height], 1b \n\t"
+ : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]),
+ [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]),
+ [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]),
+ [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]),
+ [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]),
+ [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]),
+ [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]),
+ [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
+ [src]"+&r"(src), [width]"+&r"(w),
+ [dst]"+&r"(dst), [height]"+&r"(h)
+ : [filter]"r"(filter_x), [para]"r"(para),
+ [src_stride]"r"((mips_reg)src_stride),
+ [dst_stride]"r"((mips_reg)dst_stride)
+ : "memory"
+ );
+}
+
static void convolve_avg_vert_mmi(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const InterpKernel *filter, int y0_q4,
@@ -362,52 +445,63 @@ void vpx_convolve_avg_mmi(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const InterpKernel *filter, int x0_q4, int x_step_q4,
int y0_q4, int y_step_q4, int w, int h) {
- double ftmp[4];
- uint32_t tmp[2];
- src_stride -= w;
- dst_stride -= w;
+ int x, y;
+
(void)filter;
(void)x0_q4;
(void)x_step_q4;
(void)y0_q4;
(void)y_step_q4;
- __asm__ volatile(
- "move %[tmp1], %[width] \n\t"
- "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
- "li %[tmp0], 0x10001 \n\t"
- MMI_MTC1(%[tmp0], %[ftmp3])
- "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
- "1: \n\t"
- "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
- "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
- "gsldlc1 %[ftmp2], 0x07(%[dst]) \n\t"
- "gsldrc1 %[ftmp2], 0x00(%[dst]) \n\t"
- "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
- "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
- "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
- "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
- "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
- "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
- "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
- MMI_ADDIU(%[width], %[width], -0x04)
- MMI_ADDIU(%[dst], %[dst], 0x04)
- MMI_ADDIU(%[src], %[src], 0x04)
- "bnez %[width], 1b \n\t"
- "move %[width], %[tmp1] \n\t"
- MMI_ADDU(%[dst], %[dst], %[dst_stride])
- MMI_ADDU(%[src], %[src], %[src_stride])
- MMI_ADDIU(%[height], %[height], -0x01)
- "bnez %[height], 1b \n\t"
- : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
- [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
- [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
- [src]"+&r"(src), [dst]"+&r"(dst),
- [width]"+&r"(w), [height]"+&r"(h)
- : [src_stride]"r"((mips_reg)src_stride),
- [dst_stride]"r"((mips_reg)dst_stride)
- : "memory"
- );
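+  // The MMI loop below writes 4 pixels at a time (swc1), so widths that are
+  // not a multiple of 4 take the plain C path.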
+ if (w & 0x03) {
+ for (y = 0; y < h; ++y) {
+ for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
+ src += src_stride;
+ dst += dst_stride;
+ }
+ } else {
+ double ftmp[4];
+ uint32_t tmp[2];
+ src_stride -= w;
+ dst_stride -= w;
+
+ __asm__ volatile(
+ "move %[tmp1], %[width] \n\t"
+ "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
+ "li %[tmp0], 0x10001 \n\t"
+ MMI_MTC1(%[tmp0], %[ftmp3])
+ "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
+ "1: \n\t"
+ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t"
+ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t"
+ "gsldlc1 %[ftmp2], 0x07(%[dst]) \n\t"
+ "gsldrc1 %[ftmp2], 0x00(%[dst]) \n\t"
+ "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
+ "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
+ "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
+ "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
+ "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
+ "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
+ "swc1 %[ftmp1], 0x00(%[dst]) \n\t"
+ MMI_ADDIU(%[width], %[width], -0x04)
+ MMI_ADDIU(%[dst], %[dst], 0x04)
+ MMI_ADDIU(%[src], %[src], 0x04)
+ "bnez %[width], 1b \n\t"
+ "move %[width], %[tmp1] \n\t"
+ MMI_ADDU(%[dst], %[dst], %[dst_stride])
+ MMI_ADDU(%[src], %[src], %[src_stride])
+ MMI_ADDIU(%[height], %[height], -0x01)
+ "bnez %[height], 1b \n\t"
+ : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
+ [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
+ [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
+ [src]"+&r"(src), [dst]"+&r"(dst),
+ [width]"+&r"(w), [height]"+&r"(h)
+ : [src_stride]"r"((mips_reg)src_stride),
+ [dst_stride]"r"((mips_reg)dst_stride)
+ : "memory"
+ );
+ }
}
static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
@@ -481,6 +575,29 @@ static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
}
}
+static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const InterpKernel *x_filters, int x0_q4,
+ int x_step_q4, int w, int h) {
+ int x, y;
+ src -= SUBPEL_TAPS / 2 - 1;
+
+ for (y = 0; y < h; ++y) {
+ int x_q4 = x0_q4;
+ for (x = 0; x < w; ++x) {
+ const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
+ const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
+ int k, sum = 0;
+ for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
+ dst[x] = ROUND_POWER_OF_TWO(
+ dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
+ x_q4 += x_step_q4;
+ }
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
void vpx_convolve8_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const InterpKernel *filter,
int x0_q4, int32_t x_step_q4, int y0_q4,
@@ -553,6 +670,21 @@ void vpx_convolve8_vert_mmi(const uint8_t *src, ptrdiff_t src_stride,
y_step_q4, w, h);
}
+void vpx_convolve8_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
+ int w, int h) {
+ (void)y0_q4;
+ (void)y_step_q4;
+ if (w & 0x03)
+ convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, w, h);
+ else
+ convolve_avg_horiz_mmi(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, w, h);
+}
+
void vpx_convolve8_avg_vert_mmi(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const InterpKernel *filter, int x0_q4,
@@ -580,8 +712,5 @@ void vpx_convolve8_avg_mmi(const uint8_t *src, ptrdiff_t src_stride,
vpx_convolve8_mmi(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4,
y_step_q4, w, h);
- if (w & 0x03)
- vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h);
- else
- vpx_convolve_avg_mmi(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h);
+ vpx_convolve_avg_mmi(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c
index f095cb0a481..6603b85acba 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c
@@ -76,6 +76,8 @@ static int16x8_t cospi29_v = { 2404, 2404, 2404, 2404, 2404, 2404, 2404, 2404 };
static int16x8_t cospi30_v = { 1606, 1606, 1606, 1606, 1606, 1606, 1606, 1606 };
static int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 };
+static uint8x16_t mask1 = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 };
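+// mask1 selects the low halves of two vectors; it is shared by PACK_STORE
+// below, which is used by both the idct4x4 and the new iwht4x4 add routines.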
#define ROUND_SHIFT_INIT \
const int32x4_t shift = vec_sl(vec_splat_s32(1), vec_splat_u32(13)); \
const uint32x4_t shift14 = vec_splat_u32(14);
@@ -107,6 +109,15 @@ static int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 };
out1 = vec_sub(step0, step1); \
out1 = vec_perm(out1, out1, mask0);
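+// PACK_STORE adds the reconstructed residual (v0, v1) to the four unpacked
+// destination rows, packs to unsigned 8-bit with saturation, and scatters the
+// 16 bytes back into the 4x4 destination block.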
+#define PACK_STORE(v0, v1) \
+ tmp16_0 = vec_add(vec_perm(d_u0, d_u1, mask1), v0); \
+ tmp16_1 = vec_add(vec_perm(d_u2, d_u3, mask1), v1); \
+ output_v = vec_packsu(tmp16_0, tmp16_1); \
+ \
+ vec_vsx_st(output_v, 0, tmp_dest); \
+ for (i = 0; i < 4; i++) \
+ for (j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i];
+
void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
int stride) {
int i, j;
@@ -114,13 +125,10 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
int16x8_t step0, step1, tmp16_0, tmp16_1, t_out0, t_out1;
uint8x16_t mask0 = { 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 };
- uint8x16_t mask1 = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 };
int16x8_t v0 = load_tran_low(0, input);
int16x8_t v1 = load_tran_low(8 * sizeof(*input), input);
int16x8_t t0 = vec_mergeh(v0, v1);
int16x8_t t1 = vec_mergel(v0, v1);
-
uint8x16_t dest0 = vec_vsx_ld(0, dest);
uint8x16_t dest1 = vec_vsx_ld(stride, dest);
uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest);
@@ -130,6 +138,7 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
int16x8_t d_u1 = (int16x8_t)vec_mergeh(dest1, zerov);
int16x8_t d_u2 = (int16x8_t)vec_mergeh(dest2, zerov);
int16x8_t d_u3 = (int16x8_t)vec_mergeh(dest3, zerov);
+
uint8x16_t output_v;
uint8_t tmp_dest[16];
ROUND_SHIFT_INIT
@@ -148,13 +157,8 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
PIXEL_ADD4(v0, t_out0);
PIXEL_ADD4(v1, t_out1);
- tmp16_0 = vec_add(vec_perm(d_u0, d_u1, mask1), v0);
- tmp16_1 = vec_add(vec_perm(d_u2, d_u3, mask1), v1);
- output_v = vec_packsu(tmp16_0, tmp16_1);
- vec_vsx_st(output_v, 0, tmp_dest);
- for (i = 0; i < 4; i++)
- for (j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i];
+ PACK_STORE(v0, v1);
}
#define TRANSPOSE8x8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
@@ -1062,3 +1066,67 @@ void vpx_idct32x32_1024_add_vsx(const tran_low_t *input, uint8_t *dest,
ADD_STORE_BLOCK(src2, 16);
ADD_STORE_BLOCK(src3, 24);
}
+
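+// The inverse Walsh-Hadamard butterfly, matching the C reference
+// vpx_iwht4x4_16_add_c:
+//   a += c; d -= b; e = (a - d) >> 1; b = e - b; c = e - c; a -= b; d += c;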
+#define TRANSFORM_COLS \
+ v32_a = vec_add(v32_a, v32_c); \
+ v32_d = vec_sub(v32_d, v32_b); \
+ v32_e = vec_sub(v32_a, v32_d); \
+ v32_e = vec_sra(v32_e, one); \
+ v32_b = vec_sub(v32_e, v32_b); \
+ v32_c = vec_sub(v32_e, v32_c); \
+ v32_a = vec_sub(v32_a, v32_b); \
+ v32_d = vec_add(v32_d, v32_c); \
+ v_a = vec_packs(v32_a, v32_b); \
+ v_c = vec_packs(v32_c, v32_d);
+
+#define TRANSPOSE_WHT \
+ tmp_a = vec_mergeh(v_a, v_c); \
+ tmp_c = vec_mergel(v_a, v_c); \
+ v_a = vec_mergeh(tmp_a, tmp_c); \
+ v_c = vec_mergel(tmp_a, tmp_c);
+
+void vpx_iwht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
+ int stride) {
+ int16x8_t v_a = load_tran_low(0, input);
+ int16x8_t v_c = load_tran_low(8 * sizeof(*input), input);
+ int16x8_t tmp_a, tmp_c;
+ uint16x8_t two = vec_splat_u16(2);
+ uint32x4_t one = vec_splat_u32(1);
+ int16x8_t tmp16_0, tmp16_1;
+ int32x4_t v32_a, v32_c, v32_d, v32_b, v32_e;
+ uint8x16_t dest0 = vec_vsx_ld(0, dest);
+ uint8x16_t dest1 = vec_vsx_ld(stride, dest);
+ uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest);
+ uint8x16_t dest3 = vec_vsx_ld(3 * stride, dest);
+ int16x8_t d_u0 = (int16x8_t)unpack_to_u16_h(dest0);
+ int16x8_t d_u1 = (int16x8_t)unpack_to_u16_h(dest1);
+ int16x8_t d_u2 = (int16x8_t)unpack_to_u16_h(dest2);
+ int16x8_t d_u3 = (int16x8_t)unpack_to_u16_h(dest3);
+ uint8x16_t output_v;
+ uint8_t tmp_dest[16];
+ int i, j;
+
+ v_a = vec_sra(v_a, two);
+ v_c = vec_sra(v_c, two);
+
+ TRANSPOSE_WHT;
+
+ v32_a = vec_unpackh(v_a);
+ v32_c = vec_unpackl(v_a);
+
+ v32_d = vec_unpackh(v_c);
+ v32_b = vec_unpackl(v_c);
+
+ TRANSFORM_COLS;
+
+ TRANSPOSE_WHT;
+
+ v32_a = vec_unpackh(v_a);
+ v32_c = vec_unpackl(v_a);
+ v32_d = vec_unpackh(v_c);
+ v32_b = vec_unpackl(v_c);
+
+ TRANSFORM_COLS;
+
+ PACK_STORE(v_a, v_c);
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c
new file mode 100644
index 00000000000..3a9092f64a0
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/ppc/types_vsx.h"
+
+// Negate 16-bit integers in a when the corresponding signed 16-bit
+// integer in b is negative.
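+// When b < 0 the mask is all ones (-1): (a + (-1)) ^ (-1) == -a in two's
+// complement; when b >= 0 the mask is 0 and a is returned unchanged.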
+static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) {
+ const int16x8_t mask = vec_sra(b, vec_shift_sign_s16);
+ return vec_xor(vec_add(a, mask), mask);
+}
+
+// Sets each 32-bit integer to 1 when the corresponding value in a is
+// negative.
+static INLINE int32x4_t vec_is_neg(int32x4_t a) {
+ return vec_sr(a, vec_shift_sign_s32);
+}
+
+// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit
+// integers, and return the high 16 bits of the intermediate integers.
+// (a * b) >> 16
+static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) {
+  // vec_madds computes ((A * B) >> 15) + C; we need >> 16, so we perform an
+  // extra right shift.
+ return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16);
+}
+
+// Quantization function used for 4x4, 8x8 and 16x16 blocks.
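+// Scalar equivalent, as in vpx_quantize_b_c:
+//   tmp = clamp(abs(coeff) + round, INT16_MIN, INT16_MAX);
+//   tmp = ((((tmp * quant) >> 16) + tmp) * quant_shift) >> 16;
+//   qcoeff = copysign(tmp, coeff), zeroed where abs(coeff) < zbin.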
+static INLINE int16x8_t quantize_coeff(int16x8_t coeff, int16x8_t coeff_abs,
+ int16x8_t round, int16x8_t quant,
+ int16x8_t quant_shift, bool16x8_t mask) {
+ const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
+ int16x8_t qcoeff = vec_mulhi(rounded, quant);
+ qcoeff = vec_add(qcoeff, rounded);
+ qcoeff = vec_mulhi(qcoeff, quant_shift);
+ qcoeff = vec_sign(qcoeff, coeff);
+ return vec_and(qcoeff, mask);
+}
+
+// Quantization function used for 32x32 blocks.
+static INLINE int16x8_t quantize_coeff_32(int16x8_t coeff, int16x8_t coeff_abs,
+ int16x8_t round, int16x8_t quant,
+ int16x8_t quant_shift,
+ bool16x8_t mask) {
+ const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
+ int16x8_t qcoeff = vec_mulhi(rounded, quant);
+ qcoeff = vec_add(qcoeff, rounded);
+  // 32x32 blocks require an extra multiplication by 2. This compensates for
+  // the extra right shift added in vec_mulhi, so vec_madds can be used
+  // directly instead of vec_mulhi:
+  //   (((a * b) >> 15) >> 1) << 1 == (a * b) >> 15.
+ qcoeff = vec_madds(qcoeff, quant_shift, vec_zeros_s16);
+ qcoeff = vec_sign(qcoeff, coeff);
+ return vec_and(qcoeff, mask);
+}
+
+// Dequantization function used for 32x32 blocks. Quantized coeffs of 32x32
+// blocks are twice as big as for other block sizes; as such, using
+// vec_mladd would overflow.
+static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
+ int16x8_t dequant) {
+ int16x8_t dqcoeff;
+ int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
+ int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
+  // Add 1 if negative to round towards zero, matching the C code, which uses
+  // integer division (truncation towards zero).
+ dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe));
+ dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
+ dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
+ dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
+ dqcoeff = vec_pack(dqcoeffe, dqcoeffo);
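+  // vec_mule/vec_mulo computed the even and odd products separately, so
+  // re-interleave them back into coefficient order.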
+ return vec_perm(dqcoeff, dqcoeff, vec_perm_merge);
+}
+
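+// Returns the 1-based scan position of each nonzero quantized coeff: iscan is
+// 0-based, subtracting the all-ones mask adds 1 where the coeff passed zbin,
+// and vec_andc clears lanes whose qcoeff is 0, so a running vec_max gives eob.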
+static INLINE int16x8_t nonzero_scanindex(int16x8_t qcoeff, bool16x8_t mask,
+ const int16_t *iscan_ptr, int index) {
+ int16x8_t scan = vec_vsx_ld(index, iscan_ptr);
+ bool16x8_t zero_coeff = vec_cmpeq(qcoeff, vec_zeros_s16);
+ scan = vec_sub(scan, mask);
+ return vec_andc(scan, zero_coeff);
+}
+
+// Compare the packed 16-bit integers across a and return a vector with the
+// maximum value of a broadcast to every element.
+static INLINE int16x8_t vec_max_across(int16x8_t a) {
+ a = vec_max(a, vec_perm(a, a, vec_perm64));
+ a = vec_max(a, vec_perm(a, a, vec_perm32));
+ return vec_max(a, vec_perm(a, a, vec_perm16));
+}
+
+void vpx_quantize_b_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *zbin_ptr,
+ const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan_ptr,
+ const int16_t *iscan_ptr) {
+ int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob;
+ bool16x8_t zero_mask0, zero_mask1;
+
+  // The first set of 8 coeffs starts with DC + 7 AC.
+ int16x8_t zbin = vec_vsx_ld(0, zbin_ptr);
+ int16x8_t round = vec_vsx_ld(0, round_ptr);
+ int16x8_t quant = vec_vsx_ld(0, quant_ptr);
+ int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
+ int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr);
+
+ int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
+ int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
+
+ int16x8_t coeff0_abs = vec_abs(coeff0);
+ int16x8_t coeff1_abs = vec_abs(coeff1);
+
+ zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+ zbin = vec_splat(zbin, 1);
+ zero_mask1 = vec_cmpge(coeff1_abs, zbin);
+
+ (void)scan_ptr;
+ (void)skip_block;
+ assert(!skip_block);
+
+ qcoeff0 =
+ quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift, zero_mask0);
+ vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+ round = vec_splat(round, 1);
+ quant = vec_splat(quant, 1);
+ quant_shift = vec_splat(quant_shift, 1);
+ qcoeff1 =
+ quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift, zero_mask1);
+ vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+
+ dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+ dequant = vec_splat(dequant, 1);
+ dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+
+ eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0),
+ nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16));
+
+ if (n_coeffs > 16) {
+ int index = 16;
+ int off0 = 32;
+ int off1 = 48;
+ int off2 = 64;
+ do {
+ int16x8_t coeff2, coeff2_abs, qcoeff2, dqcoeff2, eob2;
+ bool16x8_t zero_mask2;
+ coeff0 = vec_vsx_ld(off0, coeff_ptr);
+ coeff1 = vec_vsx_ld(off1, coeff_ptr);
+ coeff2 = vec_vsx_ld(off2, coeff_ptr);
+ coeff0_abs = vec_abs(coeff0);
+ coeff1_abs = vec_abs(coeff1);
+ coeff2_abs = vec_abs(coeff2);
+ zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+ zero_mask1 = vec_cmpge(coeff1_abs, zbin);
+ zero_mask2 = vec_cmpge(coeff2_abs, zbin);
+ qcoeff0 = quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift,
+ zero_mask0);
+ qcoeff1 = quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift,
+ zero_mask1);
+ qcoeff2 = quantize_coeff(coeff2, coeff2_abs, round, quant, quant_shift,
+ zero_mask2);
+ vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
+ vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
+ vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+
+ dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
+ dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
+ dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16);
+
+ vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr);
+ vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr);
+ vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr);
+
+ eob =
+ vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0));
+ eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1),
+ nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2));
+ eob = vec_max(eob, eob2);
+
+ index += 24;
+ off0 += 48;
+ off1 += 48;
+ off2 += 48;
+ } while (index < n_coeffs);
+ }
+
+ eob = vec_max_across(eob);
+ *eob_ptr = eob[0];
+}
+
+void vpx_quantize_b_32x32_vsx(
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block,
+ const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr,
+ const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+  // In stage 1 we quantize 16 coeffs (DC + 15 AC).
+  // In stage 2 we loop 42 times, quantizing 24 coeffs per iteration:
+  // (32 * 32 - 16) / 24 = 42.
+  int num_itr = 42;
+  // Offsets are in bytes: 16 coeffs = 32 bytes.
+ int off0 = 32;
+ int off1 = 48;
+ int off2 = 64;
+
+ int16x8_t qcoeff0, qcoeff1, eob;
+ bool16x8_t zero_mask0, zero_mask1;
+
+ int16x8_t zbin = vec_vsx_ld(0, zbin_ptr);
+ int16x8_t round = vec_vsx_ld(0, round_ptr);
+ int16x8_t quant = vec_vsx_ld(0, quant_ptr);
+ int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
+ int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr);
+
+ int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
+ int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
+
+ int16x8_t coeff0_abs = vec_abs(coeff0);
+ int16x8_t coeff1_abs = vec_abs(coeff1);
+
+ (void)scan_ptr;
+ (void)skip_block;
+ (void)n_coeffs;
+ assert(!skip_block);
+
+ // 32x32 quantization requires that zbin and round be divided by 2
+ zbin = vec_sra(vec_add(zbin, vec_ones_s16), vec_ones_u16);
+ round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16);
+
+ zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+ zbin = vec_splat(zbin, 1); // remove DC from zbin
+ zero_mask1 = vec_cmpge(coeff1_abs, zbin);
+
+ qcoeff0 = quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift,
+ zero_mask0);
+ round = vec_splat(round, 1); // remove DC from round
+ quant = vec_splat(quant, 1); // remove DC from quant
+ quant_shift = vec_splat(quant_shift, 1); // remove DC from quant_shift
+ qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift,
+ zero_mask1);
+
+ vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+ vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+
+ vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), 0, dqcoeff_ptr);
+ dequant = vec_splat(dequant, 1); // remove DC from dequant
+ vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), 16, dqcoeff_ptr);
+
+ eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0),
+ nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16));
+
+ do {
+ int16x8_t coeff2, coeff2_abs, qcoeff2, eob2;
+ bool16x8_t zero_mask2;
+
+ coeff0 = vec_vsx_ld(off0, coeff_ptr);
+ coeff1 = vec_vsx_ld(off1, coeff_ptr);
+ coeff2 = vec_vsx_ld(off2, coeff_ptr);
+
+ coeff0_abs = vec_abs(coeff0);
+ coeff1_abs = vec_abs(coeff1);
+ coeff2_abs = vec_abs(coeff2);
+
+ zero_mask0 = vec_cmpge(coeff0_abs, zbin);
+ zero_mask1 = vec_cmpge(coeff1_abs, zbin);
+ zero_mask2 = vec_cmpge(coeff2_abs, zbin);
+
+ qcoeff0 = quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift,
+ zero_mask0);
+ qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift,
+ zero_mask1);
+ qcoeff2 = quantize_coeff_32(coeff2, coeff2_abs, round, quant, quant_shift,
+ zero_mask2);
+
+ vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
+ vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
+ vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+
+ vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), off0, dqcoeff_ptr);
+ vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), off1, dqcoeff_ptr);
+ vec_vsx_st(dequantize_coeff_32(qcoeff2, dequant), off2, dqcoeff_ptr);
+
+ eob = vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0));
+ eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1),
+ nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2));
+ eob = vec_max(eob, eob2);
+
+    // 24 int16_t coeffs are 48 bytes.
+ off0 += 48;
+ off1 += 48;
+ off2 += 48;
+ num_itr--;
+ } while (num_itr != 0);
+
+ eob = vec_max_across(eob);
+ *eob_ptr = eob[0];
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/types_vsx.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/types_vsx.h
index f611d02d2d5..a5d2a225526 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/types_vsx.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/types_vsx.h
@@ -19,6 +19,7 @@ typedef vector signed short int16x8_t;
typedef vector unsigned short uint16x8_t;
typedef vector signed int int32x4_t;
typedef vector unsigned int uint32x4_t;
+typedef vector bool short bool16x8_t;
#ifdef __clang__
static const uint8x16_t xxpermdi0_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
@@ -65,4 +66,24 @@ static const uint8x16_t xxpermdi3_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
#endif
#endif
+static const int16x8_t vec_zeros_s16 = { 0, 0, 0, 0, 0, 0, 0, 0 };
+static const int16x8_t vec_ones_s16 = { 1, 1, 1, 1, 1, 1, 1, 1 };
+static const uint16x8_t vec_ones_u16 = { 1, 1, 1, 1, 1, 1, 1, 1 };
+static const uint32x4_t vec_ones_u32 = { 1, 1, 1, 1 };
+static const uint16x8_t vec_shift_sign_s16 = { 15, 15, 15, 15, 15, 15, 15, 15 };
+static const uint32x4_t vec_shift_sign_s32 = { 31, 31, 31, 31 };
+static const uint8x16_t vec_perm64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D,
+ 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07 };
+static const uint8x16_t vec_perm32 = { 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
+ 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x00, 0x01, 0x02, 0x03 };
+static const uint8x16_t vec_perm16 = { 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0E, 0x0D,
+ 0x0E, 0x0F, 0x00, 0x01 };
+
+static const uint8x16_t vec_perm_merge = { 0x00, 0x01, 0x08, 0x09, 0x02, 0x03,
+ 0x0A, 0x0B, 0x04, 0x05, 0x0C, 0x0D,
+ 0x06, 0x07, 0x0E, 0x0F };
+
#endif // VPX_DSP_PPC_TYPES_VSX_H_
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/variance_vsx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/variance_vsx.c
index 1efe2f00569..d3f257b63eb 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/variance_vsx.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/variance_vsx.c
@@ -10,10 +10,11 @@
#include <assert.h>
+#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/ppc/types_vsx.h"
-static inline uint8x16_t read4x2(const uint8_t *a, int stride) {
+static INLINE uint8x16_t read4x2(const uint8_t *a, int stride) {
const uint32x4_t a0 = (uint32x4_t)vec_vsx_ld(0, a);
const uint32x4_t a1 = (uint32x4_t)vec_vsx_ld(0, a + stride);
@@ -101,3 +102,174 @@ void vpx_comp_avg_pred_vsx(uint8_t *comp_pred, const uint8_t *pred, int width,
}
}
}
+
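+// Accumulates one 32-pixel row segment: the pixels are widened to 16 bits and
+// differenced, then vec_sum4s folds the diffs into the 32-bit sum lanes and
+// vec_msum folds d * d into the sum-of-squares lanes.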
+static INLINE void variance_inner_32(const uint8_t *a, const uint8_t *b,
+ int32x4_t *sum_squared, int32x4_t *sum) {
+ int32x4_t s = *sum;
+ int32x4_t ss = *sum_squared;
+
+ const uint8x16_t va0 = vec_vsx_ld(0, a);
+ const uint8x16_t vb0 = vec_vsx_ld(0, b);
+ const uint8x16_t va1 = vec_vsx_ld(16, a);
+ const uint8x16_t vb1 = vec_vsx_ld(16, b);
+
+ const int16x8_t a0 = unpack_to_s16_h(va0);
+ const int16x8_t b0 = unpack_to_s16_h(vb0);
+ const int16x8_t a1 = unpack_to_s16_l(va0);
+ const int16x8_t b1 = unpack_to_s16_l(vb0);
+ const int16x8_t a2 = unpack_to_s16_h(va1);
+ const int16x8_t b2 = unpack_to_s16_h(vb1);
+ const int16x8_t a3 = unpack_to_s16_l(va1);
+ const int16x8_t b3 = unpack_to_s16_l(vb1);
+ const int16x8_t d0 = vec_sub(a0, b0);
+ const int16x8_t d1 = vec_sub(a1, b1);
+ const int16x8_t d2 = vec_sub(a2, b2);
+ const int16x8_t d3 = vec_sub(a3, b3);
+
+ s = vec_sum4s(d0, s);
+ ss = vec_msum(d0, d0, ss);
+ s = vec_sum4s(d1, s);
+ ss = vec_msum(d1, d1, ss);
+ s = vec_sum4s(d2, s);
+ ss = vec_msum(d2, d2, ss);
+ s = vec_sum4s(d3, s);
+ ss = vec_msum(d3, d3, ss);
+ *sum = s;
+ *sum_squared = ss;
+}
+
+static INLINE void variance(const uint8_t *a, int a_stride, const uint8_t *b,
+ int b_stride, int w, int h, uint32_t *sse,
+ int *sum) {
+ int i;
+
+ int32x4_t s = vec_splat_s32(0);
+ int32x4_t ss = vec_splat_s32(0);
+
+ switch (w) {
+ case 4:
+ for (i = 0; i < h / 2; ++i) {
+ const int16x8_t a0 = unpack_to_s16_h(read4x2(a, a_stride));
+ const int16x8_t b0 = unpack_to_s16_h(read4x2(b, b_stride));
+ const int16x8_t d = vec_sub(a0, b0);
+ s = vec_sum4s(d, s);
+ ss = vec_msum(d, d, ss);
+ a += a_stride * 2;
+ b += b_stride * 2;
+ }
+ break;
+ case 8:
+ for (i = 0; i < h; ++i) {
+ const int16x8_t a0 = unpack_to_s16_h(vec_vsx_ld(0, a));
+ const int16x8_t b0 = unpack_to_s16_h(vec_vsx_ld(0, b));
+ const int16x8_t d = vec_sub(a0, b0);
+
+ s = vec_sum4s(d, s);
+ ss = vec_msum(d, d, ss);
+ a += a_stride;
+ b += b_stride;
+ }
+ break;
+ case 16:
+ for (i = 0; i < h; ++i) {
+ const uint8x16_t va = vec_vsx_ld(0, a);
+ const uint8x16_t vb = vec_vsx_ld(0, b);
+ const int16x8_t a0 = unpack_to_s16_h(va);
+ const int16x8_t b0 = unpack_to_s16_h(vb);
+ const int16x8_t a1 = unpack_to_s16_l(va);
+ const int16x8_t b1 = unpack_to_s16_l(vb);
+ const int16x8_t d0 = vec_sub(a0, b0);
+ const int16x8_t d1 = vec_sub(a1, b1);
+
+ s = vec_sum4s(d0, s);
+ ss = vec_msum(d0, d0, ss);
+ s = vec_sum4s(d1, s);
+ ss = vec_msum(d1, d1, ss);
+
+ a += a_stride;
+ b += b_stride;
+ }
+ break;
+ case 32:
+ for (i = 0; i < h; ++i) {
+ variance_inner_32(a, b, &ss, &s);
+ a += a_stride;
+ b += b_stride;
+ }
+ break;
+ case 64:
+ for (i = 0; i < h; ++i) {
+ variance_inner_32(a, b, &ss, &s);
+ variance_inner_32(a + 32, b + 32, &ss, &s);
+
+ a += a_stride;
+ b += b_stride;
+ }
+ break;
+ }
+
+ s = vec_splat(vec_sums(s, vec_splat_s32(0)), 3);
+
+ vec_ste(s, 0, sum);
+
+ ss = vec_splat(vec_sums(ss, vec_splat_s32(0)), 3);
+
+ vec_ste((uint32x4_t)ss, 0, sse);
+}
+
+/* Identical to the variance call except it takes an additional parameter,
+ * sum, and returns that value by pointer instead of returning
+ * sse - sum^2 / (w * h).
+ */
+#define GET_VAR(W, H) \
+ void vpx_get##W##x##H##var_vsx(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ uint32_t *sse, int *sum) { \
+ variance(a, a_stride, b, b_stride, W, H, sse, sum); \
+ }
+
+/* Identical to the variance call except it does not calculate
+ * sse - sum^2 / (w * h); it returns sse in addition to modifying the
+ * passed-in variable.
+ */
+#define MSE(W, H) \
+ uint32_t vpx_mse##W##x##H##_vsx(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse; \
+ }
+
+#define VAR(W, H) \
+ uint32_t vpx_variance##W##x##H##_vsx(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ uint32_t *sse) { \
+ int sum; \
+ variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \
+ }
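+// i.e. the textbook identity variance = sse - sum^2 / (W * H); sum is widened
+// to 64 bits because for 64x64 blocks sum^2 can exceed 32 bits.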
+
+#define VARIANCES(W, H) VAR(W, H)
+
+VARIANCES(64, 64)
+VARIANCES(64, 32)
+VARIANCES(32, 64)
+VARIANCES(32, 32)
+VARIANCES(32, 16)
+VARIANCES(16, 32)
+VARIANCES(16, 16)
+VARIANCES(16, 8)
+VARIANCES(8, 16)
+VARIANCES(8, 8)
+VARIANCES(8, 4)
+VARIANCES(4, 8)
+VARIANCES(4, 4)
+
+GET_VAR(16, 16)
+GET_VAR(8, 8)
+
+MSE(16, 16)
+MSE(16, 8)
+MSE(8, 16)
+MSE(8, 8)
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ssim.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ssim.c
index 7a29bd29f9f..ba73eb293a4 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ssim.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ssim.c
@@ -284,7 +284,7 @@ double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
for (i = 0; i < height;
i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
for (j = 0; j < width; j += 4, ++c) {
- Ssimv sv = { 0 };
+ Ssimv sv = { 0, 0, 0, 0, 0, 0 };
double ssim;
double ssim2;
double dssim;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sum_squares.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sum_squares.c
index 7c535ac2db6..b80cd588e42 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sum_squares.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sum_squares.c
@@ -10,8 +10,7 @@
#include "./vpx_dsp_rtcd.h"
-uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int src_stride,
- int size) {
+uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int stride, int size) {
int r, c;
uint64_t ss = 0;
@@ -20,7 +19,7 @@ uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int src_stride,
const int16_t v = src[c];
ss += v * v;
}
- src += src_stride;
+ src += stride;
}
return ss;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
index 16701103498..cb06a476f2a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
@@ -286,6 +286,7 @@ DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c
DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3.c
DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx.c
DSP_SRCS-$(HAVE_NEON) += arm/quantize_neon.c
+DSP_SRCS-$(HAVE_VSX) += ppc/quantize_vsx.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
endif
@@ -312,6 +313,7 @@ ifeq ($(CONFIG_ENCODERS),yes)
DSP_SRCS-yes += sad.c
DSP_SRCS-yes += subtract.c
DSP_SRCS-yes += sum_squares.c
+DSP_SRCS-$(HAVE_NEON) += arm/sum_squares_neon.c
DSP_SRCS-$(HAVE_SSE2) += x86/sum_squares_sse2.c
DSP_SRCS-$(HAVE_MSA) += mips/sum_squares_msa.c
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
index a51761cd3c3..824ae0f43b0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -363,7 +363,7 @@ add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride,
specialize qw/vpx_convolve_copy neon dspr2 msa sse2 vsx/;
add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx/;
+specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx mmi/;
add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
@@ -378,7 +378,7 @@ add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride,
specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx/;
+specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi/;
@@ -626,7 +626,7 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
specialize qw/vpx_idct32x32_1_add neon sse2/;
- specialize qw/vpx_iwht4x4_16_add sse2/;
+ specialize qw/vpx_iwht4x4_16_add sse2 vsx/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones.
@@ -699,10 +699,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_quantize_b neon sse2 ssse3 avx/;
+ specialize qw/vpx_quantize_b neon sse2 ssse3 avx vsx/;
add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
- specialize qw/vpx_quantize_b_32x32 neon ssse3 avx/;
+ specialize qw/vpx_quantize_b_32x32 neon ssse3 avx vsx/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
@@ -922,7 +922,7 @@ add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const
specialize qw/vpx_sad4x4x4d neon msa sse2 mmi/;
add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
-specialize qw/vpx_sum_squares_2d_i16 sse2 msa/;
+specialize qw/vpx_sum_squares_2d_i16 neon sse2 msa/;
#
# Structured Similarity (SSIM)
@@ -1082,64 +1082,64 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "
# Variance
#
add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x64 sse2 neon msa mmi/;
+ specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x32 sse2 neon msa mmi/;
+ specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x8 sse2 neon msa mmi/;
+ specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x16 sse2 neon msa mmi/;
+ specialize qw/vpx_variance8x16 sse2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x8 sse2 neon msa mmi/;
+ specialize qw/vpx_variance8x8 sse2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x4 sse2 neon msa mmi/;
+ specialize qw/vpx_variance8x4 sse2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x8 sse2 neon msa mmi/;
+ specialize qw/vpx_variance4x8 sse2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x4 sse2 neon msa mmi/;
+ specialize qw/vpx_variance4x4 sse2 neon msa mmi vsx/;
#
# Specialty Variance
#
add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get16x16var sse2 avx2 neon msa/;
+ specialize qw/vpx_get16x16var sse2 avx2 neon msa vsx/;
add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get8x8var sse2 neon msa/;
+ specialize qw/vpx_get8x8var sse2 neon msa vsx/;
add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi/;
+ specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi vsx/;
add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse16x8 sse2 msa mmi/;
+ specialize qw/vpx_mse16x8 sse2 avx2 msa mmi vsx/;
add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x16 sse2 msa mmi/;
+ specialize qw/vpx_mse8x16 sse2 msa mmi vsx/;
add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vpx_mse8x8 sse2 msa mmi/;
+ specialize qw/vpx_mse8x8 sse2 msa mmi vsx/;
add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
specialize qw/vpx_get_mb_ss sse2 msa vsx/;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/mem_sse2.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/mem_sse2.h
index 2ce738fb770..419f1786309 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/mem_sse2.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/mem_sse2.h
@@ -15,6 +15,11 @@
#include "./vpx_config.h"
+static INLINE __m128i loadh_epi64(const __m128i s, const void *const src) {
+ return _mm_castps_si128(
+ _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src));
+}
+
static INLINE void load_8bit_4x4(const uint8_t *const s, const ptrdiff_t stride,
__m128i *const d) {
d[0] = _mm_cvtsi32_si128(*(const int *)(s + 0 * stride));
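The new loadh_epi64() helper above fills only the upper 64 bits of an existing register (via _mm_loadh_pi), letting two 8-byte rows share one XMM register instead of costing one load plus an unpack each. A minimal usage sketch, assuming rows of four int16 samples as in the sum-of-squares 4x4 path below:

    /* Pack rows 0 and 1 (four int16 samples each) into one 128-bit register:
       low half from row 0, high half from row 1. */
    __m128i rows01 = _mm_loadl_epi64((const __m128i *)(src + 0 * stride));
    rows01 = loadh_epi64(rows01, src + 1 * stride);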
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sum_squares_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sum_squares_sse2.c
index 026d0ca2f27..9eaf6ee1b8f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sum_squares_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sum_squares_sse2.c
@@ -10,120 +10,96 @@
#include <assert.h>
#include <emmintrin.h>
-#include <stdio.h>
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/x86/mem_sse2.h"
-static uint64_t vpx_sum_squares_2d_i16_4x4_sse2(const int16_t *src,
- int stride) {
- const __m128i v_val_0_w =
- _mm_loadl_epi64((const __m128i *)(src + 0 * stride));
- const __m128i v_val_1_w =
- _mm_loadl_epi64((const __m128i *)(src + 1 * stride));
- const __m128i v_val_2_w =
- _mm_loadl_epi64((const __m128i *)(src + 2 * stride));
- const __m128i v_val_3_w =
- _mm_loadl_epi64((const __m128i *)(src + 3 * stride));
-
- const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w);
- const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w);
- const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w);
- const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w);
-
- const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d);
- const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d);
- const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d);
-
- const __m128i v_sum_d =
- _mm_add_epi32(v_sum_0123_d, _mm_srli_epi64(v_sum_0123_d, 32));
-
- return (uint64_t)_mm_cvtsi128_si32(v_sum_d);
-}
-
-// TODO(jingning): Evaluate the performance impact here.
-#ifdef __GNUC__
-// This prevents GCC/Clang from inlining this function into
-// vpx_sum_squares_2d_i16_sse2, which in turn saves some stack
-// maintenance instructions in the common case of 4x4.
-__attribute__((noinline))
-#endif
-static uint64_t
-vpx_sum_squares_2d_i16_nxn_sse2(const int16_t *src, int stride, int size) {
- int r, c;
- const __m128i v_zext_mask_q = _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff);
- __m128i v_acc_q = _mm_setzero_si128();
-
- for (r = 0; r < size; r += 8) {
- __m128i v_acc_d = _mm_setzero_si128();
-
- for (c = 0; c < size; c += 8) {
- const int16_t *b = src + c;
- const __m128i v_val_0_w =
- _mm_load_si128((const __m128i *)(b + 0 * stride));
- const __m128i v_val_1_w =
- _mm_load_si128((const __m128i *)(b + 1 * stride));
- const __m128i v_val_2_w =
- _mm_load_si128((const __m128i *)(b + 2 * stride));
- const __m128i v_val_3_w =
- _mm_load_si128((const __m128i *)(b + 3 * stride));
- const __m128i v_val_4_w =
- _mm_load_si128((const __m128i *)(b + 4 * stride));
- const __m128i v_val_5_w =
- _mm_load_si128((const __m128i *)(b + 5 * stride));
- const __m128i v_val_6_w =
- _mm_load_si128((const __m128i *)(b + 6 * stride));
- const __m128i v_val_7_w =
- _mm_load_si128((const __m128i *)(b + 7 * stride));
-
- const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w);
- const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w);
- const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w);
- const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w);
- const __m128i v_sq_4_d = _mm_madd_epi16(v_val_4_w, v_val_4_w);
- const __m128i v_sq_5_d = _mm_madd_epi16(v_val_5_w, v_val_5_w);
- const __m128i v_sq_6_d = _mm_madd_epi16(v_val_6_w, v_val_6_w);
- const __m128i v_sq_7_d = _mm_madd_epi16(v_val_7_w, v_val_7_w);
-
- const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d);
- const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d);
- const __m128i v_sum_45_d = _mm_add_epi32(v_sq_4_d, v_sq_5_d);
- const __m128i v_sum_67_d = _mm_add_epi32(v_sq_6_d, v_sq_7_d);
-
- const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d);
- const __m128i v_sum_4567_d = _mm_add_epi32(v_sum_45_d, v_sum_67_d);
-
- v_acc_d = _mm_add_epi32(v_acc_d, v_sum_0123_d);
- v_acc_d = _mm_add_epi32(v_acc_d, v_sum_4567_d);
- }
-
- v_acc_q = _mm_add_epi64(v_acc_q, _mm_and_si128(v_acc_d, v_zext_mask_q));
- v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_epi64(v_acc_d, 32));
+uint64_t vpx_sum_squares_2d_i16_sse2(const int16_t *src, int stride, int size) {
+ // Over 75% of all calls are with size == 4.
+ if (size == 4) {
+ __m128i s[2], sq[2], ss;
+
+ s[0] = _mm_loadl_epi64((const __m128i *)(src + 0 * stride));
+ s[0] = loadh_epi64(s[0], src + 1 * stride);
+ s[1] = _mm_loadl_epi64((const __m128i *)(src + 2 * stride));
+ s[1] = loadh_epi64(s[1], src + 3 * stride);
+ sq[0] = _mm_madd_epi16(s[0], s[0]);
+ sq[1] = _mm_madd_epi16(s[1], s[1]);
+ sq[0] = _mm_add_epi32(sq[0], sq[1]);
+ ss = _mm_add_epi32(sq[0], _mm_srli_si128(sq[0], 8));
+ ss = _mm_add_epi32(ss, _mm_srli_epi64(ss, 32));
+
+ return (uint64_t)_mm_cvtsi128_si32(ss);
+ } else {
+ // Generic case
+ int r = size;
+ const __m128i v_zext_mask_q = _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff);
+ __m128i v_acc_q = _mm_setzero_si128();
- src += 8 * stride;
- }
+ assert(size % 8 == 0);
- v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
+ do {
+ int c = 0;
+ __m128i v_acc_d = _mm_setzero_si128();
+
+ do {
+ const int16_t *const b = src + c;
+ const __m128i v_val_0_w =
+ _mm_load_si128((const __m128i *)(b + 0 * stride));
+ const __m128i v_val_1_w =
+ _mm_load_si128((const __m128i *)(b + 1 * stride));
+ const __m128i v_val_2_w =
+ _mm_load_si128((const __m128i *)(b + 2 * stride));
+ const __m128i v_val_3_w =
+ _mm_load_si128((const __m128i *)(b + 3 * stride));
+ const __m128i v_val_4_w =
+ _mm_load_si128((const __m128i *)(b + 4 * stride));
+ const __m128i v_val_5_w =
+ _mm_load_si128((const __m128i *)(b + 5 * stride));
+ const __m128i v_val_6_w =
+ _mm_load_si128((const __m128i *)(b + 6 * stride));
+ const __m128i v_val_7_w =
+ _mm_load_si128((const __m128i *)(b + 7 * stride));
+
+ const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w);
+ const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w);
+ const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w);
+ const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w);
+ const __m128i v_sq_4_d = _mm_madd_epi16(v_val_4_w, v_val_4_w);
+ const __m128i v_sq_5_d = _mm_madd_epi16(v_val_5_w, v_val_5_w);
+ const __m128i v_sq_6_d = _mm_madd_epi16(v_val_6_w, v_val_6_w);
+ const __m128i v_sq_7_d = _mm_madd_epi16(v_val_7_w, v_val_7_w);
+
+ const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d);
+ const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d);
+ const __m128i v_sum_45_d = _mm_add_epi32(v_sq_4_d, v_sq_5_d);
+ const __m128i v_sum_67_d = _mm_add_epi32(v_sq_6_d, v_sq_7_d);
+
+ const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d);
+ const __m128i v_sum_4567_d = _mm_add_epi32(v_sum_45_d, v_sum_67_d);
+
+ v_acc_d = _mm_add_epi32(v_acc_d, v_sum_0123_d);
+ v_acc_d = _mm_add_epi32(v_acc_d, v_sum_4567_d);
+ c += 8;
+ } while (c < size);
+
+ v_acc_q = _mm_add_epi64(v_acc_q, _mm_and_si128(v_acc_d, v_zext_mask_q));
+ v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_epi64(v_acc_d, 32));
+
+ src += 8 * stride;
+ r -= 8;
+ } while (r);
+
+ v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
#if ARCH_X86_64
- return (uint64_t)_mm_cvtsi128_si64(v_acc_q);
+ return (uint64_t)_mm_cvtsi128_si64(v_acc_q);
#else
- {
- uint64_t tmp;
- _mm_storel_epi64((__m128i *)&tmp, v_acc_q);
- return tmp;
- }
+ {
+ uint64_t tmp;
+ _mm_storel_epi64((__m128i *)&tmp, v_acc_q);
+ return tmp;
+ }
#endif
-}
-
-uint64_t vpx_sum_squares_2d_i16_sse2(const int16_t *src, int stride, int size) {
- // 4 elements per row only requires half an XMM register, so this
- // must be a special case, but also note that over 75% of all calls
- // are with size == 4, so it is also the common case.
- if (size == 4) {
- return vpx_sum_squares_2d_i16_4x4_sse2(src, stride);
- } else {
- // Generic case
- assert(size % 8 == 0);
- return vpx_sum_squares_2d_i16_nxn_sse2(src, stride, size);
}
}
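The rewrite folds the old 4x4 and NxN helpers into one function: the 4x4 fast path packs four 4-sample rows into two registers with the loadl/loadh pairing shown above, and both paths lean on _mm_madd_epi16(x, x), which squares eight int16 lanes and pairwise-adds them into four int32 lanes. Either way the value returned is simply the sum of squared samples over a size x size block; a scalar sketch in the spirit of the C reference vpx_sum_squares_2d_i16_c:

    #include <stdint.h>

    static uint64_t sum_squares_2d_i16(const int16_t *src, int stride,
                                       int size) {
      uint64_t ss = 0;
      int r, c;
      for (r = 0; r < size; ++r) {
        for (c = 0; c < size; ++c) {
          const int v = src[c];    /* int16 sample, |v| <= 32768 */
          ss += (uint64_t)(v * v); /* v*v fits comfortably in 31 bits */
        }
        src += stride;
      }
      return ss;
    }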
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c
index d15a89c746b..d938b81ea2c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c
@@ -38,130 +38,140 @@ DECLARE_ALIGNED(32, static const int8_t, adjacent_sub_avx2[32]) = {
};
/* clang-format on */
-void vpx_get16x16var_avx2(const unsigned char *src_ptr, int source_stride,
- const unsigned char *ref_ptr, int recon_stride,
- unsigned int *sse, int *sum) {
- unsigned int i, src_2strides, ref_2strides;
- __m256i sum_reg = _mm256_setzero_si256();
- __m256i sse_reg = _mm256_setzero_si256();
- // process two 16 byte locations in a 256 bit register
- src_2strides = source_stride << 1;
- ref_2strides = recon_stride << 1;
- for (i = 0; i < 8; ++i) {
- // convert up values in 128 bit registers across lanes
- const __m256i src0 =
- _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const *)(src_ptr)));
- const __m256i src1 = _mm256_cvtepu8_epi16(
- _mm_loadu_si128((__m128i const *)(src_ptr + source_stride)));
- const __m256i ref0 =
- _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const *)(ref_ptr)));
- const __m256i ref1 = _mm256_cvtepu8_epi16(
- _mm_loadu_si128((__m128i const *)(ref_ptr + recon_stride)));
- const __m256i diff0 = _mm256_sub_epi16(src0, ref0);
- const __m256i diff1 = _mm256_sub_epi16(src1, ref1);
- const __m256i madd0 = _mm256_madd_epi16(diff0, diff0);
- const __m256i madd1 = _mm256_madd_epi16(diff1, diff1);
-
- // add to the running totals
- sum_reg = _mm256_add_epi16(sum_reg, _mm256_add_epi16(diff0, diff1));
- sse_reg = _mm256_add_epi32(sse_reg, _mm256_add_epi32(madd0, madd1));
-
- src_ptr += src_2strides;
- ref_ptr += ref_2strides;
- }
- {
- // extract the low lane and add it to the high lane
- const __m128i sum_reg_128 = _mm_add_epi16(
- _mm256_castsi256_si128(sum_reg), _mm256_extractf128_si256(sum_reg, 1));
- const __m128i sse_reg_128 = _mm_add_epi32(
- _mm256_castsi256_si128(sse_reg), _mm256_extractf128_si256(sse_reg, 1));
-
- // sum upper and lower 64 bits together and convert up to 32 bit values
- const __m128i sum_reg_64 =
- _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 8));
- const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64);
-
- // unpack sse and sum registers and add
- const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, sum_int32);
- const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, sum_int32);
- const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi);
-
- // perform the final summation and extract the results
- const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8));
- *((int *)sse) = _mm_cvtsi128_si32(res);
- *((int *)sum) = _mm_extract_epi32(res, 1);
+static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref,
+ __m256i *const sse,
+ __m256i *const sum) {
+ const __m256i adj_sub = _mm256_load_si256((__m256i const *)adjacent_sub_avx2);
+
+ // unpack into pairs of source and reference values
+ const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref);
+ const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref);
+
+ // subtract adjacent elements using src*1 + ref*-1
+ const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub);
+ const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub);
+ const __m256i madd0 = _mm256_madd_epi16(diff0, diff0);
+ const __m256i madd1 = _mm256_madd_epi16(diff1, diff1);
+
+ // add to the running totals
+ *sum = _mm256_add_epi16(*sum, _mm256_add_epi16(diff0, diff1));
+ *sse = _mm256_add_epi32(*sse, _mm256_add_epi32(madd0, madd1));
+}
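variance_kernel_avx2() gets the subtraction for free: the unpacks interleave source and reference bytes, and _mm256_maddubs_epi16 multiplies each (src, ref) byte pair by the alternating {1, -1} pattern in adjacent_sub_avx2 and adds the pair, so every 16-bit lane ends up holding src - ref with no separate widen-and-subtract. Saturation in maddubs is never hit since the result is bounded by +/-255. In scalar terms (annotation, not part of the patch):

    /* per 16-bit output lane of _mm256_maddubs_epi16(src_ref, adj_sub): */
    /*   lane = (uint8_t)src * (+1) + (uint8_t)ref * (-1) = src - ref    */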
+
+static INLINE void variance_final_from_32bit_sum_avx2(__m256i vsse,
+ __m128i vsum,
+ unsigned int *const sse,
+ int *const sum) {
+ // extract the low lane and add it to the high lane
+ const __m128i sse_reg_128 = _mm_add_epi32(_mm256_castsi256_si128(vsse),
+ _mm256_extractf128_si256(vsse, 1));
+
+ // unpack sse and sum registers and add
+ const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, vsum);
+ const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, vsum);
+ const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi);
+
+ // perform the final summation and extract the results
+ const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8));
+ *((int *)sse) = _mm_cvtsi128_si32(res);
+ *((int *)sum) = _mm_extract_epi32(res, 1);
+}
+
+static INLINE void variance_final_from_16bit_sum_avx2(__m256i vsse,
+ __m256i vsum,
+ unsigned int *const sse,
+ int *const sum) {
+ // extract the low lane and add it to the high lane
+ const __m128i sum_reg_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum),
+ _mm256_extractf128_si256(vsum, 1));
+ const __m128i sum_reg_64 =
+ _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 8));
+ const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64);
+
+ variance_final_from_32bit_sum_avx2(vsse, sum_int32, sse, sum);
+}
+
+static INLINE __m256i sum_to_32bit_avx2(const __m256i sum) {
+ const __m256i sum_lo = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(sum));
+ const __m256i sum_hi =
+ _mm256_cvtepi16_epi32(_mm256_extractf128_si256(sum, 1));
+ return _mm256_add_epi32(sum_lo, sum_hi);
+}
+
+static INLINE void variance16_kernel_avx2(
+ const uint8_t *const src, const int src_stride, const uint8_t *const ref,
+ const int ref_stride, __m256i *const sse, __m256i *const sum) {
+ const __m128i s0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride));
+ const __m128i s1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride));
+ const __m128i r0 = _mm_loadu_si128((__m128i const *)(ref + 0 * ref_stride));
+ const __m128i r1 = _mm_loadu_si128((__m128i const *)(ref + 1 * ref_stride));
+ const __m256i s = _mm256_inserti128_si256(_mm256_castsi128_si256(s0), s1, 1);
+ const __m256i r = _mm256_inserti128_si256(_mm256_castsi128_si256(r0), r1, 1);
+ variance_kernel_avx2(s, r, sse, sum);
+}
+
+static INLINE void variance32_kernel_avx2(const uint8_t *const src,
+ const uint8_t *const ref,
+ __m256i *const sse,
+ __m256i *const sum) {
+ const __m256i s = _mm256_loadu_si256((__m256i const *)(src));
+ const __m256i r = _mm256_loadu_si256((__m256i const *)(ref));
+ variance_kernel_avx2(s, r, sse, sum);
+}
+
+static INLINE void variance16_avx2(const uint8_t *src, const int src_stride,
+ const uint8_t *ref, const int ref_stride,
+ const int h, __m256i *const vsse,
+ __m256i *const vsum) {
+ int i;
+ *vsum = _mm256_setzero_si256();
+ *vsse = _mm256_setzero_si256();
+
+ for (i = 0; i < h; i += 2) {
+ variance16_kernel_avx2(src, src_stride, ref, ref_stride, vsse, vsum);
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
}
}
-static void get32x16var_avx2(const unsigned char *src_ptr, int source_stride,
- const unsigned char *ref_ptr, int recon_stride,
- unsigned int *sse, int *sum) {
- unsigned int i, src_2strides, ref_2strides;
- const __m256i adj_sub = _mm256_load_si256((__m256i const *)adjacent_sub_avx2);
- __m256i sum_reg = _mm256_setzero_si256();
- __m256i sse_reg = _mm256_setzero_si256();
+static INLINE void variance32_avx2(const uint8_t *src, const int src_stride,
+ const uint8_t *ref, const int ref_stride,
+ const int h, __m256i *const vsse,
+ __m256i *const vsum) {
+ int i;
+ *vsum = _mm256_setzero_si256();
+ *vsse = _mm256_setzero_si256();
- // process 64 elements in an iteration
- src_2strides = source_stride << 1;
- ref_2strides = recon_stride << 1;
- for (i = 0; i < 8; i++) {
- const __m256i src0 = _mm256_loadu_si256((__m256i const *)(src_ptr));
- const __m256i src1 =
- _mm256_loadu_si256((__m256i const *)(src_ptr + source_stride));
- const __m256i ref0 = _mm256_loadu_si256((__m256i const *)(ref_ptr));
- const __m256i ref1 =
- _mm256_loadu_si256((__m256i const *)(ref_ptr + recon_stride));
-
- // unpack into pairs of source and reference values
- const __m256i src_ref0 = _mm256_unpacklo_epi8(src0, ref0);
- const __m256i src_ref1 = _mm256_unpackhi_epi8(src0, ref0);
- const __m256i src_ref2 = _mm256_unpacklo_epi8(src1, ref1);
- const __m256i src_ref3 = _mm256_unpackhi_epi8(src1, ref1);
-
- // subtract adjacent elements using src*1 + ref*-1
- const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub);
- const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub);
- const __m256i diff2 = _mm256_maddubs_epi16(src_ref2, adj_sub);
- const __m256i diff3 = _mm256_maddubs_epi16(src_ref3, adj_sub);
- const __m256i madd0 = _mm256_madd_epi16(diff0, diff0);
- const __m256i madd1 = _mm256_madd_epi16(diff1, diff1);
- const __m256i madd2 = _mm256_madd_epi16(diff2, diff2);
- const __m256i madd3 = _mm256_madd_epi16(diff3, diff3);
-
- // add to the running totals
- sum_reg = _mm256_add_epi16(sum_reg, _mm256_add_epi16(diff0, diff1));
- sum_reg = _mm256_add_epi16(sum_reg, _mm256_add_epi16(diff2, diff3));
- sse_reg = _mm256_add_epi32(sse_reg, _mm256_add_epi32(madd0, madd1));
- sse_reg = _mm256_add_epi32(sse_reg, _mm256_add_epi32(madd2, madd3));
-
- src_ptr += src_2strides;
- ref_ptr += ref_2strides;
+ for (i = 0; i < h; i++) {
+ variance32_kernel_avx2(src, ref, vsse, vsum);
+ src += src_stride;
+ ref += ref_stride;
}
+}
+
+static INLINE void variance64_avx2(const uint8_t *src, const int src_stride,
+ const uint8_t *ref, const int ref_stride,
+ const int h, __m256i *const vsse,
+ __m256i *const vsum) {
+ int i;
+ *vsum = _mm256_setzero_si256();
- {
- // extract the low lane and add it to the high lane
- const __m128i sum_reg_128 = _mm_add_epi16(
- _mm256_castsi256_si128(sum_reg), _mm256_extractf128_si256(sum_reg, 1));
- const __m128i sse_reg_128 = _mm_add_epi32(
- _mm256_castsi256_si128(sse_reg), _mm256_extractf128_si256(sse_reg, 1));
-
- // sum upper and lower 64 bits together and convert up to 32 bit values
- const __m128i sum_reg_64 =
- _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 8));
- const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64);
-
- // unpack sse and sum registers and add
- const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, sum_int32);
- const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, sum_int32);
- const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi);
-
- // perform the final summation and extract the results
- const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8));
- *((int *)sse) = _mm_cvtsi128_si32(res);
- *((int *)sum) = _mm_extract_epi32(res, 1);
+ for (i = 0; i < h; i++) {
+ variance32_kernel_avx2(src + 0, ref + 0, vsse, vsum);
+ variance32_kernel_avx2(src + 32, ref + 32, vsse, vsum);
+ src += src_stride;
+ ref += ref_stride;
}
}
+void vpx_get16x16var_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, unsigned int *sse,
+ int *sum) {
+ __m256i vsse, vsum;
+ variance16_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance_final_from_16bit_sum_avx2(vsse, vsum, sse, sum);
+}
+
#define FILTER_SRC(filter) \
/* filter the source */ \
exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \
@@ -593,50 +603,43 @@ typedef void (*get_var_avx2)(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse, int *sum);
-static void variance_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride, int w, int h,
- unsigned int *sse, int *sum, get_var_avx2 var_fn,
- int block_size) {
- int i, j;
-
- *sse = 0;
- *sum = 0;
-
- for (i = 0; i < h; i += 16) {
- for (j = 0; j < w; j += block_size) {
- unsigned int sse0;
- int sum0;
- var_fn(&src[src_stride * i + j], src_stride, &ref[ref_stride * i + j],
- ref_stride, &sse0, &sum0);
- *sse += sse0;
- *sum += sum0;
- }
- }
+unsigned int vpx_variance16x8_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ __m256i vsse, vsum;
+ variance16_avx2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
+ return *sse - (uint32_t)(((int64_t)sum * sum) >> 7);
}
unsigned int vpx_variance16x16_avx2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 16, 16, sse, &sum,
- vpx_get16x16var_avx2, 16);
+ __m256i vsse, vsum;
+ variance16_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 8);
}
-unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
+unsigned int vpx_variance16x32_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
int sum;
- vpx_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse;
+ __m256i vsse, vsum;
+ variance16_avx2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
+ return *sse - (uint32_t)(((int64_t)sum * sum) >> 9);
}
unsigned int vpx_variance32x16_avx2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 32, 16, sse, &sum,
- get32x16var_avx2, 32);
+ __m256i vsse, vsum;
+ variance32_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 9);
}
@@ -644,29 +647,87 @@ unsigned int vpx_variance32x32_avx2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 32, 32, sse, &sum,
- get32x16var_avx2, 32);
+ __m256i vsse, vsum;
+ __m128i vsum_128;
+ variance32_avx2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ vsum_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum),
+ _mm256_extractf128_si256(vsum, 1));
+ vsum_128 = _mm_add_epi32(_mm_cvtepi16_epi32(vsum_128),
+ _mm_cvtepi16_epi32(_mm_srli_si128(vsum_128, 8)));
+ variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 10);
}
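All of these wrappers finish with the same correction, *sse - ((int64_t)sum * sum >> k), where k = log2(w * h): 7 for 16x8, 8 for 16x16, 9 for 16x32 and 32x16, 10 for 32x32, 11 for 32x64 and 64x32, and 12 for 64x64. The 64-bit cast is load-bearing: for a 64x64 block |sum| can reach 4096 * 255 = 1044480, whose square overflows 32 bits.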
-unsigned int vpx_variance64x64_avx2(const uint8_t *src, int src_stride,
+unsigned int vpx_variance32x64_avx2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 64, 64, sse, &sum,
- get32x16var_avx2, 32);
- return *sse - (uint32_t)(((int64_t)sum * sum) >> 12);
+ __m256i vsse, vsum;
+ __m128i vsum_128;
+ variance32_avx2(src, src_stride, ref, ref_stride, 64, &vsse, &vsum);
+ vsum = sum_to_32bit_avx2(vsum);
+ vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum),
+ _mm256_extractf128_si256(vsum, 1));
+ variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum);
+ return *sse - (uint32_t)(((int64_t)sum * sum) >> 11);
}
unsigned int vpx_variance64x32_avx2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m256i vsse = _mm256_setzero_si256();
+ __m256i vsum = _mm256_setzero_si256();
+ __m128i vsum_128;
int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 64, 32, sse, &sum,
- get32x16var_avx2, 32);
+ variance64_avx2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ vsum = sum_to_32bit_avx2(vsum);
+ vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum),
+ _mm256_extractf128_si256(vsum, 1));
+ variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 11);
}
+unsigned int vpx_variance64x64_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ __m256i vsse = _mm256_setzero_si256();
+ __m256i vsum = _mm256_setzero_si256();
+ __m128i vsum_128;
+ int sum;
+ int i = 0;
+
+ for (i = 0; i < 2; i++) {
+ __m256i vsum16;
+ variance64_avx2(src + 32 * i * src_stride, src_stride,
+ ref + 32 * i * ref_stride, ref_stride, 32, &vsse, &vsum16);
+ vsum = _mm256_add_epi32(vsum, sum_to_32bit_avx2(vsum16));
+ }
+ vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum),
+ _mm256_extractf128_si256(vsum, 1));
+ variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum);
+ return *sse - (unsigned int)(((int64_t)sum * sum) >> 12);
+}
+
+unsigned int vpx_mse16x8_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ __m256i vsse, vsum;
+ variance16_avx2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
+ return *sse;
+}
+
+unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ __m256i vsse, vsum;
+ variance16_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum);
+ return *sse;
+}
+
unsigned int vpx_sub_pixel_variance64x64_avx2(const uint8_t *src,
int src_stride, int x_offset,
int y_offset, const uint8_t *dst,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c
index 8d8bf183b28..a2a13a68b67 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c
@@ -8,16 +8,18 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
#include <emmintrin.h> // SSE2
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
-
#include "vpx_ports/mem.h"
-typedef void (*getNxMvar_fn_t)(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
- unsigned int *sse, int *sum);
+static INLINE unsigned int add32x4_sse2(__m128i val) {
+ val = _mm_add_epi32(val, _mm_srli_si128(val, 8));
+ val = _mm_add_epi32(val, _mm_srli_si128(val, 4));
+ return _mm_cvtsi128_si32(val);
+}
unsigned int vpx_get_mb_ss_sse2(const int16_t *src) {
__m128i vsum = _mm_setzero_si128();
@@ -29,254 +31,360 @@ unsigned int vpx_get_mb_ss_sse2(const int16_t *src) {
src += 8;
}
- vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
- vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
- return _mm_cvtsi128_si32(vsum);
+ return add32x4_sse2(vsum);
}
-#define READ64(p, stride, i) \
- _mm_unpacklo_epi8( \
- _mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \
- _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride)))
+static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) {
+ const __m128i p0 = _mm_cvtsi32_si128(*(const uint32_t *)(p + 0 * stride));
+ const __m128i p1 = _mm_cvtsi32_si128(*(const uint32_t *)(p + 1 * stride));
+ const __m128i p01 = _mm_unpacklo_epi32(p0, p1);
+ return _mm_unpacklo_epi8(p01, _mm_setzero_si128());
+}
+
+static INLINE void variance_kernel_sse2(const __m128i src, const __m128i ref,
+ __m128i *const sse,
+ __m128i *const sum) {
+ const __m128i diff = _mm_sub_epi16(src, ref);
+ *sse = _mm_add_epi32(*sse, _mm_madd_epi16(diff, diff));
+ *sum = _mm_add_epi16(*sum, diff);
+}
+
+// Can handle 128 pixels' diff sum (such as 8x16 or 16x8)
+// Slightly faster than variance_final_256_pel_sse2()
+static INLINE void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum,
+ unsigned int *const sse,
+ int *const sum) {
+ *sse = add32x4_sse2(vsse);
-static void get4x4var_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse, int *sum) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero);
- const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero);
- const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero);
- const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero);
- const __m128i diff0 = _mm_sub_epi16(src0, ref0);
- const __m128i diff1 = _mm_sub_epi16(src1, ref1);
-
- // sum
- __m128i vsum = _mm_add_epi16(diff0, diff1);
vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2));
*sum = (int16_t)_mm_extract_epi16(vsum, 0);
+}
+
+// Can handle 256 pixels' diff sum (such as 16x16)
+static INLINE void variance_final_256_pel_sse2(__m128i vsse, __m128i vsum,
+ unsigned int *const sse,
+ int *const sum) {
+ *sse = add32x4_sse2(vsse);
- // sse
- vsum =
- _mm_add_epi32(_mm_madd_epi16(diff0, diff0), _mm_madd_epi16(diff1, diff1));
- vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
- vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
- *sse = _mm_cvtsi128_si32(vsum);
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
+ *sum = (int16_t)_mm_extract_epi16(vsum, 0);
+ *sum += (int16_t)_mm_extract_epi16(vsum, 1);
}
-void vpx_get8x8var_sse2(const uint8_t *src, int src_stride, const uint8_t *ref,
- int ref_stride, unsigned int *sse, int *sum) {
- const __m128i zero = _mm_setzero_si128();
- __m128i vsum = _mm_setzero_si128();
- __m128i vsse = _mm_setzero_si128();
+// Can handle 512 pixels' diff sum (such as 16x32 or 32x16)
+static INLINE void variance_final_512_pel_sse2(__m128i vsse, __m128i vsum,
+ unsigned int *const sse,
+ int *const sum) {
+ *sse = add32x4_sse2(vsse);
+
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
+ vsum = _mm_unpacklo_epi16(vsum, vsum);
+ vsum = _mm_srai_epi32(vsum, 16);
+ *sum = add32x4_sse2(vsum);
+}
+
+static INLINE __m128i sum_to_32bit_sse2(const __m128i sum) {
+ const __m128i sum_lo = _mm_srai_epi32(_mm_unpacklo_epi16(sum, sum), 16);
+ const __m128i sum_hi = _mm_srai_epi32(_mm_unpackhi_epi16(sum, sum), 16);
+ return _mm_add_epi32(sum_lo, sum_hi);
+}
+
+// Can handle 1024 pixels' diff sum (such as 32x32)
+static INLINE int sum_final_sse2(const __m128i sum) {
+ const __m128i t = sum_to_32bit_sse2(sum);
+ return add32x4_sse2(t);
+}
+
+static INLINE void variance4_sse2(const uint8_t *src, const int src_stride,
+ const uint8_t *ref, const int ref_stride,
+ const int h, __m128i *const sse,
+ __m128i *const sum) {
int i;
- for (i = 0; i < 8; i += 2) {
- const __m128i src0 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((const __m128i *)(src + i * src_stride)), zero);
- const __m128i ref0 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((const __m128i *)(ref + i * ref_stride)), zero);
- const __m128i diff0 = _mm_sub_epi16(src0, ref0);
-
- const __m128i src1 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((const __m128i *)(src + (i + 1) * src_stride)), zero);
- const __m128i ref1 = _mm_unpacklo_epi8(
- _mm_loadl_epi64((const __m128i *)(ref + (i + 1) * ref_stride)), zero);
- const __m128i diff1 = _mm_sub_epi16(src1, ref1);
-
- vsum = _mm_add_epi16(vsum, diff0);
- vsum = _mm_add_epi16(vsum, diff1);
- vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0));
- vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1));
+ assert(h <= 256); // May overflow for larger height.
+ *sse = _mm_setzero_si128();
+ *sum = _mm_setzero_si128();
+
+ for (i = 0; i < h; i += 2) {
+ const __m128i s = load4x2_sse2(src, src_stride);
+ const __m128i r = load4x2_sse2(ref, ref_stride);
+
+ variance_kernel_sse2(s, r, sse, sum);
+ src += 2 * src_stride;
+ ref += 2 * ref_stride;
}
+}
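The height asserts here and in the variance8/16/32/64 helpers below are 16-bit overflow budgets. Each lane of *sum accumulates one 8-bit difference in [-255, 255] per kernel invocation, and the running total must stay inside int16. variance4 packs two rows per iteration, so a lane sees h/2 differences and h = 256 still gives only 128 * 255 = 32640 < 32767; variance8 sees h per lane (h <= 128), variance16 two per row (h <= 64), variance32 four (h <= 32), and variance64 eight (h <= 16). Blocks taller than the budget are split by their callers further down, which widen the partial sums through sum_to_32bit_sse2() between passes.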
- // sum
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2));
- *sum = (int16_t)_mm_extract_epi16(vsum, 0);
+static INLINE void variance8_sse2(const uint8_t *src, const int src_stride,
+ const uint8_t *ref, const int ref_stride,
+ const int h, __m128i *const sse,
+ __m128i *const sum) {
+ const __m128i zero = _mm_setzero_si128();
+ int i;
- // sse
- vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
- vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
- *sse = _mm_cvtsi128_si32(vsse);
+ assert(h <= 128); // May overflow for larger height.
+ *sse = _mm_setzero_si128();
+ *sum = _mm_setzero_si128();
+
+ for (i = 0; i < h; i++) {
+ const __m128i s =
+ _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)src), zero);
+ const __m128i r =
+ _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)ref), zero);
+
+ variance_kernel_sse2(s, r, sse, sum);
+ src += src_stride;
+ ref += ref_stride;
+ }
}
-void vpx_get16x16var_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride, unsigned int *sse,
- int *sum) {
+static INLINE void variance16_kernel_sse2(const uint8_t *const src,
+ const uint8_t *const ref,
+ __m128i *const sse,
+ __m128i *const sum) {
const __m128i zero = _mm_setzero_si128();
- __m128i vsum = _mm_setzero_si128();
- __m128i vsse = _mm_setzero_si128();
+ const __m128i s = _mm_loadu_si128((const __m128i *)src);
+ const __m128i r = _mm_loadu_si128((const __m128i *)ref);
+ const __m128i src0 = _mm_unpacklo_epi8(s, zero);
+ const __m128i ref0 = _mm_unpacklo_epi8(r, zero);
+ const __m128i src1 = _mm_unpackhi_epi8(s, zero);
+ const __m128i ref1 = _mm_unpackhi_epi8(r, zero);
+
+ variance_kernel_sse2(src0, ref0, sse, sum);
+ variance_kernel_sse2(src1, ref1, sse, sum);
+}
+
+static INLINE void variance16_sse2(const uint8_t *src, const int src_stride,
+ const uint8_t *ref, const int ref_stride,
+ const int h, __m128i *const sse,
+ __m128i *const sum) {
int i;
- for (i = 0; i < 16; ++i) {
- const __m128i s = _mm_loadu_si128((const __m128i *)src);
- const __m128i r = _mm_loadu_si128((const __m128i *)ref);
+ assert(h <= 64); // May overflow for larger height.
+ *sse = _mm_setzero_si128();
+ *sum = _mm_setzero_si128();
- const __m128i src0 = _mm_unpacklo_epi8(s, zero);
- const __m128i ref0 = _mm_unpacklo_epi8(r, zero);
- const __m128i diff0 = _mm_sub_epi16(src0, ref0);
+ for (i = 0; i < h; ++i) {
+ variance16_kernel_sse2(src, ref, sse, sum);
+ src += src_stride;
+ ref += ref_stride;
+ }
+}
- const __m128i src1 = _mm_unpackhi_epi8(s, zero);
- const __m128i ref1 = _mm_unpackhi_epi8(r, zero);
- const __m128i diff1 = _mm_sub_epi16(src1, ref1);
+static INLINE void variance32_sse2(const uint8_t *src, const int src_stride,
+ const uint8_t *ref, const int ref_stride,
+ const int h, __m128i *const sse,
+ __m128i *const sum) {
+ int i;
- vsum = _mm_add_epi16(vsum, diff0);
- vsum = _mm_add_epi16(vsum, diff1);
- vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0));
- vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1));
+ assert(h <= 32); // May overflow for larger height.
+ // Don't initialize sse here since it's an accumulation.
+ *sum = _mm_setzero_si128();
+ for (i = 0; i < h; ++i) {
+ variance16_kernel_sse2(src + 0, ref + 0, sse, sum);
+ variance16_kernel_sse2(src + 16, ref + 16, sse, sum);
src += src_stride;
ref += ref_stride;
}
+}
- // sum
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
- *sum =
- (int16_t)_mm_extract_epi16(vsum, 0) + (int16_t)_mm_extract_epi16(vsum, 1);
-
- // sse
- vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
- vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
- *sse = _mm_cvtsi128_si32(vsse);
-}
-
-static void variance_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride, int w,
- int h, unsigned int *sse, int *sum,
- getNxMvar_fn_t var_fn, int block_size) {
- int i, j;
-
- *sse = 0;
- *sum = 0;
-
- for (i = 0; i < h; i += block_size) {
- for (j = 0; j < w; j += block_size) {
- unsigned int sse0;
- int sum0;
- var_fn(src + src_stride * i + j, src_stride, ref + ref_stride * i + j,
- ref_stride, &sse0, &sum0);
- *sse += sse0;
- *sum += sum0;
- }
+static INLINE void variance64_sse2(const uint8_t *src, const int src_stride,
+ const uint8_t *ref, const int ref_stride,
+ const int h, __m128i *const sse,
+ __m128i *const sum) {
+ int i;
+
+ assert(h <= 16); // May overflow for larger height.
+ // Don't initialize sse here since it's an accumulation.
+ *sum = _mm_setzero_si128();
+
+ for (i = 0; i < h; ++i) {
+ variance16_kernel_sse2(src + 0, ref + 0, sse, sum);
+ variance16_kernel_sse2(src + 16, ref + 16, sse, sum);
+ variance16_kernel_sse2(src + 32, ref + 32, sse, sum);
+ variance16_kernel_sse2(src + 48, ref + 48, sse, sum);
+ src += src_stride;
+ ref += ref_stride;
}
}
-unsigned int vpx_variance4x4_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
+void vpx_get8x8var_sse2(const uint8_t *src, int src_stride, const uint8_t *ref,
+ int ref_stride, unsigned int *sse, int *sum) {
+ __m128i vsse, vsum;
+ variance8_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance_final_128_pel_sse2(vsse, vsum, sse, sum);
+}
+
+void vpx_get16x16var_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride, unsigned int *sse,
+ int *sum) {
+ __m128i vsse, vsum;
+ variance16_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance_final_256_pel_sse2(vsse, vsum, sse, sum);
+}
+
+unsigned int vpx_variance4x4_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse, vsum;
int sum;
- get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
+ variance4_sse2(src, src_stride, ref, ref_stride, 4, &vsse, &vsum);
+ variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 4);
}
-unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride,
+unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse, vsum;
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 8, 4, sse, &sum,
- get4x4var_sse2, 4);
+ variance4_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 5);
}
-unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride,
+unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse, vsum;
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 4, 8, sse, &sum,
- get4x4var_sse2, 4);
+ variance8_sse2(src, src_stride, ref, ref_stride, 4, &vsse, &vsum);
+ variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 5);
}
-unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
+unsigned int vpx_variance8x8_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse, vsum;
int sum;
- vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
+ variance8_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 6);
}
-unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
+unsigned int vpx_variance8x16_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse, vsum;
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 16, 8, sse, &sum,
- vpx_get8x8var_sse2, 8);
+ variance8_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 7);
}
-unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
+unsigned int vpx_variance16x8_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse, vsum;
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 8, 16, sse, &sum,
- vpx_get8x8var_sse2, 8);
+ variance16_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum);
+ variance_final_128_pel_sse2(vsse, vsum, sse, &sum);
return *sse - ((sum * sum) >> 7);
}
-unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
+unsigned int vpx_variance16x16_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse, vsum;
int sum;
- vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
+ variance16_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance_final_256_pel_sse2(vsse, vsum, sse, &sum);
return *sse - (uint32_t)(((int64_t)sum * sum) >> 8);
}
-unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride,
+unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse, vsum;
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 32, 32, sse, &sum,
- vpx_get16x16var_sse2, 16);
- return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
+ variance16_sse2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ variance_final_512_pel_sse2(vsse, vsum, sse, &sum);
+ return *sse - (unsigned int)(((int64_t)sum * sum) >> 9);
}
unsigned int vpx_variance32x16_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse = _mm_setzero_si128();
+ __m128i vsum;
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 32, 16, sse, &sum,
- vpx_get16x16var_sse2, 16);
+ variance32_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum);
+ variance_final_512_pel_sse2(vsse, vsum, sse, &sum);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 9);
}
-unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride,
+unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse = _mm_setzero_si128();
+ __m128i vsum;
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 16, 32, sse, &sum,
- vpx_get16x16var_sse2, 16);
- return *sse - (unsigned int)(((int64_t)sum * sum) >> 9);
+ variance32_sse2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum);
+ *sse = add32x4_sse2(vsse);
+ sum = sum_final_sse2(vsum);
+ return *sse - (unsigned int)(((int64_t)sum * sum) >> 10);
}
-unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride,
+unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse = _mm_setzero_si128();
+ __m128i vsum = _mm_setzero_si128();
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 64, 64, sse, &sum,
- vpx_get16x16var_sse2, 16);
- return *sse - (unsigned int)(((int64_t)sum * sum) >> 12);
+ int i = 0;
+
+ for (i = 0; i < 2; i++) {
+ __m128i vsum16;
+ variance32_sse2(src + 32 * i * src_stride, src_stride,
+ ref + 32 * i * ref_stride, ref_stride, 32, &vsse, &vsum16);
+ vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16));
+ }
+ *sse = add32x4_sse2(vsse);
+ sum = add32x4_sse2(vsum);
+ return *sse - (unsigned int)(((int64_t)sum * sum) >> 11);
}
unsigned int vpx_variance64x32_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse = _mm_setzero_si128();
+ __m128i vsum = _mm_setzero_si128();
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 64, 32, sse, &sum,
- vpx_get16x16var_sse2, 16);
+ int i = 0;
+
+ for (i = 0; i < 2; i++) {
+ __m128i vsum16;
+ variance64_sse2(src + 16 * i * src_stride, src_stride,
+ ref + 16 * i * ref_stride, ref_stride, 16, &vsse, &vsum16);
+ vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16));
+ }
+ *sse = add32x4_sse2(vsse);
+ sum = add32x4_sse2(vsum);
return *sse - (unsigned int)(((int64_t)sum * sum) >> 11);
}
-unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride,
+unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride,
unsigned int *sse) {
+ __m128i vsse = _mm_setzero_si128();
+ __m128i vsum = _mm_setzero_si128();
int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 32, 64, sse, &sum,
- vpx_get16x16var_sse2, 16);
- return *sse - (unsigned int)(((int64_t)sum * sum) >> 11);
+ int i = 0;
+
+ for (i = 0; i < 4; i++) {
+ __m128i vsum16;
+ variance64_sse2(src + 16 * i * src_stride, src_stride,
+ ref + 16 * i * ref_stride, ref_stride, 16, &vsse, &vsum16);
+ vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16));
+ }
+ *sse = add32x4_sse2(vsse);
+ sum = add32x4_sse2(vsum);
+ return *sse - (unsigned int)(((int64_t)sum * sum) >> 12);
}
unsigned int vpx_mse8x8_sse2(const uint8_t *src, int src_stride,
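The MSE entry points reuse the variance accumulation but skip the mean correction: mean squared error over a block is just the accumulated sse, i.e. mse = sum over all pixels of (src - ref)^2, so wrappers such as vpx_mse16x16_avx2() above compute sum only to satisfy the shared reduction helper and return *sse as-is, discarding the sum.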
diff --git a/chromium/third_party/libvpx/source/libvpx/y4menc.c b/chromium/third_party/libvpx/source/libvpx/y4menc.c
index 05018dbc433..02b729e5bbb 100644
--- a/chromium/third_party/libvpx/source/libvpx/y4menc.c
+++ b/chromium/third_party/libvpx/source/libvpx/y4menc.c
@@ -17,11 +17,9 @@ int y4m_write_file_header(char *buf, size_t len, int width, int height,
const char *color;
switch (bit_depth) {
case 8:
- color = fmt == VPX_IMG_FMT_444A
- ? "C444alpha\n"
- : fmt == VPX_IMG_FMT_I444
- ? "C444\n"
- : fmt == VPX_IMG_FMT_I422 ? "C422\n" : "C420jpeg\n";
+ color = fmt == VPX_IMG_FMT_I444
+ ? "C444\n"
+ : fmt == VPX_IMG_FMT_I422 ? "C422\n" : "C420jpeg\n";
break;
case 9:
color = fmt == VPX_IMG_FMT_I44416
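With VPX_IMG_FMT_444A dropped, the 8-bit branch now picks between C444, C422, and C420jpeg only; the C token is the colorspace field of the YUV4MPEG2 stream header this function emits. An illustrative header line (values made up):

    YUV4MPEG2 W352 H288 F30000:1001 Ip A0:0 C420jpeg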
diff --git a/chromium/third_party/libvpx/source/libvpx/y4minput.c b/chromium/third_party/libvpx/source/libvpx/y4minput.c
index 56d5598276f..007bd9971b4 100644
--- a/chromium/third_party/libvpx/source/libvpx/y4minput.c
+++ b/chromium/third_party/libvpx/source/libvpx/y4minput.c
@@ -1031,30 +1031,6 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
fprintf(stderr, "Unsupported conversion from 444p12 to 420jpeg\n");
return -1;
}
- } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) {
- _y4m->src_c_dec_h = 1;
- _y4m->src_c_dec_v = 1;
- if (only_420) {
- _y4m->dst_c_dec_h = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*Chroma filter required: read into the aux buf first.
- We need to make two filter passes, so we need some extra space in the
- aux buffer.
- The extra plane also gets read into the aux buf.
- It will be discarded.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
- _y4m->convert = y4m_convert_444_420jpeg;
- } else {
- _y4m->vpx_fmt = VPX_IMG_FMT_444A;
- _y4m->bps = 32;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h;
- /*Natively supported: no conversion required.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- }
} else if (strcmp(_y4m->chroma_type, "mono") == 0) {
_y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
_y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;