diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-08-24 12:15:48 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2018-08-28 13:30:04 +0000 |
commit | b014812705fc80bff0a5c120dfcef88f349816dc (patch) | |
tree | 25a2e2d9fa285f1add86aa333389a839f81a39ae /chromium/third_party/libvpx | |
parent | 9f4560b1027ae06fdb497023cdcaf91b8511fa74 (diff) | |
download | qtwebengine-chromium-b014812705fc80bff0a5c120dfcef88f349816dc.tar.gz |
BASELINE: Update Chromium to 68.0.3440.125
Change-Id: I23f19369e01f688e496f5bf179abb521ad73874f
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
Diffstat (limited to 'chromium/third_party/libvpx')
109 files changed, 3042 insertions, 1549 deletions
diff --git a/chromium/third_party/libvpx/README.chromium b/chromium/third_party/libvpx/README.chromium index ef97e284423..3fb61e4b51a 100644 --- a/chromium/third_party/libvpx/README.chromium +++ b/chromium/third_party/libvpx/README.chromium @@ -5,9 +5,9 @@ License: BSD License File: source/libvpx/LICENSE Security Critical: yes -Date: Monday April 09 2018 +Date: Monday May 21 2018 Branch: master -Commit: be5df6080154e58db88fa3640e127efd18c04bde +Commit: e27a331778c4c99ec37262ea786a3b4cc2a491ac Description: Contains the sources used to compile libvpx binaries used by Google Chrome and diff --git a/chromium/third_party/libvpx/generate_gni.sh b/chromium/third_party/libvpx/generate_gni.sh index 5704e76062f..2c94f6f685a 100755 --- a/chromium/third_party/libvpx/generate_gni.sh +++ b/chromium/third_party/libvpx/generate_gni.sh @@ -226,6 +226,7 @@ function print_config_basic { # $3 - Optional - any additional arguments to pass through. function gen_rtcd_header { echo "Generate $LIBVPX_CONFIG_DIR/$1/*_rtcd.h files." 
+ format="clang-format -i -style=Chromium" rm -rf $BASE_DIR/$TEMP_DIR/libvpx.config if [[ "$2" == "mipsel" || "$2" == "mips64el" || "$2" == nacl ]]; then @@ -244,7 +245,7 @@ function gen_rtcd_header { $BASE_DIR/$LIBVPX_SRC_DIR/vp8/common/rtcd_defs.pl \ > $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp8_rtcd.h - clang-format -i $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp8_rtcd.h + ${format} $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp8_rtcd.h $BASE_DIR/$LIBVPX_SRC_DIR/build/make/rtcd.pl \ --arch=$2 \ @@ -253,7 +254,7 @@ function gen_rtcd_header { $BASE_DIR/$LIBVPX_SRC_DIR/vp9/common/vp9_rtcd_defs.pl \ > $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp9_rtcd.h - clang-format -i $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp9_rtcd.h + ${format} $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vp9_rtcd.h $BASE_DIR/$LIBVPX_SRC_DIR/build/make/rtcd.pl \ --arch=$2 \ @@ -262,7 +263,7 @@ function gen_rtcd_header { $BASE_DIR/$LIBVPX_SRC_DIR/vpx_scale/vpx_scale_rtcd.pl \ > $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_scale_rtcd.h - clang-format -i $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_scale_rtcd.h + ${format} $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_scale_rtcd.h $BASE_DIR/$LIBVPX_SRC_DIR/build/make/rtcd.pl \ --arch=$2 \ @@ -271,7 +272,7 @@ function gen_rtcd_header { $BASE_DIR/$LIBVPX_SRC_DIR/vpx_dsp/vpx_dsp_rtcd_defs.pl \ > $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_dsp_rtcd.h - clang-format -i $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_dsp_rtcd.h + ${format} $BASE_DIR/$LIBVPX_CONFIG_DIR/$1/vpx_dsp_rtcd.h rm -rf $BASE_DIR/$TEMP_DIR/libvpx.config } diff --git a/chromium/third_party/libvpx/libvpx_srcs.gni b/chromium/third_party/libvpx/libvpx_srcs.gni index a59ffeb95f6..24e6a8959ae 100644 --- a/chromium/third_party/libvpx/libvpx_srcs.gni +++ b/chromium/third_party/libvpx/libvpx_srcs.gni @@ -1632,6 +1632,7 @@ libvpx_srcs_arm_neon = [ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/subpel_variance_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h", + 
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/transpose_neon.h", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.h", @@ -2128,6 +2129,7 @@ libvpx_srcs_arm_neon_cpu_detect_neon = [ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/subpel_variance_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c", + "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_scaled_convolve8_neon.c", @@ -2467,6 +2469,7 @@ libvpx_srcs_arm64 = [ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/subpel_variance_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h", + "//third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/transpose_neon.h", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c", "//third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c", diff --git a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm index d459dae055b..aeaea997f54 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm @@ -78,6 +78,7 @@ .set CONFIG_MULTI_RES_ENCODING , 1 .set CONFIG_TEMPORAL_DENOISING , 1 .set CONFIG_VP9_TEMPORAL_DENOISING , 1 +.set CONFIG_CONSISTENT_RECODE , 0 .set CONFIG_COEFFICIENT_RANGE_CHECKING , 0 .set CONFIG_VP9_HIGHBITDEPTH , 0 .set CONFIG_BETTER_HW_COMPATIBILITY , 0 diff --git 
a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h index b80461bdae0..365206fe64a 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 0 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_dsp_rtcd.h index cc0b382fb76..0056935cbd0 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_dsp_rtcd.h @@ -2210,7 +2210,8 @@ void vpx_subtract_block_neon(int rows, #define vpx_subtract_block vpx_subtract_block_neon uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size); -#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c +uint64_t vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size); +#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_neon void vpx_tm_predictor_16x16_c(uint8_t* dst, ptrdiff_t y_stride, diff --git a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm index 63979ef8f30..296266dedc3 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm @@ -78,6 +78,7 @@ .set CONFIG_MULTI_RES_ENCODING , 1 .set CONFIG_TEMPORAL_DENOISING , 1 .set CONFIG_VP9_TEMPORAL_DENOISING , 1 +.set CONFIG_CONSISTENT_RECODE , 0 .set CONFIG_COEFFICIENT_RANGE_CHECKING , 0 .set CONFIG_VP9_HIGHBITDEPTH , 0 .set 
CONFIG_BETTER_HW_COMPATIBILITY , 0 diff --git a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h index 1ab268cab88..13e7637569b 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 0 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_dsp_rtcd.h index cc0b382fb76..0056935cbd0 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_dsp_rtcd.h @@ -2210,7 +2210,8 @@ void vpx_subtract_block_neon(int rows, #define vpx_subtract_block vpx_subtract_block_neon uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size); -#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c +uint64_t vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size); +#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_neon void vpx_tm_predictor_16x16_c(uint8_t* dst, ptrdiff_t y_stride, diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm index ccf2d701a4e..e6fa07b327e 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm @@ -75,6 +75,7 @@ .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_VP9_TEMPORAL_DENOISING , 1 +.equ CONFIG_CONSISTENT_RECODE , 0 .equ 
CONFIG_COEFFICIENT_RANGE_CHECKING , 0 .equ CONFIG_VP9_HIGHBITDEPTH , 0 .equ CONFIG_BETTER_HW_COMPATIBILITY , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h index 9cb0939b234..5b8efaedac0 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 0 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h index c518d2b8fbe..3308094509a 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h @@ -2891,7 +2891,10 @@ RTCD_EXTERN void (*vpx_subtract_block)(int rows, ptrdiff_t pred_stride); uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size); -#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c +uint64_t vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size); +RTCD_EXTERN uint64_t (*vpx_sum_squares_2d_i16)(const int16_t* src, + int stride, + int size); void vpx_tm_predictor_16x16_c(uint8_t* dst, ptrdiff_t y_stride, @@ -3694,6 +3697,9 @@ static void setup_rtcd_internal(void) { vpx_subtract_block = vpx_subtract_block_c; if (flags & HAS_NEON) vpx_subtract_block = vpx_subtract_block_neon; + vpx_sum_squares_2d_i16 = vpx_sum_squares_2d_i16_c; + if (flags & HAS_NEON) + vpx_sum_squares_2d_i16 = vpx_sum_squares_2d_i16_neon; vpx_tm_predictor_16x16 = 
vpx_tm_predictor_16x16_c; if (flags & HAS_NEON) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_neon; diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm index 87cfb6acbb9..1137b0ff007 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm @@ -75,6 +75,7 @@ .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_VP9_TEMPORAL_DENOISING , 1 +.equ CONFIG_CONSISTENT_RECODE , 0 .equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0 .equ CONFIG_VP9_HIGHBITDEPTH , 0 .equ CONFIG_BETTER_HW_COMPATIBILITY , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h index b80461bdae0..365206fe64a 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 0 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h index cc0b382fb76..0056935cbd0 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h @@ -2210,7 +2210,8 @@ void vpx_subtract_block_neon(int rows, #define vpx_subtract_block vpx_subtract_block_neon uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size); -#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c +uint64_t 
vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size); +#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_neon void vpx_tm_predictor_16x16_c(uint8_t* dst, ptrdiff_t y_stride, diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm index 6cc4a695c68..51d4f390c17 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm @@ -75,6 +75,7 @@ .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_VP9_TEMPORAL_DENOISING , 1 +.equ CONFIG_CONSISTENT_RECODE , 0 .equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0 .equ CONFIG_VP9_HIGHBITDEPTH , 0 .equ CONFIG_BETTER_HW_COMPATIBILITY , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h index 826ac8d1897..fc57694a68d 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 0 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm index c5284f8017b..54efd55c5bf 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm @@ -75,6 +75,7 @@ .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_VP9_TEMPORAL_DENOISING , 1 +.equ CONFIG_CONSISTENT_RECODE , 0 .equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0 .equ CONFIG_VP9_HIGHBITDEPTH , 0 .equ 
CONFIG_BETTER_HW_COMPATIBILITY , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h index 1ab268cab88..13e7637569b 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 0 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h index cc0b382fb76..0056935cbd0 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h @@ -2210,7 +2210,8 @@ void vpx_subtract_block_neon(int rows, #define vpx_subtract_block vpx_subtract_block_neon uint64_t vpx_sum_squares_2d_i16_c(const int16_t* src, int stride, int size); -#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_c +uint64_t vpx_sum_squares_2d_i16_neon(const int16_t* src, int stride, int size); +#define vpx_sum_squares_2d_i16 vpx_sum_squares_2d_i16_neon void vpx_tm_predictor_16x16_c(uint8_t* dst, ptrdiff_t y_stride, diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm index fc694fbc78e..650636a78b1 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm @@ -75,6 +75,7 @@ .equ CONFIG_MULTI_RES_ENCODING , 1 .equ CONFIG_TEMPORAL_DENOISING , 1 .equ CONFIG_VP9_TEMPORAL_DENOISING , 1 +.equ CONFIG_CONSISTENT_RECODE , 0 .equ CONFIG_COEFFICIENT_RANGE_CHECKING , 0 
.equ CONFIG_VP9_HIGHBITDEPTH , 1 .equ CONFIG_BETTER_HW_COMPATIBILITY , 0 diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h index cc20ff9ec8c..df5f6f87029 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 1 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm index b1c79dd9ab8..6aa13d720aa 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm @@ -72,6 +72,7 @@ %define CONFIG_MULTI_RES_ENCODING 1 %define CONFIG_TEMPORAL_DENOISING 1 %define CONFIG_VP9_TEMPORAL_DENOISING 1 +%define CONFIG_CONSISTENT_RECODE 0 %define CONFIG_COEFFICIENT_RANGE_CHECKING 0 %define CONFIG_VP9_HIGHBITDEPTH 1 %define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h index dbd8c1d3933..7749f38f9be 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 1 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git 
a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h index 4e665ccf81e..ecd6c94b1f6 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h @@ -6458,6 +6458,11 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int recon_stride, unsigned int* sse); +unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int recon_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -8588,6 +8593,11 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); +unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -8604,6 +8614,11 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); +unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -8662,6 +8677,11 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); +unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -9957,6 +9977,8 @@ static void setup_rtcd_internal(void) { vpx_mse16x8 
= vpx_mse16x8_c; if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + if (flags & HAS_AVX2) + vpx_mse16x8 = vpx_mse16x8_avx2; vpx_mse8x16 = vpx_mse8x16_c; if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; @@ -10341,9 +10363,13 @@ static void setup_rtcd_internal(void) { vpx_variance16x32 = vpx_variance16x32_c; if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + if (flags & HAS_AVX2) + vpx_variance16x32 = vpx_variance16x32_avx2; vpx_variance16x8 = vpx_variance16x8_c; if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + if (flags & HAS_AVX2) + vpx_variance16x8 = vpx_variance16x8_avx2; vpx_variance32x16 = vpx_variance32x16_c; if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; @@ -10357,6 +10383,8 @@ static void setup_rtcd_internal(void) { vpx_variance32x64 = vpx_variance32x64_c; if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + if (flags & HAS_AVX2) + vpx_variance32x64 = vpx_variance32x64_avx2; vpx_variance4x4 = vpx_variance4x4_c; if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h index d876f25ba4f..98374b198c0 100644 --- a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 0 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h index 967e5443fb9..1a8a71b75f4 100644 --- a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h +++ 
b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 0 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm index 84f986d3fcc..ffaf2d94f9d 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm @@ -72,6 +72,7 @@ %define CONFIG_MULTI_RES_ENCODING 1 %define CONFIG_TEMPORAL_DENOISING 1 %define CONFIG_VP9_TEMPORAL_DENOISING 1 +%define CONFIG_CONSISTENT_RECODE 0 %define CONFIG_COEFFICIENT_RANGE_CHECKING 0 %define CONFIG_VP9_HIGHBITDEPTH 1 %define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h index aedc47647fd..c6e9e82ee32 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 1 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h index 49a73711abe..258994f0076 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h @@ -5376,7 +5376,16 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr, const 
uint8_t* ref_ptr, int recon_stride, unsigned int* sse); -#define vpx_mse16x8 vpx_mse16x8_sse2 +unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int recon_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int recon_stride, + unsigned int* sse); unsigned int vpx_mse8x16_c(const uint8_t* src_ptr, int source_stride, @@ -7350,7 +7359,16 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); -#define vpx_variance16x32 vpx_variance16x32_sse2 +unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); unsigned int vpx_variance16x8_c(const uint8_t* src_ptr, int source_stride, @@ -7362,7 +7380,16 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); -#define vpx_variance16x8 vpx_variance16x8_sse2 +unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); unsigned int vpx_variance32x16_c(const uint8_t* src_ptr, int source_stride, @@ -7416,7 +7443,16 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); -#define vpx_variance32x64 vpx_variance32x64_sse2 +unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const 
uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); unsigned int vpx_variance4x4_c(const uint8_t* src_ptr, int source_stride, @@ -7743,6 +7779,9 @@ static void setup_rtcd_internal(void) { vpx_mse16x16 = vpx_mse16x16_sse2; if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_mse16x8 = vpx_mse16x8_sse2; + if (flags & HAS_AVX2) + vpx_mse16x8 = vpx_mse16x8_avx2; vpx_quantize_b = vpx_quantize_b_sse2; if (flags & HAS_SSSE3) vpx_quantize_b = vpx_quantize_b_ssse3; @@ -7918,12 +7957,21 @@ static void setup_rtcd_internal(void) { vpx_variance16x16 = vpx_variance16x16_sse2; if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance16x32 = vpx_variance16x32_sse2; + if (flags & HAS_AVX2) + vpx_variance16x32 = vpx_variance16x32_avx2; + vpx_variance16x8 = vpx_variance16x8_sse2; + if (flags & HAS_AVX2) + vpx_variance16x8 = vpx_variance16x8_avx2; vpx_variance32x16 = vpx_variance32x16_sse2; if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; vpx_variance32x32 = vpx_variance32x32_sse2; if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance32x64 = vpx_variance32x64_sse2; + if (flags & HAS_AVX2) + vpx_variance32x64 = vpx_variance32x64_avx2; vpx_variance64x32 = vpx_variance64x32_sse2; if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm index b1c79dd9ab8..6aa13d720aa 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm @@ -72,6 +72,7 @@ %define CONFIG_MULTI_RES_ENCODING 1 %define CONFIG_TEMPORAL_DENOISING 1 %define CONFIG_VP9_TEMPORAL_DENOISING 1 +%define CONFIG_CONSISTENT_RECODE 0 %define CONFIG_COEFFICIENT_RANGE_CHECKING 0 %define CONFIG_VP9_HIGHBITDEPTH 1 %define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git 
a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h index dbd8c1d3933..7749f38f9be 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 1 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h index 4e665ccf81e..ecd6c94b1f6 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h @@ -6458,6 +6458,11 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int recon_stride, unsigned int* sse); +unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int recon_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -8588,6 +8593,11 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); +unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -8604,6 +8614,11 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); +unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned 
int* sse); RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -8662,6 +8677,11 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); +unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -9957,6 +9977,8 @@ static void setup_rtcd_internal(void) { vpx_mse16x8 = vpx_mse16x8_c; if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + if (flags & HAS_AVX2) + vpx_mse16x8 = vpx_mse16x8_avx2; vpx_mse8x16 = vpx_mse8x16_c; if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; @@ -10341,9 +10363,13 @@ static void setup_rtcd_internal(void) { vpx_variance16x32 = vpx_variance16x32_c; if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + if (flags & HAS_AVX2) + vpx_variance16x32 = vpx_variance16x32_avx2; vpx_variance16x8 = vpx_variance16x8_c; if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + if (flags & HAS_AVX2) + vpx_variance16x8 = vpx_variance16x8_avx2; vpx_variance32x16 = vpx_variance32x16_c; if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; @@ -10357,6 +10383,8 @@ static void setup_rtcd_internal(void) { vpx_variance32x64 = vpx_variance32x64_c; if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + if (flags & HAS_AVX2) + vpx_variance32x64 = vpx_variance32x64_avx2; vpx_variance4x4 = vpx_variance4x4_c; if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm index 84f986d3fcc..ffaf2d94f9d 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm 
@@ -72,6 +72,7 @@ %define CONFIG_MULTI_RES_ENCODING 1 %define CONFIG_TEMPORAL_DENOISING 1 %define CONFIG_VP9_TEMPORAL_DENOISING 1 +%define CONFIG_CONSISTENT_RECODE 0 %define CONFIG_COEFFICIENT_RANGE_CHECKING 0 %define CONFIG_VP9_HIGHBITDEPTH 1 %define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h index aedc47647fd..c6e9e82ee32 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 1 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h index 49a73711abe..258994f0076 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h @@ -5376,7 +5376,16 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int recon_stride, unsigned int* sse); -#define vpx_mse16x8 vpx_mse16x8_sse2 +unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int recon_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int recon_stride, + unsigned int* sse); unsigned int vpx_mse8x16_c(const uint8_t* src_ptr, int source_stride, @@ -7350,7 +7359,16 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); -#define vpx_variance16x32 vpx_variance16x32_sse2 +unsigned int vpx_variance16x32_avx2(const 
uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); unsigned int vpx_variance16x8_c(const uint8_t* src_ptr, int source_stride, @@ -7362,7 +7380,16 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); -#define vpx_variance16x8 vpx_variance16x8_sse2 +unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); unsigned int vpx_variance32x16_c(const uint8_t* src_ptr, int source_stride, @@ -7416,7 +7443,16 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); -#define vpx_variance32x64 vpx_variance32x64_sse2 +unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); unsigned int vpx_variance4x4_c(const uint8_t* src_ptr, int source_stride, @@ -7743,6 +7779,9 @@ static void setup_rtcd_internal(void) { vpx_mse16x16 = vpx_mse16x16_sse2; if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_mse16x8 = vpx_mse16x8_sse2; + if (flags & HAS_AVX2) + vpx_mse16x8 = vpx_mse16x8_avx2; vpx_quantize_b = vpx_quantize_b_sse2; if (flags & HAS_SSSE3) vpx_quantize_b = vpx_quantize_b_ssse3; @@ -7918,12 +7957,21 @@ static void setup_rtcd_internal(void) { vpx_variance16x16 = vpx_variance16x16_sse2; if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; 
+ vpx_variance16x32 = vpx_variance16x32_sse2; + if (flags & HAS_AVX2) + vpx_variance16x32 = vpx_variance16x32_avx2; + vpx_variance16x8 = vpx_variance16x8_sse2; + if (flags & HAS_AVX2) + vpx_variance16x8 = vpx_variance16x8_avx2; vpx_variance32x16 = vpx_variance32x16_sse2; if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; vpx_variance32x32 = vpx_variance32x32_sse2; if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance32x64 = vpx_variance32x64_sse2; + if (flags & HAS_AVX2) + vpx_variance32x64 = vpx_variance32x64_avx2; vpx_variance64x32 = vpx_variance64x32_sse2; if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h index cc20ff9ec8c..df5f6f87029 100644 --- a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 1 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/vpx_version.h b/chromium/third_party/libvpx/source/config/vpx_version.h index cf24c88e12e..d208d662f33 100644 --- a/chromium/third_party/libvpx/source/config/vpx_version.h +++ b/chromium/third_party/libvpx/source/config/vpx_version.h @@ -2,7 +2,7 @@ #define VERSION_MAJOR 1 #define VERSION_MINOR 7 #define VERSION_PATCH 0 -#define VERSION_EXTRA "262-gbe5df6080" +#define VERSION_EXTRA "387-ge27a33177" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.7.0-262-gbe5df6080" -#define VERSION_STRING " v1.7.0-262-gbe5df6080" +#define VERSION_STRING_NOSP "v1.7.0-387-ge27a33177" +#define VERSION_STRING " 
v1.7.0-387-ge27a33177" diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm index 0592279663f..4e7f6863eb2 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm @@ -72,6 +72,7 @@ %define CONFIG_MULTI_RES_ENCODING 1 %define CONFIG_TEMPORAL_DENOISING 1 %define CONFIG_VP9_TEMPORAL_DENOISING 1 +%define CONFIG_CONSISTENT_RECODE 0 %define CONFIG_COEFFICIENT_RANGE_CHECKING 0 %define CONFIG_VP9_HIGHBITDEPTH 1 %define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h index 0725ed34f04..2cf19145915 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 1 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h index 4e665ccf81e..ecd6c94b1f6 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h @@ -6458,6 +6458,11 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int recon_stride, unsigned int* sse); +unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int recon_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -8588,6 +8593,11 @@ unsigned int 
vpx_variance16x32_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); +unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -8604,6 +8614,11 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); +unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -8662,6 +8677,11 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); +unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr, int source_stride, const uint8_t* ref_ptr, @@ -9957,6 +9977,8 @@ static void setup_rtcd_internal(void) { vpx_mse16x8 = vpx_mse16x8_c; if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2; + if (flags & HAS_AVX2) + vpx_mse16x8 = vpx_mse16x8_avx2; vpx_mse8x16 = vpx_mse8x16_c; if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2; @@ -10341,9 +10363,13 @@ static void setup_rtcd_internal(void) { vpx_variance16x32 = vpx_variance16x32_c; if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2; + if (flags & HAS_AVX2) + vpx_variance16x32 = vpx_variance16x32_avx2; vpx_variance16x8 = vpx_variance16x8_c; if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2; + if (flags & HAS_AVX2) + vpx_variance16x8 = vpx_variance16x8_avx2; vpx_variance32x16 = vpx_variance32x16_c; if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2; @@ -10357,6 +10383,8 
@@ static void setup_rtcd_internal(void) { vpx_variance32x64 = vpx_variance32x64_c; if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2; + if (flags & HAS_AVX2) + vpx_variance32x64 = vpx_variance32x64_avx2; vpx_variance4x4 = vpx_variance4x4_c; if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2; diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm index c17b1e336d6..ef886a31f30 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm @@ -72,6 +72,7 @@ %define CONFIG_MULTI_RES_ENCODING 1 %define CONFIG_TEMPORAL_DENOISING 1 %define CONFIG_VP9_TEMPORAL_DENOISING 1 +%define CONFIG_CONSISTENT_RECODE 0 %define CONFIG_COEFFICIENT_RANGE_CHECKING 0 %define CONFIG_VP9_HIGHBITDEPTH 1 %define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h index f0fbf897849..6d539498521 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h @@ -84,6 +84,7 @@ #define CONFIG_MULTI_RES_ENCODING 1 #define CONFIG_TEMPORAL_DENOISING 1 #define CONFIG_VP9_TEMPORAL_DENOISING 1 +#define CONFIG_CONSISTENT_RECODE 0 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0 #define CONFIG_VP9_HIGHBITDEPTH 1 #define CONFIG_BETTER_HW_COMPATIBILITY 0 diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h index 49a73711abe..258994f0076 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h +++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h @@ -5376,7 +5376,16 @@ unsigned int vpx_mse16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int recon_stride, unsigned int* sse); 
-#define vpx_mse16x8 vpx_mse16x8_sse2 +unsigned int vpx_mse16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int recon_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int recon_stride, + unsigned int* sse); unsigned int vpx_mse8x16_c(const uint8_t* src_ptr, int source_stride, @@ -7350,7 +7359,16 @@ unsigned int vpx_variance16x32_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); -#define vpx_variance16x32 vpx_variance16x32_sse2 +unsigned int vpx_variance16x32_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); unsigned int vpx_variance16x8_c(const uint8_t* src_ptr, int source_stride, @@ -7362,7 +7380,16 @@ unsigned int vpx_variance16x8_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); -#define vpx_variance16x8 vpx_variance16x8_sse2 +unsigned int vpx_variance16x8_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); unsigned int vpx_variance32x16_c(const uint8_t* src_ptr, int source_stride, @@ -7416,7 +7443,16 @@ unsigned int vpx_variance32x64_sse2(const uint8_t* src_ptr, const uint8_t* ref_ptr, int ref_stride, unsigned int* sse); -#define vpx_variance32x64 vpx_variance32x64_sse2 +unsigned int vpx_variance32x64_avx2(const uint8_t* src_ptr, + int source_stride, + const uint8_t* ref_ptr, + int ref_stride, + unsigned int* sse); +RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t* src_ptr, + int source_stride, + const uint8_t* 
ref_ptr, + int ref_stride, + unsigned int* sse); unsigned int vpx_variance4x4_c(const uint8_t* src_ptr, int source_stride, @@ -7743,6 +7779,9 @@ static void setup_rtcd_internal(void) { vpx_mse16x16 = vpx_mse16x16_sse2; if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2; + vpx_mse16x8 = vpx_mse16x8_sse2; + if (flags & HAS_AVX2) + vpx_mse16x8 = vpx_mse16x8_avx2; vpx_quantize_b = vpx_quantize_b_sse2; if (flags & HAS_SSSE3) vpx_quantize_b = vpx_quantize_b_ssse3; @@ -7918,12 +7957,21 @@ static void setup_rtcd_internal(void) { vpx_variance16x16 = vpx_variance16x16_sse2; if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2; + vpx_variance16x32 = vpx_variance16x32_sse2; + if (flags & HAS_AVX2) + vpx_variance16x32 = vpx_variance16x32_avx2; + vpx_variance16x8 = vpx_variance16x8_sse2; + if (flags & HAS_AVX2) + vpx_variance16x8 = vpx_variance16x8_avx2; vpx_variance32x16 = vpx_variance32x16_sse2; if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2; vpx_variance32x32 = vpx_variance32x32_sse2; if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2; + vpx_variance32x64 = vpx_variance32x64_sse2; + if (flags & HAS_AVX2) + vpx_variance32x64 = vpx_variance32x64_avx2; vpx_variance64x32 = vpx_variance64x32_sse2; if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2; diff --git a/chromium/third_party/libvpx/source/libvpx/README b/chromium/third_party/libvpx/source/libvpx/README index a900c807787..49407ed9ff3 100644 --- a/chromium/third_party/libvpx/source/libvpx/README +++ b/chromium/third_party/libvpx/source/libvpx/README @@ -76,7 +76,6 @@ COMPILING THE APPLICATIONS/LIBRARIES: armv8-linux-gcc mips32-linux-gcc mips64-linux-gcc - ppc64-linux-gcc ppc64le-linux-gcc sparc-solaris-gcc x86-android-gcc diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh index a6c76612fcf..f1d0e34c3f6 100644 --- 
a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh +++ b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh @@ -319,6 +319,12 @@ check_ld() { && check_cmd ${LD} ${LDFLAGS} "$@" -o ${TMP_X} ${TMP_O} ${extralibs} } +check_lib() { + log check_lib "$@" + check_cc $@ \ + && check_cmd ${LD} ${LDFLAGS} -o ${TMP_X} ${TMP_O} "$@" ${extralibs} +} + check_header(){ log check_header "$@" header=$1 @@ -713,11 +719,8 @@ process_common_toolchain() { *sparc*) tgt_isa=sparc ;; - power*64*-*) - tgt_isa=ppc64 - ;; - power*) - tgt_isa=ppc + power*64le*-*) + tgt_isa=ppc64le ;; *mips64el*) tgt_isa=mips64 @@ -1215,7 +1218,7 @@ EOF check_add_asflags -march=${tgt_isa} check_add_asflags -KPIC ;; - ppc*) + ppc64le*) link_with_cc=gcc setup_gnu_toolchain check_gcc_machine_option "vsx" @@ -1485,7 +1488,11 @@ EOF # bionic includes basic pthread functionality, obviating -lpthread. ;; *) - check_header pthread.h && add_extralibs -lpthread + check_header pthread.h && check_lib -lpthread <<EOF && add_extralibs -lpthread || disable_feature pthread_h +#include <pthread.h> +#include <stddef.h> +int main(void) { return pthread_create(NULL, NULL, NULL, NULL); } +EOF ;; esac fi diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/iosbuild.sh b/chromium/third_party/libvpx/source/libvpx/build/make/iosbuild.sh index 365a8c01306..e102442bd8e 100755 --- a/chromium/third_party/libvpx/source/libvpx/build/make/iosbuild.sh +++ b/chromium/third_party/libvpx/source/libvpx/build/make/iosbuild.sh @@ -132,7 +132,8 @@ create_vpx_framework_config_shim() { done # Consume the last line of output from the loop: We don't want it. 
- sed -i '' -e '$d' "${config_file}" + sed -i.bak -e '$d' "${config_file}" + rm "${config_file}.bak" printf "#endif\n\n" >> "${config_file}" printf "#endif // ${include_guard}" >> "${config_file}" diff --git a/chromium/third_party/libvpx/source/libvpx/configure b/chromium/third_party/libvpx/source/libvpx/configure index 2f198e9a61e..8be95d60236 100755 --- a/chromium/third_party/libvpx/source/libvpx/configure +++ b/chromium/third_party/libvpx/source/libvpx/configure @@ -116,7 +116,6 @@ all_platforms="${all_platforms} armv7s-darwin-gcc" all_platforms="${all_platforms} armv8-linux-gcc" all_platforms="${all_platforms} mips32-linux-gcc" all_platforms="${all_platforms} mips64-linux-gcc" -all_platforms="${all_platforms} ppc64-linux-gcc" all_platforms="${all_platforms} ppc64le-linux-gcc" all_platforms="${all_platforms} sparc-solaris-gcc" all_platforms="${all_platforms} x86-android-gcc" @@ -328,6 +327,7 @@ CONFIG_LIST=" multi_res_encoding temporal_denoising vp9_temporal_denoising + consistent_recode coefficient_range_checking vp9_highbitdepth better_hw_compatibility @@ -389,6 +389,7 @@ CMDLINE_SELECT=" multi_res_encoding temporal_denoising vp9_temporal_denoising + consistent_recode coefficient_range_checking better_hw_compatibility vp9_highbitdepth @@ -573,16 +574,30 @@ process_detect() { check_ld() { true } + check_lib() { + true + } fi check_header stdio.h || die "Unable to invoke compiler: ${CC} ${CFLAGS}" check_ld <<EOF || die "Toolchain is unable to link executables" int main(void) {return 0;} EOF # check system headers - check_header pthread.h + + # Use both check_header and check_lib here, since check_lib + # could be a stub that always returns true. + check_header pthread.h && check_lib -lpthread <<EOF || disable_feature pthread_h +#include <pthread.h> +#include <stddef.h> +int main(void) { return pthread_create(NULL, NULL, NULL, NULL); } +EOF check_header unistd.h # for sysconf(3) and friends. 
check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports + + if enabled neon && ! enabled external_build; then + check_header arm_neon.h || die "Unable to find arm_neon.h" + fi } process_toolchain() { @@ -708,9 +723,7 @@ process_toolchain() { check_cxx "$@" <<EOF && soft_enable unit_tests int z; EOF - check_cxx "$@" <<EOF && soft_enable webm_io -int z; -EOF + check_add_cxxflags -std=c++11 && soft_enable webm_io check_cxx "$@" <<EOF && soft_enable libyuv int z; EOF @@ -719,9 +732,7 @@ EOF enabled pthread_h && check_cxx "$@" <<EOF && soft_enable unit_tests int z; EOF - check_cxx "$@" <<EOF && soft_enable webm_io -int z; -EOF + check_add_cxxflags -std=c++11 && soft_enable webm_io check_cxx "$@" <<EOF && soft_enable libyuv int z; EOF diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c index 747f79ffba1..091c6954d12 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c @@ -730,6 +730,8 @@ int main(int argc, const char **argv) { vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0); + vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0); + // Encode frames while (!end_of_stream) { vpx_codec_iter_t iter = NULL; diff --git a/chromium/third_party/libvpx/source/libvpx/ivfdec.c b/chromium/third_party/libvpx/source/libvpx/ivfdec.c index f64e594ab0e..3e179bc6ed2 100644 --- a/chromium/third_party/libvpx/source/libvpx/ivfdec.c +++ b/chromium/third_party/libvpx/source/libvpx/ivfdec.c @@ -76,12 +76,12 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, size_t frame_size = 0; if (fread(raw_header, IVF_FRAME_HDR_SZ, 1, infile) != 1) { - if (!feof(infile)) warn("Failed to read frame size\n"); + if (!feof(infile)) warn("Failed to read frame size"); } else { frame_size = mem_get_le32(raw_header); if (frame_size > 
256 * 1024 * 1024) { - warn("Read invalid frame size (%u)\n", (unsigned int)frame_size); + warn("Read invalid frame size (%u)", (unsigned int)frame_size); frame_size = 0; } @@ -92,7 +92,7 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, *buffer = new_buffer; *buffer_size = 2 * frame_size; } else { - warn("Failed to allocate compressed data buffer\n"); + warn("Failed to allocate compressed data buffer"); frame_size = 0; } } @@ -100,7 +100,7 @@ int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, if (!feof(infile)) { if (fread(*buffer, 1, frame_size, infile) != frame_size) { - warn("Failed to read full frame\n"); + warn("Failed to read full frame"); return 1; } diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/Android.mk b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/Android.mk index 8149a083f4f..b46ba101d42 100644 --- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/Android.mk +++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/Android.mk @@ -3,7 +3,7 @@ LOCAL_PATH:= $(call my-dir) include $(CLEAR_VARS) LOCAL_MODULE:= libwebm LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -Wno-extern-c-compat +LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -std=c++11 LOCAL_C_INCLUDES:= $(LOCAL_PATH) LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH) diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx index ebb5ff2f4d7..6d8b0b4ccc6 100644 --- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx +++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx @@ -1,5 +1,5 @@ URL: https://chromium.googlesource.com/webm/libwebm -Version: 0ae757087f5e6eb01dfea16cc09205b2425cfb74 +Version: af81f26025b7435fa9a14ad07c58b44cf9280430 License: BSD License File: LICENSE.txt @@ -7,4 +7,14 @@ 
Description: libwebm is used to handle WebM container I/O. Local Changes: -* <none> +Only keep: + - Android.mk + - AUTHORS.TXT + - common/ + file_util.cc/h + hdr_util.cc/h + webmids.h + - LICENSE.TXT + - mkvmuxer/ + - mkvparser/ + - PATENTS.TXT diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc index 6dab146dd98..618ffc087fd 100644 --- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc +++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc @@ -17,6 +17,7 @@ #include <cstring> #include <fstream> #include <ios> +#include <string> namespace libwebm { @@ -41,7 +42,12 @@ std::string GetTempFileName() { return temp_file_name; #else char tmp_file_name[_MAX_PATH]; +#if defined _MSC_VER || defined MINGW_HAS_SECURE_API errno_t err = tmpnam_s(tmp_file_name); +#else + char* fname_pointer = tmpnam(tmp_file_name); + errno_t err = (fname_pointer == &tmp_file_name[0]) ? 
0 : -1; +#endif if (err == 0) { return std::string(tmp_file_name); } @@ -65,6 +71,15 @@ uint64_t GetFileSize(const std::string& file_name) { return file_size; } +bool GetFileContents(const std::string& file_name, std::string* contents) { + std::ifstream file(file_name.c_str()); + *contents = std::string(static_cast<size_t>(GetFileSize(file_name)), 0); + if (file.good() && contents->size()) { + file.read(&(*contents)[0], contents->size()); + } + return !file.fail(); +} + TempFileDeleter::TempFileDeleter() { file_name_ = GetTempFileName(); } TempFileDeleter::~TempFileDeleter() { diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.h b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.h index 0e71eac11e4..a8737346418 100644 --- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.h +++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.h @@ -22,6 +22,9 @@ std::string GetTempFileName(); // Returns size of file specified by |file_name|, or 0 upon failure. uint64_t GetFileSize(const std::string& file_name); +// Gets the contents file_name as a string. Returns false on error. +bool GetFileContents(const std::string& file_name, std::string* contents); + // Manages life of temporary file specified at time of construction. Deletes // file upon destruction. class TempFileDeleter { @@ -38,4 +41,4 @@ class TempFileDeleter { } // namespace libwebm -#endif // LIBWEBM_COMMON_FILE_UTIL_H_
\ No newline at end of file +#endif // LIBWEBM_COMMON_FILE_UTIL_H_ diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc index e1618ce75a7..916f7170b67 100644 --- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc +++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc @@ -36,10 +36,10 @@ bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm, if (MasteringMetadataValuePresent(parser_mm.luminance_min)) muxer_mm->set_luminance_min(parser_mm.luminance_min); - PrimaryChromaticityPtr r_ptr(NULL); - PrimaryChromaticityPtr g_ptr(NULL); - PrimaryChromaticityPtr b_ptr(NULL); - PrimaryChromaticityPtr wp_ptr(NULL); + PrimaryChromaticityPtr r_ptr(nullptr); + PrimaryChromaticityPtr g_ptr(nullptr); + PrimaryChromaticityPtr b_ptr(nullptr); + PrimaryChromaticityPtr wp_ptr(nullptr); if (parser_mm.r) { if (!CopyPrimaryChromaticity(*parser_mm.r, &r_ptr)) diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h index 3ef5388fd03..78e2eeb7058 100644 --- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h +++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h @@ -47,15 +47,7 @@ struct Vp9CodecFeatures { int chroma_subsampling; }; -// disable deprecation warnings for auto_ptr -#if defined(__GNUC__) && __GNUC__ >= 5 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif -typedef std::auto_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr; -#if defined(__GNUC__) && __GNUC__ >= 5 -#pragma GCC diagnostic pop -#endif +typedef std::unique_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr; bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& 
parser_pc, PrimaryChromaticityPtr* muxer_pc); diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc index 15b9a908d8a..481771db297 100644 --- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc +++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc @@ -8,6 +8,8 @@ #include "mkvmuxer/mkvmuxer.h" +#include <stdint.h> + #include <cfloat> #include <climits> #include <cstdio> @@ -24,11 +26,6 @@ #include "mkvmuxer/mkvwriter.h" #include "mkvparser/mkvparser.h" -// disable deprecation warnings for auto_ptr -#if defined(__GNUC__) && __GNUC__ >= 5 -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - namespace mkvmuxer { const float PrimaryChromaticity::kChromaticityMin = 0.0f; @@ -72,7 +69,7 @@ bool StrCpy(const char* src, char** dst_ptr) { return true; } -typedef std::auto_ptr<PrimaryChromaticity> PrimaryChromaticityPtr; +typedef std::unique_ptr<PrimaryChromaticity> PrimaryChromaticityPtr; bool CopyChromaticity(const PrimaryChromaticity* src, PrimaryChromaticityPtr* dst) { if (!dst) @@ -1057,22 +1054,22 @@ bool MasteringMetadata::Write(IMkvWriter* writer) const { bool MasteringMetadata::SetChromaticity( const PrimaryChromaticity* r, const PrimaryChromaticity* g, const PrimaryChromaticity* b, const PrimaryChromaticity* white_point) { - PrimaryChromaticityPtr r_ptr(NULL); + PrimaryChromaticityPtr r_ptr(nullptr); if (r) { if (!CopyChromaticity(r, &r_ptr)) return false; } - PrimaryChromaticityPtr g_ptr(NULL); + PrimaryChromaticityPtr g_ptr(nullptr); if (g) { if (!CopyChromaticity(g, &g_ptr)) return false; } - PrimaryChromaticityPtr b_ptr(NULL); + PrimaryChromaticityPtr b_ptr(nullptr); if (b) { if (!CopyChromaticity(b, &b_ptr)) return false; } - PrimaryChromaticityPtr wp_ptr(NULL); + PrimaryChromaticityPtr wp_ptr(nullptr); if (white_point) { if 
(!CopyChromaticity(white_point, &wp_ptr)) return false; @@ -1238,7 +1235,7 @@ bool Colour::Write(IMkvWriter* writer) const { } bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) { - std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata()); + std::unique_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata()); if (!mm_ptr.get()) return false; @@ -1546,7 +1543,7 @@ bool VideoTrack::Write(IMkvWriter* writer) const { } bool VideoTrack::SetColour(const Colour& colour) { - std::auto_ptr<Colour> colour_ptr(new Colour()); + std::unique_ptr<Colour> colour_ptr(new Colour()); if (!colour_ptr.get()) return false; @@ -1574,7 +1571,7 @@ bool VideoTrack::SetColour(const Colour& colour) { } bool VideoTrack::SetProjection(const Projection& projection) { - std::auto_ptr<Projection> projection_ptr(new Projection()); + std::unique_ptr<Projection> projection_ptr(new Projection()); if (!projection_ptr.get()) return false; @@ -2666,7 +2663,7 @@ bool Cluster::QueueOrWriteFrame(const Frame* const frame) { // and write it if it is okay to do so (i.e.) no other track has an held back // frame with timestamp <= the timestamp of the frame in question. 
std::vector<std::list<Frame*>::iterator> frames_to_erase; - for (std::list<Frame *>::iterator + for (std::list<Frame*>::iterator current_track_iterator = stored_frames_[track_number].begin(), end = --stored_frames_[track_number].end(); current_track_iterator != end; ++current_track_iterator) { diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc index 37f230d0a95..e7b76f7da11 100644 --- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc +++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc @@ -22,12 +22,8 @@ #include "common/webmids.h" -// disable deprecation warnings for auto_ptr -#if defined(__GNUC__) && __GNUC__ >= 5 -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -#endif - namespace mkvparser { +const long long kStringElementSizeLimit = 20 * 1000 * 1000; const float MasteringMetadata::kValueNotPresent = FLT_MAX; const long long Colour::kValueNotPresent = LLONG_MAX; const float Projection::kValueNotPresent = FLT_MAX; @@ -330,7 +326,7 @@ long UnserializeString(IMkvReader* pReader, long long pos, long long size, delete[] str; str = NULL; - if (size >= LONG_MAX || size < 0) + if (size >= LONG_MAX || size < 0 || size > kStringElementSizeLimit) return E_FILE_FORMAT_INVALID; // +1 for '\0' terminator @@ -5015,7 +5011,7 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start, if (!reader || *mm) return false; - std::auto_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata()); + std::unique_ptr<MasteringMetadata> mm_ptr(new MasteringMetadata()); if (!mm_ptr.get()) return false; @@ -5035,6 +5031,10 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start, double value = 0; const long long value_parse_status = UnserializeFloat(reader, read_pos, child_size, value); + if (value < -FLT_MAX || value > FLT_MAX || + (value > 0.0 && 
value < FLT_MIN)) { + return false; + } mm_ptr->luminance_max = static_cast<float>(value); if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 || mm_ptr->luminance_max > 9999.99) { @@ -5044,6 +5044,10 @@ bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start, double value = 0; const long long value_parse_status = UnserializeFloat(reader, read_pos, child_size, value); + if (value < -FLT_MAX || value > FLT_MAX || + (value > 0.0 && value < FLT_MIN)) { + return false; + } mm_ptr->luminance_min = static_cast<float>(value); if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 || mm_ptr->luminance_min > 999.9999) { @@ -5096,7 +5100,7 @@ bool Colour::Parse(IMkvReader* reader, long long colour_start, if (!reader || *colour) return false; - std::auto_ptr<Colour> colour_ptr(new Colour()); + std::unique_ptr<Colour> colour_ptr(new Colour()); if (!colour_ptr.get()) return false; @@ -5194,7 +5198,7 @@ bool Projection::Parse(IMkvReader* reader, long long start, long long size, if (!reader || *projection) return false; - std::auto_ptr<Projection> projection_ptr(new Projection()); + std::unique_ptr<Projection> projection_ptr(new Projection()); if (!projection_ptr.get()) return false; @@ -7903,6 +7907,10 @@ long Block::Parse(const Cluster* pCluster) { return E_FILE_FORMAT_INVALID; curr.len = static_cast<long>(frame_size); + // Check if size + curr.len could overflow. + if (size > LLONG_MAX - curr.len) { + return E_FILE_FORMAT_INVALID; + } size += curr.len; // contribution of this frame --frame_count; @@ -7964,6 +7972,11 @@ long long Block::GetTimeCode(const Cluster* pCluster) const { const long long tc0 = pCluster->GetTimeCode(); assert(tc0 >= 0); + // Check if tc0 + m_timecode would overflow. 
+ if (tc0 < 0 || LLONG_MAX - tc0 < m_timecode) { + return -1; + } + const long long tc = tc0 + m_timecode; return tc; // unscaled timecode units @@ -7981,6 +7994,10 @@ long long Block::GetTime(const Cluster* pCluster) const { const long long scale = pInfo->GetTimeCodeScale(); assert(scale >= 1); + // Check if tc * scale could overflow. + if (tc != 0 && scale > LLONG_MAX / tc) { + return -1; + } const long long ns = tc * scale; return ns; diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c index d67ee8a57d8..8c292d6161d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c @@ -65,7 +65,7 @@ void vp8_deblock(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; int ppl = (int)(level + .5); - const MODE_INFO *mode_info_context = cm->show_frame_mi; + const MODE_INFO *mode_info_context = cm->mi; int mbr, mbc; /* The pixel thresholds are adjusted according to if or not the macroblock diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c index 8bfd3cea3dc..0d54a9442b9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c @@ -686,6 +686,12 @@ static unsigned int read_available_partition_size( const unsigned char *partition_size_ptr = token_part_sizes + i * 3; unsigned int partition_size = 0; ptrdiff_t bytes_left = fragment_end - fragment_start; + if (bytes_left < 0) { + vpx_internal_error( + &pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt partition. No bytes left %d.", + (int)bytes_left); + } /* Calculate the length of this partition. The last partition * size is implicit. 
If the partition size can't be read, then * either use the remaining data in the buffer (for EC mode) diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c index 2bdc2b34a7b..e1c31341bc1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c @@ -258,9 +258,7 @@ static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img) { switch (img->fmt) { case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: break; + case VPX_IMG_FMT_I420: break; default: ERROR("Invalid image format. Only YV12 and I420 images are supported"); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.h index 8400bd70f1d..005290980e6 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.h @@ -176,12 +176,6 @@ static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx, } } -static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size, - const MACROBLOCKD *xd, - const struct tx_probs *tx_probs) { - return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs); -} - static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, struct tx_counts *tx_counts) { switch (max_tx_size) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index ef8484f988b..b47840795e1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -413,7 +413,8 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) { } 
while (cr->target_num_seg_blocks < block_count && i != cr->sb_index); cr->sb_index = i; cr->reduce_refresh = 0; - if (count_sel<(3 * count_tot)>> 2) cr->reduce_refresh = 1; + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) + if (count_sel<(3 * count_tot)>> 2) cr->reduce_refresh = 1; } // Set cyclic refresh parameters. diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c index d346cd57aa0..4e7d99f505a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c @@ -86,7 +86,7 @@ static void write_selected_tx_size(const VP9_COMMON *cm, BLOCK_SIZE bsize = xd->mi[0]->sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; const vpx_prob *const tx_probs = - get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); + get_tx_probs(max_tx_size, get_tx_size_context(xd), &cm->fc->tx_probs); vpx_write(w, tx_size != TX_4X4, tx_probs[0]); if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { vpx_write(w, tx_size != TX_8X8, tx_probs[1]); @@ -459,7 +459,8 @@ static void write_modes_sb( write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs, max_mv_magnitude, interp_filter_selected); break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, subsize, max_mv_magnitude, interp_filter_selected); write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs, @@ -469,7 +470,6 @@ static void write_modes_sb( write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col + bs, subsize, max_mv_magnitude, interp_filter_selected); break; - default: assert(0); } } @@ -618,9 +618,10 @@ static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi, return; } - case ONE_LOOP_REDUCED: { + default: { int updates = 0; int noupdates_before_first = 0; + assert(cpi->sf.use_fast_coef_updates == 
ONE_LOOP_REDUCED); for (i = 0; i < PLANE_TYPES; ++i) { for (j = 0; j < REF_TYPES; ++j) { for (k = 0; k < COEF_BANDS; ++k) { @@ -670,7 +671,6 @@ static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi, } return; } - default: assert(0); } } @@ -1117,11 +1117,7 @@ static void write_frame_size_with_refs(VP9_COMP *cpi, ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || (cpi->svc.number_spatial_layers > 1 && - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) || - (is_two_pass_svc(cpi) && - cpi->svc.encode_empty_frame_state == ENCODING && - cpi->svc.layer_context[0].frames_from_key_frame < - cpi->svc.number_temporal_layers + 1))) { + cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) { found = 0; } else if (cfg != NULL) { found = @@ -1153,8 +1149,10 @@ static void write_profile(BITSTREAM_PROFILE profile, case PROFILE_0: vpx_wb_write_literal(wb, 0, 2); break; case PROFILE_1: vpx_wb_write_literal(wb, 2, 2); break; case PROFILE_2: vpx_wb_write_literal(wb, 1, 2); break; - case PROFILE_3: vpx_wb_write_literal(wb, 6, 3); break; - default: assert(0); + default: + assert(profile == PROFILE_3); + vpx_wb_write_literal(wb, 6, 3); + break; } } @@ -1201,14 +1199,6 @@ static void write_uncompressed_header(VP9_COMP *cpi, write_bitdepth_colorspace_sampling(cm, wb); write_frame_size(cm, wb); } else { - // In spatial svc if it's not error_resilient_mode then we need to code all - // visible frames as invisible. But we need to keep the show_frame flag so - // that the publisher could know whether it is supposed to be visible. - // So we will code the show_frame flag as it is. Then code the intra_only - // bit here. This will make the bitstream incompatible. In the player we - // will change to show_frame flag to 0, then add an one byte frame with - // show_existing_frame flag which tells the decoder which frame we want to - // show. 
if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only); if (!cm->error_resilient_mode) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h index 339c3fecb13..b296560b922 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h @@ -39,11 +39,7 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size); static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) { return !cpi->multi_arf_allowed && cpi->refresh_golden_frame && - cpi->rc.is_src_frame_alt_ref && - (!cpi->use_svc || // Add spatial svc base layer case here - (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id == 0 && - cpi->svc.layer_context[0].gold_ref_idx >= 0 && - cpi->oxcf.ss_enable_auto_arf[0])); + cpi->rc.is_src_frame_alt_ref && !cpi->use_svc; } #ifdef __cplusplus diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c index a283d92a889..c7e9f9b2a02 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c @@ -385,16 +385,13 @@ static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { node->split[i] = &vt->split[i].part_variances.none; break; } - case BLOCK_4X4: { + default: { v4x4 *vt = (v4x4 *)data; + assert(bsize == BLOCK_4X4); node->part_variances = &vt->part_variances; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i]; break; } - default: { - assert(0); - break; - } } } @@ -586,6 +583,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, } else { thresholds[1] = (5 * threshold_base) >> 1; } + if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX; } } @@ -885,13 +883,13 @@ static void copy_partitioning_helper(VP9_COMP 
*cpi, MACROBLOCK *x, set_block_size(cpi, x, xd, mi_row, mi_col, subsize); set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize); break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col); copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col); copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs); copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs); break; - default: assert(0); } } } @@ -1004,7 +1002,8 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high, subsize_high); break; - case PARTITION_SPLIT: + default: + assert(partition_high == PARTITION_SPLIT); if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col, mi_row_high, mi_col_high)) return 1; @@ -1020,7 +1019,6 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, mi_col_high + bs_high)) return 1; break; - default: assert(0); } } @@ -1067,13 +1065,13 @@ static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, prev_part[start_pos] = subsize; if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); update_partition_svc(cpi, subsize, mi_row, mi_col); update_partition_svc(cpi, subsize, mi_row + bs, mi_col); update_partition_svc(cpi, subsize, mi_row, mi_col + bs); update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs); break; - default: assert(0); } } } @@ -1108,13 +1106,13 @@ static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize, prev_part[start_pos] = subsize; if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); update_prev_partition_helper(cpi, subsize, mi_row, mi_col); update_prev_partition_helper(cpi, subsize, 
mi_row + bs, mi_col); update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs); update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs); break; - default: assert(0); } } } @@ -1387,7 +1385,9 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); } else { - y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); + const MV dummy_mv = { 0, 0 }; + y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, + &dummy_mv); x->sb_use_mv_part = 1; x->sb_mvcol_part = mi->mv[0].as_mv.col; x->sb_mvrow_part = mi->mv[0].as_mv.row; @@ -2181,7 +2181,8 @@ static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, subsize, &pc_tree->horizontal[1]); } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); if (bsize == BLOCK_8X8) { encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, pc_tree->leaf_split[0]); @@ -2196,7 +2197,6 @@ static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, subsize, pc_tree->split[3]); } break; - default: assert(0 && "Invalid partition type."); break; } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) @@ -2522,7 +2522,8 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, subsize, &pc_tree->horizontal[1]); } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT); encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, pc_tree->split[0]); @@ -2533,7 +2534,6 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, subsize, pc_tree->split[3]); break; - default: assert(0 && "Invalid partition type."); break; } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) @@ -2672,7 +2672,8 @@ static void rd_use_partition(VP9_COMP *cpi, 
ThreadData *td, last_part_rdc.rdcost += tmp_rdc.rdcost; } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); if (bsize == BLOCK_8X8) { rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, pc_tree->leaf_split[0], INT64_MAX); @@ -2702,7 +2703,6 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td, last_part_rdc.dist += tmp_rdc.dist; } break; - default: assert(0); break; } pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -4208,7 +4208,8 @@ static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td, } } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT); nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize, output_enabled, rd_cost, @@ -4238,7 +4239,6 @@ static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td, rd_cost->dist += this_rdc.dist; } break; - default: assert(0 && "Invalid partition type."); break; } } @@ -4327,7 +4327,8 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, output_enabled, subsize, &pc_tree->horizontal[1]); } break; - case PARTITION_SPLIT: + default: + assert(partition == PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT); if (bsize == BLOCK_8X8) { nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, @@ -4348,7 +4349,6 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, dummy_cost, pc_tree->split[3]); } break; - default: assert(0 && "Invalid partition type."); break; } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) @@ -4452,7 +4452,8 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; - case REFERENCE_PARTITION: + default: + assert(partition_search_type == REFERENCE_PARTITION); x->sb_pickmode_part = 1; set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); // Use 
nonrd_pick_partition on scene-cut for VBR mode. @@ -4484,7 +4485,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, } break; - default: assert(0); break; } // Update ref_frame usage for inter frame if this group is ARF group. @@ -4551,16 +4551,12 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) { &var16->sse, &var16->sum); var16->var = variance_highbd(var16); break; - case VPX_BITS_12: + default: + assert(cm->bit_depth == VPX_BITS_12); vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); var16->var = variance_highbd(var16); break; - default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10" - " or VPX_BITS_12"); - return -1; } } else { vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, @@ -4668,6 +4664,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) { for (i = 0; i < BLOCK_SIZES; ++i) { for (j = 0; j < MAX_MODES; ++j) { tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; +#if CONFIG_CONSISTENT_RECODE + tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT; +#endif tile_data->mode_map[i][j] = j; } } @@ -4792,7 +4791,9 @@ static void encode_frame_internal(VP9_COMP *cpi) { x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; #endif // CONFIG_VP9_HIGHBITDEPTH x->inv_txfm_add = xd->lossless ? 
vp9_iwht4x4_add : vp9_idct4x4_add; - +#if CONFIG_CONSISTENT_RECODE + x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1; +#endif if (xd->lossless) x->optimize = 0; cm->tx_mode = select_tx_mode(cpi, xd); @@ -4917,9 +4918,48 @@ static int compute_frame_aq_offset(struct VP9_COMP *cpi) { return sum_delta / (cm->mi_rows * cm->mi_cols); } +#if CONFIG_CONSISTENT_RECODE +static void restore_encode_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + int tile_col, tile_row; + int i, j; + RD_OPT *rd_opt = &cpi->rd; + for (i = 0; i < MAX_REF_FRAMES; i++) { + for (j = 0; j < REFERENCE_MODES; j++) + rd_opt->prediction_type_threshes[i][j] = + rd_opt->prediction_type_threshes_prev[i][j]; + + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) + rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j]; + } + + if (cpi->tile_data != NULL) { + for (tile_row = 0; tile_row < tile_rows; ++tile_row) + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileDataEnc *tile_data = + &cpi->tile_data[tile_row * tile_cols + tile_col]; + for (i = 0; i < BLOCK_SIZES; ++i) { + for (j = 0; j < MAX_MODES; ++j) { + tile_data->thresh_freq_fact[i][j] = + tile_data->thresh_freq_fact_prev[i][j]; + } + } + } + } + + cm->interp_filter = cpi->sf.default_interp_filter; +} +#endif + void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; +#if CONFIG_CONSISTENT_RECODE + restore_encode_params(cpi); +#endif + // In the longer term the encoder should be generalized to match the // decoder such that we allow compound where one of the 3 buffers has a // different sign bias and that buffer is then the fixed ref. 
However, this diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c index 970077d8943..bc276572882 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c @@ -358,13 +358,13 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); } return; } @@ -388,13 +388,13 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); break; } } @@ -434,13 +434,13 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; - default: assert(0); } return; } @@ -462,12 +462,12 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; - case TX_4X4: + default: + 
assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; - default: assert(0); break; } } @@ -511,14 +511,14 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); } return; } @@ -544,13 +544,13 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); break; } } @@ -634,14 +634,14 @@ static void encode_block(int plane, int block, int row, int col, vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. 
x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; - default: assert(0 && "Invalid transform size"); } return; } @@ -657,13 +657,13 @@ static void encode_block(int plane, int block, int row, int col, case TX_8X8: vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; - default: assert(0 && "Invalid transform size"); break; } } @@ -848,7 +848,8 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, xd->bd); } break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); if (!x->skip_recode) { vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride, xd->bd); @@ -876,7 +877,6 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, } } break; - default: assert(0); return; } if (*eob) *(args->skip) = 0; return; @@ -930,7 +930,8 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, if (!x->skip_encode && *eob) vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); if (!x->skip_recode) { vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride); @@ -955,7 +956,6 @@ void vp9_encode_block_intra(int plane, int block, int row, int col, vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type); } break; - default: assert(0); break; } if (*eob) *(args->skip) = 0; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c index aa9c3bf46cb..e41768a8f13 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c @@ 
-483,14 +483,10 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { *hr = 3; *hs = 5; break; - case ONETWO: - *hr = 1; - *hs = 2; - break; default: + assert(mode == ONETWO); *hr = 1; - *hs = 1; - assert(0); + *hs = 2; break; } } @@ -791,7 +787,7 @@ static void setup_frame(VP9_COMP *cpi) { } if (cm->frame_type == KEY_FRAME) { - if (!is_two_pass_svc(cpi)) cpi->refresh_golden_frame = 1; + cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; vp9_zero(cpi->interp_filter_selected); } else { @@ -1347,15 +1343,9 @@ static void set_tile_limits(VP9_COMP *cpi) { int min_log2_tile_cols, max_log2_tile_cols; vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); - if (is_two_pass_svc(cpi) && (cpi->svc.encode_empty_frame_state == ENCODING || - cpi->svc.number_spatial_layers > 1)) { - cm->log2_tile_cols = 0; - cm->log2_tile_rows = 0; - } else { - cm->log2_tile_cols = - clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols); - cm->log2_tile_rows = cpi->oxcf.tile_rows; - } + cm->log2_tile_cols = + clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols); + cm->log2_tile_rows = cpi->oxcf.tile_rows; if (cpi->oxcf.target_level == LEVEL_AUTO) { const int level_tile_cols = @@ -1378,18 +1368,6 @@ static void update_frame_size(VP9_COMP *cpi) { cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base)); set_tile_limits(cpi); - - if (is_two_pass_svc(cpi)) { - if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, cm->width, cm->height, - cm->subsampling_x, cm->subsampling_y, -#if CONFIG_VP9_HIGHBITDEPTH - cm->use_highbitdepth, -#endif - VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, - NULL, NULL, NULL)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, - "Failed to reallocate alt_ref_buffer"); - } } static void init_buffer_indices(VP9_COMP *cpi) { @@ -1744,7 +1722,8 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vpx_highbd_sad4x4x4d_bits10) break; - case VPX_BITS_12: + default: + assert(cm->bit_depth == 
VPX_BITS_12); HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12, vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16, vpx_highbd_12_sub_pixel_variance32x16, @@ -1823,11 +1802,6 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) { vpx_highbd_12_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits12) break; - - default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); } } } @@ -2971,11 +2945,6 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; cpi->gld_fb_idx = tmp; - - if (is_two_pass_svc(cpi)) { - cpi->svc.layer_context[0].gold_ref_idx = cpi->gld_fb_idx; - cpi->svc.layer_context[0].alt_ref_idx = cpi->alt_fb_idx; - } } else { /* For non key/golden frames */ if (cpi->refresh_alt_ref_frame) { int arf_idx = cpi->alt_fb_idx; @@ -3054,17 +3023,32 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { // Keep track of frame index for each reference frame. SVC *const svc = &cpi->svc; if (cm->frame_type == KEY_FRAME) { - svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe; - svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe; - svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe; + int i; + // On key frame update all reference frame slots. + for (i = 0; i < REF_FRAMES; i++) { + svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id; + svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id; + // LAST/GOLDEN/ALTREF is already updated above. 
+ if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx && + i != cpi->alt_fb_idx) + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx); + } } else { - if (cpi->refresh_last_frame) - svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe; - if (cpi->refresh_golden_frame) - svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe; - if (cpi->refresh_alt_ref_frame) - svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe; + if (cpi->refresh_last_frame) { + svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id; + svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id; + } + if (cpi->refresh_golden_frame) { + svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] = svc->spatial_layer_id; + svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] = svc->temporal_layer_id; + } + if (cpi->refresh_alt_ref_frame) { + svc->fb_idx_spatial_layer_id[cpi->alt_fb_idx] = svc->spatial_layer_id; + svc->fb_idx_temporal_layer_id[cpi->alt_fb_idx] = svc->temporal_layer_id; + } } + // Copy flags from encoder to SVC struct. 
+ vp9_copy_flags_ref_update_idx(cpi); } } @@ -3307,11 +3291,9 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) { case VPX_BITS_10: dc_quant_devisor = 16.0; break; - case VPX_BITS_12: - dc_quant_devisor = 64.0; - break; default: - assert(0 && "bit_depth must be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); + assert(cm->bit_depth == VPX_BITS_12); + dc_quant_devisor = 64.0; break; } #else @@ -3550,9 +3532,7 @@ static void set_frame_size(VP9_COMP *cpi) { #endif } - if ((oxcf->pass == 2) && - (!cpi->use_svc || (is_two_pass_svc(cpi) && - cpi->svc.encode_empty_frame_state != ENCODING))) { + if ((oxcf->pass == 2) && !cpi->use_svc) { vp9_set_target_rate(cpi); } @@ -3599,6 +3579,39 @@ static void set_frame_size(VP9_COMP *cpi) { set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); } +#if CONFIG_CONSISTENT_RECODE +static void save_encode_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + int tile_col, tile_row; + int i, j; + RD_OPT *rd_opt = &cpi->rd; + for (i = 0; i < MAX_REF_FRAMES; i++) { + for (j = 0; j < REFERENCE_MODES; j++) + rd_opt->prediction_type_threshes_prev[i][j] = + rd_opt->prediction_type_threshes[i][j]; + + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) + rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j]; + } + + if (cpi->tile_data != NULL) { + for (tile_row = 0; tile_row < tile_rows; ++tile_row) + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileDataEnc *tile_data = + &cpi->tile_data[tile_row * tile_cols + tile_col]; + for (i = 0; i < BLOCK_SIZES; ++i) { + for (j = 0; j < MAX_MODES; ++j) { + tile_data->thresh_freq_fact_prev[i][j] = + tile_data->thresh_freq_fact[i][j]; + } + } + } + } +} +#endif + static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest) { VP9_COMMON *const cm = &cpi->common; @@ -3708,12 +3721,15 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // For other 
cases (e.g., CBR mode) use it for 5 <= speed < 8 for now // (need to check encoding time cost for doing this for speed 8). cpi->rc.high_source_sad = 0; - if (cpi->compute_source_sad_onepass && cm->show_frame && + if (cm->show_frame && cpi->oxcf.mode == REALTIME && (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.content == VP9E_CONTENT_SCREEN || - (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8 && !cpi->use_svc))) + (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8))) vp9_scene_detection_onepass(cpi); + if (cpi->svc.spatial_layer_id == 0) + cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad; + // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can // avoid this frame-level upsampling (for non intra_only frames). @@ -3751,28 +3767,11 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, suppress_active_map(cpi); - // For SVC on non-zero spatial layer: check for disabling inter-layer - // (spatial) prediction, if svc.disable_inter_layer_pred is set. - // if the previous spatial layer was dropped then disable the prediction from - // this (scaled) reference. 
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) { - if ((cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY && - !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) || - cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF || - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) { - MV_REFERENCE_FRAME ref_frame; - static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, - VP9_ALT_FLAG }; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); - if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) { - const struct scale_factors *const scale_fac = - &cm->frame_refs[ref_frame - 1].sf; - if (vp9_is_scaled(scale_fac)) - cpi->ref_frame_flags &= (~flag_list[ref_frame]); - } - } - } + if (cpi->use_svc) { + // On non-zero spatial layer, check for disabling inter-layer + // prediction. + if (cpi->svc.spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi); + vp9_svc_assert_constraints_pattern(cpi); } // Variance adaptive and in frame q adjustment experiments are mutually @@ -3799,10 +3798,10 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // Check if we should drop this frame because of high overshoot. // Only for frames where high temporal-source SAD is detected. - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && - cpi->resize_state == ORIG && cm->frame_type != KEY_FRAME && - cpi->oxcf.content == VP9E_CONTENT_SCREEN && - cpi->rc.high_source_sad == 1) { + // For SVC: all spatial layers are checked for re-encoding. + if (cpi->sf.re_encode_overshoot_rt && + (cpi->rc.high_source_sad || + (cpi->use_svc && cpi->svc.high_source_sad_superframe))) { int frame_size = 0; // Get an estimate of the encoded frame size. 
save_coding_context(cpi); @@ -4526,11 +4525,21 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cpi->oxcf.target_bandwidth == 0) { cpi->svc.skip_enhancement_layer = 1; vp9_rc_postencode_update_drop_frame(cpi); - vp9_inc_frame_in_layer(cpi); cpi->ext_refresh_frame_flags_pending = 0; cpi->last_frame_dropped = 1; cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; + if (cpi->svc.framedrop_mode != CONSTRAINED_LAYER_DROP || + cpi->svc.drop_spatial_layer[0] == 0) { + // For the case of CONSTRAINED_LAYER_DROP where the base is dropped + // (drop_spatial_layer[0] == 1), which means full superframe dropped, + // we don't increment the svc frame counters. In particular temporal + // layer counter (which is incremented in vp9_inc_frame_in_layer()) + // won't be incremented, so on a dropped frame we try the same + // temporal_layer_id on next incoming frame. This is to avoid an + // issue with temporal alignement with full superframe dropping. + vp9_inc_frame_in_layer(cpi); + } return; } @@ -4578,44 +4587,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cm->reset_frame_context = 2; } } - if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) { - // Use context 0 for intra only empty frame, but the last frame context - // for other empty frames. - if (cpi->svc.encode_empty_frame_state == ENCODING) { - if (cpi->svc.encode_intra_empty_frame != 0) - cm->frame_context_idx = 0; - else - cm->frame_context_idx = FRAME_CONTEXTS - 1; - } else { - cm->frame_context_idx = - cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + - cpi->svc.temporal_layer_id; - } - - cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode; - - // The probs will be updated based on the frame type of its previous - // frame if frame_parallel_decoding_mode is 0. The type may vary for - // the frame after a key frame in base layer since we may drop enhancement - // layers. 
So set frame_parallel_decoding_mode to 1 in this case. - if (cm->frame_parallel_decoding_mode == 0) { - if (cpi->svc.number_temporal_layers == 1) { - if (cpi->svc.spatial_layer_id == 0 && - cpi->svc.layer_context[0].last_frame_type == KEY_FRAME) - cm->frame_parallel_decoding_mode = 1; - } else if (cpi->svc.spatial_layer_id == 0) { - // Find the 2nd frame in temporal base layer and 1st frame in temporal - // enhancement layers from the key frame. - int i; - for (i = 0; i < cpi->svc.number_temporal_layers; ++i) { - if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) { - cm->frame_parallel_decoding_mode = 1; - break; - } - } - } - } - } // For 1 pass CBR, check if we are dropping this frame. // Never drop on key frame, or if base layer is key for svc. @@ -4639,8 +4610,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, if (cpi->use_svc) { cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; - vp9_inc_frame_in_layer(cpi); cpi->svc.skip_enhancement_layer = 1; + if (cpi->svc.framedrop_mode != CONSTRAINED_LAYER_DROP || + cpi->svc.drop_spatial_layer[0] == 0) { + // For the case of CONSTRAINED_LAYER_DROP where the base is dropped + // (drop_spatial_layer[0] == 1), which means full superframe dropped, + // we don't increment the svc frame counters. In particular temporal + // layer counter (which is incremented in vp9_inc_frame_in_layer()) + // won't be incremented, so on a dropped frame we try the same + // temporal_layer_id on next incoming frame. This is to avoid an + // issue with temporal alignement with full superframe dropping. 
+ vp9_inc_frame_in_layer(cpi); + } if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) { int i; int all_layers_drop = 1; @@ -4663,6 +4644,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, memset(cpi->mode_chosen_counts, 0, MAX_MODES * sizeof(*cpi->mode_chosen_counts)); #endif +#if CONFIG_CONSISTENT_RECODE + // Backup to ensure consistency between recodes + save_encode_params(cpi); +#endif if (cpi->sf.recode_loop == DISALLOW_RECODE) { encode_without_recode_loop(cpi, size, dest); @@ -4672,6 +4657,16 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cpi->last_frame_dropped = 0; cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0; + // Keep track of the frame buffer index updated/refreshed for the + // current encoded TL0 superframe. + if (cpi->svc.temporal_layer_id == 0) { + if (cpi->refresh_last_frame) + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx; + else if (cpi->refresh_golden_frame) + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx; + else if (cpi->refresh_alt_ref_frame) + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx; + } // Disable segmentation if it decrease rate/distortion ratio if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) @@ -4759,8 +4754,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size, cm->last_frame_type = cm->frame_type; - if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING)) - vp9_rc_postencode_update(cpi, *size); + vp9_rc_postencode_update(cpi, *size); #if 0 output_frame_level_debug_stats(cpi); @@ -4830,8 +4824,7 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; encode_frame_to_data_rate(cpi, size, dest, frame_flags); - if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING)) - vp9_twopass_postencode_update(cpi); + vp9_twopass_postencode_update(cpi); } #endif // !CONFIG_REALTIME_ONLY @@ 
-5271,9 +5264,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, int arf_src_index; int i; - if (is_two_pass_svc(cpi)) { - if (oxcf->pass == 2) vp9_restore_layer_context(cpi); - } else if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_cbr_svc(cpi)) { vp9_one_pass_cbr_svc_start_layer(cpi); } @@ -5301,9 +5292,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Should we encode an arf frame. arf_src_index = get_arf_src_index(cpi); - // Skip alt frame if we encode the empty frame - if (is_two_pass_svc(cpi) && source != NULL) arf_src_index = 0; - if (arf_src_index) { for (i = 0; i <= arf_src_index; ++i) { struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i); @@ -5456,9 +5444,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, cpi->frame_flags = *frame_flags; #if !CONFIG_REALTIME_ONLY - if ((oxcf->pass == 2) && - (!cpi->use_svc || (is_two_pass_svc(cpi) && - cpi->svc.encode_empty_frame_state != ENCODING))) { + if ((oxcf->pass == 2) && !cpi->use_svc) { vp9_rc_get_second_pass_params(cpi); } else if (oxcf->pass == 1) { set_frame_size(cpi); @@ -5482,7 +5468,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, Pass0Encode(cpi, size, dest, frame_flags); } #else // !CONFIG_REALTIME_ONLY - if (oxcf->pass == 1 && (!cpi->use_svc || is_two_pass_svc(cpi))) { + if (oxcf->pass == 1 && !cpi->use_svc) { const int lossless = is_lossless_requested(oxcf); #if CONFIG_VP9_HIGHBITDEPTH if (cpi->oxcf.use_highbitdepth) @@ -5497,7 +5483,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif // CONFIG_VP9_HIGHBITDEPTH cpi->td.mb.inv_txfm_add = lossless ? 
vp9_iwht4x4_add : vp9_idct4x4_add; vp9_first_pass(cpi, source); - } else if (oxcf->pass == 2 && (!cpi->use_svc || is_two_pass_svc(cpi))) { + } else if (oxcf->pass == 2 && !cpi->use_svc) { Pass2Encode(cpi, size, dest, frame_flags); } else if (cpi->use_svc) { SvcEncode(cpi, size, dest, frame_flags); @@ -5698,21 +5684,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif - if (is_two_pass_svc(cpi)) { - if (cpi->svc.encode_empty_frame_state == ENCODING) { - cpi->svc.encode_empty_frame_state = ENCODED; - cpi->svc.encode_intra_empty_frame = 0; - } - - if (cm->show_frame) { - ++cpi->svc.spatial_layer_to_encode; - if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) - cpi->svc.spatial_layer_to_encode = 0; - - // May need the empty frame after an visible frame. - cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE; - } - } else if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_cbr_svc(cpi)) { if (cm->show_frame) { ++cpi->svc.spatial_layer_to_encode; if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h index 05bfd6930d5..f66c13046a9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h @@ -282,6 +282,9 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { typedef struct TileDataEnc { TileInfo tile_info; int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; +#if CONFIG_CONSISTENT_RECODE + int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES]; +#endif int8_t mode_map[BLOCK_SIZES][MAX_MODES]; FIRSTPASS_DATA fp_data; VP9RowMTSync row_mt_sync; @@ -645,6 +648,8 @@ typedef struct VP9_COMP { int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; int 
partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; + // Indices are: max_tx_size-1, tx_size_ctx, tx_size + int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES]; int multi_arf_allowed; int multi_arf_enabled; @@ -860,10 +865,6 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required( void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); -static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) { - return cpi->use_svc && cpi->oxcf.pass != 0; -} - static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) { return (cpi->use_svc && cpi->oxcf.pass == 0); } @@ -879,9 +880,7 @@ static INLINE int denoise_svc(const struct VP9_COMP *const cpi) { static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { return !(cpi->oxcf.mode == REALTIME && cpi->oxcf.rc_mode == VPX_CBR) && cpi->oxcf.lag_in_frames >= MIN_LOOKAHEAD_FOR_ARFS && - (cpi->oxcf.enable_auto_arf && - (!is_two_pass_svc(cpi) || - cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id])); + cpi->oxcf.enable_auto_arf; } static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c index e102b493960..453879fb834 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c @@ -316,16 +316,7 @@ void vp9_init_first_pass(VP9_COMP *cpi) { } void vp9_end_first_pass(VP9_COMP *cpi) { - if (is_two_pass_svc(cpi)) { - int i; - for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { - output_stats(&cpi->svc.layer_context[i].twopass.total_stats, - cpi->output_pkt_list); - } - } else { - output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); - } - + output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); vpx_free(cpi->twopass.fp_mb_float_stats); cpi->twopass.fp_mb_float_stats = NULL; } @@ -503,11 +494,10 @@ static int 
scale_sse_threshold(VP9_COMMON *cm, int thresh) { switch (cm->bit_depth) { case VPX_BITS_8: ret_val = thresh; break; case VPX_BITS_10: ret_val = thresh << 4; break; - case VPX_BITS_12: ret_val = thresh << 8; break; default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); + assert(cm->bit_depth == VPX_BITS_12); + ret_val = thresh << 8; + break; } } #else @@ -529,11 +519,10 @@ static int get_ul_intra_threshold(VP9_COMMON *cm) { switch (cm->bit_depth) { case VPX_BITS_8: ret_val = UL_INTRA_THRESH; break; case VPX_BITS_10: ret_val = UL_INTRA_THRESH << 2; break; - case VPX_BITS_12: ret_val = UL_INTRA_THRESH << 4; break; default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); + assert(cm->bit_depth == VPX_BITS_12); + ret_val = UL_INTRA_THRESH << 4; + break; } } #else @@ -550,11 +539,10 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) { switch (cm->bit_depth) { case VPX_BITS_8: ret_val = SMOOTH_INTRA_THRESH; break; case VPX_BITS_10: ret_val = SMOOTH_INTRA_THRESH << 4; break; - case VPX_BITS_12: ret_val = SMOOTH_INTRA_THRESH << 8; break; default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); + assert(cm->bit_depth == VPX_BITS_12); + ret_val = SMOOTH_INTRA_THRESH << 8; + break; } } #else @@ -849,9 +837,6 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; - LAYER_CONTEXT *const lc = - is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id] - : NULL; MODE_INFO mi_above, mi_left; double mb_intra_factor; @@ -860,25 +845,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, // First pass code requires valid last and new frame buffers. 
assert(new_yv12 != NULL); - assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL)); - - if (lc != NULL) { - // Use either last frame or alt frame for motion search. - if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); - if (first_ref_buf == NULL) - first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME); - } - - if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME); - if (gld_yv12 == NULL) { - gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); - } - } else { - gld_yv12 = NULL; - } - } + assert(frame_is_intra_only(cm) || (lst_yv12 != NULL)); xd->mi = cm->mi_grid_visible + xd->mi_stride * (mb_row << 1) + (tile.mi_col_start >> 1); @@ -1001,12 +968,10 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, switch (cm->bit_depth) { case VPX_BITS_8: break; case VPX_BITS_10: this_error >>= 4; break; - case VPX_BITS_12: this_error >>= 8; break; default: - assert(0 && - "cm->bit_depth should be VPX_BITS_8, " - "VPX_BITS_10 or VPX_BITS_12"); - return; + assert(cm->bit_depth == VPX_BITS_12); + this_error >>= 8; + break; } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -1072,8 +1037,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16; // Other than for the first frame do a motion search. - if ((lc == NULL && cm->current_video_frame > 0) || - (lc != NULL && lc->current_video_frame_in_layer > 0)) { + if (cm->current_video_frame > 0) { int tmp_err, motion_error, raw_motion_error; // Assume 0,0 motion with no mv overhead. 
MV mv = { 0, 0 }, tmp_mv = { 0, 0 }; @@ -1113,7 +1077,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, #endif // CONFIG_VP9_HIGHBITDEPTH // TODO(pengchong): Replace the hard-coded threshold - if (raw_motion_error > 25 || lc != NULL) { + if (raw_motion_error > 25) { // Test last reference frame using the previous best mv as the // starting point (best reference) for the search. first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error); @@ -1131,9 +1095,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, } // Search in an older reference frame. - if (((lc == NULL && cm->current_video_frame > 1) || - (lc != NULL && lc->current_video_frame_in_layer > 1)) && - gld_yv12 != NULL) { + if ((cm->current_video_frame > 1) && gld_yv12 != NULL) { // Assume 0,0 motion with no mv overhead. int gf_motion_error; @@ -1371,9 +1333,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; - LAYER_CONTEXT *const lc = - is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id] - : NULL; BufferPool *const pool = cm->buffer_pool; FIRSTPASS_DATA fp_temp_data; @@ -1385,7 +1344,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { // First pass code requires valid last and new frame buffers. 
assert(new_yv12 != NULL); - assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL)); + assert(frame_is_intra_only(cm) || (lst_yv12 != NULL)); #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { @@ -1396,50 +1355,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { set_first_pass_params(cpi); vp9_set_quantizer(cm, find_fp_qindex(cm->bit_depth)); - if (lc != NULL) { - twopass = &lc->twopass; - - cpi->lst_fb_idx = cpi->svc.spatial_layer_id; - cpi->ref_frame_flags = VP9_LAST_FLAG; - - if (cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id < - REF_FRAMES) { - cpi->gld_fb_idx = - cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id; - cpi->ref_frame_flags |= VP9_GOLD_FLAG; - cpi->refresh_golden_frame = (lc->current_video_frame_in_layer == 0); - } else { - cpi->refresh_golden_frame = 0; - } - - if (lc->current_video_frame_in_layer == 0) cpi->ref_frame_flags = 0; - - vp9_scale_references(cpi); - - // Use either last frame or alt frame for motion search. - if (cpi->ref_frame_flags & VP9_LAST_FLAG) { - first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); - if (first_ref_buf == NULL) - first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME); - } - - if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { - gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME); - if (gld_yv12 == NULL) { - gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); - } - } else { - gld_yv12 = NULL; - } - - set_ref_ptrs(cm, xd, - (cpi->ref_frame_flags & VP9_LAST_FLAG) ? LAST_FRAME : NONE, - (cpi->ref_frame_flags & VP9_GOLD_FLAG) ? 
GOLDEN_FRAME : NONE); - - cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source, - &cpi->scaled_source, 0, EIGHTTAP, 0); - } - vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); vp9_setup_src_planes(x, cpi->Source, 0, 0); @@ -1523,18 +1438,13 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { vpx_extend_frame_borders(new_yv12); - if (lc != NULL) { - vp9_update_reference_frames(cpi); - } else { - // The frame we just compressed now becomes the last frame. - ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], - cm->new_fb_idx); - } + // The frame we just compressed now becomes the last frame. + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], + cm->new_fb_idx); // Special case for the first frame. Copy into the GF buffer as a second // reference. - if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX && - lc == NULL) { + if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->ref_frame_map[cpi->lst_fb_idx]); } @@ -1588,7 +1498,9 @@ static double wq_err_divisor(VP9_COMP *cpi) { // Use a different error per mb factor for calculating boost for // different formats. - if (screen_area < 1280 * 720) { + if (screen_area <= 640 * 360) { + return 115.0; + } else if (screen_area < 1280 * 720) { return 125.0; } else if (screen_area <= 1920 * 1080) { return 130.0; @@ -1706,14 +1618,9 @@ void calculate_coded_size(VP9_COMP *cpi, int *scaled_frame_width, } void vp9_init_second_pass(VP9_COMP *cpi) { - SVC *const svc = &cpi->svc; VP9EncoderConfig *const oxcf = &cpi->oxcf; - const int is_two_pass_svc = - (svc->number_spatial_layers > 1) || (svc->number_temporal_layers > 1); RATE_CONTROL *const rc = &cpi->rc; - TWO_PASS *const twopass = - is_two_pass_svc ? 
&svc->layer_context[svc->spatial_layer_id].twopass - : &cpi->twopass; + TWO_PASS *const twopass = &cpi->twopass; double frame_rate; FIRSTPASS_STATS *stats; @@ -1790,18 +1697,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) { // encoded in the second pass is a guess. However, the sum duration is not. // It is calculated based on the actual durations of all frames from the // first pass. - - if (is_two_pass_svc) { - vp9_update_spatial_layer_framerate(cpi, frame_rate); - twopass->bits_left = - (int64_t)(stats->duration * - svc->layer_context[svc->spatial_layer_id].target_bandwidth / - 10000000.0); - } else { - vp9_new_framerate(cpi, frame_rate); - twopass->bits_left = - (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0); - } + vp9_new_framerate(cpi, frame_rate); + twopass->bits_left = + (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0); // This variable monitors how far behind the second ref update is lagging. twopass->sr_update_lag = 1; @@ -2239,8 +2137,6 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, int mid_frame_idx; unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS]; int alt_frame_index = frame_index; - int has_temporal_layers = - is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1; int normal_frames; int normal_frame_bits; int last_frame_reduction = 0; @@ -2248,11 +2144,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, double tot_norm_frame_score = 1.0; double this_frame_score = 1.0; - // Only encode alt reference frame in temporal base layer. 
- if (has_temporal_layers) alt_frame_index = cpi->svc.number_temporal_layers; - - key_frame = - cpi->common.frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi); + key_frame = cpi->common.frame_type == KEY_FRAME; get_arf_buffer_indices(arf_buffer_indices); @@ -2282,19 +2174,14 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, gf_group->rf_level[alt_frame_index] = GF_ARF_STD; gf_group->bit_allocation[alt_frame_index] = gf_arf_bits; - if (has_temporal_layers) - gf_group->arf_src_offset[alt_frame_index] = - (unsigned char)(rc->baseline_gf_interval - - cpi->svc.number_temporal_layers); - else - gf_group->arf_src_offset[alt_frame_index] = - (unsigned char)(rc->baseline_gf_interval - 1); + gf_group->arf_src_offset[alt_frame_index] = + (unsigned char)(rc->baseline_gf_interval - 1); gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0]; gf_group->arf_ref_idx[alt_frame_index] = arf_buffer_indices[cpi->multi_arf_last_grp_enabled && rc->source_alt_ref_active]; - if (!has_temporal_layers) ++frame_index; + ++frame_index; if (cpi->multi_arf_enabled) { // Set aside a slot for a level 1 arf. @@ -2330,11 +2217,6 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, for (i = 0; i < normal_frames; ++i) { int arf_idx = 0; if (EOF == input_stats(twopass, &frame_stats)) break; - - if (has_temporal_layers && frame_index == alt_frame_index) { - ++frame_index; - } - if (oxcf->vbr_corpus_complexity) { this_frame_score = calculate_norm_frame_score(cpi, twopass, oxcf, &frame_stats, av_score); @@ -2559,8 +2441,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); // Monitor for static sections. 
- zero_motion_accumulator = VPXMIN( - zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); + if ((rc->frames_since_key + i - 1) > 1) { + zero_motion_accumulator *= get_zero_motion_factor(cpi, &next_frame); + } // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. @@ -2582,8 +2465,17 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { // Break out conditions. // Break at maximum of active_max_gf_interval unless almost totally static. - if (((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && - (i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) || + // + // Note that the addition of a test of rc->source_alt_ref_active is + // deliberate. The effect of this is that after a normal altref group even + // if the material is static there will be one normal length GF group + // before allowing longer GF groups. The reason for this is that in cases + // such as slide shows where slides are separated by a complex transition + // such as a fade, the arf group spanning the transition may not be coded + // at a very high quality and hence this frame (with its overlay) is a + // poor golden frame to use for an extended group. + if (((i >= active_max_gf_interval) && + ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) || ( // Don't break out with a very short interval. (i >= active_min_gf_interval) && @@ -2603,7 +2495,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0; // Should we use the alternate reference frame. - if ((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && allow_alt_ref && + if ((zero_motion_accumulator < 0.995) && allow_alt_ref && (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) { const int forward_frames = (rc->frames_to_key - i >= i - 1) ? 
i - 1 @@ -2631,32 +2523,11 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 200); #endif - // Set the interval until the next gf. rc->baseline_gf_interval = - (twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) - ? (i - (is_key_frame || rc->source_alt_ref_pending)) - : i; - - // Only encode alt reference frame in temporal base layer. So - // baseline_gf_interval should be multiple of a temporal layer group - // (typically the frame distance between two base layer frames) - if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) { - int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1; - int new_gf_interval = (rc->baseline_gf_interval + count) & (~count); - int j; - for (j = 0; j < new_gf_interval - rc->baseline_gf_interval; ++j) { - if (EOF == input_stats(twopass, this_frame)) break; - gf_group_err += - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); - gf_group_raw_error += this_frame->coded_error; - gf_group_noise += this_frame->frame_noise_energy; - gf_group_skip_pct += this_frame->intra_skip_pct; - gf_group_inactive_zone_rows += this_frame->inactive_zone_rows; - gf_group_inter += this_frame->pcnt_inter; - gf_group_motion += this_frame->pcnt_motion; - } - rc->baseline_gf_interval = new_gf_interval; - } + ((twopass->kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) && + (i >= rc->frames_to_key)) + ? i + : (i - (is_key_frame || rc->source_alt_ref_pending)); rc->frames_till_gf_update_due = rc->baseline_gf_interval; @@ -2903,6 +2774,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { vp9_zero(next_frame); cpi->common.frame_type = KEY_FRAME; + rc->frames_since_key = 0; // Reset the GF group data structures. 
vp9_zero(*gf_group); @@ -3008,18 +2880,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { rc->next_key_frame_forced = 0; } - if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) { - int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1; - int new_frame_to_key = (rc->frames_to_key + count) & (~count); - int j; - for (j = 0; j < new_frame_to_key - rc->frames_to_key; ++j) { - if (EOF == input_stats(twopass, this_frame)) break; - kf_group_err += - calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err); - } - rc->frames_to_key = new_frame_to_key; - } - // Special case for the last key frame of the file. if (twopass->stats_in >= twopass->stats_in_end) { // Accumulate kf group error. @@ -3059,7 +2919,10 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { for (i = 0; i < (rc->frames_to_key - 1); ++i) { if (EOF == input_stats(twopass, &next_frame)) break; - if (i <= KF_BOOST_SCAN_MAX_FRAMES) { + // The zero motion test here insures that if we mark a kf group as static + // it is static throughout not just the first KF_BOOST_SCAN_MAX_FRAMES. + // It also allows for a larger boost on long static groups. 
+ if ((i <= KF_BOOST_SCAN_MAX_FRAMES) || (zero_motion_accumulator >= 0.99)) { double frame_boost; double zm_factor; @@ -3171,21 +3034,13 @@ static void configure_buffer_updates(VP9_COMP *cpi) { cpi->refresh_alt_ref_frame = 0; cpi->rc.is_src_frame_alt_ref = 1; break; - case ARF_UPDATE: + default: + assert(twopass->gf_group.update_type[twopass->gf_group.index] == + ARF_UPDATE); cpi->refresh_last_frame = 0; cpi->refresh_golden_frame = 0; cpi->refresh_alt_ref_frame = 1; break; - default: assert(0); break; - } - if (is_two_pass_svc(cpi)) { - if (cpi->svc.temporal_layer_id > 0) { - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 0; - } - if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].gold_ref_idx < 0) - cpi->refresh_golden_frame = 0; - if (cpi->alt_ref_source == NULL) cpi->refresh_alt_ref_frame = 0; } } @@ -3194,10 +3049,7 @@ static int is_skippable_frame(const VP9_COMP *cpi) { // first pass, and so do its previous and forward frames, then this frame // can be skipped for partition check, and the partition size is assigned // according to the variance - const SVC *const svc = &cpi->svc; - const TWO_PASS *const twopass = - is_two_pass_svc(cpi) ? &svc->layer_context[svc->spatial_layer_id].twopass - : &cpi->twopass; + const TWO_PASS *const twopass = &cpi->twopass; return (!frame_is_intra_only(&cpi->common) && twopass->stats_in - 2 > twopass->stats_in_start && @@ -3219,9 +3071,6 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { FIRSTPASS_STATS this_frame; int target_rate; - LAYER_CONTEXT *const lc = - is_two_pass_svc(cpi) ? 
&cpi->svc.layer_context[cpi->svc.spatial_layer_id] - : 0; if (!twopass->stats_in) return; @@ -3236,20 +3085,10 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { cm->frame_type = INTER_FRAME; - if (lc != NULL) { - if (cpi->svc.spatial_layer_id == 0) { - lc->is_key_frame = 0; - } else { - lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; - - if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG); - } - } - // Do the firstpass stats indicate that this frame is skippable for the // partition search? if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 && - (!cpi->use_svc || is_two_pass_svc(cpi))) { + !cpi->use_svc) { cpi->partition_search_skippable_frame = is_skippable_frame(cpi); } @@ -3260,12 +3099,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { if (cpi->oxcf.rc_mode == VPX_Q) { twopass->active_worst_quality = cpi->oxcf.cq_level; - } else if (cm->current_video_frame == 0 || - (lc != NULL && lc->current_video_frame_in_layer == 0)) { + } else if (cm->current_video_frame == 0) { const int frames_left = - (int)(twopass->total_stats.count - - ((lc != NULL) ? lc->current_video_frame_in_layer - : cm->current_video_frame)); + (int)(twopass->total_stats.count - cm->current_video_frame); // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); @@ -3314,33 +3150,11 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { cm->frame_type = INTER_FRAME; } - if (lc != NULL) { - if (cpi->svc.spatial_layer_id == 0) { - lc->is_key_frame = (cm->frame_type == KEY_FRAME); - if (lc->is_key_frame) { - cpi->ref_frame_flags &= - (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); - lc->frames_from_key_frame = 0; - // Encode an intra only empty frame since we have a key frame. 
- cpi->svc.encode_intra_empty_frame = 1; - } - } else { - cm->frame_type = INTER_FRAME; - lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; - - if (lc->is_key_frame) { - cpi->ref_frame_flags &= (~VP9_LAST_FLAG); - lc->frames_from_key_frame = 0; - } - } - } - // Define a new GF/ARF group. (Should always enter here for key frames). if (rc->frames_till_gf_update_due == 0) { define_gf_group(cpi, &this_frame); rc->frames_till_gf_update_due = rc->baseline_gf_interval; - if (lc != NULL) cpi->refresh_golden_frame = 1; #if ARF_STATS_OUTPUT { @@ -3361,7 +3175,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { // Do the firstpass stats indicate that this frame is skippable for the // partition search? if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 && - (!cpi->use_svc || is_two_pass_svc(cpi))) { + !cpi->use_svc) { cpi->partition_search_skippable_frame = is_skippable_frame(cpi); } @@ -3407,8 +3221,7 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) { rc->rate_error_estimate = 0; } - if (cpi->common.frame_type != KEY_FRAME && - !vp9_is_upper_layer_key_frame(cpi)) { + if (cpi->common.frame_type != KEY_FRAME) { twopass->kf_group_bits -= bits_used; twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c index 1cb978667b5..ba72c0be5ed 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c @@ -1793,7 +1793,7 @@ static const MV search_pos[4] = { unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, - int mi_col) { + int mi_col, const MV *ref_mv) { MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; @@ -1815,6 +1815,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, 
MACROBLOCK *x, const int norm_factor = 3 + (bw >> 5); const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]); + MvLimits subpel_mv_limits; if (scaled_ref_frame) { int i; @@ -1917,6 +1918,10 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, tmp_mv->row *= 8; tmp_mv->col *= 8; + vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); + clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max, + subpel_mv_limits.row_min, subpel_mv_limits.row_max); + if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; @@ -2210,7 +2215,8 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; - case NSTEP: + default: + assert(method == NSTEP); var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, MAX_MVSEARCH_STEPS - 1 - step_param, 1, cost_list, fn_ptr, ref_mv, tmp_mv); @@ -2236,7 +2242,6 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, } } break; - default: assert(0 && "Invalid search method."); } if (method != NSTEP && rd && var < var_max) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h index b8db2c35368..b4787fe1fc5 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h @@ -66,7 +66,8 @@ int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv, // Perform integral projection based motion estimation. 
unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - int mi_row, int mi_col); + int mi_row, int mi_col, + const MV *ref_mv); typedef uint32_t(fractional_mv_step_fp)( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c index 1c2c55b9e4b..4e96490658b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c @@ -169,14 +169,10 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi, case VPX_BITS_10: filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20); break; - case VPX_BITS_12: + default: + assert(cm->bit_depth == VPX_BITS_12); filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22); break; - default: - assert(0 && - "bit_depth should be VPX_BITS_8, VPX_BITS_10 " - "or VPX_BITS_12"); - return; } #else int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c index a9c7c7d3d19..b61a89471a9 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c @@ -224,6 +224,14 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, if (rv && search_subpel) { int subpel_force_stop = cpi->sf.mv.subpel_force_stop; if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2; + if (cpi->sf.mv.enable_adaptive_subpel_force_stop) { + int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh; + if (abs(tmp_mv->as_mv.row) >= mv_thresh || + abs(tmp_mv->as_mv.col) >= mv_thresh) + subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above; + else + subpel_force_stop = 
cpi->sf.mv.adapt_subpel_force_stop.force_stop_below; + } cpi->find_fractional_mv_step( x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop, @@ -726,13 +734,13 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - case TX_4X4: + default: + assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; - default: assert(0); break; } *skippable &= (*eob == 0); eob_cost += 1; @@ -1421,7 +1429,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; - const SVC *const svc = &cpi->svc; + SVC *const svc = &cpi->svc; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; struct macroblockd_plane *const pd = &xd->plane[0]; @@ -1495,27 +1503,37 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #endif INTERP_FILTER filter_gf_svc = EIGHTTAP; MV_REFERENCE_FRAME best_second_ref_frame = NONE; - MV_REFERENCE_FRAME spatial_ref = GOLDEN_FRAME; + MV_REFERENCE_FRAME inter_layer_ref = GOLDEN_FRAME; const struct segmentation *const seg = &cm->seg; int comp_modes = 0; int num_inter_modes = (cpi->use_svc) ? 
RT_INTER_MODES_SVC : RT_INTER_MODES; int flag_svc_subpel = 0; int svc_mv_col = 0; int svc_mv_row = 0; + int no_scaling = 0; unsigned int thresh_svc_skip_golden = 500; + if (cpi->use_svc && svc->spatial_layer_id > 0) { + int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id - 1, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + if (lc->scaling_factor_num == lc->scaling_factor_den) no_scaling = 1; + } + if (svc->spatial_layer_id > 0 && + (svc->high_source_sad_superframe || no_scaling)) + thresh_svc_skip_golden = 0; // Lower the skip threshold if lower spatial layer is better quality relative // to current layer. - if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 && - cm->base_qindex > cpi->svc.lower_layer_qindex + 15) + else if (svc->spatial_layer_id > 0 && cm->base_qindex > 150 && + cm->base_qindex > svc->lower_layer_qindex + 15) thresh_svc_skip_golden = 100; // Increase skip threshold if lower spatial layer is lower quality relative // to current layer. 
- else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex < 140 && - cm->base_qindex < cpi->svc.lower_layer_qindex - 20) + else if (svc->spatial_layer_id > 0 && cm->base_qindex < 140 && + cm->base_qindex < svc->lower_layer_qindex - 20) thresh_svc_skip_golden = 1000; init_ref_frame_cost(cm, xd, ref_frame_cost); - memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES); if (reuse_inter_pred) { @@ -1575,10 +1593,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { if (cpi->use_svc) { - int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, - cpi->svc.temporal_layer_id, - cpi->svc.number_temporal_layers); - LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, + svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame; } if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode) @@ -1613,19 +1631,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // For svc mode, on spatial_layer_id > 0: if the reference has different scale // constrain the inter mode to only test zero motion. 
if (cpi->use_svc && svc->force_zero_mode_spatial_ref && - cpi->svc.spatial_layer_id > 0) { + svc->spatial_layer_id > 0) { if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) { struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; if (vp9_is_scaled(sf)) { svc_force_zero_mode[LAST_FRAME - 1] = 1; - spatial_ref = LAST_FRAME; + inter_layer_ref = LAST_FRAME; } } if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) { struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf; if (vp9_is_scaled(sf)) { svc_force_zero_mode[GOLDEN_FRAME - 1] = 1; - spatial_ref = GOLDEN_FRAME; + inter_layer_ref = GOLDEN_FRAME; } } } @@ -1642,6 +1660,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } } + if (sf->disable_golden_ref && (x->content_state_sb != kVeryHighSad || + cpi->rc.avg_frame_low_motion < 60)) + usable_ref_frame = LAST_FRAME; + if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var)) use_golden_nonzeromv = 0; @@ -1667,6 +1689,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) { + // Skip find_predictor if the reference frame is not in the + // ref_frame_flags (i.e., not used as a reference for this frame). + skip_ref_find_pred[ref_frame] = + !(cpi->ref_frame_flags & flag_list[ref_frame]); if (!skip_ref_find_pred[ref_frame]) { find_predictors(cpi, x, ref_frame, frame_mv, const_motion, &ref_frame_skip_mask, flag_list, tile_data, mi_row, @@ -1682,9 +1708,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // an averaging filter for downsampling (phase = 8). If so, we will test // a nonzero motion mode on the spatial reference. // The nonzero motion is half pixel shifted to left and top (-4, -4). 
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 && - svc_force_zero_mode[spatial_ref - 1] && - cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) { + if (cpi->use_svc && svc->spatial_layer_id > 0 && + svc_force_zero_mode[inter_layer_ref - 1] && + svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8) { svc_mv_col = -4; svc_mv_row = -4; flag_svc_subpel = 1; @@ -1733,7 +1759,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) continue; - if (flag_svc_subpel && ref_frame == spatial_ref) { + if (flag_svc_subpel && ref_frame == inter_layer_ref) { force_gf_mv = 1; // Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row), // otherwise set NEWMV to (svc_mv_col, svc_mv_row). @@ -1761,8 +1787,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, sse_zeromv_normalized < thresh_svc_skip_golden) continue; + if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; + if (sf->short_circuit_flat_blocks && x->source_variance == 0 && - this_mode != NEARESTMV) { + frame_mv[this_mode][ref_frame].as_int != 0) { continue; } @@ -1792,8 +1820,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, continue; } - if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; - if (const_motion[ref_frame] && this_mode == NEARMV) continue; // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var @@ -1873,7 +1899,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, (!cpi->sf.adaptive_rd_thresh_row_mt && rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, &rd_thresh_freq_fact[mode_index]))) - continue; + if (frame_mv[this_mode][ref_frame].as_int != 0) continue; if (this_mode == NEWMV && !force_gf_mv) { if (ref_frame > LAST_FRAME && !cpi->use_svc && @@ -1884,7 +1910,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc 
*tile_data, if (bsize < BLOCK_16X16) continue; - tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); + tmp_sad = vp9_int_pro_motion_estimation( + cpi, x, bsize, mi_row, mi_col, + &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv); if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) continue; if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad) @@ -1919,7 +1947,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // Exit NEWMV search if base_mv is (0,0) && bsize < BLOCK_16x16, // for SVC encoding. - if (cpi->use_svc && cpi->svc.use_base_mv && bsize < BLOCK_16X16 && + if (cpi->use_svc && svc->use_base_mv && bsize < BLOCK_16X16 && frame_mv[NEWMV][ref_frame].as_mv.row == 0 && frame_mv[NEWMV][ref_frame].as_mv.col == 0) continue; @@ -2242,12 +2270,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // layer is chosen as the reference. Always perform intra prediction if // LAST is the only reference, or is_key_frame is set, or on base // temporal layer. 
- if (cpi->svc.spatial_layer_id) { + if (svc->spatial_layer_id) { perform_intra_pred = - cpi->svc.temporal_layer_id == 0 || - cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame || + svc->temporal_layer_id == 0 || + svc->layer_context[svc->temporal_layer_id].is_key_frame || !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) || - (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && + (!svc->layer_context[svc->temporal_layer_id].is_key_frame && svc_force_zero_mode[best_ref_frame - 1]); inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c index 09f61ead263..276022a56b8 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c @@ -204,10 +204,9 @@ static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) { switch (bit_depth) { case VPX_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80); case VPX_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80); - case VPX_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80); default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; + assert(bit_depth == VPX_BITS_12); + return q == 0 ? 64 : (quant < 2368 ? 
84 : 80); } #else (void)bit_depth; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c index b5f1a5c5c71..c349a807aa2 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c @@ -48,18 +48,16 @@ #define MAX_BPB_FACTOR 50 #if CONFIG_VP9_HIGHBITDEPTH -#define ASSIGN_MINQ_TABLE(bit_depth, name) \ - do { \ - switch (bit_depth) { \ - case VPX_BITS_8: name = name##_8; break; \ - case VPX_BITS_10: name = name##_10; break; \ - case VPX_BITS_12: name = name##_12; break; \ - default: \ - assert(0 && \ - "bit_depth should be VPX_BITS_8, VPX_BITS_10" \ - " or VPX_BITS_12"); \ - name = NULL; \ - } \ +#define ASSIGN_MINQ_TABLE(bit_depth, name) \ + do { \ + switch (bit_depth) { \ + case VPX_BITS_8: name = name##_8; break; \ + case VPX_BITS_10: name = name##_10; break; \ + default: \ + assert(bit_depth == VPX_BITS_12); \ + name = name##_12; \ + break; \ + } \ } while (0) #else #define ASSIGN_MINQ_TABLE(bit_depth, name) \ @@ -167,10 +165,9 @@ double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth) { switch (bit_depth) { case VPX_BITS_8: return vp9_ac_quant(qindex, 0, bit_depth) / 4.0; case VPX_BITS_10: return vp9_ac_quant(qindex, 0, bit_depth) / 16.0; - case VPX_BITS_12: return vp9_ac_quant(qindex, 0, bit_depth) / 64.0; default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1.0; + assert(bit_depth == VPX_BITS_12); + return vp9_ac_quant(qindex, 0, bit_depth) / 64.0; } #else return vp9_ac_quant(qindex, 0, bit_depth) / 4.0; @@ -620,8 +617,14 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) && (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) && cpi->rc.q_1_frame != cpi->rc.q_2_frame) { - q = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), - 
VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); + int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), + VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); + // If the previous had overshoot and the current q needs to increase above + // the clamped value, reduce the clamp for faster reaction to overshoot. + if (cpi->rc.rc_1_frame == -1 && q > qclamp) + q = (q + qclamp) >> 1; + else + q = qclamp; } return q; } @@ -832,19 +835,6 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, *top_index = active_worst_quality; *bottom_index = active_best_quality; -#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY - // Limit Q range for the adaptive loop. - if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced && - !(cm->current_video_frame == 0)) { - int qdelta = 0; - vpx_clear_system_state(); - qdelta = vp9_compute_qdelta_by_rate( - &cpi->rc, cm->frame_type, active_worst_quality, 2.0, cm->bit_depth); - *top_index = active_worst_quality + qdelta; - *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index; - } -#endif - // Special case code to try and match quality with forced key frames if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) { q = rc->last_boosted_qindex; @@ -1097,7 +1087,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, int *inter_minq; ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq); - if (frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) { + if (frame_is_intra_only(cm)) { // Handle the special case for key frames forced when we have reached // the maximum key frame interval. Here force the Q to a range // based on the ambient Q to reduce the risk of popping. @@ -1213,9 +1203,8 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY vpx_clear_system_state(); // Static forced key frames Q restrictions dealt with elsewhere. 
- if (!((frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi))) || - !rc->this_key_frame_forced || - (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) { + if (!frame_is_intra_only(cm) || !rc->this_key_frame_forced || + cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH) { int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index], active_worst_quality); active_worst_quality = @@ -1239,8 +1228,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, if (oxcf->rc_mode == VPX_Q) { q = active_best_quality; // Special case code to try and match quality with forced key frames. - } else if ((frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) && - rc->this_key_frame_forced) { + } else if (frame_is_intra_only(cm) && rc->this_key_frame_forced) { // If static since last kf use better of last boosted and last kf q. if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) { q = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex); @@ -1488,7 +1476,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - if (!cpi->use_svc || is_two_pass_svc(cpi)) { + if (!cpi->use_svc) { if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) // Update the alternate reference frame stats as appropriate. 
@@ -1734,10 +1722,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { cpi->svc.spatial_layer_id == 0)) { cm->frame_type = KEY_FRAME; rc->source_alt_ref_active = 0; - if (is_two_pass_svc(cpi)) { - cpi->svc.layer_context[layer].is_key_frame = 1; - cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); - } else if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_cbr_svc(cpi)) { if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi); layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id, @@ -1750,17 +1735,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { } } else { cm->frame_type = INTER_FRAME; - if (is_two_pass_svc(cpi)) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; - if (cpi->svc.spatial_layer_id == 0) { - lc->is_key_frame = 0; - } else { - lc->is_key_frame = - cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame; - if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG); - } - cpi->ref_frame_flags &= (~VP9_ALT_FLAG); - } else if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_cbr_svc(cpi)) { LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) { lc->is_key_frame = 0; @@ -1790,8 +1765,6 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) { cm->frame_type = KEY_FRAME; - rc->this_key_frame_forced = - cm->current_video_frame != 0 && rc->frames_to_key == 0; rc->frames_to_key = cpi->oxcf.key_freq; rc->kf_boost = DEFAULT_KF_BOOST; rc->source_alt_ref_active = 0; @@ -2301,18 +2274,34 @@ static void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi, void vp9_scene_detection_onepass(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; + YV12_BUFFER_CONFIG const *unscaled_src = cpi->un_scaled_source; + YV12_BUFFER_CONFIG const *unscaled_last_src = 
cpi->unscaled_last_source; + uint8_t *src_y; + int src_ystride; + int src_width; + int src_height; + uint8_t *last_src_y; + int last_src_ystride; + int last_src_width; + int last_src_height; + if (cpi->un_scaled_source == NULL || cpi->unscaled_last_source == NULL || + (cpi->use_svc && cpi->svc.current_superframe == 0)) + return; + src_y = unscaled_src->y_buffer; + src_ystride = unscaled_src->y_stride; + src_width = unscaled_src->y_width; + src_height = unscaled_src->y_height; + last_src_y = unscaled_last_src->y_buffer; + last_src_ystride = unscaled_last_src->y_stride; + last_src_width = unscaled_last_src->y_width; + last_src_height = unscaled_last_src->y_height; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) return; #endif rc->high_source_sad = 0; - if (cpi->Last_Source != NULL && - cpi->Last_Source->y_width == cpi->Source->y_width && - cpi->Last_Source->y_height == cpi->Source->y_height) { + if (cpi->svc.spatial_layer_id == 0 && src_width == last_src_width && + src_height == last_src_height) { YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL }; - uint8_t *src_y = cpi->Source->y_buffer; - int src_ystride = cpi->Source->y_stride; - uint8_t *last_src_y = cpi->Last_Source->y_buffer; - int last_src_ystride = cpi->Last_Source->y_stride; int start_frame = 0; int frames_to_buffer = 1; int frame = 0; @@ -2437,6 +2426,19 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { if (cm->frame_type != KEY_FRAME && rc->reset_high_source_sad) rc->this_frame_target = rc->avg_frame_bandwidth; } + // For SVC the new (updated) avg_source_sad[0] for the current superframe + // updates the setting for all layers. 
+ if (cpi->use_svc) { + int sl, tl; + SVC *const svc = &cpi->svc; + for (sl = 0; sl < svc->number_spatial_layers; ++sl) + for (tl = 0; tl < svc->number_temporal_layers; ++tl) { + int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + lrc->avg_source_sad[0] = rc->avg_source_sad[0]; + } + } // For VBR, under scene change/high content change, force golden refresh. if (cpi->oxcf.rc_mode == VPX_VBR && cm->frame_type != KEY_FRAME && rc->high_source_sad && rc->frames_to_key > 3 && @@ -2471,7 +2473,10 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int thresh_qp = 3 * (rc->worst_quality >> 2); - int thresh_rate = rc->avg_frame_bandwidth * 10; + int thresh_rate = rc->avg_frame_bandwidth << 3; + // Lower rate threshold for video. + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) + thresh_rate = rc->avg_frame_bandwidth << 2; if (cm->base_qindex < thresh_qp && frame_size > thresh_rate) { double rate_correction_factor = cpi->rc.rate_correction_factors[INTER_NORMAL]; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c index 6b2306ce9b0..3407e74c64f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c @@ -69,10 +69,12 @@ static void fill_mode_costs(VP9_COMP *cpi) { const FRAME_CONTEXT *const fc = cpi->common.fc; int i, j; - for (i = 0; i < INTRA_MODES; ++i) - for (j = 0; j < INTRA_MODES; ++j) + for (i = 0; i < INTRA_MODES; ++i) { + for (j = 0; j < INTRA_MODES; ++j) { vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], vp9_intra_mode_tree); + } + } vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); for (i = 0; i < INTRA_MODES; ++i) { @@ -82,9 +84,28 @@ static void 
fill_mode_costs(VP9_COMP *cpi) { fc->uv_mode_prob[i], vp9_intra_mode_tree); } - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) { vp9_cost_tokens(cpi->switchable_interp_costs[i], fc->switchable_interp_prob[i], vp9_switchable_interp_tree); + } + + for (i = TX_8X8; i < TX_SIZES; ++i) { + for (j = 0; j < TX_SIZE_CONTEXTS; ++j) { + const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs); + int k; + for (k = 0; k <= i; ++k) { + int cost = 0; + int m; + for (m = 0; m <= k - (k == i); ++m) { + if (m == k) + cost += vp9_cost_zero(tx_probs[m]); + else + cost += vp9_cost_one(tx_probs[m]); + } + cpi->tx_size_cost[i - 1][j][k] = cost; + } + } + } } static void fill_token_costs(vp9_coeff_cost *c, @@ -153,10 +174,10 @@ int64_t vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) { switch (cpi->common.bit_depth) { case VPX_BITS_8: rdmult = 88 * q * q / 24; break; case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break; - case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break; default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; + assert(cpi->common.bit_depth == VPX_BITS_12); + rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); + break; } #else int64_t rdmult = 88 * q * q / 24; @@ -185,10 +206,10 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) { switch (bit_depth) { case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break; case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break; - case VPX_BITS_12: q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; break; default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); - return -1; + assert(bit_depth == VPX_BITS_12); + q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; + break; } #else (void)bit_depth; @@ -209,12 +230,11 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) { 
x->sadperbit16 = sad_per_bit16lut_10[qindex]; x->sadperbit4 = sad_per_bit4lut_10[qindex]; break; - case VPX_BITS_12: + default: + assert(cpi->common.bit_depth == VPX_BITS_12); x->sadperbit16 = sad_per_bit16lut_12[qindex]; x->sadperbit4 = sad_per_bit4lut_12[qindex]; break; - default: - assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); } #else (void)cpi; @@ -471,13 +491,13 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, for (i = 0; i < num_4x4_h; i += 4) t_left[i] = !!*(const uint32_t *)&left[i]; break; - case TX_32X32: + default: + assert(tx_size == TX_32X32); for (i = 0; i < num_4x4_w; i += 8) t_above[i] = !!*(const uint64_t *)&above[i]; for (i = 0; i < num_4x4_h; i += 8) t_left[i] = !!*(const uint64_t *)&left[i]; break; - default: assert(0 && "Invalid transform size."); break; } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h index 59022c106e2..8201bba7039 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h @@ -108,7 +108,11 @@ typedef struct RD_OPT { int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; +#if CONFIG_CONSISTENT_RECODE + int64_t prediction_type_threshes_prev[MAX_REF_FRAMES][REFERENCE_MODES]; + int64_t filter_threshes_prev[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; +#endif int RDMULT; int RDDIV; } RD_OPT; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c index b6541b0f735..e3672edf529 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c @@ -543,8 +543,9 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, MACROBLOCKD *const xd = 
&x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; + const int eob = p->eobs[block]; - if (x->block_tx_domain) { + if (x->block_tx_domain && eob) { const int ss_txfrm_size = tx_size << 1; int64_t this_sse; const int shift = tx_size == TX_32X32 ? 0 : 2; @@ -584,14 +585,13 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, const uint8_t *src = &p->src.buf[src_idx]; const uint8_t *dst = &pd->dst.buf[dst_idx]; const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - const uint16_t *eob = &p->eobs[block]; unsigned int tmp; tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row, blk_col, plane_bsize, tx_bsize); *out_sse = (int64_t)tmp * 16; - if (*eob) { + if (eob) { #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, recon16[1024]); uint8_t *recon = (uint8_t *)recon16; @@ -604,22 +604,22 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16, 32, NULL, 0, 0, 0, 0, bs, bs, xd->bd); if (xd->lossless) { - vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, *eob, xd->bd); + vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, eob, xd->bd); } else { switch (tx_size) { case TX_4X4: - vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, *eob, xd->bd); + vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, eob, xd->bd); break; case TX_8X8: - vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, *eob, xd->bd); + vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, eob, xd->bd); break; case TX_16X16: - vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, *eob, xd->bd); + vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, eob, xd->bd); break; - case TX_32X32: - vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, *eob, xd->bd); + default: + assert(tx_size == TX_32X32); + vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, eob, xd->bd); break; - default: assert(0 && "Invalid transform size"); } } recon = CONVERT_TO_BYTEPTR(recon16); 
@@ -627,16 +627,16 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane, #endif // CONFIG_VP9_HIGHBITDEPTH vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs); switch (tx_size) { - case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break; - case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break; - case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, *eob); break; - case TX_4X4: + case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, eob); break; + case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, eob); break; + case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, eob); break; + default: + assert(tx_size == TX_4X4); // this is like vp9_short_idct4x4 but has a special case around // eob<=1, which is significant (not just an optimization) for // the lossless case. - x->inv_txfm_add(dqcoeff, recon, 32, *eob); + x->inv_txfm_add(dqcoeff, recon, 32, eob); break; - default: assert(0 && "Invalid transform size"); break; } #if CONFIG_VP9_HIGHBITDEPTH } @@ -845,20 +845,20 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, { INT64_MAX, INT64_MAX }, { INT64_MAX, INT64_MAX }, { INT64_MAX, INT64_MAX } }; - int n, m; + int n; int s0, s1; - int64_t best_rd = INT64_MAX; + int64_t best_rd = ref_best_rd; TX_SIZE best_tx = max_tx_size; int start_tx, end_tx; - - const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); + const int tx_size_ctx = get_tx_size_context(xd); assert(skip_prob > 0); s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); if (cm->tx_mode == TX_MODE_SELECT) { start_tx = max_tx_size; - end_tx = 0; + end_tx = VPXMAX(start_tx - cpi->sf.tx_size_search_depth, 0); + if (bs > BLOCK_32X32) end_tx = VPXMIN(end_tx + 1, start_tx); } else { TX_SIZE chosen_tx_size = VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]); @@ -867,15 +867,9 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, } for (n = start_tx; n >= end_tx; n--) { - 
int r_tx_size = 0; - for (m = 0; m <= n - (n == (int)max_tx_size); m++) { - if (m == n) - r_tx_size += vp9_cost_zero(tx_probs[m]); - else - r_tx_size += vp9_cost_one(tx_probs[m]); - } - txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0, - bs, n, cpi->sf.use_fast_coef_costing); + const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n]; + txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n, + cpi->sf.use_fast_coef_costing); r[n][1] = r[n][0]; if (r[n][0] < INT_MAX) { r[n][1] += r_tx_size; @@ -1468,11 +1462,11 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, if (is_compound) this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int; break; - case ZEROMV: + default: + assert(mode == ZEROMV); this_mv[0].as_int = 0; if (is_compound) this_mv[1].as_int = 0; break; - default: break; } mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int; @@ -3618,9 +3612,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, } if (best_mode_index < 0 || best_rd >= best_rd_so_far) { - // If adaptive interp filter is enabled, then the current leaf node of 8x8 - // data is needed for sub8x8. Hence preserve the context. +// If adaptive interp filter is enabled, then the current leaf node of 8x8 +// data is needed for sub8x8. Hence preserve the context. 
+#if CONFIG_CONSISTENT_RECODE + if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; +#else if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; +#endif rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c index 9a46e98839b..d2842697dae 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c @@ -32,7 +32,7 @@ static MESH_PATTERN // Intra only frames, golden frames (except alt ref overlays) and // alt ref frames tend to be coded at a higher than ambient quality static int frame_is_boosted(const VP9_COMP *cpi) { - return frame_is_kf_gf_arf(cpi) || vp9_is_upper_layer_key_frame(cpi); + return frame_is_kf_gf_arf(cpi); } // Sets a partition size down to which the auto partition code will always @@ -374,6 +374,9 @@ static void set_rt_speed_feature_framesize_independent( sf->use_compound_nonrd_pickmode = 0; sf->nonrd_keyframe = 0; sf->svc_use_lowres_part = 0; + sf->re_encode_overshoot_rt = 0; + sf->disable_16x16part_nonkey = 0; + sf->disable_golden_ref = 0; if (speed >= 1) { sf->allow_txfm_domain_distortion = 1; @@ -534,6 +537,16 @@ static void set_rt_speed_feature_framesize_independent( // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent // increase in encoding time. 
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1; + if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && + cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && + (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) { + sf->re_encode_overshoot_rt = 1; + } + if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 && + cm->width <= 1280 && cm->height <= 720) { + sf->use_altref_onepass = 1; + sf->use_compound_nonrd_pickmode = 1; + } } if (speed >= 6) { @@ -656,6 +669,21 @@ static void set_rt_speed_feature_framesize_independent( sf->limit_newmv_early_exit = 0; sf->use_simple_block_yrd = 1; } + + if (speed >= 9) { + sf->mv.enable_adaptive_subpel_force_stop = 1; + sf->mv.adapt_subpel_force_stop.mv_thresh = 2; + if (cpi->rc.avg_frame_low_motion < 40) + sf->mv.adapt_subpel_force_stop.mv_thresh = 1; + sf->mv.adapt_subpel_force_stop.force_stop_below = 1; + sf->mv.adapt_subpel_force_stop.force_stop_above = 2; + // Disable partition blocks below 16x16, except for low-resolutions. + if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240) + sf->disable_16x16part_nonkey = 1; + // Allow for disabling GOLDEN reference, for CBR mode. + if (cpi->oxcf.rc_mode == VPX_CBR) sf->disable_golden_ref = 1; + } + if (sf->use_altref_onepass) { if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) { sf->partition_search_type = FIXED_PARTITION; @@ -812,6 +840,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { // Some speed-up features even for best quality as minimal impact on quality. sf->adaptive_rd_thresh = 1; sf->tx_size_search_breakout = 1; + sf->tx_size_search_depth = 2; sf->exhaustive_searches_thresh = (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? 
(1 << 20) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h index 50d52bc23a4..251cfdbcdf1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h @@ -161,6 +161,17 @@ typedef enum { ONE_LOOP_REDUCED = 1 } FAST_COEFF_UPDATE; +typedef struct ADAPT_SUBPEL_FORCE_STOP { + // Threshold for full pixel motion vector; + int mv_thresh; + + // subpel_force_stop if full pixel MV is below the threshold. + int force_stop_below; + + // subpel_force_stop if full pixel MV is equal to or above the threshold. + int force_stop_above; +} ADAPT_SUBPEL_FORCE_STOP; + typedef struct MV_SPEED_FEATURES { // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). SEARCH_METHODS search_method; @@ -189,6 +200,11 @@ typedef struct MV_SPEED_FEATURES { // 3: Stop at full pixel. int subpel_force_stop; + // If it's enabled, different subpel_force_stop will be used for different MV. + int enable_adaptive_subpel_force_stop; + + ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop; + // This variable sets the step_param used in full pel motion search. int fullpel_search_step_param; } MV_SPEED_FEATURES; @@ -272,6 +288,9 @@ typedef struct SPEED_FEATURES { // for intra and model coefs for the rest. TX_SIZE_SEARCH_METHOD tx_size_search_method; + // How many levels of tx size to search, starting from the largest. + int tx_size_search_depth; + // Low precision 32x32 fdct keeps everything in 16 bits and thus is less // precise but significantly faster than the non lp version. int use_lp32x32fdct; @@ -508,6 +527,16 @@ typedef struct SPEED_FEATURES { // For SVC: enables use of partition from lower spatial resolution. int svc_use_lowres_part; + + // Enable re-encoding on scene change with potential high overshoot, + // for real-time encoding flow. 
+ int re_encode_overshoot_rt; + + // Disable partitioning of 16x16 blocks. + int disable_16x16part_nonkey; + + // Allow for disabling golden reference. + int disable_golden_ref; } SPEED_FEATURES; struct VP9_COMP; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c index 4dfdc65b727..fec0fa8930d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c @@ -41,17 +41,21 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->disable_inter_layer_pred = INTER_LAYER_PRED_ON; svc->framedrop_mode = CONSTRAINED_LAYER_DROP; - for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1; + for (i = 0; i < REF_FRAMES; ++i) { + svc->fb_idx_spatial_layer_id[i] = -1; + svc->fb_idx_temporal_layer_id[i] = -1; + } for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { svc->last_layer_dropped[sl] = 0; svc->drop_spatial_layer[sl] = 0; svc->ext_frame_flags[sl] = 0; - svc->ext_lst_fb_idx[sl] = 0; - svc->ext_gld_fb_idx[sl] = 1; - svc->ext_alt_fb_idx[sl] = 2; + svc->lst_fb_idx[sl] = 0; + svc->gld_fb_idx[sl] = 1; + svc->alt_fb_idx[sl] = 2; svc->downsample_filter_type[sl] = BILINEAR; svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter. svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark; + svc->fb_idx_upd_tl0[sl] = -1; } if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { @@ -311,7 +315,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). 
if (cpi->svc.number_temporal_layers > 1 || - (cpi->svc.number_spatial_layers > 1 && !is_two_pass_svc(cpi))) { + cpi->svc.number_spatial_layers > 1) { cpi->rc.frames_since_key = old_frame_since_key; cpi->rc.frames_to_key = old_frame_to_key; } @@ -389,15 +393,6 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { ++cpi->svc.current_superframe; } -int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { - return is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0 && - cpi->svc - .layer_context[cpi->svc.spatial_layer_id * - cpi->svc.number_temporal_layers + - cpi->svc.temporal_layer_id] - .is_key_frame; -} - void get_layer_resolution(const int width_org, const int height_org, const int num, const int den, int *width_out, int *height_out) { @@ -416,6 +411,40 @@ void get_layer_resolution(const int width_org, const int height_org, *height_out = h; } +void reset_fb_idx_unused(VP9_COMP *const cpi) { + // If a reference frame is not referenced or refreshed, then set the + // fb_idx for that reference to the first one used/referenced. + // This is to avoid setting fb_idx for a reference to a slot that is not + // used/needed (i.e., since that reference is not referenced or refreshed). 
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, + VP9_ALT_FLAG }; + MV_REFERENCE_FRAME ref_frame; + MV_REFERENCE_FRAME first_ref = 0; + int first_fb_idx = 0; + int fb_idx[3] = { cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx }; + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { + if (cpi->ref_frame_flags & flag_list[ref_frame]) { + first_ref = ref_frame; + first_fb_idx = fb_idx[ref_frame - 1]; + break; + } + } + if (first_ref > 0) { + if (first_ref != LAST_FRAME && + !(cpi->ref_frame_flags & flag_list[LAST_FRAME]) && + !cpi->ext_refresh_last_frame) + cpi->lst_fb_idx = first_fb_idx; + else if (first_ref != GOLDEN_FRAME && + !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && + !cpi->ext_refresh_golden_frame) + cpi->gld_fb_idx = first_fb_idx; + else if (first_ref != ALTREF_FRAME && + !(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]) && + !cpi->ext_refresh_alt_ref_frame) + cpi->alt_fb_idx = first_fb_idx; + } +} + // The function sets proper ref_frame_flags, buffer indices, and buffer update // variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering // scheme. 
@@ -519,6 +548,8 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } + + reset_fb_idx_unused(cpi); } // The function sets proper ref_frame_flags, buffer indices, and buffer update @@ -578,6 +609,8 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } + + reset_fb_idx_unused(cpi); } // The function sets proper ref_frame_flags, buffer indices, and buffer update @@ -610,6 +643,28 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering( } else { cpi->gld_fb_idx = 0; } + + reset_fb_idx_unused(cpi); +} + +void vp9_copy_flags_ref_update_idx(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, + VP9_ALT_FLAG }; + int sl = svc->spatial_layer_id; + svc->lst_fb_idx[sl] = cpi->lst_fb_idx; + svc->gld_fb_idx[sl] = cpi->gld_fb_idx; + svc->alt_fb_idx[sl] = cpi->alt_fb_idx; + + svc->update_last[sl] = (uint8_t)cpi->refresh_last_frame; + svc->update_golden[sl] = (uint8_t)cpi->refresh_golden_frame; + svc->update_altref[sl] = (uint8_t)cpi->refresh_alt_ref_frame; + svc->reference_last[sl] = + (uint8_t)(cpi->ref_frame_flags & flag_list[LAST_FRAME]); + svc->reference_golden[sl] = + (uint8_t)(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]); + svc->reference_altref[sl] = + (uint8_t)(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]); } int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { @@ -646,18 +701,30 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; sl = cpi->svc.spatial_layer_id; vp9_apply_encoding_flags(cpi, cpi->svc.ext_frame_flags[sl]); - cpi->lst_fb_idx = cpi->svc.ext_lst_fb_idx[sl]; - cpi->gld_fb_idx = 
cpi->svc.ext_gld_fb_idx[sl]; - cpi->alt_fb_idx = cpi->svc.ext_alt_fb_idx[sl]; + cpi->lst_fb_idx = cpi->svc.lst_fb_idx[sl]; + cpi->gld_fb_idx = cpi->svc.gld_fb_idx[sl]; + cpi->alt_fb_idx = cpi->svc.alt_fb_idx[sl]; } } // Reset the drop flags for all spatial layers, on the base layer. if (cpi->svc.spatial_layer_id == 0) { - int i; - for (i = 0; i < cpi->svc.number_spatial_layers; i++) { - cpi->svc.drop_spatial_layer[i] = 0; + vp9_zero(cpi->svc.drop_spatial_layer); + // TODO(jianj/marpan): Investigate why setting cpi->svc.lst/gld/alt_fb_idx + // causes an issue with frame dropping and temporal layers, when the frame + // flags are passed via the encode call (bypass mode). Issue is that we're + // resetting ext_refresh_frame_flags_pending to 0 on frame drops. + if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + memset(&cpi->svc.lst_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx)); + memset(&cpi->svc.gld_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx)); + memset(&cpi->svc.alt_fb_idx, -1, sizeof(cpi->svc.lst_fb_idx)); } + vp9_zero(cpi->svc.update_last); + vp9_zero(cpi->svc.update_golden); + vp9_zero(cpi->svc.update_altref); + vp9_zero(cpi->svc.reference_last); + vp9_zero(cpi->svc.reference_golden); + vp9_zero(cpi->svc.reference_altref); } lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * @@ -721,6 +788,19 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { cpi->svc.non_reference_frame = 1; } + if (cpi->svc.spatial_layer_id == 0) cpi->svc.high_source_sad_superframe = 0; + + if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] && + cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] != -1 && + !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) { + // For fixed/non-flexible mode, if the previous frame (same spatial layer + // from previous superframe) was dropped, make sure the lst_fb_idx + // for this frame corresponds to the buffer index updated on 
(last) encoded + // TL0 frame (with same spatial layer). + cpi->lst_fb_idx = cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id]; + } + if (vp9_set_size_literal(cpi, width, height) != 0) return VPX_CODEC_INVALID_PARAM; @@ -806,3 +886,106 @@ void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) { } } } + +void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) { + VP9_COMMON *const cm = &cpi->common; + // Check for disabling inter-layer (spatial) prediction, if + // svc.disable_inter_layer_pred is set. If the previous spatial layer was + // dropped then disable the prediction from this (scaled) reference. + if ((cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY && + !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) || + cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF || + cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id - 1]) { + MV_REFERENCE_FRAME ref_frame; + static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, + VP9_ALT_FLAG }; + for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); + if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) { + const struct scale_factors *const scale_fac = + &cm->frame_refs[ref_frame - 1].sf; + if (vp9_is_scaled(scale_fac)) + cpi->ref_frame_flags &= (~flag_list[ref_frame]); + } + } + } + // Check for disabling inter-layer prediction if the reference for inter-layer + // prediction (the reference that is scaled) is not the previous spatial layer + // from the same superframe, then we disable inter-layer prediction. + // Only need to check when inter_layer prediction is not set to OFF mode. + if (cpi->svc.disable_inter_layer_pred != INTER_LAYER_PRED_OFF) { + // We only use LAST and GOLDEN for prediction in real-time mode, so we + // check both here. 
+ MV_REFERENCE_FRAME ref_frame; + for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ref_frame++) { + struct scale_factors *scale_fac = &cm->frame_refs[ref_frame - 1].sf; + if (vp9_is_scaled(scale_fac)) { + // If this reference was updated on the previous spatial layer of the + // current superframe, then we keep this reference (don't disable). + // Otherwise we disable the inter-layer prediction. + // This condition is verified by checking if the current frame buffer + // index is equal to any of the slots for the previous spatial layer, + // and if so, check if that slot was updated/refreshed. If that is the + // case, then this reference is valid for inter-layer prediction under + // the mode INTER_LAYER_PRED_ON_CONSTRAINED. + int fb_idx = + ref_frame == LAST_FRAME ? cpi->lst_fb_idx : cpi->gld_fb_idx; + int ref_flag = ref_frame == LAST_FRAME ? VP9_LAST_FLAG : VP9_GOLD_FLAG; + int sl = cpi->svc.spatial_layer_id; + int disable = 1; + if ((fb_idx == cpi->svc.lst_fb_idx[sl - 1] && + cpi->svc.update_last[sl - 1]) || + (fb_idx == cpi->svc.gld_fb_idx[sl - 1] && + cpi->svc.update_golden[sl - 1]) || + (fb_idx == cpi->svc.alt_fb_idx[sl - 1] && + cpi->svc.update_altref[sl - 1])) + disable = 0; + if (disable) cpi->ref_frame_flags &= (~ref_flag); + } + } + } +} + +void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) { + SVC *const svc = &cpi->svc; + // For fixed/non-flexible mode, and with CONSTRAINED frame drop + // mode (default), the folllowing constraint are expected, when + // inter-layer prediciton is on (default). + if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON && + svc->framedrop_mode == CONSTRAINED_LAYER_DROP) { + if (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) { + // On non-key frames: LAST is always temporal reference, GOLDEN is + // spatial reference. + if (svc->temporal_layer_id == 0) + // Base temporal only predicts from base temporal. 
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == 0); + else + // Non-base temporal only predicts from lower temporal layer. + assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] < + svc->temporal_layer_id); + if (svc->spatial_layer_id > 0) { + // Non-base spatial only predicts from lower spatial layer with same + // temporal_id. + assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] == + svc->spatial_layer_id - 1); + assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == + svc->temporal_layer_id); + } + } else if (svc->spatial_layer_id > 0) { + // Only 1 reference for frame whose base is key; reference may be LAST + // or GOLDEN, so we check both. + if (cpi->ref_frame_flags & VP9_LAST_FLAG) { + assert(svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] == + svc->spatial_layer_id - 1); + assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == + svc->temporal_layer_id); + } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { + assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] == + svc->spatial_layer_id - 1); + assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == + svc->temporal_layer_id); + } + } + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h index a7fa26924f3..367c93a2f60 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h @@ -20,9 +20,16 @@ extern "C" { #endif typedef enum { + // Inter-layer prediction is on on all frames. INTER_LAYER_PRED_ON, + // Inter-layer prediction is off on all frames. INTER_LAYER_PRED_OFF, - INTER_LAYER_PRED_OFF_NONKEY + // Inter-layer prediction is off on non-key frames. + INTER_LAYER_PRED_OFF_NONKEY, + // Inter-layer prediction is on on all frames, but constrained such + // that any layer S (> 0) can only predict from previous spatial + // layer S-1, from the same superframe. 
+ INTER_LAYER_PRED_ON_CONSTRAINED } INTER_LAYER_PRED; typedef struct { @@ -86,10 +93,9 @@ typedef struct SVC { // Frame flags and buffer indexes for each spatial layer, set by the // application (external settings). int ext_frame_flags[VPX_MAX_LAYERS]; - int ext_lst_fb_idx[VPX_MAX_LAYERS]; - int ext_gld_fb_idx[VPX_MAX_LAYERS]; - int ext_alt_fb_idx[VPX_MAX_LAYERS]; - int ref_frame_index[REF_FRAMES]; + int lst_fb_idx[VPX_MAX_LAYERS]; + int gld_fb_idx[VPX_MAX_LAYERS]; + int alt_fb_idx[VPX_MAX_LAYERS]; int force_zero_mode_spatial_ref; int current_superframe; int non_reference_frame; @@ -118,6 +124,28 @@ typedef struct SVC { SVC_LAYER_DROP_MODE framedrop_mode; INTER_LAYER_PRED disable_inter_layer_pred; + + // Flag to indicate scene change at current superframe, scene detection is + // currently checked for each superframe prior to encoding, on the full + // resolution source. + int high_source_sad_superframe; + + // Flags used to get SVC pattern info. + uint8_t update_last[VPX_SS_MAX_LAYERS]; + uint8_t update_golden[VPX_SS_MAX_LAYERS]; + uint8_t update_altref[VPX_SS_MAX_LAYERS]; + uint8_t reference_last[VPX_SS_MAX_LAYERS]; + uint8_t reference_golden[VPX_SS_MAX_LAYERS]; + uint8_t reference_altref[VPX_SS_MAX_LAYERS]; + + // Keep track of the frame buffer index updated/refreshed on the base + // temporal superframe. + int fb_idx_upd_tl0[VPX_SS_MAX_LAYERS]; + + // Keep track of the spatial and temporal layer id of the frame that last + // updated the frame buffer index. 
+ uint8_t fb_idx_spatial_layer_id[REF_FRAMES]; + uint8_t fb_idx_temporal_layer_id[REF_FRAMES]; } SVC; struct VP9_COMP; @@ -165,6 +193,8 @@ struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi, // Start a frame and initialize svc parameters int vp9_svc_start_frame(struct VP9_COMP *const cpi); +void vp9_copy_flags_ref_update_idx(struct VP9_COMP *const cpi); + int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi); @@ -173,6 +203,10 @@ void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi); void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi); +void vp9_svc_constrain_inter_layer_pred(struct VP9_COMP *const cpi); + +void vp9_svc_assert_constraints_pattern(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c index c84e9fc1a2e..d6c6ece9168 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c @@ -248,7 +248,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(extra_cfg, row_mt, 0, 1); RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2); RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2); - RANGE_CHECK(extra_cfg, cpu_used, -8, 8); + RANGE_CHECK(extra_cfg, cpu_used, -9, 9); RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6); RANGE_CHECK(extra_cfg, tile_columns, 0, 6); RANGE_CHECK(extra_cfg, tile_rows, 0, 2); @@ -1074,23 +1074,11 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, if (cpi->oxcf.pass == 2 && cpi->level_constraint.level_index >= 0 && !cpi->level_constraint.rc_config_updated) { - SVC *const svc = &cpi->svc; - const int is_two_pass_svc = - (svc->number_spatial_layers > 1) || (svc->number_temporal_layers > 1); const VP9EncoderConfig *const oxcf = &cpi->oxcf; TWO_PASS *const 
twopass = &cpi->twopass; FIRSTPASS_STATS *stats = &twopass->total_stats; - if (is_two_pass_svc) { - const double frame_rate = 10000000.0 * stats->count / stats->duration; - vp9_update_spatial_layer_framerate(cpi, frame_rate); - twopass->bits_left = - (int64_t)(stats->duration * - svc->layer_context[svc->spatial_layer_id].target_bandwidth / - 10000000.0); - } else { - twopass->bits_left = - (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0); - } + twopass->bits_left = + (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0); cpi->level_constraint.rc_config_updated = 1; } @@ -1460,9 +1448,6 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, svc->first_spatial_layer_to_encode >= (int)ctx->cfg.ss_number_layers) { return VPX_CODEC_INVALID_PARAM; } - // First spatial layer to encode not implemented for two-pass. - if (is_two_pass_svc(cpi) && svc->first_spatial_layer_to_encode > 0) - return VPX_CODEC_INVALID_PARAM; return VPX_CODEC_OK; } @@ -1502,6 +1487,25 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, return VPX_CODEC_OK; } +static vpx_codec_err_t ctrl_get_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx, + va_list args) { + VP9_COMP *const cpi = ctx->cpi; + vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *); + int sl; + for (sl = 0; sl <= cpi->svc.spatial_layer_id; sl++) { + data->update_last[sl] = cpi->svc.update_last[sl]; + data->update_golden[sl] = cpi->svc.update_golden[sl]; + data->update_alt_ref[sl] = cpi->svc.update_altref[sl]; + data->reference_last[sl] = cpi->svc.reference_last[sl]; + data->reference_golden[sl] = cpi->svc.reference_golden[sl]; + data->reference_alt_ref[sl] = cpi->svc.reference_altref[sl]; + data->lst_fb_idx[sl] = cpi->svc.lst_fb_idx[sl]; + data->gld_fb_idx[sl] = cpi->svc.gld_fb_idx[sl]; + data->alt_fb_idx[sl] = cpi->svc.alt_fb_idx[sl]; + } + return VPX_CODEC_OK; +} + static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t 
*ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; @@ -1509,9 +1513,9 @@ static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx, int sl; for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { cpi->svc.ext_frame_flags[sl] = data->frame_flags[sl]; - cpi->svc.ext_lst_fb_idx[sl] = data->lst_fb_idx[sl]; - cpi->svc.ext_gld_fb_idx[sl] = data->gld_fb_idx[sl]; - cpi->svc.ext_alt_fb_idx[sl] = data->alt_fb_idx[sl]; + cpi->svc.lst_fb_idx[sl] = data->lst_fb_idx[sl]; + cpi->svc.gld_fb_idx[sl] = data->gld_fb_idx[sl]; + cpi->svc.alt_fb_idx[sl] = data->alt_fb_idx[sl]; } return VPX_CODEC_OK; } @@ -1628,6 +1632,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id }, { VP9E_GET_ACTIVEMAP, ctrl_get_active_map }, { VP9E_GET_LEVEL, ctrl_get_level }, + { VP9E_GET_SVC_REF_FRAME_CONFIG, ctrl_get_svc_ref_frame_config }, { -1, NULL }, }; diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_image.c b/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_image.c index af7c529a7ba..a7c6ec0ceab 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_image.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_image.c @@ -38,23 +38,8 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt, /* Get sample size for this format */ switch (fmt) { - case VPX_IMG_FMT_RGB32: - case VPX_IMG_FMT_RGB32_LE: - case VPX_IMG_FMT_ARGB: - case VPX_IMG_FMT_ARGB_LE: bps = 32; break; - case VPX_IMG_FMT_RGB24: - case VPX_IMG_FMT_BGR24: bps = 24; break; - case VPX_IMG_FMT_RGB565: - case VPX_IMG_FMT_RGB565_LE: - case VPX_IMG_FMT_RGB555: - case VPX_IMG_FMT_RGB555_LE: - case VPX_IMG_FMT_UYVY: - case VPX_IMG_FMT_YUY2: - case VPX_IMG_FMT_YVYU: bps = 16; break; case VPX_IMG_FMT_I420: - case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: bps = 12; break; + case VPX_IMG_FMT_YV12: bps = 12; break; case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I440: bps = 16; break; case 
VPX_IMG_FMT_I444: bps = 24; break; @@ -69,8 +54,6 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt, switch (fmt) { case VPX_IMG_FMT_I420: case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I42016: case VPX_IMG_FMT_I42216: xcs = 1; break; @@ -81,8 +64,6 @@ static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt, case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I440: case VPX_IMG_FMT_YV12: - case VPX_IMG_FMT_VPXI420: - case VPX_IMG_FMT_VPXYV12: case VPX_IMG_FMT_I42016: case VPX_IMG_FMT_I44016: ycs = 1; break; default: ycs = 0; break; diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h index f409844b590..b201d96f4fa 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h @@ -620,6 +620,13 @@ enum vp8e_enc_control_id { * Supported in codecs: VP9 */ VP9E_SET_SVC_FRAME_DROP_LAYER, + + /*!\brief Codec control function to get the refresh and reference flags and + * the buffer indices, up to the last encoded spatial layer. + * + * Supported in codecs: VP9 + */ + VP9E_GET_SVC_REF_FRAME_CONFIG, }; /*!\brief vpx 1-D scaling mode @@ -757,10 +764,18 @@ typedef struct vpx_svc_layer_id { * */ typedef struct vpx_svc_ref_frame_config { - int frame_flags[VPX_TS_MAX_LAYERS]; /**< Frame flags. */ - int lst_fb_idx[VPX_TS_MAX_LAYERS]; /**< Last buffer index. */ - int gld_fb_idx[VPX_TS_MAX_LAYERS]; /**< Golden buffer index. */ - int alt_fb_idx[VPX_TS_MAX_LAYERS]; /**< Altref buffer index. */ + // TODO(jianj/marpan): Remove the usage of frame_flags, instead use the + // update and reference flags. + int frame_flags[VPX_SS_MAX_LAYERS]; /**< Frame flags. */ + int lst_fb_idx[VPX_SS_MAX_LAYERS]; /**< Last buffer index. */ + int gld_fb_idx[VPX_SS_MAX_LAYERS]; /**< Golden buffer index. */ + int alt_fb_idx[VPX_SS_MAX_LAYERS]; /**< Altref buffer index. 
*/ + int update_last[VPX_SS_MAX_LAYERS]; /**< Update last. */ + int update_golden[VPX_SS_MAX_LAYERS]; /**< Update golden. */ + int update_alt_ref[VPX_SS_MAX_LAYERS]; /**< Update altref. */ + int reference_last[VPX_SS_MAX_LAYERS]; /**< Last as reference. */ + int reference_golden[VPX_SS_MAX_LAYERS]; /**< Golden as reference. */ + int reference_alt_ref[VPX_SS_MAX_LAYERS]; /**< Altref as reference. */ } vpx_svc_ref_frame_config_t; /*!\brief VP9 svc frame dropping mode. @@ -927,6 +942,9 @@ VPX_CTRL_USE_TYPE(VP9E_SET_SVC_INTER_LAYER_PRED, unsigned int) VPX_CTRL_USE_TYPE(VP9E_SET_SVC_FRAME_DROP_LAYER, vpx_svc_frame_drop_t *) #define VPX_CTRL_VP9E_SET_SVC_FRAME_DROP_LAYER +VPX_CTRL_USE_TYPE(VP9E_GET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *) +#define VPX_CTRL_VP9E_GET_SVC_REF_FRAME_CONFIG + /*!\endcond */ /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h index 4017e5719a5..8c08017b6ee 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h @@ -63,7 +63,7 @@ extern "C" { * fields to structures */ #define VPX_ENCODER_ABI_VERSION \ - (11 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ + (12 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ /*! 
\brief Encoder capabilities bitfield * diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_image.h b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_image.h index d6d3166d2ff..0c9cac73678 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_image.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_image.h @@ -27,7 +27,7 @@ extern "C" { * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ -#define VPX_IMAGE_ABI_VERSION (4) /**<\hideinitializer*/ +#define VPX_IMAGE_ABI_VERSION (5) /**<\hideinitializer*/ #define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */ #define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U in memory. */ @@ -37,29 +37,12 @@ extern "C" { /*!\brief List of supported image formats */ typedef enum vpx_img_fmt { VPX_IMG_FMT_NONE, - VPX_IMG_FMT_RGB24, /**< 24 bit per pixel packed RGB */ - VPX_IMG_FMT_RGB32, /**< 32 bit per pixel packed 0RGB */ - VPX_IMG_FMT_RGB565, /**< 16 bit per pixel, 565 */ - VPX_IMG_FMT_RGB555, /**< 16 bit per pixel, 555 */ - VPX_IMG_FMT_UYVY, /**< UYVY packed YUV */ - VPX_IMG_FMT_YUY2, /**< YUYV packed YUV */ - VPX_IMG_FMT_YVYU, /**< YVYU packed YUV */ - VPX_IMG_FMT_BGR24, /**< 24 bit per pixel packed BGR */ - VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */ - VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */ - VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */ - VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */ - VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */ VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */ VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2, - VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | - 3, /** < planar 4:2:0 format with vpx color space */ - VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4, VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5, VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6, VPX_IMG_FMT_I440 = VPX_IMG_FMT_PLANAR | 7, - VPX_IMG_FMT_444A 
= VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_HAS_ALPHA | 6, VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGHBITDEPTH, diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/avg_pred_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/avg_pred_neon.c index 1370ec2d2ea..5afdece0aba 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/avg_pred_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/avg_pred_neon.c @@ -17,8 +17,8 @@ void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { if (width > 8) { - int x, y; - for (y = 0; y < height; ++y) { + int x, y = height; + do { for (x = 0; x < width; x += 16) { const uint8x16_t p = vld1q_u8(pred + x); const uint8x16_t r = vld1q_u8(ref + x); @@ -28,28 +28,38 @@ void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width, comp += width; pred += width; ref += ref_stride; - } + } while (--y); + } else if (width == 8) { + int i = width * height; + do { + const uint8x16_t p = vld1q_u8(pred); + uint8x16_t r; + const uint8x8_t r_0 = vld1_u8(ref); + const uint8x8_t r_1 = vld1_u8(ref + ref_stride); + r = vcombine_u8(r_0, r_1); + ref += 2 * ref_stride; + r = vrhaddq_u8(r, p); + vst1q_u8(comp, r); + + pred += 16; + comp += 16; + i -= 16; + } while (i); } else { - int i; - for (i = 0; i < width * height; i += 16) { + int i = width * height; + assert(width == 4); + do { const uint8x16_t p = vld1q_u8(pred); uint8x16_t r; - if (width == 4) { - r = load_unaligned_u8q(ref, ref_stride); - ref += 4 * ref_stride; - } else { - const uint8x8_t r_0 = vld1_u8(ref); - const uint8x8_t r_1 = vld1_u8(ref + ref_stride); - assert(width == 8); - r = vcombine_u8(r_0, r_1); - ref += 2 * ref_stride; - } + r = load_unaligned_u8q(ref, ref_stride); + ref += 4 * ref_stride; r = vrhaddq_u8(r, p); 
vst1q_u8(comp, r); pred += 16; comp += 16; - } + i -= 16; + } while (i); } } diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h index 12c0a54c899..6745464d738 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h @@ -101,9 +101,9 @@ static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) { if (stride == 4) return vld1_u8(buf); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1_lane_u32(&a, a_u32, 0); + a_u32 = vset_lane_u32(a, a_u32, 0); memcpy(&a, buf, 4); - a_u32 = vld1_lane_u32(&a, a_u32, 1); + a_u32 = vset_lane_u32(a, a_u32, 1); return vreinterpret_u8_u32(a_u32); } @@ -127,16 +127,16 @@ static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) { if (stride == 4) return vld1q_u8(buf); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1q_lane_u32(&a, a_u32, 0); + a_u32 = vsetq_lane_u32(a, a_u32, 0); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1q_lane_u32(&a, a_u32, 1); + a_u32 = vsetq_lane_u32(a, a_u32, 1); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1q_lane_u32(&a, a_u32, 2); + a_u32 = vsetq_lane_u32(a, a_u32, 2); memcpy(&a, buf, 4); buf += stride; - a_u32 = vld1q_lane_u32(&a, a_u32, 3); + a_u32 = vsetq_lane_u32(a, a_u32, 3); return vreinterpretq_u8_u32(a_u32); } diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c index b04de3aff26..535ec0f0d6d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c @@ -10,64 +10,152 @@ #include <arm_neon.h> +#include <assert.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/sum_neon.h" +static INLINE uint8x8_t 
load_unaligned_2_buffers(const void *const buf0, + const void *const buf1) { + uint32_t a; + uint32x2_t aa = vdup_n_u32(0); + memcpy(&a, buf0, 4); + aa = vset_lane_u32(a, aa, 0); + memcpy(&a, buf1, 4); + aa = vset_lane_u32(a, aa, 1); + return vreinterpret_u8_u32(aa); +} + +static INLINE void sad4x_4d(const uint8_t *const src, const int src_stride, + const uint8_t *const ref[4], const int ref_stride, + const int height, uint32_t *const res) { + int i; + uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) }; + uint16x4_t a[2]; + uint32x4_t r; + + assert(!((intptr_t)src % sizeof(uint32_t))); + assert(!(src_stride % sizeof(uint32_t))); + + for (i = 0; i < height; ++i) { + const uint8x8_t s = vreinterpret_u8_u32( + vld1_dup_u32((const uint32_t *)(src + i * src_stride))); + const uint8x8_t ref01 = load_unaligned_2_buffers(ref[0] + i * ref_stride, + ref[1] + i * ref_stride); + const uint8x8_t ref23 = load_unaligned_2_buffers(ref[2] + i * ref_stride, + ref[3] + i * ref_stride); + abs[0] = vabal_u8(abs[0], s, ref01); + abs[1] = vabal_u8(abs[1], s, ref23); + } + + a[0] = vpadd_u16(vget_low_u16(abs[0]), vget_high_u16(abs[0])); + a[1] = vpadd_u16(vget_low_u16(abs[1]), vget_high_u16(abs[1])); + r = vpaddlq_u16(vcombine_u16(a[0], a[1])); + vst1q_u32(res, r); +} + void vpx_sad4x4x4d_neon(const uint8_t *src, int src_stride, const uint8_t *const ref[4], int ref_stride, uint32_t *res) { - int i; - const uint8x16_t src_u8 = load_unaligned_u8q(src, src_stride); - for (i = 0; i < 4; ++i) { - const uint8x16_t ref_u8 = load_unaligned_u8q(ref[i], ref_stride); - uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(ref_u8)); - abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8)); - res[i] = vget_lane_u32(horizontal_add_uint16x8(abs), 0); - } + sad4x_4d(src, src_stride, ref, ref_stride, 4, res); } void vpx_sad4x8x4d_neon(const uint8_t *src, int src_stride, const uint8_t *const ref[4], int ref_stride, uint32_t *res) { - int i; - const uint8x16_t src_0 = 
load_unaligned_u8q(src, src_stride); - const uint8x16_t src_1 = load_unaligned_u8q(src + 4 * src_stride, src_stride); - for (i = 0; i < 4; ++i) { - const uint8x16_t ref_0 = load_unaligned_u8q(ref[i], ref_stride); - const uint8x16_t ref_1 = - load_unaligned_u8q(ref[i] + 4 * ref_stride, ref_stride); - uint16x8_t abs = vabdl_u8(vget_low_u8(src_0), vget_low_u8(ref_0)); - abs = vabal_u8(abs, vget_high_u8(src_0), vget_high_u8(ref_0)); - abs = vabal_u8(abs, vget_low_u8(src_1), vget_low_u8(ref_1)); - abs = vabal_u8(abs, vget_high_u8(src_1), vget_high_u8(ref_1)); - res[i] = vget_lane_u32(horizontal_add_uint16x8(abs), 0); - } + sad4x_4d(src, src_stride, ref, ref_stride, 8, res); +} + +//////////////////////////////////////////////////////////////////////////////// + +// Can handle 512 pixels' sad sum (such as 16x32 or 32x16) +static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/, + uint32_t *const res) { + const uint16x4_t a0 = vadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0])); + const uint16x4_t a1 = vadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1])); + const uint16x4_t a2 = vadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2])); + const uint16x4_t a3 = vadd_u16(vget_low_u16(sum[3]), vget_high_u16(sum[3])); + const uint16x4_t b0 = vpadd_u16(a0, a1); + const uint16x4_t b1 = vpadd_u16(a2, a3); + const uint32x4_t r = vpaddlq_u16(vcombine_u16(b0, b1)); + vst1q_u32(res, r); } -static INLINE void sad8x_4d(const uint8_t *a, int a_stride, - const uint8_t *const b[4], int b_stride, - uint32_t *result, const int height) { +// Can handle 1024 pixels' sad sum (such as 32x32) +static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/, + uint32_t *const res) { + const uint16x4_t a0 = vpadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0])); + const uint16x4_t a1 = vpadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1])); + const uint16x4_t a2 = vpadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2])); + const uint16x4_t a3 = 
vpadd_u16(vget_low_u16(sum[3]), vget_high_u16(sum[3])); + const uint32x4_t b0 = vpaddlq_u16(vcombine_u16(a0, a1)); + const uint32x4_t b1 = vpaddlq_u16(vcombine_u16(a2, a3)); + const uint32x2_t c0 = vpadd_u32(vget_low_u32(b0), vget_high_u32(b0)); + const uint32x2_t c1 = vpadd_u32(vget_low_u32(b1), vget_high_u32(b1)); + vst1q_u32(res, vcombine_u32(c0, c1)); +} + +// Can handle 2048 pixels' sad sum (such as 32x64 or 64x32) +static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/, + uint32_t *const res) { + const uint32x4_t a0 = vpaddlq_u16(sum[0]); + const uint32x4_t a1 = vpaddlq_u16(sum[1]); + const uint32x4_t a2 = vpaddlq_u16(sum[2]); + const uint32x4_t a3 = vpaddlq_u16(sum[3]); + const uint32x2_t b0 = vadd_u32(vget_low_u32(a0), vget_high_u32(a0)); + const uint32x2_t b1 = vadd_u32(vget_low_u32(a1), vget_high_u32(a1)); + const uint32x2_t b2 = vadd_u32(vget_low_u32(a2), vget_high_u32(a2)); + const uint32x2_t b3 = vadd_u32(vget_low_u32(a3), vget_high_u32(a3)); + const uint32x2_t c0 = vpadd_u32(b0, b1); + const uint32x2_t c1 = vpadd_u32(b2, b3); + vst1q_u32(res, vcombine_u32(c0, c1)); +} + +// Can handle 4096 pixels' sad sum (such as 64x64) +static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/, + uint32_t *const res) { + const uint32x4_t a0 = vpaddlq_u16(sum[0]); + const uint32x4_t a1 = vpaddlq_u16(sum[1]); + const uint32x4_t a2 = vpaddlq_u16(sum[2]); + const uint32x4_t a3 = vpaddlq_u16(sum[3]); + const uint32x4_t a4 = vpaddlq_u16(sum[4]); + const uint32x4_t a5 = vpaddlq_u16(sum[5]); + const uint32x4_t a6 = vpaddlq_u16(sum[6]); + const uint32x4_t a7 = vpaddlq_u16(sum[7]); + const uint32x4_t b0 = vaddq_u32(a0, a1); + const uint32x4_t b1 = vaddq_u32(a2, a3); + const uint32x4_t b2 = vaddq_u32(a4, a5); + const uint32x4_t b3 = vaddq_u32(a6, a7); + const uint32x2_t c0 = vadd_u32(vget_low_u32(b0), vget_high_u32(b0)); + const uint32x2_t c1 = vadd_u32(vget_low_u32(b1), vget_high_u32(b1)); + const uint32x2_t c2 = vadd_u32(vget_low_u32(b2), 
vget_high_u32(b2)); + const uint32x2_t c3 = vadd_u32(vget_low_u32(b3), vget_high_u32(b3)); + const uint32x2_t d0 = vpadd_u32(c0, c1); + const uint32x2_t d1 = vpadd_u32(c2, c3); + vst1q_u32(res, vcombine_u32(d0, d1)); +} + +static INLINE void sad8x_4d(const uint8_t *src, int src_stride, + const uint8_t *const ref[4], int ref_stride, + uint32_t *res, const int height) { int i, j; + const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; - const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] }; for (i = 0; i < height; ++i) { - const uint8x8_t a_u8 = vld1_u8(a); - a += a_stride; + const uint8x8_t s = vld1_u8(src); + src += src_stride; for (j = 0; j < 4; ++j) { - const uint8x8_t b_u8 = vld1_u8(b_loop[j]); - b_loop[j] += b_stride; - sum[j] = vabal_u8(sum[j], a_u8, b_u8); + const uint8x8_t b_u8 = vld1_u8(ref_loop[j]); + ref_loop[j] += ref_stride; + sum[j] = vabal_u8(sum[j], s, b_u8); } } - for (j = 0; j < 4; ++j) { - result[j] = vget_lane_u32(horizontal_add_uint16x8(sum[j]), 0); - } + sad_512_pel_final_neon(sum, res); } void vpx_sad8x4x4d_neon(const uint8_t *src, int src_stride, @@ -88,28 +176,33 @@ void vpx_sad8x16x4d_neon(const uint8_t *src, int src_stride, sad8x_4d(src, src_stride, ref, ref_stride, res, 16); } -static INLINE void sad16x_4d(const uint8_t *a, int a_stride, - const uint8_t *const b[4], int b_stride, - uint32_t *result, const int height) { +//////////////////////////////////////////////////////////////////////////////// + +static INLINE void sad16_neon(const uint8_t *ref, const uint8x16_t src, + uint16x8_t *const sum) { + const uint8x16_t r = vld1q_u8(ref); + *sum = vabal_u8(*sum, vget_low_u8(src), vget_low_u8(r)); + *sum = vabal_u8(*sum, vget_high_u8(src), vget_high_u8(r)); +} + +static INLINE void sad16x_4d(const uint8_t *src, int src_stride, + const uint8_t *const ref[4], int ref_stride, + uint32_t *res, const int height) { int i, j; + const uint8_t 
*ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; - const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] }; for (i = 0; i < height; ++i) { - const uint8x16_t a_u8 = vld1q_u8(a); - a += a_stride; + const uint8x16_t s = vld1q_u8(src); + src += src_stride; for (j = 0; j < 4; ++j) { - const uint8x16_t b_u8 = vld1q_u8(b_loop[j]); - b_loop[j] += b_stride; - sum[j] = vabal_u8(sum[j], vget_low_u8(a_u8), vget_low_u8(b_u8)); - sum[j] = vabal_u8(sum[j], vget_high_u8(a_u8), vget_high_u8(b_u8)); + sad16_neon(ref_loop[j], s, &sum[j]); + ref_loop[j] += ref_stride; } } - for (j = 0; j < 4; ++j) { - result[j] = vget_lane_u32(horizontal_add_uint16x8(sum[j]), 0); - } + sad_512_pel_final_neon(sum, res); } void vpx_sad16x8x4d_neon(const uint8_t *src, int src_stride, @@ -130,113 +223,152 @@ void vpx_sad16x32x4d_neon(const uint8_t *src, int src_stride, sad16x_4d(src, src_stride, ref, ref_stride, res, 32); } -static INLINE void sad32x_4d(const uint8_t *a, int a_stride, - const uint8_t *const b[4], int b_stride, - uint32_t *result, const int height) { - int i, j; - uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), - vdupq_n_u16(0) }; - const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] }; +//////////////////////////////////////////////////////////////////////////////// + +static INLINE void sad32x_4d(const uint8_t *src, int src_stride, + const uint8_t *const ref[4], int ref_stride, + const int height, uint16x8_t *const sum) { + int i; + const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; + + sum[0] = sum[1] = sum[2] = sum[3] = vdupq_n_u16(0); for (i = 0; i < height; ++i) { - const uint8x16_t a_0 = vld1q_u8(a); - const uint8x16_t a_1 = vld1q_u8(a + 16); - a += a_stride; - for (j = 0; j < 4; ++j) { - const uint8x16_t b_0 = vld1q_u8(b_loop[j]); - const uint8x16_t b_1 = vld1q_u8(b_loop[j] + 16); - b_loop[j] += b_stride; - sum[j] = vabal_u8(sum[j], vget_low_u8(a_0), 
vget_low_u8(b_0)); - sum[j] = vabal_u8(sum[j], vget_high_u8(a_0), vget_high_u8(b_0)); - sum[j] = vabal_u8(sum[j], vget_low_u8(a_1), vget_low_u8(b_1)); - sum[j] = vabal_u8(sum[j], vget_high_u8(a_1), vget_high_u8(b_1)); - } - } + uint8x16_t s; - for (j = 0; j < 4; ++j) { - result[j] = vget_lane_u32(horizontal_add_uint16x8(sum[j]), 0); + s = vld1q_u8(src + 0 * 16); + sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); + + s = vld1q_u8(src + 1 * 16); + sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); + + src += src_stride; + ref_loop[0] += ref_stride; + ref_loop[1] += ref_stride; + ref_loop[2] += ref_stride; + ref_loop[3] += ref_stride; } } void vpx_sad32x16x4d_neon(const uint8_t *src, int src_stride, const uint8_t *const ref[4], int ref_stride, uint32_t *res) { - sad32x_4d(src, src_stride, ref, ref_stride, res, 16); + uint16x8_t sum[4]; + sad32x_4d(src, src_stride, ref, ref_stride, 16, sum); + sad_512_pel_final_neon(sum, res); } void vpx_sad32x32x4d_neon(const uint8_t *src, int src_stride, const uint8_t *const ref[4], int ref_stride, uint32_t *res) { - sad32x_4d(src, src_stride, ref, ref_stride, res, 32); + uint16x8_t sum[4]; + sad32x_4d(src, src_stride, ref, ref_stride, 32, sum); + sad_1024_pel_final_neon(sum, res); } void vpx_sad32x64x4d_neon(const uint8_t *src, int src_stride, const uint8_t *const ref[4], int ref_stride, uint32_t *res) { - sad32x_4d(src, src_stride, ref, ref_stride, res, 64); + uint16x8_t sum[4]; + sad32x_4d(src, src_stride, ref, ref_stride, 64, sum); + sad_2048_pel_final_neon(sum, res); } -static INLINE void sum64x(const uint8x16_t a_0, const uint8x16_t a_1, - const uint8x16_t b_0, const uint8x16_t b_1, - uint16x8_t *sum) { - *sum = vabal_u8(*sum, vget_low_u8(a_0), 
vget_low_u8(b_0)); - *sum = vabal_u8(*sum, vget_high_u8(a_0), vget_high_u8(b_0)); - *sum = vabal_u8(*sum, vget_low_u8(a_1), vget_low_u8(b_1)); - *sum = vabal_u8(*sum, vget_high_u8(a_1), vget_high_u8(b_1)); -} +//////////////////////////////////////////////////////////////////////////////// -static INLINE void sad64x_4d(const uint8_t *a, int a_stride, - const uint8_t *const b[4], int b_stride, - uint32_t *result, const int height) { +void vpx_sad64x32x4d_neon(const uint8_t *src, int src_stride, + const uint8_t *const ref[4], int ref_stride, + uint32_t *res) { int i; - uint16x8_t sum_0 = vdupq_n_u16(0); - uint16x8_t sum_1 = vdupq_n_u16(0); - uint16x8_t sum_2 = vdupq_n_u16(0); - uint16x8_t sum_3 = vdupq_n_u16(0); - uint16x8_t sum_4 = vdupq_n_u16(0); - uint16x8_t sum_5 = vdupq_n_u16(0); - uint16x8_t sum_6 = vdupq_n_u16(0); - uint16x8_t sum_7 = vdupq_n_u16(0); - const uint8_t *b_loop[4] = { b[0], b[1], b[2], b[3] }; + const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; + uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), + vdupq_n_u16(0) }; - for (i = 0; i < height; ++i) { - const uint8x16_t a_0 = vld1q_u8(a); - const uint8x16_t a_1 = vld1q_u8(a + 16); - const uint8x16_t a_2 = vld1q_u8(a + 32); - const uint8x16_t a_3 = vld1q_u8(a + 48); - a += a_stride; - sum64x(a_0, a_1, vld1q_u8(b_loop[0]), vld1q_u8(b_loop[0] + 16), &sum_0); - sum64x(a_2, a_3, vld1q_u8(b_loop[0] + 32), vld1q_u8(b_loop[0] + 48), - &sum_1); - b_loop[0] += b_stride; - sum64x(a_0, a_1, vld1q_u8(b_loop[1]), vld1q_u8(b_loop[1] + 16), &sum_2); - sum64x(a_2, a_3, vld1q_u8(b_loop[1] + 32), vld1q_u8(b_loop[1] + 48), - &sum_3); - b_loop[1] += b_stride; - sum64x(a_0, a_1, vld1q_u8(b_loop[2]), vld1q_u8(b_loop[2] + 16), &sum_4); - sum64x(a_2, a_3, vld1q_u8(b_loop[2] + 32), vld1q_u8(b_loop[2] + 48), - &sum_5); - b_loop[2] += b_stride; - sum64x(a_0, a_1, vld1q_u8(b_loop[3]), vld1q_u8(b_loop[3] + 16), &sum_6); - sum64x(a_2, a_3, vld1q_u8(b_loop[3] + 32), vld1q_u8(b_loop[3] + 48), - 
&sum_7); - b_loop[3] += b_stride; - } + for (i = 0; i < 32; ++i) { + uint8x16_t s; - result[0] = vget_lane_u32(horizontal_add_long_uint16x8(sum_0, sum_1), 0); - result[1] = vget_lane_u32(horizontal_add_long_uint16x8(sum_2, sum_3), 0); - result[2] = vget_lane_u32(horizontal_add_long_uint16x8(sum_4, sum_5), 0); - result[3] = vget_lane_u32(horizontal_add_long_uint16x8(sum_6, sum_7), 0); -} + s = vld1q_u8(src + 0 * 16); + sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); -void vpx_sad64x32x4d_neon(const uint8_t *src, int src_stride, - const uint8_t *const ref[4], int ref_stride, - uint32_t *res) { - sad64x_4d(src, src_stride, ref, ref_stride, res, 32); + s = vld1q_u8(src + 1 * 16); + sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); + + s = vld1q_u8(src + 2 * 16); + sad16_neon(ref_loop[0] + 2 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 2 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 2 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 2 * 16, s, &sum[3]); + + s = vld1q_u8(src + 3 * 16); + sad16_neon(ref_loop[0] + 3 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 3 * 16, s, &sum[1]); + sad16_neon(ref_loop[2] + 3 * 16, s, &sum[2]); + sad16_neon(ref_loop[3] + 3 * 16, s, &sum[3]); + + src += src_stride; + ref_loop[0] += ref_stride; + ref_loop[1] += ref_stride; + ref_loop[2] += ref_stride; + ref_loop[3] += ref_stride; + } + + sad_2048_pel_final_neon(sum, res); } void vpx_sad64x64x4d_neon(const uint8_t *src, int src_stride, const uint8_t *const ref[4], int ref_stride, uint32_t *res) { - sad64x_4d(src, src_stride, ref, ref_stride, res, 64); + int i; + const uint8_t *ref_loop[4] = { ref[0], ref[1], ref[2], ref[3] }; + uint16x8_t sum[8] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), + vdupq_n_u16(0), 
vdupq_n_u16(0), vdupq_n_u16(0), + vdupq_n_u16(0), vdupq_n_u16(0) }; + + for (i = 0; i < 64; ++i) { + uint8x16_t s; + + s = vld1q_u8(src + 0 * 16); + sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 0 * 16, s, &sum[2]); + sad16_neon(ref_loop[2] + 0 * 16, s, &sum[4]); + sad16_neon(ref_loop[3] + 0 * 16, s, &sum[6]); + + s = vld1q_u8(src + 1 * 16); + sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); + sad16_neon(ref_loop[1] + 1 * 16, s, &sum[2]); + sad16_neon(ref_loop[2] + 1 * 16, s, &sum[4]); + sad16_neon(ref_loop[3] + 1 * 16, s, &sum[6]); + + s = vld1q_u8(src + 2 * 16); + sad16_neon(ref_loop[0] + 2 * 16, s, &sum[1]); + sad16_neon(ref_loop[1] + 2 * 16, s, &sum[3]); + sad16_neon(ref_loop[2] + 2 * 16, s, &sum[5]); + sad16_neon(ref_loop[3] + 2 * 16, s, &sum[7]); + + s = vld1q_u8(src + 3 * 16); + sad16_neon(ref_loop[0] + 3 * 16, s, &sum[1]); + sad16_neon(ref_loop[1] + 3 * 16, s, &sum[3]); + sad16_neon(ref_loop[2] + 3 * 16, s, &sum[5]); + sad16_neon(ref_loop[3] + 3 * 16, s, &sum[7]); + + src += src_stride; + ref_loop[0] += ref_stride; + ref_loop[1] += ref_stride; + ref_loop[2] += ref_stride; + ref_loop[3] += ref_stride; + } + + sad_4096_pel_final_neon(sum, res); } diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c index ce81fb630f2..eef123368d0 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/subtract_neon.c @@ -9,71 +9,72 @@ */ #include <arm_neon.h> +#include <assert.h> #include "./vpx_config.h" #include "vpx/vpx_integer.h" +#include "vpx_dsp/arm/mem_neon.h" void vpx_subtract_block_neon(int rows, int cols, int16_t *diff, ptrdiff_t diff_stride, const uint8_t *src, ptrdiff_t src_stride, const uint8_t *pred, ptrdiff_t pred_stride) { - int r, c; + int r = rows, c; if (cols > 16) { - for (r = 0; r < rows; ++r) { + do { for (c = 0; c < cols; c += 32) { - const 
uint8x16_t v_src_00 = vld1q_u8(&src[c + 0]); - const uint8x16_t v_src_16 = vld1q_u8(&src[c + 16]); - const uint8x16_t v_pred_00 = vld1q_u8(&pred[c + 0]); - const uint8x16_t v_pred_16 = vld1q_u8(&pred[c + 16]); - const uint16x8_t v_diff_lo_00 = - vsubl_u8(vget_low_u8(v_src_00), vget_low_u8(v_pred_00)); - const uint16x8_t v_diff_hi_00 = - vsubl_u8(vget_high_u8(v_src_00), vget_high_u8(v_pred_00)); - const uint16x8_t v_diff_lo_16 = - vsubl_u8(vget_low_u8(v_src_16), vget_low_u8(v_pred_16)); - const uint16x8_t v_diff_hi_16 = - vsubl_u8(vget_high_u8(v_src_16), vget_high_u8(v_pred_16)); - vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(v_diff_lo_00)); - vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(v_diff_hi_00)); - vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(v_diff_lo_16)); - vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(v_diff_hi_16)); + const uint8x16_t s0 = vld1q_u8(&src[c + 0]); + const uint8x16_t s1 = vld1q_u8(&src[c + 16]); + const uint8x16_t p0 = vld1q_u8(&pred[c + 0]); + const uint8x16_t p1 = vld1q_u8(&pred[c + 16]); + const uint16x8_t d0 = vsubl_u8(vget_low_u8(s0), vget_low_u8(p0)); + const uint16x8_t d1 = vsubl_u8(vget_high_u8(s0), vget_high_u8(p0)); + const uint16x8_t d2 = vsubl_u8(vget_low_u8(s1), vget_low_u8(p1)); + const uint16x8_t d3 = vsubl_u8(vget_high_u8(s1), vget_high_u8(p1)); + vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(d0)); + vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(d1)); + vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(d2)); + vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(d3)); } diff += diff_stride; pred += pred_stride; src += src_stride; - } + } while (--r); } else if (cols > 8) { - for (r = 0; r < rows; ++r) { - const uint8x16_t v_src = vld1q_u8(&src[0]); - const uint8x16_t v_pred = vld1q_u8(&pred[0]); - const uint16x8_t v_diff_lo = - vsubl_u8(vget_low_u8(v_src), vget_low_u8(v_pred)); - const uint16x8_t v_diff_hi = - vsubl_u8(vget_high_u8(v_src), vget_high_u8(v_pred)); - vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff_lo)); - 
vst1q_s16(&diff[8], vreinterpretq_s16_u16(v_diff_hi)); + do { + const uint8x16_t s = vld1q_u8(&src[0]); + const uint8x16_t p = vld1q_u8(&pred[0]); + const uint16x8_t d0 = vsubl_u8(vget_low_u8(s), vget_low_u8(p)); + const uint16x8_t d1 = vsubl_u8(vget_high_u8(s), vget_high_u8(p)); + vst1q_s16(&diff[0], vreinterpretq_s16_u16(d0)); + vst1q_s16(&diff[8], vreinterpretq_s16_u16(d1)); diff += diff_stride; pred += pred_stride; src += src_stride; - } + } while (--r); } else if (cols > 4) { - for (r = 0; r < rows; ++r) { - const uint8x8_t v_src = vld1_u8(&src[0]); - const uint8x8_t v_pred = vld1_u8(&pred[0]); - const uint16x8_t v_diff = vsubl_u8(v_src, v_pred); + do { + const uint8x8_t s = vld1_u8(&src[0]); + const uint8x8_t p = vld1_u8(&pred[0]); + const uint16x8_t v_diff = vsubl_u8(s, p); vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff)); diff += diff_stride; pred += pred_stride; src += src_stride; - } + } while (--r); } else { - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; ++c) diff[c] = src[c] - pred[c]; - - diff += diff_stride; - pred += pred_stride; - src += src_stride; - } + assert(cols == 4); + do { + const uint8x8_t s = load_unaligned_u8(src, (int)src_stride); + const uint8x8_t p = load_unaligned_u8(pred, (int)pred_stride); + const uint16x8_t d = vsubl_u8(s, p); + vst1_s16(diff + 0 * diff_stride, vreinterpret_s16_u16(vget_low_u16(d))); + vst1_s16(diff + 1 * diff_stride, vreinterpret_s16_u16(vget_high_u16(d))); + diff += 2 * diff_stride; + pred += 2 * pred_stride; + src += 2 * src_stride; + r -= 2; + } while (r); } } diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h index d74fe0cde42..c09841223c8 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_neon.h @@ -30,15 +30,6 @@ static INLINE uint32x2_t horizontal_add_uint16x8(const uint16x8_t a) { 
vreinterpret_u32_u64(vget_high_u64(c))); } -static INLINE uint32x2_t horizontal_add_long_uint16x8(const uint16x8_t a, - const uint16x8_t b) { - const uint32x4_t c = vpaddlq_u16(a); - const uint32x4_t d = vpadalq_u16(c, b); - const uint64x2_t e = vpaddlq_u32(d); - return vadd_u32(vreinterpret_u32_u64(vget_low_u64(e)), - vreinterpret_u32_u64(vget_high_u64(e))); -} - static INLINE uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) { const uint64x2_t b = vpaddlq_u32(a); return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c new file mode 100644 index 00000000000..8942ba83bc2 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/sum_squares_neon.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <arm_neon.h> + +#include <assert.h> +#include "./vpx_dsp_rtcd.h" + +uint64_t vpx_sum_squares_2d_i16_neon(const int16_t *src, int stride, int size) { + int64x1_t s2; + + if (size == 4) { + int16x4_t s[4]; + int32x4_t s0; + uint32x2_t s1; + + s[0] = vld1_s16(src + 0 * stride); + s[1] = vld1_s16(src + 1 * stride); + s[2] = vld1_s16(src + 2 * stride); + s[3] = vld1_s16(src + 3 * stride); + s0 = vmull_s16(s[0], s[0]); + s0 = vmlal_s16(s0, s[1], s[1]); + s0 = vmlal_s16(s0, s[2], s[2]); + s0 = vmlal_s16(s0, s[3], s[3]); + s1 = vpadd_u32(vget_low_u32(vreinterpretq_u32_s32(s0)), + vget_high_u32(vreinterpretq_u32_s32(s0))); + s2 = vpaddl_u32(s1); + } else { + int r = size; + uint64x2_t s1 = vdupq_n_u64(0); + + do { + int c = size; + int32x4_t s0 = vdupq_n_s32(0); + const int16_t *src_t = src; + + do { + int16x8_t s[8]; + + s[0] = vld1q_s16(src_t + 0 * stride); + s[1] = vld1q_s16(src_t + 1 * stride); + s[2] = vld1q_s16(src_t + 2 * stride); + s[3] = vld1q_s16(src_t + 3 * stride); + s[4] = vld1q_s16(src_t + 4 * stride); + s[5] = vld1q_s16(src_t + 5 * stride); + s[6] = vld1q_s16(src_t + 6 * stride); + s[7] = vld1q_s16(src_t + 7 * stride); + s0 = vmlal_s16(s0, vget_low_s16(s[0]), vget_low_s16(s[0])); + s0 = vmlal_s16(s0, vget_low_s16(s[1]), vget_low_s16(s[1])); + s0 = vmlal_s16(s0, vget_low_s16(s[2]), vget_low_s16(s[2])); + s0 = vmlal_s16(s0, vget_low_s16(s[3]), vget_low_s16(s[3])); + s0 = vmlal_s16(s0, vget_low_s16(s[4]), vget_low_s16(s[4])); + s0 = vmlal_s16(s0, vget_low_s16(s[5]), vget_low_s16(s[5])); + s0 = vmlal_s16(s0, vget_low_s16(s[6]), vget_low_s16(s[6])); + s0 = vmlal_s16(s0, vget_low_s16(s[7]), vget_low_s16(s[7])); + s0 = vmlal_s16(s0, vget_high_s16(s[0]), vget_high_s16(s[0])); + s0 = vmlal_s16(s0, vget_high_s16(s[1]), vget_high_s16(s[1])); + s0 = vmlal_s16(s0, vget_high_s16(s[2]), vget_high_s16(s[2])); + s0 = vmlal_s16(s0, vget_high_s16(s[3]), vget_high_s16(s[3])); + s0 = vmlal_s16(s0, vget_high_s16(s[4]), vget_high_s16(s[4])); + s0 = vmlal_s16(s0, 
vget_high_s16(s[5]), vget_high_s16(s[5])); + s0 = vmlal_s16(s0, vget_high_s16(s[6]), vget_high_s16(s[6])); + s0 = vmlal_s16(s0, vget_high_s16(s[7]), vget_high_s16(s[7])); + src_t += 8; + c -= 8; + } while (c); + + s1 = vaddw_u32(s1, vget_low_u32(vreinterpretq_u32_s32(s0))); + s1 = vaddw_u32(s1, vget_high_u32(vreinterpretq_u32_s32(s0))); + src += 8 * stride; + r -= 8; + } while (r); + + s2 = vadd_u64(vget_low_u64(s1), vget_high_u64(s1)); + } + + return vget_lane_u64(s2, 0); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c index 0cfb81e4df1..ba9ceb86658 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve8_mmi.c @@ -254,6 +254,89 @@ static void convolve_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, ); } +static void convolve_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int x_step_q4, int32_t w, int32_t h) { + const int16_t *filter_x = filter[x0_q4]; + double ftmp[14]; + uint32_t tmp[2]; + uint32_t para[2]; + para[0] = (1 << ((FILTER_BITS)-1)); + para[1] = FILTER_BITS; + src -= SUBPEL_TAPS / 2 - 1; + src_stride -= w; + dst_stride -= w; + (void)x_step_q4; + + __asm__ volatile( + "move %[tmp1], %[width] \n\t" + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + "gsldlc1 %[filter1], 0x03(%[filter]) \n\t" + "gsldrc1 %[filter1], 0x00(%[filter]) \n\t" + "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t" + "gsldrc1 %[filter2], 0x08(%[filter]) \n\t" + "1: \n\t" + /* Get 8 data per row */ + "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" + "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" + "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t" + "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t" + "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t" + "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t" + "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t" + "gsldrc1 
%[ftmp11], 0x03(%[src]) \n\t" + "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t" + "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" + "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t" + "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" + "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t" + "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" + "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t" + "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" + MMI_ADDIU(%[width], %[width], -0x04) + /* Get raw data */ + GET_DATA_H_MMI + ROUND_POWER_OF_TWO_MMI + CLIP_PIXEL_MMI + "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t" + "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t" + "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t" + "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" + "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t" + "li %[tmp0], 0x10001 \n\t" + MMI_MTC1(%[tmp0], %[ftmp5]) + "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t" + "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" + "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t" + "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t" + "swc1 %[ftmp12], 0x00(%[dst]) \n\t" + MMI_ADDIU(%[dst], %[dst], 0x04) + MMI_ADDIU(%[src], %[src], 0x04) + /* Loop count */ + "bnez %[width], 1b \n\t" + "move %[width], %[tmp1] \n\t" + MMI_ADDU(%[src], %[src], %[src_stride]) + MMI_ADDU(%[dst], %[dst], %[dst_stride]) + MMI_ADDIU(%[height], %[height], -0x01) + "bnez %[height], 1b \n\t" + : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), + [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]), + [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]), + [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]), + [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]), + [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]), + [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]), + [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), + [src]"+&r"(src), [width]"+&r"(w), + [dst]"+&r"(dst), [height]"+&r"(h) + : [filter]"r"(filter_x), [para]"r"(para), + [src_stride]"r"((mips_reg)src_stride), + [dst_stride]"r"((mips_reg)dst_stride) + : "memory" + ); +} + static void convolve_avg_vert_mmi(const 
uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int y0_q4, @@ -362,52 +445,63 @@ void vpx_convolve_avg_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { - double ftmp[4]; - uint32_t tmp[2]; - src_stride -= w; - dst_stride -= w; + int x, y; + (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; - __asm__ volatile( - "move %[tmp1], %[width] \n\t" - "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" - "li %[tmp0], 0x10001 \n\t" - MMI_MTC1(%[tmp0], %[ftmp3]) - "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" - "1: \n\t" - "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" - "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" - "gsldlc1 %[ftmp2], 0x07(%[dst]) \n\t" - "gsldrc1 %[ftmp2], 0x00(%[dst]) \n\t" - "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" - "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" - "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" - "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" - "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t" - "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" - "swc1 %[ftmp1], 0x00(%[dst]) \n\t" - MMI_ADDIU(%[width], %[width], -0x04) - MMI_ADDIU(%[dst], %[dst], 0x04) - MMI_ADDIU(%[src], %[src], 0x04) - "bnez %[width], 1b \n\t" - "move %[width], %[tmp1] \n\t" - MMI_ADDU(%[dst], %[dst], %[dst_stride]) - MMI_ADDU(%[src], %[src], %[src_stride]) - MMI_ADDIU(%[height], %[height], -0x01) - "bnez %[height], 1b \n\t" - : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), - [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), - [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), - [src]"+&r"(src), [dst]"+&r"(dst), - [width]"+&r"(w), [height]"+&r"(h) - : [src_stride]"r"((mips_reg)src_stride), - [dst_stride]"r"((mips_reg)dst_stride) - : "memory" - ); + if (w & 0x03) { + for (y = 0; y < h; ++y) { + for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); + src += src_stride; + dst += dst_stride; + } + } else { + 
double ftmp[4]; + uint32_t tmp[2]; + src_stride -= w; + dst_stride -= w; + + __asm__ volatile( + "move %[tmp1], %[width] \n\t" + "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" + "li %[tmp0], 0x10001 \n\t" + MMI_MTC1(%[tmp0], %[ftmp3]) + "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" + "1: \n\t" + "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" + "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" + "gsldlc1 %[ftmp2], 0x07(%[dst]) \n\t" + "gsldrc1 %[ftmp2], 0x00(%[dst]) \n\t" + "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" + "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" + "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" + "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" + "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t" + "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" + "swc1 %[ftmp1], 0x00(%[dst]) \n\t" + MMI_ADDIU(%[width], %[width], -0x04) + MMI_ADDIU(%[dst], %[dst], 0x04) + MMI_ADDIU(%[src], %[src], 0x04) + "bnez %[width], 1b \n\t" + "move %[width], %[tmp1] \n\t" + MMI_ADDU(%[dst], %[dst], %[dst_stride]) + MMI_ADDU(%[src], %[src], %[src_stride]) + MMI_ADDIU(%[height], %[height], -0x01) + "bnez %[height], 1b \n\t" + : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), + [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), + [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), + [src]"+&r"(src), [dst]"+&r"(dst), + [width]"+&r"(w), [height]"+&r"(h) + : [src_stride]"r"((mips_reg)src_stride), + [dst_stride]"r"((mips_reg)dst_stride) + : "memory" + ); + } } static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, @@ -481,6 +575,29 @@ static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, } } +static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *x_filters, int x0_q4, + int x_step_q4, int w, int h) { + int x, y; + src -= SUBPEL_TAPS / 2 - 1; + + for (y = 0; y < h; ++y) { + int x_q4 = x0_q4; + for (x = 0; x < w; ++x) { + const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; + const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; + 
int k, sum = 0; + for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k]; + dst[x] = ROUND_POWER_OF_TWO( + dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); + x_q4 += x_step_q4; + } + src += src_stride; + dst += dst_stride; + } +} + void vpx_convolve8_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, @@ -553,6 +670,21 @@ void vpx_convolve8_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, y_step_q4, w, h); } +void vpx_convolve8_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, + uint8_t *dst, ptrdiff_t dst_stride, + const InterpKernel *filter, int x0_q4, + int32_t x_step_q4, int y0_q4, int y_step_q4, + int w, int h) { + (void)y0_q4; + (void)y_step_q4; + if (w & 0x03) + convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, + x_step_q4, w, h); + else + convolve_avg_horiz_mmi(src, src_stride, dst, dst_stride, filter, x0_q4, + x_step_q4, w, h); +} + void vpx_convolve8_avg_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, @@ -580,8 +712,5 @@ void vpx_convolve8_avg_mmi(const uint8_t *src, ptrdiff_t src_stride, vpx_convolve8_mmi(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); - if (w & 0x03) - vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h); - else - vpx_convolve_avg_mmi(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h); + vpx_convolve_avg_mmi(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h); } diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c index f095cb0a481..6603b85acba 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c @@ -76,6 +76,8 @@ static int16x8_t cospi29_v = { 2404, 2404, 2404, 2404, 
2404, 2404, 2404, 2404 }; static int16x8_t cospi30_v = { 1606, 1606, 1606, 1606, 1606, 1606, 1606, 1606 }; static int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 }; +static uint8x16_t mask1 = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 }; #define ROUND_SHIFT_INIT \ const int32x4_t shift = vec_sl(vec_splat_s32(1), vec_splat_u32(13)); \ const uint32x4_t shift14 = vec_splat_u32(14); @@ -107,6 +109,15 @@ static int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 }; out1 = vec_sub(step0, step1); \ out1 = vec_perm(out1, out1, mask0); +#define PACK_STORE(v0, v1) \ + tmp16_0 = vec_add(vec_perm(d_u0, d_u1, mask1), v0); \ + tmp16_1 = vec_add(vec_perm(d_u2, d_u3, mask1), v1); \ + output_v = vec_packsu(tmp16_0, tmp16_1); \ + \ + vec_vsx_st(output_v, 0, tmp_dest); \ + for (i = 0; i < 4; i++) \ + for (j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i]; + void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; @@ -114,13 +125,10 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int16x8_t step0, step1, tmp16_0, tmp16_1, t_out0, t_out1; uint8x16_t mask0 = { 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; - uint8x16_t mask1 = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 }; int16x8_t v0 = load_tran_low(0, input); int16x8_t v1 = load_tran_low(8 * sizeof(*input), input); int16x8_t t0 = vec_mergeh(v0, v1); int16x8_t t1 = vec_mergel(v0, v1); - uint8x16_t dest0 = vec_vsx_ld(0, dest); uint8x16_t dest1 = vec_vsx_ld(stride, dest); uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest); @@ -130,6 +138,7 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int16x8_t d_u1 = (int16x8_t)vec_mergeh(dest1, zerov); int16x8_t d_u2 = (int16x8_t)vec_mergeh(dest2, zerov); int16x8_t d_u3 = (int16x8_t)vec_mergeh(dest3, zerov); + uint8x16_t output_v; uint8_t tmp_dest[16]; 
ROUND_SHIFT_INIT @@ -148,13 +157,8 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, PIXEL_ADD4(v0, t_out0); PIXEL_ADD4(v1, t_out1); - tmp16_0 = vec_add(vec_perm(d_u0, d_u1, mask1), v0); - tmp16_1 = vec_add(vec_perm(d_u2, d_u3, mask1), v1); - output_v = vec_packsu(tmp16_0, tmp16_1); - vec_vsx_st(output_v, 0, tmp_dest); - for (i = 0; i < 4; i++) - for (j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i]; + PACK_STORE(v0, v1); } #define TRANSPOSE8x8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ @@ -1062,3 +1066,67 @@ void vpx_idct32x32_1024_add_vsx(const tran_low_t *input, uint8_t *dest, ADD_STORE_BLOCK(src2, 16); ADD_STORE_BLOCK(src3, 24); } + +#define TRANSFORM_COLS \ + v32_a = vec_add(v32_a, v32_c); \ + v32_d = vec_sub(v32_d, v32_b); \ + v32_e = vec_sub(v32_a, v32_d); \ + v32_e = vec_sra(v32_e, one); \ + v32_b = vec_sub(v32_e, v32_b); \ + v32_c = vec_sub(v32_e, v32_c); \ + v32_a = vec_sub(v32_a, v32_b); \ + v32_d = vec_add(v32_d, v32_c); \ + v_a = vec_packs(v32_a, v32_b); \ + v_c = vec_packs(v32_c, v32_d); + +#define TRANSPOSE_WHT \ + tmp_a = vec_mergeh(v_a, v_c); \ + tmp_c = vec_mergel(v_a, v_c); \ + v_a = vec_mergeh(tmp_a, tmp_c); \ + v_c = vec_mergel(tmp_a, tmp_c); + +void vpx_iwht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, + int stride) { + int16x8_t v_a = load_tran_low(0, input); + int16x8_t v_c = load_tran_low(8 * sizeof(*input), input); + int16x8_t tmp_a, tmp_c; + uint16x8_t two = vec_splat_u16(2); + uint32x4_t one = vec_splat_u32(1); + int16x8_t tmp16_0, tmp16_1; + int32x4_t v32_a, v32_c, v32_d, v32_b, v32_e; + uint8x16_t dest0 = vec_vsx_ld(0, dest); + uint8x16_t dest1 = vec_vsx_ld(stride, dest); + uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest); + uint8x16_t dest3 = vec_vsx_ld(3 * stride, dest); + int16x8_t d_u0 = (int16x8_t)unpack_to_u16_h(dest0); + int16x8_t d_u1 = (int16x8_t)unpack_to_u16_h(dest1); + int16x8_t d_u2 = (int16x8_t)unpack_to_u16_h(dest2); + int16x8_t d_u3 = 
(int16x8_t)unpack_to_u16_h(dest3); + uint8x16_t output_v; + uint8_t tmp_dest[16]; + int i, j; + + v_a = vec_sra(v_a, two); + v_c = vec_sra(v_c, two); + + TRANSPOSE_WHT; + + v32_a = vec_unpackh(v_a); + v32_c = vec_unpackl(v_a); + + v32_d = vec_unpackh(v_c); + v32_b = vec_unpackl(v_c); + + TRANSFORM_COLS; + + TRANSPOSE_WHT; + + v32_a = vec_unpackh(v_a); + v32_c = vec_unpackl(v_a); + v32_d = vec_unpackh(v_c); + v32_b = vec_unpackl(v_c); + + TRANSFORM_COLS; + + PACK_STORE(v_a, v_c); +} diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c new file mode 100644 index 00000000000..3a9092f64a0 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/quantize_vsx.c @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/ppc/types_vsx.h" + +// Negate 16-bit integers in a when the corresponding signed 16-bit +// integer in b is negative. +static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) { + const int16x8_t mask = vec_sra(b, vec_shift_sign_s16); + return vec_xor(vec_add(a, mask), mask); +} + +// Sets the value of a 32-bit integers to 1 when the corresponding value in a is +// negative. +static INLINE int32x4_t vec_is_neg(int32x4_t a) { + return vec_sr(a, vec_shift_sign_s32); +} + +// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit +// integers, and return the high 16 bits of the intermediate integers. 
+// (a * b) >> 16 +static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) { + // madds does ((A * B) >>15) + C, we need >> 16, so we perform an extra right + // shift. + return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16); +} + +// Quantization function used for 4x4, 8x8 and 16x16 blocks. +static INLINE int16x8_t quantize_coeff(int16x8_t coeff, int16x8_t coeff_abs, + int16x8_t round, int16x8_t quant, + int16x8_t quant_shift, bool16x8_t mask) { + const int16x8_t rounded = vec_vaddshs(coeff_abs, round); + int16x8_t qcoeff = vec_mulhi(rounded, quant); + qcoeff = vec_add(qcoeff, rounded); + qcoeff = vec_mulhi(qcoeff, quant_shift); + qcoeff = vec_sign(qcoeff, coeff); + return vec_and(qcoeff, mask); +} + +// Quantization function used for 32x32 blocks. +static INLINE int16x8_t quantize_coeff_32(int16x8_t coeff, int16x8_t coeff_abs, + int16x8_t round, int16x8_t quant, + int16x8_t quant_shift, + bool16x8_t mask) { + const int16x8_t rounded = vec_vaddshs(coeff_abs, round); + int16x8_t qcoeff = vec_mulhi(rounded, quant); + qcoeff = vec_add(qcoeff, rounded); + // 32x32 blocks require an extra multiplication by 2, this compensates for the + // extra right shift added in vec_mulhi, as such vec_madds can be used + // directly instead of vec_mulhi (((a * b) >> 15) >> 1) << 1 == (a * b >> 15) + qcoeff = vec_madds(qcoeff, quant_shift, vec_zeros_s16); + qcoeff = vec_sign(qcoeff, coeff); + return vec_and(qcoeff, mask); +} + +// DeQuantization function used for 32x32 blocks. Quantized coeff of 32x32 +// blocks are twice as big as for other block sizes. As such, using +// vec_mladd results in overflow. +static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff, + int16x8_t dequant) { + int16x8_t dqcoeff; + int32x4_t dqcoeffe = vec_mule(qcoeff, dequant); + int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant); + // Add 1 if negative to round towards zero because the C uses division. 
+ dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe)); + dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo)); + dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32); + dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32); + dqcoeff = vec_pack(dqcoeffe, dqcoeffo); + return vec_perm(dqcoeff, dqcoeff, vec_perm_merge); +} + +static INLINE int16x8_t nonzero_scanindex(int16x8_t qcoeff, bool16x8_t mask, + const int16_t *iscan_ptr, int index) { + int16x8_t scan = vec_vsx_ld(index, iscan_ptr); + bool16x8_t zero_coeff = vec_cmpeq(qcoeff, vec_zeros_s16); + scan = vec_sub(scan, mask); + return vec_andc(scan, zero_coeff); +} + +// Compare packed 16-bit integers across a, and return the maximum value in +// every element. Returns a vector containing the biggest value across vector a. +static INLINE int16x8_t vec_max_across(int16x8_t a) { + a = vec_max(a, vec_perm(a, a, vec_perm64)); + a = vec_max(a, vec_perm(a, a, vec_perm32)); + return vec_max(a, vec_perm(a, a, vec_perm16)); +} + +void vpx_quantize_b_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan_ptr, + const int16_t *iscan_ptr) { + int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob; + bool16x8_t zero_mask0, zero_mask1; + + // First set of 8 coeff starts with DC + 7 AC + int16x8_t zbin = vec_vsx_ld(0, zbin_ptr); + int16x8_t round = vec_vsx_ld(0, round_ptr); + int16x8_t quant = vec_vsx_ld(0, quant_ptr); + int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); + int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr); + + int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); + int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); + + int16x8_t coeff0_abs = vec_abs(coeff0); + int16x8_t coeff1_abs = vec_abs(coeff1); + + zero_mask0 = vec_cmpge(coeff0_abs, zbin); + zbin = vec_splat(zbin, 1); + zero_mask1 = vec_cmpge(coeff1_abs, 
zbin); + + (void)scan_ptr; + (void)skip_block; + assert(!skip_block); + + qcoeff0 = + quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift, zero_mask0); + vec_vsx_st(qcoeff0, 0, qcoeff_ptr); + round = vec_splat(round, 1); + quant = vec_splat(quant, 1); + quant_shift = vec_splat(quant_shift, 1); + qcoeff1 = + quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift, zero_mask1); + vec_vsx_st(qcoeff1, 16, qcoeff_ptr); + + dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr); + dequant = vec_splat(dequant, 1); + dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); + vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr); + + eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0), + nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16)); + + if (n_coeffs > 16) { + int index = 16; + int off0 = 32; + int off1 = 48; + int off2 = 64; + do { + int16x8_t coeff2, coeff2_abs, qcoeff2, dqcoeff2, eob2; + bool16x8_t zero_mask2; + coeff0 = vec_vsx_ld(off0, coeff_ptr); + coeff1 = vec_vsx_ld(off1, coeff_ptr); + coeff2 = vec_vsx_ld(off2, coeff_ptr); + coeff0_abs = vec_abs(coeff0); + coeff1_abs = vec_abs(coeff1); + coeff2_abs = vec_abs(coeff2); + zero_mask0 = vec_cmpge(coeff0_abs, zbin); + zero_mask1 = vec_cmpge(coeff1_abs, zbin); + zero_mask2 = vec_cmpge(coeff2_abs, zbin); + qcoeff0 = quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift, + zero_mask0); + qcoeff1 = quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift, + zero_mask1); + qcoeff2 = quantize_coeff(coeff2, coeff2_abs, round, quant, quant_shift, + zero_mask2); + vec_vsx_st(qcoeff0, off0, qcoeff_ptr); + vec_vsx_st(qcoeff1, off1, qcoeff_ptr); + vec_vsx_st(qcoeff2, off2, qcoeff_ptr); + + dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); + dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); + dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16); + + vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr); + vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr); + vec_vsx_st(dqcoeff2, 
off2, dqcoeff_ptr); + + eob = + vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0)); + eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1), + nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2)); + eob = vec_max(eob, eob2); + + index += 24; + off0 += 48; + off1 += 48; + off2 += 48; + } while (index < n_coeffs); + } + + eob = vec_max_across(eob); + *eob_ptr = eob[0]; +} + +void vpx_quantize_b_32x32_vsx( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan_ptr, const int16_t *iscan_ptr) { + // In stage 1, we quantize 16 coeffs (DC + 15 AC) + // In stage 2, we loop 42 times and quantize 24 coeffs per iteration + // (32 * 32 - 16) / 24 = 42 + int num_itr = 42; + // Offsets are in bytes, 16 coeffs = 32 bytes + int off0 = 32; + int off1 = 48; + int off2 = 64; + + int16x8_t qcoeff0, qcoeff1, eob; + bool16x8_t zero_mask0, zero_mask1; + + int16x8_t zbin = vec_vsx_ld(0, zbin_ptr); + int16x8_t round = vec_vsx_ld(0, round_ptr); + int16x8_t quant = vec_vsx_ld(0, quant_ptr); + int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); + int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr); + + int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); + int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); + + int16x8_t coeff0_abs = vec_abs(coeff0); + int16x8_t coeff1_abs = vec_abs(coeff1); + + (void)scan_ptr; + (void)skip_block; + (void)n_coeffs; + assert(!skip_block); + + // 32x32 quantization requires that zbin and round be divided by 2 + zbin = vec_sra(vec_add(zbin, vec_ones_s16), vec_ones_u16); + round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16); + + zero_mask0 = vec_cmpge(coeff0_abs, zbin); + zbin = vec_splat(zbin, 1); // remove DC from zbin + zero_mask1 = vec_cmpge(coeff1_abs, zbin); + + qcoeff0 = 
quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift, + zero_mask0); + round = vec_splat(round, 1); // remove DC from round + quant = vec_splat(quant, 1); // remove DC from quant + quant_shift = vec_splat(quant_shift, 1); // remove DC from quant_shift + qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift, + zero_mask1); + + vec_vsx_st(qcoeff0, 0, qcoeff_ptr); + vec_vsx_st(qcoeff1, 16, qcoeff_ptr); + + vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), 0, dqcoeff_ptr); + dequant = vec_splat(dequant, 1); // remove DC from dequant + vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), 16, dqcoeff_ptr); + + eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0), + nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16)); + + do { + int16x8_t coeff2, coeff2_abs, qcoeff2, eob2; + bool16x8_t zero_mask2; + + coeff0 = vec_vsx_ld(off0, coeff_ptr); + coeff1 = vec_vsx_ld(off1, coeff_ptr); + coeff2 = vec_vsx_ld(off2, coeff_ptr); + + coeff0_abs = vec_abs(coeff0); + coeff1_abs = vec_abs(coeff1); + coeff2_abs = vec_abs(coeff2); + + zero_mask0 = vec_cmpge(coeff0_abs, zbin); + zero_mask1 = vec_cmpge(coeff1_abs, zbin); + zero_mask2 = vec_cmpge(coeff2_abs, zbin); + + qcoeff0 = quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift, + zero_mask0); + qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift, + zero_mask1); + qcoeff2 = quantize_coeff_32(coeff2, coeff2_abs, round, quant, quant_shift, + zero_mask2); + + vec_vsx_st(qcoeff0, off0, qcoeff_ptr); + vec_vsx_st(qcoeff1, off1, qcoeff_ptr); + vec_vsx_st(qcoeff2, off2, qcoeff_ptr); + + vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), off0, dqcoeff_ptr); + vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), off1, dqcoeff_ptr); + vec_vsx_st(dequantize_coeff_32(qcoeff2, dequant), off2, dqcoeff_ptr); + + eob = vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0)); + eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1), + 
nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2)); + eob = vec_max(eob, eob2); + + // 24 int16_t is 48 bytes + off0 += 48; + off1 += 48; + off2 += 48; + num_itr--; + } while (num_itr != 0); + + eob = vec_max_across(eob); + *eob_ptr = eob[0]; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/types_vsx.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/types_vsx.h index f611d02d2d5..a5d2a225526 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/types_vsx.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/types_vsx.h @@ -19,6 +19,7 @@ typedef vector signed short int16x8_t; typedef vector unsigned short uint16x8_t; typedef vector signed int int32x4_t; typedef vector unsigned int uint32x4_t; +typedef vector bool short bool16x8_t; #ifdef __clang__ static const uint8x16_t xxpermdi0_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, @@ -65,4 +66,24 @@ static const uint8x16_t xxpermdi3_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, #endif #endif +static const int16x8_t vec_zeros_s16 = { 0, 0, 0, 0, 0, 0, 0, 0 }; +static const int16x8_t vec_ones_s16 = { 1, 1, 1, 1, 1, 1, 1, 1 }; +static const uint16x8_t vec_ones_u16 = { 1, 1, 1, 1, 1, 1, 1, 1 }; +static const uint32x4_t vec_ones_u32 = { 1, 1, 1, 1 }; +static const uint16x8_t vec_shift_sign_s16 = { 15, 15, 15, 15, 15, 15, 15, 15 }; +static const uint32x4_t vec_shift_sign_s32 = { 31, 31, 31, 31 }; +static const uint8x16_t vec_perm64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, + 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07 }; +static const uint8x16_t vec_perm32 = { 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x00, 0x01, 0x02, 0x03 }; +static const uint8x16_t vec_perm16 = { 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, + 0x0E, 0x0F, 0x00, 0x01 }; + +static const uint8x16_t vec_perm_merge = { 0x00, 0x01, 0x08, 0x09, 0x02, 0x03, + 0x0A, 0x0B, 0x04, 0x05, 0x0C, 0x0D, + 0x06, 0x07, 0x0E, 0x0F }; + #endif // 
VPX_DSP_PPC_TYPES_VSX_H_ diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/variance_vsx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/variance_vsx.c index 1efe2f00569..d3f257b63eb 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/variance_vsx.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/variance_vsx.c @@ -10,10 +10,11 @@ #include <assert.h> +#include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ppc/types_vsx.h" -static inline uint8x16_t read4x2(const uint8_t *a, int stride) { +static INLINE uint8x16_t read4x2(const uint8_t *a, int stride) { const uint32x4_t a0 = (uint32x4_t)vec_vsx_ld(0, a); const uint32x4_t a1 = (uint32x4_t)vec_vsx_ld(0, a + stride); @@ -101,3 +102,174 @@ void vpx_comp_avg_pred_vsx(uint8_t *comp_pred, const uint8_t *pred, int width, } } } + +static INLINE void variance_inner_32(const uint8_t *a, const uint8_t *b, + int32x4_t *sum_squared, int32x4_t *sum) { + int32x4_t s = *sum; + int32x4_t ss = *sum_squared; + + const uint8x16_t va0 = vec_vsx_ld(0, a); + const uint8x16_t vb0 = vec_vsx_ld(0, b); + const uint8x16_t va1 = vec_vsx_ld(16, a); + const uint8x16_t vb1 = vec_vsx_ld(16, b); + + const int16x8_t a0 = unpack_to_s16_h(va0); + const int16x8_t b0 = unpack_to_s16_h(vb0); + const int16x8_t a1 = unpack_to_s16_l(va0); + const int16x8_t b1 = unpack_to_s16_l(vb0); + const int16x8_t a2 = unpack_to_s16_h(va1); + const int16x8_t b2 = unpack_to_s16_h(vb1); + const int16x8_t a3 = unpack_to_s16_l(va1); + const int16x8_t b3 = unpack_to_s16_l(vb1); + const int16x8_t d0 = vec_sub(a0, b0); + const int16x8_t d1 = vec_sub(a1, b1); + const int16x8_t d2 = vec_sub(a2, b2); + const int16x8_t d3 = vec_sub(a3, b3); + + s = vec_sum4s(d0, s); + ss = vec_msum(d0, d0, ss); + s = vec_sum4s(d1, s); + ss = vec_msum(d1, d1, ss); + s = vec_sum4s(d2, s); + ss = vec_msum(d2, d2, ss); + s = vec_sum4s(d3, s); + ss = vec_msum(d3, d3, ss); + *sum = s; + *sum_squared = ss; +} + +static INLINE void 
variance(const uint8_t *a, int a_stride, const uint8_t *b, + int b_stride, int w, int h, uint32_t *sse, + int *sum) { + int i; + + int32x4_t s = vec_splat_s32(0); + int32x4_t ss = vec_splat_s32(0); + + switch (w) { + case 4: + for (i = 0; i < h / 2; ++i) { + const int16x8_t a0 = unpack_to_s16_h(read4x2(a, a_stride)); + const int16x8_t b0 = unpack_to_s16_h(read4x2(b, b_stride)); + const int16x8_t d = vec_sub(a0, b0); + s = vec_sum4s(d, s); + ss = vec_msum(d, d, ss); + a += a_stride * 2; + b += b_stride * 2; + } + break; + case 8: + for (i = 0; i < h; ++i) { + const int16x8_t a0 = unpack_to_s16_h(vec_vsx_ld(0, a)); + const int16x8_t b0 = unpack_to_s16_h(vec_vsx_ld(0, b)); + const int16x8_t d = vec_sub(a0, b0); + + s = vec_sum4s(d, s); + ss = vec_msum(d, d, ss); + a += a_stride; + b += b_stride; + } + break; + case 16: + for (i = 0; i < h; ++i) { + const uint8x16_t va = vec_vsx_ld(0, a); + const uint8x16_t vb = vec_vsx_ld(0, b); + const int16x8_t a0 = unpack_to_s16_h(va); + const int16x8_t b0 = unpack_to_s16_h(vb); + const int16x8_t a1 = unpack_to_s16_l(va); + const int16x8_t b1 = unpack_to_s16_l(vb); + const int16x8_t d0 = vec_sub(a0, b0); + const int16x8_t d1 = vec_sub(a1, b1); + + s = vec_sum4s(d0, s); + ss = vec_msum(d0, d0, ss); + s = vec_sum4s(d1, s); + ss = vec_msum(d1, d1, ss); + + a += a_stride; + b += b_stride; + } + break; + case 32: + for (i = 0; i < h; ++i) { + variance_inner_32(a, b, &ss, &s); + a += a_stride; + b += b_stride; + } + break; + case 64: + for (i = 0; i < h; ++i) { + variance_inner_32(a, b, &ss, &s); + variance_inner_32(a + 32, b + 32, &ss, &s); + + a += a_stride; + b += b_stride; + } + break; + } + + s = vec_splat(vec_sums(s, vec_splat_s32(0)), 3); + + vec_ste(s, 0, sum); + + ss = vec_splat(vec_sums(ss, vec_splat_s32(0)), 3); + + vec_ste((uint32x4_t)ss, 0, sse); +} + +/* Identical to the variance call except it takes an additional parameter, sum, + * and returns that value using pass-by-reference instead of returning + * sse - sum^2 / w*h + 
*/ +#define GET_VAR(W, H) \ + void vpx_get##W##x##H##var_vsx(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + uint32_t *sse, int *sum) { \ + variance(a, a_stride, b, b_stride, W, H, sse, sum); \ + } + +/* Identical to the variance call except it does not calculate the + * sse - sum^2 / w*h and returns sse in addition to modifying the passed in + * variable. + */ +#define MSE(W, H) \ + uint32_t vpx_mse##W##x##H##_vsx(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + uint32_t *sse) { \ + int sum; \ + variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse; \ + } + +#define VAR(W, H) \ + uint32_t vpx_variance##W##x##H##_vsx(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + uint32_t *sse) { \ + int sum; \ + variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ + } + +#define VARIANCES(W, H) VAR(W, H) + +VARIANCES(64, 64) +VARIANCES(64, 32) +VARIANCES(32, 64) +VARIANCES(32, 32) +VARIANCES(32, 16) +VARIANCES(16, 32) +VARIANCES(16, 16) +VARIANCES(16, 8) +VARIANCES(8, 16) +VARIANCES(8, 8) +VARIANCES(8, 4) +VARIANCES(4, 8) +VARIANCES(4, 4) + +GET_VAR(16, 16) +GET_VAR(8, 8) + +MSE(16, 16) +MSE(16, 8) +MSE(8, 16) +MSE(8, 8) diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ssim.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ssim.c index 7a29bd29f9f..ba73eb293a4 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ssim.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ssim.c @@ -284,7 +284,7 @@ double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2, for (i = 0; i < height; i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) { for (j = 0; j < width; j += 4, ++c) { - Ssimv sv = { 0 }; + Ssimv sv = { 0, 0, 0, 0, 0, 0 }; double ssim; double ssim2; double dssim; diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sum_squares.c 
b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sum_squares.c index 7c535ac2db6..b80cd588e42 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sum_squares.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sum_squares.c @@ -10,8 +10,7 @@ #include "./vpx_dsp_rtcd.h" -uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int src_stride, - int size) { +uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int stride, int size) { int r, c; uint64_t ss = 0; @@ -20,7 +19,7 @@ uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int src_stride, const int16_t v = src[c]; ss += v * v; } - src += src_stride; + src += stride; } return ss; diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk index 16701103498..cb06a476f2a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk @@ -286,6 +286,7 @@ DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3.c DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx.c DSP_SRCS-$(HAVE_NEON) += arm/quantize_neon.c +DSP_SRCS-$(HAVE_VSX) += ppc/quantize_vsx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c endif @@ -312,6 +313,7 @@ ifeq ($(CONFIG_ENCODERS),yes) DSP_SRCS-yes += sad.c DSP_SRCS-yes += subtract.c DSP_SRCS-yes += sum_squares.c +DSP_SRCS-$(HAVE_NEON) += arm/sum_squares_neon.c DSP_SRCS-$(HAVE_SSE2) += x86/sum_squares_sse2.c DSP_SRCS-$(HAVE_MSA) += mips/sum_squares_msa.c diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl index a51761cd3c3..824ae0f43b0 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -363,7 +363,7 @@ add_proto qw/void vpx_convolve_copy/, 
"const uint8_t *src, ptrdiff_t src_stride, specialize qw/vpx_convolve_copy neon dspr2 msa sse2 vsx/; add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx/; +specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx mmi/; add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx mmi/; @@ -378,7 +378,7 @@ add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon dspr2 msa vsx mmi/; add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx/; +specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi/; add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi/; @@ -626,7 +626,7 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/; specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/; specialize qw/vpx_idct32x32_1_add neon sse2/; - specialize qw/vpx_iwht4x4_16_add sse2/; + specialize qw/vpx_iwht4x4_16_add sse2 vsx/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { # Note that these specializations are appended to the above ones. 
@@ -699,10 +699,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b neon sse2 ssse3 avx/; + specialize qw/vpx_quantize_b neon sse2 ssse3 avx vsx/; add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/vpx_quantize_b_32x32 neon ssse3 avx/; + specialize qw/vpx_quantize_b_32x32 neon ssse3 avx vsx/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; @@ -922,7 +922,7 @@ add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const specialize qw/vpx_sad4x4x4d neon msa sse2 mmi/; add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size"; -specialize qw/vpx_sum_squares_2d_i16 sse2 msa/; +specialize qw/vpx_sum_squares_2d_i16 neon sse2 msa/; # # Structured Similarity (SSIM) @@ -1082,64 +1082,64 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq " # Variance # add_proto qw/unsigned int 
vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance32x64 sse2 neon msa mmi/; + specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x32 sse2 neon msa mmi/; + specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi/; + specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, 
unsigned int *sse"; - specialize qw/vpx_variance16x8 sse2 neon msa mmi/; + specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x16 sse2 neon msa mmi/; + specialize qw/vpx_variance8x16 sse2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x8 sse2 neon msa mmi/; + specialize qw/vpx_variance8x8 sse2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x4 sse2 neon msa mmi/; + specialize qw/vpx_variance8x4 sse2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance4x8 sse2 neon msa mmi/; + specialize qw/vpx_variance4x8 sse2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance4x4 sse2 neon msa mmi/; + specialize qw/vpx_variance4x4 sse2 neon msa mmi vsx/; # # Specialty Variance # add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get16x16var sse2 avx2 neon msa/; + specialize qw/vpx_get16x16var sse2 avx2 neon msa vsx/; add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get8x8var sse2 neon msa/; + specialize qw/vpx_get8x8var sse2 neon msa vsx/; add_proto qw/unsigned int vpx_mse16x16/, "const 
uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi/; + specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi vsx/; add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse16x8 sse2 msa mmi/; + specialize qw/vpx_mse16x8 sse2 avx2 msa mmi vsx/; add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse8x16 sse2 msa mmi/; + specialize qw/vpx_mse8x16 sse2 msa mmi vsx/; add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse8x8 sse2 msa mmi/; + specialize qw/vpx_mse8x8 sse2 msa mmi vsx/; add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *"; specialize qw/vpx_get_mb_ss sse2 msa vsx/; diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/mem_sse2.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/mem_sse2.h index 2ce738fb770..419f1786309 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/mem_sse2.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/mem_sse2.h @@ -15,6 +15,11 @@ #include "./vpx_config.h" +static INLINE __m128i loadh_epi64(const __m128i s, const void *const src) { + return _mm_castps_si128( + _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src)); +} + static INLINE void load_8bit_4x4(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) { d[0] = _mm_cvtsi32_si128(*(const int *)(s + 0 * stride)); diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sum_squares_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sum_squares_sse2.c index 026d0ca2f27..9eaf6ee1b8f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sum_squares_sse2.c 
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sum_squares_sse2.c @@ -10,120 +10,96 @@ #include <assert.h> #include <emmintrin.h> -#include <stdio.h> #include "./vpx_dsp_rtcd.h" +#include "vpx_dsp/x86/mem_sse2.h" -static uint64_t vpx_sum_squares_2d_i16_4x4_sse2(const int16_t *src, - int stride) { - const __m128i v_val_0_w = - _mm_loadl_epi64((const __m128i *)(src + 0 * stride)); - const __m128i v_val_1_w = - _mm_loadl_epi64((const __m128i *)(src + 1 * stride)); - const __m128i v_val_2_w = - _mm_loadl_epi64((const __m128i *)(src + 2 * stride)); - const __m128i v_val_3_w = - _mm_loadl_epi64((const __m128i *)(src + 3 * stride)); - - const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w); - const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w); - const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w); - const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w); - - const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d); - const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d); - const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d); - - const __m128i v_sum_d = - _mm_add_epi32(v_sum_0123_d, _mm_srli_epi64(v_sum_0123_d, 32)); - - return (uint64_t)_mm_cvtsi128_si32(v_sum_d); -} - -// TODO(jingning): Evaluate the performance impact here. -#ifdef __GNUC__ -// This prevents GCC/Clang from inlining this function into -// vpx_sum_squares_2d_i16_sse2, which in turn saves some stack -// maintenance instructions in the common case of 4x4. 
-__attribute__((noinline)) -#endif -static uint64_t -vpx_sum_squares_2d_i16_nxn_sse2(const int16_t *src, int stride, int size) { - int r, c; - const __m128i v_zext_mask_q = _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff); - __m128i v_acc_q = _mm_setzero_si128(); - - for (r = 0; r < size; r += 8) { - __m128i v_acc_d = _mm_setzero_si128(); - - for (c = 0; c < size; c += 8) { - const int16_t *b = src + c; - const __m128i v_val_0_w = - _mm_load_si128((const __m128i *)(b + 0 * stride)); - const __m128i v_val_1_w = - _mm_load_si128((const __m128i *)(b + 1 * stride)); - const __m128i v_val_2_w = - _mm_load_si128((const __m128i *)(b + 2 * stride)); - const __m128i v_val_3_w = - _mm_load_si128((const __m128i *)(b + 3 * stride)); - const __m128i v_val_4_w = - _mm_load_si128((const __m128i *)(b + 4 * stride)); - const __m128i v_val_5_w = - _mm_load_si128((const __m128i *)(b + 5 * stride)); - const __m128i v_val_6_w = - _mm_load_si128((const __m128i *)(b + 6 * stride)); - const __m128i v_val_7_w = - _mm_load_si128((const __m128i *)(b + 7 * stride)); - - const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w); - const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w); - const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w); - const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w); - const __m128i v_sq_4_d = _mm_madd_epi16(v_val_4_w, v_val_4_w); - const __m128i v_sq_5_d = _mm_madd_epi16(v_val_5_w, v_val_5_w); - const __m128i v_sq_6_d = _mm_madd_epi16(v_val_6_w, v_val_6_w); - const __m128i v_sq_7_d = _mm_madd_epi16(v_val_7_w, v_val_7_w); - - const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d); - const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d); - const __m128i v_sum_45_d = _mm_add_epi32(v_sq_4_d, v_sq_5_d); - const __m128i v_sum_67_d = _mm_add_epi32(v_sq_6_d, v_sq_7_d); - - const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d); - const __m128i v_sum_4567_d = _mm_add_epi32(v_sum_45_d, v_sum_67_d); - - v_acc_d = 
_mm_add_epi32(v_acc_d, v_sum_0123_d); - v_acc_d = _mm_add_epi32(v_acc_d, v_sum_4567_d); - } - - v_acc_q = _mm_add_epi64(v_acc_q, _mm_and_si128(v_acc_d, v_zext_mask_q)); - v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_epi64(v_acc_d, 32)); +uint64_t vpx_sum_squares_2d_i16_sse2(const int16_t *src, int stride, int size) { + // Over 75% of all calls are with size == 4. + if (size == 4) { + __m128i s[2], sq[2], ss; + + s[0] = _mm_loadl_epi64((const __m128i *)(src + 0 * stride)); + s[0] = loadh_epi64(s[0], src + 1 * stride); + s[1] = _mm_loadl_epi64((const __m128i *)(src + 2 * stride)); + s[1] = loadh_epi64(s[1], src + 3 * stride); + sq[0] = _mm_madd_epi16(s[0], s[0]); + sq[1] = _mm_madd_epi16(s[1], s[1]); + sq[0] = _mm_add_epi32(sq[0], sq[1]); + ss = _mm_add_epi32(sq[0], _mm_srli_si128(sq[0], 8)); + ss = _mm_add_epi32(ss, _mm_srli_epi64(ss, 32)); + + return (uint64_t)_mm_cvtsi128_si32(ss); + } else { + // Generic case + int r = size; + const __m128i v_zext_mask_q = _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff); + __m128i v_acc_q = _mm_setzero_si128(); - src += 8 * stride; - } + assert(size % 8 == 0); - v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8)); + do { + int c = 0; + __m128i v_acc_d = _mm_setzero_si128(); + + do { + const int16_t *const b = src + c; + const __m128i v_val_0_w = + _mm_load_si128((const __m128i *)(b + 0 * stride)); + const __m128i v_val_1_w = + _mm_load_si128((const __m128i *)(b + 1 * stride)); + const __m128i v_val_2_w = + _mm_load_si128((const __m128i *)(b + 2 * stride)); + const __m128i v_val_3_w = + _mm_load_si128((const __m128i *)(b + 3 * stride)); + const __m128i v_val_4_w = + _mm_load_si128((const __m128i *)(b + 4 * stride)); + const __m128i v_val_5_w = + _mm_load_si128((const __m128i *)(b + 5 * stride)); + const __m128i v_val_6_w = + _mm_load_si128((const __m128i *)(b + 6 * stride)); + const __m128i v_val_7_w = + _mm_load_si128((const __m128i *)(b + 7 * stride)); + + const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w); + const 
__m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w); + const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w); + const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w); + const __m128i v_sq_4_d = _mm_madd_epi16(v_val_4_w, v_val_4_w); + const __m128i v_sq_5_d = _mm_madd_epi16(v_val_5_w, v_val_5_w); + const __m128i v_sq_6_d = _mm_madd_epi16(v_val_6_w, v_val_6_w); + const __m128i v_sq_7_d = _mm_madd_epi16(v_val_7_w, v_val_7_w); + + const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d); + const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d); + const __m128i v_sum_45_d = _mm_add_epi32(v_sq_4_d, v_sq_5_d); + const __m128i v_sum_67_d = _mm_add_epi32(v_sq_6_d, v_sq_7_d); + + const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d); + const __m128i v_sum_4567_d = _mm_add_epi32(v_sum_45_d, v_sum_67_d); + + v_acc_d = _mm_add_epi32(v_acc_d, v_sum_0123_d); + v_acc_d = _mm_add_epi32(v_acc_d, v_sum_4567_d); + c += 8; + } while (c < size); + + v_acc_q = _mm_add_epi64(v_acc_q, _mm_and_si128(v_acc_d, v_zext_mask_q)); + v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_epi64(v_acc_d, 32)); + + src += 8 * stride; + r -= 8; + } while (r); + + v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8)); #if ARCH_X86_64 - return (uint64_t)_mm_cvtsi128_si64(v_acc_q); + return (uint64_t)_mm_cvtsi128_si64(v_acc_q); #else - { - uint64_t tmp; - _mm_storel_epi64((__m128i *)&tmp, v_acc_q); - return tmp; - } + { + uint64_t tmp; + _mm_storel_epi64((__m128i *)&tmp, v_acc_q); + return tmp; + } #endif -} - -uint64_t vpx_sum_squares_2d_i16_sse2(const int16_t *src, int stride, int size) { - // 4 elements per row only requires half an XMM register, so this - // must be a special case, but also note that over 75% of all calls - // are with size == 4, so it is also the common case. 
- if (size == 4) { - return vpx_sum_squares_2d_i16_4x4_sse2(src, stride); - } else { - // Generic case - assert(size % 8 == 0); - return vpx_sum_squares_2d_i16_nxn_sse2(src, stride, size); } } diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c index d15a89c746b..d938b81ea2c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c @@ -38,130 +38,140 @@ DECLARE_ALIGNED(32, static const int8_t, adjacent_sub_avx2[32]) = { }; /* clang-format on */ -void vpx_get16x16var_avx2(const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, int recon_stride, - unsigned int *sse, int *sum) { - unsigned int i, src_2strides, ref_2strides; - __m256i sum_reg = _mm256_setzero_si256(); - __m256i sse_reg = _mm256_setzero_si256(); - // process two 16 byte locations in a 256 bit register - src_2strides = source_stride << 1; - ref_2strides = recon_stride << 1; - for (i = 0; i < 8; ++i) { - // convert up values in 128 bit registers across lanes - const __m256i src0 = - _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const *)(src_ptr))); - const __m256i src1 = _mm256_cvtepu8_epi16( - _mm_loadu_si128((__m128i const *)(src_ptr + source_stride))); - const __m256i ref0 = - _mm256_cvtepu8_epi16(_mm_loadu_si128((__m128i const *)(ref_ptr))); - const __m256i ref1 = _mm256_cvtepu8_epi16( - _mm_loadu_si128((__m128i const *)(ref_ptr + recon_stride))); - const __m256i diff0 = _mm256_sub_epi16(src0, ref0); - const __m256i diff1 = _mm256_sub_epi16(src1, ref1); - const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); - const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); - - // add to the running totals - sum_reg = _mm256_add_epi16(sum_reg, _mm256_add_epi16(diff0, diff1)); - sse_reg = _mm256_add_epi32(sse_reg, _mm256_add_epi32(madd0, madd1)); - - src_ptr += src_2strides; - ref_ptr 
+= ref_2strides; - } - { - // extract the low lane and add it to the high lane - const __m128i sum_reg_128 = _mm_add_epi16( - _mm256_castsi256_si128(sum_reg), _mm256_extractf128_si256(sum_reg, 1)); - const __m128i sse_reg_128 = _mm_add_epi32( - _mm256_castsi256_si128(sse_reg), _mm256_extractf128_si256(sse_reg, 1)); - - // sum upper and lower 64 bits together and convert up to 32 bit values - const __m128i sum_reg_64 = - _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 8)); - const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64); - - // unpack sse and sum registers and add - const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, sum_int32); - const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, sum_int32); - const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi); - - // perform the final summation and extract the results - const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8)); - *((int *)sse) = _mm_cvtsi128_si32(res); - *((int *)sum) = _mm_extract_epi32(res, 1); +static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref, + __m256i *const sse, + __m256i *const sum) { + const __m256i adj_sub = _mm256_load_si256((__m256i const *)adjacent_sub_avx2); + + // unpack into pairs of source and reference values + const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref); + const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref); + + // subtract adjacent elements using src*1 + ref*-1 + const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub); + const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub); + const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); + const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); + + // add to the running totals + *sum = _mm256_add_epi16(*sum, _mm256_add_epi16(diff0, diff1)); + *sse = _mm256_add_epi32(*sse, _mm256_add_epi32(madd0, madd1)); +} + +static INLINE void variance_final_from_32bit_sum_avx2(__m256i vsse, + __m128i vsum, + unsigned int *const sse, + int *const sum) { + 
// extract the low lane and add it to the high lane + const __m128i sse_reg_128 = _mm_add_epi32(_mm256_castsi256_si128(vsse), + _mm256_extractf128_si256(vsse, 1)); + + // unpack sse and sum registers and add + const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, vsum); + const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, vsum); + const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi); + + // perform the final summation and extract the results + const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8)); + *((int *)sse) = _mm_cvtsi128_si32(res); + *((int *)sum) = _mm_extract_epi32(res, 1); +} + +static INLINE void variance_final_from_16bit_sum_avx2(__m256i vsse, + __m256i vsum, + unsigned int *const sse, + int *const sum) { + // extract the low lane and add it to the high lane + const __m128i sum_reg_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum), + _mm256_extractf128_si256(vsum, 1)); + const __m128i sum_reg_64 = + _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 8)); + const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64); + + variance_final_from_32bit_sum_avx2(vsse, sum_int32, sse, sum); +} + +static INLINE __m256i sum_to_32bit_avx2(const __m256i sum) { + const __m256i sum_lo = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(sum)); + const __m256i sum_hi = + _mm256_cvtepi16_epi32(_mm256_extractf128_si256(sum, 1)); + return _mm256_add_epi32(sum_lo, sum_hi); +} + +static INLINE void variance16_kernel_avx2( + const uint8_t *const src, const int src_stride, const uint8_t *const ref, + const int ref_stride, __m256i *const sse, __m256i *const sum) { + const __m128i s0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride)); + const __m128i s1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride)); + const __m128i r0 = _mm_loadu_si128((__m128i const *)(ref + 0 * ref_stride)); + const __m128i r1 = _mm_loadu_si128((__m128i const *)(ref + 1 * ref_stride)); + const __m256i s = _mm256_inserti128_si256(_mm256_castsi128_si256(s0), 
s1, 1); + const __m256i r = _mm256_inserti128_si256(_mm256_castsi128_si256(r0), r1, 1); + variance_kernel_avx2(s, r, sse, sum); +} + +static INLINE void variance32_kernel_avx2(const uint8_t *const src, + const uint8_t *const ref, + __m256i *const sse, + __m256i *const sum) { + const __m256i s = _mm256_loadu_si256((__m256i const *)(src)); + const __m256i r = _mm256_loadu_si256((__m256i const *)(ref)); + variance_kernel_avx2(s, r, sse, sum); +} + +static INLINE void variance16_avx2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m256i *const vsse, + __m256i *const vsum) { + int i; + *vsum = _mm256_setzero_si256(); + *vsse = _mm256_setzero_si256(); + + for (i = 0; i < h; i += 2) { + variance16_kernel_avx2(src, src_stride, ref, ref_stride, vsse, vsum); + src += 2 * src_stride; + ref += 2 * ref_stride; } } -static void get32x16var_avx2(const unsigned char *src_ptr, int source_stride, - const unsigned char *ref_ptr, int recon_stride, - unsigned int *sse, int *sum) { - unsigned int i, src_2strides, ref_2strides; - const __m256i adj_sub = _mm256_load_si256((__m256i const *)adjacent_sub_avx2); - __m256i sum_reg = _mm256_setzero_si256(); - __m256i sse_reg = _mm256_setzero_si256(); +static INLINE void variance32_avx2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m256i *const vsse, + __m256i *const vsum) { + int i; + *vsum = _mm256_setzero_si256(); + *vsse = _mm256_setzero_si256(); - // process 64 elements in an iteration - src_2strides = source_stride << 1; - ref_2strides = recon_stride << 1; - for (i = 0; i < 8; i++) { - const __m256i src0 = _mm256_loadu_si256((__m256i const *)(src_ptr)); - const __m256i src1 = - _mm256_loadu_si256((__m256i const *)(src_ptr + source_stride)); - const __m256i ref0 = _mm256_loadu_si256((__m256i const *)(ref_ptr)); - const __m256i ref1 = - _mm256_loadu_si256((__m256i const *)(ref_ptr + recon_stride)); - - // unpack into pairs of 
source and reference values - const __m256i src_ref0 = _mm256_unpacklo_epi8(src0, ref0); - const __m256i src_ref1 = _mm256_unpackhi_epi8(src0, ref0); - const __m256i src_ref2 = _mm256_unpacklo_epi8(src1, ref1); - const __m256i src_ref3 = _mm256_unpackhi_epi8(src1, ref1); - - // subtract adjacent elements using src*1 + ref*-1 - const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub); - const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub); - const __m256i diff2 = _mm256_maddubs_epi16(src_ref2, adj_sub); - const __m256i diff3 = _mm256_maddubs_epi16(src_ref3, adj_sub); - const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); - const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); - const __m256i madd2 = _mm256_madd_epi16(diff2, diff2); - const __m256i madd3 = _mm256_madd_epi16(diff3, diff3); - - // add to the running totals - sum_reg = _mm256_add_epi16(sum_reg, _mm256_add_epi16(diff0, diff1)); - sum_reg = _mm256_add_epi16(sum_reg, _mm256_add_epi16(diff2, diff3)); - sse_reg = _mm256_add_epi32(sse_reg, _mm256_add_epi32(madd0, madd1)); - sse_reg = _mm256_add_epi32(sse_reg, _mm256_add_epi32(madd2, madd3)); - - src_ptr += src_2strides; - ref_ptr += ref_2strides; + for (i = 0; i < h; i++) { + variance32_kernel_avx2(src, ref, vsse, vsum); + src += src_stride; + ref += ref_stride; } +} + +static INLINE void variance64_avx2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m256i *const vsse, + __m256i *const vsum) { + int i; + *vsum = _mm256_setzero_si256(); - { - // extract the low lane and add it to the high lane - const __m128i sum_reg_128 = _mm_add_epi16( - _mm256_castsi256_si128(sum_reg), _mm256_extractf128_si256(sum_reg, 1)); - const __m128i sse_reg_128 = _mm_add_epi32( - _mm256_castsi256_si128(sse_reg), _mm256_extractf128_si256(sse_reg, 1)); - - // sum upper and lower 64 bits together and convert up to 32 bit values - const __m128i sum_reg_64 = - _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 
8)); - const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64); - - // unpack sse and sum registers and add - const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, sum_int32); - const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, sum_int32); - const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi); - - // perform the final summation and extract the results - const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8)); - *((int *)sse) = _mm_cvtsi128_si32(res); - *((int *)sum) = _mm_extract_epi32(res, 1); + for (i = 0; i < h; i++) { + variance32_kernel_avx2(src + 0, ref + 0, vsse, vsum); + variance32_kernel_avx2(src + 32, ref + 32, vsse, vsum); + src += src_stride; + ref += ref_stride; } } +void vpx_get16x16var_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse, + int *sum) { + __m256i vsse, vsum; + variance16_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, sum); +} + #define FILTER_SRC(filter) \ /* filter the source */ \ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \ @@ -593,50 +603,43 @@ typedef void (*get_var_avx2)(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse, int *sum); -static void variance_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, int w, int h, - unsigned int *sse, int *sum, get_var_avx2 var_fn, - int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += 16) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(&src[src_stride * i + j], src_stride, &ref[ref_stride * i + j], - ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } - } +unsigned int vpx_variance16x8_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + __m256i vsse, vsum; + variance16_avx2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum); + 
variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); + return *sse - (uint32_t)(((int64_t)sum * sum) >> 7); } unsigned int vpx_variance16x16_avx2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { int sum; - variance_avx2(src, src_stride, ref, ref_stride, 16, 16, sse, &sum, - vpx_get16x16var_avx2, 16); + __m256i vsse, vsum; + variance16_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 8); } -unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, - unsigned int *sse) { +unsigned int vpx_variance16x32_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { int sum; - vpx_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum); - return *sse; + __m256i vsse, vsum; + variance16_avx2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); + return *sse - (uint32_t)(((int64_t)sum * sum) >> 9); } unsigned int vpx_variance32x16_avx2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { int sum; - variance_avx2(src, src_stride, ref, ref_stride, 32, 16, sse, &sum, - get32x16var_avx2, 32); + __m256i vsse, vsum; + variance32_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 9); } @@ -644,29 +647,87 @@ unsigned int vpx_variance32x32_avx2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { int sum; - variance_avx2(src, src_stride, ref, ref_stride, 32, 32, sse, &sum, - get32x16var_avx2, 32); + __m256i vsse, vsum; + __m128i vsum_128; + variance32_avx2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum); + vsum_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum), + 
_mm256_extractf128_si256(vsum, 1)); + vsum_128 = _mm_add_epi32(_mm_cvtepi16_epi32(vsum_128), + _mm_cvtepi16_epi32(_mm_srli_si128(vsum_128, 8))); + variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 10); } -unsigned int vpx_variance64x64_avx2(const uint8_t *src, int src_stride, +unsigned int vpx_variance32x64_avx2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 64, sse, &sum, - get32x16var_avx2, 32); - return *sse - (uint32_t)(((int64_t)sum * sum) >> 12); + __m256i vsse, vsum; + __m128i vsum_128; + variance32_avx2(src, src_stride, ref, ref_stride, 64, &vsse, &vsum); + vsum = sum_to_32bit_avx2(vsum); + vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum), + _mm256_extractf128_si256(vsum, 1)); + variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); + return *sse - (uint32_t)(((int64_t)sum * sum) >> 11); } unsigned int vpx_variance64x32_avx2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m256i vsse = _mm256_setzero_si256(); + __m256i vsum = _mm256_setzero_si256(); + __m128i vsum_128; int sum; - variance_avx2(src, src_stride, ref, ref_stride, 64, 32, sse, &sum, - get32x16var_avx2, 32); + variance64_avx2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum); + vsum = sum_to_32bit_avx2(vsum); + vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum), + _mm256_extractf128_si256(vsum, 1)); + variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 11); } +unsigned int vpx_variance64x64_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + __m256i vsse = _mm256_setzero_si256(); + __m256i vsum = _mm256_setzero_si256(); + __m128i vsum_128; + int sum; + int i = 0; + + for (i = 0; i < 2; i++) { + __m256i vsum16; + variance64_avx2(src + 32 * i * 
src_stride, src_stride, + ref + 32 * i * ref_stride, ref_stride, 32, &vsse, &vsum16); + vsum = _mm256_add_epi32(vsum, sum_to_32bit_avx2(vsum16)); + } + vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum), + _mm256_extractf128_si256(vsum, 1)); + variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 12); +} + +unsigned int vpx_mse16x8_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + __m256i vsse, vsum; + variance16_avx2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); + return *sse; +} + +unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, + unsigned int *sse) { + int sum; + __m256i vsse, vsum; + variance16_avx2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum); + variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); + return *sse; +} + unsigned int vpx_sub_pixel_variance64x64_avx2(const uint8_t *src, int src_stride, int x_offset, int y_offset, const uint8_t *dst, diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c index 8d8bf183b28..a2a13a68b67 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c @@ -8,16 +8,18 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include <assert.h> #include <emmintrin.h> // SSE2 #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" - #include "vpx_ports/mem.h" -typedef void (*getNxMvar_fn_t)(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, - unsigned int *sse, int *sum); +static INLINE unsigned int add32x4_sse2(__m128i val) { + val = _mm_add_epi32(val, _mm_srli_si128(val, 8)); + val = _mm_add_epi32(val, _mm_srli_si128(val, 4)); + return _mm_cvtsi128_si32(val); +} unsigned int vpx_get_mb_ss_sse2(const int16_t *src) { __m128i vsum = _mm_setzero_si128(); @@ -29,254 +31,360 @@ unsigned int vpx_get_mb_ss_sse2(const int16_t *src) { src += 8; } - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); - return _mm_cvtsi128_si32(vsum); + return add32x4_sse2(vsum); } -#define READ64(p, stride, i) \ - _mm_unpacklo_epi8( \ - _mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \ - _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride))) +static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) { + const __m128i p0 = _mm_cvtsi32_si128(*(const uint32_t *)(p + 0 * stride)); + const __m128i p1 = _mm_cvtsi32_si128(*(const uint32_t *)(p + 1 * stride)); + const __m128i p01 = _mm_unpacklo_epi32(p0, p1); + return _mm_unpacklo_epi8(p01, _mm_setzero_si128()); +} + +static INLINE void variance_kernel_sse2(const __m128i src, const __m128i ref, + __m128i *const sse, + __m128i *const sum) { + const __m128i diff = _mm_sub_epi16(src, ref); + *sse = _mm_add_epi32(*sse, _mm_madd_epi16(diff, diff)); + *sum = _mm_add_epi16(*sum, diff); +} + +// Can handle 128 pixels' diff sum (such as 8x16 or 16x8) +// Slightly faster than variance_final_256_pel_sse2() +static INLINE void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum, + unsigned int *const sse, + int *const sum) { + *sse = add32x4_sse2(vsse); -static void get4x4var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, 
- unsigned int *sse, int *sum) { - const __m128i zero = _mm_setzero_si128(); - const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero); - const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero); - const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero); - const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - // sum - __m128i vsum = _mm_add_epi16(diff0, diff1); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); *sum = (int16_t)_mm_extract_epi16(vsum, 0); +} + +// Can handle 256 pixels' diff sum (such as 16x16) +static INLINE void variance_final_256_pel_sse2(__m128i vsse, __m128i vsum, + unsigned int *const sse, + int *const sum) { + *sse = add32x4_sse2(vsse); - // sse - vsum = - _mm_add_epi32(_mm_madd_epi16(diff0, diff0), _mm_madd_epi16(diff1, diff1)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4)); - *sse = _mm_cvtsi128_si32(vsum); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); + *sum = (int16_t)_mm_extract_epi16(vsum, 0); + *sum += (int16_t)_mm_extract_epi16(vsum, 1); } -void vpx_get8x8var_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, - int ref_stride, unsigned int *sse, int *sum) { - const __m128i zero = _mm_setzero_si128(); - __m128i vsum = _mm_setzero_si128(); - __m128i vsse = _mm_setzero_si128(); +// Can handle 512 pixels' diff sum (such as 16x32 or 32x16) +static INLINE void variance_final_512_pel_sse2(__m128i vsse, __m128i vsum, + unsigned int *const sse, + int *const sum) { + *sse = add32x4_sse2(vsse); + + vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); + vsum = _mm_unpacklo_epi16(vsum, vsum); + vsum = _mm_srai_epi32(vsum, 16); + *sum 
= add32x4_sse2(vsum); +} + +static INLINE __m128i sum_to_32bit_sse2(const __m128i sum) { + const __m128i sum_lo = _mm_srai_epi32(_mm_unpacklo_epi16(sum, sum), 16); + const __m128i sum_hi = _mm_srai_epi32(_mm_unpackhi_epi16(sum, sum), 16); + return _mm_add_epi32(sum_lo, sum_hi); +} + +// Can handle 1024 pixels' diff sum (such as 32x32) +static INLINE int sum_final_sse2(const __m128i sum) { + const __m128i t = sum_to_32bit_sse2(sum); + return add32x4_sse2(t); +} + +static INLINE void variance4_sse2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { int i; - for (i = 0; i < 8; i += 2) { - const __m128i src0 = _mm_unpacklo_epi8( - _mm_loadl_epi64((const __m128i *)(src + i * src_stride)), zero); - const __m128i ref0 = _mm_unpacklo_epi8( - _mm_loadl_epi64((const __m128i *)(ref + i * ref_stride)), zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); - - const __m128i src1 = _mm_unpacklo_epi8( - _mm_loadl_epi64((const __m128i *)(src + (i + 1) * src_stride)), zero); - const __m128i ref1 = _mm_unpacklo_epi8( - _mm_loadl_epi64((const __m128i *)(ref + (i + 1) * ref_stride)), zero); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); - - vsum = _mm_add_epi16(vsum, diff0); - vsum = _mm_add_epi16(vsum, diff1); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); + assert(h <= 256); // May overflow for larger height. 
+ *sse = _mm_setzero_si128(); + *sum = _mm_setzero_si128(); + + for (i = 0; i < h; i += 2) { + const __m128i s = load4x2_sse2(src, src_stride); + const __m128i r = load4x2_sse2(ref, ref_stride); + + variance_kernel_sse2(s, r, sse, sum); + src += 2 * src_stride; + ref += 2 * ref_stride; } +} - // sum - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); - *sum = (int16_t)_mm_extract_epi16(vsum, 0); +static INLINE void variance8_sse2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { + const __m128i zero = _mm_setzero_si128(); + int i; - // sse - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); - *sse = _mm_cvtsi128_si32(vsse); + assert(h <= 128); // May overflow for larger height. + *sse = _mm_setzero_si128(); + *sum = _mm_setzero_si128(); + + for (i = 0; i < h; i++) { + const __m128i s = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)src), zero); + const __m128i r = + _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)ref), zero); + + variance_kernel_sse2(s, r, sse, sum); + src += src_stride; + ref += ref_stride; + } } -void vpx_get16x16var_sse2(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, unsigned int *sse, - int *sum) { +static INLINE void variance16_kernel_sse2(const uint8_t *const src, + const uint8_t *const ref, + __m128i *const sse, + __m128i *const sum) { const __m128i zero = _mm_setzero_si128(); - __m128i vsum = _mm_setzero_si128(); - __m128i vsse = _mm_setzero_si128(); + const __m128i s = _mm_loadu_si128((const __m128i *)src); + const __m128i r = _mm_loadu_si128((const __m128i *)ref); + const __m128i src0 = _mm_unpacklo_epi8(s, zero); + const __m128i ref0 = _mm_unpacklo_epi8(r, zero); + const __m128i src1 = _mm_unpackhi_epi8(s, zero); + const __m128i ref1 = 
_mm_unpackhi_epi8(r, zero); + + variance_kernel_sse2(src0, ref0, sse, sum); + variance_kernel_sse2(src1, ref1, sse, sum); +} + +static INLINE void variance16_sse2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { int i; - for (i = 0; i < 16; ++i) { - const __m128i s = _mm_loadu_si128((const __m128i *)src); - const __m128i r = _mm_loadu_si128((const __m128i *)ref); + assert(h <= 64); // May overflow for larger height. + *sse = _mm_setzero_si128(); + *sum = _mm_setzero_si128(); - const __m128i src0 = _mm_unpacklo_epi8(s, zero); - const __m128i ref0 = _mm_unpacklo_epi8(r, zero); - const __m128i diff0 = _mm_sub_epi16(src0, ref0); + for (i = 0; i < h; ++i) { + variance16_kernel_sse2(src, ref, sse, sum); + src += src_stride; + ref += ref_stride; + } +} - const __m128i src1 = _mm_unpackhi_epi8(s, zero); - const __m128i ref1 = _mm_unpackhi_epi8(r, zero); - const __m128i diff1 = _mm_sub_epi16(src1, ref1); +static INLINE void variance32_sse2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { + int i; - vsum = _mm_add_epi16(vsum, diff0); - vsum = _mm_add_epi16(vsum, diff1); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0)); - vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1)); + assert(h <= 32); // May overflow for larger height. + // Don't initialize sse here since it's an accumulation. 
+ *sum = _mm_setzero_si128(); + for (i = 0; i < h; ++i) { + variance16_kernel_sse2(src + 0, ref + 0, sse, sum); + variance16_kernel_sse2(src + 16, ref + 16, sse, sum); src += src_stride; ref += ref_stride; } +} - // sum - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); - vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); - *sum = - (int16_t)_mm_extract_epi16(vsum, 0) + (int16_t)_mm_extract_epi16(vsum, 1); - - // sse - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8)); - vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4)); - *sse = _mm_cvtsi128_si32(vsse); -} - -static void variance_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, int w, - int h, unsigned int *sse, int *sum, - getNxMvar_fn_t var_fn, int block_size) { - int i, j; - - *sse = 0; - *sum = 0; - - for (i = 0; i < h; i += block_size) { - for (j = 0; j < w; j += block_size) { - unsigned int sse0; - int sum0; - var_fn(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, - ref_stride, &sse0, &sum0); - *sse += sse0; - *sum += sum0; - } +static INLINE void variance64_sse2(const uint8_t *src, const int src_stride, + const uint8_t *ref, const int ref_stride, + const int h, __m128i *const sse, + __m128i *const sum) { + int i; + + assert(h <= 16); // May overflow for larger height. + // Don't initialize sse here since it's an accumulation. 
+ *sum = _mm_setzero_si128(); + + for (i = 0; i < h; ++i) { + variance16_kernel_sse2(src + 0, ref + 0, sse, sum); + variance16_kernel_sse2(src + 16, ref + 16, sse, sum); + variance16_kernel_sse2(src + 32, ref + 32, sse, sum); + variance16_kernel_sse2(src + 48, ref + 48, sse, sum); + src += src_stride; + ref += ref_stride; } } -unsigned int vpx_variance4x4_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, +void vpx_get8x8var_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, + int ref_stride, unsigned int *sse, int *sum) { + __m128i vsse, vsum; + variance8_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, sum); +} + +void vpx_get16x16var_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse, + int *sum) { + __m128i vsse, vsum; + variance16_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum); + variance_final_256_pel_sse2(vsse, vsum, sse, sum); +} + +unsigned int vpx_variance4x4_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + variance4_sse2(src, src_stride, ref, ref_stride, 4, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 4); } -unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride, +unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 8, 4, sse, &sum, - get4x4var_sse2, 4); + variance4_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 5); } -unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride, +unsigned int vpx_variance8x4_sse2(const uint8_t *src, 
int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 4, 8, sse, &sum, - get4x4var_sse2, 4); + variance8_sse2(src, src_stride, ref, ref_stride, 4, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 5); } -unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, +unsigned int vpx_variance8x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + variance8_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 6); } -unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, +unsigned int vpx_variance8x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 16, 8, sse, &sum, - vpx_get8x8var_sse2, 8); + variance8_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 7); } -unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride, - const unsigned char *ref, int ref_stride, +unsigned int vpx_variance16x8_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 8, 16, sse, &sum, - vpx_get8x8var_sse2, 8); + variance16_sse2(src, src_stride, ref, ref_stride, 8, &vsse, &vsum); + variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 7); } -unsigned int vpx_variance16x16_sse2(const unsigned char *src, 
int src_stride, - const unsigned char *ref, int ref_stride, +unsigned int vpx_variance16x16_sse2(const uint8_t *src, int src_stride, + const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum); + variance16_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum); + variance_final_256_pel_sse2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 8); } -unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride, +unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse, vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 32, sse, &sum, - vpx_get16x16var_sse2, 16); - return *sse - (unsigned int)(((int64_t)sum * sum) >> 10); + variance16_sse2(src, src_stride, ref, ref_stride, 32, &vsse, &vsum); + variance_final_512_pel_sse2(vsse, vsum, sse, &sum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 9); } unsigned int vpx_variance32x16_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 16, sse, &sum, - vpx_get16x16var_sse2, 16); + variance32_sse2(src, src_stride, ref, ref_stride, 16, &vsse, &vsum); + variance_final_512_pel_sse2(vsse, vsum, sse, &sum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 9); } -unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride, +unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum; int sum; - variance_sse2(src, src_stride, ref, ref_stride, 16, 32, sse, &sum, - vpx_get16x16var_sse2, 16); - return *sse - (unsigned int)(((int64_t)sum * sum) >> 9); + variance32_sse2(src, src_stride, ref, ref_stride, 
32, &vsse, &vsum); + *sse = add32x4_sse2(vsse); + sum = sum_final_sse2(vsum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 10); } -unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride, +unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); int sum; - variance_sse2(src, src_stride, ref, ref_stride, 64, 64, sse, &sum, - vpx_get16x16var_sse2, 16); - return *sse - (unsigned int)(((int64_t)sum * sum) >> 12); + int i = 0; + + for (i = 0; i < 2; i++) { + __m128i vsum16; + variance32_sse2(src + 32 * i * src_stride, src_stride, + ref + 32 * i * ref_stride, ref_stride, 32, &vsse, &vsum16); + vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); + } + *sse = add32x4_sse2(vsse); + sum = add32x4_sse2(vsum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 11); } unsigned int vpx_variance64x32_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); int sum; - variance_sse2(src, src_stride, ref, ref_stride, 64, 32, sse, &sum, - vpx_get16x16var_sse2, 16); + int i = 0; + + for (i = 0; i < 2; i++) { + __m128i vsum16; + variance64_sse2(src + 16 * i * src_stride, src_stride, + ref + 16 * i * ref_stride, ref_stride, 16, &vsse, &vsum16); + vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); + } + *sse = add32x4_sse2(vsse); + sum = add32x4_sse2(vsum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 11); } -unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride, +unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, unsigned int *sse) { + __m128i vsse = _mm_setzero_si128(); + __m128i vsum = _mm_setzero_si128(); int sum; - variance_sse2(src, src_stride, ref, ref_stride, 32, 64, sse, &sum, - vpx_get16x16var_sse2, 
16); - return *sse - (unsigned int)(((int64_t)sum * sum) >> 11); + int i = 0; + + for (i = 0; i < 4; i++) { + __m128i vsum16; + variance64_sse2(src + 16 * i * src_stride, src_stride, + ref + 16 * i * ref_stride, ref_stride, 16, &vsse, &vsum16); + vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); + } + *sse = add32x4_sse2(vsse); + sum = add32x4_sse2(vsum); + return *sse - (unsigned int)(((int64_t)sum * sum) >> 12); } unsigned int vpx_mse8x8_sse2(const uint8_t *src, int src_stride, diff --git a/chromium/third_party/libvpx/source/libvpx/y4menc.c b/chromium/third_party/libvpx/source/libvpx/y4menc.c index 05018dbc433..02b729e5bbb 100644 --- a/chromium/third_party/libvpx/source/libvpx/y4menc.c +++ b/chromium/third_party/libvpx/source/libvpx/y4menc.c @@ -17,11 +17,9 @@ int y4m_write_file_header(char *buf, size_t len, int width, int height, const char *color; switch (bit_depth) { case 8: - color = fmt == VPX_IMG_FMT_444A - ? "C444alpha\n" - : fmt == VPX_IMG_FMT_I444 - ? "C444\n" - : fmt == VPX_IMG_FMT_I422 ? "C422\n" : "C420jpeg\n"; + color = fmt == VPX_IMG_FMT_I444 + ? "C444\n" + : fmt == VPX_IMG_FMT_I422 ? "C422\n" : "C420jpeg\n"; break; case 9: color = fmt == VPX_IMG_FMT_I44416 diff --git a/chromium/third_party/libvpx/source/libvpx/y4minput.c b/chromium/third_party/libvpx/source/libvpx/y4minput.c index 56d5598276f..007bd9971b4 100644 --- a/chromium/third_party/libvpx/source/libvpx/y4minput.c +++ b/chromium/third_party/libvpx/source/libvpx/y4minput.c @@ -1031,30 +1031,6 @@ int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip, fprintf(stderr, "Unsupported conversion from 444p12 to 420jpeg\n"); return -1; } - } else if (strcmp(_y4m->chroma_type, "444alpha") == 0) { - _y4m->src_c_dec_h = 1; - _y4m->src_c_dec_v = 1; - if (only_420) { - _y4m->dst_c_dec_h = 2; - _y4m->dst_c_dec_v = 2; - _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h; - /*Chroma filter required: read into the aux buf first. 
- We need to make two filter passes, so we need some extra space in the - aux buffer. - The extra plane also gets read into the aux buf. - It will be discarded.*/ - _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h; - _y4m->convert = y4m_convert_444_420jpeg; - } else { - _y4m->vpx_fmt = VPX_IMG_FMT_444A; - _y4m->bps = 32; - _y4m->dst_c_dec_h = _y4m->src_c_dec_h; - _y4m->dst_c_dec_v = _y4m->src_c_dec_v; - _y4m->dst_buf_read_sz = 4 * _y4m->pic_w * _y4m->pic_h; - /*Natively supported: no conversion required.*/ - _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; - _y4m->convert = y4m_convert_null; - } } else if (strcmp(_y4m->chroma_type, "mono") == 0) { _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0; _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2; |