diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2019-05-16 09:59:13 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2019-05-20 10:28:53 +0000 |
commit | 6c11fb357ec39bf087b8b632e2b1e375aef1b38b (patch) | |
tree | c8315530db18a8ee566521c39ab8a6af4f72bc03 /chromium/third_party/libvpx | |
parent | 3ffaed019d0772e59d6cdb2d0d32fe4834c31f72 (diff) | |
download | qtwebengine-chromium-6c11fb357ec39bf087b8b632e2b1e375aef1b38b.tar.gz |
BASELINE: Update Chromium to 74.0.3729.159
Change-Id: I8d2497da544c275415aedd94dd25328d555de811
Reviewed-by: Michael Brüning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/third_party/libvpx')
105 files changed, 4093 insertions, 2078 deletions
diff --git a/chromium/third_party/libvpx/BUILD.gn b/chromium/third_party/libvpx/BUILD.gn index 764ead5b356..4a6d99b26b6 100644 --- a/chromium/third_party/libvpx/BUILD.gn +++ b/chromium/third_party/libvpx/BUILD.gn @@ -335,7 +335,7 @@ static_library("libvpx") { sources = libvpx_srcs_arm } } else if (current_cpu == "arm64") { - if (is_chromeos) { + if (is_chromeos || is_win) { sources = libvpx_srcs_arm64_highbd } else { sources = libvpx_srcs_arm64 diff --git a/chromium/third_party/libvpx/README.chromium b/chromium/third_party/libvpx/README.chromium index 79996f48cc6..331b7f0917f 100644 --- a/chromium/third_party/libvpx/README.chromium +++ b/chromium/third_party/libvpx/README.chromium @@ -1,13 +1,13 @@ Name: libvpx URL: http://www.webmproject.org -Version: v1.7.0 +Version: v1.8.0 License: BSD License File: source/libvpx/LICENSE Security Critical: yes -Date: Wednesday January 16 2019 -Branch: master -Commit: 9ecc0e779a29281e5698451bfd1b3ebe8f053bfd +Date: Monday April 01 2019 +Branch: m74-3729 +Commit: e178ce25958b765ca67e72093d3248d4cbae7967 Description: Contains the sources used to compile libvpx binaries used by Google Chrome and diff --git a/chromium/third_party/libvpx/generate_gni.sh b/chromium/third_party/libvpx/generate_gni.sh index 8c0ac5c5418..2d6c8154e81 100755 --- a/chromium/third_party/libvpx/generate_gni.sh +++ b/chromium/third_party/libvpx/generate_gni.sh @@ -481,6 +481,8 @@ if [ -z $ONLY_CONFIGS ]; then make libvpx_srcs.txt target=libs $config > /dev/null convert_srcs_to_project_files libvpx_srcs.txt libvpx_srcs_arm64_highbd + echo "ARM64 Windows uses the ARM64 Linux HighBD source list. No need to generate it." + echo "Generate MIPS source list." 
config=$(print_config_basic linux/mipsel) make_clean diff --git a/chromium/third_party/libvpx/libvpx_srcs.gni b/chromium/third_party/libvpx/libvpx_srcs.gni index 1b76cc9bb4e..13614df8e3b 100644 --- a/chromium/third_party/libvpx/libvpx_srcs.gni +++ b/chromium/third_party/libvpx/libvpx_srcs.gni @@ -188,6 +188,8 @@ libvpx_srcs_x86 = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.c", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_360.c", @@ -442,7 +444,8 @@ libvpx_srcs_x86_sse2 = [ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c", ] -libvpx_srcs_x86_sse3 = [] +libvpx_srcs_x86_sse3 = [ +] libvpx_srcs_x86_ssse3 = [ "//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c", @@ -478,7 +481,8 @@ libvpx_srcs_x86_avx2 = [ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c", ] -libvpx_srcs_x86_avx512 = [] +libvpx_srcs_x86_avx512 = [ +] libvpx_srcs_x86_64 = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.h", @@ -664,6 +668,8 @@ libvpx_srcs_x86_64 = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + 
"//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.c", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_360.c", @@ -891,8 +897,9 @@ libvpx_srcs_x86_64_assembly = [ "//third_party/libvpx/source/libvpx/vpx_ports/emms_mmx.asm", "//third_party/libvpx/source/libvpx/vpx_ports/float_control_word.asm", ] -libvpx_srcs_x86_64_mmx = - [ "//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_mmx.c" ] +libvpx_srcs_x86_64_mmx = [ + "//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_mmx.c", +] libvpx_srcs_x86_64_sse2 = [ "//third_party/libvpx/source/libvpx/vp8/common/x86/bilinear_filter_sse2.c", "//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_sse2.c", @@ -923,7 +930,8 @@ libvpx_srcs_x86_64_sse2 = [ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c", ] -libvpx_srcs_x86_64_sse3 = [] +libvpx_srcs_x86_64_sse3 = [ +] libvpx_srcs_x86_64_ssse3 = [ "//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_quantize_ssse3.c", "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c", @@ -959,7 +967,8 @@ libvpx_srcs_x86_64_avx2 = [ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c", "//third_party/libvpx/source/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c", ] -libvpx_srcs_x86_64_avx512 = [] +libvpx_srcs_x86_64_avx512 = [ +] libvpx_srcs_arm = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.h", @@ -1143,6 +1152,8 @@ libvpx_srcs_arm = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + 
"//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.c", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_360.c", @@ -1300,7 +1311,8 @@ libvpx_srcs_arm = [ "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.c", "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.h", ] -libvpx_srcs_arm_assembly = [] +libvpx_srcs_arm_assembly = [ +] libvpx_srcs_arm_neon = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.h", @@ -1507,6 +1519,8 @@ libvpx_srcs_arm_neon = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_denoiser_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_error_neon.c", @@ -1909,6 +1923,8 @@ libvpx_srcs_arm_neon_cpu_detect = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.c", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_360.c", @@ -2352,6 +2368,8 @@ libvpx_srcs_arm64 = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", 
"//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_denoiser_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_error_neon.c", @@ -2552,7 +2570,8 @@ libvpx_srcs_arm64 = [ "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.c", "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.h", ] -libvpx_srcs_arm64_assembly = [] +libvpx_srcs_arm64_assembly = [ +] libvpx_srcs_arm_neon_highbd = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.h", @@ -2762,6 +2781,8 @@ libvpx_srcs_arm_neon_highbd = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_denoiser_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_frame_scale_neon.c", @@ -3200,6 +3221,8 @@ libvpx_srcs_arm64_highbd = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c", 
"//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_denoiser_neon.c", "//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_frame_scale_neon.c", @@ -3413,7 +3436,8 @@ libvpx_srcs_arm64_highbd = [ "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.c", "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.h", ] -libvpx_srcs_arm64_highbd_assembly = [] +libvpx_srcs_arm64_highbd_assembly = [ +] libvpx_srcs_mips = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.h", @@ -3597,6 +3621,8 @@ libvpx_srcs_mips = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.c", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_360.c", @@ -3753,7 +3779,8 @@ libvpx_srcs_mips = [ "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.c", "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.h", ] -libvpx_srcs_mips_assembly = [] +libvpx_srcs_mips_assembly = [ +] libvpx_srcs_nacl = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.h", @@ -3937,6 +3964,8 @@ libvpx_srcs_nacl = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.c", 
"//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_360.c", @@ -4092,7 +4121,8 @@ libvpx_srcs_nacl = [ "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.c", "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.h", ] -libvpx_srcs_nacl_assembly = [] +libvpx_srcs_nacl_assembly = [ +] libvpx_srcs_generic = [ "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c", "//third_party/libvpx/source/libvpx/vp8/common/alloccommon.h", @@ -4276,6 +4306,8 @@ libvpx_srcs_generic = [ "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.c", "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_dsubexp.h", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c", + "//third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.c", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_alt_ref_aq.h", "//third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_360.c", @@ -4431,4 +4463,5 @@ libvpx_srcs_generic = [ "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.c", "//third_party/libvpx/source/libvpx/vpx_util/vpx_write_yuv_frame.h", ] -libvpx_srcs_generic_assembly = [] +libvpx_srcs_generic_assembly = [ +] diff --git a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm index beb3351d00d..7a7f7fb2f44 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.asm @@ -3,7 +3,6 @@ .set WIDE_REFERENCE, 0 .set ARCHITECTURE, 5 - .set DO1STROUNDING, 0 .syntax unified .set ARCH_ARM , 1 .set ARCH_MIPS , 0 @@ -89,6 +88,5 @@ .set CONFIG_FP_MB_STATS , 0 .set CONFIG_EMULATE_HARDWARE , 0 .set CONFIG_NON_GREEDY_MV , 0 -.set CONFIG_ML_VAR_PARTITION , 0 .set 
DECODE_WIDTH_LIMIT , 16384 .set DECODE_HEIGHT_LIMIT , 16384 diff --git a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h index 7c92a2269b9..c19d1d90a3a 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/ios/arm-neon/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm index 2a02bb3a236..c4860ad7d6e 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.asm @@ -3,7 +3,6 @@ .set WIDE_REFERENCE, 0 .set ARCHITECTURE, 5 - .set DO1STROUNDING, 0 .syntax unified .set ARCH_ARM , 1 .set ARCH_MIPS , 0 @@ -89,6 +88,5 @@ .set CONFIG_FP_MB_STATS , 0 .set CONFIG_EMULATE_HARDWARE , 0 .set CONFIG_NON_GREEDY_MV , 0 -.set CONFIG_ML_VAR_PARTITION , 0 .set DECODE_WIDTH_LIMIT , 16384 .set DECODE_HEIGHT_LIMIT , 16384 diff --git a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h index 3437c03c2d6..e4de0be8381 100644 --- a/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/ios/arm64/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm 
b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm index 1733c41a544..0d29fb529db 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm @@ -1,6 +1,5 @@ @ This file was created from a .asm file @ using the ads2gas.pl script. - .equ DO1STROUNDING, 0 .syntax unified .equ ARCH_ARM , 1 .equ ARCH_MIPS , 0 @@ -86,7 +85,6 @@ .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 .equ CONFIG_NON_GREEDY_MV , 0 -.equ CONFIG_ML_VAR_PARTITION , 0 .equ DECODE_WIDTH_LIMIT , 16384 .equ DECODE_HEIGHT_LIMIT , 16384 .section .note.GNU-stack,"",%progbits diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h index 1af5ae8954c..58b42323255 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-highbd/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon-highbd/vpx_config.asm index 75e9f0fccf1..f7a28f844a8 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-highbd/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-highbd/vpx_config.asm @@ -1,6 +1,5 @@ @ This file was created from a .asm file @ using the ads2gas.pl script. 
- .equ DO1STROUNDING, 0 .syntax unified .equ ARCH_ARM , 1 .equ ARCH_MIPS , 0 @@ -86,7 +85,6 @@ .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 .equ CONFIG_NON_GREEDY_MV , 0 -.equ CONFIG_ML_VAR_PARTITION , 0 .equ DECODE_WIDTH_LIMIT , 16384 .equ DECODE_HEIGHT_LIMIT , 16384 .section .note.GNU-stack,"",%progbits diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-highbd/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-highbd/vpx_config.h index 3e56331e538..5df6afd8cb1 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon-highbd/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-highbd/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm index 9887adfc6a7..64cec8570ec 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm @@ -1,6 +1,5 @@ @ This file was created from a .asm file @ using the ads2gas.pl script. 
- .equ DO1STROUNDING, 0 .syntax unified .equ ARCH_ARM , 1 .equ ARCH_MIPS , 0 @@ -86,7 +85,6 @@ .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 .equ CONFIG_NON_GREEDY_MV , 0 -.equ CONFIG_ML_VAR_PARTITION , 0 .equ DECODE_WIDTH_LIMIT , 16384 .equ DECODE_HEIGHT_LIMIT , 16384 .section .note.GNU-stack,"",%progbits diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h index 7c92a2269b9..c19d1d90a3a 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm index 13377ed6f8b..a13825c7c28 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm @@ -1,6 +1,5 @@ @ This file was created from a .asm file @ using the ads2gas.pl script. 
- .equ DO1STROUNDING, 0 .syntax unified .equ ARCH_ARM , 1 .equ ARCH_MIPS , 0 @@ -86,7 +85,6 @@ .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 .equ CONFIG_NON_GREEDY_MV , 0 -.equ CONFIG_ML_VAR_PARTITION , 0 .equ DECODE_WIDTH_LIMIT , 16384 .equ DECODE_HEIGHT_LIMIT , 16384 .section .note.GNU-stack,"",%progbits diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h index e699eabf404..acc49355eb9 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/arm64-highbd/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm64-highbd/vpx_config.asm index 8863b2cbee2..f94c4910c52 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64-highbd/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm64-highbd/vpx_config.asm @@ -1,6 +1,5 @@ @ This file was created from a .asm file @ using the ads2gas.pl script. 
- .equ DO1STROUNDING, 0 .syntax unified .equ ARCH_ARM , 1 .equ ARCH_MIPS , 0 @@ -86,7 +85,6 @@ .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 .equ CONFIG_NON_GREEDY_MV , 0 -.equ CONFIG_ML_VAR_PARTITION , 0 .equ DECODE_WIDTH_LIMIT , 16384 .equ DECODE_HEIGHT_LIMIT , 16384 .section .note.GNU-stack,"",%progbits diff --git a/chromium/third_party/libvpx/source/config/linux/arm64-highbd/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm64-highbd/vpx_config.h index e57b1c42889..1a03c79a33c 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64-highbd/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm64-highbd/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm index d4de564055a..559e41d6a99 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm @@ -1,6 +1,5 @@ @ This file was created from a .asm file @ using the ads2gas.pl script. 
- .equ DO1STROUNDING, 0 .syntax unified .equ ARCH_ARM , 1 .equ ARCH_MIPS , 0 @@ -86,7 +85,6 @@ .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 .equ CONFIG_NON_GREEDY_MV , 0 -.equ CONFIG_ML_VAR_PARTITION , 0 .equ DECODE_WIDTH_LIMIT , 16384 .equ DECODE_HEIGHT_LIMIT , 16384 .section .note.GNU-stack,"",%progbits diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h index 3437c03c2d6..e4de0be8381 100644 --- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm index 9bae4f7ce9e..fdb927f255d 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm @@ -1,6 +1,5 @@ @ This file was created from a .asm file @ using the ads2gas.pl script. 
- .equ DO1STROUNDING, 0 .syntax unified .equ ARCH_ARM , 0 .equ ARCH_MIPS , 0 @@ -86,7 +85,6 @@ .equ CONFIG_FP_MB_STATS , 0 .equ CONFIG_EMULATE_HARDWARE , 0 .equ CONFIG_NON_GREEDY_MV , 0 -.equ CONFIG_ML_VAR_PARTITION , 0 .equ DECODE_WIDTH_LIMIT , 16384 .equ DECODE_HEIGHT_LIMIT , 16384 .section .note.GNU-stack,"",%progbits diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h index 9578b563a5d..abc94bfc69f 100644 --- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm index 57637ae08a6..5569f0582c4 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm @@ -82,6 +82,5 @@ %define CONFIG_FP_MB_STATS 0 %define CONFIG_EMULATE_HARDWARE 0 %define CONFIG_NON_GREEDY_MV 0 -%define CONFIG_ML_VAR_PARTITION 0 %define DECODE_WIDTH_LIMIT 16384 %define DECODE_HEIGHT_LIMIT 16384 diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h index a65ea2d53e2..fff9290cfe5 100644 --- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 
#endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h index d79a46a9ea3..7941761ff29 100644 --- a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h index 9910a7ff03b..38c667e3639 100644 --- a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm index 0f7956f63aa..613735d8ea8 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm @@ -82,6 +82,5 @@ %define CONFIG_FP_MB_STATS 0 %define CONFIG_EMULATE_HARDWARE 0 %define CONFIG_NON_GREEDY_MV 0 -%define CONFIG_ML_VAR_PARTITION 0 %define DECODE_WIDTH_LIMIT 16384 %define DECODE_HEIGHT_LIMIT 16384 diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h index 260602b9784..677a9fcf702 100644 --- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h +++ 
b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm index 57637ae08a6..5569f0582c4 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm @@ -82,6 +82,5 @@ %define CONFIG_FP_MB_STATS 0 %define CONFIG_EMULATE_HARDWARE 0 %define CONFIG_NON_GREEDY_MV 0 -%define CONFIG_ML_VAR_PARTITION 0 %define DECODE_WIDTH_LIMIT 16384 %define DECODE_HEIGHT_LIMIT 16384 diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h index a65ea2d53e2..fff9290cfe5 100644 --- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm index 0f7956f63aa..613735d8ea8 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm @@ -82,6 +82,5 @@ %define CONFIG_FP_MB_STATS 0 %define CONFIG_EMULATE_HARDWARE 0 %define CONFIG_NON_GREEDY_MV 0 -%define CONFIG_ML_VAR_PARTITION 0 %define DECODE_WIDTH_LIMIT 16384 %define DECODE_HEIGHT_LIMIT 16384 diff --git 
a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h index 260602b9784..677a9fcf702 100644 --- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h index 9578b563a5d..abc94bfc69f 100644 --- a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/vpx_version.h b/chromium/third_party/libvpx/source/config/vpx_version.h index 5e0b5830767..a36787a57e6 100644 --- a/chromium/third_party/libvpx/source/config/vpx_version.h +++ b/chromium/third_party/libvpx/source/config/vpx_version.h @@ -1,8 +1,8 @@ // This file is generated. Do not edit. 
#define VERSION_MAJOR 1 -#define VERSION_MINOR 7 +#define VERSION_MINOR 8 #define VERSION_PATCH 0 -#define VERSION_EXTRA "1676-g9ecc0e779a" +#define VERSION_EXTRA "208-ge178ce2595" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) -#define VERSION_STRING_NOSP "v1.7.0-1676-g9ecc0e779a" -#define VERSION_STRING " v1.7.0-1676-g9ecc0e779a" +#define VERSION_STRING_NOSP "v1.8.0-208-ge178ce2595" +#define VERSION_STRING " v1.8.0-208-ge178ce2595" diff --git a/chromium/third_party/libvpx/source/config/win/arm64/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/arm64/vpx_config.asm index f0976ca75d3..02c1c51ad59 100644 --- a/chromium/third_party/libvpx/source/config/win/arm64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/win/arm64/vpx_config.asm @@ -3,7 +3,6 @@ .set WIDE_REFERENCE, 0 .set ARCHITECTURE, 5 - .set DO1STROUNDING, 0 .syntax unified .set ARCH_ARM , 1 .set ARCH_MIPS , 0 @@ -89,6 +88,5 @@ .set CONFIG_FP_MB_STATS , 0 .set CONFIG_EMULATE_HARDWARE , 0 .set CONFIG_NON_GREEDY_MV , 0 -.set CONFIG_ML_VAR_PARTITION , 0 .set DECODE_WIDTH_LIMIT , 16384 .set DECODE_HEIGHT_LIMIT , 16384 diff --git a/chromium/third_party/libvpx/source/config/win/arm64/vpx_config.h b/chromium/third_party/libvpx/source/config/win/arm64/vpx_config.h index 0f793cb6dba..e05718cce25 100644 --- a/chromium/third_party/libvpx/source/config/win/arm64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/win/arm64/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm index 054b531a305..6c3ef6ca310 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm +++ 
b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm @@ -82,6 +82,5 @@ %define CONFIG_FP_MB_STATS 0 %define CONFIG_EMULATE_HARDWARE 0 %define CONFIG_NON_GREEDY_MV 0 -%define CONFIG_ML_VAR_PARTITION 0 %define DECODE_WIDTH_LIMIT 16384 %define DECODE_HEIGHT_LIMIT 16384 diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h index 2444863db9f..2d967a0de60 100644 --- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm index f992b5147dd..d104fd3fcd9 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm +++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm @@ -82,6 +82,5 @@ %define CONFIG_FP_MB_STATS 0 %define CONFIG_EMULATE_HARDWARE 0 %define CONFIG_NON_GREEDY_MV 0 -%define CONFIG_ML_VAR_PARTITION 0 %define DECODE_WIDTH_LIMIT 16384 %define DECODE_HEIGHT_LIMIT 16384 diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h index 093f2546478..8a07c942ebc 100644 --- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h +++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h @@ -94,7 +94,6 @@ #define CONFIG_FP_MB_STATS 0 #define CONFIG_EMULATE_HARDWARE 0 #define CONFIG_NON_GREEDY_MV 0 -#define CONFIG_ML_VAR_PARTITION 0 #define DECODE_WIDTH_LIMIT 16384 #define DECODE_HEIGHT_LIMIT 16384 #endif /* VPX_CONFIG_H */ diff --git 
a/chromium/third_party/libvpx/source/libvpx/.mailmap b/chromium/third_party/libvpx/source/libvpx/.mailmap index 29af5106504..7c26790b8e5 100644 --- a/chromium/third_party/libvpx/source/libvpx/.mailmap +++ b/chromium/third_party/libvpx/source/libvpx/.mailmap @@ -4,9 +4,12 @@ Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com> Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com> Alpha Lam <hclam@google.com> <hclam@chromium.org> Chris Cunningham <chcunningham@chromium.org> +Chi Yo Tsai <chiyotsai@google.com> Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com> Deb Mukherjee <debargha@google.com> +Elliott Karpilovsky <elliottk@google.com> Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com> +Fyodor Kyslov <kyslov@google.com> Guillaume Martres <gmartres@google.com> <smarter3@gmail.com> Hangyu Kuang <hkuang@google.com> Hui Su <huisu@google.com> @@ -20,6 +23,7 @@ John Koleszar <jkoleszar@google.com> Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org> Marco Paniconi <marpan@google.com> Marco Paniconi <marpan@google.com> <marpan@chromium.org> +Martin Storsjö <martin@martin.st> Pascal Massimino <pascal.massimino@gmail.com> Paul Wilkins <paulwilkins@google.com> Peter Boström <pbos@chromium.org> <pbos@google.com> @@ -28,6 +32,7 @@ Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com> Ralph Giles <giles@xiph.org> <giles@entropywave.com> Ralph Giles <giles@xiph.org> <giles@mozilla.com> Ronald S. 
Bultje <rsbultje@gmail.com> <rbultje@google.com> +Sai Deng <sdeng@google.com> Sami Pietilä <samipietila@google.com> Shiyou Yin <yinshiyou-hf@loongson.cn> Tamar Levy <tamar.levy@intel.com> @@ -40,3 +45,4 @@ Urvang Joshi <urvang@google.com> <urvang@chromium.org> Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com> Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com> Yaowu Xu <yaowu@google.com> <Yaowu Xu> +xiwei gu <guxiwei-hf@loongson.cn> diff --git a/chromium/third_party/libvpx/source/libvpx/AUTHORS b/chromium/third_party/libvpx/source/libvpx/AUTHORS index 04c28724329..3f7a86dcd08 100644 --- a/chromium/third_party/libvpx/source/libvpx/AUTHORS +++ b/chromium/third_party/libvpx/source/libvpx/AUTHORS @@ -26,6 +26,7 @@ Brion Vibber <bvibber@wikimedia.org> changjun.yang <changjun.yang@intel.com> Charles 'Buck' Krasic <ckrasic@google.com> Cheng Chen <chengchen@google.com> +Chi Yo Tsai <chiyotsai@google.com> chm <chm@rock-chips.com> Chris Cunningham <chcunningham@chromium.org> Christian Duvivier <cduvivier@google.com> @@ -38,11 +39,13 @@ Dmitry Kovalev <dkovalev@google.com> Dragan Mrdjan <dmrdjan@mips.com> Ed Baker <edward.baker@intel.com> Ehsan Akhgari <ehsan.akhgari@gmail.com> +Elliott Karpilovsky <elliottk@google.com> Erik Niemeyer <erik.a.niemeyer@intel.com> Fabio Pedretti <fabio.ped@libero.it> Frank Galligan <fgalligan@google.com> Fredrik Söderquist <fs@opera.com> Fritz Koenig <frkoenig@google.com> +Fyodor Kyslov <kyslov@google.com> Gabriel Marin <gmx@chromium.org> Gaute Strokkenes <gaute.strokkenes@broadcom.com> Geza Lore <gezalore@gmail.com> @@ -55,6 +58,7 @@ Guillermo Ballester Valor <gbvalor@gmail.com> Hangyu Kuang <hkuang@google.com> Hanno Böck <hanno@hboeck.de> Han Shen <shenhan@google.com> +Harish Mahendrakar <harish.mahendrakar@ittiam.com> Henrik Lundin <hlundin@google.com> Hui Su <huisu@google.com> Ivan Krasin <krasin@chromium.org> @@ -81,6 +85,7 @@ Johann Koenig <johannkoenig@google.com> John Koleszar <jkoleszar@google.com> Johnny Klonaris <google@jawknee.com> John 
Stark <jhnstrk@gmail.com> +Jon Kunkee <jkunkee@microsoft.com> Joshua Bleecher Snyder <josh@treelinelabs.com> Joshua Litt <joshualitt@google.com> Julia Robson <juliamrobson@gmail.com> @@ -91,15 +96,18 @@ KO Myung-Hun <komh@chollian.net> Kyle Siefring <kylesiefring@gmail.com> Lawrence Velázquez <larryv@macports.org> Linfeng Zhang <linfengz@google.com> +Liu Peng <pengliu.mail@gmail.com> Lou Quillio <louquillio@google.com> Luca Barbato <lu_zero@gentoo.org> +Luc Trudeau <luc@trud.ca> Makoto Kato <makoto.kt@gmail.com> Mans Rullgard <mans@mansr.com> Marco Paniconi <marpan@google.com> Mark Mentovai <mark@chromium.org> Martin Ettl <ettl.martin78@googlemail.com> -Martin Storsjo <martin@martin.st> +Martin Storsjö <martin@martin.st> Matthew Heaney <matthewjheaney@chromium.org> +Matthias Räncker <theonetruecamper@gmx.de> Michael Kohler <michaelkohler@live.com> Mike Frysinger <vapier@chromium.org> Mike Hommey <mhommey@mozilla.com> @@ -107,10 +115,12 @@ Mikhal Shemer <mikhal@google.com> Min Chen <chenm003@gmail.com> Minghai Shang <minghai@google.com> Min Ye <yeemmi@google.com> +Mirko Bonadei <mbonadei@google.com> Moriyoshi Koizumi <mozo@mozo.jp> Morton Jonuschat <yabawock@gmail.com> Nathan E. Egge <negge@mozilla.com> Nico Weber <thakis@chromium.org> +Niveditha Rau <niveditha.rau@gmail.com> Parag Salasakar <img.mips1@gmail.com> Pascal Massimino <pascal.massimino@gmail.com> Patrik Westin <patrik.westin@gmail.com> @@ -129,9 +139,12 @@ Rafael de Lucena Valle <rafaeldelucena@gmail.com> Rahul Chaudhry <rahulchaudhry@google.com> Ralph Giles <giles@xiph.org> Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com> +Raphael Kubo da Costa <raphael.kubo.da.costa@intel.com> +Ritu Baldwa <ritu.baldwa@ittiam.com> Rob Bradford <rob@linux.intel.com> Ronald S. 
Bultje <rsbultje@gmail.com> Rui Ueyama <ruiu@google.com> +Sai Deng <sdeng@google.com> Sami Pietilä <samipietila@google.com> Sarah Parker <sarahparker@google.com> Sasi Inguva <isasi@google.com> @@ -139,12 +152,15 @@ Scott Graham <scottmg@chromium.org> Scott LaVarnway <slavarnway@google.com> Sean McGovern <gseanmcg@gmail.com> Sergey Kolomenkin <kolomenkin@gmail.com> +Sergey Silkin <ssilkin@google.com> Sergey Ulanov <sergeyu@chromium.org> Shimon Doodkin <helpmepro1@gmail.com> Shiyou Yin <yinshiyou-hf@loongson.cn> +Shubham Tandle <shubham.tandle@ittiam.com> Shunyao Li <shunyaoli@google.com> Stefan Holmer <holmer@google.com> Suman Sunkara <sunkaras@google.com> +Supradeep T R <supradeep.tr@ittiam.com> Sylvestre Ledru <sylvestre@mozilla.com> Taekhyun Kim <takim@nvidia.com> Takanori MATSUURA <t.matsuu@gmail.com> @@ -157,8 +173,11 @@ Timothy B. Terriberry <tterribe@xiph.org> Tom Finegan <tomfinegan@google.com> Tristan Matthews <le.businessman@gmail.com> Urvang Joshi <urvang@google.com> +Venkatarama NG. Avadhani <venkatarama.avadhani@ittiam.com> Vignesh Venkatasubramanian <vigneshv@google.com> Vlad Tsyrklevich <vtsyrklevich@chromium.org> +Wan-Teh Chang <wtc@google.com> +xiwei gu <guxiwei-hf@loongson.cn> Yaowu Xu <yaowu@google.com> Yi Luo <luoyi@google.com> Yongzhe Wang <yongzhe@google.com> diff --git a/chromium/third_party/libvpx/source/libvpx/CHANGELOG b/chromium/third_party/libvpx/source/libvpx/CHANGELOG index 52089df0600..3bdf8acfb1e 100644 --- a/chromium/third_party/libvpx/source/libvpx/CHANGELOG +++ b/chromium/third_party/libvpx/source/libvpx/CHANGELOG @@ -1,3 +1,44 @@ +2019-01-31 v1.8.0 "Northern Shoveler Duck" + This release focused on encoding performance for realtime and VOD use cases. + + - Upgrading: + This adds and improves several vp9 controls. Most are related to SVC: + VP9E_SET_SVC_FRAME_DROP_LAYER: + - Frame dropping in SVC. + VP9E_SET_SVC_INTER_LAYER_PRED: + - Inter-layer prediction in SVC. 
+ VP9E_SET_SVC_GF_TEMPORAL_REF: + - Enable long term temporal reference in SVC. + VP9E_SET_SVC_REF_FRAME_CONFIG/VP9E_GET_SVC_REF_FRAME_CONFIG: + - Extend and improve this control for better flexibility in setting SVC + pattern dynamically. + VP9E_SET_POSTENCODE_DROP: + - Allow for post-encode frame dropping (applies to non-SVC too). + VP9E_SET_SVC_SPATIAL_LAYER_SYNC: + - Enable spatial layer sync frames. + VP9E_SET_SVC_LAYER_ID: + - Extend api to specify temporal id for each spatial layers. + VP9E_SET_ROI_MAP: + - Extend Region of Interest functionality to VP9. + + - Enhancements: + 2 pass vp9 encoding has improved substantially. When using --auto-alt-ref=6, + we see approximately 8% for VBR and 10% for CQ. When using --auto-alt-ref=1, + the gains are approximately 4% for VBR and 5% for CQ. + + For real-time encoding, speed 7 has improved by ~5-10%. Encodes targeted at + screen sharing have improved when the content changes significantly (slide + sharing) or scrolls. There is a new speed 9 setting for mobile devices which + is about 10-20% faster than speed 8. + + - Bug fixes: + VP9 denoiser issue. + VP9 partition issue for 1080p. + VP9 rate control improvments. + Postprocessing Multi Frame Quality Enhancement (MFQE) issue. + VP8 multithread decoder issues. + A variety of fuzzing issues. + 2018-01-04 v1.7.0 "Mandarin Duck" This release focused on high bit depth performance (10/12 bit) and vp9 encoding improvements. diff --git a/chromium/third_party/libvpx/source/libvpx/README b/chromium/third_party/libvpx/source/libvpx/README index 318846ffbf1..61bee3e69c8 100644 --- a/chromium/third_party/libvpx/source/libvpx/README +++ b/chromium/third_party/libvpx/source/libvpx/README @@ -1,4 +1,4 @@ -README - 24 January 2018 +README - 31 January 2019 Welcome to the WebM VP8/VP9 Codec SDK! 
@@ -63,12 +63,14 @@ COMPILING THE APPLICATIONS/LIBRARIES: arm64-android-gcc arm64-darwin-gcc arm64-linux-gcc + arm64-win64-gcc arm64-win64-vs15 armv7-android-gcc armv7-darwin-gcc armv7-linux-rvct armv7-linux-gcc armv7-none-rvct + armv7-win32-gcc armv7-win32-vs14 armv7-win32-vs15 armv7s-darwin-gcc @@ -89,6 +91,7 @@ COMPILING THE APPLICATIONS/LIBRARIES: x86-darwin14-gcc x86-darwin15-gcc x86-darwin16-gcc + x86-darwin17-gcc x86-iphonesimulator-gcc x86-linux-gcc x86-linux-icc @@ -106,6 +109,7 @@ COMPILING THE APPLICATIONS/LIBRARIES: x86_64-darwin14-gcc x86_64-darwin15-gcc x86_64-darwin16-gcc + x86_64-darwin17-gcc x86_64-iphonesimulator-gcc x86_64-linux-gcc x86_64-linux-icc diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas.pl b/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas.pl index 0bf4816cc98..b6a8f53eae2 100755 --- a/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas.pl +++ b/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas.pl @@ -32,7 +32,6 @@ foreach my $arg (@ARGV) { print "@ This file was created from a .asm file\n"; print "@ using the ads2gas.pl script.\n"; -print "\t.equ DO1STROUNDING, 0\n"; print "\t.syntax unified\n"; if ($thumb) { print "\t.thumb\n"; diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas_apple.pl b/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas_apple.pl index 806fdd8b394..51248f73092 100755 --- a/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas_apple.pl +++ b/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas_apple.pl @@ -22,7 +22,6 @@ print "@ This file was created from a .asm file\n"; print "@ using the ads2gas_apple.pl script.\n\n"; print "\t.set WIDE_REFERENCE, 0\n"; print "\t.set ARCHITECTURE, 5\n"; -print "\t.set DO1STROUNDING, 0\n"; print "\t.syntax unified\n"; my %register_aliases; diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh 
b/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh index ae2b1cd4c8b..84515ecff4e 100755 --- a/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh +++ b/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh @@ -312,15 +312,15 @@ generate_vcxproj() { tag_content ApplicationType "Windows Store" tag_content ApplicationTypeRevision 8.1 fi - if [ $vs_ver -eq 15 ] && [ "${platforms[0]}" = "ARM64" ]; then + if [ "${platforms[0]}" = "ARM64" ]; then # Require the first Visual Studio version to have ARM64 support. tag_content MinimumVisualStudioVersion 15.9 - # Require a Windows SDK that has ARM64 support rather than the - # default of 8.1. + fi + if [ $vs_ver -eq 15 ] && [ "${platforms[0]}" = "ARM64" ]; then # Since VS 15 does not have a 'use latest SDK version' facility, - # set WindowsTargetPlatformVersion to the first official SDK - # version to have ARM64 support. - tag_content WindowsTargetPlatformVersion 10.0.17134.0 + # specifically require the contemporaneous SDK with official ARM64 + # support. 
+ tag_content WindowsTargetPlatformVersion 10.0.17763.0 fi close_tag PropertyGroup diff --git a/chromium/third_party/libvpx/source/libvpx/configure b/chromium/third_party/libvpx/source/libvpx/configure index 6204f10b6ca..2174544c308 100755 --- a/chromium/third_party/libvpx/source/libvpx/configure +++ b/chromium/third_party/libvpx/source/libvpx/configure @@ -272,7 +272,6 @@ EXPERIMENT_LIST=" fp_mb_stats emulate_hardware non_greedy_mv - ml_var_partition " CONFIG_LIST=" dependency_tracking @@ -420,6 +419,12 @@ process_cmdline() { } post_process_cmdline() { + if enabled coefficient_range_checking; then + echo "coefficient-range-checking is for decoders only, disabling encoders:" + soft_disable vp8_encoder + soft_disable vp9_encoder + fi + c="" # Enable all detected codecs, if they haven't been disabled @@ -620,9 +625,16 @@ process_toolchain() { check_cflags -Wundef && add_cflags_only -Wundef check_cflags -Wframe-larger-than=52000 && \ add_cflags_only -Wframe-larger-than=52000 + check_cflags -Wmissing-declarations && \ + add_cflags_only -Wmissing-declarations + check_cflags -Wmissing-prototypes && \ + add_cflags_only -Wmissing-prototypes if enabled mips || [ -z "${INLINE}" ]; then enabled extra_warnings || check_add_cflags -Wno-unused-function fi + # Enforce c89 for c files. Don't be too strict about it though. Allow + # gnu extensions like "//" for comments. + check_cflags -std=gnu89 && add_cflags_only -std=gnu89 # Avoid this warning for third_party C++ sources. Some reorganization # would be needed to apply this only to test/*.cc. 
check_cflags -Wshorten-64-to-32 && add_cflags_only -Wshorten-64-to-32 diff --git a/chromium/third_party/libvpx/source/libvpx/examples.mk b/chromium/third_party/libvpx/source/libvpx/examples.mk index a1d4eb68c2d..e0da4caa26e 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples.mk +++ b/chromium/third_party/libvpx/source/libvpx/examples.mk @@ -72,6 +72,7 @@ vpxdec.SRCS += vpx_ports/vpx_timer.h vpxdec.SRCS += vpx/vpx_integer.h vpxdec.SRCS += args.c args.h vpxdec.SRCS += ivfdec.c ivfdec.h +vpxdec.SRCS += y4minput.c y4minput.h vpxdec.SRCS += tools_common.c tools_common.h vpxdec.SRCS += y4menc.c y4menc.h ifeq ($(CONFIG_LIBYUV),yes) @@ -113,6 +114,7 @@ vpxenc.DESCRIPTION = Full featured encoder EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_spatial_svc_encoder.c vp9_spatial_svc_encoder.SRCS += args.c args.h vp9_spatial_svc_encoder.SRCS += ivfenc.c ivfenc.h +vp9_spatial_svc_encoder.SRCS += y4minput.c y4minput.h vp9_spatial_svc_encoder.SRCS += tools_common.c tools_common.h vp9_spatial_svc_encoder.SRCS += video_common.h vp9_spatial_svc_encoder.SRCS += video_writer.h video_writer.c @@ -129,6 +131,7 @@ endif EXAMPLES-$(CONFIG_ENCODERS) += vpx_temporal_svc_encoder.c vpx_temporal_svc_encoder.SRCS += ivfenc.c ivfenc.h +vpx_temporal_svc_encoder.SRCS += y4minput.c y4minput.h vpx_temporal_svc_encoder.SRCS += tools_common.c tools_common.h vpx_temporal_svc_encoder.SRCS += video_common.h vpx_temporal_svc_encoder.SRCS += video_writer.h video_writer.c @@ -138,6 +141,7 @@ vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c simple_decoder.GUID = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC simple_decoder.SRCS += ivfdec.h ivfdec.c +simple_decoder.SRCS += y4minput.c y4minput.h simple_decoder.SRCS += tools_common.h tools_common.c simple_decoder.SRCS += video_common.h simple_decoder.SRCS += video_reader.h video_reader.c @@ -147,6 +151,7 @@ simple_decoder.SRCS += vpx_ports/msvc.h simple_decoder.DESCRIPTION = Simplified decoder loop 
EXAMPLES-$(CONFIG_DECODERS) += postproc.c postproc.SRCS += ivfdec.h ivfdec.c +postproc.SRCS += y4minput.c y4minput.h postproc.SRCS += tools_common.h tools_common.c postproc.SRCS += video_common.h postproc.SRCS += video_reader.h video_reader.c @@ -158,6 +163,7 @@ postproc.DESCRIPTION = Decoder postprocessor control EXAMPLES-$(CONFIG_DECODERS) += decode_to_md5.c decode_to_md5.SRCS += md5_utils.h md5_utils.c decode_to_md5.SRCS += ivfdec.h ivfdec.c +decode_to_md5.SRCS += y4minput.c y4minput.h decode_to_md5.SRCS += tools_common.h tools_common.c decode_to_md5.SRCS += video_common.h decode_to_md5.SRCS += video_reader.h video_reader.c @@ -168,6 +174,7 @@ decode_to_md5.GUID = 59120B9B-2735-4BFE-B022-146CA340FE42 decode_to_md5.DESCRIPTION = Frame by frame MD5 checksum EXAMPLES-$(CONFIG_ENCODERS) += simple_encoder.c simple_encoder.SRCS += ivfenc.h ivfenc.c +simple_encoder.SRCS += y4minput.c y4minput.h simple_encoder.SRCS += tools_common.h tools_common.c simple_encoder.SRCS += video_common.h simple_encoder.SRCS += video_writer.h video_writer.c @@ -176,6 +183,7 @@ simple_encoder.GUID = 4607D299-8A71-4D2C-9B1D-071899B6FBFD simple_encoder.DESCRIPTION = Simplified encoder loop EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_lossless_encoder.c vp9_lossless_encoder.SRCS += ivfenc.h ivfenc.c +vp9_lossless_encoder.SRCS += y4minput.c y4minput.h vp9_lossless_encoder.SRCS += tools_common.h tools_common.c vp9_lossless_encoder.SRCS += video_common.h vp9_lossless_encoder.SRCS += video_writer.h video_writer.c @@ -184,6 +192,7 @@ vp9_lossless_encoder.GUID = B63C7C88-5348-46DC-A5A6-CC151EF93366 vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder EXAMPLES-$(CONFIG_ENCODERS) += twopass_encoder.c twopass_encoder.SRCS += ivfenc.h ivfenc.c +twopass_encoder.SRCS += y4minput.c y4minput.h twopass_encoder.SRCS += tools_common.h tools_common.c twopass_encoder.SRCS += video_common.h twopass_encoder.SRCS += video_writer.h video_writer.c @@ -192,6 +201,7 @@ twopass_encoder.GUID = 
73494FA6-4AF9-4763-8FBB-265C92402FD8 twopass_encoder.DESCRIPTION = Two-pass encoder loop EXAMPLES-$(CONFIG_DECODERS) += decode_with_drops.c decode_with_drops.SRCS += ivfdec.h ivfdec.c +decode_with_drops.SRCS += y4minput.c y4minput.h decode_with_drops.SRCS += tools_common.h tools_common.c decode_with_drops.SRCS += video_common.h decode_with_drops.SRCS += video_reader.h video_reader.c @@ -202,6 +212,7 @@ decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26 decode_with_drops.DESCRIPTION = Drops frames while decoding EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c set_maps.SRCS += ivfenc.h ivfenc.c +set_maps.SRCS += y4minput.c y4minput.h set_maps.SRCS += tools_common.h tools_common.c set_maps.SRCS += video_common.h set_maps.SRCS += video_writer.h video_writer.c @@ -210,6 +221,7 @@ set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F set_maps.DESCRIPTION = Set active and ROI maps EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c vp8cx_set_ref.SRCS += ivfenc.h ivfenc.c +vp8cx_set_ref.SRCS += y4minput.c y4minput.h vp8cx_set_ref.SRCS += tools_common.h tools_common.c vp8cx_set_ref.SRCS += video_common.h vp8cx_set_ref.SRCS += video_writer.h video_writer.c @@ -221,6 +233,7 @@ ifeq ($(CONFIG_VP9_ENCODER),yes) ifeq ($(CONFIG_DECODERS),yes) EXAMPLES-yes += vp9cx_set_ref.c vp9cx_set_ref.SRCS += ivfenc.h ivfenc.c +vp9cx_set_ref.SRCS += y4minput.c y4minput.h vp9cx_set_ref.SRCS += tools_common.h tools_common.c vp9cx_set_ref.SRCS += video_common.h vp9cx_set_ref.SRCS += video_writer.h video_writer.c @@ -233,6 +246,7 @@ ifeq ($(CONFIG_MULTI_RES_ENCODING),yes) ifeq ($(CONFIG_LIBYUV),yes) EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_multi_resolution_encoder.c vp8_multi_resolution_encoder.SRCS += ivfenc.h ivfenc.c +vp8_multi_resolution_encoder.SRCS += y4minput.c y4minput.h vp8_multi_resolution_encoder.SRCS += tools_common.h tools_common.c vp8_multi_resolution_encoder.SRCS += video_writer.h video_writer.c vp8_multi_resolution_encoder.SRCS += vpx_ports/msvc.h diff --git 
a/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c index b14b1ff3972..e72f8a01970 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c @@ -61,7 +61,7 @@ void usage_exit(void) { exit(EXIT_FAILURE); } int (*read_frame_p)(FILE *f, vpx_image_t *img); -static int read_frame(FILE *f, vpx_image_t *img) { +static int mulres_read_frame(FILE *f, vpx_image_t *img) { size_t nbytes, to_read; int res = 1; @@ -75,7 +75,7 @@ static int read_frame(FILE *f, vpx_image_t *img) { return res; } -static int read_frame_by_row(FILE *f, vpx_image_t *img) { +static int mulres_read_frame_by_row(FILE *f, vpx_image_t *img) { size_t nbytes, to_read; int res = 1; int plane; @@ -471,9 +471,9 @@ int main(int argc, char **argv) { die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h); if (raw[0].stride[VPX_PLANE_Y] == (int)raw[0].d_w) - read_frame_p = read_frame; + read_frame_p = mulres_read_frame; else - read_frame_p = read_frame_by_row; + read_frame_p = mulres_read_frame_by_row; for (i = 0; i < NUM_ENCODERS; i++) if (outfile[i]) write_ivf_file_header(outfile[i], &cfg[i], 0); diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c index f8093e1bf14..05fd4d9d0cf 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c @@ -30,8 +30,12 @@ #include "vpx/vpx_encoder.h" #include "../vpxstats.h" #include "vp9/encoder/vp9_encoder.h" +#include "./y4minput.h" + #define OUTPUT_RC_STATS 1 +static const arg_def_t outputfile = + ARG_DEF("o", "output", 1, "Output filename"); static const arg_def_t skip_frames_arg = ARG_DEF("s", "skip-frames", 1, 
"input frames to skip"); static const arg_def_t frames_arg = @@ -110,6 +114,7 @@ static const arg_def_t bitdepth_arg = ARG_DEF_ENUM( #endif // CONFIG_VP9_HIGHBITDEPTH static const arg_def_t *svc_args[] = { &frames_arg, + &outputfile, &width_arg, &height_arg, &timebase_arg, @@ -161,7 +166,6 @@ static const int32_t default_speed = -1; // -1 means use library default. static const uint32_t default_threads = 0; // zero means use library default. typedef struct { - const char *input_filename; const char *output_filename; uint32_t frames_to_code; uint32_t frames_to_skip; @@ -176,7 +180,7 @@ typedef struct { static const char *exec_name; void usage_exit(void) { - fprintf(stderr, "Usage: %s <options> input_filename output_filename\n", + fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n", exec_name); fprintf(stderr, "Options:\n"); arg_show_usage(stderr, svc_args); @@ -235,6 +239,8 @@ static void parse_command_line(int argc, const char **argv_, if (arg_match(&arg, &frames_arg, argi)) { app_input->frames_to_code = arg_parse_uint(&arg); + } else if (arg_match(&arg, &outputfile, argi)) { + app_input->output_filename = arg.val; } else if (arg_match(&arg, &width_arg, argi)) { enc_cfg->g_w = arg_parse_uint(&arg); } else if (arg_match(&arg, &height_arg, argi)) { @@ -390,13 +396,18 @@ static void parse_command_line(int argc, const char **argv_, if (argi[0][0] == '-' && strlen(argi[0]) > 1) die("Error: Unrecognized option %s\n", *argi); - if (argv[0] == NULL || argv[1] == 0) { + if (argv[0] == NULL) { usage_exit(); } - app_input->input_filename = argv[0]; - app_input->output_filename = argv[1]; + app_input->input_ctx.filename = argv[0]; free(argv); + open_input_file(&app_input->input_ctx); + if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) { + enc_cfg->g_w = app_input->input_ctx.width; + enc_cfg->g_h = app_input->input_ctx.height; + } + if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 || enc_cfg->g_h % 2) die("Invalid resolution: %d x %d\n", 
enc_cfg->g_w, enc_cfg->g_h); @@ -738,11 +749,157 @@ static void set_frame_flags_bypass_mode_ex1( } } +#if CONFIG_VP9_DECODER +static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder, + const int frames_out, int *mismatch_seen) { + vpx_image_t enc_img, dec_img; + struct vp9_ref_frame ref_enc, ref_dec; + if (*mismatch_seen) return; + /* Get the internal reference frame */ + ref_enc.idx = 0; + ref_dec.idx = 0; + vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc); + enc_img = ref_enc.img; + vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec); + dec_img = ref_dec.img; +#if CONFIG_VP9_HIGHBITDEPTH + if ((enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) != + (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH)) { + if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + vpx_img_alloc(&enc_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH, + enc_img.d_w, enc_img.d_h, 16); + vpx_img_truncate_16_to_8(&enc_img, &ref_enc.img); + } + if (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + vpx_img_alloc(&dec_img, dec_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH, + dec_img.d_w, dec_img.d_h, 16); + vpx_img_truncate_16_to_8(&dec_img, &ref_dec.img); + } + } +#endif + + if (!compare_img(&enc_img, &dec_img)) { + int y[4], u[4], v[4]; +#if CONFIG_VP9_HIGHBITDEPTH + if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + find_mismatch_high(&enc_img, &dec_img, y, u, v); + } else { + find_mismatch(&enc_img, &dec_img, y, u, v); + } +#else + find_mismatch(&enc_img, &dec_img, y, u, v); +#endif + decoder->err = 1; + printf( + "Encode/decode mismatch on frame %d at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}\n", + frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1], + v[2], v[3]); + *mismatch_seen = frames_out; + } + + vpx_img_free(&enc_img); + vpx_img_free(&dec_img); +} +#endif + +#if OUTPUT_RC_STATS +static void svc_output_rc_stats( + vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *enc_cfg, + vpx_svc_layer_id_t *layer_id, const vpx_codec_cx_pkt_t *cx_pkt, + struct RateControlStats 
*rc, VpxVideoWriter **outfile, + const uint32_t frame_cnt, const double framerate) { + int num_layers_encoded = 0; + unsigned int sl, tl; + uint64_t sizes[8]; + uint64_t sizes_parsed[8]; + int count = 0; + double sum_bitrate = 0.0; + double sum_bitrate2 = 0.0; + vp9_zero(sizes); + vp9_zero(sizes_parsed); + vpx_codec_control(codec, VP9E_GET_SVC_LAYER_ID, layer_id); + parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, + sizes_parsed, &count); + if (enc_cfg->ss_number_layers == 1) sizes[0] = cx_pkt->data.frame.sz; + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + sizes[sl] = 0; + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { + sizes[sl] = sizes_parsed[num_layers_encoded]; + num_layers_encoded++; + } + } + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + unsigned int sl2; + uint64_t tot_size = 0; + for (sl2 = 0; sl2 <= sl; ++sl2) { + if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2]; + } + if (tot_size > 0) + vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf, + (size_t)(tot_size), cx_pkt->data.frame.pts); + } + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { + for (tl = layer_id->temporal_layer_id; tl < enc_cfg->ts_number_layers; + ++tl) { + const int layer = sl * enc_cfg->ts_number_layers + tl; + ++rc->layer_tot_enc_frames[layer]; + rc->layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; + // Keep count of rate control stats per layer, for non-key + // frames. + if (tl == (unsigned int)layer_id->temporal_layer_id && + !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { + rc->layer_avg_frame_size[layer] += 8.0 * sizes[sl]; + rc->layer_avg_rate_mismatch[layer] += + fabs(8.0 * sizes[sl] - rc->layer_pfb[layer]) / + rc->layer_pfb[layer]; + ++rc->layer_enc_frames[layer]; + } + } + } + } + + // Update for short-time encoding bitrate states, for moving + // window of size rc->window, shifted by rc->window / 2. 
+ // Ignore first window segment, due to key frame. + if (frame_cnt > (unsigned int)rc->window_size) { + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + if (cx_pkt->data.frame.spatial_layer_encoded[sl]) + sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; + } + if (frame_cnt % rc->window_size == 0) { + rc->window_count += 1; + rc->avg_st_encoding_bitrate += sum_bitrate / rc->window_size; + rc->variance_st_encoding_bitrate += + (sum_bitrate / rc->window_size) * (sum_bitrate / rc->window_size); + } + } + + // Second shifted window. + if (frame_cnt > (unsigned int)(rc->window_size + rc->window_size / 2)) { + for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { + sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate; + } + + if (frame_cnt > (unsigned int)(2 * rc->window_size) && + frame_cnt % rc->window_size == 0) { + rc->window_count += 1; + rc->avg_st_encoding_bitrate += sum_bitrate2 / rc->window_size; + rc->variance_st_encoding_bitrate += + (sum_bitrate2 / rc->window_size) * (sum_bitrate2 / rc->window_size); + } + } +} +#endif + int main(int argc, const char **argv) { AppInput app_input; VpxVideoWriter *writer = NULL; VpxVideoInfo info; - vpx_codec_ctx_t codec; + vpx_codec_ctx_t encoder; vpx_codec_enc_cfg_t enc_cfg; SvcContext svc_ctx; vpx_svc_frame_drop_t svc_drop_frame; @@ -752,7 +909,6 @@ int main(int argc, const char **argv) { vpx_codec_err_t res; int pts = 0; /* PTS starts at 0 */ int frame_duration = 1; /* 1 timebase tick per frame */ - FILE *infile = NULL; int end_of_stream = 0; int frames_received = 0; #if OUTPUT_RC_STATS @@ -760,42 +916,59 @@ int main(int argc, const char **argv) { struct RateControlStats rc; vpx_svc_layer_id_t layer_id; vpx_svc_ref_frame_config_t ref_frame_config; - unsigned int sl, tl; - double sum_bitrate = 0.0; - double sum_bitrate2 = 0.0; + unsigned int sl; double framerate = 30.0; #endif struct vpx_usec_timer timer; int64_t cx_time = 0; +#if CONFIG_INTERNAL_STATS + FILE *f = fopen("opsnr.stt", "a"); +#endif +#if 
CONFIG_VP9_DECODER + int mismatch_seen = 0; + vpx_codec_ctx_t decoder; +#endif memset(&svc_ctx, 0, sizeof(svc_ctx)); memset(&app_input, 0, sizeof(AppInput)); memset(&info, 0, sizeof(VpxVideoInfo)); memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); memset(&rc, 0, sizeof(struct RateControlStats)); exec_name = argv[0]; + + /* Setup default input stream settings */ + app_input.input_ctx.framerate.numerator = 30; + app_input.input_ctx.framerate.denominator = 1; + app_input.input_ctx.only_i420 = 1; + app_input.input_ctx.bit_depth = 0; + parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg); + // Y4M reader handles its own allocation. + if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) { // Allocate image buffer #if CONFIG_VP9_HIGHBITDEPTH - if (!vpx_img_alloc(&raw, - enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420 - : VPX_IMG_FMT_I42016, - enc_cfg.g_w, enc_cfg.g_h, 32)) { - die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); - } + if (!vpx_img_alloc(&raw, + enc_cfg.g_input_bit_depth == 8 ? 
VPX_IMG_FMT_I420 + : VPX_IMG_FMT_I42016, + enc_cfg.g_w, enc_cfg.g_h, 32)) { + die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); + } #else - if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) { - die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); - } + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) { + die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); + } #endif // CONFIG_VP9_HIGHBITDEPTH - - if (!(infile = fopen(app_input.input_filename, "rb"))) - die("Failed to open %s for reading\n", app_input.input_filename); + } // Initialize codec - if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) != + if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) != VPX_CODEC_OK) die("Failed to initialize encoder\n"); +#if CONFIG_VP9_DECODER + if (vpx_codec_dec_init( + &decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0)) + die("Failed to initialize decoder\n"); +#endif #if OUTPUT_RC_STATS rc.window_count = 1; @@ -809,6 +982,8 @@ int main(int argc, const char **argv) { #endif info.codec_fourcc = VP9_FOURCC; + info.frame_width = enc_cfg.g_w; + info.frame_height = enc_cfg.g_h; info.time_base.numerator = enc_cfg.g_timebase.num; info.time_base.denominator = enc_cfg.g_timebase.den; @@ -835,35 +1010,37 @@ int main(int argc, const char **argv) { #endif // skip initial frames - for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile); + for (i = 0; i < app_input.frames_to_skip; ++i) + read_frame(&app_input.input_ctx, &raw); if (svc_ctx.speed != -1) - vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed); + vpx_codec_control(&encoder, VP8E_SET_CPUUSED, svc_ctx.speed); if (svc_ctx.threads) { - vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, get_msb(svc_ctx.threads)); + vpx_codec_control(&encoder, VP9E_SET_TILE_COLUMNS, + get_msb(svc_ctx.threads)); if (svc_ctx.threads > 1) - vpx_codec_control(&codec, VP9E_SET_ROW_MT, 1); + 
vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 1); else - vpx_codec_control(&codec, VP9E_SET_ROW_MT, 0); + vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 0); } if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1) - vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); + vpx_codec_control(&encoder, VP9E_SET_AQ_MODE, 3); if (svc_ctx.speed >= 5) - vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); - vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, 900); + vpx_codec_control(&encoder, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(&encoder, VP8E_SET_MAX_INTRA_BITRATE_PCT, 900); - vpx_codec_control(&codec, VP9E_SET_SVC_INTER_LAYER_PRED, + vpx_codec_control(&encoder, VP9E_SET_SVC_INTER_LAYER_PRED, app_input.inter_layer_pred); - vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0); + vpx_codec_control(&encoder, VP9E_SET_NOISE_SENSITIVITY, 0); - vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, app_input.tune_content); + vpx_codec_control(&encoder, VP9E_SET_TUNE_CONTENT, app_input.tune_content); svc_drop_frame.framedrop_mode = FULL_SUPERFRAME_DROP; for (sl = 0; sl < (unsigned int)svc_ctx.spatial_layers; ++sl) svc_drop_frame.framedrop_thresh[sl] = enc_cfg.rc_dropframe_thresh; svc_drop_frame.max_consec_drop = INT_MAX; - vpx_codec_control(&codec, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame); + vpx_codec_control(&encoder, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame); // Encode frames while (!end_of_stream) { @@ -875,7 +1052,8 @@ int main(int argc, const char **argv) { // layers, with SL0 only has TL0, and SL1 has both TL0 and TL1. This example // uses the extended API. 
int example_pattern = 0; - if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) { + if (frame_cnt >= app_input.frames_to_code || + !read_frame(&app_input.input_ctx, &raw)) { // We need one extra vpx_svc_encode call at end of stream to flush // encoder and get remaining data end_of_stream = 1; @@ -914,7 +1092,7 @@ int main(int argc, const char **argv) { layer_id.spatial_layer_id = 1; } } - vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); + vpx_codec_control(&encoder, VP9E_SET_SVC_LAYER_ID, &layer_id); // TODO(jianj): Fix the parameter passing for "is_key_frame" in // set_frame_flags_bypass_model() for case of periodic key frames. if (example_pattern == 0) { @@ -929,7 +1107,7 @@ int main(int argc, const char **argv) { ref_frame_config.duration[0] = frame_duration * 1; ref_frame_config.duration[1] = frame_duration * 1; - vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG, + vpx_codec_control(&encoder, VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config); // Keep track of input frames, to account for frame drops in rate control // stats/metrics. @@ -952,116 +1130,28 @@ int main(int argc, const char **argv) { vpx_usec_timer_start(&timer); res = vpx_svc_encode( - &svc_ctx, &codec, (end_of_stream ? NULL : &raw), pts, frame_duration, + &svc_ctx, &encoder, (end_of_stream ? NULL : &raw), pts, frame_duration, svc_ctx.speed >= 5 ? 
VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY); vpx_usec_timer_mark(&timer); cx_time += vpx_usec_timer_elapsed(&timer); fflush(stdout); if (res != VPX_CODEC_OK) { - die_codec(&codec, "Failed to encode frame"); + die_codec(&encoder, "Failed to encode frame"); } - while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) { + while ((cx_pkt = vpx_codec_get_cx_data(&encoder, &iter)) != NULL) { switch (cx_pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: { SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal; if (cx_pkt->data.frame.sz > 0) { -#if OUTPUT_RC_STATS - uint64_t sizes[8]; - uint64_t sizes_parsed[8]; - int count = 0; - vp9_zero(sizes); - vp9_zero(sizes_parsed); -#endif vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, cx_pkt->data.frame.pts); #if OUTPUT_RC_STATS - // TODO(marpan): Put this (to line728) in separate function. if (svc_ctx.output_rc_stat) { - int num_layers_encoded = 0; - vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id); - parse_superframe_index(cx_pkt->data.frame.buf, - cx_pkt->data.frame.sz, sizes_parsed, - &count); - if (enc_cfg.ss_number_layers == 1) - sizes[0] = cx_pkt->data.frame.sz; - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - sizes[sl] = 0; - if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { - sizes[sl] = sizes_parsed[num_layers_encoded]; - num_layers_encoded++; - } - } - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - unsigned int sl2; - uint64_t tot_size = 0; - for (sl2 = 0; sl2 <= sl; ++sl2) { - if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) - tot_size += sizes[sl2]; - } - if (tot_size > 0) - vpx_video_writer_write_frame( - outfile[sl], cx_pkt->data.frame.buf, (size_t)(tot_size), - cx_pkt->data.frame.pts); - } - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { - for (tl = layer_id.temporal_layer_id; - tl < enc_cfg.ts_number_layers; ++tl) { - const int layer = sl * enc_cfg.ts_number_layers + tl; - 
++rc.layer_tot_enc_frames[layer]; - rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; - // Keep count of rate control stats per layer, for non-key - // frames. - if (tl == (unsigned int)layer_id.temporal_layer_id && - !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { - rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl]; - rc.layer_avg_rate_mismatch[layer] += - fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) / - rc.layer_pfb[layer]; - ++rc.layer_enc_frames[layer]; - } - } - } - } - - // Update for short-time encoding bitrate states, for moving - // window of size rc->window, shifted by rc->window / 2. - // Ignore first window segment, due to key frame. - if (frame_cnt > (unsigned int)rc.window_size) { - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - if (cx_pkt->data.frame.spatial_layer_encoded[sl]) - sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; - } - if (frame_cnt % rc.window_size == 0) { - rc.window_count += 1; - rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size; - rc.variance_st_encoding_bitrate += - (sum_bitrate / rc.window_size) * - (sum_bitrate / rc.window_size); - sum_bitrate = 0.0; - } - } - - // Second shifted window. 
- if (frame_cnt > - (unsigned int)(rc.window_size + rc.window_size / 2)) { - for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { - sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate; - } - - if (frame_cnt > (unsigned int)(2 * rc.window_size) && - frame_cnt % rc.window_size == 0) { - rc.window_count += 1; - rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; - rc.variance_st_encoding_bitrate += - (sum_bitrate2 / rc.window_size) * - (sum_bitrate2 / rc.window_size); - sum_bitrate2 = 0.0; - } - } + svc_output_rc_stats(&encoder, &enc_cfg, &layer_id, cx_pkt, &rc, + outfile, frame_cnt, framerate); } #endif } @@ -1073,6 +1163,11 @@ int main(int argc, const char **argv) { if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1) si->bytes_sum[0] += (int)cx_pkt->data.frame.sz; ++frames_received; +#if CONFIG_VP9_DECODER + if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf, + (unsigned int)cx_pkt->data.frame.sz, NULL, 0)) + die_codec(&decoder, "Failed to decode frame."); +#endif break; } case VPX_CODEC_STATS_PKT: { @@ -1082,6 +1177,18 @@ int main(int argc, const char **argv) { } default: { break; } } + +#if CONFIG_VP9_DECODER + vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id); + // Don't look for mismatch on top spatial and top temporal layers as they + // are non reference frames. 
+ if (!(layer_id.temporal_layer_id > 0 && + layer_id.temporal_layer_id == (int)enc_cfg.ts_number_layers - 1 && + cx_pkt->data.frame + .spatial_layer_encoded[enc_cfg.ss_number_layers - 1])) { + test_decode(&encoder, &decoder, frame_cnt, &mismatch_seen); + } +#endif } if (!end_of_stream) { @@ -1091,14 +1198,17 @@ int main(int argc, const char **argv) { } printf("Processed %d frames\n", frame_cnt); - fclose(infile); + + close_input_file(&app_input.input_ctx); + #if OUTPUT_RC_STATS if (svc_ctx.output_rc_stat) { printout_rate_control_summary(&rc, &enc_cfg, frame_cnt); printf("\n"); } #endif - if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); + if (vpx_codec_destroy(&encoder)) + die_codec(&encoder, "Failed to destroy codec"); if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1); if (writer) { vpx_video_writer_close(writer); @@ -1110,10 +1220,20 @@ int main(int argc, const char **argv) { } } #endif +#if CONFIG_INTERNAL_STATS + if (mismatch_seen) { + fprintf(f, "First mismatch occurred in frame %d\n", mismatch_seen); + } else { + fprintf(f, "No mismatch detected in recon buffers\n"); + } + fclose(f); +#endif printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), 1000000 * (double)frame_cnt / (double)cx_time); - vpx_img_free(&raw); + if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) { + vpx_img_free(&raw); + } // display average size, psnr vpx_svc_dump_statistics(&svc_ctx); vpx_svc_release(&svc_ctx); diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9cx_set_ref.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9cx_set_ref.c index 3472689db2f..911ad38630c 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vp9cx_set_ref.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9cx_set_ref.c @@ -68,128 +68,6 @@ void usage_exit() { exit(EXIT_FAILURE); } -static int compare_img(const vpx_image_t *const img1, - const 
vpx_image_t *const img2) { - uint32_t l_w = img1->d_w; - uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; - uint32_t i; - int match = 1; - - match &= (img1->fmt == img2->fmt); - match &= (img1->d_w == img2->d_w); - match &= (img1->d_h == img2->d_h); - - for (i = 0; i < img1->d_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], - img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], - l_w) == 0); - - for (i = 0; i < c_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], - img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], - c_w) == 0); - - for (i = 0; i < c_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], - img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], - c_w) == 0); - - return match; -} - -#define mmin(a, b) ((a) < (b) ? (a) : (b)) -static void find_mismatch(const vpx_image_t *const img1, - const vpx_image_t *const img2, int yloc[4], - int uloc[4], int vloc[4]) { - const uint32_t bsize = 64; - const uint32_t bsizey = bsize >> img1->y_chroma_shift; - const uint32_t bsizex = bsize >> img1->x_chroma_shift; - const uint32_t c_w = - (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; - int match = 1; - uint32_t i, j; - yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; - for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { - for (j = 0; match && j < img1->d_w; j += bsize) { - int k, l; - const int si = mmin(i + bsize, img1->d_h) - i; - const int sj = mmin(j + bsize, img1->d_w) - j; - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_Y] + - (i + k) * img1->stride[VPX_PLANE_Y] + j + l) != - *(img2->planes[VPX_PLANE_Y] + - (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) { - yloc[0] = i + k; - 
yloc[1] = j + l; - yloc[2] = *(img1->planes[VPX_PLANE_Y] + - (i + k) * img1->stride[VPX_PLANE_Y] + j + l); - yloc[3] = *(img2->planes[VPX_PLANE_Y] + - (i + k) * img2->stride[VPX_PLANE_Y] + j + l); - match = 0; - break; - } - } - } - } - } - - uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_U] + - (i + k) * img1->stride[VPX_PLANE_U] + j + l) != - *(img2->planes[VPX_PLANE_U] + - (i + k) * img2->stride[VPX_PLANE_U] + j + l)) { - uloc[0] = i + k; - uloc[1] = j + l; - uloc[2] = *(img1->planes[VPX_PLANE_U] + - (i + k) * img1->stride[VPX_PLANE_U] + j + l); - uloc[3] = *(img2->planes[VPX_PLANE_U] + - (i + k) * img2->stride[VPX_PLANE_U] + j + l); - match = 0; - break; - } - } - } - } - } - vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_V] + - (i + k) * img1->stride[VPX_PLANE_V] + j + l) != - *(img2->planes[VPX_PLANE_V] + - (i + k) * img2->stride[VPX_PLANE_V] + j + l)) { - vloc[0] = i + k; - vloc[1] = j + l; - vloc[2] = *(img1->planes[VPX_PLANE_V] + - (i + k) * img1->stride[VPX_PLANE_V] + j + l); - vloc[3] = *(img2->planes[VPX_PLANE_V] + - (i + k) * img2->stride[VPX_PLANE_V] + j + l); - match = 0; - break; - } - } - } - } - } -} - static void testing_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder, unsigned int frame_out, int *mismatch_seen) { vpx_image_t enc_img, dec_img; diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vpx_dec_fuzzer.cc 
b/chromium/third_party/libvpx/source/libvpx/examples/vpx_dec_fuzzer.cc index b74b47c230d..e3b0d2e0af0 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vpx_dec_fuzzer.cc +++ b/chromium/third_party/libvpx/source/libvpx/examples/vpx_dec_fuzzer.cc @@ -33,7 +33,8 @@ * Out of memory errors when running generated fuzzer binary $../libvpx/configure --disable-unit-tests --size-limit=12288x12288 \ --extra-cflags="-DVPX_MAX_ALLOCABLE_MEMORY=1073741824" \ - --disable-webm-io --enable-debug + --disable-webm-io --enable-debug --disable-vp8-encoder \ + --disable-vp9-encoder --disable-examples * Build libvpx $make -j32 @@ -42,7 +43,7 @@ $ $CXX $CXXFLAGS -std=c++11 -DDECODER=vp9 \ -fsanitize=fuzzer -I../libvpx -I. -Wl,--start-group \ ../libvpx/examples/vpx_dec_fuzzer.cc -o ./vpx_dec_fuzzer_vp9 \ - ./libvpx.a ./tools_common.c.o -Wl,--end-group + ./libvpx.a -Wl,--end-group * DECODER should be defined as vp9 or vp8 to enable vp9/vp8 * @@ -66,13 +67,15 @@ #include <stdlib.h> #include <memory> -#include "./tools_common.h" #include "vpx/vp8dx.h" #include "vpx/vpx_decoder.h" #include "vpx_ports/mem_ops.h" -#define VPX_TOSTRING(str) #str -#define VPX_STRINGIFY(str) VPX_TOSTRING(str) +#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */ +#define IVF_FILE_HDR_SZ 32 + +#define VPXD_INTERFACE(name) VPXD_INTERFACE_(name) +#define VPXD_INTERFACE_(name) vpx_codec_##name##_dx() static void CloseFile(FILE *file) { fclose(file); } @@ -131,16 +134,12 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (fread(header, 1, IVF_FILE_HDR_SZ, file.get()) != IVF_FILE_HDR_SZ) { return 0; } - const VpxInterface *decoder = get_vpx_decoder_by_name(VPX_STRINGIFY(DECODER)); - if (decoder == nullptr) { - return 0; - } vpx_codec_ctx_t codec; // Set thread count in the range [1, 64]. 
const unsigned int threads = (data[IVF_FILE_HDR_SZ] & 0x3f) + 1; vpx_codec_dec_cfg_t cfg = { threads, 0, 0 }; - if (vpx_codec_dec_init(&codec, decoder->codec_interface(), &cfg, 0)) { + if (vpx_codec_dec_init(&codec, VPXD_INTERFACE(DECODER), &cfg, 0)) { return 0; } diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c index f49ef7b1d8d..ba71ca7126f 100644 --- a/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c @@ -19,6 +19,7 @@ #include <string.h> #include "./vpx_config.h" +#include "./y4minput.h" #include "../vpx_ports/vpx_timer.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" @@ -92,14 +93,15 @@ struct RateControlMetrics { // in the stream. static void set_rate_control_metrics(struct RateControlMetrics *rc, vpx_codec_enc_cfg_t *cfg) { - unsigned int i = 0; + int i = 0; // Set the layer (cumulative) framerate and the target layer (non-cumulative) // per-frame-bandwidth, for the rate control encoding stats below. const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; + const int ts_number_layers = cfg->ts_number_layers; rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0]; rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] / rc->layer_framerate[0]; - for (i = 0; i < cfg->ts_number_layers; ++i) { + for (i = 0; i < ts_number_layers; ++i) { if (i > 0) { rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i]; rc->layer_pfb[i] = @@ -118,6 +120,9 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc, rc->window_size = 15; rc->avg_st_encoding_bitrate = 0.0; rc->variance_st_encoding_bitrate = 0.0; + // Target bandwidth for the whole stream. + // Set to layer_target_bitrate for highest layer (total bitrate). 
+ cfg->rc_target_bitrate = rc->layer_target_bitrate[ts_number_layers - 1]; } static void printout_rate_control_summary(struct RateControlMetrics *rc, @@ -594,7 +599,7 @@ int main(int argc, char **argv) { #endif vpx_svc_layer_id_t layer_id; const VpxInterface *encoder = NULL; - FILE *infile = NULL; + struct VpxInputContext input_ctx; struct RateControlMetrics rc; int64_t cx_time = 0; const int min_args_base = 13; @@ -611,6 +616,13 @@ int main(int argc, char **argv) { zero(rc.layer_target_bitrate); memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); + memset(&input_ctx, 0, sizeof(input_ctx)); + /* Setup default input stream settings */ + input_ctx.framerate.numerator = 30; + input_ctx.framerate.denominator = 1; + input_ctx.only_i420 = 1; + input_ctx.bit_depth = 0; + exec_name = argv[0]; // Check usage and arguments. if (argc < min_args) { @@ -649,6 +661,9 @@ int main(int argc, char **argv) { die("Invalid number of arguments"); } + input_ctx.filename = argv[1]; + open_input_file(&input_ctx); + #if CONFIG_VP9_HIGHBITDEPTH switch (strtol(argv[argc - 1], NULL, 0)) { case 8: @@ -665,14 +680,22 @@ int main(int argc, char **argv) { break; default: die("Invalid bit depth (8, 10, 12) %s", argv[argc - 1]); } - if (!vpx_img_alloc( - &raw, bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016, - width, height, 32)) { - die("Failed to allocate image", width, height); + + // Y4M reader has its own allocation. + if (input_ctx.file_type != FILE_TYPE_Y4M) { + if (!vpx_img_alloc( + &raw, + bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016, + width, height, 32)) { + die("Failed to allocate image", width, height); + } } #else - if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) { - die("Failed to allocate image", width, height); + // Y4M reader has its own allocation. 
+ if (input_ctx.file_type != FILE_TYPE_Y4M) { + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) { + die("Failed to allocate image", width, height); + } } #endif // CONFIG_VP9_HIGHBITDEPTH @@ -750,13 +773,15 @@ int main(int argc, char **argv) { set_rate_control_metrics(&rc, &cfg); - // Target bandwidth for the whole stream. - // Set to layer_target_bitrate for highest layer (total bitrate). - cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1]; - - // Open input file. - if (!(infile = fopen(argv[1], "rb"))) { - die("Failed to open %s for reading", argv[1]); + if (input_ctx.file_type == FILE_TYPE_Y4M) { + if (input_ctx.width != cfg.g_w || input_ctx.height != cfg.g_h) { + die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h); + } + if (input_ctx.framerate.numerator != cfg.g_timebase.den || + input_ctx.framerate.denominator != cfg.g_timebase.num) { + die("Incorrect framerate: numerator %d denominator %d", + cfg.g_timebase.num, cfg.g_timebase.den); + } } framerate = cfg.g_timebase.den / cfg.g_timebase.num; @@ -865,7 +890,7 @@ int main(int argc, char **argv) { } flags = layer_flags[frame_cnt % flag_periodicity]; if (layering_mode == 0) flags = 0; - frame_avail = vpx_img_read(&raw, infile); + frame_avail = read_frame(&input_ctx, &raw); if (frame_avail) ++rc.layer_input_frames[layer_id.temporal_layer_id]; vpx_usec_timer_start(&timer); if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags, @@ -933,7 +958,7 @@ int main(int argc, char **argv) { ++frame_cnt; pts += frame_duration; } - fclose(infile); + close_input_file(&input_ctx); printout_rate_control_summary(&rc, &cfg, frame_cnt); printf("\n"); printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", @@ -945,7 +970,10 @@ int main(int argc, char **argv) { // Try to rewrite the output file headers with the actual frame count. 
for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]); - vpx_img_free(&raw); + if (input_ctx.file_type != FILE_TYPE_Y4M) { + vpx_img_free(&raw); + } + #if ROI_MAP free(roi.roi_map); #endif diff --git a/chromium/third_party/libvpx/source/libvpx/libs.mk b/chromium/third_party/libvpx/source/libvpx/libs.mk index 7ec8c87568d..d0c4d6426a3 100644 --- a/chromium/third_party/libvpx/source/libvpx/libs.mk +++ b/chromium/third_party/libvpx/source/libvpx/libs.mk @@ -233,7 +233,7 @@ OBJS-yes += $(LIBVPX_OBJS) LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS) -SO_VERSION_MAJOR := 5 +SO_VERSION_MAJOR := 6 SO_VERSION_MINOR := 0 SO_VERSION_PATCH := 0 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS)) diff --git a/chromium/third_party/libvpx/source/libvpx/tools_common.c b/chromium/third_party/libvpx/source/libvpx/tools_common.c index 6f14c255617..59978b7f93a 100644 --- a/chromium/third_party/libvpx/source/libvpx/tools_common.c +++ b/chromium/third_party/libvpx/source/libvpx/tools_common.c @@ -46,6 +46,14 @@ va_end(ap); \ } while (0) +#if CONFIG_ENCODERS +/* Swallow warnings about unused results of fread/fwrite */ +static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { + return fread(ptr, size, nmemb, stream); +} +#define fread wrap_fread +#endif + FILE *set_binary_mode(FILE *stream) { (void)stream; #if defined(_WIN32) || defined(__OS2__) @@ -200,8 +208,6 @@ const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc) { #endif // CONFIG_DECODERS -// TODO(dkovalev): move this function to vpx_image.{c, h}, so it will be part -// of vpx_image_t support int vpx_img_plane_width(const vpx_image_t *img, int plane) { if (plane > 0 && img->x_chroma_shift > 0) return (img->d_w + 1) >> img->x_chroma_shift; @@ -266,6 +272,88 @@ double sse_to_psnr(double samples, double peak, double sse) { } } +#if CONFIG_ENCODERS +int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { + 
FILE *f = input_ctx->file; + y4m_input *y4m = &input_ctx->y4m; + int shortread = 0; + + if (input_ctx->file_type == FILE_TYPE_Y4M) { + if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; + } else { + shortread = read_yuv_frame(input_ctx, img); + } + + return !shortread; +} + +int file_is_y4m(const char detect[4]) { + if (memcmp(detect, "YUV4", 4) == 0) { + return 1; + } + return 0; +} + +int fourcc_is_ivf(const char detect[4]) { + if (memcmp(detect, "DKIF", 4) == 0) { + return 1; + } + return 0; +} + +void open_input_file(struct VpxInputContext *input) { + /* Parse certain options from the input file, if possible */ + input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") + : set_binary_mode(stdin); + + if (!input->file) fatal("Failed to open input file"); + + if (!fseeko(input->file, 0, SEEK_END)) { + /* Input file is seekable. Figure out how long it is, so we can get + * progress info. + */ + input->length = ftello(input->file); + rewind(input->file); + } + + /* Default to 1:1 pixel aspect ratio. */ + input->pixel_aspect_ratio.numerator = 1; + input->pixel_aspect_ratio.denominator = 1; + + /* For RAW input sources, these bytes will applied on the first frame + * in read_frame(). 
+ */ + input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); + input->detect.position = 0; + + if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { + if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, + input->only_i420) >= 0) { + input->file_type = FILE_TYPE_Y4M; + input->width = input->y4m.pic_w; + input->height = input->y4m.pic_h; + input->pixel_aspect_ratio.numerator = input->y4m.par_n; + input->pixel_aspect_ratio.denominator = input->y4m.par_d; + input->framerate.numerator = input->y4m.fps_n; + input->framerate.denominator = input->y4m.fps_d; + input->fmt = input->y4m.vpx_fmt; + input->bit_depth = input->y4m.bit_depth; + } else { + fatal("Unsupported Y4M stream."); + } + } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { + fatal("IVF is not supported as input."); + } else { + input->file_type = FILE_TYPE_RAW; + } +} + +void close_input_file(struct VpxInputContext *input) { + fclose(input->file); + if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); +} +#endif + // TODO(debargha): Consolidate the functions below into a separate file. 
#if CONFIG_VP9_HIGHBITDEPTH static void highbd_img_upshift(vpx_image_t *dst, vpx_image_t *src, @@ -459,3 +547,225 @@ void vpx_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift) { } } #endif // CONFIG_VP9_HIGHBITDEPTH + +int compare_img(const vpx_image_t *const img1, const vpx_image_t *const img2) { + uint32_t l_w = img1->d_w; + uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + uint32_t i; + int match = 1; + + match &= (img1->fmt == img2->fmt); + match &= (img1->d_w == img2->d_w); + match &= (img1->d_h == img2->d_h); +#if CONFIG_VP9_HIGHBITDEPTH + if (img1->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { + l_w *= 2; + c_w *= 2; + } +#endif + + for (i = 0; i < img1->d_h; ++i) + match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], + img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], + l_w) == 0); + + for (i = 0; i < c_h; ++i) + match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], + img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], + c_w) == 0); + + for (i = 0; i < c_h; ++i) + match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], + img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], + c_w) == 0); + + return match; +} + +#define mmin(a, b) ((a) < (b) ? 
(a) : (b)) + +#if CONFIG_VP9_HIGHBITDEPTH +void find_mismatch_high(const vpx_image_t *const img1, + const vpx_image_t *const img2, int yloc[4], int uloc[4], + int vloc[4]) { + uint16_t *plane1, *plane2; + uint32_t stride1, stride2; + const uint32_t bsize = 64; + const uint32_t bsizey = bsize >> img1->y_chroma_shift; + const uint32_t bsizex = bsize >> img1->x_chroma_shift; + const uint32_t c_w = + (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + int match = 1; + uint32_t i, j; + yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; + plane1 = (uint16_t *)img1->planes[VPX_PLANE_Y]; + plane2 = (uint16_t *)img2->planes[VPX_PLANE_Y]; + stride1 = img1->stride[VPX_PLANE_Y] / 2; + stride2 = img2->stride[VPX_PLANE_Y] / 2; + for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { + for (j = 0; match && j < img1->d_w; j += bsize) { + int k, l; + const int si = mmin(i + bsize, img1->d_h) - i; + const int sj = mmin(j + bsize, img1->d_w) - j; + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(plane1 + (i + k) * stride1 + j + l) != + *(plane2 + (i + k) * stride2 + j + l)) { + yloc[0] = i + k; + yloc[1] = j + l; + yloc[2] = *(plane1 + (i + k) * stride1 + j + l); + yloc[3] = *(plane2 + (i + k) * stride2 + j + l); + match = 0; + break; + } + } + } + } + } + + uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; + plane1 = (uint16_t *)img1->planes[VPX_PLANE_U]; + plane2 = (uint16_t *)img2->planes[VPX_PLANE_U]; + stride1 = img1->stride[VPX_PLANE_U] / 2; + stride2 = img2->stride[VPX_PLANE_U] / 2; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(plane1 + (i + k) * stride1 + j + l) != + *(plane2 + (i + k) * stride2 + j + l)) { + uloc[0] 
= i + k; + uloc[1] = j + l; + uloc[2] = *(plane1 + (i + k) * stride1 + j + l); + uloc[3] = *(plane2 + (i + k) * stride2 + j + l); + match = 0; + break; + } + } + } + } + } + + vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; + plane1 = (uint16_t *)img1->planes[VPX_PLANE_V]; + plane2 = (uint16_t *)img2->planes[VPX_PLANE_V]; + stride1 = img1->stride[VPX_PLANE_V] / 2; + stride2 = img2->stride[VPX_PLANE_V] / 2; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(plane1 + (i + k) * stride1 + j + l) != + *(plane2 + (i + k) * stride2 + j + l)) { + vloc[0] = i + k; + vloc[1] = j + l; + vloc[2] = *(plane1 + (i + k) * stride1 + j + l); + vloc[3] = *(plane2 + (i + k) * stride2 + j + l); + match = 0; + break; + } + } + } + } + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void find_mismatch(const vpx_image_t *const img1, const vpx_image_t *const img2, + int yloc[4], int uloc[4], int vloc[4]) { + const uint32_t bsize = 64; + const uint32_t bsizey = bsize >> img1->y_chroma_shift; + const uint32_t bsizex = bsize >> img1->x_chroma_shift; + const uint32_t c_w = + (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; + const uint32_t c_h = + (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; + int match = 1; + uint32_t i, j; + yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; + for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { + for (j = 0; match && j < img1->d_w; j += bsize) { + int k, l; + const int si = mmin(i + bsize, img1->d_h) - i; + const int sj = mmin(j + bsize, img1->d_w) - j; + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(img1->planes[VPX_PLANE_Y] + + (i + k) * img1->stride[VPX_PLANE_Y] + j + l) != + *(img2->planes[VPX_PLANE_Y] + + (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) { + yloc[0] = i 
+ k; + yloc[1] = j + l; + yloc[2] = *(img1->planes[VPX_PLANE_Y] + + (i + k) * img1->stride[VPX_PLANE_Y] + j + l); + yloc[3] = *(img2->planes[VPX_PLANE_Y] + + (i + k) * img2->stride[VPX_PLANE_Y] + j + l); + match = 0; + break; + } + } + } + } + } + + uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(img1->planes[VPX_PLANE_U] + + (i + k) * img1->stride[VPX_PLANE_U] + j + l) != + *(img2->planes[VPX_PLANE_U] + + (i + k) * img2->stride[VPX_PLANE_U] + j + l)) { + uloc[0] = i + k; + uloc[1] = j + l; + uloc[2] = *(img1->planes[VPX_PLANE_U] + + (i + k) * img1->stride[VPX_PLANE_U] + j + l); + uloc[3] = *(img2->planes[VPX_PLANE_U] + + (i + k) * img2->stride[VPX_PLANE_U] + j + l); + match = 0; + break; + } + } + } + } + } + vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; + for (i = 0, match = 1; match && i < c_h; i += bsizey) { + for (j = 0; match && j < c_w; j += bsizex) { + int k, l; + const int si = mmin(i + bsizey, c_h - i); + const int sj = mmin(j + bsizex, c_w - j); + for (k = 0; match && k < si; ++k) { + for (l = 0; match && l < sj; ++l) { + if (*(img1->planes[VPX_PLANE_V] + + (i + k) * img1->stride[VPX_PLANE_V] + j + l) != + *(img2->planes[VPX_PLANE_V] + + (i + k) * img2->stride[VPX_PLANE_V] + j + l)) { + vloc[0] = i + k; + vloc[1] = j + l; + vloc[2] = *(img1->planes[VPX_PLANE_V] + + (i + k) * img1->stride[VPX_PLANE_V] + j + l); + vloc[3] = *(img2->planes[VPX_PLANE_V] + + (i + k) * img2->stride[VPX_PLANE_V] + j + l); + match = 0; + break; + } + } + } + } + } +} diff --git a/chromium/third_party/libvpx/source/libvpx/tools_common.h b/chromium/third_party/libvpx/source/libvpx/tools_common.h index 313acd2cfb7..4526d9f165c 100644 --- a/chromium/third_party/libvpx/source/libvpx/tools_common.h +++ 
b/chromium/third_party/libvpx/source/libvpx/tools_common.h @@ -145,8 +145,6 @@ const VpxInterface *get_vpx_decoder_by_index(int i); const VpxInterface *get_vpx_decoder_by_name(const char *name); const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc); -// TODO(dkovalev): move this function to vpx_image.{c, h}, so it will be part -// of vpx_image_t support int vpx_img_plane_width(const vpx_image_t *img, int plane); int vpx_img_plane_height(const vpx_image_t *img, int plane); void vpx_img_write(const vpx_image_t *img, FILE *file); @@ -154,12 +152,29 @@ int vpx_img_read(vpx_image_t *img, FILE *file); double sse_to_psnr(double samples, double peak, double mse); +#if CONFIG_ENCODERS +int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img); +int file_is_y4m(const char detect[4]); +int fourcc_is_ivf(const char detect[4]); +void open_input_file(struct VpxInputContext *input); +void close_input_file(struct VpxInputContext *input); +#endif + #if CONFIG_VP9_HIGHBITDEPTH void vpx_img_upshift(vpx_image_t *dst, vpx_image_t *src, int input_shift); void vpx_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift); void vpx_img_truncate_16_to_8(vpx_image_t *dst, vpx_image_t *src); #endif +int compare_img(const vpx_image_t *const img1, const vpx_image_t *const img2); +#if CONFIG_VP9_HIGHBITDEPTH +void find_mismatch_high(const vpx_image_t *const img1, + const vpx_image_t *const img2, int yloc[4], int uloc[4], + int vloc[4]); +#endif +void find_mismatch(const vpx_image_t *const img1, const vpx_image_t *const img2, + int yloc[4], int uloc[4], int vloc[4]); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/chromium/third_party/libvpx/source/libvpx/video_reader.c b/chromium/third_party/libvpx/source/libvpx/video_reader.c index a0ba2521c61..16822eff3c1 100644 --- a/chromium/third_party/libvpx/source/libvpx/video_reader.c +++ b/chromium/third_party/libvpx/source/libvpx/video_reader.c @@ -30,17 +30,37 @@ VpxVideoReader *vpx_video_reader_open(const char 
*filename) { char header[32]; VpxVideoReader *reader = NULL; FILE *const file = fopen(filename, "rb"); - if (!file) return NULL; // Can't open file + if (!file) { + fprintf(stderr, "%s can't be opened.\n", filename); // Can't open file + return NULL; + } - if (fread(header, 1, 32, file) != 32) return NULL; // Can't read file header + if (fread(header, 1, 32, file) != 32) { + fprintf(stderr, "File header on %s can't be read.\n", + filename); // Can't read file header + return NULL; + } + if (memcmp(kIVFSignature, header, 4) != 0) { + fprintf(stderr, "The IVF signature on %s is wrong.\n", + filename); // Wrong IVF signature - if (memcmp(kIVFSignature, header, 4) != 0) - return NULL; // Wrong IVF signature + return NULL; + } + if (mem_get_le16(header + 4) != 0) { + fprintf(stderr, "%s uses the wrong IVF version.\n", + filename); // Wrong IVF version - if (mem_get_le16(header + 4) != 0) return NULL; // Wrong IVF version + return NULL; + } reader = calloc(1, sizeof(*reader)); - if (!reader) return NULL; // Can't allocate VpxVideoReader + if (!reader) { + fprintf( + stderr, + "Can't allocate VpxVideoReader\n"); // Can't allocate VpxVideoReader + + return NULL; + } reader->file = file; reader->info.codec_fourcc = mem_get_le32(header + 8); diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c index adc25024cfc..a30821ac198 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c @@ -218,6 +218,8 @@ static void save_layer_context(VP8_COMP *cpi) { lc->frames_since_last_drop_overshoot = cpi->frames_since_last_drop_overshoot; lc->force_maxqp = cpi->force_maxqp; lc->last_frame_percent_intra = cpi->last_frame_percent_intra; + lc->last_q[0] = cpi->last_q[0]; + lc->last_q[1] = cpi->last_q[1]; memcpy(lc->count_mb_ref_frame_usage, cpi->mb.count_mb_ref_frame_usage, sizeof(cpi->mb.count_mb_ref_frame_usage)); 
@@ -255,6 +257,8 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) { cpi->frames_since_last_drop_overshoot = lc->frames_since_last_drop_overshoot; cpi->force_maxqp = lc->force_maxqp; cpi->last_frame_percent_intra = lc->last_frame_percent_intra; + cpi->last_q[0] = lc->last_q[0]; + cpi->last_q[1] = lc->last_q[1]; memcpy(cpi->mb.count_mb_ref_frame_usage, lc->count_mb_ref_frame_usage, sizeof(cpi->mb.count_mb_ref_frame_usage)); @@ -3950,6 +3954,9 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { if (vp8_drop_encodedframe_overshoot(cpi, Q)) return; + if (cm->frame_type != KEY_FRAME) + cpi->last_pred_err_mb = + (int)(cpi->mb.prediction_error / cpi->common.MBs); } cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h index 603de8bcbc2..a4e3f2f7902 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h @@ -257,6 +257,7 @@ typedef struct { int count_mb_ref_frame_usage[MAX_REF_FRAMES]; + int last_q[2]; } LAYER_CONTEXT; typedef struct VP8_COMP { @@ -510,6 +511,7 @@ typedef struct VP8_COMP { int force_maxqp; int frames_since_last_drop_overshoot; + int last_pred_err_mb; // GF update for 1 pass cbr. 
int gf_update_onepass_cbr; diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c index ce07a6f197f..dbd76edad05 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c +++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c @@ -1125,6 +1125,14 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) { } } +static int limit_q_cbr_inter(int last_q, int current_q) { + int limit_down = 12; + if (last_q - current_q > limit_down) + return (last_q - limit_down); + else + return current_q; +} + int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) { int Q = cpi->active_worst_quality; @@ -1264,6 +1272,12 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) { } } + // Limit decrease in Q for 1 pass CBR screen content mode. + if (cpi->common.frame_type != KEY_FRAME && cpi->pass == 0 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && + cpi->oxcf.screen_content_mode) + Q = limit_q_cbr_inter(cpi->last_q[1], Q); + return Q; } @@ -1484,7 +1498,8 @@ int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) { if (cpi->drop_frames_allowed && pred_err_mb > (thresh_pred_err_mb << 4)) thresh_rate = thresh_rate >> 3; if ((Q < thresh_qp && cpi->projected_frame_size > thresh_rate && - pred_err_mb > thresh_pred_err_mb) || + pred_err_mb > thresh_pred_err_mb && + pred_err_mb > 2 * cpi->last_pred_err_mb) || force_drop_overshoot) { unsigned int i; double new_correction_factor; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h index f0887157e56..e07a9f2d38e 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h @@ -176,7 +176,7 @@ typedef struct macroblockd { FRAME_CONTEXT *fc; /* pointers to reference frames */ - RefBuffer *block_refs[2]; + 
const RefBuffer *block_refs[2]; /* pointer to current frame */ const YV12_BUFFER_CONFIG *cur_buf; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h index bc665534de7..b33a3a2978c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h @@ -41,6 +41,8 @@ typedef enum BITSTREAM_PROFILE { MAX_PROFILES } BITSTREAM_PROFILE; +typedef enum PARSE_RECON_FLAG { PARSE = 1, RECON = 2 } PARSE_RECON_FLAG; + #define BLOCK_4X4 0 #define BLOCK_4X8 1 #define BLOCK_8X4 2 diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl index 8bb68cfdfa3..00c4414adad 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl @@ -183,11 +183,19 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") { add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv"; specialize qw/vp9_diamond_search_sad avx/; +# +# Apply temporal filter +# if (vpx_config("CONFIG_REALTIME_ONLY") ne "yes") { -add_proto qw/void vp9_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count"; -specialize qw/vp9_temporal_filter_apply sse4_1/; +add_proto qw/void vp9_apply_temporal_filter/, "const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int 
uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count"; +specialize qw/vp9_apply_temporal_filter sse4_1/; + + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { + add_proto qw/void vp9_highbd_apply_temporal_filter/, "const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count"; + } } + if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # ENCODEMB INVOKE diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c index b008ed5cf60..00882a5f94a 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.c @@ -475,6 +475,12 @@ void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, #endif // CONFIG_MULTITHREAD } +void vp9_loopfilter_job(LFWorkerData *lf_data, VP9LfSync *lf_sync) { + thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only, + lf_sync); +} + // Accumulate frame counts. 
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum, const FRAME_COUNTS *counts, int is_dec) { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h index b97e9ee134d..1a2d79abd41 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h @@ -70,7 +70,7 @@ void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync); void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, int corrupted); -void vp9_set_last_decoded_row(struct VP9Common *cm, int tile_col, int mi_row); +void vp9_loopfilter_job(LFWorkerData *lf_data, VP9LfSync *lf_sync); void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, const struct FRAME_COUNTS *counts, int is_dec); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c index c9c85053d5a..c75c3d9a44c 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c @@ -42,6 +42,7 @@ #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_dsubexp.h" +#include "vp9/decoder/vp9_job_queue.h" #define MAX_VP9_HEADER_SIZE 80 @@ -1027,7 +1028,6 @@ static void recon_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, static void parse_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) { VP9_COMMON *const cm = &pbi->common; - const int less8x8 = bsize < BLOCK_8X8; const int bw = 1 << (bwl - 1); const int bh = 1 << (bhl - 1); const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); @@ -1059,7 +1059,7 @@ static void parse_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, const int eobtotal = 
predict_recon_inter(xd, mi, twd, parse_inter_block_row_mt); - if (!less8x8 && eobtotal == 0) mi->skip = 1; // skip loopfilter + if (bsize >= BLOCK_8X8 && eobtotal == 0) mi->skip = 1; // skip loopfilter } } @@ -1172,9 +1172,10 @@ static void decode_partition(TileWorkerData *twd, VP9Decoder *const pbi, dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh); } -static void recon_partition(TileWorkerData *twd, VP9Decoder *const pbi, - int mi_row, int mi_col, BLOCK_SIZE bsize, - int n4x4_l2) { +static void process_partition(TileWorkerData *twd, VP9Decoder *const pbi, + int mi_row, int mi_col, BLOCK_SIZE bsize, + int n4x4_l2, int parse_recon_flag, + process_block_fn_t process_block) { VP9_COMMON *const cm = &pbi->common; const int n8x8_l2 = n4x4_l2 - 1; const int num_8x8_wh = 1 << n8x8_l2; @@ -1187,60 +1188,10 @@ static void recon_partition(TileWorkerData *twd, VP9Decoder *const pbi, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - partition = *xd->partition; - xd->partition++; - - subsize = get_subsize(bsize, partition); - if (!hbs) { - // calculate bmode block dimensions (log 2) - xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); - xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); - recon_block(twd, pbi, mi_row, mi_col, subsize, 1, 1); - } else { - switch (partition) { - case PARTITION_NONE: - recon_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2); - break; - case PARTITION_HORZ: - recon_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2); - if (has_rows) - recon_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2, - n8x8_l2); - break; - case PARTITION_VERT: - recon_block(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2); - if (has_cols) - recon_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2, - n4x4_l2); - break; - case PARTITION_SPLIT: - recon_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2); - recon_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2); - recon_partition(twd, 
pbi, mi_row + hbs, mi_col, subsize, n8x8_l2); - recon_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize, n8x8_l2); - break; - default: assert(0 && "Invalid partition type"); - } + if (parse_recon_flag & PARSE) { + *xd->partition = + read_partition(twd, mi_row, mi_col, has_rows, has_cols, n8x8_l2); } -} - -static void parse_partition(TileWorkerData *twd, VP9Decoder *const pbi, - int mi_row, int mi_col, BLOCK_SIZE bsize, - int n4x4_l2) { - VP9_COMMON *const cm = &pbi->common; - const int n8x8_l2 = n4x4_l2 - 1; - const int num_8x8_wh = 1 << n8x8_l2; - const int hbs = num_8x8_wh >> 1; - PARTITION_TYPE partition; - BLOCK_SIZE subsize; - const int has_rows = (mi_row + hbs) < cm->mi_rows; - const int has_cols = (mi_col + hbs) < cm->mi_cols; - MACROBLOCKD *const xd = &twd->xd; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - - *xd->partition = - read_partition(twd, mi_row, mi_col, has_rows, has_cols, n8x8_l2); partition = *xd->partition; xd->partition++; @@ -1250,38 +1201,44 @@ static void parse_partition(TileWorkerData *twd, VP9Decoder *const pbi, // calculate bmode block dimensions (log 2) xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); - parse_block(twd, pbi, mi_row, mi_col, subsize, 1, 1); + process_block(twd, pbi, mi_row, mi_col, subsize, 1, 1); } else { switch (partition) { case PARTITION_NONE: - parse_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2); + process_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2); break; case PARTITION_HORZ: - parse_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2); + process_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2); if (has_rows) - parse_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2, - n8x8_l2); + process_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2, + n8x8_l2); break; case PARTITION_VERT: - parse_block(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2); + process_block(twd, 
pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2); if (has_cols) - parse_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2, - n4x4_l2); + process_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2, + n4x4_l2); break; case PARTITION_SPLIT: - parse_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2); - parse_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2); - parse_partition(twd, pbi, mi_row + hbs, mi_col, subsize, n8x8_l2); - parse_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize, n8x8_l2); + process_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, + parse_recon_flag, process_block); + process_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2, + parse_recon_flag, process_block); + process_partition(twd, pbi, mi_row + hbs, mi_col, subsize, n8x8_l2, + parse_recon_flag, process_block); + process_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize, + n8x8_l2, parse_recon_flag, process_block); break; default: assert(0 && "Invalid partition type"); } } - // update partition context - if (bsize >= BLOCK_8X8 && - (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) - dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh); + if (parse_recon_flag & PARSE) { + // update partition context + if ((bsize == BLOCK_8X8 || partition != PARTITION_SPLIT) && + bsize >= BLOCK_8X8) + dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh); + } } static void setup_token_decoder(const uint8_t *data, const uint8_t *data_end, @@ -1688,6 +1645,317 @@ static void get_tile_buffers(VP9Decoder *pbi, const uint8_t *data, } } +static void map_write(RowMTWorkerData *const row_mt_worker_data, int map_idx, + int sync_idx) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&row_mt_worker_data->recon_sync_mutex[sync_idx]); + row_mt_worker_data->recon_map[map_idx] = 1; + pthread_cond_signal(&row_mt_worker_data->recon_sync_cond[sync_idx]); + pthread_mutex_unlock(&row_mt_worker_data->recon_sync_mutex[sync_idx]); +#else + 
(void)row_mt_worker_data; + (void)map_idx; + (void)sync_idx; +#endif // CONFIG_MULTITHREAD +} + +static void map_read(RowMTWorkerData *const row_mt_worker_data, int map_idx, + int sync_idx) { +#if CONFIG_MULTITHREAD + volatile int8_t *map = row_mt_worker_data->recon_map + map_idx; + pthread_mutex_t *const mutex = + &row_mt_worker_data->recon_sync_mutex[sync_idx]; + pthread_mutex_lock(mutex); + while (!(*map)) { + pthread_cond_wait(&row_mt_worker_data->recon_sync_cond[sync_idx], mutex); + } + pthread_mutex_unlock(mutex); +#else + (void)row_mt_worker_data; + (void)map_idx; + (void)sync_idx; +#endif // CONFIG_MULTITHREAD +} + +static int lpf_map_write_check(VP9LfSync *lf_sync, int row, int num_tile_cols) { + int return_val = 0; +#if CONFIG_MULTITHREAD + int corrupted; + pthread_mutex_lock(&lf_sync->lf_mutex); + corrupted = lf_sync->corrupted; + pthread_mutex_unlock(&lf_sync->lf_mutex); + if (!corrupted) { + pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); + lf_sync->num_tiles_done[row] += 1; + if (num_tile_cols == lf_sync->num_tiles_done[row]) return_val = 1; + pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]); + } +#else + (void)lf_sync; + (void)row; + (void)num_tile_cols; +#endif + return return_val; +} + +static void vp9_tile_done(VP9Decoder *pbi) { +#if CONFIG_MULTITHREAD + int terminate; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const int all_parse_done = 1 << pbi->common.log2_tile_cols; + pthread_mutex_lock(&row_mt_worker_data->recon_done_mutex); + row_mt_worker_data->num_tiles_done++; + terminate = all_parse_done == row_mt_worker_data->num_tiles_done; + pthread_mutex_unlock(&row_mt_worker_data->recon_done_mutex); + if (terminate) { + vp9_jobq_terminate(&row_mt_worker_data->jobq); + } +#else + (void)pbi; +#endif +} + +static void vp9_jobq_alloc(VP9Decoder *pbi) { + VP9_COMMON *const cm = &pbi->common; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const int aligned_rows = 
mi_cols_aligned_to_sb(cm->mi_rows); + const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + const size_t jobq_size = (tile_cols * sb_rows * 2 + sb_rows) * sizeof(Job); + + if (jobq_size > row_mt_worker_data->jobq_size) { + vpx_free(row_mt_worker_data->jobq_buf); + CHECK_MEM_ERROR(cm, row_mt_worker_data->jobq_buf, vpx_calloc(1, jobq_size)); + vp9_jobq_init(&row_mt_worker_data->jobq, row_mt_worker_data->jobq_buf, + jobq_size); + row_mt_worker_data->jobq_size = jobq_size; + } +} + +static void recon_tile_row(TileWorkerData *tile_data, VP9Decoder *pbi, + int mi_row, int is_last_row, VP9LfSync *lf_sync, + int cur_tile_col) { + VP9_COMMON *const cm = &pbi->common; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const int tile_cols = 1 << cm->log2_tile_cols; + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; + const int cur_sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; + int mi_col_start = tile_data->xd.tile.mi_col_start; + int mi_col_end = tile_data->xd.tile.mi_col_end; + int mi_col; + + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { + const int c = mi_col >> MI_BLOCK_SIZE_LOG2; + int plane; + const int sb_num = (cur_sb_row * (aligned_cols >> MI_BLOCK_SIZE_LOG2) + c); + + // Top Dependency + if (cur_sb_row) { + map_read(row_mt_worker_data, ((cur_sb_row - 1) * sb_cols) + c, + ((cur_sb_row - 1) * tile_cols) + cur_tile_col); + } + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + tile_data->xd.plane[plane].eob = + row_mt_worker_data->eob[plane] + (sb_num << EOBS_PER_SB_LOG2); + tile_data->xd.plane[plane].dqcoeff = + row_mt_worker_data->dqcoeff[plane] + (sb_num << DQCOEFFS_PER_SB_LOG2); + } + tile_data->xd.partition = + row_mt_worker_data->partition + (sb_num * PARTITIONS_PER_SB); + process_partition(tile_data, pbi, 
mi_row, mi_col, BLOCK_64X64, 4, RECON, + recon_block); + if (cm->lf.filter_level && !cm->skip_loop_filter) { + // Queue LPF_JOB + int is_lpf_job_ready = 0; + + if (mi_col + MI_BLOCK_SIZE >= mi_col_end) { + // Checks if this row has been decoded in all tiles + is_lpf_job_ready = lpf_map_write_check(lf_sync, cur_sb_row, tile_cols); + + if (is_lpf_job_ready) { + Job lpf_job; + lpf_job.job_type = LPF_JOB; + if (cur_sb_row > 0) { + lpf_job.row_num = mi_row - MI_BLOCK_SIZE; + vp9_jobq_queue(&row_mt_worker_data->jobq, &lpf_job, + sizeof(lpf_job)); + } + if (is_last_row) { + lpf_job.row_num = mi_row; + vp9_jobq_queue(&row_mt_worker_data->jobq, &lpf_job, + sizeof(lpf_job)); + } + } + } + } + map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c, + (cur_sb_row * tile_cols) + cur_tile_col); + } +} + +static void parse_tile_row(TileWorkerData *tile_data, VP9Decoder *pbi, + int mi_row, int cur_tile_col, uint8_t **data_end) { + int mi_col; + VP9_COMMON *const cm = &pbi->common; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + TileInfo *tile = &tile_data->xd.tile; + TileBuffer *const buf = &pbi->tile_buffers[cur_tile_col]; + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + + vp9_zero(tile_data->dqcoeff); + vp9_tile_init(tile, cm, 0, cur_tile_col); + + /* Update reader only at the beginning of each row in a tile */ + if (mi_row == 0) { + setup_token_decoder(buf->data, *data_end, buf->size, &tile_data->error_info, + &tile_data->bit_reader, pbi->decrypt_cb, + pbi->decrypt_state); + } + vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); + tile_data->xd.error_info = &tile_data->error_info; + + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; + mi_col += MI_BLOCK_SIZE) { + const int r = mi_row >> MI_BLOCK_SIZE_LOG2; + const int c = mi_col >> MI_BLOCK_SIZE_LOG2; + int plane; + const int sb_num = (r * (aligned_cols >> MI_BLOCK_SIZE_LOG2) + 
c); + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + tile_data->xd.plane[plane].eob = + row_mt_worker_data->eob[plane] + (sb_num << EOBS_PER_SB_LOG2); + tile_data->xd.plane[plane].dqcoeff = + row_mt_worker_data->dqcoeff[plane] + (sb_num << DQCOEFFS_PER_SB_LOG2); + } + tile_data->xd.partition = + row_mt_worker_data->partition + sb_num * PARTITIONS_PER_SB; + process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, PARSE, + parse_block); + } +} + +static int row_decode_worker_hook(ThreadData *const thread_data, + uint8_t **data_end) { + VP9Decoder *const pbi = thread_data->pbi; + VP9_COMMON *const cm = &pbi->common; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); + const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; + const int tile_cols = 1 << cm->log2_tile_cols; + Job job; + LFWorkerData *lf_data = thread_data->lf_data; + VP9LfSync *lf_sync = thread_data->lf_sync; + volatile int corrupted = 0; + + while (!vp9_jobq_dequeue(&row_mt_worker_data->jobq, &job, sizeof(job), 1)) { + int mi_col; + const int mi_row = job.row_num; + + if (job.job_type == LPF_JOB) { + lf_data->start = mi_row; + lf_data->stop = lf_data->start + MI_BLOCK_SIZE; + + if (cm->lf.filter_level && !cm->skip_loop_filter && + mi_row < cm->mi_rows) { + vp9_loopfilter_job(lf_data, lf_sync); + } + } else if (job.job_type == RECON_JOB) { + const int cur_sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; + const int is_last_row = sb_rows - 1 == cur_sb_row; + TileWorkerData twd_recon; + TileWorkerData *const tile_data_recon = &twd_recon; + int mi_col_start, mi_col_end; + + tile_data_recon->xd = pbi->mb; + vp9_tile_init(&tile_data_recon->xd.tile, cm, 0, job.tile_col); + vp9_init_macroblockd(cm, &tile_data_recon->xd, tile_data_recon->dqcoeff); + mi_col_start = tile_data_recon->xd.tile.mi_col_start; + mi_col_end = tile_data_recon->xd.tile.mi_col_end; + + if 
(setjmp(tile_data_recon->error_info.jmp)) { + const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; + tile_data_recon->error_info.setjmp = 0; + corrupted = 1; + for (mi_col = mi_col_start; mi_col < mi_col_end; + mi_col += MI_BLOCK_SIZE) { + const int c = mi_col >> MI_BLOCK_SIZE_LOG2; + map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c, + (cur_sb_row * tile_cols) + job.tile_col); + } + if (is_last_row) { + vp9_tile_done(pbi); + } + continue; + } + + tile_data_recon->error_info.setjmp = 1; + tile_data_recon->xd.error_info = &tile_data_recon->error_info; + + recon_tile_row(tile_data_recon, pbi, mi_row, is_last_row, lf_sync, + job.tile_col); + + if (corrupted) + vpx_internal_error(&tile_data_recon->error_info, + VPX_CODEC_CORRUPT_FRAME, + "Failed to decode tile data"); + + if (is_last_row) { + vp9_tile_done(pbi); + } + } else if (job.job_type == PARSE_JOB) { + TileWorkerData *const tile_data = &pbi->tile_worker_data[job.tile_col]; + + if (setjmp(tile_data->error_info.jmp)) { + tile_data->error_info.setjmp = 0; + corrupted = 1; + vp9_tile_done(pbi); + continue; + } + + tile_data->xd = pbi->mb; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? 
0 : &tile_data->counts; + + tile_data->error_info.setjmp = 1; + + parse_tile_row(tile_data, pbi, mi_row, job.tile_col, data_end); + + corrupted |= tile_data->xd.corrupted; + if (corrupted) + vpx_internal_error(&tile_data->error_info, VPX_CODEC_CORRUPT_FRAME, + "Failed to decode tile data"); + + /* Queue in the recon_job for this row */ + { + Job recon_job; + recon_job.row_num = mi_row; + recon_job.tile_col = job.tile_col; + recon_job.job_type = RECON_JOB; + vp9_jobq_queue(&row_mt_worker_data->jobq, &recon_job, + sizeof(recon_job)); + } + + /* Queue next parse job */ + if (mi_row + MI_BLOCK_SIZE < cm->mi_rows) { + Job parse_job; + parse_job.row_num = mi_row + MI_BLOCK_SIZE; + parse_job.tile_col = job.tile_col; + parse_job.job_type = PARSE_JOB; + vp9_jobq_queue(&row_mt_worker_data->jobq, &parse_job, + sizeof(parse_job)); + } + } + } + + return !corrupted; +} + static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; @@ -1775,7 +2043,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, row_mt_worker_data->dqcoeff[plane]; } tile_data->xd.partition = row_mt_worker_data->partition; - parse_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); + process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, + PARSE, parse_block); for (plane = 0; plane < MAX_MB_PLANE; ++plane) { tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane]; @@ -1783,7 +2052,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, row_mt_worker_data->dqcoeff[plane]; } tile_data->xd.partition = row_mt_worker_data->partition; - recon_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); + process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, + RECON, recon_block); } else { decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); } @@ -1951,22 +2221,12 @@ static int compare_tile_buffers(const void *a, const void *b) { return 
(buf_a->size < buf_b->size) - (buf_a->size > buf_b->size); } -static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, - const uint8_t *data_end) { +static INLINE void init_mt(VP9Decoder *pbi) { + int n; VP9_COMMON *const cm = &pbi->common; - const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); - const uint8_t *bit_reader_end = NULL; VP9LfSync *lf_row_sync = &pbi->lf_row_sync; - YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); - const int tile_cols = 1 << cm->log2_tile_cols; - const int tile_rows = 1 << cm->log2_tile_rows; - const int num_workers = VPXMIN(pbi->max_threads, tile_cols); - int n; - - assert(tile_cols <= (1 << 6)); - assert(tile_rows == 1); - (void)tile_rows; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); if (pbi->num_tile_workers == 0) { const int num_threads = pbi->max_threads; @@ -1985,11 +2245,158 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, } // Initialize LPF - if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { + if ((pbi->lpf_mt_opt || pbi->row_mt) && cm->lf.filter_level && + !cm->skip_loop_filter) { vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level, pbi->num_tile_workers); } + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. 
+ memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); + + memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_mi_cols); + + vp9_reset_lfm(cm); +} + +static const uint8_t *decode_tiles_row_wise_mt(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { + VP9_COMMON *const cm = &pbi->common; + RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + const int num_workers = pbi->max_threads; + int i, n; + int col; + int corrupted = 0; + const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; + VP9LfSync *lf_row_sync = &pbi->lf_row_sync; + YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); + + assert(tile_cols <= (1 << 6)); + assert(tile_rows == 1); + (void)tile_rows; + + memset(row_mt_worker_data->recon_map, 0, + sb_rows * sb_cols * sizeof(*row_mt_worker_data->recon_map)); + + init_mt(pbi); + + // Reset tile decoding hook + for (n = 0; n < num_workers; ++n) { + VPxWorker *const worker = &pbi->tile_workers[n]; + ThreadData *const thread_data = &pbi->row_mt_worker_data->thread_data[n]; + winterface->sync(worker); + + if (cm->lf.filter_level && !cm->skip_loop_filter) { + thread_data->lf_sync = lf_row_sync; + thread_data->lf_data = &thread_data->lf_sync->lfdata[n]; + vp9_loop_filter_data_reset(thread_data->lf_data, new_fb, cm, + pbi->mb.plane); + } + + thread_data->pbi = pbi; + + worker->hook = (VPxWorkerHook)row_decode_worker_hook; + worker->data1 = thread_data; + worker->data2 = (void *)&row_mt_worker_data->data_end; + } + + for (col = 0; col < tile_cols; ++col) { + TileWorkerData *const tile_data = &pbi->tile_worker_data[col]; + tile_data->xd = pbi->mb; + tile_data->xd.counts = + 
cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; + } + + /* Reset the jobq to start of the jobq buffer */ + vp9_jobq_reset(&row_mt_worker_data->jobq); + row_mt_worker_data->num_tiles_done = 0; + row_mt_worker_data->data_end = NULL; + + // Load tile data into tile_buffers + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, + &pbi->tile_buffers); + + // Initialize thread frame counts. + if (!cm->frame_parallel_decoding_mode) { + for (col = 0; col < tile_cols; ++col) { + TileWorkerData *const tile_data = &pbi->tile_worker_data[col]; + vp9_zero(tile_data->counts); + } + } + + // queue parse jobs for 0th row of every tile + for (col = 0; col < tile_cols; ++col) { + Job parse_job; + parse_job.row_num = 0; + parse_job.tile_col = col; + parse_job.job_type = PARSE_JOB; + vp9_jobq_queue(&row_mt_worker_data->jobq, &parse_job, sizeof(parse_job)); + } + + for (i = 0; i < num_workers; ++i) { + VPxWorker *const worker = &pbi->tile_workers[i]; + worker->had_error = 0; + if (i == num_workers - 1) { + winterface->execute(worker); + } else { + winterface->launch(worker); + } + } + + for (; n > 0; --n) { + VPxWorker *const worker = &pbi->tile_workers[n - 1]; + // TODO(jzern): The tile may have specific error data associated with + // its vpx_internal_error_info which could be propagated to the main info + // in cm. Additionally once the threads have been synced and an error is + // detected, there's no point in continuing to decode tiles. + corrupted |= !winterface->sync(worker); + } + + pbi->mb.corrupted = corrupted; + + { + /* Set data end */ + TileWorkerData *const tile_data = &pbi->tile_worker_data[tile_cols - 1]; + row_mt_worker_data->data_end = vpx_reader_find_end(&tile_data->bit_reader); + } + + // Accumulate thread frame counts. 
+ if (!cm->frame_parallel_decoding_mode) { + for (i = 0; i < tile_cols; ++i) { + TileWorkerData *const tile_data = &pbi->tile_worker_data[i]; + vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); + } + } + + return row_mt_worker_data->data_end; +} + +static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, + const uint8_t *data_end) { + VP9_COMMON *const cm = &pbi->common; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + const uint8_t *bit_reader_end = NULL; + VP9LfSync *lf_row_sync = &pbi->lf_row_sync; + YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + const int num_workers = VPXMIN(pbi->max_threads, tile_cols); + int n; + + assert(tile_cols <= (1 << 6)); + assert(tile_rows == 1); + (void)tile_rows; + + init_mt(pbi); + // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; @@ -2012,15 +2419,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, worker->data2 = pbi; } - // Note: this memset assumes above_context[0], [1] and [2] - // are allocated as part of the same buffer. 
- memset(cm->above_context, 0, - sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); - memset(cm->above_seg_context, 0, - sizeof(*cm->above_seg_context) * aligned_mi_cols); - - vp9_reset_lfm(cm); - // Load tile data into tile_buffers get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, &pbi->tile_buffers); @@ -2366,25 +2764,32 @@ static size_t read_uncompressed_header(VP9Decoder *pbi, setup_tile_info(cm, rb); if (pbi->row_mt == 1) { int num_sbs = 1; + const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); + const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; + const int num_jobs = sb_rows << cm->log2_tile_cols; if (pbi->row_mt_worker_data == NULL) { CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data, vpx_calloc(1, sizeof(*pbi->row_mt_worker_data))); +#if CONFIG_MULTITHREAD + pthread_mutex_init(&pbi->row_mt_worker_data->recon_done_mutex, NULL); +#endif } if (pbi->max_threads > 1) { const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; - const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); - const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; num_sbs = sb_cols * sb_rows; } - if (num_sbs > pbi->row_mt_worker_data->num_sbs) { + if (num_sbs > pbi->row_mt_worker_data->num_sbs || + num_jobs > pbi->row_mt_worker_data->num_jobs) { vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data); - vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs); + vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs, + pbi->max_threads, num_jobs); } + vp9_jobq_alloc(pbi); } sz = vpx_rb_read_literal(rb, 16); @@ -2544,21 +2949,27 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, pbi->total_tiles = tile_rows * tile_cols; } - if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { - // Multi-threaded tile decoder - *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); - if (!pbi->lpf_mt_opt) { - if (!xd->corrupted) { - if (!cm->skip_loop_filter) { - 
// If multiple threads are used to decode tiles, then we use those - // threads to do parallel loopfiltering. - vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, - cm->lf.filter_level, 0, 0, pbi->tile_workers, - pbi->num_tile_workers, &pbi->lf_row_sync); + if (pbi->max_threads > 1 && tile_rows == 1 && + (tile_cols > 1 || pbi->row_mt == 1)) { + if (pbi->row_mt == 1) { + *p_data_end = + decode_tiles_row_wise_mt(pbi, data + first_partition_size, data_end); + } else { + // Multi-threaded tile decoder + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); + if (!pbi->lpf_mt_opt) { + if (!xd->corrupted) { + if (!cm->skip_loop_filter) { + // If multiple threads are used to decode tiles, then we use those + // threads to do parallel loopfiltering. + vp9_loop_filter_frame_mt( + new_fb, cm, pbi->mb.plane, cm->lf.filter_level, 0, 0, + pbi->tile_workers, pbi->num_tile_workers, &pbi->lf_row_sync); + } + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data is corrupted."); } - } else { - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Decode failed. 
Frame data is corrupted."); } } } else { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c index 7fde0b07fff..0aed3d717c5 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c @@ -56,10 +56,34 @@ static void vp9_dec_setup_mi(VP9_COMMON *cm) { } void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data, - VP9_COMMON *cm, int num_sbs) { + VP9_COMMON *cm, int num_sbs, int max_threads, + int num_jobs) { int plane; const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) * sizeof(*row_mt_worker_data->dqcoeff[0]); + row_mt_worker_data->num_jobs = num_jobs; +#if CONFIG_MULTITHREAD + { + int i; + CHECK_MEM_ERROR( + cm, row_mt_worker_data->recon_sync_mutex, + vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_mutex) * num_jobs)); + if (row_mt_worker_data->recon_sync_mutex) { + for (i = 0; i < num_jobs; ++i) { + pthread_mutex_init(&row_mt_worker_data->recon_sync_mutex[i], NULL); + } + } + + CHECK_MEM_ERROR( + cm, row_mt_worker_data->recon_sync_cond, + vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_cond) * num_jobs)); + if (row_mt_worker_data->recon_sync_cond) { + for (i = 0; i < num_jobs; ++i) { + pthread_cond_init(&row_mt_worker_data->recon_sync_cond[i], NULL); + } + } + } +#endif row_mt_worker_data->num_sbs = num_sbs; for (plane = 0; plane < 3; ++plane) { CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane], @@ -74,11 +98,36 @@ void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data, sizeof(*row_mt_worker_data->partition))); CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map, vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map))); + + // allocate memory for thread_data + if (row_mt_worker_data->thread_data == NULL) { + const size_t thread_size = + max_threads * sizeof(*row_mt_worker_data->thread_data); + CHECK_MEM_ERROR(cm, 
row_mt_worker_data->thread_data, + vpx_memalign(32, thread_size)); + } } void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) { if (row_mt_worker_data != NULL) { int plane; +#if CONFIG_MULTITHREAD + int i; + if (row_mt_worker_data->recon_sync_mutex != NULL) { + for (i = 0; i < row_mt_worker_data->num_jobs; ++i) { + pthread_mutex_destroy(&row_mt_worker_data->recon_sync_mutex[i]); + } + vpx_free(row_mt_worker_data->recon_sync_mutex); + row_mt_worker_data->recon_sync_mutex = NULL; + } + if (row_mt_worker_data->recon_sync_cond != NULL) { + for (i = 0; i < row_mt_worker_data->num_jobs; ++i) { + pthread_cond_destroy(&row_mt_worker_data->recon_sync_cond[i]); + } + vpx_free(row_mt_worker_data->recon_sync_cond); + row_mt_worker_data->recon_sync_cond = NULL; + } +#endif for (plane = 0; plane < 3; ++plane) { vpx_free(row_mt_worker_data->eob[plane]); row_mt_worker_data->eob[plane] = NULL; @@ -89,6 +138,8 @@ void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) { row_mt_worker_data->partition = NULL; vpx_free(row_mt_worker_data->recon_map); row_mt_worker_data->recon_map = NULL; + vpx_free(row_mt_worker_data->thread_data); + row_mt_worker_data->thread_data = NULL; } } @@ -179,8 +230,16 @@ void vp9_decoder_remove(VP9Decoder *pbi) { if (pbi->row_mt == 1) { vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data); + if (pbi->row_mt_worker_data != NULL) { + vp9_jobq_deinit(&pbi->row_mt_worker_data->jobq); + vpx_free(pbi->row_mt_worker_data->jobq_buf); +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&pbi->row_mt_worker_data->recon_done_mutex); +#endif + } vpx_free(pbi->row_mt_worker_data); } + vp9_remove_common(&pbi->common); vpx_free(pbi); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h index 9a582fffbb8..4a22aa6b5ba 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h +++ 
b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.h @@ -21,6 +21,7 @@ #include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_ppflags.h" +#include "./vp9_job_queue.h" #ifdef __cplusplus extern "C" { @@ -30,6 +31,14 @@ extern "C" { #define DQCOEFFS_PER_SB_LOG2 12 #define PARTITIONS_PER_SB 85 +typedef enum JobType { PARSE_JOB, RECON_JOB, LPF_JOB } JobType; + +typedef struct ThreadData { + struct VP9Decoder *pbi; + LFWorkerData *lf_data; + VP9LfSync *lf_sync; +} ThreadData; + typedef struct TileBuffer { const uint8_t *data; size_t size; @@ -49,14 +58,38 @@ typedef struct TileWorkerData { struct vpx_internal_error_info error_info; } TileWorkerData; +typedef void (*process_block_fn_t)(TileWorkerData *twd, + struct VP9Decoder *const pbi, int mi_row, + int mi_col, BLOCK_SIZE bsize, int bwl, + int bhl); + typedef struct RowMTWorkerData { int num_sbs; int *eob[MAX_MB_PLANE]; PARTITION_TYPE *partition; tran_low_t *dqcoeff[MAX_MB_PLANE]; int8_t *recon_map; + const uint8_t *data_end; + uint8_t *jobq_buf; + JobQueueRowMt jobq; + size_t jobq_size; + int num_tiles_done; + int num_jobs; +#if CONFIG_MULTITHREAD + pthread_mutex_t recon_done_mutex; + pthread_mutex_t *recon_sync_mutex; + pthread_cond_t *recon_sync_cond; +#endif + ThreadData *thread_data; } RowMTWorkerData; +/* Structure to queue and dequeue row decode jobs */ +typedef struct Job { + int row_num; + int tile_col; + JobType job_type; +} Job; + typedef struct VP9Decoder { DECLARE_ALIGNED(16, MACROBLOCKD, mb); @@ -128,7 +161,8 @@ struct VP9Decoder *vp9_decoder_create(BufferPool *const pool); void vp9_decoder_remove(struct VP9Decoder *pbi); void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data, - VP9_COMMON *cm, int num_sbs); + VP9_COMMON *cm, int num_sbs, int max_threads, + int num_jobs); void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data); static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, diff --git 
a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c new file mode 100644 index 00000000000..9a31f5a6d09 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <string.h> + +#include "vpx/vpx_integer.h" + +#include "vp9/decoder/vp9_job_queue.h" + +void vp9_jobq_init(JobQueueRowMt *jobq, uint8_t *buf, size_t buf_size) { +#if CONFIG_MULTITHREAD + pthread_mutex_init(&jobq->mutex, NULL); + pthread_cond_init(&jobq->cond, NULL); +#endif + jobq->buf_base = buf; + jobq->buf_wr = buf; + jobq->buf_rd = buf; + jobq->buf_end = buf + buf_size; + jobq->terminate = 0; +} + +void vp9_jobq_reset(JobQueueRowMt *jobq) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&jobq->mutex); +#endif + jobq->buf_wr = jobq->buf_base; + jobq->buf_rd = jobq->buf_base; + jobq->terminate = 0; +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&jobq->mutex); +#endif +} + +void vp9_jobq_deinit(JobQueueRowMt *jobq) { + vp9_jobq_reset(jobq); +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&jobq->mutex); + pthread_cond_destroy(&jobq->cond); +#endif +} + +void vp9_jobq_terminate(JobQueueRowMt *jobq) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&jobq->mutex); +#endif + jobq->terminate = 1; +#if CONFIG_MULTITHREAD + pthread_cond_broadcast(&jobq->cond); + pthread_mutex_unlock(&jobq->mutex); +#endif +} + +int vp9_jobq_queue(JobQueueRowMt *jobq, void *job, size_t job_size) { + int ret = 0; +#if CONFIG_MULTITHREAD + 
pthread_mutex_lock(&jobq->mutex); +#endif + if (jobq->buf_end >= jobq->buf_wr + job_size) { + memcpy(jobq->buf_wr, job, job_size); + jobq->buf_wr = jobq->buf_wr + job_size; +#if CONFIG_MULTITHREAD + pthread_cond_signal(&jobq->cond); +#endif + ret = 0; + } else { + /* Wrap around case is not supported */ + assert(0); + ret = 1; + } +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&jobq->mutex); +#endif + return ret; +} + +int vp9_jobq_dequeue(JobQueueRowMt *jobq, void *job, size_t job_size, + int blocking) { + int ret = 0; +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&jobq->mutex); +#endif + if (jobq->buf_end >= jobq->buf_rd + job_size) { + while (1) { + if (jobq->buf_wr >= jobq->buf_rd + job_size) { + memcpy(job, jobq->buf_rd, job_size); + jobq->buf_rd = jobq->buf_rd + job_size; + ret = 0; + break; + } else { + /* If all the entries have been dequeued, then break and return */ + if (jobq->terminate == 1) { + ret = 1; + break; + } + if (blocking == 1) { +#if CONFIG_MULTITHREAD + pthread_cond_wait(&jobq->cond, &jobq->mutex); +#endif + } else { + /* If there is no job available, + * and this is non blocking call then return fail */ + ret = 1; + break; + } + } + } + } else { + /* Wrap around case is not supported */ + ret = 1; + } +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&jobq->mutex); +#endif + + return ret; +} diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h new file mode 100644 index 00000000000..bc23bf9c2c1 --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_job_queue.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2018 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VPX_VP9_DECODER_VP9_JOB_QUEUE_H_ +#define VPX_VP9_DECODER_VP9_JOB_QUEUE_H_ + +#include "vpx_util/vpx_thread.h" + +typedef struct { + // Pointer to buffer base which contains the jobs + uint8_t *buf_base; + + // Pointer to current address where new job can be added + uint8_t *volatile buf_wr; + + // Pointer to current address from where next job can be obtained + uint8_t *volatile buf_rd; + + // Pointer to end of job buffer + uint8_t *buf_end; + + int terminate; + +#if CONFIG_MULTITHREAD + pthread_mutex_t mutex; + pthread_cond_t cond; +#endif +} JobQueueRowMt; + +void vp9_jobq_init(JobQueueRowMt *jobq, uint8_t *buf, size_t buf_size); +void vp9_jobq_reset(JobQueueRowMt *jobq); +void vp9_jobq_deinit(JobQueueRowMt *jobq); +void vp9_jobq_terminate(JobQueueRowMt *jobq); +int vp9_jobq_queue(JobQueueRowMt *jobq, void *job, size_t job_size); +int vp9_jobq_dequeue(JobQueueRowMt *jobq, void *job, size_t job_size, + int blocking); + +#endif // VPX_VP9_DECODER_VP9_JOB_QUEUE_H_ diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c index a2a742493bc..ef8cd46b485 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -479,7 +479,8 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { double weight_segment_target = 0; double weight_segment = 0; int thresh_low_motion = (cm->width < 720) ? 55 : 20; - int qp_thresh = VPXMIN(20, rc->best_quality << 1); + int qp_thresh = VPXMIN((cpi->oxcf.content == VP9E_CONTENT_SCREEN) ? 
35 : 20, + rc->best_quality << 1); cr->apply_cyclic_refresh = 1; if (frame_is_intra_only(cm) || cpi->svc.temporal_layer_id > 0 || is_lossless_requested(&cpi->oxcf) || diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h index a4a9f1c98bf..b6d7fdeae77 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h @@ -104,10 +104,6 @@ void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi, int mi_row, int mi_col, BLOCK_SIZE bsize); -// Update the segmentation map, and related quantities: cyclic refresh map, -// refresh sb_index, and target number of blocks to be refreshed. -void vp9_cyclic_refresh_update__map(struct VP9_COMP *const cpi); - // From the just encoded frame: update the actual number of blocks that were // applied the segment delta q, and the amount of low motion in the frame. 
// Also check conditions for forcing golden update, or preventing golden diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c index 9cd8819c36f..1f9ce2354ce 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c @@ -109,7 +109,7 @@ static void aq_variance(const uint8_t *a, int a_stride, const uint8_t *b, #if CONFIG_VP9_HIGHBITDEPTH static void aq_highbd_variance64(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int w, int h, - uint64_t *sse, uint64_t *sum) { + uint64_t *sse, int64_t *sum) { int i, j; uint16_t *a = CONVERT_TO_SHORTPTR(a8); @@ -128,15 +128,6 @@ static void aq_highbd_variance64(const uint8_t *a8, int a_stride, } } -static void aq_highbd_8_variance(const uint8_t *a8, int a_stride, - const uint8_t *b8, int b_stride, int w, int h, - unsigned int *sse, int *sum) { - uint64_t sse_long = 0; - uint64_t sum_long = 0; - aq_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); - *sse = (unsigned int)sse_long; - *sum = (int)sum_long; -} #endif // CONFIG_VP9_HIGHBITDEPTH static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, @@ -154,11 +145,13 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, int avg; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride, + uint64_t sse64 = 0; + int64_t sum64 = 0; + aq_highbd_variance64(x->plane[0].src.buf, x->plane[0].src.stride, CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, - &sse, &avg); - sse >>= 2 * (xd->bd - 8); - avg >>= (xd->bd - 8); + &sse64, &sum64); + sse = (unsigned int)(sse64 >> (2 * (xd->bd - 8))); + avg = (int)(sum64 >> (xd->bd - 8)); } else { aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, bw, bh, &sse, &avg); diff 
--git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h index 563fdbbdecd..11ec035e9dd 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_block.h @@ -208,9 +208,7 @@ struct macroblock { void (*highbd_inv_txfm_add)(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); #endif -#if CONFIG_ML_VAR_PARTITION DECLARE_ALIGNED(16, uint8_t, est_pred[64 * 64]); -#endif // CONFIG_ML_VAR_PARTITION struct scale_factors *me_sf; }; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.h index d2cdb101054..4e301cc17df 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.h @@ -91,6 +91,9 @@ typedef struct PC_TREE { struct PC_TREE *split[4]; PICK_MODE_CONTEXT *leaf_split[4]; }; + // Obtained from a simple motion search. Used by the ML based partition search + // speed feature. 
+ MV mv; } PC_TREE; void vp9_setup_pc_tree(struct VP9Common *cm, struct ThreadData *td); diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c index 2820b71b419..2885223b59e 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_denoiser.c @@ -201,7 +201,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( int i; struct buf_2d saved_dst[MAX_MB_PLANE]; struct buf_2d saved_pre[MAX_MB_PLANE]; - RefBuffer *saved_block_refs[2]; + const RefBuffer *saved_block_refs[2]; MV_REFERENCE_FRAME saved_frame; frame = ctx->best_reference_frame; @@ -219,9 +219,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( // If the best reference frame uses inter-prediction and there is enough of a // difference in sum-squared-error, use it. - if (frame != INTRA_FRAME && frame != ALTREF_FRAME && - (frame != GOLDEN_FRAME || num_spatial_layers == 1 || - use_gf_temporal_ref) && + if (frame != INTRA_FRAME && frame != ALTREF_FRAME && frame != GOLDEN_FRAME && sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) { mi->ref_frame[0] = ctx->best_reference_frame; mi->mode = ctx->best_sse_inter_mode; @@ -233,6 +231,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation( // Bias to last reference. 
if ((num_spatial_layers > 1 && !use_gf_temporal_ref) || frame == ALTREF_FRAME || + (frame == GOLDEN_FRAME && use_gf_temporal_ref) || (frame != LAST_FRAME && ((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) || denoiser->denoising_level >= kDenHigh))) { @@ -689,8 +688,8 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser, make_grayscale(&denoiser->running_avg_y[i]); #endif denoiser->frame_buffer_initialized = 1; - denoiser->denoising_level = kDenLow; - denoiser->prev_denoising_level = kDenLow; + denoiser->denoising_level = kDenMedium; + denoiser->prev_denoising_level = kDenMedium; denoiser->reset = 0; denoiser->current_denoiser_frame = 0; return 0; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c index 5adefac1a7f..5ed46f2f7de 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c @@ -235,6 +235,10 @@ static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row, break; } + // Set segment index from ROI map if it's enabled. 
+ if (cpi->roi.enabled) + mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); + vp9_init_plane_quantizers(cpi, x); } @@ -1261,9 +1265,10 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, int pixels_wide = 64, pixels_high = 64; int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] }; - int scene_change_detected = + int force_64_split = cpi->rc.high_source_sad || - (cpi->use_svc && cpi->svc.high_source_sad_superframe); + (cpi->use_svc && cpi->svc.high_source_sad_superframe) || + (cpi->oxcf.content == VP9E_CONTENT_SCREEN && !x->zero_temp_sad_source); // For the variance computation under SVC mode, we treat the frame as key if // the reference (base layer frame) is key frame (i.e., is_key_frame == 1). @@ -1326,7 +1331,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, } // If source_sad is low copy the partition without computing the y_sad. if (x->skip_low_source_sad && cpi->sf.copy_partition_flag && - !scene_change_detected && + !force_64_split && copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { x->sb_use_mv_part = 1; if (cpi->sf.svc_use_lowres_part && @@ -1355,7 +1360,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, // 5-20 for the 16x16 blocks. 
- force_split[0] = scene_change_detected; + force_split[0] = force_64_split; if (!is_key_frame) { // In the case of spatial/temporal scalable coding, the assumption here is @@ -1420,14 +1425,16 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, x->sb_mvcol_part = mi->mv[0].as_mv.col; x->sb_mvrow_part = mi->mv[0].as_mv.row; if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && - cpi->svc.spatial_layer_id == 0 && + cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source && cm->width > 640 && cm->height > 480) { - // Disable split below 16x16 block size when scroll motion is detected. + // Disable split below 16x16 block size when scroll motion (horz or + // vert) is detected. // TODO(marpan/jianj): Improve this condition: issue is that search // range is hard-coded/limited in vp9_int_pro_motion_estimation() so // scroll motion may not be detected here. - if ((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) || + if (((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) || + (abs(x->sb_mvcol_part) >= 48 && abs(x->sb_mvrow_part) <= 8)) && y_sad < 100000) { compute_minmax_variance = 0; thresholds[2] = INT64_MAX; @@ -1894,15 +1901,12 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, int mi_row, int mi_col, BLOCK_SIZE bsize, AQ_MODE aq_mode) { - int segment_qindex; VP9_COMMON *const cm = &cpi->common; const uint8_t *const map = cm->seg.update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; vp9_init_plane_quantizers(cpi, x); vpx_clear_system_state(); - segment_qindex = - vp9_get_qindex(&cm->seg, x->e_mbd.mi[0]->segment_id, cm->base_qindex); if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) { if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult; @@ -1917,7 +1921,7 @@ static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, return; } - x->rdmult = vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q); + x->rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); } static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, @@ -2440,17 +2444,15 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td, *(xd->mi[0]) = ctx->mic; *(x->mbmi_ext) = ctx->mbmi_ext; - if (seg->enabled && cpi->oxcf.aq_mode != NO_AQ) { - // For in frame complexity AQ or variance AQ, copy segment_id from - // segmentation_map. - if (cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ) { + if (seg->enabled && (cpi->oxcf.aq_mode != NO_AQ || cpi->roi.enabled)) { + // Setting segmentation map for cyclic_refresh. + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, + ctx->rate, ctx->dist, x->skip, p); + } else { const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); - } else { - // Setting segmentation map for cyclic_refresh. 
- vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, - ctx->rate, ctx->dist, x->skip, p); } vp9_init_plane_quantizers(cpi, x); } @@ -3288,23 +3290,24 @@ static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize, linear_score += linear_weights[i] * features[i]; } - return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx]; + return linear_score >= cpi->sf.rd_ml_partition.search_breakout_thresh[q_ctx]; } #undef FEATURES -#define FEATURES 17 +#define FEATURES 8 #define LABELS 4 static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, const PC_TREE *const pc_tree, int *allow_horz, int *allow_vert, - int64_t ref_rd, int mi_row, int mi_col) { + int64_t ref_rd) { const NN_CONFIG *nn_config = NULL; float score[LABELS] = { 0.0f, }; int thresh = -1; int i; + (void)x; if (ref_rd <= 0 || ref_rd > 1000000000) return; @@ -3312,15 +3315,15 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, case BLOCK_8X8: break; case BLOCK_16X16: nn_config = &vp9_rect_part_nnconfig_16; - thresh = cpi->sf.ml_prune_rect_partition_threhold[1]; + thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[1]; break; case BLOCK_32X32: nn_config = &vp9_rect_part_nnconfig_32; - thresh = cpi->sf.ml_prune_rect_partition_threhold[2]; + thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[2]; break; case BLOCK_64X64: nn_config = &vp9_rect_part_nnconfig_64; - thresh = cpi->sf.ml_prune_rect_partition_threhold[3]; + thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[3]; break; default: assert(0 && "Unexpected block size."); return; } @@ -3328,7 +3331,6 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, // Feature extraction and model score calculation. 
{ - const int64_t none_rdcost = pc_tree->none.rdcost; const VP9_COMMON *const cm = &cpi->common; #if CONFIG_VP9_HIGHBITDEPTH const int dc_q = @@ -3336,83 +3338,32 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, #else const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); #endif // CONFIG_VP9_HIGHBITDEPTH + const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; int feature_index = 0; - unsigned int block_var = 0; - unsigned int sub_block_var[4] = { 0 }; float features[FEATURES]; + features[feature_index++] = logf((float)dc_q + 1.0f); features[feature_index++] = (float)(pc_tree->partitioning == PARTITION_NONE); - features[feature_index++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f); + features[feature_index++] = logf((float)ref_rd / bs / bs + 1.0f); - // Calculate source pixel variance. { - struct buf_2d buf; - const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); - const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; - const MACROBLOCKD *const xd = &x->e_mbd; - vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); - - (void)xd; -#if CONFIG_VP9_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - block_var = vp9_high_get_sby_perpixel_variance(cpi, &x->plane[0].src, - bsize, xd->bd); - } else { - block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); - } -#else - block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); -#endif // CONFIG_VP9_HIGHBITDEPTH + const float norm_factor = 1.0f / ((float)ref_rd + 1.0f); + const int64_t none_rdcost = pc_tree->none.rdcost; + float rd_ratio = 2.0f; + if (none_rdcost > 0 && none_rdcost < 1000000000) + rd_ratio = (float)none_rdcost * norm_factor; + features[feature_index++] = VPXMIN(rd_ratio, 2.0f); - buf.stride = x->plane[0].src.stride; for (i = 0; i < 4; ++i) { - const int x_idx = (i & 1) * bs / 2; - const int y_idx = (i >> 1) * bs / 2; - buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride; -#if CONFIG_VP9_HIGHBITDEPTH - 
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - sub_block_var[i] = - vp9_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd); - } else { - sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize); - } -#else - sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize); -#endif // CONFIG_VP9_HIGHBITDEPTH + const int64_t this_rd = pc_tree->split[i]->none.rdcost; + const int rd_valid = this_rd > 0 && this_rd < 1000000000; + // Ratio between sub-block RD and whole block RD. + features[feature_index++] = + rd_valid ? (float)this_rd * norm_factor : 1.0f; } } - features[feature_index++] = logf((float)block_var + 1.0f); - features[feature_index++] = logf((float)ref_rd + 1.0f); - features[feature_index++] = (none_rdcost > 0 && none_rdcost < 1000000000) - ? (float)pc_tree->none.skippable - : 0.0f; - - for (i = 0; i < 4; ++i) { - const int64_t this_rd = pc_tree->split[i]->none.rdcost; - const int rd_valid = this_rd > 0 && this_rd < 1000000000; - // Ratio between sub-block RD and whole block RD. - features[feature_index++] = - rd_valid ? ((float)this_rd / (float)ref_rd) : 1.0f; - // Sub-block skippable. - features[feature_index++] = - rd_valid ? ((float)pc_tree->split[i]->none.skippable) : 0.0f; - } - - { - const float denom = (float)(block_var + 1); - const float low_b = 0.1f; - const float high_b = 10.0f; - for (i = 0; i < 4; ++i) { - // Ratio between the quarter sub-block variance and the - // whole-block variance. - float var_ratio = (float)(sub_block_var[i] + 1) / denom; - if (var_ratio < low_b) var_ratio = low_b; - if (var_ratio > high_b) var_ratio = high_b; - features[feature_index++] = var_ratio; - } - } assert(feature_index == FEATURES); nn_predict(features, nn_config, score); } @@ -3440,18 +3391,59 @@ static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, #undef FEATURES #undef LABELS +// Perform fast and coarse motion search for the given block. 
This is a +// pre-processing step for the ML based partition search speedup. +static void simple_motion_search(const VP9_COMP *const cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, int mi_row, int mi_col, + MV ref_mv, MV_REFERENCE_FRAME ref, + uint8_t *const pred_buf) { + const VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *const mi = xd->mi[0]; + const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_buffer(cpi, ref); + const int step_param = 1; + const MvLimits tmp_mv_limits = x->mv_limits; + const SEARCH_METHODS search_method = NSTEP; + const int sadpb = x->sadperbit16; + MV ref_mv_full = { ref_mv.row >> 3, ref_mv.col >> 3 }; + MV best_mv = { 0, 0 }; + int cost_list[5]; + + assert(yv12 != NULL); + if (!yv12) return; + vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, + &cm->frame_refs[ref - 1].sf); + mi->ref_frame[0] = ref; + mi->ref_frame[1] = NONE; + mi->sb_type = bsize; + vp9_set_mv_search_range(&x->mv_limits, &ref_mv); + vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_method, + sadpb, cond_cost_list(cpi, cost_list), &ref_mv, + &best_mv, 0, 0); + best_mv.row *= 8; + best_mv.col *= 8; + x->mv_limits = tmp_mv_limits; + mi->mv[0].as_mv = best_mv; + + set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); + xd->plane[0].dst.buf = pred_buf; + xd->plane[0].dst.stride = 64; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); +} + // Use a neural net model to prune partition-none and partition-split search. -// The model uses prediction residue variance and quantization step size as -// input features. -#define FEATURES 6 -static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, +// Features used: QP; spatial block size contexts; variance of prediction +// residue after simple_motion_search. 
+#define FEATURES 12 +static void ml_predict_var_rd_paritioning(const VP9_COMP *const cpi, + MACROBLOCK *const x, + PC_TREE *const pc_tree, BLOCK_SIZE bsize, int mi_row, int mi_col, int *none, int *split) { - VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi[0]; + const VP9_COMMON *const cm = &cpi->common; const NN_CONFIG *nn_config = NULL; #if CONFIG_VP9_HIGHBITDEPTH + MACROBLOCKD *xd = &x->e_mbd; DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? (CONVERT_TO_BYTEPTR(pred_buffer)) @@ -3461,69 +3453,48 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *const pred_buf = pred_buffer; #endif // CONFIG_VP9_HIGHBITDEPTH const int speed = cpi->oxcf.speed; - int i; float thresh = 0.0f; switch (bsize) { case BLOCK_64X64: - nn_config = &vp9_var_rd_part_nnconfig_64; - thresh = speed > 0 ? 3.5f : 3.0f; + nn_config = &vp9_part_split_nnconfig_64; + thresh = speed > 0 ? 2.8f : 3.0f; break; case BLOCK_32X32: - nn_config = &vp9_var_rd_part_nnconfig_32; + nn_config = &vp9_part_split_nnconfig_32; thresh = speed > 0 ? 3.5f : 3.0f; break; case BLOCK_16X16: - nn_config = &vp9_var_rd_part_nnconfig_16; - thresh = speed > 0 ? 3.5f : 4.0f; + nn_config = &vp9_part_split_nnconfig_16; + thresh = speed > 0 ? 3.8f : 4.0f; break; case BLOCK_8X8: - nn_config = &vp9_var_rd_part_nnconfig_8; + nn_config = &vp9_part_split_nnconfig_8; if (cm->width >= 720 && cm->height >= 720) thresh = speed > 0 ? 2.5f : 2.0f; else - thresh = speed > 0 ? 3.5f : 2.0f; + thresh = speed > 0 ? 3.8f : 2.0f; break; default: assert(0 && "Unexpected block size."); return; } if (!nn_config) return; - mi->ref_frame[1] = NONE; - mi->sb_type = bsize; // Do a simple single motion search to find a prediction for current block. // The variance of the residue will be used as input features. { + MV ref_mv; const MV_REFERENCE_FRAME ref = cpi->rc.is_src_frame_alt_ref ? 
ALTREF_FRAME : LAST_FRAME; - YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref); - MV ref_mv = { 0, 0 }; - MV ref_mv_full = { 0, 0 }; - const int step_param = 1; - const MvLimits tmp_mv_limits = x->mv_limits; - const SEARCH_METHODS search_method = NSTEP; - const int sadpb = x->sadperbit16; - MV best_mv = { 0, 0 }; - int cost_list[5]; - - assert(yv12 != NULL); - if (!yv12) return; - vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, - &cm->frame_refs[ref - 1].sf); - mi->ref_frame[0] = ref; - vp9_set_mv_search_range(&x->mv_limits, &ref_mv); - vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, - search_method, sadpb, cond_cost_list(cpi, cost_list), - &ref_mv, &best_mv, 0, 0); - best_mv.row *= 8; - best_mv.col *= 8; - x->mv_limits = tmp_mv_limits; - mi->mv[0].as_mv = best_mv; - - set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); - xd->plane[0].dst.buf = pred_buf; - xd->plane[0].dst.stride = 64; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); + // If bsize is 64x64, use zero MV as reference; otherwise, use MV result + // of previous(larger) block as reference. + if (bsize == BLOCK_64X64) + ref_mv.row = ref_mv.col = 0; + else + ref_mv = pc_tree->mv; + vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); + simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf); + pc_tree->mv = x->e_mbd.mi[0]->mv[0].as_mv; } vpx_clear_system_state(); @@ -3540,8 +3511,8 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, float score; // Generate model input features. - features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f); - vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); + features[feature_idx++] = logf((float)dc_q + 1.0f); + // Get the variance of the residue as input features. 
{ const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; @@ -3555,7 +3526,19 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, const unsigned int var = cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); + const MACROBLOCKD *const xd = &x->e_mbd; + const int has_above = !!xd->above_mi; + const int has_left = !!xd->left_mi; + const BLOCK_SIZE above_bsize = has_above ? xd->above_mi->sb_type : bsize; + const BLOCK_SIZE left_bsize = has_left ? xd->left_mi->sb_type : bsize; + int i; + features[feature_idx++] = (float)has_above; + features[feature_idx++] = (float)b_width_log2_lookup[above_bsize]; + features[feature_idx++] = (float)b_height_log2_lookup[above_bsize]; + features[feature_idx++] = (float)has_left; + features[feature_idx++] = (float)b_width_log2_lookup[left_bsize]; + features[feature_idx++] = (float)b_height_log2_lookup[left_bsize]; features[feature_idx++] = logf((float)var + 1.0f); for (i = 0; i < 4; ++i) { const int x_idx = (i & 1) * bs / 2; @@ -3584,7 +3567,6 @@ static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x, } } #undef FEATURES -#undef LABELS static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int orig_rdmult) { @@ -3818,14 +3800,19 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->partitioning = PARTITION_NONE; - if (cpi->sf.ml_var_partition_pruning) { - const int do_ml_var_partition_pruning = - !frame_is_intra_only(cm) && partition_none_allowed && do_split && + if (cpi->sf.rd_ml_partition.var_pruning && !frame_is_intra_only(cm)) { + const int do_rd_ml_partition_var_pruning = + partition_none_allowed && do_split && mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows && mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols; - if (do_ml_var_partition_pruning) { - ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col, + if (do_rd_ml_partition_var_pruning) { + 
ml_predict_var_rd_paritioning(cpi, x, pc_tree, bsize, mi_row, mi_col, &partition_none_allowed, &do_split); + } else { + vp9_zero(pc_tree->mv); + } + if (bsize > BLOCK_8X8) { // Store MV result as reference for subblocks. + for (i = 0; i < 4; ++i) pc_tree->split[i]->mv = pc_tree->mv; } } @@ -3855,7 +3842,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, best_rdc = this_rdc; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - if (cpi->sf.ml_partition_search_early_termination) { + if (cpi->sf.rd_ml_partition.search_early_termination) { // Currently, the machine-learning based partition search early // termination is only used while bsize is 16x16, 32x32 or 64x64, // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. @@ -3871,15 +3858,14 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) { const int use_ml_based_breakout = - cpi->sf.use_ml_partition_search_breakout && - cm->base_qindex >= 100; + cpi->sf.rd_ml_partition.search_breakout && cm->base_qindex >= 100; if (use_ml_based_breakout) { if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) { do_split = 0; do_rect = 0; } } else { - if (!cpi->sf.ml_partition_search_early_termination) { + if (!cpi->sf.rd_ml_partition.search_early_termination) { if ((best_rdc.dist < (dist_breakout_thr >> 2)) || (best_rdc.dist < dist_breakout_thr && best_rdc.rate < rate_breakout_thr)) { @@ -4035,7 +4021,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, pc_tree->partitioning = PARTITION_SPLIT; // Rate and distortion based partition search termination clause. 
- if (!cpi->sf.ml_partition_search_early_termination && + if (!cpi->sf.rd_ml_partition.search_early_termination && !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) || (best_rdc.dist < dist_breakout_thr && @@ -4076,8 +4062,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8; if (do_ml_rect_partition_pruning) { ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed, - &partition_vert_allowed, best_rdc.rdcost, mi_row, - mi_col); + &partition_vert_allowed, best_rdc.rdcost); } } @@ -4548,7 +4533,6 @@ static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) { } } -#if CONFIG_ML_VAR_PARTITION #define FEATURES 6 #define LABELS 2 static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x, @@ -4618,7 +4602,6 @@ static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x, } #undef FEATURES #undef LABELS -#endif // CONFIG_ML_VAR_PARTITION static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp, @@ -4649,10 +4632,8 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; int partition_vert_allowed = !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; -#if CONFIG_ML_VAR_PARTITION const int use_ml_based_partitioning = sf->partition_search_type == ML_BASED_PARTITION; -#endif // CONFIG_ML_VAR_PARTITION (void)*tp_orig; @@ -4684,7 +4665,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, partition_vert_allowed &= force_vert_split; } -#if CONFIG_ML_VAR_PARTITION if (use_ml_based_partitioning) { if (partition_none_allowed || do_split) do_rect = 0; if (partition_none_allowed && do_split) { @@ -4694,7 +4674,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0; } } -#endif // CONFIG_ML_VAR_PARTITION if 
(!partition_none_allowed && !do_split) do_rect = 1; @@ -4719,10 +4698,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, best_rdc = this_rdc; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; -#if CONFIG_ML_VAR_PARTITION - if (!use_ml_based_partitioning) -#endif // CONFIG_ML_VAR_PARTITION - { + if (!use_ml_based_partitioning) { int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist; int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate; dist_breakout_thr >>= @@ -5131,31 +5107,26 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, update_partition_context(xd, mi_row, mi_col, subsize, bsize); } -#if CONFIG_ML_VAR_PARTITION // Get a prediction(stored in x->est_pred) for the whole 64x64 superblock. static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *x, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; const int is_key_frame = frame_is_intra_only(cm); + MACROBLOCKD *xd = &x->e_mbd; set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); if (!is_key_frame) { - MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); const YV12_BUFFER_CONFIG *yv12_g = NULL; const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); - int pixels_wide = 64, pixels_high = 64; unsigned int y_sad_g, y_sad_thr; unsigned int y_sad = UINT_MAX; assert(yv12 != NULL); - if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); - if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); - if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || cpi->svc.use_gf_temporal_ref_current_layer) { // For now, GOLDEN will not be used for non-zero spatial layers, since @@ -5209,7 +5180,6 @@ static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile, &cm->frame_refs[GOLDEN_FRAME - 1].sf); mi->ref_frame[0] = GOLDEN_FRAME; mi->mv[0].as_int 
= 0; - y_sad = y_sad_g; } else { x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; } @@ -5234,7 +5204,6 @@ static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile, #endif // CONFIG_VP9_HIGHBITDEPTH } } -#endif // CONFIG_ML_VAR_PARTITION static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row, @@ -5327,7 +5296,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; -#if CONFIG_ML_VAR_PARTITION case ML_BASED_PARTITION: get_estimated_pred(cpi, tile_info, x, mi_row, mi_col); x->max_partition_size = BLOCK_64X64; @@ -5337,7 +5305,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, td->pc_root); break; -#endif // CONFIG_ML_VAR_PARTITION case SOURCE_VAR_BASED_PARTITION: set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col); nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c index bf35b35708f..ec52d745a47 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c @@ -29,6 +29,9 @@ #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_idct.h" +#if CONFIG_NON_GREEDY_MV +#include "vp9/common/vp9_mvref_common.h" +#endif #if CONFIG_VP9_POSTPROC #include "vp9/common/vp9_postproc.h" #endif @@ -2366,6 +2369,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf, #if CONFIG_NON_GREEDY_MV cpi->feature_score_loc_alloc = 0; + cpi->tpl_ready = 0; #endif // CONFIG_NON_GREEDY_MV for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL; @@ -2570,8 +2574,20 @@ void vp9_remove_compressor(VP9_COMP *cpi) { 
vpx_free(cpi->feature_score_loc_arr); vpx_free(cpi->feature_score_loc_sort); vpx_free(cpi->feature_score_loc_heap); + vpx_free(cpi->select_mv_arr); #endif for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { +#if CONFIG_NON_GREEDY_MV + int rf_idx; + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + int sqr_bsize; + for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) { + vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]); + } + vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); + vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]); + } +#endif vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); cpi->tpl_stats[frame].is_valid = 0; } @@ -5829,31 +5845,6 @@ static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, } } -#if CONFIG_NON_GREEDY_MV -double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows, int cols) { - double IxIx = 0; - double IxIy = 0; - double IyIy = 0; - double score; - int r, c; - vpx_clear_system_state(); - for (r = 0; r + 1 < rows; ++r) { - for (c = 0; c + 1 < cols; ++c) { - int diff_x = buf[r * stride + c] - buf[r * stride + c + 1]; - int diff_y = buf[r * stride + c] - buf[(r + 1) * stride + c]; - IxIx += diff_x * diff_x; - IxIy += diff_x * diff_y; - IyIy += diff_y * diff_y; - } - } - IxIx /= (rows - 1) * (cols - 1); - IxIy /= (rows - 1) * (cols - 1); - IyIy /= (rows - 1) * (cols - 1); - score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001); - return score; -} -#endif - static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row, int mi_col) { x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); @@ -6026,6 +6017,377 @@ static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, } #if CONFIG_NON_GREEDY_MV +static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture, + int frame_idx, int rf_idx, int mi_row, + int mi_col, struct buf_2d *src, + struct buf_2d *pre) { + const int mb_y_offset = + mi_row * MI_SIZE * 
xd->cur_buf->y_stride + mi_col * MI_SIZE; + YV12_BUFFER_CONFIG *ref_frame = NULL; + int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; + if (ref_frame_idx != -1) { + ref_frame = gf_picture[ref_frame_idx].frame; + src->buf = xd->cur_buf->y_buffer + mb_y_offset; + src->stride = xd->cur_buf->y_stride; + pre->buf = ref_frame->y_buffer + mb_y_offset; + pre->stride = ref_frame->y_stride; + assert(src->stride == pre->stride); + return 1; + } else { + printf("invalid ref_frame_idx"); + assert(ref_frame_idx != -1); + return 0; + } +} + +#define kMvPreCheckLines 5 +#define kMvPreCheckSize 15 + +#define MV_REF_POS_NUM 3 +POSITION mv_ref_pos[MV_REF_POS_NUM] = { + { -1, 0 }, + { 0, -1 }, + { -1, -1 }, +}; + +static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row, + int mi_col) { + return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col]; +} + +static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + int i; + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + int_mv nearest_mv, near_mv, invalid_mv; + nearest_mv.as_int = INVALID_MV; + near_mv.as_int = INVALID_MV; + invalid_mv.as_int = INVALID_MV; + for (i = 0; i < MV_REF_POS_NUM; ++i) { + int nb_row = mi_row + mv_ref_pos[i].row * mi_height; + int nb_col = mi_col + mv_ref_pos[i].col * mi_width; + assert(mv_ref_pos[i].row <= 0); + assert(mv_ref_pos[i].col <= 0); + if (nb_row >= 0 && nb_col >= 0) { + if (nearest_mv.as_int == INVALID_MV) { + nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col); + } else { + int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col); + if (mv.as_int == nearest_mv.as_int) { + continue; + } else { + near_mv = mv; + break; + } + } + } + } + if (nearest_mv.as_int == INVALID_MV) { + nearest_mv.as_mv.row = 0; + nearest_mv.as_mv.col = 0; + } + if (near_mv.as_int == INVALID_MV) { + near_mv.as_mv.row = 0; + near_mv.as_mv.col = 0; 
+ } + if (mv_mode == NEAREST_MV_MODE) { + return nearest_mv; + } + if (mv_mode == NEAR_MV_MODE) { + return near_mv; + } + assert(0); + return invalid_mv; +} + +static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi, + TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + int_mv mv; + switch (mv_mode) { + case ZERO_MV_MODE: + mv.as_mv.row = 0; + mv.as_mv.col = 0; + break; + case NEW_MV_MODE: + mv = *get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col); + break; + case NEAREST_MV_MODE: + mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col); + break; + case NEAR_MV_MODE: + mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col); + break; + default: + mv.as_int = INVALID_MV; + assert(0); + break; + } + return mv; +} + +static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd, + GF_PICTURE *gf_picture, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, + int mi_row, int mi_col, int_mv *mv) { + uint32_t sse; + struct buf_2d src; + struct buf_2d pre; + MV full_mv; + *mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, + mi_col); + full_mv = get_full_mv(&mv->as_mv); + if (get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col, + &src, &pre)) { + // TODO(angiebird): Consider subpixel when computing the sse. + cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv), + pre.stride, &sse); + return (double)(sse << VP9_DIST_SCALE_LOG2); + } else { + assert(0); + return 0; + } +} + +static int get_mv_mode_cost(int mv_mode) { + // TODO(angiebird): The probabilities are roughly inferred from + // default_inter_mode_probs. Check if there is a better way to set the + // probabilities. 
+ const int zero_mv_prob = 9; + const int new_mv_prob = 77; + const int ref_mv_prob = 170; + assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256); + switch (mv_mode) { + case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break; + case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break; + case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break; + case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break; + default: assert(0); return -1; + } +} + +static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) { + double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) + + log2(1 + abs(new_mv->col - ref_mv->col)); + mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT); + return mv_diff_cost; +} +static double get_mv_cost(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame, + int rf_idx, BLOCK_SIZE bsize, int mi_row, + int mi_col) { + double mv_cost = get_mv_mode_cost(mv_mode); + if (mv_mode == NEW_MV_MODE) { + MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, tpl_frame, rf_idx, bsize, + mi_row, mi_col) + .as_mv; + MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, tpl_frame, rf_idx, + bsize, mi_row, mi_col) + .as_mv; + MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, tpl_frame, rf_idx, + bsize, mi_row, mi_col) + .as_mv; + double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv); + double near_cost = get_mv_diff_cost(&new_mv, &near_mv); + mv_cost += nearest_cost < near_cost ? 
nearest_cost : near_cost; + } + return mv_cost; +} + +static double rd_cost(int rdmult, int rddiv, double rate, double dist) { + return (rate * rdmult) / (1 << 9) + dist * (1 << rddiv); +} + +static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x, + GF_PICTURE *gf_picture, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, + int mi_row, int mi_col, int_mv *mv) { + MACROBLOCKD *xd = &x->e_mbd; + double mv_dist = get_mv_dist(mv_mode, cpi, xd, gf_picture, frame_idx, + tpl_frame, rf_idx, bsize, mi_row, mi_col, mv); + double mv_cost = + get_mv_cost(mv_mode, cpi, tpl_frame, rf_idx, bsize, mi_row, mi_col); + + return rd_cost(x->rdmult, x->rddiv, mv_cost, mv_dist); +} + +static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, + GF_PICTURE *gf_picture, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col, + double *rd, int_mv *mv) { + int best_mv_mode = ZERO_MV_MODE; + int update = 0; + int mv_mode; + *rd = 0; + for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) { + double this_rd; + int_mv this_mv; + if (mv_mode == NEW_MV_MODE) { + continue; + } + this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, frame_idx, tpl_frame, + rf_idx, bsize, mi_row, mi_col, &this_mv); + if (update == 0) { + *rd = this_rd; + *mv = this_mv; + best_mv_mode = mv_mode; + update = 1; + } else { + if (this_rd < *rd) { + *rd = this_rd; + *mv = this_mv; + best_mv_mode = mv_mode; + } + } + } + return best_mv_mode; +} + +static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, + GF_PICTURE *gf_picture, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + int tmp_mv_mode_arr[kMvPreCheckSize]; + int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx]; + double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx]; + int_mv *select_mv_arr = cpi->select_mv_arr; + int_mv 
tmp_select_mv_arr[kMvPreCheckSize]; + int stride = tpl_frame->stride; + double new_mv_rd = 0; + double no_new_mv_rd = 0; + double this_new_mv_rd = 0; + double this_no_new_mv_rd = 0; + int idx; + int tmp_idx; + assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1); + + // no new mv + // diagnal scan order + tmp_idx = 0; + for (idx = 0; idx < kMvPreCheckLines; ++idx) { + int r; + for (r = 0; r <= idx; ++r) { + int c = idx - r; + int nb_row = mi_row + r * mi_height; + int nb_col = mi_col + c * mi_width; + if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { + double this_rd; + int_mv *mv = &select_mv_arr[nb_row * stride + nb_col]; + mv_mode_arr[nb_row * stride + nb_col] = + find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, + rf_idx, bsize, nb_row, nb_col, &this_rd, mv); + if (r == 0 && c == 0) { + this_no_new_mv_rd = this_rd; + } + no_new_mv_rd += this_rd; + tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col]; + tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col]; + ++tmp_idx; + } + } + } + + // new mv + mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE; + this_new_mv_rd = eval_mv_mode(NEW_MV_MODE, cpi, x, gf_picture, frame_idx, + tpl_frame, rf_idx, bsize, mi_row, mi_col, + &select_mv_arr[mi_row * stride + mi_col]); + new_mv_rd = this_new_mv_rd; + // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE + // beforehand. 
+ for (idx = 1; idx < kMvPreCheckLines; ++idx) { + int r; + for (r = 0; r <= idx; ++r) { + int c = idx - r; + int nb_row = mi_row + r * mi_height; + int nb_col = mi_col + c * mi_width; + if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { + double this_rd; + int_mv *mv = &select_mv_arr[nb_row * stride + nb_col]; + mv_mode_arr[nb_row * stride + nb_col] = + find_best_ref_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, + rf_idx, bsize, nb_row, nb_col, &this_rd, mv); + new_mv_rd += this_rd; + } + } + } + + // update best_mv_mode + tmp_idx = 0; + if (no_new_mv_rd < new_mv_rd) { + for (idx = 0; idx < kMvPreCheckLines; ++idx) { + int r; + for (r = 0; r <= idx; ++r) { + int c = idx - r; + int nb_row = mi_row + r * mi_height; + int nb_col = mi_col + c * mi_width; + if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { + mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx]; + select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx]; + ++tmp_idx; + } + } + } + rd_diff_arr[mi_row * stride + mi_col] = 0; + } else { + rd_diff_arr[mi_row * stride + mi_col] = + (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd); + } +} + +static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x, + GF_PICTURE *gf_picture, int frame_idx, + TplDepFrame *tpl_frame, int rf_idx, + BLOCK_SIZE bsize) { + const int mi_height = num_8x8_blocks_high_lookup[bsize]; + const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int unit_rows = tpl_frame->mi_rows / mi_height; + const int unit_cols = tpl_frame->mi_cols / mi_width; + const int max_diagonal_lines = unit_rows + unit_cols - 1; + int idx; + for (idx = 0; idx < max_diagonal_lines; ++idx) { + int r; + for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1); + ++r) { + int c = idx - r; + int mi_row = r * mi_height; + int mi_col = c * mi_width; + assert(c >= 0 && c < unit_cols); + assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows); + assert(mi_col >= 0 && mi_col < 
tpl_frame->mi_cols); + predict_mv_mode(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx, bsize, + mi_row, mi_col); + } + } +} + +static double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows, + int cols) { + double IxIx = 0; + double IxIy = 0; + double IyIy = 0; + double score; + int r, c; + vpx_clear_system_state(); + for (r = 0; r + 1 < rows; ++r) { + for (c = 0; c + 1 < cols; ++c) { + int diff_x = buf[r * stride + c] - buf[r * stride + c + 1]; + int diff_y = buf[r * stride + c] - buf[(r + 1) * stride + c]; + IxIx += diff_x * diff_x; + IxIy += diff_x * diff_y; + IyIy += diff_y * diff_y; + } + } + IxIx /= (rows - 1) * (cols - 1); + IxIy /= (rows - 1) * (cols - 1); + IyIy /= (rows - 1) * (cols - 1); + score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001); + return score; +} + static int compare_feature_score(const void *a, const void *b) { const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a; const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b; @@ -6112,6 +6474,7 @@ static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size, FEATURE_SCORE_LOC *input) { int c, p; FEATURE_SCORE_LOC *tmp; + input->visited = 1; heap[*size] = input; ++*size; c = *size - 1; @@ -6156,11 +6519,13 @@ static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx, TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; + const int pw = num_4x4_blocks_wide_lookup[bsize] << 2; + const int ph = num_4x4_blocks_high_lookup[bsize] << 2; int fs_loc_sort_size; int fs_loc_heap_size; int mi_row, mi_col; - tpl_frame->lambda = 250; + tpl_frame->lambda = (pw * ph) / 4; fs_loc_sort_size = 0; for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { @@ -6214,8 +6579,6 @@ static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx, FEATURE_SCORE_LOC *fs_loc; max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, 
&fs_loc); - fs_loc->visited = 1; - do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row, fs_loc->mi_col); @@ -6265,6 +6628,7 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int64_t recon_error, sse; #if CONFIG_NON_GREEDY_MV int square_block_idx; + int rf_idx; #endif // Setup scaling factor @@ -6311,6 +6675,13 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx); build_motion_field(cpi, xd, frame_idx, ref_frame, square_bsize); } + for (rf_idx = 0; rf_idx < 3; ++rf_idx) { + int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; + if (ref_frame_idx != -1) { + predict_mv_mode_arr(cpi, x, gf_picture, frame_idx, tpl_frame, rf_idx, + bsize); + } + } #endif for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { @@ -6355,9 +6726,10 @@ static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, #define DUMP_TPL_STATS 0 #if DUMP_TPL_STATS static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) { + int i, j; printf("%d %d\n", h, w); - for (int i = 0; i < h; ++i) { - for (int j = 0; j < w; ++j) { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { printf("%d ", buf[(row + i) * stride + col + j]); } } @@ -6390,8 +6762,6 @@ static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames, for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) { for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) { - const TplDepStats *tpl_ptr = - &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; int_mv mv = *get_pyramid_mv(tpl_frame, idx, bsize, mi_row, mi_col); printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row, mv.as_mv.col); } @@ -6449,6 +6819,10 @@ static void init_tpl_buffer(VP9_COMP *cpi) { cpi->feature_score_loc_alloc = 1; } + vpx_free(cpi->select_mv_arr); + CHECK_MEM_ERROR( + cm, cpi->select_mv_arr, + vpx_calloc(mi_rows * mi_cols * 4, 
sizeof(*cpi->select_mv_arr))); #endif // TODO(jingning): Reduce the actual memory use for tpl model build up. @@ -6459,16 +6833,26 @@ static void init_tpl_buffer(VP9_COMP *cpi) { continue; #if CONFIG_NON_GREEDY_MV - vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr); for (rf_idx = 0; rf_idx < 3; ++rf_idx) { for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) { + vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]); CHECK_MEM_ERROR( cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize], vpx_calloc( - mi_rows * mi_cols, + mi_rows * mi_cols * 4, sizeof( *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]))); } + vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); + CHECK_MEM_ERROR( + cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx], + vpx_calloc(mi_rows * mi_cols * 4, + sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx]))); + vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]); + CHECK_MEM_ERROR( + cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx], + vpx_calloc(mi_rows * mi_cols * 4, + sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx]))); } #endif vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); @@ -6494,7 +6878,7 @@ static void setup_tpl_stats(VP9_COMP *cpi) { const GF_GROUP *gf_group = &cpi->twopass.gf_group; int tpl_group_frames = 0; int frame_idx; - const BLOCK_SIZE bsize = BLOCK_32X32; + cpi->tpl_bsize = BLOCK_32X32; init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames); @@ -6503,11 +6887,12 @@ static void setup_tpl_stats(VP9_COMP *cpi) { // Backward propagation from tpl_group_frames to 1. 
for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) { if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue; - mc_flow_dispenser(cpi, gf_picture, frame_idx, bsize); + mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize); } #if CONFIG_NON_GREEDY_MV + cpi->tpl_ready = 1; #if DUMP_TPL_STATS - dump_tpl_stats(cpi, tpl_group_frames, gf_picture, bsize); + dump_tpl_stats(cpi, tpl_group_frames, gf_picture, cpi->tpl_bsize); #endif // DUMP_TPL_STATS #endif // CONFIG_NON_GREEDY_MV } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h index cb9ea2de2ff..d1a782bfb31 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h @@ -304,6 +304,12 @@ typedef struct TplDepStats { #if CONFIG_NON_GREEDY_MV #define SQUARE_BLOCK_SIZES 4 + +#define ZERO_MV_MODE 0 +#define NEW_MV_MODE 1 +#define NEAREST_MV_MODE 2 +#define NEAR_MV_MODE 3 +#define MAX_MV_MODE 4 #endif typedef struct TplDepFrame { @@ -320,6 +326,8 @@ typedef struct TplDepFrame { double mv_dist_sum[3]; double mv_cost_sum[3]; int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES]; + int *mv_mode_arr[3]; + double *rd_diff_arr[3]; #endif } TplDepFrame; @@ -582,14 +590,17 @@ typedef struct VP9_COMP { #endif YV12_BUFFER_CONFIG *raw_source_frame; + BLOCK_SIZE tpl_bsize; TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE]; YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES]; EncFrameBuf enc_frame_buf[REF_FRAMES]; #if CONFIG_NON_GREEDY_MV + int tpl_ready; int feature_score_loc_alloc; FEATURE_SCORE_LOC *feature_score_loc_arr; FEATURE_SCORE_LOC **feature_score_loc_sort; FEATURE_SCORE_LOC **feature_score_loc_heap; + int_mv *select_mv_arr; #endif TileDataEnc *tile_data; @@ -945,8 +956,8 @@ static INLINE RefCntBuffer *get_ref_cnt_buffer(VP9_COMMON *cm, int fb_idx) { } static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( - VP9_COMP *cpi, 
MV_REFERENCE_FRAME ref_frame) { - VP9_COMMON *const cm = &cpi->common; + const VP9_COMP *const cpi, MV_REFERENCE_FRAME ref_frame) { + const VP9_COMMON *const cm = &cpi->common; const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); return buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL; @@ -1025,7 +1036,7 @@ static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { cpi->oxcf.enable_auto_arf; } -static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd, +static INLINE void set_ref_ptrs(const VP9_COMMON *const cm, MACROBLOCKD *xd, MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { xd->block_refs[0] = diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c index 8f0da48a2a1..620d21f5aff 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c @@ -549,7 +549,7 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) { } #define FP_DN_THRESH 8 -#define FP_MAX_DN_THRESH 16 +#define FP_MAX_DN_THRESH 24 #define KERNEL_SIZE 3 // Baseline Kernal weights for first pass noise metric @@ -843,6 +843,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, double mb_intra_factor; double mb_brightness_factor; double mb_neutral_count; + int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH); // First pass code requires valid last and new frame buffers. assert(new_yv12 != NULL); @@ -1254,7 +1255,6 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, } } #endif - // Does the row vector point inwards or outwards? 
if (mb_row < cm->mb_rows / 2) { if (mv.row > 0) @@ -1280,14 +1280,13 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, else if (mv.col < 0) --(fp_acc_data->sum_in_vectors); } - fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF; - } else if (this_intra_error < scale_sse_threshold(cm, LOW_I_THRESH)) { + } + if (this_intra_error < scaled_low_intra_thresh) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); - } else { // 0,0 mv but high error + } else { fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF; } } else { // Intra < inter error - int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH); if (this_intra_error < scaled_low_intra_thresh) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); if (this_motion_error < scaled_low_intra_thresh) { @@ -2340,7 +2339,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits, switch (gf_group->update_type[idx]) { case ARF_UPDATE: gf_group->bit_allocation[idx] = - (int)((arf_depth_bits[gf_group->layer_depth[idx]] * + (int)(((int64_t)arf_depth_bits[gf_group->layer_depth[idx]] * gf_group->gfu_boost[idx]) / arf_depth_boost[gf_group->layer_depth[idx]]); break; @@ -2399,8 +2398,12 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise, twopass->arnr_strength_adjustment = 0; - if ((section_zeromv < 0.10) || (section_noise <= (SECTION_NOISE_DEF * 0.75))) + if (section_noise < 150) { twopass->arnr_strength_adjustment -= 1; + if (section_noise < 75) twopass->arnr_strength_adjustment -= 1; + } else if (section_noise > 250) + twopass->arnr_strength_adjustment += 1; + if (section_zeromv > 0.50) twopass->arnr_strength_adjustment += 1; } @@ -2633,7 +2636,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) { rc->gfu_boost = calc_arf_boost(cpi, forward_frames, (i - 1)); rc->source_alt_ref_pending = 1; } else { - rc->gfu_boost = VPXMIN(MAX_GF_BOOST, calc_arf_boost(cpi, 0, (i - 1))); + 
reset_fpf_position(twopass, start_pos); + rc->gfu_boost = VPXMIN(MAX_GF_BOOST, calc_arf_boost(cpi, (i - 1), 0)); rc->source_alt_ref_pending = 0; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c index 534b15acc88..b82cbcdaa74 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c @@ -29,11 +29,6 @@ // #define NEW_DIAMOND_SEARCH -static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, - const MV *mv) { - return &buf->buf[mv->row * buf->stride + mv->col]; -} - void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); @@ -1906,13 +1901,12 @@ static double full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x, return bestsme; } -double vp9_diamond_search_sad_new(const MACROBLOCK *x, - const search_site_config *cfg, - const MV *init_full_mv, MV *best_full_mv, - double *best_mv_dist, double *best_mv_cost, - int search_param, double lambda, int *num00, - const vp9_variance_fn_ptr_t *fn_ptr, - const int_mv *nb_full_mvs, int full_mv_num) { +static double diamond_search_sad_new( + const MACROBLOCK *x, const search_site_config *cfg, const MV *init_full_mv, + MV *best_full_mv, double *best_mv_dist, double *best_mv_cost, + int search_param, double lambda, int *num00, + const vp9_variance_fn_ptr_t *fn_ptr, const int_mv *nb_full_mvs, + int full_mv_num) { int i, j, step; const MACROBLOCKD *const xd = &x->e_mbd; @@ -2430,7 +2424,7 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; const MV center_mv = { 0, 0 }; vpx_clear_system_state(); - bestsme = vp9_diamond_search_sad_new( + bestsme = diamond_search_sad_new( x, &cpi->ss_cfg, 
mvp_full, best_mv, best_mv_dist, best_mv_cost, step_param, lambda, &n, fn_ptr, nb_full_mvs, full_mv_num); @@ -2448,7 +2442,7 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV temp_mv; double mv_dist; double mv_cost; - thissme = vp9_diamond_search_sad_new( + thissme = diamond_search_sad_new( x, &cpi->ss_cfg, mvp_full, &temp_mv, &mv_dist, &mv_cost, step_param + n, lambda, &num00, fn_ptr, nb_full_mvs, full_mv_num); thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0); @@ -2492,7 +2486,8 @@ double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ -static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, +static int full_pixel_diamond(const VP9_COMP *const cpi, + const MACROBLOCK *const x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, int *cost_list, const vp9_variance_fn_ptr_t *fn_ptr, @@ -2554,8 +2549,9 @@ static int full_pixel_diamond(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, // Runs an limited range exhaustive mesh search using a pattern set // according to the encode speed profile. 
-static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x, - MV *centre_mv_full, int sadpb, int *cost_list, +static int full_pixel_exhaustive(const VP9_COMP *const cpi, + const MACROBLOCK *const x, MV *centre_mv_full, + int sadpb, int *cost_list, const vp9_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv) { const SPEED_FEATURES *const sf = &cpi->sf; @@ -2817,13 +2813,13 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, return best_sad; } -int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - MV *mvp_full, int step_param, int search_method, - int error_per_bit, int *cost_list, const MV *ref_mv, - MV *tmp_mv, int var_max, int rd) { +int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x, + BLOCK_SIZE bsize, MV *mvp_full, int step_param, + int search_method, int error_per_bit, int *cost_list, + const MV *ref_mv, MV *tmp_mv, int var_max, int rd) { const SPEED_FEATURES *const sf = &cpi->sf; const SEARCH_METHODS method = (SEARCH_METHODS)search_method; - vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; + const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; int var = 0; int run_exhaustive_search = 0; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h index 6bef8874758..da93c5d44a2 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h @@ -38,6 +38,11 @@ typedef struct search_site_config { int total_steps; } search_site_config; +static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, + const MV *mv) { + return &buf->buf[mv->row * buf->stride + mv->col]; +} + void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride); void vp9_init3smotion_compensation(search_site_config *cfg, int stride); @@ -110,7 +115,8 @@ struct VP9_COMP; // "mvp_full" is the MV search 
starting point; // "ref_mv" is the context reference MV; // "tmp_mv" is the searched best MV. -int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, +int vp9_full_pixel_search(const struct VP9_COMP *const cpi, + const MACROBLOCK *const x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int search_method, int error_per_bit, int *cost_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd); @@ -143,7 +149,6 @@ static INLINE MV get_full_mv(const MV *mv) { out_mv.col = mv->col >> 3; return out_mv; } - struct TplDepFrame; void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row, int mi_col, int rf_idx, BLOCK_SIZE bsize, diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c index fc189dbb1f9..bd98d221a0b 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_noise_estimate.c @@ -32,7 +32,7 @@ static INLINE int noise_est_svc(const struct VP9_COMP *const cpi) { void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) { ne->enabled = 0; - ne->level = kLowLow; + ne->level = (width * height < 1280 * 720) ? kLowLow : kLow; ne->value = 0; ne->count = 0; ne->thresh = 90; @@ -97,7 +97,7 @@ NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) { } else { if (ne->value > ne->thresh) noise_level = kMedium; - else if (ne->value > ((9 * ne->thresh) >> 4)) + else if (ne->value > (ne->thresh >> 1)) noise_level = kLow; else noise_level = kLowLow; @@ -125,7 +125,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { // Tune these thresholds for different resolutions when denoising is // enabled. 
if (cm->width > 640 && cm->width < 1920) { - thresh_consec_zeromv = 4; + thresh_consec_zeromv = 2; thresh_sum_diff = 200; thresh_sum_spatial = (120 * 120) << 8; thresh_spatial_var = (48 * 48) << 8; @@ -151,7 +151,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { } else if (frame_counter > 60 && cpi->svc.num_encoded_top_layer > 1 && cpi->rc.frames_since_key > cpi->svc.number_spatial_layers && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && - cpi->rc.avg_frame_low_motion < (low_res ? 70 : 50)) { + cpi->rc.avg_frame_low_motion < (low_res ? 60 : 40)) { // Force noise estimation to 0 and denoiser off if content has high motion. ne->level = kLowLow; ne->count = 0; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_partition_models.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_partition_models.h index 904d214001b..09c0e30a47d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_partition_models.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_partition_models.h @@ -241,141 +241,59 @@ static const float vp9_partition_breakout_weights_8[RESOLUTION_CTX][Q_CTX] #undef RESOLUTION_CTX // Rectangular partition search pruning model. 
-#define FEATURES 17 +#define FEATURES 8 #define LABELS 4 -static const float vp9_rect_part_nn_weights_16_layer0[FEATURES * 32] = { - 1.262885f, -0.533345f, -0.161280f, 0.106098f, 0.194799f, 0.003600f, - 0.394783f, -0.053954f, 0.264474f, -0.016651f, 0.376765f, 0.221471f, - 0.489799f, 0.054924f, 0.018292f, 0.037633f, -0.053430f, 1.092426f, - 0.205791f, -0.055661f, -0.227335f, 0.301274f, -0.169917f, 0.100426f, - 0.254388f, 0.103465f, 0.189560f, 0.116479f, 1.647195f, -0.667044f, - 0.067795f, -0.044580f, 0.019428f, 0.072938f, -0.797569f, -0.077539f, - -0.225636f, 0.262883f, -1.048009f, 0.210118f, -0.416156f, -0.143741f, - -0.296985f, 0.205918f, -0.517383f, -0.118527f, -0.396606f, -0.113128f, - -0.279468f, 0.096141f, -0.342051f, -0.337036f, 0.143222f, -0.860280f, - 0.137169f, 0.339767f, -0.336076f, 0.071988f, 0.251557f, -0.004068f, - 0.170734f, 0.237283f, -0.332443f, 0.073643f, 0.375357f, 0.220407f, - 0.150708f, -0.176979f, 0.265786f, -0.105878f, -0.337465f, -0.000491f, - 0.234308f, -0.098973f, 0.129038f, -0.205936f, -0.034793f, -0.106981f, - 0.009974f, 0.037861f, -0.282874f, -0.354414f, 0.023021f, -0.266749f, - -0.041762f, -0.721725f, 0.182262f, -0.273945f, 0.123722f, -0.036749f, - -0.788645f, -0.081560f, -0.472226f, 0.004654f, -0.756766f, -0.132186f, - 1.085412f, -0.221324f, -0.072577f, -0.172834f, -0.104831f, -1.391641f, - -0.345893f, 0.194442f, -0.306583f, -0.041813f, -0.267635f, -0.218568f, - -0.178452f, 0.044421f, -0.128042f, -0.094797f, -0.253724f, 0.273931f, - 0.144843f, -0.401416f, -0.014354f, -0.348929f, 0.123550f, 0.494504f, - -0.007050f, -0.143830f, 0.111292f, 0.211057f, -1.579988f, 0.117744f, - -1.732487f, 0.009320f, -1.162696f, 0.176687f, -0.705609f, 0.524827f, - 0.089822f, 0.082976f, -0.023681f, 0.006120f, -0.907175f, -0.026273f, - 0.019027f, 0.027170f, -0.462563f, -0.535335f, 0.202231f, 0.709803f, - -0.112251f, -1.213869f, 0.225714f, 0.323785f, -0.518254f, -0.014235f, - -0.070790f, -0.369589f, 0.373399f, 0.002738f, 0.175113f, 0.084529f, - -0.101586f, 
-0.018978f, 0.773392f, -0.673230f, -0.549279f, 0.790196f, - 0.658609f, -0.826831f, -0.514211f, 0.575341f, -0.711311f, 0.276289f, - -0.435715f, 0.392986f, -0.079298f, -0.318719f, 0.188429f, -0.114366f, - 0.172527f, -0.261721f, -0.216761f, 0.163822f, -0.189374f, -0.391901f, - 0.142013f, -0.135046f, 0.144419f, 0.053887f, 0.074673f, -0.290791f, - -0.039560f, -0.103830f, -0.330263f, -0.042091f, 0.050646f, -0.057466f, - -0.069064f, -0.412864f, 0.071097f, 0.126693f, 0.175397f, -0.168485f, - 0.018129f, -0.419188f, -0.272024f, -0.436859f, -0.425711f, -0.024382f, - 0.248042f, -0.169090f, -0.346878f, -0.070926f, 0.292278f, -0.197610f, - -0.218286f, 0.290846f, 0.297843f, 0.247394f, -0.160736f, 0.110314f, - 0.276000f, -0.301676f, -0.232816f, -0.127576f, -0.174457f, -0.124503f, - 0.264880f, -0.332379f, 0.012659f, -0.197333f, 0.604700f, 0.801582f, - 0.758702f, 0.691880f, 0.440917f, 0.773548f, 0.064242f, 1.147508f, - -0.127543f, -0.189628f, -0.122994f, -0.226776f, -0.053531f, -0.187548f, - 0.226554f, -0.273451f, 0.011751f, 0.009133f, 0.185091f, 0.003031f, - 0.000525f, 0.221829f, 0.331550f, -0.202558f, -0.286550f, 0.100683f, - 0.268818f, 0.179971f, -0.050016f, 0.579665f, 0.015911f, 0.033068f, - 0.077768f, -0.017757f, -1.411251f, 0.051519f, -1.745767f, 0.011258f, - -1.947372f, 0.111396f, -1.112755f, -0.008989f, -0.006211f, -0.002098f, - -0.015236f, -0.095697f, -0.095820f, 0.044622f, -0.112096f, 0.060000f, - 0.138957f, -0.462708f, 0.590790f, -0.021405f, -0.283744f, -1.141749f, - 0.213121f, -0.332311f, -0.314090f, -0.789311f, 0.157605f, -0.438019f, - 0.642189f, -0.340764f, -0.996025f, 0.109871f, 0.106128f, -0.010505f, - -0.117233f, -0.223194f, 0.344105f, -0.308754f, 0.386020f, -0.305270f, - -0.538281f, -0.270720f, -0.101688f, 0.207580f, 0.237153f, -0.055730f, - 0.842779f, 0.393543f, 0.007886f, -0.318167f, 0.603768f, 0.388241f, - 0.421536f, 0.632080f, 0.423965f, 0.371472f, 0.456827f, 0.488134f, - 0.358997f, 0.032621f, -0.017104f, 0.032198f, 0.113266f, -0.312277f, - 0.178189f, 
0.234180f, 0.134271f, -0.414889f, 0.774141f, -0.225043f, - 0.614052f, -0.279921f, 1.329141f, -0.140827f, 0.797267f, -0.171361f, - 0.066205f, 0.339976f, 0.015223f, 0.193725f, -0.245067f, -0.035578f, - -0.084043f, 0.086756f, 0.029478f, -0.845370f, 0.388613f, -1.215236f, - 0.304573f, -0.439884f, -0.293969f, -0.107988f, -0.267837f, -0.695339f, - -0.702099f, 0.359047f, 0.511730f, 1.429516f, 0.216959f, -0.313828f, - 0.068062f, -0.124917f, -0.648327f, -0.308411f, -0.378467f, -0.429288f, - -0.032415f, -0.357005f, 0.170068f, 0.161167f, -0.250280f, -0.320468f, - -0.408987f, -0.201496f, -0.155996f, 0.021067f, 0.141083f, -0.202733f, - -0.130953f, -0.278148f, -0.042051f, 0.070576f, 0.009982f, -0.044326f, - -0.346851f, -0.255397f, -0.346456f, 0.281781f, 0.001618f, 0.120648f, - 0.297140f, 0.198343f, 0.186104f, 0.183548f, -0.344482f, 0.182258f, - 0.291003f, -0.330228f, -0.048174f, 0.133694f, 0.264582f, 0.229671f, - -0.167251f, -0.316040f, 0.191829f, 0.153417f, -0.345158f, -0.212790f, - -0.878872f, -0.313099f, -0.028368f, 0.065869f, -0.695388f, 1.102812f, - -0.605539f, 0.400680f, -0.350120f, -0.432965f, 0.034553f, -0.693476f, - -0.045708f, 0.492409f, -0.043825f, -0.430522f, 0.071159f, -0.317376f, - -1.164842f, 0.112394f, 0.034137f, -0.611882f, 0.251020f, -0.245113f, - 0.286093f, -0.187883f, 0.340263f, -0.211592f, -0.065706f, -0.332148f, - 0.104026f, -0.003206f, 0.036397f, 0.206499f, 0.161962f, 0.037663f, - -0.313039f, -0.199837f, 0.117952f, -0.182145f, -0.343724f, 0.017625f, - 0.033427f, -0.288075f, -0.101873f, -0.083378f, 0.147870f, 0.049598f, - -0.241824f, 0.070494f, 0.140942f, -0.013795f, 0.020023f, -0.192213f, - -0.320505f, -0.193072f, 0.147260f, 0.311352f, 0.053486f, 0.183716f, - 0.142535f, 0.294333f, -0.054853f, 0.293314f, -0.025398f, 0.190815f, - -0.137574f, -0.191864f, -0.190950f, -0.205988f, -0.199046f, -0.017582f, - -0.149347f, 0.131040f, 0.006854f, -0.350732f, 0.113301f, -0.194371f, - -0.296885f, -0.249199f, -0.193946f, 0.116150f, -0.310411f, -0.325851f, - -0.053275f, 
-0.063419f, 0.204170f, -0.091940f, -0.146229f, 0.298173f, - 0.053349f, -0.368540f, 0.235629f, -0.317825f, -0.107304f, -0.114618f, - 0.058709f, -0.272070f, 0.076224f, 0.110668f, -0.193282f, -0.135440f, - -0.267950f, -0.102285f, 0.102699f, -0.159082f, 0.262721f, -0.263227f, - 0.094509f, -0.113405f, 0.069888f, -0.169665f, 0.070800f, 0.035432f, - 0.054243f, 0.264229f, 0.117416f, 0.091568f, -0.022069f, -0.069214f, - 0.124543f, 0.070413f, -0.039343f, 0.082823f, -0.838348f, 0.153727f, - -0.000947f, 0.270348f, -1.404952f, -0.159680f, -0.234320f, 0.061023f, - 0.271660f, -0.541834f, 0.570828f, -0.277254f, -}; - -static const float vp9_rect_part_nn_bias_16_layer0[32] = { - 0.045740f, 0.292685f, -0.754007f, -0.150412f, -0.006171f, 0.005915f, - 0.000167f, 0.322797f, -0.381793f, 0.349786f, 0.003878f, -0.307203f, - 0.000000f, 0.029122f, 0.000000f, 0.625494f, 0.302105f, -0.362807f, - -0.034002f, -0.573278f, 0.240021f, 0.083965f, 0.000000f, -0.018979f, - -0.147739f, -0.036990f, 0.000000f, 0.000000f, -0.026790f, -0.000036f, - -0.073448f, 0.398328f, -}; - -static const float vp9_rect_part_nn_weights_16_layer1[32 * LABELS] = { - 0.095090f, 0.831754f, 0.484433f, 0.472945f, 0.086165f, -0.442388f, - 0.176263f, -0.760247f, 0.419932f, -0.131377f, 0.075814f, 0.089844f, - -0.294718f, 0.299808f, -0.318435f, -0.623205f, -0.346703f, 0.494356f, - 0.949221f, 0.524653f, 0.044095f, 0.428540f, 0.402571f, -0.216920f, - 0.423915f, 1.023334f, -0.366449f, 0.395057f, 0.057576f, 0.094019f, - 0.247685f, -0.007200f, -0.420023f, -0.728965f, -0.063040f, -0.071321f, - 0.209298f, 0.486625f, -0.244375f, 0.263219f, -0.250463f, -0.260301f, - 0.068579f, 0.177644f, -0.155311f, -0.027606f, -0.101614f, 0.553046f, - -0.462729f, -0.237568f, -0.589316f, 0.045182f, 0.551759f, -0.196872f, - 0.183040f, 0.054341f, 0.252784f, -0.536486f, -0.024425f, 0.154942f, - -0.086636f, 0.360416f, 0.214773f, -0.170876f, -0.363522f, -0.464099f, - 0.145494f, -0.099329f, 0.343718f, 0.286427f, 0.085540f, -0.105182f, - 0.155543f, 0.290939f, 
-0.067069f, 0.228399f, 0.178247f, 0.113031f, - -0.067336f, 0.441062f, 0.132364f, -0.263403f, -0.263925f, -0.083613f, - -0.268577f, -0.204442f, 0.052526f, 0.334787f, -0.064285f, -0.197875f, - 0.296405f, 0.396440f, 0.033231f, 0.229087f, 0.118289f, 0.490894f, - -0.527582f, -0.897206f, -0.325708f, -0.433018f, -0.053989f, 0.223814f, - -0.352319f, 0.772440f, -0.108648f, -0.082859f, -0.342718f, 0.033022f, - -0.309199f, -0.560337f, 0.208476f, 0.520309f, -0.241035f, -0.560391f, - -1.268968f, -0.267567f, 0.129461f, -0.385547f, 0.080142f, 0.065785f, - -0.159324f, -0.580704f, -0.315150f, -0.224900f, -0.110807f, -0.230163f, - 0.307266f, 0.153446f, +#define NODES 16 +static const float vp9_rect_part_nn_weights_16_layer0[FEATURES * NODES] = { + -0.432522f, 0.133070f, -0.169187f, 0.768340f, 0.891228f, 0.554458f, + 0.356000f, 0.403621f, 0.809165f, 0.778214f, -0.520357f, 0.301451f, + -0.386972f, -0.314402f, 0.021878f, 1.148746f, -0.462258f, -0.175524f, + -0.344589f, -0.475159f, -0.232322f, 0.471147f, -0.489948f, 0.467740f, + -0.391550f, 0.208601f, 0.054138f, 0.076859f, -0.309497f, -0.095927f, + 0.225917f, 0.011582f, -0.520730f, -0.585497f, 0.174036f, 0.072521f, + 0.120771f, -0.517234f, -0.581908f, -0.034003f, -0.694722f, -0.364368f, + 0.290584f, 0.038373f, 0.685654f, 0.394019f, 0.759667f, 1.257502f, + -0.610516f, -0.185434f, 0.211997f, -0.172458f, 0.044605f, 0.145316f, + -0.182525f, -0.147376f, 0.578742f, 0.312412f, -0.446135f, -0.389112f, + 0.454033f, 0.260490f, 0.664285f, 0.395856f, -0.231827f, 0.215228f, + 0.014856f, -0.395462f, 0.479646f, -0.391445f, -0.357788f, 0.166238f, + -0.056818f, -0.027783f, 0.060880f, -1.604710f, 0.531268f, 0.282184f, + 0.714944f, 0.093523f, -0.218312f, -0.095546f, -0.285621f, -0.190871f, + -0.448340f, -0.016611f, 0.413913f, -0.286720f, -0.158828f, -0.092635f, + -0.279551f, 0.166509f, -0.088162f, 0.446543f, -0.276830f, -0.065642f, + -0.176346f, -0.984754f, 0.338738f, 0.403809f, 0.738065f, 1.154439f, + 0.750764f, 0.770959f, -0.269403f, 0.295651f, 
-0.331858f, 0.367144f, + 0.279279f, 0.157419f, -0.348227f, -0.168608f, -0.956000f, -0.647136f, + 0.250516f, 0.858084f, 0.809802f, 0.492408f, 0.804841f, 0.282802f, + 0.079395f, -0.291771f, -0.024382f, -1.615880f, -0.445166f, -0.407335f, + -0.483044f, 0.141126f, +}; + +static const float vp9_rect_part_nn_bias_16_layer0[NODES] = { + 0.275384f, -0.053745f, 0.000000f, 0.000000f, -0.178103f, 0.513965f, + -0.161352f, 0.228551f, 0.000000f, 1.013712f, 0.000000f, 0.000000f, + -1.144009f, -0.000006f, -0.241727f, 2.048764f, +}; + +static const float vp9_rect_part_nn_weights_16_layer1[NODES * LABELS] = { + -1.435278f, 2.204691f, -0.410718f, 0.202708f, 0.109208f, 1.059142f, + -0.306360f, 0.845906f, 0.489654f, -1.121915f, -0.169133f, -0.003385f, + 0.660590f, -0.018711f, 1.227158f, -2.967504f, 1.407345f, -1.293243f, + -0.386921f, 0.300492f, 0.338824f, -0.083250f, -0.069454f, -1.001827f, + -0.327891f, 0.899353f, 0.367397f, -0.118601f, -0.171936f, -0.420646f, + -0.803319f, 2.029634f, 0.940268f, -0.664484f, 0.339916f, 0.315944f, + 0.157374f, -0.402482f, -0.491695f, 0.595827f, 0.015031f, 0.255887f, + -0.466327f, -0.212598f, 0.136485f, 0.033363f, -0.796921f, 1.414304f, + -0.282185f, -2.673571f, -0.280994f, 0.382658f, -0.350902f, 0.227926f, + 0.062602f, -1.000199f, 0.433731f, 1.176439f, -0.163216f, -0.229015f, + -0.640098f, -0.438852f, -0.947700f, 2.203434f, }; static const float vp9_rect_part_nn_bias_16_layer1[LABELS] = { - -0.455437f, - 0.255310f, - 0.452974f, - -0.278733f, + -0.875510f, + 0.982408f, + 0.560854f, + -0.415209f, }; static const NN_CONFIG vp9_rect_part_nnconfig_16 = { @@ -383,7 +301,7 @@ static const NN_CONFIG vp9_rect_part_nnconfig_16 = { LABELS, // num_outputs 1, // num_hidden_layers { - 32, + NODES, }, // num_hidden_nodes { vp9_rect_part_nn_weights_16_layer0, @@ -395,139 +313,56 @@ static const NN_CONFIG vp9_rect_part_nnconfig_16 = { }, }; -static const float vp9_rect_part_nn_weights_32_layer0[FEATURES * 32] = { - 0.735110f, -0.238477f, 0.101978f, 0.311671f, 
-0.123833f, 1.596506f, - -0.341982f, -0.480170f, -0.247587f, 0.613159f, -0.279899f, -0.740856f, - 0.499051f, 0.039041f, 0.056763f, 0.258874f, 0.470812f, -0.121635f, - -0.318852f, -0.098677f, -0.214714f, -0.159974f, -0.305400f, -0.344477f, - -0.260653f, -0.007737f, -0.053016f, -0.158079f, 0.151911f, -0.057685f, - -0.230948f, -0.165940f, -0.127591f, -0.192084f, 1.890390f, -0.315123f, - -0.714531f, -0.015355f, 0.186437f, 0.305504f, 0.035343f, -0.556783f, - 0.239364f, -0.297789f, 0.202735f, -0.707576f, 0.710250f, 0.223346f, - -0.291511f, 0.235778f, 0.455338f, -0.059402f, 0.084530f, -0.115117f, - -0.103696f, -0.192821f, 0.114579f, -0.223487f, 0.306864f, 0.021887f, - -0.028040f, 0.087866f, 0.038870f, -0.081742f, -0.056052f, -0.130837f, - 0.201058f, 0.293391f, 1.880344f, 0.339162f, 0.040928f, -0.503942f, - 0.476333f, 0.259272f, 0.629416f, 0.869369f, 0.622841f, 1.012843f, - 0.715795f, 1.958844f, -1.697462f, 0.071334f, 0.074189f, 0.014585f, - -0.002536f, 0.021900f, 0.151883f, 0.169501f, -0.333018f, -0.247512f, - -0.418575f, -0.473960f, -0.004501f, -0.280939f, -0.162188f, -0.355632f, - 0.136654f, -0.100967f, -0.350435f, -0.135386f, 0.037237f, 0.136982f, - -0.084157f, -0.073248f, 0.021792f, 0.077429f, -0.083042f, -3.169569f, - 0.016261f, -3.351328f, 0.021120f, -3.572247f, 0.023870f, -4.312754f, - 0.040973f, -0.038328f, -0.015052f, 0.017702f, 0.101427f, 0.115458f, - -0.304792f, 0.021826f, -0.157998f, 0.341022f, -0.013465f, 0.105076f, - -0.261465f, 0.318730f, 0.065701f, 0.314879f, -0.064785f, 0.282824f, - 0.100542f, 0.057260f, -0.003756f, -0.026214f, -0.264641f, 0.275545f, - -0.049201f, -0.283015f, -0.057363f, 0.183570f, 0.243161f, -0.255764f, - 0.099747f, -0.156157f, -0.262494f, 0.231521f, -0.262617f, -0.186096f, - 0.171720f, 0.018983f, -0.145545f, 0.197662f, -0.001502f, -0.267526f, - 0.001960f, 0.003260f, 0.045237f, -0.377174f, -0.042499f, -0.015278f, - -0.196779f, -0.262797f, -0.318427f, -0.126092f, -0.339723f, 0.205288f, - -0.544284f, -0.507896f, -0.316622f, -0.090312f, 
-0.250917f, -0.337263f, - -0.220199f, -0.296591f, -0.116816f, 0.052381f, 0.145681f, 0.016521f, - -0.093549f, -0.097822f, 0.023140f, -0.010346f, 0.036181f, 0.145826f, - -0.139123f, -0.462638f, -0.007315f, 0.156533f, -0.102787f, 0.143586f, - -0.092094f, -0.144220f, -0.168994f, -0.045833f, 0.021628f, -0.421794f, - -0.055857f, 0.217931f, -0.061937f, -0.028768f, -0.078250f, -0.426939f, - -0.223118f, -0.230080f, -0.194988f, -0.197673f, -0.020918f, 0.139945f, - 0.186951f, -0.071317f, -0.084007f, -0.138597f, 0.101950f, 0.093870f, - 0.153226f, 0.017799f, -0.088539f, -0.037796f, 0.340412f, 0.183305f, - 0.391880f, -1.127417f, 0.132762f, -0.228565f, 0.399035f, 0.017483f, - -0.041619f, 0.017849f, 0.092340f, 0.054204f, 0.681185f, 0.421034f, - 0.112520f, -0.040618f, -0.040148f, -0.360647f, 0.053555f, 0.192854f, - 0.076968f, -0.179224f, -0.081617f, -0.287661f, -0.191072f, -0.310227f, - -0.332226f, -0.039786f, -0.247795f, -0.232201f, -0.333533f, -0.077995f, - -0.471732f, 0.051829f, 0.090488f, 0.142465f, -0.120490f, -0.286151f, - -0.049117f, -0.251082f, 0.211884f, -0.223366f, 0.063565f, 0.229938f, - -0.059348f, -0.029573f, -0.064303f, -0.156148f, 0.086958f, -0.297613f, - -0.125107f, 0.062718f, 0.339137f, -0.218896f, -0.057290f, -0.236670f, - -0.143783f, -0.119429f, 0.242320f, -0.323464f, -0.178377f, 0.238275f, - -0.025042f, 0.074798f, 0.111329f, -0.299773f, -0.151748f, -0.261607f, - 0.215626f, 0.202243f, -0.121896f, -0.024283f, -0.293854f, -0.018232f, - -0.012629f, -0.199297f, -0.060595f, 0.432339f, -0.158735f, -0.028380f, - 0.326639f, 0.222546f, -0.218135f, -0.495955f, -0.015055f, -0.104206f, - -0.268823f, 0.116765f, 0.041769f, -0.187095f, 0.225090f, 0.198195f, - 0.001502f, -0.219212f, -0.244779f, -0.017690f, -0.033197f, -0.339813f, - -0.325453f, 0.002499f, -0.066113f, 0.043235f, 0.324275f, -0.630642f, - -1.440551f, 0.174527f, 0.124619f, -1.187345f, 1.372693f, -0.278393f, - -0.058673f, -0.286338f, 1.708757f, -0.325094f, -0.543172f, -0.229411f, - 0.169927f, 0.175064f, 0.198321f, 
0.117351f, 0.220882f, 0.138078f, - -0.158000f, -0.286708f, 0.096046f, -0.321788f, 0.206949f, -0.014473f, - -0.321234f, 0.100033f, -0.108266f, 0.166824f, 0.032904f, -0.065760f, - -0.303896f, 0.180342f, -0.301145f, -0.352554f, 0.149089f, 0.013277f, - 0.256019f, -0.109770f, 1.832588f, -0.132568f, 1.527658f, -0.164252f, - -0.857880f, -0.242694f, -0.553797f, 0.334023f, -0.332759f, -0.166203f, - -0.223175f, 0.007953f, -0.175865f, -0.134590f, -0.023858f, -0.011983f, - 0.054403f, -0.147054f, -0.176901f, -0.166893f, -0.292662f, -0.010569f, - -0.041744f, -0.060398f, -0.237584f, 0.154246f, -0.083270f, -0.314016f, - -0.374736f, 0.100063f, 0.048401f, -0.061952f, -0.178816f, 0.157243f, - 0.221991f, -0.065035f, 0.098517f, -0.190704f, -0.210613f, -0.274884f, - -0.341442f, -0.205281f, 0.073644f, 0.130667f, 0.149194f, -0.018172f, - 1.796154f, -1.017806f, -0.169655f, 0.104239f, 0.344313f, 0.643042f, - 0.730177f, 0.270776f, 0.581631f, -1.090649f, 0.707472f, 1.411035f, - 0.268739f, 0.178860f, -0.062251f, -0.118611f, -0.215759f, 0.023485f, - -0.105320f, 0.036396f, -0.059604f, 0.090024f, 0.095224f, -0.053497f, - -0.084040f, 0.055836f, 0.111678f, 0.014886f, -0.178380f, 0.079662f, - -0.123580f, 0.057379f, -0.409844f, -0.305386f, -0.987808f, -0.291094f, - 0.063966f, 0.263709f, -0.337221f, 0.720093f, 0.105030f, 0.848950f, - 0.071835f, 0.228972f, 0.057705f, -2.154561f, -0.201303f, -0.058856f, - -0.020081f, 0.029375f, 0.234837f, -0.001063f, 0.042527f, 0.014567f, - -0.299420f, -0.289117f, 0.275219f, 0.263596f, -0.186026f, -0.111364f, - -0.118393f, -0.318778f, 0.010710f, -0.286836f, -0.070330f, -0.049497f, - 0.093162f, -0.298085f, 0.204761f, -0.206633f, -0.009057f, -0.235372f, - 0.185300f, -0.271814f, 0.281732f, 0.268149f, -0.018967f, 0.162748f, - -0.086694f, -0.063839f, -0.097473f, -0.280120f, 0.324688f, 0.157911f, - -0.064794f, -0.266017f, -0.305608f, -0.196854f, -0.185767f, 0.199455f, - 0.102264f, 0.070866f, 0.172045f, 0.266433f, -0.176167f, 0.251657f, - -0.239220f, 0.229667f, 0.156115f, 
-0.221345f, 0.270720f, 0.109367f, - 0.230352f, -0.384561f, -0.026329f, 0.005928f, -0.087685f, -0.097995f, - -0.153864f, 0.117211f, -0.226492f, -0.379832f, -0.201714f, 0.049707f, - -0.292120f, 0.114074f, -0.085307f, -0.485356f, -0.347405f, 0.089361f, - -0.419273f, -0.320764f, -0.107254f, -0.274615f, -0.292991f, 0.095602f, - -0.078789f, 0.138927f, 0.270813f, 0.205814f, 0.065003f, 0.169171f, - 0.056142f, -0.005792f, 0.059483f, 0.060149f, -}; - -static const float vp9_rect_part_nn_bias_32_layer0[32] = { - -1.749808f, 0.000000f, 0.239736f, -0.000424f, 0.431792f, -0.150833f, - 2.866760f, 0.000000f, 0.000000f, -0.281434f, 0.000000f, -0.150086f, - 0.000000f, -0.008346f, -0.204104f, -0.006581f, 0.000000f, -0.197006f, - 0.000000f, -0.735287f, -0.028345f, -1.180116f, -0.106524f, 0.000000f, - 0.075879f, -0.150966f, -2.438914f, 0.000000f, -0.011775f, -0.024204f, - -0.138235f, -0.123763f, -}; - -static const float vp9_rect_part_nn_weights_32_layer1[32 * LABELS] = { - 0.622235f, 0.264894f, -0.424216f, 0.103989f, 1.401192f, -0.063838f, - -5.216846f, 0.329234f, -0.293113f, 0.457519f, -0.271899f, 0.043771f, - -0.203823f, 0.573535f, -0.192703f, 0.054939f, 0.163019f, 0.124803f, - 0.160664f, 0.385406f, -0.091403f, 0.320204f, 0.101181f, -0.157792f, - -0.095555f, -0.255011f, 1.326614f, -0.138076f, -0.082434f, -0.342442f, - 0.184067f, -0.076395f, 0.050263f, 0.251065f, 0.291743f, 0.197838f, - -0.950922f, 0.280202f, 2.904905f, -0.219434f, 0.284386f, 0.375005f, - 0.193817f, -0.298663f, -0.255364f, -0.297545f, 0.030518f, -0.023892f, - -0.396120f, -0.253027f, 0.237235f, -0.550249f, -0.076817f, -0.201374f, - 0.292708f, 0.341936f, -0.532215f, 0.180634f, -0.943291f, -0.217179f, - 0.251611f, -0.306310f, 0.229054f, -0.350337f, -0.192707f, 0.146781f, - 0.409007f, 0.279088f, -0.307357f, 0.199059f, 2.780962f, 0.163723f, - -0.226445f, 0.242830f, 0.220356f, -0.057621f, 0.196677f, -0.179975f, - -0.314636f, 0.218271f, -0.278653f, -0.226286f, 0.034275f, -0.320149f, - 0.154779f, 0.074937f, -0.015650f, 
-0.281735f, -0.495227f, -0.075036f, - -0.871024f, -0.350643f, 0.343468f, 0.095665f, 0.447121f, -0.059040f, - 0.244757f, 0.223122f, 0.272544f, 0.129678f, -1.700183f, 0.254869f, - 2.528983f, 0.217362f, 0.327765f, -0.129369f, -0.003560f, -0.532537f, - 0.080216f, -0.739488f, -0.299813f, 0.185421f, 0.265994f, 0.152268f, - -0.401829f, -0.901380f, 0.347747f, -0.524845f, -0.201163f, 0.063585f, - -0.517479f, -0.077816f, -0.735739f, -0.161411f, -0.113607f, -0.306188f, - 0.190817f, -0.362567f, +static const float vp9_rect_part_nn_weights_32_layer0[FEATURES * NODES] = { + -0.147312f, -0.753248f, 0.540206f, 0.661415f, 0.484117f, -0.341609f, + 0.016183f, 0.064177f, 0.781580f, 0.902232f, -0.505342f, 0.325183f, + -0.231072f, -0.120107f, -0.076216f, 0.120038f, 0.403695f, -0.463301f, + -0.192158f, 0.407442f, 0.106633f, 1.072371f, -0.446779f, 0.467353f, + 0.318812f, -0.505996f, -0.008768f, -0.239598f, 0.085480f, 0.284640f, + -0.365045f, -0.048083f, -0.112090f, -0.067089f, 0.304138f, -0.228809f, + 0.383651f, -0.196882f, 0.477039f, -0.217978f, -0.506931f, -0.125675f, + 0.050456f, 1.086598f, 0.732128f, 0.326941f, 0.103952f, 0.121769f, + -0.154487f, -0.255514f, 0.030591f, -0.382797f, -0.019981f, -0.326570f, + 0.149691f, -0.435633f, -0.070795f, 0.167691f, 0.251413f, -0.153405f, + 0.160347f, 0.455107f, -0.968580f, -0.575879f, 0.623115f, -0.069793f, + -0.379768f, -0.965807f, -0.062057f, 0.071312f, 0.457098f, 0.350372f, + -0.460659f, -0.985393f, 0.359963f, -0.093677f, 0.404272f, -0.326896f, + -0.277752f, 0.609322f, -0.114193f, -0.230701f, 0.089208f, 0.645381f, + 0.494485f, 0.467876f, -0.166187f, 0.251044f, -0.394661f, 0.192895f, + -0.344777f, -0.041893f, -0.111163f, 0.066347f, 0.378158f, -0.455465f, + 0.339839f, -0.418207f, -0.356515f, -0.227536f, -0.211091f, -0.122945f, + 0.361772f, -0.338095f, 0.004564f, -0.398510f, 0.060876f, -2.132504f, + -0.086776f, -0.029166f, 0.039241f, 0.222534f, -0.188565f, -0.288792f, + -0.160789f, -0.123905f, 0.397916f, -0.063779f, 0.167210f, -0.445004f, + 
0.056889f, 0.207280f, 0.000101f, 0.384507f, -1.721239f, -2.036402f, + -2.084403f, -2.060483f, +}; + +static const float vp9_rect_part_nn_bias_32_layer0[NODES] = { + -0.859251f, -0.109938f, 0.091838f, 0.187817f, -0.728265f, 0.253080f, + 0.000000f, -0.357195f, -0.031290f, -1.373237f, -0.761086f, 0.000000f, + -0.024504f, 1.765711f, 0.000000f, 1.505390f, +}; + +static const float vp9_rect_part_nn_weights_32_layer1[NODES * LABELS] = { + 0.680940f, 1.367178f, 0.403075f, 0.029957f, 0.500917f, 1.407776f, + -0.354002f, 0.011667f, 1.663767f, 0.959155f, 0.428323f, -0.205345f, + -0.081850f, -3.920103f, -0.243802f, -4.253933f, -0.034020f, -1.361057f, + 0.128236f, -0.138422f, -0.025790f, -0.563518f, -0.148715f, -0.344381f, + -1.677389f, -0.868332f, -0.063792f, 0.052052f, 0.359591f, 2.739808f, + -0.414304f, 3.036597f, -0.075368f, -1.019680f, 0.642501f, 0.209779f, + -0.374539f, -0.718294f, -0.116616f, -0.043212f, -1.787809f, -0.773262f, + 0.068734f, 0.508309f, 0.099334f, 1.802239f, -0.333538f, 2.708645f, + -0.447682f, -2.355555f, -0.506674f, -0.061028f, -0.310305f, -0.375475f, + 0.194572f, 0.431788f, -0.789624f, -0.031962f, 0.358353f, 0.382937f, + 0.232002f, 2.321813f, -0.037523f, 2.104652f, }; static const float vp9_rect_part_nn_bias_32_layer1[LABELS] = { - -0.833530f, - 0.860502f, - 0.708645f, - -1.083700f, + -0.693383f, + 0.773661f, + 0.426878f, + -0.070619f, }; static const NN_CONFIG vp9_rect_part_nnconfig_32 = { @@ -535,7 +370,7 @@ static const NN_CONFIG vp9_rect_part_nnconfig_32 = { LABELS, // num_outputs 1, // num_hidden_layers { - 32, + NODES, }, // num_hidden_nodes { vp9_rect_part_nn_weights_32_layer0, @@ -546,140 +381,75 @@ static const NN_CONFIG vp9_rect_part_nnconfig_32 = { vp9_rect_part_nn_bias_32_layer1, }, }; - -static const float vp9_rect_part_nn_weights_64_layer0[FEATURES * 32] = { - 0.029424f, -0.295893f, -0.313259f, -0.090484f, -0.104946f, 0.121361f, - 0.137971f, -0.137984f, -0.328158f, -0.137280f, -0.276995f, -0.153118f, - 0.187893f, 0.105787f, -0.236591f, 
-0.114325f, -0.000708f, 1.936191f, - 0.048491f, -0.026048f, -0.206916f, 0.830237f, -0.152354f, 0.074191f, - -0.153813f, 0.148942f, -0.103457f, 0.028252f, 1.758264f, -2.123016f, - 0.120182f, 0.049954f, 0.110450f, -0.199360f, 0.642198f, 0.040225f, - -0.140886f, 0.091833f, -0.122788f, 1.172115f, -0.833333f, -0.505218f, - 0.736050f, -0.109958f, -0.839030f, -0.399916f, 1.029718f, 0.408977f, - -0.836882f, 0.389683f, -1.134413f, -1.529672f, -0.146351f, 0.089298f, - 0.083772f, -0.697869f, 1.683311f, -0.882446f, 0.494428f, -0.122128f, - 0.659819f, -0.057178f, -0.915390f, -0.192412f, 0.046613f, 0.010697f, - 0.040782f, 0.110807f, -0.225332f, -0.327730f, -0.114825f, 0.063511f, - 0.050503f, 0.023602f, 0.006524f, -0.274547f, -0.607145f, -0.143812f, - -0.327689f, -0.333072f, -0.017138f, -0.183992f, -0.200622f, -0.262463f, - -0.132799f, -0.018155f, -0.534214f, -0.385994f, 0.116278f, -0.752879f, - -0.090734f, -0.249152f, 0.071716f, 0.029603f, -0.382456f, -0.122894f, - 1.349552f, -0.885192f, 0.257903f, -0.265945f, -0.045579f, 0.112247f, - -0.122810f, -0.258285f, -0.145427f, -0.127442f, 0.072778f, 0.072549f, - 0.182149f, 0.239403f, 0.167205f, -0.291616f, -0.281237f, 0.335735f, - 0.208511f, -0.239628f, -0.022236f, -0.177370f, 0.207808f, 0.023535f, - 0.137455f, 0.016406f, -0.138685f, 0.188732f, 0.205513f, 0.209787f, - 0.060592f, 0.239954f, -0.128341f, -0.291585f, 0.022141f, -0.311201f, - -0.010199f, -0.314224f, -0.351915f, -0.079775f, -0.260028f, -0.015953f, - 0.007404f, 0.051589f, 0.019771f, -2.337926f, 0.024596f, -2.512399f, - -0.023138f, -2.421380f, 0.016515f, -3.269775f, 0.026844f, -0.053660f, - -0.013213f, -0.029248f, 0.114357f, 0.259100f, -0.141749f, -0.106802f, - -0.117323f, -0.294698f, -0.316012f, -0.328013f, 0.016459f, 0.136175f, - 0.223327f, 0.322312f, -0.297297f, 0.118286f, -0.317197f, -0.116692f, - 0.262236f, -0.032443f, -0.392128f, -0.199989f, -0.383621f, 0.008347f, - -0.079302f, -0.005529f, 0.049261f, 0.145948f, -0.263592f, -0.317109f, - 0.260015f, -0.499341f, -0.171764f, 
-0.017815f, 0.149186f, 0.178294f, - -0.492198f, 0.016956f, 0.008067f, -0.057734f, -0.189979f, -0.131489f, - -0.163303f, 0.121378f, -0.172272f, 0.125891f, 0.120654f, 0.071314f, - 0.117423f, -0.242167f, 0.047170f, 0.234302f, -0.355370f, -0.336112f, - -0.255471f, -0.267792f, -0.135367f, -0.284411f, 0.254592f, 0.098749f, - 0.224989f, 0.258450f, -0.306878f, 0.153551f, -0.175806f, -0.244459f, - -0.274922f, 0.254346f, 0.110309f, 0.036054f, 0.095133f, -0.589646f, - 0.080543f, 0.154155f, 0.133797f, -0.401518f, 0.798127f, 0.066742f, - 1.449216f, 0.282498f, 1.210638f, -0.280643f, 0.572386f, -0.308133f, - -0.053143f, 0.008437f, 0.269565f, 0.347616f, 0.087180f, -0.771104f, - 0.200800f, 0.157578f, 0.474128f, -0.971488f, 0.193451f, 0.340339f, - -0.123425f, 0.560754f, -0.139621f, -0.281721f, -0.100162f, 0.250926f, - 0.281100f, 0.197680f, 0.138629f, 1.045823f, 0.339047f, 0.036698f, - -0.159210f, 0.727869f, -1.371850f, 0.116241f, -2.180194f, 0.214055f, - -0.213691f, 0.447957f, -1.129966f, 0.543598f, 0.147599f, 0.060034f, - -0.049415f, -0.095858f, 0.290599f, 0.059512f, 0.198343f, -0.211903f, - 0.158736f, -0.090220f, -0.221992f, 0.198320f, 0.028632f, -0.408238f, - -0.368266f, -0.218740f, -0.379023f, -0.173573f, -0.035179f, 0.240176f, - 0.237714f, -0.417132f, -0.184989f, 0.046818f, -0.016965f, -0.524012f, - -0.094848f, -0.225678f, 0.021766f, -0.028366f, 0.072343f, -0.039980f, - 0.023334f, -0.392397f, 0.164450f, -0.201650f, -0.519754f, -0.023352f, - -4.559466f, -0.115996f, 0.135844f, 0.152599f, -0.111570f, 1.870310f, - 0.003522f, 1.893098f, -0.134055f, 1.850787f, 0.085160f, -2.203354f, - 0.380799f, -0.074047f, 0.023760f, 0.077310f, 0.273381f, -1.163135f, - -0.024976f, 0.093252f, 0.011445f, -0.129009f, -2.200677f, -0.013703f, - -1.964109f, -0.027246f, -2.135679f, 0.049465f, -3.879032f, 0.195114f, - -0.018085f, 0.016755f, 0.036330f, 0.169138f, 0.003548f, -0.028565f, - -0.178196f, -0.020577f, -0.104330f, -0.270961f, -0.282822f, -0.228735f, - -0.292561f, 0.271648f, 0.129171f, 0.376168f, 
-0.265005f, -0.093002f, - -0.185514f, 0.025598f, 0.055265f, -0.212784f, -0.249005f, 0.051507f, - -0.267868f, 0.162227f, -0.237365f, 0.267479f, -0.051543f, -0.288800f, - -0.246119f, 0.216296f, 0.226888f, -0.123005f, 0.068040f, -0.096630f, - -0.100500f, 0.161640f, -0.349187f, -0.061229f, 0.042915f, 0.024949f, - -0.083086f, -0.407249f, -0.428306f, -0.381137f, -0.508822f, 0.354796f, - -0.612346f, -0.230076f, -0.734103f, -0.550571f, -0.318788f, -0.300091f, - -0.336045f, -0.494406f, -0.206900f, 0.079942f, 0.149065f, -0.533360f, - 0.940431f, -0.078860f, 1.418633f, -0.117527f, 1.349170f, 0.242658f, - 0.559328f, 0.258770f, -0.014508f, -0.204775f, -0.292631f, 0.498345f, - -0.274918f, 0.051670f, 0.157748f, -0.179721f, -0.183330f, -0.393550f, - -0.208848f, 0.060742f, -0.159654f, 0.047757f, -0.400256f, -0.084606f, - -0.080619f, -0.359664f, -0.078305f, -0.455653f, 0.227624f, -0.385606f, - -0.060326f, -0.209831f, -0.077008f, 0.148862f, 0.209908f, 0.047655f, - -0.342292f, -0.088375f, -0.115465f, 0.082700f, 0.036465f, -0.001792f, - -0.285730f, 0.114632f, 0.239254f, -0.348543f, 0.044916f, -0.299003f, - -0.244756f, -0.180802f, 0.314253f, -0.127788f, -0.221512f, 0.034787f, - -0.208388f, 0.349156f, 0.265975f, -0.068335f, 0.261372f, 0.146705f, - -0.098729f, 0.293699f, -0.111342f, 0.207402f, -0.038772f, 0.124135f, - -0.237450f, -0.191511f, -0.052240f, -0.237151f, 0.005013f, 0.139441f, - -0.153634f, -0.021596f, -0.036220f, -0.077873f, -0.085995f, -0.254555f, - -0.204382f, -0.082362f, 0.941796f, 0.253800f, -0.957468f, 0.095795f, - 0.122046f, -0.310364f, 0.087301f, 0.012704f, 0.193265f, -0.058303f, - 0.250452f, 0.835269f, 0.507383f, 0.109957f, -0.145028f, -0.114419f, - -0.225618f, 0.132387f, -0.063335f, -0.325776f, -0.346173f, -0.006653f, - -0.133534f, -0.085549f, -0.050177f, 0.173103f, 0.025421f, 0.105512f, - 0.258036f, 0.153116f, 0.290202f, -0.333699f, -0.072405f, -0.124069f, - -0.241933f, -0.313318f, 0.013623f, -0.237440f, -0.232228f, -0.170850f, - -0.039212f, 0.162468f, -0.330162f, 
-0.218462f, -0.287064f, -0.181673f, - -0.161059f, 0.024664f, -0.108642f, -0.231707f, 0.217994f, -1.128878f, - 0.093010f, 0.101513f, 0.055895f, -0.354538f, 0.844174f, 0.254335f, - 1.920298f, -0.230777f, 0.798144f, 0.206425f, 0.580655f, -0.177645f, - -0.412061f, 0.112629f, -0.476438f, 0.209436f, -}; - -static const float vp9_rect_part_nn_bias_64_layer0[32] = { - 0.000000f, 0.345406f, -0.499542f, -1.718246f, -0.147443f, -0.408843f, - -0.008997f, -0.107946f, 2.117510f, 0.000000f, -0.141830f, -0.049079f, - 0.000000f, -1.331136f, -1.417843f, -0.485054f, -0.100856f, -0.230750f, - -2.574372f, 2.310627f, -0.030363f, 0.000000f, -0.310119f, -1.314316f, - -0.108766f, -0.107918f, 0.000000f, 0.000000f, 0.093643f, 0.000000f, - 0.000000f, -0.902343f, -}; - -static const float vp9_rect_part_nn_weights_64_layer1[32 * LABELS] = { - 0.404567f, 1.168492f, 0.051714f, 0.827941f, 0.135334f, 0.456922f, - -0.370524f, 0.062865f, -3.076300f, -0.290613f, 0.280029f, -0.101778f, - 0.250216f, 0.347721f, 0.466400f, 0.030845f, 0.114570f, 0.089456f, - 1.519938f, -3.493788f, 0.264212f, -0.109125f, 0.306644f, 0.368206f, - -0.052168f, -0.229630f, -0.339932f, -0.080472f, 0.319845f, 0.143818f, - -0.172595f, 0.372777f, -0.082072f, -0.505781f, -0.288321f, -0.473028f, - -0.027567f, -0.034329f, -0.291965f, -0.063262f, 1.721741f, 0.118914f, - 0.183681f, 0.041611f, 0.266371f, 0.005896f, -0.484705f, 0.665535f, - -0.240945f, -0.017963f, -1.409440f, 2.031976f, 0.240327f, -0.116604f, - 0.273245f, -0.170570f, -0.085491f, -0.340315f, -0.209651f, -0.217460f, - -0.249373f, 0.009193f, 0.009467f, -0.272909f, 0.308472f, -0.551173f, - 0.168374f, -0.583229f, 0.140082f, -0.585715f, -0.010929f, 0.159779f, - 1.438104f, 0.293111f, -0.053339f, -0.101828f, -0.280573f, -0.211265f, - -0.323605f, -0.540908f, 0.101366f, -0.005288f, -1.517046f, 2.078767f, - 0.215597f, 0.144012f, 0.315888f, -0.251324f, 0.150482f, -0.137871f, - 0.235116f, -0.194202f, -0.153475f, -0.312384f, -0.375510f, 0.336488f, - -0.379837f, -1.004979f, -0.312587f, 
-0.406174f, 0.154290f, -0.539766f, - -0.230074f, 0.303564f, 0.719439f, -0.235108f, -0.204978f, 0.399229f, - 0.290222f, -0.278713f, -0.667069f, -0.420550f, 0.164893f, -0.459689f, - -1.035368f, 0.818909f, 0.275137f, -0.291006f, -0.061505f, 0.052737f, - -0.084871f, -0.348335f, 0.312544f, 0.120753f, -0.707222f, -0.010050f, - -0.137148f, -0.351765f, +#undef NODES + +#define NODES 24 +static const float vp9_rect_part_nn_weights_64_layer0[FEATURES * NODES] = { + 0.024671f, -0.220610f, -0.284362f, -0.069556f, -0.315700f, 0.187861f, + 0.139782f, 0.063110f, 0.796561f, 0.172868f, -0.662194f, -1.393074f, + 0.085003f, 0.393381f, 0.358477f, -0.187268f, -0.370745f, 0.218287f, + 0.027271f, -0.254089f, -0.048236f, -0.459137f, 0.253171f, 0.122598f, + -0.550107f, -0.568456f, 0.159866f, -0.246534f, 0.096384f, -0.255460f, + 0.077864f, -0.334837f, 0.026921f, -0.697252f, 0.345262f, 1.343578f, + 0.815984f, 1.118211f, 1.574016f, 0.578476f, -0.285967f, -0.508672f, + 0.118137f, 0.037695f, 1.540510f, 1.256648f, 1.163819f, 1.172027f, + 0.661551f, -0.111980f, -0.434204f, -0.894217f, 0.570524f, 0.050292f, + -0.113680f, 0.000784f, -0.211554f, -0.369394f, 0.158306f, -0.512505f, + -0.238696f, 0.091498f, -0.448490f, -0.491268f, -0.353112f, -0.303315f, + -0.428438f, 0.127998f, -0.406790f, -0.401786f, -0.279888f, -0.384223f, + 0.026100f, 0.041621f, -0.315818f, -0.087888f, 0.353497f, 0.163123f, + -0.380128f, -0.090334f, -0.216647f, -0.117849f, -0.173502f, 0.301871f, + 0.070854f, 0.114627f, -0.050545f, -0.160381f, 0.595294f, 0.492696f, + -0.453858f, -1.154139f, 0.126000f, 0.034550f, 0.456665f, -0.236618f, + -0.112640f, 0.050759f, -0.449162f, 0.110059f, 0.147116f, 0.249358f, + -0.049894f, 0.063351f, -0.004467f, 0.057242f, -0.482015f, -0.174335f, + -0.085617f, -0.333808f, -0.358440f, -0.069006f, 0.099260f, -1.243430f, + -0.052963f, 0.112088f, -2.661115f, -2.445893f, -2.688174f, -2.624232f, + 0.030494f, 0.161311f, 0.012136f, 0.207564f, -2.776856f, -2.791940f, + -2.623962f, -2.918820f, 1.231619f, 
-0.376692f, -0.698078f, 0.110336f, + -0.285378f, 0.258367f, -0.180159f, -0.376608f, -0.034348f, -0.130206f, + 0.160020f, 0.852977f, 0.580573f, 1.450782f, 1.357596f, 0.787382f, + -0.544004f, -0.014795f, 0.032121f, -0.557696f, 0.159994f, -0.540908f, + 0.180380f, -0.398045f, 0.705095f, 0.515103f, -0.511521f, -1.271374f, + -0.231019f, 0.423647f, 0.064907f, -0.255338f, -0.877748f, -0.667205f, + 0.267847f, 0.135229f, 0.617844f, 1.349849f, 1.012623f, 0.730506f, + -0.078571f, 0.058401f, 0.053221f, -2.426146f, -0.098808f, -0.138508f, + -0.153299f, 0.149116f, -0.444243f, 0.301807f, 0.065066f, 0.092929f, + -0.372784f, -0.095540f, 0.192269f, 0.237894f, 0.080228f, -0.214074f, + -0.011426f, -2.352367f, -0.085394f, -0.190361f, -0.001177f, 0.089197f, +}; + +static const float vp9_rect_part_nn_bias_64_layer0[NODES] = { + 0.000000f, -0.057652f, -0.175413f, -0.175389f, -1.084097f, -1.423801f, + -0.076307f, -0.193803f, 0.000000f, -0.066474f, -0.050318f, -0.019832f, + -0.038814f, -0.144184f, 2.652451f, 2.415006f, 0.197464f, -0.729842f, + -0.173774f, 0.239171f, 0.486425f, 2.463304f, -0.175279f, 2.352637f, +}; + +static const float vp9_rect_part_nn_weights_64_layer1[NODES * LABELS] = { + -0.063237f, 1.925696f, -0.182145f, -0.226687f, 0.602941f, -0.941140f, + 0.814598f, -0.117063f, 0.282988f, 0.066369f, 0.096951f, 1.049735f, + -0.188188f, -0.281227f, -4.836746f, -5.047797f, 0.892358f, 0.417145f, + -0.279849f, 1.335945f, 0.660338f, -2.757938f, -0.115714f, -1.862183f, + -0.045980f, -1.597624f, -0.586822f, -0.615589f, -0.330537f, 1.068496f, + -0.167290f, 0.141290f, -0.112100f, 0.232761f, 0.252307f, -0.399653f, + 0.353118f, 0.241583f, 2.635241f, 4.026119f, -1.137327f, -0.052446f, + -0.139814f, -1.104256f, -0.759391f, 2.508457f, -0.526297f, 2.095348f, + -0.444473f, -1.090452f, 0.584122f, 0.468729f, -0.368865f, 1.041425f, + -1.079504f, 0.348837f, 0.390091f, 0.416191f, 0.212906f, -0.660255f, + 0.053630f, 0.209476f, 3.595525f, 2.257293f, -0.514030f, 0.074203f, + -0.375862f, -1.998307f, 
-0.930310f, 1.866686f, -0.247137f, 1.087789f, + 0.100186f, 0.298150f, 0.165265f, 0.050478f, 0.249167f, 0.371789f, + -0.294497f, 0.202954f, 0.037310f, 0.193159f, 0.161551f, 0.301597f, + 0.299286f, 0.185946f, 0.822976f, 2.066130f, -1.724588f, 0.055977f, + -0.330747f, -0.067747f, -0.475801f, 1.555958f, -0.025808f, -0.081516f, }; static const float vp9_rect_part_nn_bias_64_layer1[LABELS] = { - -0.926768f, - 0.765832f, - 0.663683f, - -0.621865f, + -0.090723f, + 0.894968f, + 0.844754f, + -3.496194f, }; static const NN_CONFIG vp9_rect_part_nnconfig_64 = { @@ -687,7 +457,7 @@ static const NN_CONFIG vp9_rect_part_nnconfig_64 = { LABELS, // num_outputs 1, // num_hidden_layers { - 32, + NODES, }, // num_hidden_nodes { vp9_rect_part_nn_weights_64_layer0, @@ -700,6 +470,7 @@ static const NN_CONFIG vp9_rect_part_nnconfig_64 = { }; #undef FEATURES #undef LABELS +#undef NODES #define FEATURES 7 // Partition pruning model(neural nets). @@ -836,7 +607,6 @@ static const NN_CONFIG vp9_partition_nnconfig_16x16 = { }; #undef FEATURES -#if CONFIG_ML_VAR_PARTITION #define FEATURES 6 static const float vp9_var_part_nn_weights_64_layer0[FEATURES * 8] = { -0.249572f, 0.205532f, -2.175608f, 1.094836f, -2.986370f, 0.193160f, @@ -964,177 +734,210 @@ static const NN_CONFIG vp9_var_part_nnconfig_16 = { }, }; #undef FEATURES -#endif // CONFIG_ML_VAR_PARTITION -#define FEATURES 6 +#define FEATURES 12 #define LABELS 1 -static const float vp9_var_rd_part_nn_weights_64_layer0[FEATURES * 8] = { - -0.100129f, 0.128867f, -1.375086f, -2.268096f, -1.470368f, -2.296274f, - 0.034445f, -0.062993f, -2.151904f, 0.523215f, 1.611269f, 1.530051f, - 0.418182f, -1.330239f, 0.828388f, 0.386546f, -0.026188f, -0.055459f, - -0.474437f, 0.861295f, -2.208743f, -0.652991f, -2.985873f, -1.728956f, - 0.388052f, -0.420720f, 2.015495f, 1.280342f, 3.040914f, 1.760749f, - -0.009062f, 0.009623f, 1.579270f, -2.012891f, 1.629662f, -1.796016f, - -0.279782f, -0.288359f, 1.875618f, 1.639855f, 0.903020f, 0.906438f, - 0.553394f, 
-1.621589f, 0.185063f, 0.605207f, -0.133560f, 0.588689f, +#define NODES 8 +static const float vp9_part_split_nn_weights_64_layer0[FEATURES * NODES] = { + -0.609728f, -0.409099f, -0.472449f, 0.183769f, -0.457740f, 0.081089f, + 0.171003f, 0.578696f, -0.019043f, -0.856142f, 0.557369f, -1.779424f, + -0.274044f, -0.320632f, -0.392531f, -0.359462f, -0.404106f, -0.288357f, + 0.200620f, 0.038013f, -0.430093f, 0.235083f, -0.487442f, 0.424814f, + -0.232758f, -0.442943f, 0.229397f, -0.540301f, -0.648421f, -0.649747f, + -0.171638f, 0.603824f, 0.468497f, -0.421580f, 0.178840f, -0.533838f, + -0.029471f, -0.076296f, 0.197426f, -0.187908f, -0.003950f, -0.065740f, + 0.085165f, -0.039674f, -5.640702f, 1.909538f, -1.434604f, 3.294606f, + -0.788812f, 0.196864f, 0.057012f, -0.019757f, 0.336233f, 0.075378f, + 0.081503f, 0.491864f, -1.899470f, -1.764173f, -1.888137f, -1.762343f, + 0.845542f, 0.202285f, 0.381948f, -0.150996f, 0.556893f, -0.305354f, + 0.561482f, -0.021974f, -0.703117f, 0.268638f, -0.665736f, 1.191005f, + -0.081568f, -0.115653f, 0.272029f, -0.140074f, 0.072683f, 0.092651f, + -0.472287f, -0.055790f, -0.434425f, 0.352055f, 0.048246f, 0.372865f, + 0.111499f, -0.338304f, 0.739133f, 0.156519f, -0.594644f, 0.137295f, + 0.613350f, -0.165102f, -1.003731f, 0.043070f, -0.887896f, -0.174202f, }; -static const float vp9_var_rd_part_nn_bias_64_layer0[8] = { - 0.659717f, 0.120912f, 0.329894f, -1.586385f, - 1.715839f, 0.085754f, 2.038774f, 0.268119f, +static const float vp9_part_split_nn_bias_64_layer0[NODES] = { + 1.182714f, 0.000000f, 0.902019f, 0.953115f, + -1.372486f, -1.288740f, -0.155144f, -3.041362f, }; -static const float vp9_var_rd_part_nn_weights_64_layer1[8 * LABELS] = { - -3.445586f, 2.375620f, 1.236970f, 0.804030f, - -2.448384f, 2.827254f, 2.291478f, 0.790252f, +static const float vp9_part_split_nn_weights_64_layer1[NODES * LABELS] = { + 0.841214f, 0.456016f, 0.869270f, 1.692999f, + -1.700494f, -0.911761f, 0.030111f, -1.447548f, }; -static const float 
vp9_var_rd_part_nn_bias_64_layer1[LABELS] = { - -1.16608453f, +static const float vp9_part_split_nn_bias_64_layer1[LABELS] = { + 1.17782545f, }; -static const NN_CONFIG vp9_var_rd_part_nnconfig_64 = { +static const NN_CONFIG vp9_part_split_nnconfig_64 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { - 8, + NODES, }, // num_hidden_nodes { - vp9_var_rd_part_nn_weights_64_layer0, - vp9_var_rd_part_nn_weights_64_layer1, + vp9_part_split_nn_weights_64_layer0, + vp9_part_split_nn_weights_64_layer1, }, { - vp9_var_rd_part_nn_bias_64_layer0, - vp9_var_rd_part_nn_bias_64_layer1, + vp9_part_split_nn_bias_64_layer0, + vp9_part_split_nn_bias_64_layer1, }, }; -static const float vp9_var_rd_part_nn_weights_32_layer0[FEATURES * 8] = { - 0.022420f, -0.032201f, 1.228065f, -2.767655f, 1.928743f, 0.566863f, - 0.459229f, 0.422048f, 0.833395f, 0.822960f, -0.232227f, 0.586895f, - 0.442856f, -0.018564f, 0.227672f, -1.291306f, 0.119428f, -0.776563f, - -0.042947f, 0.183129f, 0.592231f, 1.174859f, -0.503868f, 0.270102f, - -0.330537f, -0.036340f, 1.144630f, 1.783710f, 1.216929f, 2.038085f, - 0.373782f, -0.430258f, 1.957002f, 1.383908f, 2.012261f, 1.585693f, - -0.394399f, -0.337523f, -0.238335f, 0.007819f, -0.368294f, 0.437875f, - -0.318923f, -0.242000f, 2.276263f, 1.501432f, 0.645706f, 0.344774f, +static const float vp9_part_split_nn_weights_32_layer0[FEATURES * NODES] = { + -0.105488f, -0.218662f, 0.010980f, -0.226979f, 0.028076f, 0.743430f, + 0.789266f, 0.031907f, -1.464200f, 0.222336f, -1.068493f, -0.052712f, + -0.176181f, -0.102654f, -0.973932f, -0.182637f, -0.198000f, 0.335977f, + 0.271346f, 0.133005f, 1.674203f, 0.689567f, 0.657133f, 0.283524f, + 0.115529f, 0.738327f, 0.317184f, -0.179736f, 0.403691f, 0.679350f, + 0.048925f, 0.271338f, -1.538921f, -0.900737f, -1.377845f, 0.084245f, + 0.803122f, -0.107806f, 0.103045f, -0.023335f, -0.098116f, -0.127809f, + 0.037665f, -0.523225f, 1.622185f, 1.903999f, 1.358889f, 1.680785f, + 0.027743f, 0.117906f, -0.158810f, 
0.057775f, 0.168257f, 0.062414f, + 0.086228f, -0.087381f, -3.066082f, 3.021855f, -4.092155f, 2.550104f, + -0.230022f, -0.207445f, -0.000347f, 0.034042f, 0.097057f, 0.220088f, + -0.228841f, -0.029405f, -1.507174f, -1.455184f, 2.624904f, 2.643355f, + 0.319912f, 0.585531f, -1.018225f, -0.699606f, 1.026490f, 0.169952f, + -0.093579f, -0.142352f, -0.107256f, 0.059598f, 0.043190f, 0.507543f, + -0.138617f, 0.030197f, 0.059574f, -0.634051f, -0.586724f, -0.148020f, + -0.334380f, 0.459547f, 1.620600f, 0.496850f, 0.639480f, -0.465715f, }; -static const float vp9_var_rd_part_nn_bias_32_layer0[8] = { - -0.023846f, -1.348117f, 1.365007f, -1.644164f, - 0.062992f, 1.257980f, -0.098642f, 1.388472f, +static const float vp9_part_split_nn_bias_32_layer0[NODES] = { + -1.125885f, 0.753197f, -0.825808f, 0.004839f, + 0.583920f, 0.718062f, 0.976741f, 0.796188f, }; -static const float vp9_var_rd_part_nn_weights_32_layer1[8 * LABELS] = { - 3.016729f, 0.622684f, -1.021302f, 1.490383f, - 1.702046f, -2.964618f, 0.689045f, 1.711754f, +static const float vp9_part_split_nn_weights_32_layer1[NODES * LABELS] = { + -0.458745f, 0.724624f, -0.479720f, -2.199872f, + 1.162661f, 1.194153f, -0.716896f, 0.824080f, }; -static const float vp9_var_rd_part_nn_bias_32_layer1[LABELS] = { - -1.28798676f, +static const float vp9_part_split_nn_bias_32_layer1[LABELS] = { + 0.71644074f, }; -static const NN_CONFIG vp9_var_rd_part_nnconfig_32 = { +static const NN_CONFIG vp9_part_split_nnconfig_32 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { - 8, + NODES, }, // num_hidden_nodes { - vp9_var_rd_part_nn_weights_32_layer0, - vp9_var_rd_part_nn_weights_32_layer1, + vp9_part_split_nn_weights_32_layer0, + vp9_part_split_nn_weights_32_layer1, }, { - vp9_var_rd_part_nn_bias_32_layer0, - vp9_var_rd_part_nn_bias_32_layer1, + vp9_part_split_nn_bias_32_layer0, + vp9_part_split_nn_bias_32_layer1, }, }; -static const float vp9_var_rd_part_nn_weights_16_layer0[FEATURES * 8] = { - -0.726813f, -0.026748f, 
1.376946f, 1.467961f, 1.961810f, 1.690412f, - 0.596484f, -0.261486f, -0.310905f, -0.366311f, -1.300086f, -0.534336f, - 0.040520f, -0.032391f, -1.194214f, 2.438063f, -3.915334f, 1.997270f, - 0.673696f, -0.676393f, 1.654886f, 1.553838f, 1.129691f, 1.360201f, - 0.255001f, 0.336442f, -0.487759f, -0.634555f, 0.479170f, -0.110475f, - -0.661852f, -0.158872f, -0.350243f, -0.303957f, -0.045018f, 0.586151f, - -0.262463f, 0.228079f, -1.688776f, -1.594502f, -2.261078f, -1.802535f, - 0.034748f, -0.028476f, 2.713258f, 0.212446f, -1.529202f, -2.560178f, +static const float vp9_part_split_nn_weights_16_layer0[FEATURES * NODES] = { + -0.003629f, -0.046852f, 0.220428f, -0.033042f, 0.049365f, 0.112818f, + -0.306149f, -0.005872f, 1.066947f, -2.290226f, 2.159505f, -0.618714f, + -0.213294f, 0.451372f, -0.199459f, 0.223730f, -0.321709f, 0.063364f, + 0.148704f, -0.293371f, 0.077225f, -0.421947f, -0.515543f, -0.240975f, + -0.418516f, 1.036523f, -0.009165f, 0.032484f, 1.086549f, 0.220322f, + -0.247585f, -0.221232f, -0.225050f, 0.993051f, 0.285907f, 1.308846f, + 0.707456f, 0.335152f, 0.234556f, 0.264590f, -0.078033f, 0.542226f, + 0.057777f, 0.163471f, 0.039245f, -0.725960f, 0.963780f, -0.972001f, + 0.252237f, -0.192745f, -0.836571f, -0.460539f, -0.528713f, -0.160198f, + -0.621108f, 0.486405f, -0.221923f, 1.519426f, -0.857871f, 0.411595f, + 0.947188f, 0.203339f, 0.174526f, 0.016382f, 0.256879f, 0.049818f, + 0.057836f, -0.659096f, 0.459894f, 0.174695f, 0.379359f, 0.062530f, + -0.210201f, -0.355788f, -0.208432f, -0.401723f, -0.115373f, 0.191336f, + -0.109342f, 0.002455f, -0.078746f, -0.391871f, 0.149892f, -0.239615f, + -0.520709f, 0.118568f, -0.437975f, 0.118116f, -0.565426f, -0.206446f, + 0.113407f, 0.558894f, 0.534627f, 1.154350f, -0.116833f, 1.723311f, }; -static const float vp9_var_rd_part_nn_bias_16_layer0[8] = { - 0.495983f, 1.858545f, 0.162974f, 1.992247f, - -2.698863f, 0.110020f, 0.550830f, 0.420941f, +static const float vp9_part_split_nn_bias_16_layer0[NODES] = { + 0.013109f, 
-0.034341f, 0.679845f, -0.035781f, + -0.104183f, 0.098055f, -0.041130f, 0.160107f, }; -static const float vp9_var_rd_part_nn_weights_16_layer1[8 * LABELS] = { - 1.768409f, -1.394240f, 1.076846f, -1.762808f, - 1.517405f, 0.535195f, -0.426827f, 1.002272f, +static const float vp9_part_split_nn_weights_16_layer1[NODES * LABELS] = { + 1.499564f, -0.403259f, 1.366532f, -0.469868f, + 0.482227f, -2.076697f, 0.527691f, 0.540495f, }; -static const float vp9_var_rd_part_nn_bias_16_layer1[LABELS] = { - -1.65894794f, +static const float vp9_part_split_nn_bias_16_layer1[LABELS] = { + 0.01134653f, }; -static const NN_CONFIG vp9_var_rd_part_nnconfig_16 = { +static const NN_CONFIG vp9_part_split_nnconfig_16 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { - 8, + NODES, }, // num_hidden_nodes { - vp9_var_rd_part_nn_weights_16_layer0, - vp9_var_rd_part_nn_weights_16_layer1, + vp9_part_split_nn_weights_16_layer0, + vp9_part_split_nn_weights_16_layer1, }, { - vp9_var_rd_part_nn_bias_16_layer0, - vp9_var_rd_part_nn_bias_16_layer1, + vp9_part_split_nn_bias_16_layer0, + vp9_part_split_nn_bias_16_layer1, }, }; -static const float vp9_var_rd_part_nn_weights_8_layer0[FEATURES * 8] = { - -0.804900f, -1.214983f, 0.840202f, 0.686566f, 0.155804f, 0.025542f, - -1.244635f, -0.368403f, 0.364150f, 1.081073f, 0.552387f, 0.452715f, - 0.652968f, -0.293058f, 0.048967f, 0.021240f, -0.662981f, 0.424700f, - 0.008293f, -0.013088f, 0.747007f, -1.453907f, -1.498226f, 1.593252f, - -0.239557f, -0.143766f, 0.064311f, 1.320998f, -0.477411f, 0.026374f, - 0.730884f, -0.675124f, 0.965521f, 0.863658f, 0.809186f, 0.812280f, - 0.513131f, 0.185102f, 0.211354f, 0.793666f, 0.121714f, -0.015383f, - -0.650980f, -0.046581f, 0.911141f, 0.806319f, 0.974773f, 0.815893f, +static const float vp9_part_split_nn_weights_8_layer0[FEATURES * NODES] = { + -0.668875f, -0.159078f, -0.062663f, -0.483785f, -0.146814f, -0.608975f, + -0.589145f, 0.203704f, -0.051007f, -0.113769f, -0.477511f, -0.122603f, + 
-1.329890f, 1.403386f, 0.199636f, -0.161139f, 2.182090f, -0.014307f, + 0.015755f, -0.208468f, 0.884353f, 0.815920f, 0.632464f, 0.838225f, + 1.369483f, -0.029068f, 0.570213f, -0.573546f, 0.029617f, 0.562054f, + -0.653093f, -0.211910f, -0.661013f, -0.384418f, -0.574038f, -0.510069f, + 0.173047f, -0.274231f, -1.044008f, -0.422040f, -0.810296f, 0.144069f, + -0.406704f, 0.411230f, -0.144023f, 0.745651f, -0.595091f, 0.111787f, + 0.840651f, 0.030123f, -0.242155f, 0.101486f, -0.017889f, -0.254467f, + -0.285407f, -0.076675f, -0.549542f, -0.013544f, -0.686566f, -0.755150f, + 1.623949f, -0.286369f, 0.170976f, 0.016442f, -0.598353f, -0.038540f, + 0.202597f, -0.933582f, 0.599510f, 0.362273f, 0.577722f, 0.477603f, + 0.767097f, 0.431532f, 0.457034f, 0.223279f, 0.381349f, 0.033777f, + 0.423923f, -0.664762f, 0.385662f, 0.075744f, 0.182681f, 0.024118f, + 0.319408f, -0.528864f, 0.976537f, -0.305971f, -0.189380f, -0.241689f, + -1.318092f, 0.088647f, -0.109030f, -0.945654f, 1.082797f, 0.184564f, }; -static const float vp9_var_rd_part_nn_bias_8_layer0[8] = { - 0.176134f, 0.651308f, 2.007761f, 0.068812f, - 1.061517f, 1.487161f, -2.308147f, 1.099828f, +static const float vp9_part_split_nn_bias_8_layer0[NODES] = { + -0.237472f, 2.051396f, 0.297062f, -0.730194f, + 0.060472f, -0.565959f, 0.560869f, -0.395448f, }; -static const float vp9_var_rd_part_nn_weights_8_layer1[8 * LABELS] = { - 0.683032f, 1.326393f, -1.661539f, 1.438920f, - 1.118023f, -2.237380f, 1.518468f, 2.010416f, +static const float vp9_part_split_nn_weights_8_layer1[NODES * LABELS] = { + 0.568121f, 1.575915f, -0.544309f, 0.751595f, + -0.117911f, -1.340730f, -0.739671f, 0.661216f, }; -static const float vp9_var_rd_part_nn_bias_8_layer1[LABELS] = { - -1.65423989f, +static const float vp9_part_split_nn_bias_8_layer1[LABELS] = { + -0.63375306f, }; -static const NN_CONFIG vp9_var_rd_part_nnconfig_8 = { +static const NN_CONFIG vp9_part_split_nnconfig_8 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { - 8, 
+ NODES, }, // num_hidden_nodes { - vp9_var_rd_part_nn_weights_8_layer0, - vp9_var_rd_part_nn_weights_8_layer1, + vp9_part_split_nn_weights_8_layer0, + vp9_part_split_nn_weights_8_layer1, }, { - vp9_var_rd_part_nn_bias_8_layer0, - vp9_var_rd_part_nn_bias_8_layer1, + vp9_part_split_nn_bias_8_layer0, + vp9_part_split_nn_bias_8_layer1, }, }; +#undef NODES #undef FEATURES #undef LABELS diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c index a3240513f15..fcdcdf70d8e 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c @@ -717,9 +717,19 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, // The max tx_size passed in is TX_16X16. assert(tx_size != TX_32X32); - +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, + p->src.stride, pd->dst.buf, pd->dst.stride, + x->e_mbd.bd); + } else { + vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride); + } +#else vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride); +#endif *skippable = 1; // Keep track of the row and column of the blocks we use so that we know // if we are in the unrestricted motion border. 
@@ -1683,6 +1693,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, unsigned int sse_zeromv_normalized = UINT_MAX; unsigned int best_sse_sofar = UINT_MAX; int gf_temporal_ref = 0; + int force_test_gf_zeromv = 0; #if CONFIG_VP9_TEMPORAL_DENOISING VP9_PICKMODE_CTX_DEN ctx_den; int64_t zero_last_cost_orig = INT64_MAX; @@ -1698,6 +1709,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int svc_mv_row = 0; int no_scaling = 0; unsigned int thresh_svc_skip_golden = 500; + unsigned int thresh_skip_golden = 500; int scene_change_detected = cpi->rc.high_source_sad || (cpi->use_svc && cpi->svc.high_source_sad_superframe); @@ -1811,7 +1823,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, x->source_variance = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); - if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && mi->segment_id > 0 && + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && mi->segment_id > 0 && x->zero_temp_sad_source && x->source_variance == 0) { mi->segment_id = 0; vp9_init_plane_quantizers(cpi, x); @@ -1939,6 +1952,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, flag_svc_subpel = 1; } + // For SVC with quality layers, when QP of lower layer is lower + // than current layer: force check of GF-ZEROMV before early exit + // due to skip flag. 
+ if (svc->spatial_layer_id > 0 && no_scaling && + (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && + cm->base_qindex > svc->lower_layer_qindex + 10) + force_test_gf_zeromv = 1; + for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) { int rate_mv = 0; int mode_rd_thresh; @@ -2009,17 +2030,22 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) continue; } - // For SVC, skip the golden (spatial) reference search if sse of zeromv_last - // is below threshold. - if (cpi->use_svc && ref_frame == GOLDEN_FRAME && - sse_zeromv_normalized < thresh_svc_skip_golden) + // For CBR mode: skip the golden reference search if sse of zeromv_last is + // below threshold. + if (ref_frame == GOLDEN_FRAME && cpi->oxcf.rc_mode == VPX_CBR && + ((cpi->use_svc && sse_zeromv_normalized < thresh_svc_skip_golden) || + (!cpi->use_svc && sse_zeromv_normalized < thresh_skip_golden))) continue; if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; - if (sf->short_circuit_flat_blocks && x->source_variance == 0 && - (frame_mv[this_mode][ref_frame].as_int != 0 || - (cpi->oxcf.content == VP9E_CONTENT_SCREEN && !svc->spatial_layer_id && + // For screen content on flat blocks: skip non-zero motion check for + // stationary blocks, only skip zero motion check for non-stationary blocks. + if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && + sf->short_circuit_flat_blocks && x->source_variance == 0 && + ((frame_mv[this_mode][ref_frame].as_int != 0 && + x->zero_temp_sad_source) || + (frame_mv[this_mode][ref_frame].as_int == 0 && !x->zero_temp_sad_source))) { continue; } @@ -2231,7 +2257,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, &var_y, &sse_y); } // Save normalized sse (between current and last frame) for (0, 0) motion. 
- if (cpi->use_svc && ref_frame == LAST_FRAME && + if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0) { sse_zeromv_normalized = sse_y >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); @@ -2349,11 +2375,14 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (reuse_inter_pred) free_pred_buffer(this_mode_pred); } - if (x->skip) break; + if (x->skip && + (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME])) + break; // If early termination flag is 1 and at least 2 modes are checked, // the mode search is terminated. - if (best_early_term && idx > 0 && !scene_change_detected) { + if (best_early_term && idx > 0 && !scene_change_detected && + (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME])) { x->skip = 1; break; } @@ -2396,6 +2425,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // Perform intra prediction search, if the best SAD is above a certain // threshold. if (best_rdc.rdcost == INT64_MAX || + (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->source_variance == 0) || (scene_change_detected && perform_intra_pred) || ((!force_skip_low_temp_var || bsize < BLOCK_32X32 || x->content_state_sb == kVeryHighSad) && @@ -2438,8 +2468,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, const PREDICTION_MODE this_mode = intra_mode_list[i]; THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)]; int mode_rd_thresh = rd_threshes[mode_index]; + // For spatially flat blocks, under short_circuit_flat_blocks flag: + // only check DC mode for stationary blocks, otherwise also check + // H and V mode. 
if (sf->short_circuit_flat_blocks && x->source_variance == 0 && - this_mode != DC_PRED) { + ((x->zero_temp_sad_source && this_mode != DC_PRED) || i > 2)) { continue; } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c index 9df2eb333bf..152efa7e072 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c @@ -668,7 +668,7 @@ static int adjust_q_cbr(const VP9_COMP *cpi, int q) { } if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) vp9_cyclic_refresh_limit_q(cpi, &q); - return q; + return VPXMAX(VPXMIN(q, cpi->rc.worst_quality), cpi->rc.best_quality); } static double get_rate_correction_factor(const VP9_COMP *cpi) { @@ -1076,6 +1076,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, q = *top_index; } } + assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality); assert(*bottom_index <= rc->worst_quality && *bottom_index >= rc->best_quality); @@ -2271,7 +2272,7 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; int target; if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) || - rc->frames_to_key == 0) { + (cpi->oxcf.auto_key && rc->frames_to_key == 0)) { cm->frame_type = KEY_FRAME; rc->frames_to_key = cpi->oxcf.key_freq; rc->kf_boost = DEFAULT_KF_BOOST; @@ -2931,7 +2932,7 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { } else { rc->avg_source_sad[lagframe_idx] = avg_sad; } - if (num_zero_temp_sad < (num_samples >> 1)) + if (num_zero_temp_sad < (3 * num_samples >> 2)) rc->high_num_blocks_with_motion = 1; } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h index fa85f2176f5..062ca3277cf 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h +++ 
b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h @@ -42,6 +42,9 @@ extern "C" { #define RD_THRESH_MAX_FACT 64 #define RD_THRESH_INC 1 +#define VP9_DIST_SCALE_LOG2 4 +#define VP9_DIST_SCALE (1 << VP9_DIST_SCALE_LOG2) + // This enumerator type needs to be kept aligned with the mode order in // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. typedef enum { diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c index c1a079ff080..5fc3b7c05b3 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c @@ -273,9 +273,9 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, } *skip_txfm_sb = skip_flag; - *skip_sse_sb = total_sse << 4; + *skip_sse_sb = total_sse << VP9_DIST_SCALE_LOG2; *out_rate_sum = (int)rate_sum; - *out_dist_sum = dist_sum << 4; + *out_dist_sum = dist_sum << VP9_DIST_SCALE_LOG2; } #if CONFIG_VP9_HIGHBITDEPTH @@ -2538,14 +2538,49 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, // visual quality. 
static int discount_newmv_test(const VP9_COMP *cpi, int this_mode, int_mv this_mv, - int_mv (*mode_mv)[MAX_REF_FRAMES], - int ref_frame) { + int_mv (*mode_mv)[MAX_REF_FRAMES], int ref_frame, + int mi_row, int mi_col, BLOCK_SIZE bsize) { +#if CONFIG_NON_GREEDY_MV + (void)mode_mv; + (void)this_mv; + if (this_mode == NEWMV && bsize >= BLOCK_8X8 && cpi->tpl_ready) { + const int gf_group_idx = cpi->twopass.gf_group.index; + const int gf_rf_idx = ref_frame_to_gf_rf_idx(ref_frame); + const TplDepFrame tpl_frame = cpi->tpl_stats[gf_group_idx]; + const int tpl_block_mi_h = num_8x8_blocks_high_lookup[cpi->tpl_bsize]; + const int tpl_block_mi_w = num_8x8_blocks_wide_lookup[cpi->tpl_bsize]; + const int tpl_mi_row = mi_row - (mi_row % tpl_block_mi_h); + const int tpl_mi_col = mi_col - (mi_col % tpl_block_mi_w); + const int mv_mode = + tpl_frame + .mv_mode_arr[gf_rf_idx][tpl_mi_row * tpl_frame.stride + tpl_mi_col]; + if (mv_mode == NEW_MV_MODE) { + int_mv tpl_new_mv = *get_pyramid_mv(&tpl_frame, gf_rf_idx, cpi->tpl_bsize, + tpl_mi_row, tpl_mi_col); + int row_diff = abs(tpl_new_mv.as_mv.row - this_mv.as_mv.row); + int col_diff = abs(tpl_new_mv.as_mv.col - this_mv.as_mv.col); + if (VPXMAX(row_diff, col_diff) <= 8) { + return 1; + } else { + return 0; + } + } else { + return 0; + } + } else { + return 0; + } +#else + (void)mi_row; + (void)mi_col; + (void)bsize; return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) && (this_mv.as_int != 0) && ((mode_mv[NEARESTMV][ref_frame].as_int == 0) || (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) && ((mode_mv[NEARMV][ref_frame].as_int == 0) || (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV))); +#endif } static int64_t handle_inter_mode( @@ -2658,7 +2693,8 @@ static int64_t handle_inter_mode( // under certain circumstances where we want to help initiate a weak // motion field, where the distortion gain for a single block may not // be enough to overcome the cost of a new mv. 
- if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) { + if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0], mi_row, + mi_col, bsize)) { *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); } else { *rate2 += rate_mv; @@ -2691,8 +2727,8 @@ static int64_t handle_inter_mode( // // Under some circumstances we discount the cost of new mv mode to encourage // initiation of a motion field. - if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, - refs[0])) { + if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, refs[0], + mi_row, mi_col, bsize)) { *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]), cost_mv_ref(cpi, NEARESTMV, mbmi_ext->mode_context[refs[0]])); @@ -2963,8 +2999,8 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, // on the relative variance of the source and reconstruction. #define VERY_LOW_VAR_THRESH 2 #define LOW_VAR_THRESH 5 -#define VAR_MULT 100 -static unsigned int max_var_adjust[VP9E_CONTENT_INVALID] = { 16, 16, 100 }; +#define VAR_MULT 250 +static unsigned int max_var_adjust[VP9E_CONTENT_INVALID] = { 16, 16, 250 }; static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *this_rd, @@ -3034,7 +3070,7 @@ static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x, if (content_type == VP9E_CONTENT_FILM) { if (src_rec_min <= VERY_LOW_VAR_THRESH) { if (ref_frame == INTRA_FRAME) *this_rd *= 2; - if (bsize > 6) *this_rd *= 2; + if (bsize > BLOCK_16X16) *this_rd *= 2; } } } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c index 5aede927b9f..8c59662c370 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c @@ -76,27 +76,27 @@ static void 
set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, if (is_480p_or_larger) { // Currently, the machine-learning based partition search early termination // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0. - sf->ml_partition_search_early_termination = 1; + sf->rd_ml_partition.search_early_termination = 1; } else { sf->use_square_only_thresh_high = BLOCK_32X32; } if (!is_1080p_or_larger) { - sf->use_ml_partition_search_breakout = 1; + sf->rd_ml_partition.search_breakout = 1; if (is_720p_or_larger) { - sf->ml_partition_search_breakout_thresh[0] = 0.0f; - sf->ml_partition_search_breakout_thresh[1] = 0.0f; - sf->ml_partition_search_breakout_thresh[2] = 0.0f; + sf->rd_ml_partition.search_breakout_thresh[0] = 0.0f; + sf->rd_ml_partition.search_breakout_thresh[1] = 0.0f; + sf->rd_ml_partition.search_breakout_thresh[2] = 0.0f; } else { - sf->ml_partition_search_breakout_thresh[0] = 2.5f; - sf->ml_partition_search_breakout_thresh[1] = 1.5f; - sf->ml_partition_search_breakout_thresh[2] = 1.5f; + sf->rd_ml_partition.search_breakout_thresh[0] = 2.5f; + sf->rd_ml_partition.search_breakout_thresh[1] = 1.5f; + sf->rd_ml_partition.search_breakout_thresh[2] = 1.5f; } } if (speed >= 1) { - sf->ml_partition_search_early_termination = 0; - sf->use_ml_partition_search_breakout = 1; + sf->rd_ml_partition.search_early_termination = 0; + sf->rd_ml_partition.search_breakout = 1; if (is_480p_or_larger) sf->use_square_only_thresh_high = BLOCK_64X64; else @@ -106,21 +106,21 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->partition_search_breakout_thr.dist = (1 << 22); - sf->ml_partition_search_breakout_thresh[0] = -5.0f; - sf->ml_partition_search_breakout_thresh[1] = -5.0f; - sf->ml_partition_search_breakout_thresh[2] = -9.0f; + sf->rd_ml_partition.search_breakout_thresh[0] = -5.0f; + sf->rd_ml_partition.search_breakout_thresh[1] = -5.0f; + sf->rd_ml_partition.search_breakout_thresh[2] = -9.0f; } else { sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; sf->partition_search_breakout_thr.dist = (1 << 21); - sf->ml_partition_search_breakout_thresh[0] = -1.0f; - sf->ml_partition_search_breakout_thresh[1] = -1.0f; - sf->ml_partition_search_breakout_thresh[2] = -1.0f; + sf->rd_ml_partition.search_breakout_thresh[0] = -1.0f; + sf->rd_ml_partition.search_breakout_thresh[1] = -1.0f; + sf->rd_ml_partition.search_breakout_thresh[2] = -1.0f; } #if CONFIG_VP9_HIGHBITDEPTH if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) { - sf->ml_partition_search_breakout_thresh[0] -= 1.0f; - sf->ml_partition_search_breakout_thresh[1] -= 1.0f; - sf->ml_partition_search_breakout_thresh[2] -= 1.0f; + sf->rd_ml_partition.search_breakout_thresh[0] -= 1.0f; + sf->rd_ml_partition.search_breakout_thresh[1] -= 1.0f; + sf->rd_ml_partition.search_breakout_thresh[2] -= 1.0f; } #endif // CONFIG_VP9_HIGHBITDEPTH } @@ -134,14 +134,14 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, sf->adaptive_pred_interp_filter = 0; sf->partition_search_breakout_thr.dist = (1 << 24); sf->partition_search_breakout_thr.rate = 120; - sf->use_ml_partition_search_breakout = 0; + sf->rd_ml_partition.search_breakout = 0; } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; sf->partition_search_breakout_thr.dist = (1 << 22); sf->partition_search_breakout_thr.rate = 100; - sf->ml_partition_search_breakout_thresh[0] = 0.0f; - sf->ml_partition_search_breakout_thresh[1] = -1.0f; - sf->ml_partition_search_breakout_thresh[2] = -4.0f; + 
sf->rd_ml_partition.search_breakout_thresh[0] = 0.0f; + sf->rd_ml_partition.search_breakout_thresh[1] = -1.0f; + sf->rd_ml_partition.search_breakout_thresh[2] = -4.0f; } sf->rd_auto_partition_min_limit = set_partition_min_limit(cm); @@ -158,7 +158,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, } if (speed >= 3) { - sf->use_ml_partition_search_breakout = 0; + sf->rd_ml_partition.search_breakout = 0; if (is_720p_or_larger) { sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0; @@ -215,12 +215,12 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->less_rectangular_check = 1; sf->use_square_partition_only = !boosted; sf->prune_ref_frame_for_rect_partitions = 1; - sf->ml_var_partition_pruning = 1; + sf->rd_ml_partition.var_pruning = 1; - sf->ml_prune_rect_partition_threhold[0] = -1; - sf->ml_prune_rect_partition_threhold[1] = 350; - sf->ml_prune_rect_partition_threhold[2] = 325; - sf->ml_prune_rect_partition_threhold[3] = 250; + sf->rd_ml_partition.prune_rect_thresh[0] = -1; + sf->rd_ml_partition.prune_rect_thresh[1] = 350; + sf->rd_ml_partition.prune_rect_thresh[2] = 325; + sf->rd_ml_partition.prune_rect_thresh[3] = 250; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { sf->exhaustive_searches_thresh = (1 << 22); @@ -237,10 +237,11 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, } if (speed >= 1) { - sf->ml_var_partition_pruning = !boosted; - sf->ml_prune_rect_partition_threhold[1] = 200; - sf->ml_prune_rect_partition_threhold[2] = 200; - sf->ml_prune_rect_partition_threhold[3] = 200; + sf->temporal_filter_search_method = NSTEP; + sf->rd_ml_partition.var_pruning = !boosted; + sf->rd_ml_partition.prune_rect_thresh[1] = 225; + sf->rd_ml_partition.prune_rect_thresh[2] = 225; + sf->rd_ml_partition.prune_rect_thresh[3] = 225; if (oxcf->pass == 2) { TWO_PASS *const twopass = &cpi->twopass; @@ -283,7 +284,7 @@ static void 
set_good_speed_feature_framesize_independent(VP9_COMP *cpi, } if (speed >= 2) { - sf->ml_var_partition_pruning = 0; + sf->rd_ml_partition.var_pruning = 0; if (oxcf->vbr_corpus_complexity) sf->recode_loop = ALLOW_RECODE_FIRST; else @@ -307,9 +308,9 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->recode_tolerance_high = 45; sf->enhanced_full_pixel_motion_search = 0; sf->prune_ref_frame_for_rect_partitions = 0; - sf->ml_prune_rect_partition_threhold[1] = -1; - sf->ml_prune_rect_partition_threhold[2] = -1; - sf->ml_prune_rect_partition_threhold[3] = -1; + sf->rd_ml_partition.prune_rect_thresh[1] = -1; + sf->rd_ml_partition.prune_rect_thresh[2] = -1; + sf->rd_ml_partition.prune_rect_thresh[3] = -1; sf->mv.subpel_search_level = 0; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { @@ -447,6 +448,7 @@ static void set_rt_speed_feature_framesize_independent( sf->enable_tpl_model = 0; sf->enhanced_full_pixel_motion_search = 0; sf->use_accurate_subpel_search = USE_2_TAPS; + sf->nonrd_use_ml_partition = 0; if (speed >= 1) { sf->allow_txfm_domain_distortion = 1; @@ -562,16 +564,6 @@ static void set_rt_speed_feature_framesize_independent( (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1); sf->max_delta_qindex = is_keyframe ? 
20 : 15; sf->partition_search_type = REFERENCE_PARTITION; -#if CONFIG_ML_VAR_PARTITION - if (!frame_is_intra_only(cm) && cm->width >= 360 && cm->height >= 360) - sf->partition_search_type = ML_BASED_PARTITION; - else - sf->partition_search_type = REFERENCE_PARTITION; -#if CONFIG_VP9_HIGHBITDEPTH - if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) - sf->partition_search_type = REFERENCE_PARTITION; -#endif // CONFIG_VP9_HIGHBITDEPTH -#endif // CONFIG_ML_VAR_PARTITION if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 && cpi->rc.is_src_frame_alt_ref) { sf->partition_search_type = VAR_BASED_PARTITION; @@ -632,16 +624,7 @@ static void set_rt_speed_feature_framesize_independent( sf->use_altref_onepass = 1; sf->use_compound_nonrd_pickmode = 1; } -#if CONFIG_ML_VAR_PARTITION - if (frame_is_intra_only(cm) || cm->width < 360 || cm->height < 360) - sf->partition_search_type = VAR_BASED_PARTITION; -#if CONFIG_VP9_HIGHBITDEPTH - if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) - sf->partition_search_type = VAR_BASED_PARTITION; -#endif // CONFIG_VP9_HIGHBITDEPTH -#else sf->partition_search_type = VAR_BASED_PARTITION; -#endif // CONFIG_ML_VAR_PARTITION sf->mv.search_method = NSTEP; sf->mv.reduce_first_step_size = 1; sf->skip_encode_sb = 0; @@ -728,7 +711,10 @@ static void set_rt_speed_feature_framesize_independent( if (!cpi->use_svc) cpi->max_copied_frame = 4; if (cpi->row_mt && cpi->oxcf.max_threads > 1) sf->adaptive_rd_thresh_row_mt = 1; - + // Enable ML based partition for low res. + if (!frame_is_intra_only(cm) && cm->width * cm->height <= 352 * 288) { + sf->nonrd_use_ml_partition = 1; + } if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = FULL_PEL; if (content == VP9E_CONTENT_SCREEN) sf->lpf_pick = LPF_PICK_MINIMAL_LPF; // Only keep INTRA_DC mode for speed 8. 
@@ -762,9 +748,7 @@ static void set_rt_speed_feature_framesize_independent( if (speed >= 9) { sf->mv.enable_adaptive_subpel_force_stop = 1; - sf->mv.adapt_subpel_force_stop.mv_thresh = 2; - if (cpi->rc.avg_frame_low_motion < 40) - sf->mv.adapt_subpel_force_stop.mv_thresh = 1; + sf->mv.adapt_subpel_force_stop.mv_thresh = 1; sf->mv.adapt_subpel_force_stop.force_stop_below = QUARTER_PEL; sf->mv.adapt_subpel_force_stop.force_stop_above = HALF_PEL; // Disable partition blocks below 16x16, except for low-resolutions. @@ -772,9 +756,12 @@ static void set_rt_speed_feature_framesize_independent( sf->disable_16x16part_nonkey = 1; // Allow for disabling GOLDEN reference, for CBR mode. if (cpi->oxcf.rc_mode == VPX_CBR) sf->disable_golden_ref = 1; - if (cpi->rc.avg_frame_low_motion < 65) sf->default_interp_filter = BILINEAR; + sf->default_interp_filter = BILINEAR; } + if (sf->nonrd_use_ml_partition) + sf->partition_search_type = ML_BASED_PARTITION; + if (sf->use_altref_onepass) { if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) { sf->partition_search_type = FIXED_PARTITION; @@ -814,8 +801,8 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { // Some speed-up features even for best quality as minimal impact on quality. 
sf->partition_search_breakout_thr.dist = (1 << 19); sf->partition_search_breakout_thr.rate = 80; - sf->ml_partition_search_early_termination = 0; - sf->use_ml_partition_search_breakout = 0; + sf->rd_ml_partition.search_early_termination = 0; + sf->rd_ml_partition.search_breakout = 0; if (oxcf->mode == REALTIME) { set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed); @@ -906,6 +893,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->allow_acl = 1; sf->enable_tpl_model = oxcf->enable_tpl_model; sf->prune_ref_frame_for_rect_partitions = 0; + sf->temporal_filter_search_method = MESH; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_ALL; @@ -939,11 +927,11 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->limit_newmv_early_exit = 0; sf->bias_golden = 0; sf->base_mv_aggressive = 0; - sf->ml_prune_rect_partition_threhold[0] = -1; - sf->ml_prune_rect_partition_threhold[1] = -1; - sf->ml_prune_rect_partition_threhold[2] = -1; - sf->ml_prune_rect_partition_threhold[3] = -1; - sf->ml_var_partition_pruning = 0; + sf->rd_ml_partition.prune_rect_thresh[0] = -1; + sf->rd_ml_partition.prune_rect_thresh[1] = -1; + sf->rd_ml_partition.prune_rect_thresh[2] = -1; + sf->rd_ml_partition.prune_rect_thresh[3] = -1; + sf->rd_ml_partition.var_pruning = 0; sf->use_accurate_subpel_search = USE_8_TAPS; // Some speed-up features even for best quality as minimal impact on quality. diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h index 9b09ec4748e..8609c983722 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.h @@ -151,10 +151,8 @@ typedef enum { // Use non-fixed partitions based on source variance. 
SOURCE_VAR_BASED_PARTITION, -#if CONFIG_ML_VAR_PARTITION // Make partition decisions with machine learning models. ML_BASED_PARTITION -#endif // CONFIG_ML_VAR_PARTITION } PARTITION_SEARCH_TYPE; typedef enum { @@ -351,12 +349,6 @@ typedef struct SPEED_FEATURES { // Prune reference frames for rectangular partitions. int prune_ref_frame_for_rect_partitions; - // Threshold values used for ML based rectangular partition search pruning. - // If < 0, the feature is turned off. - // Higher values mean more aggressiveness to skip rectangular partition - // search that results in better encoding speed but worse coding performance. - int ml_prune_rect_partition_threhold[4]; - // Sets min and max partition sizes for this 64x64 region based on the // same 64x64 in last encoded frame, and the left and above neighbor. AUTO_MIN_MAX_MODE auto_min_max_partition_size; @@ -511,18 +503,27 @@ typedef struct SPEED_FEATURES { // Partition search early breakout thresholds. PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr; - // Use ML-based partition search early breakout. - int use_ml_partition_search_breakout; - // Higher values mean more aggressiveness for partition search breakout that - // results in better encoding speed but worse compression performance. - float ml_partition_search_breakout_thresh[3]; + struct { + // Use ML-based partition search early breakout. + int search_breakout; + // Higher values mean more aggressiveness for partition search breakout that + // results in better encoding speed but worse compression performance. + float search_breakout_thresh[3]; - // Machine-learning based partition search early termination - int ml_partition_search_early_termination; + // Machine-learning based partition search early termination + int search_early_termination; - // Machine-learning based partition search pruning using prediction residue - // variance. 
- int ml_var_partition_pruning; + // Machine-learning based partition search pruning using prediction residue + // variance. + int var_pruning; + + // Threshold values used for ML based rectangular partition search pruning. + // If < 0, the feature is turned off. + // Higher values mean more aggressiveness to skip rectangular partition + // search that results in better encoding speed but worse coding + // performance. + int prune_rect_thresh[4]; + } rd_ml_partition; // Allow skipping partition search for still image frame int allow_partition_search_skip; @@ -595,6 +596,12 @@ typedef struct SPEED_FEATURES { // Allow sub-pixel search to use interpolation filters with different taps in // order to achieve accurate motion search result. SUBPEL_SEARCH_TYPE use_accurate_subpel_search; + + // Search method used by temporal filtering in full_pixel_motion_search. + SEARCH_METHODS temporal_filter_search_method; + + // Use machine learning based partition search. + int nonrd_use_ml_partition; } SPEED_FEATURES; struct VP9_COMP; diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c index cd340c3943a..370af86424d 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c @@ -34,6 +34,9 @@ #include "vpx_scale/vpx_scale.h" static int fixed_divide[512]; +static unsigned int index_mult[14] = { + 0, 0, 0, 0, 49152, 39322, 32768, 28087, 24576, 21846, 19661, 17874, 0, 15124 +}; static void temporal_filter_predictors_mb_c( MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr, @@ -184,7 +187,28 @@ void vp9_temporal_filter_init(void) { static INLINE int mod_index(int sum_dist, int index, int rounding, int strength, int filter_weight) { - int mod = (sum_dist * 3) / index; + int mod; + + assert(index >= 0 && index <= 13); + assert(index_mult[index] 
!= 0); + + mod = + ((unsigned int)clamp(sum_dist, 0, UINT16_MAX) * index_mult[index]) >> 16; + mod += rounding; + mod >>= strength; + + mod = VPXMIN(16, mod); + + mod = 16 - mod; + mod *= filter_weight; + + return mod; +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE int highbd_mod_index(int sum_dist, int index, int rounding, + int strength, int filter_weight) { + int mod = sum_dist * 3 / index; mod += rounding; mod >>= strength; @@ -195,37 +219,38 @@ static INLINE int mod_index(int sum_dist, int index, int rounding, int strength, return mod; } +#endif // CONFIG_VP9_HIGHBITDEPTH static INLINE int get_filter_weight(unsigned int i, unsigned int j, unsigned int block_height, - unsigned int block_width, int *blk_fw, - int use_32x32) { - int filter_weight = 0; - - if (use_32x32) - // blk_fw[0] ~ blk_fw[3] are the same. + unsigned int block_width, + const int *const blk_fw, int use_32x32) { + // blk_fw[0] ~ blk_fw[3] are the same. + if (use_32x32) { return blk_fw[0]; + } if (i < block_height / 2) { - if (j < block_width / 2) - filter_weight = blk_fw[0]; - else - filter_weight = blk_fw[1]; - } else { - if (j < block_width / 2) - filter_weight = blk_fw[2]; - else - filter_weight = blk_fw[3]; + if (j < block_width / 2) { + return blk_fw[0]; + } + + return blk_fw[1]; + } + + if (j < block_width / 2) { + return blk_fw[2]; } - return filter_weight; + + return blk_fw[3]; } -static void apply_temporal_filter( +void vp9_apply_temporal_filter_c( const uint8_t *y_frame1, int y_stride, const uint8_t *y_pred, int y_buf_stride, const uint8_t *u_frame1, const uint8_t *v_frame1, int uv_stride, const uint8_t *u_pred, const uint8_t *v_pred, int uv_buf_stride, unsigned int block_width, unsigned int block_height, - int ss_x, int ss_y, int strength, int *blk_fw, int use_32x32, + int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count) { unsigned 
int i, j, k, m; @@ -271,7 +296,7 @@ static void apply_temporal_filter( for (i = 0, k = 0, m = 0; i < block_height; i++) { for (j = 0; j < block_width; j++) { const int pixel_value = y_pred[i * y_buf_stride + j]; - int filter_weight = + const int filter_weight = get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32); // non-local mean approach @@ -361,133 +386,152 @@ static void apply_temporal_filter( } } -// TODO(any): This function is not used anymore. Should be removed. -void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride, - const uint8_t *frame2, - unsigned int block_width, - unsigned int block_height, int strength, - int filter_weight, uint32_t *accumulator, - uint16_t *count) { - unsigned int i, j, k; - int modifier; - int byte = 0; - const int rounding = (1 << strength) >> 1; +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_apply_temporal_filter_c( + const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, + int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, + int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, + uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, + uint32_t *v_accum, uint16_t *v_count) { + const int uv_block_width = block_width >> ss_x; + const int uv_block_height = block_height >> ss_y; + const int y_diff_stride = BW; + const int uv_diff_stride = BW; + + DECLARE_ALIGNED(16, uint32_t, y_diff_sse[BLK_PELS]); + DECLARE_ALIGNED(16, uint32_t, u_diff_sse[BLK_PELS]); + DECLARE_ALIGNED(16, uint32_t, v_diff_sse[BLK_PELS]); - assert(strength >= 0); - assert(strength <= 6); + const int rounding = (1 << strength) >> 1; - assert(filter_weight >= 0); - assert(filter_weight <= 2); + // Loop variables + int row, col; + int uv_row, uv_col; + int row_step, col_step; - for (i = 0, k = 0; i < block_height; i++) { - for (j 
= 0; j < block_width; j++, k++) { - int pixel_value = *frame2; + memset(y_diff_sse, 0, BLK_PELS * sizeof(uint32_t)); + memset(u_diff_sse, 0, BLK_PELS * sizeof(uint32_t)); + memset(v_diff_sse, 0, BLK_PELS * sizeof(uint32_t)); - // non-local mean approach - int diff_sse[9] = { 0 }; - int idx, idy, index = 0; + // Get the square diffs + for (row = 0; row < (int)block_height; row++) { + for (col = 0; col < (int)block_width; col++) { + const int diff = + y_src[row * y_src_stride + col] - y_pre[row * y_pre_stride + col]; + y_diff_sse[row * y_diff_stride + col] = diff * diff; + } + } - for (idy = -1; idy <= 1; ++idy) { - for (idx = -1; idx <= 1; ++idx) { - int row = (int)i + idy; - int col = (int)j + idx; + for (row = 0; row < uv_block_height; row++) { + for (col = 0; col < uv_block_width; col++) { + const int u_diff = + u_src[row * uv_src_stride + col] - u_pre[row * uv_pre_stride + col]; + const int v_diff = + v_src[row * uv_src_stride + col] - v_pre[row * uv_pre_stride + col]; + u_diff_sse[row * uv_diff_stride + col] = u_diff * u_diff; + v_diff_sse[row * uv_diff_stride + col] = v_diff * v_diff; + } + } - if (row >= 0 && row < (int)block_height && col >= 0 && - col < (int)block_width) { - int diff = frame1[byte + idy * (int)stride + idx] - - frame2[idy * (int)block_width + idx]; - diff_sse[index] = diff * diff; - ++index; + // Apply the filter to luma + for (row = 0; row < (int)block_height; row++) { + for (col = 0; col < (int)block_width; col++) { + const int uv_row = row >> ss_y; + const int uv_col = col >> ss_x; + const int filter_weight = get_filter_weight( + row, col, block_height, block_width, blk_fw, use_32x32); + + // First we get the modifier for the current y pixel + const int y_pixel = y_pre[row * y_pre_stride + col]; + int y_num_used = 0; + int y_mod = 0; + + // Sum the neighboring 3x3 y pixels + for (row_step = -1; row_step <= 1; row_step++) { + for (col_step = -1; col_step <= 1; col_step++) { + const int sub_row = row + row_step; + const int sub_col = col + 
col_step; + + if (sub_row >= 0 && sub_row < (int)block_height && sub_col >= 0 && + sub_col < (int)block_width) { + y_mod += y_diff_sse[sub_row * y_diff_stride + sub_col]; + y_num_used++; } } } - assert(index > 0); - - modifier = 0; - for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx]; - - modifier *= 3; - modifier /= index; - - ++frame2; - - modifier += rounding; - modifier >>= strength; + // Sum the corresponding uv pixels to the current y modifier + // Note we are rounding down instead of rounding to the nearest pixel. + y_mod += u_diff_sse[uv_row * uv_diff_stride + uv_col]; + y_mod += v_diff_sse[uv_row * uv_diff_stride + uv_col]; - if (modifier > 16) modifier = 16; - - modifier = 16 - modifier; - modifier *= filter_weight; - - count[k] += modifier; - accumulator[k] += modifier * pixel_value; - - byte++; - } - - byte += stride - block_width; - } -} - -#if CONFIG_VP9_HIGHBITDEPTH -void vp9_highbd_temporal_filter_apply_c( - const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8, - unsigned int block_width, unsigned int block_height, int strength, - int *blk_fw, int use_32x32, uint32_t *accumulator, uint16_t *count) { - const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8); - const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8); - unsigned int i, j, k; - int modifier; - const int rounding = strength > 0 ? 
1 << (strength - 1) : 0; + y_num_used += 2; - int diff_sse[BLK_PELS] = { 0 }; - int this_idx = 0; + // Set the modifier + y_mod = highbd_mod_index(y_mod, y_num_used, rounding, strength, + filter_weight); - for (i = 0; i < block_height; i++) { - for (j = 0; j < block_width; j++) { - const int diff = - frame1[i * (int)stride + j] - frame2[i * (int)block_width + j]; - diff_sse[this_idx++] = diff * diff; + // Accumulate the result + y_count[row * block_width + col] += y_mod; + y_accum[row * block_width + col] += y_mod * y_pixel; } } - modifier = 0; - for (i = 0, k = 0; i < block_height; i++) { - for (j = 0; j < block_width; j++, k++) { - int pixel_value = frame2[i * (int)block_width + j]; - int filter_weight = - get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32); - - int idx, idy, index = 0; - - for (idy = -1; idy <= 1; ++idy) { - for (idx = -1; idx <= 1; ++idx) { - int row = (int)i + idy; - int col = (int)j + idx; - - if (row >= 0 && row < (int)block_height && col >= 0 && - col < (int)block_width) { - modifier += diff_sse[row * (int)block_width + col]; - ++index; + // Apply the filter to chroma + for (uv_row = 0; uv_row < uv_block_height; uv_row++) { + for (uv_col = 0; uv_col < uv_block_width; uv_col++) { + const int y_row = uv_row << ss_y; + const int y_col = uv_col << ss_x; + const int filter_weight = get_filter_weight( + uv_row, uv_col, uv_block_height, uv_block_width, blk_fw, use_32x32); + + const int u_pixel = u_pre[uv_row * uv_pre_stride + uv_col]; + const int v_pixel = v_pre[uv_row * uv_pre_stride + uv_col]; + + int uv_num_used = 0; + int u_mod = 0, v_mod = 0; + + // Sum the neighboring 3x3 chroma pixels to the chroma modifier + for (row_step = -1; row_step <= 1; row_step++) { + for (col_step = -1; col_step <= 1; col_step++) { + const int sub_row = uv_row + row_step; + const int sub_col = uv_col + col_step; + + if (sub_row >= 0 && sub_row < uv_block_height && sub_col >= 0 && + sub_col < uv_block_width) { + u_mod += u_diff_sse[sub_row * 
uv_diff_stride + sub_col]; + v_mod += v_diff_sse[sub_row * uv_diff_stride + sub_col]; + uv_num_used++; } } } - assert(index > 0); - - modifier *= 3; - modifier /= index; - - modifier += rounding; - modifier >>= strength; - if (modifier > 16) modifier = 16; + // Sum all the luma pixels associated with the current chroma pixel + for (row_step = 0; row_step < 1 + ss_y; row_step++) { + for (col_step = 0; col_step < 1 + ss_x; col_step++) { + const int sub_row = y_row + row_step; + const int sub_col = y_col + col_step; + const int y_diff = y_diff_sse[sub_row * y_diff_stride + sub_col]; - modifier = 16 - modifier; - modifier *= filter_weight; + u_mod += y_diff; + v_mod += y_diff; + uv_num_used++; + } + } - count[k] += modifier; - accumulator[k] += modifier * pixel_value; + // Set the modifier + u_mod = highbd_mod_index(u_mod, uv_num_used, rounding, strength, + filter_weight); + v_mod = highbd_mod_index(v_mod, uv_num_used, rounding, strength, + filter_weight); + + // Accumulate the result + u_count[uv_row * uv_block_width + uv_col] += u_mod; + u_accum[uv_row * uv_block_width + uv_col] += u_mod * u_pixel; + v_count[uv_row * uv_block_width + uv_col] += v_mod; + v_accum[uv_row * uv_block_width + uv_col] += v_mod * v_pixel; } } } @@ -501,6 +545,7 @@ static uint32_t temporal_filter_find_matching_mb_c( MACROBLOCKD *const xd = &x->e_mbd; MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; const SEARCH_METHODS search_method = MESH; + const SEARCH_METHODS search_method_16 = cpi->sf.temporal_filter_search_method; int step_param; int sadpb = x->sadperbit16; uint32_t bestsme = UINT_MAX; @@ -563,7 +608,7 @@ static uint32_t temporal_filter_find_matching_mb_c( vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); vp9_full_pixel_search(cpi, x, TF_SUB_BLOCK, &best_ref_mv1_full, - step_param, search_method, sadpb, + step_param, search_method_16, sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1, &blk_mvs[k], 0, 0); /* restore UMV window */ @@ -671,7 +716,9 @@ void 
vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, src_variance = vp9_get_sby_perpixel_variance(cpi, &src, TF_BLOCK); #endif // CONFIG_VP9_HIGHBITDEPTH - if (src_variance <= 2) strength = VPXMAX(0, (int)strength - 2); + if (src_variance <= 2) { + strength = VPXMAX(0, arnr_filter_data->strength - 2); + } } for (frame = 0; frame < frame_count; frame++) { @@ -740,7 +787,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, } } - if (blk_fw[0] || blk_fw[1] || blk_fw[2] || blk_fw[3]) { + if (blk_fw[0] | blk_fw[1] | blk_fw[2] | blk_fw[3]) { // Construct the predictors temporal_filter_predictors_mb_c( mbd, frames[frame]->y_buffer + mb_y_offset, @@ -753,21 +800,20 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int adj_strength = strength + 2 * (mbd->bd - 8); // Apply the filter (YUV) - vp9_highbd_temporal_filter_apply( - f->y_buffer + mb_y_offset, f->y_stride, predictor, BW, BH, - adj_strength, blk_fw, use_32x32, accumulator, count); - vp9_highbd_temporal_filter_apply( - f->u_buffer + mb_uv_offset, f->uv_stride, predictor + BLK_PELS, - mb_uv_width, mb_uv_height, adj_strength, blk_fw, use_32x32, - accumulator + BLK_PELS, count + BLK_PELS); - vp9_highbd_temporal_filter_apply( - f->v_buffer + mb_uv_offset, f->uv_stride, - predictor + (BLK_PELS << 1), mb_uv_width, mb_uv_height, - adj_strength, blk_fw, use_32x32, accumulator + (BLK_PELS << 1), - count + (BLK_PELS << 1)); + vp9_highbd_apply_temporal_filter( + CONVERT_TO_SHORTPTR(f->y_buffer + mb_y_offset), f->y_stride, + CONVERT_TO_SHORTPTR(predictor), BW, + CONVERT_TO_SHORTPTR(f->u_buffer + mb_uv_offset), + CONVERT_TO_SHORTPTR(f->v_buffer + mb_uv_offset), f->uv_stride, + CONVERT_TO_SHORTPTR(predictor + BLK_PELS), + CONVERT_TO_SHORTPTR(predictor + (BLK_PELS << 1)), mb_uv_width, BW, + BH, mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y, + adj_strength, blk_fw, use_32x32, accumulator, count, + accumulator + 
BLK_PELS, count + BLK_PELS, + accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1)); } else { // Apply the filter (YUV) - apply_temporal_filter( + vp9_apply_temporal_filter( f->y_buffer + mb_y_offset, f->y_stride, predictor, BW, f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset, f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1), @@ -778,7 +824,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, } #else // Apply the filter (YUV) - apply_temporal_filter( + vp9_apply_temporal_filter( f->y_buffer + mb_y_offset, f->y_stride, predictor, BW, f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset, f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1), diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h index f5fa194d16f..553a468280f 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h @@ -24,7 +24,7 @@ static const MV kZeroMv = { 0, 0 }; #define BH_LOG2 5 #define BW 32 #define BW_LOG2 5 -#define BLK_PELS 1024 // Pixels in the block +#define BLK_PELS ((BH) * (BW)) // Pixels in the block #define TF_SHIFT 2 #define TF_ROUND 3 #define THR_SHIFT 2 diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/temporal_filter_constants.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/temporal_filter_constants.h new file mode 100644 index 00000000000..20b7085a3db --- /dev/null +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/temporal_filter_constants.h @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2019 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" + +// Division using multiplication and shifting. The C implementation does: +// modifier *= 3; +// modifier /= index; +// where 'modifier' is a set of summed values and 'index' is the number of +// summed values. +// +// This equation works out to (m * 3) / i which reduces to: +// m * 3/4 +// m * 1/2 +// m * 1/3 +// +// By pairing the multiply with a down shift by 16 (_mm_mulhi_epu16): +// m * C / 65536 +// we can create a C to replicate the division. +// +// m * 49152 / 65536 = m * 3/4 +// m * 32768 / 65536 = m * 1/2 +// m * 21846 / 65536 = m * 0.3333 +// +// These are loaded using an instruction expecting int16_t values but are used +// with _mm_mulhi_epu16(), which treats them as unsigned. +#define NEIGHBOR_CONSTANT_4 (int16_t)49152 +#define NEIGHBOR_CONSTANT_5 (int16_t)39322 +#define NEIGHBOR_CONSTANT_6 (int16_t)32768 +#define NEIGHBOR_CONSTANT_7 (int16_t)28087 +#define NEIGHBOR_CONSTANT_8 (int16_t)24576 +#define NEIGHBOR_CONSTANT_9 (int16_t)21846 +#define NEIGHBOR_CONSTANT_10 (int16_t)19661 +#define NEIGHBOR_CONSTANT_11 (int16_t)17874 +#define NEIGHBOR_CONSTANT_13 (int16_t)15124 + +DECLARE_ALIGNED(16, static const int16_t, LEFT_CORNER_NEIGHBORS_PLUS_1[8]) = { + NEIGHBOR_CONSTANT_5, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, + NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, + NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7 +}; + +DECLARE_ALIGNED(16, static const int16_t, RIGHT_CORNER_NEIGHBORS_PLUS_1[8]) = { + NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, + NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, + NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_5 +}; + +DECLARE_ALIGNED(16, static const int16_t, LEFT_EDGE_NEIGHBORS_PLUS_1[8]) = { + NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + 
NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const int16_t, RIGHT_EDGE_NEIGHBORS_PLUS_1[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_7 +}; + +DECLARE_ALIGNED(16, static const int16_t, MIDDLE_EDGE_NEIGHBORS_PLUS_1[8]) = { + NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, + NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, + NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7 +}; + +DECLARE_ALIGNED(16, static const int16_t, MIDDLE_CENTER_NEIGHBORS_PLUS_1[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const int16_t, LEFT_CORNER_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const int16_t, RIGHT_CORNER_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_6 +}; + +DECLARE_ALIGNED(16, static const int16_t, LEFT_EDGE_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11 +}; + +DECLARE_ALIGNED(16, static const int16_t, RIGHT_EDGE_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const 
int16_t, MIDDLE_EDGE_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const int16_t, MIDDLE_CENTER_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11 +}; + +DECLARE_ALIGNED(16, static const int16_t, TWO_CORNER_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_6 +}; + +DECLARE_ALIGNED(16, static const int16_t, TWO_EDGE_NEIGHBORS_PLUS_2[8]) = { + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, + NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const int16_t, LEFT_CORNER_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const int16_t, RIGHT_CORNER_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const int16_t, LEFT_EDGE_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13 +}; + +DECLARE_ALIGNED(16, static const int16_t, RIGHT_EDGE_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, 
NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const int16_t, MIDDLE_EDGE_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 +}; + +DECLARE_ALIGNED(16, static const int16_t, MIDDLE_CENTER_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13 +}; + +DECLARE_ALIGNED(16, static const int16_t, TWO_CORNER_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_8 +}; + +DECLARE_ALIGNED(16, static const int16_t, TWO_EDGE_NEIGHBORS_PLUS_4[8]) = { + NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, + NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_10 +}; + +static const int16_t *const LUMA_LEFT_COLUMN_NEIGHBORS[2] = { + LEFT_CORNER_NEIGHBORS_PLUS_2, LEFT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const LUMA_MIDDLE_COLUMN_NEIGHBORS[2] = { + MIDDLE_EDGE_NEIGHBORS_PLUS_2, MIDDLE_CENTER_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const LUMA_RIGHT_COLUMN_NEIGHBORS[2] = { + RIGHT_CORNER_NEIGHBORS_PLUS_2, RIGHT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS[2] = { + LEFT_CORNER_NEIGHBORS_PLUS_1, LEFT_EDGE_NEIGHBORS_PLUS_1 +}; + +static const int16_t *const CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { + MIDDLE_EDGE_NEIGHBORS_PLUS_1, MIDDLE_CENTER_NEIGHBORS_PLUS_1 +}; + +static const int16_t *const CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS[2] = { + RIGHT_CORNER_NEIGHBORS_PLUS_1, RIGHT_EDGE_NEIGHBORS_PLUS_1 +}; + +static const int16_t *const 
CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { + LEFT_CORNER_NEIGHBORS_PLUS_2, LEFT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { + MIDDLE_EDGE_NEIGHBORS_PLUS_2, MIDDLE_CENTER_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { + RIGHT_CORNER_NEIGHBORS_PLUS_2, RIGHT_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_SINGLE_SS_SINGLE_COLUMN_NEIGHBORS[2] = { + TWO_CORNER_NEIGHBORS_PLUS_2, TWO_EDGE_NEIGHBORS_PLUS_2 +}; + +static const int16_t *const CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { + LEFT_CORNER_NEIGHBORS_PLUS_4, LEFT_EDGE_NEIGHBORS_PLUS_4 +}; + +static const int16_t *const CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { + MIDDLE_EDGE_NEIGHBORS_PLUS_4, MIDDLE_CENTER_NEIGHBORS_PLUS_4 +}; + +static const int16_t *const CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { + RIGHT_CORNER_NEIGHBORS_PLUS_4, RIGHT_EDGE_NEIGHBORS_PLUS_4 +}; + +static const int16_t *const CHROMA_DOUBLE_SS_SINGLE_COLUMN_NEIGHBORS[2] = { + TWO_CORNER_NEIGHBORS_PLUS_4, TWO_EDGE_NEIGHBORS_PLUS_4 +}; + +#define DIST_STRIDE ((BW) + 2) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/temporal_filter_sse4.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/temporal_filter_sse4.c index e5860d39ced..a97c96dee43 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/temporal_filter_sse4.c +++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/temporal_filter_sse4.c @@ -14,96 +14,58 @@ #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" +#include "vp9/encoder/vp9_encoder.h" +#include "vp9/encoder/vp9_temporal_filter.h" +#include "vp9/encoder/x86/temporal_filter_constants.h" -// Division using multiplication and shifting. The C implementation does: -// modifier *= 3; -// modifier /= index; -// where 'modifier' is a set of summed values and 'index' is the number of -// summed values. 
'index' may be 4, 6, or 9, representing a block of 9 values -// which may be bound by the edges of the block being filtered. -// -// This equation works out to (m * 3) / i which reduces to: -// m * 3/4 -// m * 1/2 -// m * 1/3 -// -// By pairing the multiply with a down shift by 16 (_mm_mulhi_epu16): -// m * C / 65536 -// we can create a C to replicate the division. -// -// m * 49152 / 65536 = m * 3/4 -// m * 32758 / 65536 = m * 1/2 -// m * 21846 / 65536 = m * 0.3333 -// -// These are loaded using an instruction expecting int16_t values but are used -// with _mm_mulhi_epu16(), which treats them as unsigned. -#define NEIGHBOR_CONSTANT_4 (int16_t)49152 -#define NEIGHBOR_CONSTANT_6 (int16_t)32768 -#define NEIGHBOR_CONSTANT_9 (int16_t)21846 - -// Load values from 'a' and 'b'. Compute the difference squared and sum -// neighboring values such that: -// sum[1] = (a[0]-b[0])^2 + (a[1]-b[1])^2 + (a[2]-b[2])^2 -// Values to the left and right of the row are set to 0. -// The values are returned in sum_0 and sum_1 as *unsigned* 16 bit values. -static void sum_8(const uint8_t *a, const uint8_t *b, __m128i *sum) { - const __m128i a_u8 = _mm_loadl_epi64((const __m128i *)a); - const __m128i b_u8 = _mm_loadl_epi64((const __m128i *)b); - - const __m128i a_u16 = _mm_cvtepu8_epi16(a_u8); - const __m128i b_u16 = _mm_cvtepu8_epi16(b_u8); - - const __m128i diff_s16 = _mm_sub_epi16(a_u16, b_u16); - const __m128i diff_sq_u16 = _mm_mullo_epi16(diff_s16, diff_s16); - - // Shift all the values one place to the left/right so we can efficiently sum - // diff_sq_u16[i - 1] + diff_sq_u16[i] + diff_sq_u16[i + 1]. - const __m128i shift_left = _mm_slli_si128(diff_sq_u16, 2); - const __m128i shift_right = _mm_srli_si128(diff_sq_u16, 2); - - // It becomes necessary to treat the values as unsigned at this point. The - // 255^2 fits in uint16_t but not int16_t. Use saturating adds from this point - // forward since the filter is only applied to smooth small pixel changes. 
- // Once the value has saturated to uint16_t it is well outside the useful - // range. - __m128i sum_u16 = _mm_adds_epu16(diff_sq_u16, shift_left); - sum_u16 = _mm_adds_epu16(sum_u16, shift_right); - - *sum = sum_u16; -} +// Read in 8 pixels from a and b as 8-bit unsigned integers, compute the +// difference squared, and store as unsigned 16-bit integer to dst. +static INLINE void store_dist_8(const uint8_t *a, const uint8_t *b, + uint16_t *dst) { + const __m128i a_reg = _mm_loadl_epi64((const __m128i *)a); + const __m128i b_reg = _mm_loadl_epi64((const __m128i *)b); -static void sum_16(const uint8_t *a, const uint8_t *b, __m128i *sum_0, - __m128i *sum_1) { - const __m128i zero = _mm_setzero_si128(); - const __m128i a_u8 = _mm_loadu_si128((const __m128i *)a); - const __m128i b_u8 = _mm_loadu_si128((const __m128i *)b); + const __m128i a_first = _mm_cvtepu8_epi16(a_reg); + const __m128i b_first = _mm_cvtepu8_epi16(b_reg); - const __m128i a_0_u16 = _mm_cvtepu8_epi16(a_u8); - const __m128i a_1_u16 = _mm_unpackhi_epi8(a_u8, zero); - const __m128i b_0_u16 = _mm_cvtepu8_epi16(b_u8); - const __m128i b_1_u16 = _mm_unpackhi_epi8(b_u8, zero); + __m128i dist_first; - const __m128i diff_0_s16 = _mm_sub_epi16(a_0_u16, b_0_u16); - const __m128i diff_1_s16 = _mm_sub_epi16(a_1_u16, b_1_u16); - const __m128i diff_sq_0_u16 = _mm_mullo_epi16(diff_0_s16, diff_0_s16); - const __m128i diff_sq_1_u16 = _mm_mullo_epi16(diff_1_s16, diff_1_s16); + dist_first = _mm_sub_epi16(a_first, b_first); + dist_first = _mm_mullo_epi16(dist_first, dist_first); - __m128i shift_left = _mm_slli_si128(diff_sq_0_u16, 2); - // Use _mm_alignr_epi8() to "shift in" diff_sq_u16[8]. 
- __m128i shift_right = _mm_alignr_epi8(diff_sq_1_u16, diff_sq_0_u16, 2); + _mm_storeu_si128((__m128i *)dst, dist_first); +} - __m128i sum_u16 = _mm_adds_epu16(diff_sq_0_u16, shift_left); - sum_u16 = _mm_adds_epu16(sum_u16, shift_right); +static INLINE void store_dist_16(const uint8_t *a, const uint8_t *b, + uint16_t *dst) { + const __m128i zero = _mm_setzero_si128(); + const __m128i a_reg = _mm_loadu_si128((const __m128i *)a); + const __m128i b_reg = _mm_loadu_si128((const __m128i *)b); - *sum_0 = sum_u16; + const __m128i a_first = _mm_cvtepu8_epi16(a_reg); + const __m128i a_second = _mm_unpackhi_epi8(a_reg, zero); + const __m128i b_first = _mm_cvtepu8_epi16(b_reg); + const __m128i b_second = _mm_unpackhi_epi8(b_reg, zero); - shift_left = _mm_alignr_epi8(diff_sq_1_u16, diff_sq_0_u16, 14); - shift_right = _mm_srli_si128(diff_sq_1_u16, 2); + __m128i dist_first, dist_second; - sum_u16 = _mm_adds_epu16(diff_sq_1_u16, shift_left); - sum_u16 = _mm_adds_epu16(sum_u16, shift_right); + dist_first = _mm_sub_epi16(a_first, b_first); + dist_second = _mm_sub_epi16(a_second, b_second); + dist_first = _mm_mullo_epi16(dist_first, dist_first); + dist_second = _mm_mullo_epi16(dist_second, dist_second); + + _mm_storeu_si128((__m128i *)dst, dist_first); + _mm_storeu_si128((__m128i *)(dst + 8), dist_second); +} - *sum_1 = sum_u16; +static INLINE void read_dist_8(const uint16_t *dist, __m128i *dist_reg) { + *dist_reg = _mm_loadu_si128((const __m128i *)dist); +} + +static INLINE void read_dist_16(const uint16_t *dist, __m128i *reg_first, + __m128i *reg_second) { + read_dist_8(dist, reg_first); + read_dist_8(dist + 8, reg_second); } // Average the value based on the number of values summed (9 for pixels away @@ -111,7 +73,7 @@ static void sum_16(const uint8_t *a, const uint8_t *b, __m128i *sum_0, // // Add in the rounding factor and shift, clamp to 16, invert and shift. Multiply // by weight. 
-static __m128i average_8(__m128i sum, const __m128i mul_constants, +static __m128i average_8(__m128i sum, const __m128i *mul_constants, const int strength, const int rounding, const int weight) { // _mm_srl_epi16 uses the lower 64 bit value for the shift. @@ -121,7 +83,7 @@ static __m128i average_8(__m128i sum, const __m128i mul_constants, const __m128i sixteen = _mm_set1_epi16(16); // modifier * 3 / index; - sum = _mm_mulhi_epu16(sum, mul_constants); + sum = _mm_mulhi_epu16(sum, *mul_constants); sum = _mm_adds_epu16(sum, rounding_u16); sum = _mm_srl_epi16(sum, strength_u128); @@ -136,20 +98,48 @@ static __m128i average_8(__m128i sum, const __m128i mul_constants, return _mm_mullo_epi16(sum, weight_u16); } -static void average_16(__m128i *sum_0_u16, __m128i *sum_1_u16, - const __m128i mul_constants_0, - const __m128i mul_constants_1, const int strength, - const int rounding, const int weight) { +static __m128i average_4_4(__m128i sum, const __m128i *mul_constants, + const int strength, const int rounding, + const int weight_0, const int weight_1) { + // _mm_srl_epi16 uses the lower 64 bit value for the shift. + const __m128i strength_u128 = _mm_set_epi32(0, 0, 0, strength); + const __m128i rounding_u16 = _mm_set1_epi16(rounding); + const __m128i weight_u16 = + _mm_setr_epi16(weight_0, weight_0, weight_0, weight_0, weight_1, weight_1, + weight_1, weight_1); + const __m128i sixteen = _mm_set1_epi16(16); + + // modifier * 3 / index; + sum = _mm_mulhi_epu16(sum, *mul_constants); + + sum = _mm_adds_epu16(sum, rounding_u16); + sum = _mm_srl_epi16(sum, strength_u128); + + // The maximum input to this comparison is UINT16_MAX * NEIGHBOR_CONSTANT_4 + // >> 16 (also NEIGHBOR_CONSTANT_4 -1) which is 49151 / 0xbfff / -16385 + // So this needs to use the epu16 version which did not come until SSE4. 
+ sum = _mm_min_epu16(sum, sixteen); + + sum = _mm_sub_epi16(sixteen, sum); + + return _mm_mullo_epi16(sum, weight_u16); +} + +static INLINE void average_16(__m128i *sum_0_u16, __m128i *sum_1_u16, + const __m128i *mul_constants_0, + const __m128i *mul_constants_1, + const int strength, const int rounding, + const int weight) { const __m128i strength_u128 = _mm_set_epi32(0, 0, 0, strength); const __m128i rounding_u16 = _mm_set1_epi16(rounding); const __m128i weight_u16 = _mm_set1_epi16(weight); const __m128i sixteen = _mm_set1_epi16(16); __m128i input_0, input_1; - input_0 = _mm_mulhi_epu16(*sum_0_u16, mul_constants_0); + input_0 = _mm_mulhi_epu16(*sum_0_u16, *mul_constants_0); input_0 = _mm_adds_epu16(input_0, rounding_u16); - input_1 = _mm_mulhi_epu16(*sum_1_u16, mul_constants_1); + input_1 = _mm_mulhi_epu16(*sum_1_u16, *mul_constants_1); input_1 = _mm_adds_epu16(input_1, rounding_u16); input_0 = _mm_srl_epi16(input_0, strength_u128); @@ -192,10 +182,10 @@ static void accumulate_and_store_8(const __m128i sum_u16, const uint8_t *pred, _mm_storeu_si128((__m128i *)(accumulator + 4), accum_1_u32); } -static void accumulate_and_store_16(const __m128i sum_0_u16, - const __m128i sum_1_u16, - const uint8_t *pred, uint16_t *count, - uint32_t *accumulator) { +static INLINE void accumulate_and_store_16(const __m128i sum_0_u16, + const __m128i sum_1_u16, + const uint8_t *pred, uint16_t *count, + uint32_t *accumulator) { const __m128i pred_u8 = _mm_loadu_si128((const __m128i *)pred); const __m128i zero = _mm_setzero_si128(); __m128i count_0_u16 = _mm_loadu_si128((const __m128i *)count), @@ -235,142 +225,782 @@ static void accumulate_and_store_16(const __m128i sum_0_u16, _mm_storeu_si128((__m128i *)(accumulator + 12), accum_3_u32); } -void vp9_temporal_filter_apply_sse4_1(const uint8_t *a, unsigned int stride, - const uint8_t *b, unsigned int width, - unsigned int height, int strength, - int weight, uint32_t *accumulator, - uint16_t *count) { - unsigned int h; +// Read in 8 
pixels from y_dist. For each index i, compute y_dist[i-1] + +// y_dist[i] + y_dist[i+1] and store in sum as 16-bit unsigned int. +static INLINE void get_sum_8(const uint16_t *y_dist, __m128i *sum) { + __m128i dist_reg, dist_left, dist_right; + + dist_reg = _mm_loadu_si128((const __m128i *)y_dist); + dist_left = _mm_loadu_si128((const __m128i *)(y_dist - 1)); + dist_right = _mm_loadu_si128((const __m128i *)(y_dist + 1)); + + *sum = _mm_adds_epu16(dist_reg, dist_left); + *sum = _mm_adds_epu16(*sum, dist_right); +} + +// Read in 16 pixels from y_dist. For each index i, compute y_dist[i-1] + +// y_dist[i] + y_dist[i+1]. Store the result for first 8 pixels in sum_first and +// the rest in sum_second. +static INLINE void get_sum_16(const uint16_t *y_dist, __m128i *sum_first, + __m128i *sum_second) { + get_sum_8(y_dist, sum_first); + get_sum_8(y_dist + 8, sum_second); +} + +// Read in a row of chroma values corresponds to a row of 16 luma values. +static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist, + const uint16_t *v_dist, + __m128i *u_first, __m128i *u_second, + __m128i *v_first, + __m128i *v_second) { + if (!ss_x) { + // If there is no chroma subsampling in the horizontal direction, then we + // need to load 16 entries from chroma. + read_dist_16(u_dist, u_first, u_second); + read_dist_16(v_dist, v_first, v_second); + } else { // ss_x == 1 + // Otherwise, we only need to load 8 entries + __m128i u_reg, v_reg; + + read_dist_8(u_dist, &u_reg); + + *u_first = _mm_unpacklo_epi16(u_reg, u_reg); + *u_second = _mm_unpackhi_epi16(u_reg, u_reg); + + read_dist_8(v_dist, &v_reg); + + *v_first = _mm_unpacklo_epi16(v_reg, v_reg); + *v_second = _mm_unpackhi_epi16(v_reg, v_reg); + } +} + +// Horizontal add unsigned 16-bit ints in src and store them as signed 32-bit +// int in dst. 
+static INLINE void hadd_epu16(__m128i *src, __m128i *dst) { + const __m128i zero = _mm_setzero_si128(); + const __m128i shift_right = _mm_srli_si128(*src, 2); + + const __m128i odd = _mm_blend_epi16(shift_right, zero, 170); + const __m128i even = _mm_blend_epi16(*src, zero, 170); + + *dst = _mm_add_epi32(even, odd); +} + +// Add a row of luma distortion to 8 corresponding chroma mods. +static INLINE void add_luma_dist_to_8_chroma_mod(const uint16_t *y_dist, + int ss_x, int ss_y, + __m128i *u_mod, + __m128i *v_mod) { + __m128i y_reg; + if (!ss_x) { + read_dist_8(y_dist, &y_reg); + if (ss_y == 1) { + __m128i y_tmp; + read_dist_8(y_dist + DIST_STRIDE, &y_tmp); + + y_reg = _mm_adds_epu16(y_reg, y_tmp); + } + } else { + __m128i y_first, y_second; + read_dist_16(y_dist, &y_first, &y_second); + if (ss_y == 1) { + __m128i y_tmp_0, y_tmp_1; + read_dist_16(y_dist + DIST_STRIDE, &y_tmp_0, &y_tmp_1); + + y_first = _mm_adds_epu16(y_first, y_tmp_0); + y_second = _mm_adds_epu16(y_second, y_tmp_1); + } + + hadd_epu16(&y_first, &y_first); + hadd_epu16(&y_second, &y_second); + + y_reg = _mm_packus_epi32(y_first, y_second); + } + + *u_mod = _mm_adds_epu16(*u_mod, y_reg); + *v_mod = _mm_adds_epu16(*v_mod, y_reg); +} + +// Apply temporal filter to the luma components. This performs temporal +// filtering on a luma block of 16 X block_height. Use blk_fw as an array of +// size 4 for the weights for each of the 4 subblocks if blk_fw is not NULL, +// else use top_weight for top half, and bottom weight for bottom half. 
+static void vp9_apply_temporal_filter_luma_16( + const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, + int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, + int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, int use_whole_blk, uint32_t *y_accum, + uint16_t *y_count, const uint16_t *y_dist, const uint16_t *u_dist, + const uint16_t *v_dist, const int16_t *const *neighbors_first, + const int16_t *const *neighbors_second, int top_weight, int bottom_weight, + const int *blk_fw) { const int rounding = (1 << strength) >> 1; + int weight = top_weight; + + __m128i mul_first, mul_second; + + __m128i sum_row_1_first, sum_row_1_second; + __m128i sum_row_2_first, sum_row_2_second; + __m128i sum_row_3_first, sum_row_3_second; + + __m128i u_first, u_second; + __m128i v_first, v_second; + + __m128i sum_row_first; + __m128i sum_row_second; + + // Loop variables + unsigned int h; assert(strength >= 0); assert(strength <= 6); - assert(weight >= 0); - assert(weight <= 2); - - assert(width == 8 || width == 16); - - if (width == 8) { - __m128i sum_row_a, sum_row_b, sum_row_c; - __m128i mul_constants = _mm_setr_epi16( - NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4); - - sum_8(a, b, &sum_row_a); - sum_8(a + stride, b + width, &sum_row_b); - sum_row_c = _mm_adds_epu16(sum_row_a, sum_row_b); - sum_row_c = average_8(sum_row_c, mul_constants, strength, rounding, weight); - accumulate_and_store_8(sum_row_c, b, count, accumulator); - - a += stride + stride; - b += width; - count += width; - accumulator += width; - - mul_constants = _mm_setr_epi16(NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_6); - - for (h = 
0; h < height - 2; ++h) { - sum_8(a, b + width, &sum_row_c); - sum_row_a = _mm_adds_epu16(sum_row_a, sum_row_b); - sum_row_a = _mm_adds_epu16(sum_row_a, sum_row_c); - sum_row_a = - average_8(sum_row_a, mul_constants, strength, rounding, weight); - accumulate_and_store_8(sum_row_a, b, count, accumulator); - - a += stride; - b += width; - count += width; - accumulator += width; - - sum_row_a = sum_row_b; - sum_row_b = sum_row_c; - } + assert(block_width == 16); + + (void)block_width; + + // First row + mul_first = _mm_loadu_si128((const __m128i *)neighbors_first[0]); + mul_second = _mm_loadu_si128((const __m128i *)neighbors_second[0]); + + // Add luma values + get_sum_16(y_dist, &sum_row_2_first, &sum_row_2_second); + get_sum_16(y_dist + DIST_STRIDE, &sum_row_3_first, &sum_row_3_second); - mul_constants = _mm_setr_epi16(NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4); - sum_row_a = _mm_adds_epu16(sum_row_a, sum_row_b); - sum_row_a = average_8(sum_row_a, mul_constants, strength, rounding, weight); - accumulate_and_store_8(sum_row_a, b, count, accumulator); - - } else { // width == 16 - __m128i sum_row_a_0, sum_row_a_1; - __m128i sum_row_b_0, sum_row_b_1; - __m128i sum_row_c_0, sum_row_c_1; - __m128i mul_constants_0 = _mm_setr_epi16( - NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6), - mul_constants_1 = _mm_setr_epi16( - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4); - - sum_16(a, b, &sum_row_a_0, &sum_row_a_1); - sum_16(a + stride, b + width, &sum_row_b_0, &sum_row_b_1); - - sum_row_c_0 = _mm_adds_epu16(sum_row_a_0, sum_row_b_0); - sum_row_c_1 = _mm_adds_epu16(sum_row_a_1, sum_row_b_1); - - 
average_16(&sum_row_c_0, &sum_row_c_1, mul_constants_0, mul_constants_1, + sum_row_first = _mm_adds_epu16(sum_row_2_first, sum_row_3_first); + sum_row_second = _mm_adds_epu16(sum_row_2_second, sum_row_3_second); + + // Add chroma values + read_chroma_dist_row_16(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, + &v_second); + + sum_row_first = _mm_adds_epu16(sum_row_first, u_first); + sum_row_second = _mm_adds_epu16(sum_row_second, u_second); + + sum_row_first = _mm_adds_epu16(sum_row_first, v_first); + sum_row_second = _mm_adds_epu16(sum_row_second, v_second); + + // Get modifier and store result + if (blk_fw) { + sum_row_first = + average_8(sum_row_first, &mul_first, strength, rounding, blk_fw[0]); + sum_row_second = + average_8(sum_row_second, &mul_second, strength, rounding, blk_fw[1]); + } else { + average_16(&sum_row_first, &sum_row_second, &mul_first, &mul_second, strength, rounding, weight); - accumulate_and_store_16(sum_row_c_0, sum_row_c_1, b, count, accumulator); - - a += stride + stride; - b += width; - count += width; - accumulator += width; - - mul_constants_0 = _mm_setr_epi16(NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9); - mul_constants_1 = _mm_setr_epi16(NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_9, - NEIGHBOR_CONSTANT_9, NEIGHBOR_CONSTANT_6); - for (h = 0; h < height - 2; ++h) { - sum_16(a, b + width, &sum_row_c_0, &sum_row_c_1); - - sum_row_a_0 = _mm_adds_epu16(sum_row_a_0, sum_row_b_0); - sum_row_a_0 = _mm_adds_epu16(sum_row_a_0, sum_row_c_0); - sum_row_a_1 = _mm_adds_epu16(sum_row_a_1, sum_row_b_1); - sum_row_a_1 = _mm_adds_epu16(sum_row_a_1, sum_row_c_1); - - average_16(&sum_row_a_0, &sum_row_a_1, mul_constants_0, mul_constants_1, + } + accumulate_and_store_16(sum_row_first, sum_row_second, y_pre, y_count, + y_accum); + + y_src += 
y_src_stride; + y_pre += y_pre_stride; + y_count += y_pre_stride; + y_accum += y_pre_stride; + y_dist += DIST_STRIDE; + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + + // Then all the rows except the last one + mul_first = _mm_loadu_si128((const __m128i *)neighbors_first[1]); + mul_second = _mm_loadu_si128((const __m128i *)neighbors_second[1]); + + for (h = 1; h < block_height - 1; ++h) { + // Move the weight to bottom half + if (!use_whole_blk && h == block_height / 2) { + if (blk_fw) { + blk_fw += 2; + } else { + weight = bottom_weight; + } + } + // Shift the rows up + sum_row_1_first = sum_row_2_first; + sum_row_1_second = sum_row_2_second; + sum_row_2_first = sum_row_3_first; + sum_row_2_second = sum_row_3_second; + + // Add luma values to the modifier + sum_row_first = _mm_adds_epu16(sum_row_1_first, sum_row_2_first); + sum_row_second = _mm_adds_epu16(sum_row_1_second, sum_row_2_second); + + get_sum_16(y_dist + DIST_STRIDE, &sum_row_3_first, &sum_row_3_second); + + sum_row_first = _mm_adds_epu16(sum_row_first, sum_row_3_first); + sum_row_second = _mm_adds_epu16(sum_row_second, sum_row_3_second); + + // Add chroma values to the modifier + if (ss_y == 0 || h % 2 == 0) { + // Only calculate the new chroma distortion if we are at a pixel that + // corresponds to a new chroma row + read_chroma_dist_row_16(ss_x, u_dist, v_dist, &u_first, &u_second, + &v_first, &v_second); + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + } + + sum_row_first = _mm_adds_epu16(sum_row_first, u_first); + sum_row_second = _mm_adds_epu16(sum_row_second, u_second); + sum_row_first = _mm_adds_epu16(sum_row_first, v_first); + sum_row_second = _mm_adds_epu16(sum_row_second, v_second); + + // Get modifier and store result + if (blk_fw) { + sum_row_first = + 
average_8(sum_row_first, &mul_first, strength, rounding, blk_fw[0]); + sum_row_second = + average_8(sum_row_second, &mul_second, strength, rounding, blk_fw[1]); + } else { + average_16(&sum_row_first, &sum_row_second, &mul_first, &mul_second, strength, rounding, weight); - accumulate_and_store_16(sum_row_a_0, sum_row_a_1, b, count, accumulator); + } + accumulate_and_store_16(sum_row_first, sum_row_second, y_pre, y_count, + y_accum); + + y_src += y_src_stride; + y_pre += y_pre_stride; + y_count += y_pre_stride; + y_accum += y_pre_stride; + y_dist += DIST_STRIDE; + } + + // The last row + mul_first = _mm_loadu_si128((const __m128i *)neighbors_first[0]); + mul_second = _mm_loadu_si128((const __m128i *)neighbors_second[0]); + + // Shift the rows up + sum_row_1_first = sum_row_2_first; + sum_row_1_second = sum_row_2_second; + sum_row_2_first = sum_row_3_first; + sum_row_2_second = sum_row_3_second; + + // Add luma values to the modifier + sum_row_first = _mm_adds_epu16(sum_row_1_first, sum_row_2_first); + sum_row_second = _mm_adds_epu16(sum_row_1_second, sum_row_2_second); + + // Add chroma values to the modifier + if (ss_y == 0) { + // Only calculate the new chroma distortion if we are at a pixel that + // corresponds to a new chroma row + read_chroma_dist_row_16(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, + &v_second); + } - a += stride; - b += width; - count += width; - accumulator += width; + sum_row_first = _mm_adds_epu16(sum_row_first, u_first); + sum_row_second = _mm_adds_epu16(sum_row_second, u_second); + sum_row_first = _mm_adds_epu16(sum_row_first, v_first); + sum_row_second = _mm_adds_epu16(sum_row_second, v_second); + + // Get modifier and store result + if (blk_fw) { + sum_row_first = + average_8(sum_row_first, &mul_first, strength, rounding, blk_fw[0]); + sum_row_second = + average_8(sum_row_second, &mul_second, strength, rounding, blk_fw[1]); + } else { + average_16(&sum_row_first, &sum_row_second, &mul_first, &mul_second, + strength, rounding, 
weight); + } + accumulate_and_store_16(sum_row_first, sum_row_second, y_pre, y_count, + y_accum); +} - sum_row_a_0 = sum_row_b_0; - sum_row_a_1 = sum_row_b_1; - sum_row_b_0 = sum_row_c_0; - sum_row_b_1 = sum_row_c_1; +// Perform temporal filter for the luma component. +static void vp9_apply_temporal_filter_luma( + const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, + int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, + int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk, + uint32_t *y_accum, uint16_t *y_count, const uint16_t *y_dist, + const uint16_t *u_dist, const uint16_t *v_dist) { + unsigned int blk_col = 0, uv_blk_col = 0; + const unsigned int blk_col_step = 16, uv_blk_col_step = 16 >> ss_x; + const unsigned int mid_width = block_width >> 1, + last_width = block_width - blk_col_step; + int top_weight = blk_fw[0], + bottom_weight = use_whole_blk ? blk_fw[0] : blk_fw[2]; + const int16_t *const *neighbors_first; + const int16_t *const *neighbors_second; + + if (block_width == 16) { + // Special Case: The blockwidth is 16 and we are operating on a row of 16 + // chroma pixels. In this case, we can't use the usualy left-midle-right + // pattern. We also don't support splitting now. 
+ neighbors_first = LUMA_LEFT_COLUMN_NEIGHBORS; + neighbors_second = LUMA_RIGHT_COLUMN_NEIGHBORS; + if (use_whole_blk) { + vp9_apply_temporal_filter_luma_16( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, + block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, + y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, + v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, + bottom_weight, NULL); + } else { + vp9_apply_temporal_filter_luma_16( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, + block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, + y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, + v_dist + uv_blk_col, neighbors_first, neighbors_second, 0, 0, blk_fw); } - mul_constants_0 = _mm_setr_epi16(NEIGHBOR_CONSTANT_4, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6); - mul_constants_1 = _mm_setr_epi16(NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_6, - NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_4); - sum_row_c_0 = _mm_adds_epu16(sum_row_a_0, sum_row_b_0); - sum_row_c_1 = _mm_adds_epu16(sum_row_a_1, sum_row_b_1); - - average_16(&sum_row_c_0, &sum_row_c_1, mul_constants_0, mul_constants_1, - strength, rounding, weight); - accumulate_and_store_16(sum_row_c_0, sum_row_c_1, b, count, accumulator); + return; } + + // Left + neighbors_first = LUMA_LEFT_COLUMN_NEIGHBORS; + neighbors_second = LUMA_MIDDLE_COLUMN_NEIGHBORS; + vp9_apply_temporal_filter_luma_16( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, 
uv_src_stride, u_pre + uv_blk_col, + v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength, + use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, + u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, + neighbors_second, top_weight, bottom_weight, NULL); + + blk_col += blk_col_step; + uv_blk_col += uv_blk_col_step; + + // Middle First + neighbors_first = LUMA_MIDDLE_COLUMN_NEIGHBORS; + for (; blk_col < mid_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_apply_temporal_filter_luma_16( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height, + ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, + y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, + v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, + bottom_weight, NULL); + } + + if (!use_whole_blk) { + top_weight = blk_fw[1]; + bottom_weight = blk_fw[3]; + } + + // Middle Second + for (; blk_col < last_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_apply_temporal_filter_luma_16( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height, + ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, + y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, + v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, + bottom_weight, NULL); + } + + // Right + neighbors_second = LUMA_RIGHT_COLUMN_NEIGHBORS; + vp9_apply_temporal_filter_luma_16( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, + v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength, + use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + 
blk_col, + u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, + neighbors_second, top_weight, bottom_weight, NULL); +} + +// Apply temporal filter to the chroma components. This performs temporal +// filtering on a chroma block of 8 X uv_height. If blk_fw is not NULL, use +// blk_fw as an array of size 4 for the weights for each of the 4 subblocks, +// else use top_weight for top half, and bottom weight for bottom half. +static void vp9_apply_temporal_filter_chroma_8( + const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, + int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, + int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, + int uv_pre_stride, unsigned int uv_block_width, + unsigned int uv_block_height, int ss_x, int ss_y, int strength, + uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count, + const uint16_t *y_dist, const uint16_t *u_dist, const uint16_t *v_dist, + const int16_t *const *neighbors, int top_weight, int bottom_weight, + const int *blk_fw) { + const int rounding = (1 << strength) >> 1; + int weight = top_weight; + + __m128i mul; + + __m128i u_sum_row_1, u_sum_row_2, u_sum_row_3; + __m128i v_sum_row_1, v_sum_row_2, v_sum_row_3; + + __m128i u_sum_row, v_sum_row; + + // Loop variable + unsigned int h; + + (void)uv_block_width; + + // First row + mul = _mm_loadu_si128((const __m128i *)neighbors[0]); + + // Add chroma values + get_sum_8(u_dist, &u_sum_row_2); + get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3); + + u_sum_row = _mm_adds_epu16(u_sum_row_2, u_sum_row_3); + + get_sum_8(v_dist, &v_sum_row_2); + get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3); + + v_sum_row = _mm_adds_epu16(v_sum_row_2, v_sum_row_3); + + // Add luma values + add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row); + + // Get modifier and store result + if (blk_fw) { + u_sum_row = + average_4_4(u_sum_row, &mul, strength, rounding, blk_fw[0], blk_fw[1]); + v_sum_row = + average_4_4(v_sum_row, &mul, 
strength, rounding, blk_fw[0], blk_fw[1]); + } else { + u_sum_row = average_8(u_sum_row, &mul, strength, rounding, weight); + v_sum_row = average_8(v_sum_row, &mul, strength, rounding, weight); + } + accumulate_and_store_8(u_sum_row, u_pre, u_count, u_accum); + accumulate_and_store_8(v_sum_row, v_pre, v_count, v_accum); + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + u_count += uv_pre_stride; + u_accum += uv_pre_stride; + v_count += uv_pre_stride; + v_accum += uv_pre_stride; + + y_src += y_src_stride * (1 + ss_y); + y_pre += y_pre_stride * (1 + ss_y); + y_dist += DIST_STRIDE * (1 + ss_y); + + // Then all the rows except the last one + mul = _mm_loadu_si128((const __m128i *)neighbors[1]); + + for (h = 1; h < uv_block_height - 1; ++h) { + // Move the weight pointer to the bottom half of the blocks + if (h == uv_block_height / 2) { + if (blk_fw) { + blk_fw += 2; + } else { + weight = bottom_weight; + } + } + + // Shift the rows up + u_sum_row_1 = u_sum_row_2; + u_sum_row_2 = u_sum_row_3; + + v_sum_row_1 = v_sum_row_2; + v_sum_row_2 = v_sum_row_3; + + // Add chroma values + u_sum_row = _mm_adds_epu16(u_sum_row_1, u_sum_row_2); + get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3); + u_sum_row = _mm_adds_epu16(u_sum_row, u_sum_row_3); + + v_sum_row = _mm_adds_epu16(v_sum_row_1, v_sum_row_2); + get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3); + v_sum_row = _mm_adds_epu16(v_sum_row, v_sum_row_3); + + // Add luma values + add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row); + + // Get modifier and store result + if (blk_fw) { + u_sum_row = average_4_4(u_sum_row, &mul, strength, rounding, blk_fw[0], + blk_fw[1]); + v_sum_row = average_4_4(v_sum_row, &mul, strength, rounding, blk_fw[0], + blk_fw[1]); + } else { + u_sum_row = average_8(u_sum_row, &mul, strength, rounding, weight); + v_sum_row = average_8(v_sum_row, &mul, strength, rounding, weight); + } 
+ + accumulate_and_store_8(u_sum_row, u_pre, u_count, u_accum); + accumulate_and_store_8(v_sum_row, v_pre, v_count, v_accum); + + u_src += uv_src_stride; + u_pre += uv_pre_stride; + u_dist += DIST_STRIDE; + v_src += uv_src_stride; + v_pre += uv_pre_stride; + v_dist += DIST_STRIDE; + u_count += uv_pre_stride; + u_accum += uv_pre_stride; + v_count += uv_pre_stride; + v_accum += uv_pre_stride; + + y_src += y_src_stride * (1 + ss_y); + y_pre += y_pre_stride * (1 + ss_y); + y_dist += DIST_STRIDE * (1 + ss_y); + } + + // The last row + mul = _mm_loadu_si128((const __m128i *)neighbors[0]); + + // Shift the rows up + u_sum_row_1 = u_sum_row_2; + u_sum_row_2 = u_sum_row_3; + + v_sum_row_1 = v_sum_row_2; + v_sum_row_2 = v_sum_row_3; + + // Add chroma values + u_sum_row = _mm_adds_epu16(u_sum_row_1, u_sum_row_2); + v_sum_row = _mm_adds_epu16(v_sum_row_1, v_sum_row_2); + + // Add luma values + add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row); + + // Get modifier and store result + if (blk_fw) { + u_sum_row = + average_4_4(u_sum_row, &mul, strength, rounding, blk_fw[0], blk_fw[1]); + v_sum_row = + average_4_4(v_sum_row, &mul, strength, rounding, blk_fw[0], blk_fw[1]); + } else { + u_sum_row = average_8(u_sum_row, &mul, strength, rounding, weight); + v_sum_row = average_8(v_sum_row, &mul, strength, rounding, weight); + } + + accumulate_and_store_8(u_sum_row, u_pre, u_count, u_accum); + accumulate_and_store_8(v_sum_row, v_pre, v_count, v_accum); +} + +// Perform temporal filter for the chroma components. 
+static void vp9_apply_temporal_filter_chroma( + const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, + int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, + int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk, + uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count, + const uint16_t *y_dist, const uint16_t *u_dist, const uint16_t *v_dist) { + const unsigned int uv_width = block_width >> ss_x, + uv_height = block_height >> ss_y; + + unsigned int blk_col = 0, uv_blk_col = 0; + const unsigned int uv_blk_col_step = 8, blk_col_step = 8 << ss_x; + const unsigned int uv_mid_width = uv_width >> 1, + uv_last_width = uv_width - uv_blk_col_step; + int top_weight = blk_fw[0], + bottom_weight = use_whole_blk ? blk_fw[0] : blk_fw[2]; + const int16_t *const *neighbors; + + if (uv_width == 8) { + // Special Case: We are subsampling in x direction on a 16x16 block. Since + // we are operating on a row of 8 chroma pixels, we can't use the usual + // left-middle-right pattern. 
+ assert(ss_x); + + if (ss_y) { + neighbors = CHROMA_DOUBLE_SS_SINGLE_COLUMN_NEIGHBORS; + } else { + neighbors = CHROMA_SINGLE_SS_SINGLE_COLUMN_NEIGHBORS; + } + + if (use_whole_blk) { + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, + top_weight, bottom_weight, NULL); + } else { + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, + 0, 0, blk_fw); + } + + return; + } + + // Left + if (ss_x && ss_y) { + neighbors = CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS; + } else if (ss_x || ss_y) { + neighbors = CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS; + } else { + neighbors = CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS; + } + + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, + v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, + strength, u_accum + uv_blk_col, u_count + uv_blk_col, + v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, + u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, top_weight, + bottom_weight, NULL); + + blk_col += blk_col_step; + uv_blk_col += uv_blk_col_step; + + // Middle First + if (ss_x && ss_y) { + neighbors = CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS; + } else if (ss_x || 
ss_y) { + neighbors = CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS; + } else { + neighbors = CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS; + } + + for (; uv_blk_col < uv_mid_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, + top_weight, bottom_weight, NULL); + } + + if (!use_whole_blk) { + top_weight = blk_fw[1]; + bottom_weight = blk_fw[3]; + } + + // Middle Second + for (; uv_blk_col < uv_last_width; + blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, + u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, + uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, + u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, + y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, + top_weight, bottom_weight, NULL); + } + + // Right + if (ss_x && ss_y) { + neighbors = CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS; + } else if (ss_x || ss_y) { + neighbors = CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS; + } else { + neighbors = CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS; + } + + vp9_apply_temporal_filter_chroma_8( + y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, + u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, + v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, + strength, u_accum + uv_blk_col, u_count + uv_blk_col, + v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, + u_dist + uv_blk_col, 
v_dist + uv_blk_col, neighbors, top_weight, + bottom_weight, NULL); +} + +void vp9_apply_temporal_filter_sse4_1( + const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, + int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, + int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, + int uv_pre_stride, unsigned int block_width, unsigned int block_height, + int ss_x, int ss_y, int strength, const int *const blk_fw, + int use_whole_blk, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, + uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count) { + const unsigned int chroma_height = block_height >> ss_y, + chroma_width = block_width >> ss_x; + + DECLARE_ALIGNED(16, uint16_t, y_dist[BH * DIST_STRIDE]) = { 0 }; + DECLARE_ALIGNED(16, uint16_t, u_dist[BH * DIST_STRIDE]) = { 0 }; + DECLARE_ALIGNED(16, uint16_t, v_dist[BH * DIST_STRIDE]) = { 0 }; + const int *blk_fw_ptr = blk_fw; + + uint16_t *y_dist_ptr = y_dist + 1, *u_dist_ptr = u_dist + 1, + *v_dist_ptr = v_dist + 1; + const uint8_t *y_src_ptr = y_src, *u_src_ptr = u_src, *v_src_ptr = v_src; + const uint8_t *y_pre_ptr = y_pre, *u_pre_ptr = u_pre, *v_pre_ptr = v_pre; + + // Loop variables + unsigned int row, blk_col; + + assert(block_width <= BW && "block width too large"); + assert(block_height <= BH && "block height too large"); + assert(block_width % 16 == 0 && "block width must be multiple of 16"); + assert(block_height % 2 == 0 && "block height must be even"); + assert((ss_x == 0 || ss_x == 1) && (ss_y == 0 || ss_y == 1) && + "invalid chroma subsampling"); + assert(strength >= 0 && strength <= 6 && "invalid temporal filter strength"); + assert(blk_fw[0] >= 0 && "filter weight must be positive"); + assert( + (use_whole_blk || (blk_fw[1] >= 0 && blk_fw[2] >= 0 && blk_fw[3] >= 0)) && + "subblock filter weight must be positive"); + assert(blk_fw[0] <= 2 && "sublock filter weight must be less than 2"); + assert( + (use_whole_blk || (blk_fw[1] <= 2 && blk_fw[2] <= 2 && blk_fw[3] <= 2)) && + 
"subblock filter weight must be less than 2"); + + // Precompute the difference sqaured + for (row = 0; row < block_height; row++) { + for (blk_col = 0; blk_col < block_width; blk_col += 16) { + store_dist_16(y_src_ptr + blk_col, y_pre_ptr + blk_col, + y_dist_ptr + blk_col); + } + y_src_ptr += y_src_stride; + y_pre_ptr += y_pre_stride; + y_dist_ptr += DIST_STRIDE; + } + + for (row = 0; row < chroma_height; row++) { + for (blk_col = 0; blk_col < chroma_width; blk_col += 8) { + store_dist_8(u_src_ptr + blk_col, u_pre_ptr + blk_col, + u_dist_ptr + blk_col); + store_dist_8(v_src_ptr + blk_col, v_pre_ptr + blk_col, + v_dist_ptr + blk_col); + } + + u_src_ptr += uv_src_stride; + u_pre_ptr += uv_pre_stride; + u_dist_ptr += DIST_STRIDE; + v_src_ptr += uv_src_stride; + v_pre_ptr += uv_pre_stride; + v_dist_ptr += DIST_STRIDE; + } + + y_dist_ptr = y_dist + 1; + u_dist_ptr = u_dist + 1; + v_dist_ptr = v_dist + 1; + + vp9_apply_temporal_filter_luma( + y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, + u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, + strength, blk_fw_ptr, use_whole_blk, y_accum, y_count, y_dist_ptr, + u_dist_ptr, v_dist_ptr); + + vp9_apply_temporal_filter_chroma( + y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, + u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, + strength, blk_fw_ptr, use_whole_blk, u_accum, u_count, v_accum, v_count, + y_dist_ptr, u_dist_ptr, v_dist_ptr); } diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk index 7ca4004b0e4..c9a55669e16 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk @@ -64,9 +64,12 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c 
+ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c +endif # !CONFIG_VP9_HIGHBITDEPTH + VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_VSX) += common/ppc/vp9_idct_vsx.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk index 05981d6899a..67e5389a725 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk @@ -103,6 +103,7 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_sse4.c +VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_constants.h VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_quantize_avx2.c @@ -137,10 +138,13 @@ VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_frame_scale_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c + +ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h +endif # !CONFIG_VP9_HIGHBITDEPTH VP9_CX_SRCS-$(HAVE_VSX) += encoder/ppc/vp9_quantize_vsx.c @@ -149,5 +153,6 @@ VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_firstpass.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_mbgraph.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_temporal_filter.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += 
encoder/x86/temporal_filter_sse4.c +VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/x86/temporal_filter_constants.h VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes)) diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9dx.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9dx.mk index 59f612b94c9..93a5f368bdf 100644 --- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9dx.mk +++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9dx.mk @@ -28,5 +28,7 @@ VP9_DX_SRCS-yes += decoder/vp9_decoder.c VP9_DX_SRCS-yes += decoder/vp9_decoder.h VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h +VP9_DX_SRCS-yes += decoder/vp9_job_queue.c +VP9_DX_SRCS-yes += decoder/vp9_job_queue.h VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes)) diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/fastssim.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/fastssim.c index 0469071a176..6ab6f557e25 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/fastssim.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/fastssim.c @@ -128,10 +128,12 @@ static void fs_downsample_level(fs_ctx *_ctx, int _l) { int i1; i0 = 2 * i; i1 = FS_MINI(i0 + 1, w2); - dst1[j * w + i] = src1[j0offs + i0] + src1[j0offs + i1] + - src1[j1offs + i0] + src1[j1offs + i1]; - dst2[j * w + i] = src2[j0offs + i0] + src2[j0offs + i1] + - src2[j1offs + i0] + src2[j1offs + i1]; + dst1[j * w + i] = + (uint32_t)((int64_t)src1[j0offs + i0] + src1[j0offs + i1] + + src1[j1offs + i0] + src1[j1offs + i1]); + dst2[j * w + i] = + (uint32_t)((int64_t)src2[j0offs + i0] + src2[j0offs + i1] + + src2[j1offs + i0] + src2[j1offs + i1]); } } } @@ -220,12 +222,12 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) { ssim = _ctx->level[_l].ssim; c1 = (double)(ssim_c1 * 4096 * (1 << 4 * _l)); for (j = 0; j < h; j++) { - unsigned mux; - unsigned muy; + int64_t mux; + int64_t muy; int i0; int i1; - mux 
= 5 * col_sums_x[0]; - muy = 5 * col_sums_y[0]; + mux = (int64_t)5 * col_sums_x[0]; + muy = (int64_t)5 * col_sums_y[0]; for (i = 1; i < 4; i++) { i1 = FS_MINI(i, w - 1); mux += col_sums_x[i1]; @@ -237,8 +239,8 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) { if (i + 1 < w) { i0 = FS_MAXI(0, i - 4); i1 = FS_MINI(i + 4, w - 1); - mux += col_sums_x[i1] - col_sums_x[i0]; - muy += col_sums_x[i1] - col_sums_x[i0]; + mux += (int)col_sums_x[i1] - (int)col_sums_x[i0]; + muy += (int)col_sums_x[i1] - (int)col_sums_x[i0]; } } if (j + 1 < h) { @@ -246,8 +248,10 @@ static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) { for (i = 0; i < w; i++) col_sums_x[i] -= im1[j0offs + i]; for (i = 0; i < w; i++) col_sums_y[i] -= im2[j0offs + i]; j1offs = FS_MINI(j + 4, h - 1) * w; - for (i = 0; i < w; i++) col_sums_x[i] += im1[j1offs + i]; - for (i = 0; i < w; i++) col_sums_y[i] += im2[j1offs + i]; + for (i = 0; i < w; i++) + col_sums_x[i] = (uint32_t)((int64_t)col_sums_x[i] + im1[j1offs + i]); + for (i = 0; i < w; i++) + col_sums_y[i] = (uint32_t)((int64_t)col_sums_y[i] + im2[j1offs + i]); } } } @@ -343,18 +347,18 @@ static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) { for (j = 0; j < h + 4; j++) { if (j < h - 1) { for (i = 0; i < w - 1; i++) { - unsigned g1; - unsigned g2; - unsigned gx; - unsigned gy; - g1 = abs((int)im1[(j + 1) * w + i + 1] - (int)im1[j * w + i]); - g2 = abs((int)im1[(j + 1) * w + i] - (int)im1[j * w + i + 1]); + int64_t g1; + int64_t g2; + int64_t gx; + int64_t gy; + g1 = labs((int64_t)im1[(j + 1) * w + i + 1] - (int64_t)im1[j * w + i]); + g2 = labs((int64_t)im1[(j + 1) * w + i] - (int64_t)im1[j * w + i + 1]); gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); - g1 = abs((int)im2[(j + 1) * w + i + 1] - (int)im2[j * w + i]); - g2 = abs((int)im2[(j + 1) * w + i] - (int)im2[j * w + i + 1]); - gy = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); - gx_buf[(j & 7) * stride + i + 4] = gx; - gy_buf[(j & 7) * stride + i + 4] = gy; + g1 = 
labs((int64_t)im2[(j + 1) * w + i + 1] - (int64_t)im2[j * w + i]); + g2 = labs((int64_t)im2[(j + 1) * w + i] - (int64_t)im2[j * w + i + 1]); + gy = ((int64_t)4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2)); + gx_buf[(j & 7) * stride + i + 4] = (uint32_t)gx; + gy_buf[(j & 7) * stride + i + 4] = (uint32_t)gy; } } else { memset(gx_buf + (j & 7) * stride, 0, stride * sizeof(*gx_buf)); diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c index 611071689b7..328b0e31301 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/fdct32x32_vsx.c @@ -227,10 +227,11 @@ static void fdct32_vsx(const int16x8_t *in, int16x8_t *out, int pass) { int16x8_t temp0[32]; // Hold stages: 1, 4, 7 int16x8_t temp1[32]; // Hold stages: 2, 5 int16x8_t temp2[32]; // Hold stages: 3, 6 + int i; // Stage 1 // Unrolling this loops actually slows down Power9 benchmarks - for (int i = 0; i < 16; i++) { + for (i = 0; i < 16; i++) { temp0[i] = vec_add(in[i], in[31 - i]); // pass through to stage 3. 
temp1[i + 16] = vec_sub(in[15 - i], in[i + 16]); @@ -238,7 +239,7 @@ static void fdct32_vsx(const int16x8_t *in, int16x8_t *out, int pass) { // Stage 2 // Unrolling this loops actually slows down Power9 benchmarks - for (int i = 0; i < 8; i++) { + for (i = 0; i < 8; i++) { temp1[i] = vec_add(temp0[i], temp0[15 - i]); temp1[i + 8] = vec_sub(temp0[7 - i], temp0[i + 8]); } @@ -461,7 +462,7 @@ static void fdct32_vsx(const int16x8_t *in, int16x8_t *out, int pass) { &out[3]); if (pass == 0) { - for (int i = 0; i < 32; i++) { + for (i = 0; i < 32; i++) { out[i] = sub_round_shift(out[i]); } } diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk index 87460bedf17..91ce96bb634 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk +++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk @@ -218,7 +218,11 @@ DSP_SRCS-$(HAVE_NEON) += arm/fdct_partial_neon.c DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.h DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c + +ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c +endif # !CONFIG_VP9_HIGHBITDEPTH + DSP_SRCS-$(HAVE_VSX) += ppc/fdct32x32_vsx.c endif # CONFIG_VP9_ENCODER diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_ports/system_state.h b/chromium/third_party/libvpx/source/libvpx/vpx_ports/system_state.h index 03557e4bd73..452cb5739bf 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_ports/system_state.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx_ports/system_state.h @@ -18,7 +18,7 @@ extern "C" { #endif #if (ARCH_X86 || ARCH_X86_64) && HAVE_MMX -extern void vpx_clear_system_state(); +extern void vpx_clear_system_state(void); #else #define vpx_clear_system_state() #endif // (ARCH_X86 || ARCH_X86_64) && HAVE_MMX diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_util/vpx_thread.h 
b/chromium/third_party/libvpx/source/libvpx/vpx_util/vpx_thread.h index 4c20f378b07..6d308e949b1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpx_util/vpx_thread.h +++ b/chromium/third_party/libvpx/source/libvpx/vpx_util/vpx_thread.h @@ -211,6 +211,7 @@ static INLINE int pthread_cond_wait(pthread_cond_t *const condition, #endif return !ok; } + #elif defined(__OS2__) #define INCL_DOS #include <os2.h> // NOLINT diff --git a/chromium/third_party/libvpx/source/libvpx/vpxdec.c b/chromium/third_party/libvpx/source/libvpx/vpxdec.c index 7f544d4bcc0..c60eb5c30ba 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpxdec.c +++ b/chromium/third_party/libvpx/source/libvpx/vpxdec.c @@ -265,8 +265,8 @@ static int raw_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, return 1; } -static int read_frame(struct VpxDecInputContext *input, uint8_t **buf, - size_t *bytes_in_buffer, size_t *buffer_size) { +static int dec_read_frame(struct VpxDecInputContext *input, uint8_t **buf, + size_t *bytes_in_buffer, size_t *buffer_size) { switch (input->vpx_input_ctx->file_type) { #if CONFIG_WEBM_IO case FILE_TYPE_WEBM: @@ -806,7 +806,7 @@ static int main_loop(int argc, const char **argv_) { if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip); while (arg_skip) { - if (read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break; + if (dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break; arg_skip--; } @@ -837,7 +837,7 @@ static int main_loop(int argc, const char **argv_) { frame_avail = 0; if (!stop_after || frame_in < stop_after) { - if (!read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) { + if (!dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) { frame_avail = 1; frame_in++; diff --git a/chromium/third_party/libvpx/source/libvpx/vpxenc.c b/chromium/third_party/libvpx/source/libvpx/vpxenc.c index b7841522de7..54e098fb5a1 100644 --- a/chromium/third_party/libvpx/source/libvpx/vpxenc.c +++ 
b/chromium/third_party/libvpx/source/libvpx/vpxenc.c @@ -50,12 +50,6 @@ #endif #include "./y4minput.h" -/* Swallow warnings about unused results of fread/fwrite */ -static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { - return fread(ptr, size, nmemb, stream); -} -#define fread wrap_fread - static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { return fwrite(ptr, size, nmemb, stream); @@ -95,34 +89,6 @@ static void warn_or_exit_on_error(vpx_codec_ctx_t *ctx, int fatal, va_end(ap); } -static int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { - FILE *f = input_ctx->file; - y4m_input *y4m = &input_ctx->y4m; - int shortread = 0; - - if (input_ctx->file_type == FILE_TYPE_Y4M) { - if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; - } else { - shortread = read_yuv_frame(input_ctx, img); - } - - return !shortread; -} - -static int file_is_y4m(const char detect[4]) { - if (memcmp(detect, "YUV4", 4) == 0) { - return 1; - } - return 0; -} - -static int fourcc_is_ivf(const char detect[4]) { - if (memcmp(detect, "DKIF", 4) == 0) { - return 1; - } - return 0; -} - static const arg_def_t help = ARG_DEF(NULL, "help", 0, "Show usage options and exit"); static const arg_def_t debugmode = @@ -611,230 +577,6 @@ void usage_exit(void) { exit(EXIT_FAILURE); } -#define mmin(a, b) ((a) < (b) ? 
(a) : (b)) - -#if CONFIG_VP9_HIGHBITDEPTH -static void find_mismatch_high(const vpx_image_t *const img1, - const vpx_image_t *const img2, int yloc[4], - int uloc[4], int vloc[4]) { - uint16_t *plane1, *plane2; - uint32_t stride1, stride2; - const uint32_t bsize = 64; - const uint32_t bsizey = bsize >> img1->y_chroma_shift; - const uint32_t bsizex = bsize >> img1->x_chroma_shift; - const uint32_t c_w = - (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; - int match = 1; - uint32_t i, j; - yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; - plane1 = (uint16_t *)img1->planes[VPX_PLANE_Y]; - plane2 = (uint16_t *)img2->planes[VPX_PLANE_Y]; - stride1 = img1->stride[VPX_PLANE_Y] / 2; - stride2 = img2->stride[VPX_PLANE_Y] / 2; - for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { - for (j = 0; match && j < img1->d_w; j += bsize) { - int k, l; - const int si = mmin(i + bsize, img1->d_h) - i; - const int sj = mmin(j + bsize, img1->d_w) - j; - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(plane1 + (i + k) * stride1 + j + l) != - *(plane2 + (i + k) * stride2 + j + l)) { - yloc[0] = i + k; - yloc[1] = j + l; - yloc[2] = *(plane1 + (i + k) * stride1 + j + l); - yloc[3] = *(plane2 + (i + k) * stride2 + j + l); - match = 0; - break; - } - } - } - } - } - - uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; - plane1 = (uint16_t *)img1->planes[VPX_PLANE_U]; - plane2 = (uint16_t *)img2->planes[VPX_PLANE_U]; - stride1 = img1->stride[VPX_PLANE_U] / 2; - stride2 = img2->stride[VPX_PLANE_U] / 2; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(plane1 + (i + k) * stride1 + j + l) != - *(plane2 + (i + k) * stride2 + j + l)) { - 
uloc[0] = i + k; - uloc[1] = j + l; - uloc[2] = *(plane1 + (i + k) * stride1 + j + l); - uloc[3] = *(plane2 + (i + k) * stride2 + j + l); - match = 0; - break; - } - } - } - } - } - - vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; - plane1 = (uint16_t *)img1->planes[VPX_PLANE_V]; - plane2 = (uint16_t *)img2->planes[VPX_PLANE_V]; - stride1 = img1->stride[VPX_PLANE_V] / 2; - stride2 = img2->stride[VPX_PLANE_V] / 2; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(plane1 + (i + k) * stride1 + j + l) != - *(plane2 + (i + k) * stride2 + j + l)) { - vloc[0] = i + k; - vloc[1] = j + l; - vloc[2] = *(plane1 + (i + k) * stride1 + j + l); - vloc[3] = *(plane2 + (i + k) * stride2 + j + l); - match = 0; - break; - } - } - } - } - } -} -#endif - -static void find_mismatch(const vpx_image_t *const img1, - const vpx_image_t *const img2, int yloc[4], - int uloc[4], int vloc[4]) { - const uint32_t bsize = 64; - const uint32_t bsizey = bsize >> img1->y_chroma_shift; - const uint32_t bsizex = bsize >> img1->x_chroma_shift; - const uint32_t c_w = - (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; - int match = 1; - uint32_t i, j; - yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1; - for (i = 0, match = 1; match && i < img1->d_h; i += bsize) { - for (j = 0; match && j < img1->d_w; j += bsize) { - int k, l; - const int si = mmin(i + bsize, img1->d_h) - i; - const int sj = mmin(j + bsize, img1->d_w) - j; - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_Y] + - (i + k) * img1->stride[VPX_PLANE_Y] + j + l) != - *(img2->planes[VPX_PLANE_Y] + - (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) { - yloc[0] = i + k; - 
yloc[1] = j + l; - yloc[2] = *(img1->planes[VPX_PLANE_Y] + - (i + k) * img1->stride[VPX_PLANE_Y] + j + l); - yloc[3] = *(img2->planes[VPX_PLANE_Y] + - (i + k) * img2->stride[VPX_PLANE_Y] + j + l); - match = 0; - break; - } - } - } - } - } - - uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_U] + - (i + k) * img1->stride[VPX_PLANE_U] + j + l) != - *(img2->planes[VPX_PLANE_U] + - (i + k) * img2->stride[VPX_PLANE_U] + j + l)) { - uloc[0] = i + k; - uloc[1] = j + l; - uloc[2] = *(img1->planes[VPX_PLANE_U] + - (i + k) * img1->stride[VPX_PLANE_U] + j + l); - uloc[3] = *(img2->planes[VPX_PLANE_U] + - (i + k) * img2->stride[VPX_PLANE_U] + j + l); - match = 0; - break; - } - } - } - } - } - vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1; - for (i = 0, match = 1; match && i < c_h; i += bsizey) { - for (j = 0; match && j < c_w; j += bsizex) { - int k, l; - const int si = mmin(i + bsizey, c_h - i); - const int sj = mmin(j + bsizex, c_w - j); - for (k = 0; match && k < si; ++k) { - for (l = 0; match && l < sj; ++l) { - if (*(img1->planes[VPX_PLANE_V] + - (i + k) * img1->stride[VPX_PLANE_V] + j + l) != - *(img2->planes[VPX_PLANE_V] + - (i + k) * img2->stride[VPX_PLANE_V] + j + l)) { - vloc[0] = i + k; - vloc[1] = j + l; - vloc[2] = *(img1->planes[VPX_PLANE_V] + - (i + k) * img1->stride[VPX_PLANE_V] + j + l); - vloc[3] = *(img2->planes[VPX_PLANE_V] + - (i + k) * img2->stride[VPX_PLANE_V] + j + l); - match = 0; - break; - } - } - } - } - } -} - -static int compare_img(const vpx_image_t *const img1, - const vpx_image_t *const img2) { - uint32_t l_w = img1->d_w; - uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; - const uint32_t c_h = - (img1->d_h + 
img1->y_chroma_shift) >> img1->y_chroma_shift; - uint32_t i; - int match = 1; - - match &= (img1->fmt == img2->fmt); - match &= (img1->d_w == img2->d_w); - match &= (img1->d_h == img2->d_h); -#if CONFIG_VP9_HIGHBITDEPTH - if (img1->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { - l_w *= 2; - c_w *= 2; - } -#endif - - for (i = 0; i < img1->d_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], - img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], - l_w) == 0); - - for (i = 0; i < c_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], - img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], - c_w) == 0); - - for (i = 0; i < c_h; ++i) - match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], - img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], - c_w) == 0); - - return match; -} - #define NELEMENTS(x) (sizeof(x) / sizeof(x[0])) #if CONFIG_VP9_ENCODER #define ARG_CTRL_CNT_MAX NELEMENTS(vp9_arg_ctrl_map) @@ -1020,57 +762,6 @@ static void parse_global_config(struct VpxEncoderConfig *global, char **argv) { } } -static void open_input_file(struct VpxInputContext *input) { - /* Parse certain options from the input file, if possible */ - input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") - : set_binary_mode(stdin); - - if (!input->file) fatal("Failed to open input file"); - - if (!fseeko(input->file, 0, SEEK_END)) { - /* Input file is seekable. Figure out how long it is, so we can get - * progress info. - */ - input->length = ftello(input->file); - rewind(input->file); - } - - /* Default to 1:1 pixel aspect ratio. */ - input->pixel_aspect_ratio.numerator = 1; - input->pixel_aspect_ratio.denominator = 1; - - /* For RAW input sources, these bytes will applied on the first frame - * in read_frame(). 
- */ - input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); - input->detect.position = 0; - - if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { - if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, - input->only_i420) >= 0) { - input->file_type = FILE_TYPE_Y4M; - input->width = input->y4m.pic_w; - input->height = input->y4m.pic_h; - input->pixel_aspect_ratio.numerator = input->y4m.par_n; - input->pixel_aspect_ratio.denominator = input->y4m.par_d; - input->framerate.numerator = input->y4m.fps_n; - input->framerate.denominator = input->y4m.fps_d; - input->fmt = input->y4m.vpx_fmt; - input->bit_depth = input->y4m.bit_depth; - } else - fatal("Unsupported Y4M stream."); - } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { - fatal("IVF is not supported as input."); - } else { - input->file_type = FILE_TYPE_RAW; - } -} - -static void close_input_file(struct VpxInputContext *input) { - fclose(input->file); - if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); -} - static struct stream_state *new_stream(struct VpxEncoderConfig *global, struct stream_state *prev) { struct stream_state *stream; |