path: root/chromium/third_party/libvpx
author     Allan Sandfeld Jensen <allan.jensen@theqtcompany.com>  2015-08-14 11:38:45 +0200
committer  Allan Sandfeld Jensen <allan.jensen@theqtcompany.com>  2015-08-14 17:16:47 +0000
commit     3a97ca8dd9b96b599ae2d33e40df0dd2f7ea5859 (patch)
tree       43cc572ba067417c7341db81f71ae7cc6e0fcc3e /chromium/third_party/libvpx
parent     f61ab1ac7f855cd281809255c0aedbb1895e1823 (diff)
download   qtwebengine-chromium-3a97ca8dd9b96b599ae2d33e40df0dd2f7ea5859.tar.gz
BASELINE: Update chromium to 45.0.2454.40
Change-Id: Id2121d9f11a8fc633677236c65a3e41feef589e4
Reviewed-by: Andras Becsi <andras.becsi@theqtcompany.com>
Diffstat (limited to 'chromium/third_party/libvpx')
-rw-r--r--chromium/third_party/libvpx/BUILD.gn6
-rw-r--r--chromium/third_party/libvpx/README.chromium4
-rw-r--r--chromium/third_party/libvpx/codereview.settings7
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs.gni79
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_arm.gypi6
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_arm64.gypi5
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_arm_neon.gypi10
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect.gypi6
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi4
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_generic.gypi2
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_mips.gypi2
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_nacl.gypi2
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_x86.gypi9
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_x86_64.gypi9
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_x86_64_intrinsics.gypi12
-rw-r--r--chromium/third_party/libvpx/libvpx_srcs_x86_intrinsics.gypi12
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h58
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h184
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h105
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon/vp8_rtcd.h42
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h128
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h81
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm/vp8_rtcd.h30
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm/vp9_rtcd.h60
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm/vpx_dsp_rtcd.h69
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm64/vp8_rtcd.h36
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h128
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h78
-rw-r--r--chromium/third_party/libvpx/source/config/linux/generic/vp8_rtcd.h27
-rw-r--r--chromium/third_party/libvpx/source/config/linux/generic/vp9_rtcd.h60
-rw-r--r--chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/generic/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/generic/vpx_dsp_rtcd.h66
-rw-r--r--chromium/third_party/libvpx/source/config/linux/ia32/vp8_rtcd.h78
-rw-r--r--chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h139
-rw-r--r--chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h156
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mips64el/vp8_rtcd.h27
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mips64el/vp9_rtcd.h60
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mips64el/vpx_dsp_rtcd.h66
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mipsel/vp8_rtcd.h27
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mipsel/vp9_rtcd.h60
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/mipsel/vpx_dsp_rtcd.h66
-rw-r--r--chromium/third_party/libvpx/source/config/linux/x64/vp8_rtcd.h52
-rw-r--r--chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h106
-rw-r--r--chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/x64/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h115
-rw-r--r--chromium/third_party/libvpx/source/config/mac/ia32/vp8_rtcd.h78
-rw-r--r--chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h139
-rw-r--r--chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h156
-rw-r--r--chromium/third_party/libvpx/source/config/mac/x64/vp8_rtcd.h52
-rw-r--r--chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h106
-rw-r--r--chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/mac/x64/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h115
-rw-r--r--chromium/third_party/libvpx/source/config/nacl/vp8_rtcd.h27
-rw-r--r--chromium/third_party/libvpx/source/config/nacl/vp9_rtcd.h60
-rw-r--r--chromium/third_party/libvpx/source/config/nacl/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/nacl/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/nacl/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/nacl/vpx_dsp_rtcd.h66
-rw-r--r--chromium/third_party/libvpx/source/config/win/ia32/vp8_rtcd.h78
-rw-r--r--chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h139
-rw-r--r--chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/win/ia32/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h156
-rw-r--r--chromium/third_party/libvpx/source/config/win/x64/vp8_rtcd.h52
-rw-r--r--chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h106
-rw-r--r--chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm1
-rw-r--r--chromium/third_party/libvpx/source/config/win/x64/vpx_config.c1
-rw-r--r--chromium/third_party/libvpx/source/config/win/x64/vpx_config.h1
-rw-r--r--chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h115
-rw-r--r--chromium/third_party/libvpx/source/libvpx/CHANGELOG5
-rw-r--r--chromium/third_party/libvpx/source/libvpx/README7
-rw-r--r--chromium/third_party/libvpx/source/libvpx/args.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/build/make/Android.mk4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/build/make/Makefile36
-rw-r--r--chromium/third_party/libvpx/source/libvpx/build/make/configure.sh28
-rwxr-xr-xchromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh4
-rwxr-xr-xchromium/third_party/libvpx/source/libvpx/configure88
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples.mk44
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/decode_to_md5.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/decode_with_drops.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/postproc.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/resize_util.c3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/set_maps.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/simple_decoder.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/simple_encoder.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/twopass_encoder.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c5
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/vp8cx_set_ref.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/vp9_lossless_encoder.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c370
-rw-r--r--chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c48
-rw-r--r--chromium/third_party/libvpx/source/libvpx/libs.mk167
-rw-r--r--chromium/third_party/libvpx/source/libvpx/md5_utils.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/rate_hist.c3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/tools_common.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/tools_common.h9
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/alloccommon.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm154
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm101
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c320
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c8
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c19
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/copy_c.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/filter.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/generic/systemdependent.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/idctllm.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/mfqe.c20
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/rtcd_defs.pl56
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/variance.h36
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/variance_c.c165
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm387
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_ssse3.c10
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm353
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_mmx.c (renamed from chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_mmx.c)155
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_sse2.c (renamed from chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_sse2.c)156
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_if.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm138
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c131
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/dct.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeframe.c3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeintra.c3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/ethreading.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c15
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.h4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c36
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h7
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/pickinter.c40
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/picklpf.c3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c48
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c7
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.h3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/vp8_common.mk9
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c6
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp8/vp8cx_arm.mk2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c1197
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm34
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c782
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_msa.c679
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c753
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c955
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_msa.c784
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_vert_msa.c873
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_avg_msa.c247
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_copy_msa.c155
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_msa.h222
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c893
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct32x32_msa.c1224
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c147
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c183
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct_msa.h481
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_intra_predict_msa.c737
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_16_msa.c1480
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_4_msa.c152
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_8_msa.c348
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_msa.h246
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h2699
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_mfqe_msa.c137
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h116
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_common.h9
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c6
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h34
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.h17
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h84
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c27
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h29
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.h1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c5
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter_filters.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mfqe.c6
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.c17
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.h6
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h43
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_quant_common.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.c302
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl383
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scan.h12
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.c12
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.h16
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_systemdependent.h11
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h296
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c416
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c70
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c122
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c201
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c852
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.h14
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c164
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.h1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c6
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.c40
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c195
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c56
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c114
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c688
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct32x32_msa.c956
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c129
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c90
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h548
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_subtract_msa.c264
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c289
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c94
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h5
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c71
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_avg.c42
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c19
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_dct.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c230
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c466
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h20
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c34
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_extend.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c211
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h13
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c96
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c60
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c211
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h5
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c7
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c29
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c23
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c71
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ssim.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.h10
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c478
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h14
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c6
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c7
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c357
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.h32
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c11
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h (renamed from chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c)2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h (renamed from chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c)2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c36
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h (renamed from chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c)2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c3
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm16
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c231
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm24
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c18
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c88
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c300
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk19
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c144
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c19
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk18
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx/exports_dec2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c122
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h5
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h60
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h7
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx/vpx_codec.mk28
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h38
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm358
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c418
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/sad.c9
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/variance.c306
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk20
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl208
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm (renamed from chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm)12
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_sse2.c245
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c43
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c93
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_avx2.c (renamed from chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c)6
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm (renamed from chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_mmx.asm)447
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c107
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c309
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_ports/mem.h12
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_ports/msvc.h22
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_ports/vpx_ports.mk11
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/vpx_scale.c1
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12config.c4
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12extend.c2
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpxdec.c87
-rw-r--r--chromium/third_party/libvpx/source/libvpx/vpxenc.c67
-rw-r--r--chromium/third_party/libvpx/source/libvpx/webmenc.cc12
-rw-r--r--chromium/third_party/libvpx/source/libvpx/webmenc.h3
347 files changed, 22658 insertions(+), 13118 deletions(-)
diff --git a/chromium/third_party/libvpx/BUILD.gn b/chromium/third_party/libvpx/BUILD.gn
index bb55816b80b..d33761594d9 100644
--- a/chromium/third_party/libvpx/BUILD.gn
+++ b/chromium/third_party/libvpx/BUILD.gn
@@ -89,7 +89,7 @@ static_library("libvpx_intrinsics_sse2") {
configs += [ ":libvpx_config" ]
configs -= [ "//build/config/compiler:chromium_code" ]
configs += [ "//build/config/compiler:no_chromium_code" ]
- if (!is_win) {
+ if (!is_win || is_clang) {
cflags = [ "-msse2" ]
}
if (current_cpu == "x86") {
@@ -103,7 +103,7 @@ static_library("libvpx_intrinsics_ssse3") {
configs += [ ":libvpx_config" ]
configs -= [ "//build/config/compiler:chromium_code" ]
configs += [ "//build/config/compiler:no_chromium_code" ]
- if (!is_win) {
+ if (!is_win || is_clang) {
cflags = [ "-mssse3" ]
}
if (current_cpu == "x86") {
@@ -117,7 +117,7 @@ static_library("libvpx_intrinsics_sse4_1") {
configs += [ ":libvpx_config" ]
configs -= [ "//build/config/compiler:chromium_code" ]
configs += [ "//build/config/compiler:no_chromium_code" ]
- if (!is_win) {
+ if (!is_win || is_clang) {
cflags = [ "-msse4.1" ]
}
if (current_cpu == "x86") {
diff --git a/chromium/third_party/libvpx/README.chromium b/chromium/third_party/libvpx/README.chromium
index a8025f3705c..af0dcef0703 100644
--- a/chromium/third_party/libvpx/README.chromium
+++ b/chromium/third_party/libvpx/README.chromium
@@ -5,9 +5,9 @@ License: BSD
License File: source/libvpx/LICENSE
Security Critical: yes
-Date: Monday May 11 2015
+Date: Monday June 29 2015
Branch: master
-Commit: 6d227137221ea4eead9d81daf3d2f0915560bbff
+Commit: f3a1295cffe62806255bace4abb49c8831b7a61f
Description:
Contains the sources used to compile libvpx binaries used by Google Chrome and
diff --git a/chromium/third_party/libvpx/codereview.settings b/chromium/third_party/libvpx/codereview.settings
index db0d04cc885..0e7d0e87548 100644
--- a/chromium/third_party/libvpx/codereview.settings
+++ b/chromium/third_party/libvpx/codereview.settings
@@ -1,4 +1,9 @@
-# This file is used by gcl to get repository specific information.
CODE_REVIEW_SERVER: codereview.chromium.org
+CC_LIST: chromium-reviews@chromium.org
VIEW_VC: https://chromium.googlesource.com/chromium/deps/libvpx/+/
STATUS: http://chromium-status.appspot.com/status
+TRY_ON_UPLOAD: False
+TRYSERVER_SVN_URL: svn://svn.chromium.org/chrome-try/try
+GITCL_PREUPLOAD: http://src.chromium.org/viewvc/chrome/trunk/tools/depot_tools/git-cl-upload-hook?revision=HEAD
+GITCL_PREDCOMMIT: http://src.chromium.org/viewvc/chrome/trunk/tools/depot_tools/git-cl-upload-hook?revision=HEAD
+PROJECT: chromium_deps
diff --git a/chromium/third_party/libvpx/libvpx_srcs.gni b/chromium/third_party/libvpx/libvpx_srcs.gni
index 39f445180ea..1f94e7ce601 100644
--- a/chromium/third_party/libvpx/libvpx_srcs.gni
+++ b/chromium/third_party/libvpx/libvpx_srcs.gni
@@ -188,6 +188,7 @@ libvpx_srcs_x86 = [
"//third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_tile_common.c",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_tile_common.h",
+ "//third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h",
"//third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c",
@@ -280,7 +281,10 @@ libvpx_srcs_x86 = [
"//third_party/libvpx/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h",
"//third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h",
+ "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h",
+ "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.h",
+ "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h",
"//third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c",
"//third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c",
"//third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h",
@@ -301,6 +305,7 @@ libvpx_srcs_x86 = [
"//third_party/libvpx/source/libvpx/vpx/vpx_image.h",
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/sad.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c",
"//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h",
"//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c",
@@ -309,6 +314,7 @@ libvpx_srcs_x86 = [
"//third_party/libvpx/source/libvpx/vpx_ports/mem.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h",
+ "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h",
"//third_party/libvpx/source/libvpx/vpx_ports/x86.h",
@@ -338,9 +344,9 @@ libvpx_srcs_x86_assembly = [
"//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_mmx.asm",
"//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_sse2.asm",
"//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_ssse3.asm",
- "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_mmx.asm",
"//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm",
"//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_ssse3.asm",
+ "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/dct_mmx.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/dct_sse2.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/encodeopt.asm",
@@ -369,30 +375,31 @@ libvpx_srcs_x86_assembly = [
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_sse3.asm",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_sse4.asm",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_ssse3.asm",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm",
"//third_party/libvpx/source/libvpx/vpx_ports/emms.asm",
"//third_party/libvpx/source/libvpx/vpx_ports/x86_abi_support.asm",
]
libvpx_srcs_x86_mmx = [
"//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_mmx.c",
- "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_mmx.c",
+ "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_mmx.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_enc_stubs_mmx.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c",
]
libvpx_srcs_x86_sse2 = [
"//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_sse2.c",
"//third_party/libvpx/source/libvpx/vp8/common/x86/recon_wrapper_sse2.c",
- "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_sse2.c",
+ "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_sse2.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/denoising_sse2.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_sse2.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_enc_stubs_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c",
- "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c",
- "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c",
]
libvpx_srcs_x86_sse3 = [
]
@@ -410,14 +417,14 @@ libvpx_srcs_x86_avx = [
libvpx_srcs_x86_avx2 = [
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c",
- "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c",
- "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_avx2.c",
]
libvpx_srcs_x86_64 = [
"//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c",
@@ -604,6 +611,7 @@ libvpx_srcs_x86_64 = [
"//third_party/libvpx/source/libvpx/vp9/common/vp9_thread_common.h",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_tile_common.c",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_tile_common.h",
+ "//third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.h",
"//third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c",
@@ -696,7 +704,10 @@ libvpx_srcs_x86_64 = [
"//third_party/libvpx/source/libvpx/vp9/encoder/vp9_write_bit_buffer.h",
"//third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h",
+ "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h",
+ "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.h",
+ "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h",
"//third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c",
"//third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c",
"//third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h",
@@ -717,6 +728,7 @@ libvpx_srcs_x86_64 = [
"//third_party/libvpx/source/libvpx/vpx/vpx_image.h",
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/sad.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c",
"//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h",
"//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c",
@@ -725,6 +737,7 @@ libvpx_srcs_x86_64 = [
"//third_party/libvpx/source/libvpx/vpx_ports/mem.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h",
+ "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h",
"//third_party/libvpx/source/libvpx/vpx_ports/x86.h",
@@ -755,9 +768,9 @@ libvpx_srcs_x86_64_assembly = [
"//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_mmx.asm",
"//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_sse2.asm",
"//third_party/libvpx/source/libvpx/vp8/common/x86/subpixel_ssse3.asm",
- "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_mmx.asm",
"//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm",
"//third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_ssse3.asm",
+ "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/dct_mmx.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/dct_sse2.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/encodeopt.asm",
@@ -791,30 +804,31 @@ libvpx_srcs_x86_64_assembly = [
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_sse3.asm",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_sse4.asm",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_ssse3.asm",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm",
"//third_party/libvpx/source/libvpx/vpx_ports/emms.asm",
"//third_party/libvpx/source/libvpx/vpx_ports/x86_abi_support.asm",
]
libvpx_srcs_x86_64_mmx = [
"//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_mmx.c",
- "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_mmx.c",
+ "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_mmx.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_enc_stubs_mmx.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c",
]
libvpx_srcs_x86_64_sse2 = [
"//third_party/libvpx/source/libvpx/vp8/common/x86/idct_blk_sse2.c",
"//third_party/libvpx/source/libvpx/vp8/common/x86/recon_wrapper_sse2.c",
- "//third_party/libvpx/source/libvpx/vp8/common/x86/variance_sse2.c",
+ "//third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_sse2.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/denoising_sse2.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_sse2.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/vp8_enc_stubs_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c",
- "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c",
- "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_denoiser_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c",
]
libvpx_srcs_x86_64_sse3 = [
]
@@ -832,14 +846,14 @@ libvpx_srcs_x86_64_avx = [
libvpx_srcs_x86_64_avx2 = [
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c",
- "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c",
- "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_avx2.c",
]
libvpx_srcs_arm = [
"//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c",
@@ -1140,6 +1154,7 @@ libvpx_srcs_arm = [
"//third_party/libvpx/source/libvpx/vpx/vpx_image.h",
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/sad.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c",
"//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h",
"//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c",
@@ -1150,6 +1165,7 @@ libvpx_srcs_arm = [
"//third_party/libvpx/source/libvpx/vpx_ports/mem.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h",
+ "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h",
"//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c",
@@ -1175,15 +1191,13 @@ libvpx_srcs_arm_assembly = [
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/simpleloopfilter_v6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/sixtappredict8x4_v6.asm",
- "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm",
- "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm",
- "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/walsh_v6.asm",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_media.asm",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm",
]
libvpx_srcs_arm_neon = [
"//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c",
@@ -1210,7 +1224,6 @@ libvpx_srcs_arm_neon = [
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/reconintra_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/sixtappredict_neon.c",
- "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c",
"//third_party/libvpx/source/libvpx/vp8/common/blockd.c",
@@ -1288,7 +1301,6 @@ libvpx_srcs_arm_neon = [
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/subtract_neon.c",
- "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/bitstream.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/bitstream.h",
@@ -1343,6 +1355,7 @@ libvpx_srcs_arm_neon = [
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_loopfilter_neon.c",
+ "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.h",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.c",
@@ -1519,7 +1532,9 @@ libvpx_srcs_arm_neon = [
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_neon.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/sad.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c",
"//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h",
"//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c",
@@ -1530,6 +1545,7 @@ libvpx_srcs_arm_neon = [
"//third_party/libvpx/source/libvpx/vpx_ports/mem.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h",
+ "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h",
"//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c",
@@ -1555,12 +1571,9 @@ libvpx_srcs_arm_neon_assembly = [
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/simpleloopfilter_v6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/sixtappredict8x4_v6.asm",
- "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm",
- "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm",
- "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/walsh_v6.asm",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm",
@@ -1582,6 +1595,7 @@ libvpx_srcs_arm_neon_assembly = [
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_save_reg_neon.asm",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_media.asm",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm",
]
libvpx_srcs_arm_neon_cpu_detect = [
"//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c",
@@ -1882,6 +1896,7 @@ libvpx_srcs_arm_neon_cpu_detect = [
"//third_party/libvpx/source/libvpx/vpx/vpx_image.h",
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/sad.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c",
"//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h",
"//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c",
@@ -1892,6 +1907,7 @@ libvpx_srcs_arm_neon_cpu_detect = [
"//third_party/libvpx/source/libvpx/vpx_ports/mem.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h",
+ "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h",
"//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c",
@@ -1917,15 +1933,13 @@ libvpx_srcs_arm_neon_cpu_detect_assembly = [
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/loopfilter_v6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/simpleloopfilter_v6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/sixtappredict8x4_v6.asm",
- "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm",
- "//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm",
- "//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/walsh_v6.asm",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_media.asm",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve8_neon_asm.asm",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve_avg_neon_asm.asm",
@@ -1962,19 +1976,18 @@ libvpx_srcs_arm_neon_cpu_detect_neon = [
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/reconintra_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/sixtappredict_neon.c",
- "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/denoising_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/subtract_neon.c",
- "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_convolve_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_loopfilter_neon.c",
+ "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_avg_neon.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c",
"//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c",
@@ -1982,6 +1995,7 @@ libvpx_srcs_arm_neon_cpu_detect_neon = [
"//third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_neon.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c",
]
libvpx_srcs_arm64 = [
"//third_party/libvpx/source/libvpx/vp8/common/alloccommon.c",
@@ -2005,7 +2019,6 @@ libvpx_srcs_arm64 = [
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/reconintra_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/shortidct4x4llm_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/sixtappredict_neon.c",
- "//third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c",
"//third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c",
"//third_party/libvpx/source/libvpx/vp8/common/blockd.c",
@@ -2083,7 +2096,6 @@ libvpx_srcs_arm64 = [
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/fastquantizeb_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/shortfdct_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/subtract_neon.c",
- "//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/bitstream.c",
"//third_party/libvpx/source/libvpx/vp8/encoder/bitstream.h",
@@ -2329,7 +2341,9 @@ libvpx_srcs_arm64 = [
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad4d_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/arm/sad_neon.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/sad.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c",
"//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h",
"//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c",
@@ -2340,6 +2354,7 @@ libvpx_srcs_arm64 = [
"//third_party/libvpx/source/libvpx/vpx_ports/mem.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h",
+ "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h",
"//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c",
@@ -2645,6 +2660,7 @@ libvpx_srcs_mips = [
"//third_party/libvpx/source/libvpx/vpx/vpx_image.h",
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/sad.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c",
"//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h",
"//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c",
@@ -2653,6 +2669,7 @@ libvpx_srcs_mips = [
"//third_party/libvpx/source/libvpx/vpx_ports/mem.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h",
+ "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h",
"//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c",
@@ -2956,6 +2973,7 @@ libvpx_srcs_nacl = [
"//third_party/libvpx/source/libvpx/vpx/vpx_image.h",
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/sad.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c",
"//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h",
"//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c",
@@ -2964,6 +2982,7 @@ libvpx_srcs_nacl = [
"//third_party/libvpx/source/libvpx/vpx_ports/mem.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h",
+ "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h",
"//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c",
@@ -3267,6 +3286,7 @@ libvpx_srcs_generic = [
"//third_party/libvpx/source/libvpx/vpx/vpx_image.h",
"//third_party/libvpx/source/libvpx/vpx/vpx_integer.h",
"//third_party/libvpx/source/libvpx/vpx_dsp/sad.c",
+ "//third_party/libvpx/source/libvpx/vpx_dsp/variance.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd.c",
"//third_party/libvpx/source/libvpx/vpx_mem/include/vpx_mem_intrnl.h",
"//third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c",
@@ -3275,6 +3295,7 @@ libvpx_srcs_generic = [
"//third_party/libvpx/source/libvpx/vpx_ports/mem.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h",
"//third_party/libvpx/source/libvpx/vpx_ports/mem_ops_aligned.h",
+ "//third_party/libvpx/source/libvpx/vpx_ports/msvc.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_once.h",
"//third_party/libvpx/source/libvpx/vpx_ports/vpx_timer.h",
"//third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c",
diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm.gypi
index bd6d32b07b7..1f457e299f6 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_arm.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_arm.gypi
@@ -22,8 +22,6 @@
'<(libvpx_source)/vp8/common/arm/armv6/loopfilter_v6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/simpleloopfilter_v6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/sixtappredict8x4_v6.asm',
- '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm',
- '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm',
@@ -103,7 +101,6 @@
'<(libvpx_source)/vp8/decoder/onyxd_int.h',
'<(libvpx_source)/vp8/decoder/threading.c',
'<(libvpx_source)/vp8/decoder/treereader.h',
- '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm',
'<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm',
'<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm',
'<(libvpx_source)/vp8/encoder/arm/dct_arm.c',
@@ -325,7 +322,9 @@
'<(libvpx_source)/vpx/vpx_image.h',
'<(libvpx_source)/vpx/vpx_integer.h',
'<(libvpx_source)/vpx_dsp/arm/sad_media.asm',
+ '<(libvpx_source)/vpx_dsp/arm/variance_media.asm',
'<(libvpx_source)/vpx_dsp/sad.c',
+ '<(libvpx_source)/vpx_dsp/variance.c',
'<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c',
'<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
'<(libvpx_source)/vpx_mem/vpx_mem.c',
@@ -336,6 +335,7 @@
'<(libvpx_source)/vpx_ports/mem.h',
'<(libvpx_source)/vpx_ports/mem_ops.h',
'<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+ '<(libvpx_source)/vpx_ports/msvc.h',
'<(libvpx_source)/vpx_ports/vpx_once.h',
'<(libvpx_source)/vpx_ports/vpx_timer.h',
'<(libvpx_source)/vpx_scale/generic/gen_scalers.c',
diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm64.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm64.gypi
index 31e6fca63f0..5b4acd877ea 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_arm64.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_arm64.gypi
@@ -26,7 +26,6 @@
'<(libvpx_source)/vp8/common/arm/neon/reconintra_neon.c',
'<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.c',
'<(libvpx_source)/vp8/common/arm/neon/sixtappredict_neon.c',
- '<(libvpx_source)/vp8/common/arm/neon/variance_neon.c',
'<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance_neon.c',
'<(libvpx_source)/vp8/common/arm/variance_arm.c',
'<(libvpx_source)/vp8/common/blockd.c',
@@ -104,7 +103,6 @@
'<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/shortfdct_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.c',
- '<(libvpx_source)/vp8/encoder/arm/neon/vp8_mse16x16_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c',
'<(libvpx_source)/vp8/encoder/bitstream.c',
'<(libvpx_source)/vp8/encoder/bitstream.h',
@@ -350,7 +348,9 @@
'<(libvpx_source)/vpx/vpx_integer.h',
'<(libvpx_source)/vpx_dsp/arm/sad4d_neon.c',
'<(libvpx_source)/vpx_dsp/arm/sad_neon.c',
+ '<(libvpx_source)/vpx_dsp/arm/variance_neon.c',
'<(libvpx_source)/vpx_dsp/sad.c',
+ '<(libvpx_source)/vpx_dsp/variance.c',
'<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c',
'<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
'<(libvpx_source)/vpx_mem/vpx_mem.c',
@@ -361,6 +361,7 @@
'<(libvpx_source)/vpx_ports/mem.h',
'<(libvpx_source)/vpx_ports/mem_ops.h',
'<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+ '<(libvpx_source)/vpx_ports/msvc.h',
'<(libvpx_source)/vpx_ports/vpx_once.h',
'<(libvpx_source)/vpx_ports/vpx_timer.h',
'<(libvpx_source)/vpx_scale/generic/gen_scalers.c',
diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm_neon.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm_neon.gypi
index 277180640de..81e03e049b2 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_arm_neon.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_arm_neon.gypi
@@ -22,8 +22,6 @@
'<(libvpx_source)/vp8/common/arm/armv6/loopfilter_v6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/simpleloopfilter_v6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/sixtappredict8x4_v6.asm',
- '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm',
- '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm',
@@ -48,7 +46,6 @@
'<(libvpx_source)/vp8/common/arm/neon/reconintra_neon.c',
'<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.c',
'<(libvpx_source)/vp8/common/arm/neon/sixtappredict_neon.c',
- '<(libvpx_source)/vp8/common/arm/neon/variance_neon.c',
'<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance_neon.c',
'<(libvpx_source)/vp8/common/arm/variance_arm.c',
'<(libvpx_source)/vp8/common/blockd.c',
@@ -121,7 +118,6 @@
'<(libvpx_source)/vp8/decoder/onyxd_int.h',
'<(libvpx_source)/vp8/decoder/threading.c',
'<(libvpx_source)/vp8/decoder/treereader.h',
- '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm',
'<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm',
'<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm',
'<(libvpx_source)/vp8/encoder/arm/dct_arm.c',
@@ -129,7 +125,6 @@
'<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/shortfdct_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.c',
- '<(libvpx_source)/vp8/encoder/arm/neon/vp8_mse16x16_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c',
'<(libvpx_source)/vp8/encoder/bitstream.c',
'<(libvpx_source)/vp8/encoder/bitstream.h',
@@ -200,6 +195,7 @@
'<(libvpx_source)/vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm',
'<(libvpx_source)/vp9/common/arm/neon/vp9_loopfilter_neon.c',
'<(libvpx_source)/vp9/common/arm/neon/vp9_mb_lpf_neon.asm',
+ '<(libvpx_source)/vp9/common/arm/neon/vp9_reconintra_neon.c',
'<(libvpx_source)/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm',
'<(libvpx_source)/vp9/common/arm/neon/vp9_save_reg_neon.asm',
'<(libvpx_source)/vp9/common/vp9_alloccommon.c',
@@ -379,7 +375,10 @@
'<(libvpx_source)/vpx_dsp/arm/sad4d_neon.c',
'<(libvpx_source)/vpx_dsp/arm/sad_media.asm',
'<(libvpx_source)/vpx_dsp/arm/sad_neon.c',
+ '<(libvpx_source)/vpx_dsp/arm/variance_media.asm',
+ '<(libvpx_source)/vpx_dsp/arm/variance_neon.c',
'<(libvpx_source)/vpx_dsp/sad.c',
+ '<(libvpx_source)/vpx_dsp/variance.c',
'<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c',
'<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
'<(libvpx_source)/vpx_mem/vpx_mem.c',
@@ -390,6 +389,7 @@
'<(libvpx_source)/vpx_ports/mem.h',
'<(libvpx_source)/vpx_ports/mem_ops.h',
'<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+ '<(libvpx_source)/vpx_ports/msvc.h',
'<(libvpx_source)/vpx_ports/vpx_once.h',
'<(libvpx_source)/vpx_ports/vpx_timer.h',
'<(libvpx_source)/vpx_scale/generic/gen_scalers.c',
diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect.gypi
index bd6d32b07b7..1f457e299f6 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect.gypi
@@ -22,8 +22,6 @@
'<(libvpx_source)/vp8/common/arm/armv6/loopfilter_v6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/simpleloopfilter_v6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/sixtappredict8x4_v6.asm',
- '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm',
- '<(libvpx_source)/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6.asm',
'<(libvpx_source)/vp8/common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6.asm',
@@ -103,7 +101,6 @@
'<(libvpx_source)/vp8/decoder/onyxd_int.h',
'<(libvpx_source)/vp8/decoder/threading.c',
'<(libvpx_source)/vp8/decoder/treereader.h',
- '<(libvpx_source)/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm',
'<(libvpx_source)/vp8/encoder/arm/armv6/vp8_short_fdct4x4_armv6.asm',
'<(libvpx_source)/vp8/encoder/arm/armv6/walsh_v6.asm',
'<(libvpx_source)/vp8/encoder/arm/dct_arm.c',
@@ -325,7 +322,9 @@
'<(libvpx_source)/vpx/vpx_image.h',
'<(libvpx_source)/vpx/vpx_integer.h',
'<(libvpx_source)/vpx_dsp/arm/sad_media.asm',
+ '<(libvpx_source)/vpx_dsp/arm/variance_media.asm',
'<(libvpx_source)/vpx_dsp/sad.c',
+ '<(libvpx_source)/vpx_dsp/variance.c',
'<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c',
'<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
'<(libvpx_source)/vpx_mem/vpx_mem.c',
@@ -336,6 +335,7 @@
'<(libvpx_source)/vpx_ports/mem.h',
'<(libvpx_source)/vpx_ports/mem_ops.h',
'<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+ '<(libvpx_source)/vpx_ports/msvc.h',
'<(libvpx_source)/vpx_ports/vpx_once.h',
'<(libvpx_source)/vpx_ports/vpx_timer.h',
'<(libvpx_source)/vpx_scale/generic/gen_scalers.c',
diff --git a/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi b/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi
index 6b60be6cfe0..b60479e37c5 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_arm_neon_cpu_detect_intrinsics.gypi
@@ -29,13 +29,11 @@
'<(libvpx_source)/vp8/common/arm/neon/reconintra_neon.c',
'<(libvpx_source)/vp8/common/arm/neon/shortidct4x4llm_neon.c',
'<(libvpx_source)/vp8/common/arm/neon/sixtappredict_neon.c',
- '<(libvpx_source)/vp8/common/arm/neon/variance_neon.c',
'<(libvpx_source)/vp8/common/arm/neon/vp8_subpixelvariance_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/denoising_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/fastquantizeb_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/shortfdct_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/subtract_neon.c',
- '<(libvpx_source)/vp8/encoder/arm/neon/vp8_mse16x16_neon.c',
'<(libvpx_source)/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c',
'<(libvpx_source)/vp9/common/arm/neon/vp9_convolve8_avg_neon_asm.asm',
'<(libvpx_source)/vp9/common/arm/neon/vp9_convolve8_neon_asm.asm',
@@ -58,6 +56,7 @@
'<(libvpx_source)/vp9/common/arm/neon/vp9_loopfilter_8_neon_asm.asm',
'<(libvpx_source)/vp9/common/arm/neon/vp9_loopfilter_neon.c',
'<(libvpx_source)/vp9/common/arm/neon/vp9_mb_lpf_neon.asm',
+ '<(libvpx_source)/vp9/common/arm/neon/vp9_reconintra_neon.c',
'<(libvpx_source)/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm',
'<(libvpx_source)/vp9/common/arm/neon/vp9_save_reg_neon.asm',
'<(libvpx_source)/vp9/encoder/arm/neon/vp9_avg_neon.c',
@@ -67,6 +66,7 @@
'<(libvpx_source)/vp9/encoder/arm/neon/vp9_variance_neon.c',
'<(libvpx_source)/vpx_dsp/arm/sad4d_neon.c',
'<(libvpx_source)/vpx_dsp/arm/sad_neon.c',
+ '<(libvpx_source)/vpx_dsp/arm/variance_neon.c',
],
'includes': [ 'ads2gas.gypi' ],
'cflags!': [ '-mfpu=vfpv3-d16' ],
diff --git a/chromium/third_party/libvpx/libvpx_srcs_generic.gypi b/chromium/third_party/libvpx/libvpx_srcs_generic.gypi
index eb808cc1436..71a5c4adcb8 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_generic.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_generic.gypi
@@ -295,6 +295,7 @@
'<(libvpx_source)/vpx/vpx_image.h',
'<(libvpx_source)/vpx/vpx_integer.h',
'<(libvpx_source)/vpx_dsp/sad.c',
+ '<(libvpx_source)/vpx_dsp/variance.c',
'<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c',
'<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
'<(libvpx_source)/vpx_mem/vpx_mem.c',
@@ -303,6 +304,7 @@
'<(libvpx_source)/vpx_ports/mem.h',
'<(libvpx_source)/vpx_ports/mem_ops.h',
'<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+ '<(libvpx_source)/vpx_ports/msvc.h',
'<(libvpx_source)/vpx_ports/vpx_once.h',
'<(libvpx_source)/vpx_ports/vpx_timer.h',
'<(libvpx_source)/vpx_scale/generic/gen_scalers.c',
diff --git a/chromium/third_party/libvpx/libvpx_srcs_mips.gypi b/chromium/third_party/libvpx/libvpx_srcs_mips.gypi
index 8abf1082a11..df3f66fa791 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_mips.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_mips.gypi
@@ -297,6 +297,7 @@
'<(libvpx_source)/vpx/vpx_image.h',
'<(libvpx_source)/vpx/vpx_integer.h',
'<(libvpx_source)/vpx_dsp/sad.c',
+ '<(libvpx_source)/vpx_dsp/variance.c',
'<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c',
'<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
'<(libvpx_source)/vpx_mem/vpx_mem.c',
@@ -305,6 +306,7 @@
'<(libvpx_source)/vpx_ports/mem.h',
'<(libvpx_source)/vpx_ports/mem_ops.h',
'<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+ '<(libvpx_source)/vpx_ports/msvc.h',
'<(libvpx_source)/vpx_ports/vpx_once.h',
'<(libvpx_source)/vpx_ports/vpx_timer.h',
'<(libvpx_source)/vpx_scale/generic/gen_scalers.c',
diff --git a/chromium/third_party/libvpx/libvpx_srcs_nacl.gypi b/chromium/third_party/libvpx/libvpx_srcs_nacl.gypi
index eb808cc1436..71a5c4adcb8 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_nacl.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_nacl.gypi
@@ -295,6 +295,7 @@
'<(libvpx_source)/vpx/vpx_image.h',
'<(libvpx_source)/vpx/vpx_integer.h',
'<(libvpx_source)/vpx_dsp/sad.c',
+ '<(libvpx_source)/vpx_dsp/variance.c',
'<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c',
'<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
'<(libvpx_source)/vpx_mem/vpx_mem.c',
@@ -303,6 +304,7 @@
'<(libvpx_source)/vpx_ports/mem.h',
'<(libvpx_source)/vpx_ports/mem_ops.h',
'<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+ '<(libvpx_source)/vpx_ports/msvc.h',
'<(libvpx_source)/vpx_ports/vpx_once.h',
'<(libvpx_source)/vpx_ports/vpx_timer.h',
'<(libvpx_source)/vpx_scale/generic/gen_scalers.c',
diff --git a/chromium/third_party/libvpx/libvpx_srcs_x86.gypi b/chromium/third_party/libvpx/libvpx_srcs_x86.gypi
index d61c186aca5..63f05e35f01 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_x86.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_x86.gypi
@@ -85,10 +85,10 @@
'<(libvpx_source)/vp8/common/x86/subpixel_mmx.asm',
'<(libvpx_source)/vp8/common/x86/subpixel_sse2.asm',
'<(libvpx_source)/vp8/common/x86/subpixel_ssse3.asm',
- '<(libvpx_source)/vp8/common/x86/variance_impl_mmx.asm',
'<(libvpx_source)/vp8/common/x86/variance_impl_sse2.asm',
'<(libvpx_source)/vp8/common/x86/variance_impl_ssse3.asm',
'<(libvpx_source)/vp8/common/x86/vp8_asm_stubs.c',
+ '<(libvpx_source)/vp8/common/x86/vp8_variance_impl_mmx.asm',
'<(libvpx_source)/vp8/decoder/dboolhuff.c',
'<(libvpx_source)/vp8/decoder/dboolhuff.h',
'<(libvpx_source)/vp8/decoder/decodeframe.c',
@@ -216,6 +216,7 @@
'<(libvpx_source)/vp9/common/vp9_thread_common.h',
'<(libvpx_source)/vp9/common/vp9_tile_common.c',
'<(libvpx_source)/vp9/common/vp9_tile_common.h',
+ '<(libvpx_source)/vp9/common/x86/convolve.h',
'<(libvpx_source)/vp9/common/x86/vp9_asm_stubs.c',
'<(libvpx_source)/vp9/common/x86/vp9_copy_sse2.asm',
'<(libvpx_source)/vp9/common/x86/vp9_idct_intrin_sse2.h',
@@ -318,8 +319,11 @@
'<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h',
'<(libvpx_source)/vp9/encoder/vp9_writer.c',
'<(libvpx_source)/vp9/encoder/vp9_writer.h',
+ '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h',
+ '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h',
'<(libvpx_source)/vp9/encoder/x86/vp9_dct_mmx.asm',
'<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2.h',
+ '<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2_impl.h',
'<(libvpx_source)/vp9/encoder/x86/vp9_error_sse2.asm',
'<(libvpx_source)/vp9/encoder/x86/vp9_subpel_variance.asm',
'<(libvpx_source)/vp9/encoder/x86/vp9_subtract_sse2.asm',
@@ -344,6 +348,7 @@
'<(libvpx_source)/vpx/vpx_image.h',
'<(libvpx_source)/vpx/vpx_integer.h',
'<(libvpx_source)/vpx_dsp/sad.c',
+ '<(libvpx_source)/vpx_dsp/variance.c',
'<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c',
'<(libvpx_source)/vpx_dsp/x86/sad4d_sse2.asm',
'<(libvpx_source)/vpx_dsp/x86/sad_mmx.asm',
@@ -351,6 +356,7 @@
'<(libvpx_source)/vpx_dsp/x86/sad_sse3.asm',
'<(libvpx_source)/vpx_dsp/x86/sad_sse4.asm',
'<(libvpx_source)/vpx_dsp/x86/sad_ssse3.asm',
+ '<(libvpx_source)/vpx_dsp/x86/variance_impl_mmx.asm',
'<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
'<(libvpx_source)/vpx_mem/vpx_mem.c',
'<(libvpx_source)/vpx_mem/vpx_mem.h',
@@ -359,6 +365,7 @@
'<(libvpx_source)/vpx_ports/mem.h',
'<(libvpx_source)/vpx_ports/mem_ops.h',
'<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+ '<(libvpx_source)/vpx_ports/msvc.h',
'<(libvpx_source)/vpx_ports/vpx_once.h',
'<(libvpx_source)/vpx_ports/vpx_timer.h',
'<(libvpx_source)/vpx_ports/x86.h',
diff --git a/chromium/third_party/libvpx/libvpx_srcs_x86_64.gypi b/chromium/third_party/libvpx/libvpx_srcs_x86_64.gypi
index c99ebee722a..5072d33fa0c 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_x86_64.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_x86_64.gypi
@@ -86,10 +86,10 @@
'<(libvpx_source)/vp8/common/x86/subpixel_mmx.asm',
'<(libvpx_source)/vp8/common/x86/subpixel_sse2.asm',
'<(libvpx_source)/vp8/common/x86/subpixel_ssse3.asm',
- '<(libvpx_source)/vp8/common/x86/variance_impl_mmx.asm',
'<(libvpx_source)/vp8/common/x86/variance_impl_sse2.asm',
'<(libvpx_source)/vp8/common/x86/variance_impl_ssse3.asm',
'<(libvpx_source)/vp8/common/x86/vp8_asm_stubs.c',
+ '<(libvpx_source)/vp8/common/x86/vp8_variance_impl_mmx.asm',
'<(libvpx_source)/vp8/decoder/dboolhuff.c',
'<(libvpx_source)/vp8/decoder/dboolhuff.h',
'<(libvpx_source)/vp8/decoder/decodeframe.c',
@@ -218,6 +218,7 @@
'<(libvpx_source)/vp9/common/vp9_thread_common.h',
'<(libvpx_source)/vp9/common/vp9_tile_common.c',
'<(libvpx_source)/vp9/common/vp9_tile_common.h',
+ '<(libvpx_source)/vp9/common/x86/convolve.h',
'<(libvpx_source)/vp9/common/x86/vp9_asm_stubs.c',
'<(libvpx_source)/vp9/common/x86/vp9_copy_sse2.asm',
'<(libvpx_source)/vp9/common/x86/vp9_idct_intrin_sse2.h',
@@ -321,8 +322,11 @@
'<(libvpx_source)/vp9/encoder/vp9_write_bit_buffer.h',
'<(libvpx_source)/vp9/encoder/vp9_writer.c',
'<(libvpx_source)/vp9/encoder/vp9_writer.h',
+ '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h',
+ '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h',
'<(libvpx_source)/vp9/encoder/x86/vp9_dct_mmx.asm',
'<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2.h',
+ '<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2_impl.h',
'<(libvpx_source)/vp9/encoder/x86/vp9_dct_ssse3_x86_64.asm',
'<(libvpx_source)/vp9/encoder/x86/vp9_error_sse2.asm',
'<(libvpx_source)/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm',
@@ -350,6 +354,7 @@
'<(libvpx_source)/vpx/vpx_image.h',
'<(libvpx_source)/vpx/vpx_integer.h',
'<(libvpx_source)/vpx_dsp/sad.c',
+ '<(libvpx_source)/vpx_dsp/variance.c',
'<(libvpx_source)/vpx_dsp/vpx_dsp_rtcd.c',
'<(libvpx_source)/vpx_dsp/x86/sad4d_sse2.asm',
'<(libvpx_source)/vpx_dsp/x86/sad_mmx.asm',
@@ -357,6 +362,7 @@
'<(libvpx_source)/vpx_dsp/x86/sad_sse3.asm',
'<(libvpx_source)/vpx_dsp/x86/sad_sse4.asm',
'<(libvpx_source)/vpx_dsp/x86/sad_ssse3.asm',
+ '<(libvpx_source)/vpx_dsp/x86/variance_impl_mmx.asm',
'<(libvpx_source)/vpx_mem/include/vpx_mem_intrnl.h',
'<(libvpx_source)/vpx_mem/vpx_mem.c',
'<(libvpx_source)/vpx_mem/vpx_mem.h',
@@ -365,6 +371,7 @@
'<(libvpx_source)/vpx_ports/mem.h',
'<(libvpx_source)/vpx_ports/mem_ops.h',
'<(libvpx_source)/vpx_ports/mem_ops_aligned.h',
+ '<(libvpx_source)/vpx_ports/msvc.h',
'<(libvpx_source)/vpx_ports/vpx_once.h',
'<(libvpx_source)/vpx_ports/vpx_timer.h',
'<(libvpx_source)/vpx_ports/x86.h',
diff --git a/chromium/third_party/libvpx/libvpx_srcs_x86_64_intrinsics.gypi b/chromium/third_party/libvpx/libvpx_srcs_x86_64_intrinsics.gypi
index ecf9fb7e810..f8590f576ac 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_x86_64_intrinsics.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_x86_64_intrinsics.gypi
@@ -14,8 +14,9 @@
],
'sources': [
'<(libvpx_source)/vp8/common/x86/idct_blk_mmx.c',
- '<(libvpx_source)/vp8/common/x86/variance_mmx.c',
+ '<(libvpx_source)/vp8/common/x86/vp8_variance_mmx.c',
'<(libvpx_source)/vp8/encoder/x86/vp8_enc_stubs_mmx.c',
+ '<(libvpx_source)/vpx_dsp/x86/variance_mmx.c',
],
'cflags': [ '-mmmx', ],
'xcode_settings': { 'OTHER_CFLAGS': [ '-mmmx' ] },
@@ -30,19 +31,18 @@
'sources': [
'<(libvpx_source)/vp8/common/x86/idct_blk_sse2.c',
'<(libvpx_source)/vp8/common/x86/recon_wrapper_sse2.c',
- '<(libvpx_source)/vp8/common/x86/variance_sse2.c',
+ '<(libvpx_source)/vp8/common/x86/vp8_variance_sse2.c',
'<(libvpx_source)/vp8/encoder/x86/denoising_sse2.c',
'<(libvpx_source)/vp8/encoder/x86/quantize_sse2.c',
'<(libvpx_source)/vp8/encoder/x86/vp8_enc_stubs_sse2.c',
'<(libvpx_source)/vp9/common/x86/vp9_idct_intrin_sse2.c',
'<(libvpx_source)/vp9/common/x86/vp9_loopfilter_intrin_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_avg_intrin_sse2.c',
- '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_sse2.c',
- '<(libvpx_source)/vp9/encoder/x86/vp9_dct_impl_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_denoiser_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_quantize_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_variance_sse2.c',
+ '<(libvpx_source)/vpx_dsp/x86/variance_sse2.c',
],
'cflags': [ '-msse2', ],
'xcode_settings': { 'OTHER_CFLAGS': [ '-msse2' ] },
@@ -104,14 +104,14 @@
'sources': [
'<(libvpx_source)/vp9/common/x86/vp9_loopfilter_intrin_avx2.c',
'<(libvpx_source)/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c',
- '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_avx2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_dct_avx2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_error_intrin_avx2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_variance_avx2.c',
- '<(libvpx_source)/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c',
'<(libvpx_source)/vpx_dsp/x86/sad4d_avx2.c',
'<(libvpx_source)/vpx_dsp/x86/sad_avx2.c',
+ '<(libvpx_source)/vpx_dsp/x86/variance_avx2.c',
+ '<(libvpx_source)/vpx_dsp/x86/variance_impl_avx2.c',
],
'cflags': [ '-mavx2', ],
'xcode_settings': { 'OTHER_CFLAGS': [ '-mavx2' ] },
diff --git a/chromium/third_party/libvpx/libvpx_srcs_x86_intrinsics.gypi b/chromium/third_party/libvpx/libvpx_srcs_x86_intrinsics.gypi
index ecf9fb7e810..f8590f576ac 100644
--- a/chromium/third_party/libvpx/libvpx_srcs_x86_intrinsics.gypi
+++ b/chromium/third_party/libvpx/libvpx_srcs_x86_intrinsics.gypi
@@ -14,8 +14,9 @@
],
'sources': [
'<(libvpx_source)/vp8/common/x86/idct_blk_mmx.c',
- '<(libvpx_source)/vp8/common/x86/variance_mmx.c',
+ '<(libvpx_source)/vp8/common/x86/vp8_variance_mmx.c',
'<(libvpx_source)/vp8/encoder/x86/vp8_enc_stubs_mmx.c',
+ '<(libvpx_source)/vpx_dsp/x86/variance_mmx.c',
],
'cflags': [ '-mmmx', ],
'xcode_settings': { 'OTHER_CFLAGS': [ '-mmmx' ] },
@@ -30,19 +31,18 @@
'sources': [
'<(libvpx_source)/vp8/common/x86/idct_blk_sse2.c',
'<(libvpx_source)/vp8/common/x86/recon_wrapper_sse2.c',
- '<(libvpx_source)/vp8/common/x86/variance_sse2.c',
+ '<(libvpx_source)/vp8/common/x86/vp8_variance_sse2.c',
'<(libvpx_source)/vp8/encoder/x86/denoising_sse2.c',
'<(libvpx_source)/vp8/encoder/x86/quantize_sse2.c',
'<(libvpx_source)/vp8/encoder/x86/vp8_enc_stubs_sse2.c',
'<(libvpx_source)/vp9/common/x86/vp9_idct_intrin_sse2.c',
'<(libvpx_source)/vp9/common/x86/vp9_loopfilter_intrin_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_avg_intrin_sse2.c',
- '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_sse2.c',
- '<(libvpx_source)/vp9/encoder/x86/vp9_dct_impl_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_dct_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_denoiser_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_quantize_sse2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_variance_sse2.c',
+ '<(libvpx_source)/vpx_dsp/x86/variance_sse2.c',
],
'cflags': [ '-msse2', ],
'xcode_settings': { 'OTHER_CFLAGS': [ '-msse2' ] },
@@ -104,14 +104,14 @@
'sources': [
'<(libvpx_source)/vp9/common/x86/vp9_loopfilter_intrin_avx2.c',
'<(libvpx_source)/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c',
- '<(libvpx_source)/vp9/encoder/x86/vp9_dct32x32_avx2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_dct_avx2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_error_intrin_avx2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c',
'<(libvpx_source)/vp9/encoder/x86/vp9_variance_avx2.c',
- '<(libvpx_source)/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c',
'<(libvpx_source)/vpx_dsp/x86/sad4d_avx2.c',
'<(libvpx_source)/vpx_dsp/x86/sad_avx2.c',
+ '<(libvpx_source)/vpx_dsp/x86/variance_avx2.c',
+ '<(libvpx_source)/vpx_dsp/x86/variance_impl_avx2.c',
],
'cflags': [ '-mavx2', ],
'xcode_settings': { 'OTHER_CFLAGS': [ '-mavx2' ] },
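
The intrinsics .gypi hunks above group sources by instruction set precisely so that each group can be compiled with only its matching flag (-mmmx, -msse2, -mavx2 in the cflags/OTHER_CFLAGS lines). The sketch below is illustrative only and not libvpx code, assuming the hypothetical names my_add8_u32_avx2 and my_add8_u32_c; it shows why an AVX2 routine has to sit in its own translation unit built with -mavx2 while a portable fallback can live anywhere.

/*
 * Illustrative sketch only (not libvpx code). All names are hypothetical.
 * Compile this file with -mavx2; the rest of the tree stays without it.
 */
#include <immintrin.h>
#include <stdint.h>

/* Belongs in the -mavx2 source group; call it only after run-time CPU detection. */
void my_add8_u32_avx2(uint32_t *dst, const uint32_t *a, const uint32_t *b) {
  const __m256i va = _mm256_loadu_si256((const __m256i *)a);
  const __m256i vb = _mm256_loadu_si256((const __m256i *)b);
  _mm256_storeu_si256((__m256i *)dst, _mm256_add_epi32(va, vb));
}

/* Portable fallback that can be built in any translation unit. */
void my_add8_u32_c(uint32_t *dst, const uint32_t *a, const uint32_t *b) {
  for (int i = 0; i < 8; ++i) dst[i] = a[i] + b[i];
}

Which of the two variants is actually called is decided at start-up by the run-time CPU detection (rtcd) tables in the generated headers whose diffs follow.
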
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h
index 21caf9804bc..16cd808528c 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp8_rtcd.h
@@ -33,8 +33,7 @@ RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch,
void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict4x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-void vp8_bilinear_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-RTCD_EXTERN void (*vp8_bilinear_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
+#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_armv6
void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
@@ -136,13 +135,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-unsigned int vp8_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-
-unsigned int vp8_get_mb_ss_c(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_c
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_armv6
@@ -199,11 +191,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in
int vp8_mbuverror_c(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_c
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
#define vp8_plane_add_noise vp8_plane_add_noise_c
@@ -263,9 +250,6 @@ void vp8_sixtap_predict8x8_armv6(unsigned char *src, int src_pitch, int xofst, i
void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
@@ -282,8 +266,7 @@ unsigned int vp8_sub_pixel_variance8x16_c(const unsigned char *src_ptr, int so
unsigned int vp8_sub_pixel_variance8x8_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_variance8x8_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_sub_pixel_variance8x8)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
+#define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_armv6
void vp8_subtract_b_c(struct block *be, struct blockd *bd, int pitch);
void vp8_subtract_b_neon(struct block *be, struct blockd *bd, int pitch);
@@ -297,27 +280,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne
void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
RTCD_EXTERN void (*vp8_subtract_mby)(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_c
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
@@ -347,8 +309,6 @@ static void setup_rtcd_internal(void)
vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_armv6;
if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon;
- vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_armv6;
- if (flags & HAS_NEON) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_neon;
vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_armv6;
if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon;
vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_armv6;
@@ -379,8 +339,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon;
vp8_fast_quantize_b = vp8_fast_quantize_b_c;
if (flags & HAS_NEON) vp8_fast_quantize_b = vp8_fast_quantize_b_neon;
- vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
- if (flags & HAS_NEON) vp8_get4x4sse_cs = vp8_get4x4sse_cs_neon;
vp8_loop_filter_bh = vp8_loop_filter_bh_armv6;
if (flags & HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon;
vp8_loop_filter_bv = vp8_loop_filter_bv_armv6;
@@ -397,8 +355,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon;
vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_armv6;
if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon;
- vp8_mse16x16 = vp8_mse16x16_armv6;
- if (flags & HAS_NEON) vp8_mse16x16 = vp8_mse16x16_neon;
vp8_short_fdct4x4 = vp8_short_fdct4x4_armv6;
if (flags & HAS_NEON) vp8_short_fdct4x4 = vp8_short_fdct4x4_neon;
vp8_short_fdct8x4 = vp8_short_fdct8x4_armv6;
@@ -417,22 +373,12 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon;
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_armv6;
if (flags & HAS_NEON) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_neon;
- vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_armv6;
- if (flags & HAS_NEON) vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_neon;
vp8_subtract_b = vp8_subtract_b_c;
if (flags & HAS_NEON) vp8_subtract_b = vp8_subtract_b_neon;
vp8_subtract_mbuv = vp8_subtract_mbuv_c;
if (flags & HAS_NEON) vp8_subtract_mbuv = vp8_subtract_mbuv_neon;
vp8_subtract_mby = vp8_subtract_mby_c;
if (flags & HAS_NEON) vp8_subtract_mby = vp8_subtract_mby_neon;
- vp8_variance16x16 = vp8_variance16x16_armv6;
- if (flags & HAS_NEON) vp8_variance16x16 = vp8_variance16x16_neon;
- vp8_variance16x8 = vp8_variance16x8_c;
- if (flags & HAS_NEON) vp8_variance16x8 = vp8_variance16x8_neon;
- vp8_variance8x16 = vp8_variance8x16_c;
- if (flags & HAS_NEON) vp8_variance8x16 = vp8_variance8x16_neon;
- vp8_variance8x8 = vp8_variance8x8_armv6;
- if (flags & HAS_NEON) vp8_variance8x8 = vp8_variance8x8_neon;
vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_armv6;
if (flags & HAS_NEON) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon;
vp8_variance_halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_armv6;
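
The generated vp8_rtcd.h diff above follows one pattern throughout: a symbol is either pinned to a single implementation with a #define (when only one variant exists for this configuration), or declared as an RTCD_EXTERN function pointer that setup_rtcd_internal() first sets to the baseline implementation and then repoints to the NEON variant when the HAS_NEON flag is detected. A minimal stand-alone sketch of that pattern follows, using the hypothetical names my_filter_*, my_filter, and my_get_cpu_flags rather than real libvpx symbols.

/*
 * Minimal sketch of the rtcd dispatch pattern seen in vp8_rtcd.h above.
 * All identifiers here are hypothetical stand-ins, not libvpx API.
 */
#include <stdio.h>

#define HAS_NEON 0x1

void my_filter_c(int *dst)    { *dst = 0; }  /* portable fallback      */
void my_filter_neon(int *dst) { *dst = 1; }  /* SIMD-optimized variant */

/* RTCD_EXTERN in the real headers; a plain function pointer here. */
void (*my_filter)(int *dst);

static int my_get_cpu_flags(void) { return HAS_NEON; /* pretend a NEON CPU */ }

/* Mirrors setup_rtcd_internal(): default to C, upgrade when the flag is set. */
static void my_setup_rtcd(void) {
  int flags = my_get_cpu_flags();
  my_filter = my_filter_c;
  if (flags & HAS_NEON) my_filter = my_filter_neon;
}

int main(void) {
  my_setup_rtcd();
  int v;
  my_filter(&v);
  printf("dispatched to %s\n", v ? "NEON" : "C");
  return 0;
}

The vp9_rtcd.h diff that follows applies the same mechanism to the intra predictors: each one gains a *_neon declaration, its #define is replaced by an RTCD_EXTERN pointer, and setup_rtcd_internal() gains the corresponding HAS_NEON assignment.
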
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
index eefc3fd7a47..a4108d1f790 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
@@ -93,7 +93,8 @@ void vp9_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t
#define vp9_d135_predictor_32x32 vp9_d135_predictor_32x32_c
void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_c
+void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d135_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_d135_predictor_8x8 vp9_d135_predictor_8x8_c
@@ -123,16 +124,19 @@ void vp9_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a
#define vp9_d207_predictor_8x8 vp9_d207_predictor_8x8_c
void vp9_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_c
+void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_d45_predictor_32x32 vp9_d45_predictor_32x32_c
void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_c
+void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_c
+void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_d63_predictor_16x16 vp9_d63_predictor_16x16_c
@@ -147,52 +151,68 @@ void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab
#define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c
void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c
+void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c
+void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c
+void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c
+void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c
+void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c
+void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c
+void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c
+void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c
+void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c
+void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c
+void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c
+void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c
+void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c
+void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c
+void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
+void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude);
#define vp9_denoiser_filter vp9_denoiser_filter_c
@@ -257,17 +277,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_c
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_c
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -410,18 +419,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli
void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_c
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_c
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_c
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_c
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_c
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_c
@@ -565,51 +562,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_c
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_c
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_c
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_c
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_c
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_c
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_c
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_c
@@ -643,16 +595,52 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) vp9_convolve_avg = vp9_convolve_avg_neon;
vp9_convolve_copy = vp9_convolve_copy_c;
if (flags & HAS_NEON) vp9_convolve_copy = vp9_convolve_copy_neon;
+ vp9_d135_predictor_4x4 = vp9_d135_predictor_4x4_c;
+ if (flags & HAS_NEON) vp9_d135_predictor_4x4 = vp9_d135_predictor_4x4_neon;
+ vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_c;
+ if (flags & HAS_NEON) vp9_d45_predictor_16x16 = vp9_d45_predictor_16x16_neon;
+ vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_c;
+ if (flags & HAS_NEON) vp9_d45_predictor_4x4 = vp9_d45_predictor_4x4_neon;
+ vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_c;
+ if (flags & HAS_NEON) vp9_d45_predictor_8x8 = vp9_d45_predictor_8x8_neon;
+ vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_c;
+ if (flags & HAS_NEON) vp9_dc_128_predictor_16x16 = vp9_dc_128_predictor_16x16_neon;
+ vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_c;
+ if (flags & HAS_NEON) vp9_dc_128_predictor_32x32 = vp9_dc_128_predictor_32x32_neon;
+ vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_c;
+ if (flags & HAS_NEON) vp9_dc_128_predictor_4x4 = vp9_dc_128_predictor_4x4_neon;
+ vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_c;
+ if (flags & HAS_NEON) vp9_dc_128_predictor_8x8 = vp9_dc_128_predictor_8x8_neon;
+ vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_c;
+ if (flags & HAS_NEON) vp9_dc_left_predictor_16x16 = vp9_dc_left_predictor_16x16_neon;
+ vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_c;
+ if (flags & HAS_NEON) vp9_dc_left_predictor_32x32 = vp9_dc_left_predictor_32x32_neon;
+ vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_c;
+ if (flags & HAS_NEON) vp9_dc_left_predictor_4x4 = vp9_dc_left_predictor_4x4_neon;
+ vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_c;
+ if (flags & HAS_NEON) vp9_dc_left_predictor_8x8 = vp9_dc_left_predictor_8x8_neon;
+ vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_c;
+ if (flags & HAS_NEON) vp9_dc_predictor_16x16 = vp9_dc_predictor_16x16_neon;
+ vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_c;
+ if (flags & HAS_NEON) vp9_dc_predictor_32x32 = vp9_dc_predictor_32x32_neon;
+ vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_c;
+ if (flags & HAS_NEON) vp9_dc_predictor_4x4 = vp9_dc_predictor_4x4_neon;
+ vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_c;
+ if (flags & HAS_NEON) vp9_dc_predictor_8x8 = vp9_dc_predictor_8x8_neon;
+ vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_c;
+ if (flags & HAS_NEON) vp9_dc_top_predictor_16x16 = vp9_dc_top_predictor_16x16_neon;
+ vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_c;
+ if (flags & HAS_NEON) vp9_dc_top_predictor_32x32 = vp9_dc_top_predictor_32x32_neon;
+ vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_c;
+ if (flags & HAS_NEON) vp9_dc_top_predictor_4x4 = vp9_dc_top_predictor_4x4_neon;
+ vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_c;
+ if (flags & HAS_NEON) vp9_dc_top_predictor_8x8 = vp9_dc_top_predictor_8x8_neon;
vp9_fdct8x8 = vp9_fdct8x8_c;
if (flags & HAS_NEON) vp9_fdct8x8 = vp9_fdct8x8_neon;
vp9_fdct8x8_1 = vp9_fdct8x8_1_c;
if (flags & HAS_NEON) vp9_fdct8x8_1 = vp9_fdct8x8_1_neon;
vp9_fdct8x8_quant = vp9_fdct8x8_quant_c;
if (flags & HAS_NEON) vp9_fdct8x8_quant = vp9_fdct8x8_quant_neon;
- vp9_get16x16var = vp9_get16x16var_c;
- if (flags & HAS_NEON) vp9_get16x16var = vp9_get16x16var_neon;
- vp9_get8x8var = vp9_get8x8var_c;
- if (flags & HAS_NEON) vp9_get8x8var = vp9_get8x8var_neon;
vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
if (flags & HAS_NEON) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_neon;
vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
@@ -737,18 +725,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_neon;
vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c;
if (flags & HAS_NEON) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_neon;
- vp9_variance16x16 = vp9_variance16x16_c;
- if (flags & HAS_NEON) vp9_variance16x16 = vp9_variance16x16_neon;
- vp9_variance32x32 = vp9_variance32x32_c;
- if (flags & HAS_NEON) vp9_variance32x32 = vp9_variance32x32_neon;
- vp9_variance32x64 = vp9_variance32x64_c;
- if (flags & HAS_NEON) vp9_variance32x64 = vp9_variance32x64_neon;
- vp9_variance64x32 = vp9_variance64x32_c;
- if (flags & HAS_NEON) vp9_variance64x32 = vp9_variance64x32_neon;
- vp9_variance64x64 = vp9_variance64x64_c;
- if (flags & HAS_NEON) vp9_variance64x64 = vp9_variance64x64_neon;
- vp9_variance8x8 = vp9_variance8x8_c;
- if (flags & HAS_NEON) vp9_variance8x8 = vp9_variance8x8_neon;
}
#endif
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
index 10c565ed61b..2fcb7d25b85 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.asm
@@ -23,7 +23,6 @@
.equ HAVE_AVX2 , 0
.equ HAVE_VPX_PORTS , 1
.equ HAVE_STDINT_H , 1
-.equ HAVE_ALT_TREE_LAYOUT , 0
.equ HAVE_PTHREAD_H , 1
.equ HAVE_SYS_MMAN_H , 1
.equ HAVE_UNISTD_H , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.c
index 9b5a0ea3583..721ab0ae421 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --enable-runtime-cpu-detect --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h
index 3fb03579215..a1a3dcd04a7 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 0
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h
index 5d3e9df812b..f926cf29a59 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vpx_dsp_rtcd.h
@@ -18,6 +18,38 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+RTCD_EXTERN unsigned int (*vpx_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_c
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_c
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_c
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_c
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_media(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -194,6 +226,55 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
#define vpx_sad8x8x8 vpx_sad8x8x8_c
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_c
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x16 vpx_variance32x16_c
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_c
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_c
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_c
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
@@ -206,6 +287,14 @@ static void setup_rtcd_internal(void)
(void)flags;
+ vpx_get16x16var = vpx_get16x16var_c;
+ if (flags & HAS_NEON) vpx_get16x16var = vpx_get16x16var_neon;
+ vpx_get4x4sse_cs = vpx_get4x4sse_cs_c;
+ if (flags & HAS_NEON) vpx_get4x4sse_cs = vpx_get4x4sse_cs_neon;
+ vpx_get8x8var = vpx_get8x8var_c;
+ if (flags & HAS_NEON) vpx_get8x8var = vpx_get8x8var_neon;
+ vpx_mse16x16 = vpx_mse16x16_media;
+ if (flags & HAS_NEON) vpx_mse16x16 = vpx_mse16x16_neon;
vpx_sad16x16 = vpx_sad16x16_media;
if (flags & HAS_NEON) vpx_sad16x16 = vpx_sad16x16_neon;
vpx_sad16x16x4d = vpx_sad16x16x4d_c;
@@ -226,6 +315,22 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) vpx_sad8x16 = vpx_sad8x16_neon;
vpx_sad8x8 = vpx_sad8x8_c;
if (flags & HAS_NEON) vpx_sad8x8 = vpx_sad8x8_neon;
+ vpx_variance16x16 = vpx_variance16x16_media;
+ if (flags & HAS_NEON) vpx_variance16x16 = vpx_variance16x16_neon;
+ vpx_variance16x8 = vpx_variance16x8_c;
+ if (flags & HAS_NEON) vpx_variance16x8 = vpx_variance16x8_neon;
+ vpx_variance32x32 = vpx_variance32x32_c;
+ if (flags & HAS_NEON) vpx_variance32x32 = vpx_variance32x32_neon;
+ vpx_variance32x64 = vpx_variance32x64_c;
+ if (flags & HAS_NEON) vpx_variance32x64 = vpx_variance32x64_neon;
+ vpx_variance64x32 = vpx_variance64x32_c;
+ if (flags & HAS_NEON) vpx_variance64x32 = vpx_variance64x32_neon;
+ vpx_variance64x64 = vpx_variance64x64_c;
+ if (flags & HAS_NEON) vpx_variance64x64 = vpx_variance64x64_neon;
+ vpx_variance8x16 = vpx_variance8x16_c;
+ if (flags & HAS_NEON) vpx_variance8x16 = vpx_variance8x16_neon;
+ vpx_variance8x8 = vpx_variance8x8_media;
+ if (flags & HAS_NEON) vpx_variance8x8 = vpx_variance8x8_neon;
}
#endif
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp8_rtcd.h
index 8710a5784d1..a7a593501ad 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp8_rtcd.h
@@ -33,8 +33,7 @@ void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst
void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict4x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-void vp8_bilinear_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_neon
+#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_armv6
void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x4_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
@@ -136,13 +135,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-unsigned int vp8_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_neon
-
-unsigned int vp8_get_mb_ss_c(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_c
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_armv6
@@ -199,11 +191,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in
int vp8_mbuverror_c(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_c
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_neon
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
#define vp8_plane_add_noise vp8_plane_add_noise_c
@@ -263,9 +250,6 @@ void vp8_sixtap_predict8x8_armv6(unsigned char *src, int src_pitch, int xofst, i
void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_neon
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
@@ -282,8 +266,7 @@ unsigned int vp8_sub_pixel_variance8x16_c(const unsigned char *src_ptr, int so
unsigned int vp8_sub_pixel_variance8x8_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_variance8x8_neon(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_neon
+#define vp8_sub_pixel_variance8x8 vp8_sub_pixel_variance8x8_armv6
void vp8_subtract_b_c(struct block *be, struct blockd *bd, int pitch);
void vp8_subtract_b_neon(struct block *be, struct blockd *bd, int pitch);
@@ -297,27 +280,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne
void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
#define vp8_subtract_mby vp8_subtract_mby_neon
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_neon
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_neon
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_c
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_neon
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_neon
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h
index 9e445d5476f..8cc98e853c4 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h
@@ -93,7 +93,8 @@ void vp9_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t
#define vp9_d135_predictor_32x32 vp9_d135_predictor_32x32_c
void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_c
+void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_neon
void vp9_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_d135_predictor_8x8 vp9_d135_predictor_8x8_c
@@ -123,16 +124,19 @@ void vp9_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a
#define vp9_d207_predictor_8x8 vp9_d207_predictor_8x8_c
void vp9_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_c
+void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_neon
void vp9_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_d45_predictor_32x32 vp9_d45_predictor_32x32_c
void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_c
+void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_neon
void vp9_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_c
+void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_neon
void vp9_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_d63_predictor_16x16 vp9_d63_predictor_16x16_c
@@ -147,52 +151,68 @@ void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab
#define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c
void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c
+void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_neon
void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c
+void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_neon
void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c
+void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_neon
void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c
+void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_neon
void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c
+void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_neon
void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c
+void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_neon
void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c
+void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_neon
void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c
+void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_neon
void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c
+void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_neon
void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c
+void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_neon
void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c
+void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_neon
void vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c
+void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_neon
void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c
+void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_neon
void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c
+void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_neon
void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c
+void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_neon
void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
+void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_neon
int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude);
#define vp9_denoiser_filter vp9_denoiser_filter_c
@@ -257,17 +277,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_c
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get16x16var vp9_get16x16var_neon
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_neon
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_c
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_neon
@@ -410,18 +419,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli
void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_c
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_c
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_c
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_c
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_c
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_c
@@ -565,51 +562,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_neon
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x16 vp9_variance16x16_neon
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_c
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_c
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_c
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x32 vp9_variance32x32_neon
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_neon
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_c
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_c
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x32 vp9_variance64x32_neon
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x64 vp9_variance64x64_neon
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_c
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_c
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_neon
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_c
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm
index fb523269888..b1043e656d9 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.asm
@@ -23,7 +23,6 @@
.equ HAVE_AVX2 , 0
.equ HAVE_VPX_PORTS , 1
.equ HAVE_STDINT_H , 1
-.equ HAVE_ALT_TREE_LAYOUT , 0
.equ HAVE_PTHREAD_H , 1
.equ HAVE_SYS_MMAN_H , 1
.equ HAVE_UNISTD_H , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.c
index 58a32f0238e..e05d6991036 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=armv7-linux-gcc --enable-pic --enable-realtime-only --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h
index 477014ee9c6..4aaf47ba413 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 0
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h
index 955c7640f79..82aa34db379 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vpx_dsp_rtcd.h
@@ -18,6 +18,38 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get16x16var vpx_get16x16var_neon
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_neon
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_neon
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_c
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x16 vpx_mse16x16_neon
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_c
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_c
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_c
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_media(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -194,6 +226,55 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
#define vpx_sad8x8x8 vpx_sad8x8x8_c
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x16 vpx_variance16x16_neon
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_c
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_neon
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x16 vpx_variance32x16_c
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x32 vpx_variance32x32_neon
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_neon
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_c
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_c
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x32 vpx_variance64x32_neon
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x64 vpx_variance64x64_neon
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_neon
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_c
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_neon
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm/vp8_rtcd.h
index 37e4a3529d3..6d28eb60d2f 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm/vp8_rtcd.h
@@ -119,12 +119,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c
-
-unsigned int vp8_get_mb_ss_c(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_c
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_armv6
@@ -173,10 +167,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in
int vp8_mbuverror_c(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_c
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_armv6
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
#define vp8_plane_add_noise vp8_plane_add_noise_c
@@ -228,9 +218,6 @@ void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int y
void vp8_sixtap_predict8x8_armv6(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_armv6
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
#define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_armv6
@@ -257,23 +244,6 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
#define vp8_subtract_mby vp8_subtract_mby_c
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_armv6
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_c
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_c
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_c
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_armv6
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_armv6(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_armv6
diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm/vp9_rtcd.h
index fcca3063998..d1974a6f368 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm/vp9_rtcd.h
@@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_c
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get16x16var vp9_get16x16var_c
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_c
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_c
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c
@@ -368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli
void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_c
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_c
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_c
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_c
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_c
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_c
@@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x16 vp9_variance16x16_c
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_c
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_c
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_c
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x32 vp9_variance32x32_c
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_c
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_c
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_c
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x32 vp9_variance64x32_c
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x64 vp9_variance64x64_c
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_c
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_c
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_c
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_c
diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm
index cf7330af874..c0e9fb24ff9 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.asm
@@ -23,7 +23,6 @@
.equ HAVE_AVX2 , 0
.equ HAVE_VPX_PORTS , 1
.equ HAVE_STDINT_H , 1
-.equ HAVE_ALT_TREE_LAYOUT , 0
.equ HAVE_PTHREAD_H , 1
.equ HAVE_SYS_MMAN_H , 1
.equ HAVE_UNISTD_H , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.c
index 336b29cfd0c..0b06ef9adf1 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=armv6-linux-gcc --enable-pic --enable-realtime-only --disable-install-bins --disable-install-libs --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
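
Note: the added #include "vpx/vpx_codec.h" pulls in the public prototype for vpx_codec_build_config(), so the definition above is checked against its declaration. As a minimal sketch (assuming only the public libvpx header shown here), a caller can dump the configure line baked into the library:

/* build_config_dump.c - print the configure flags libvpx was built with.
   Illustrative sketch only; assumes the public vpx/vpx_codec.h header. */
#include <stdio.h>
#include "vpx/vpx_codec.h"

int main(void) {
  /* Returns the cfg string defined in vpx_config.c for this target. */
  printf("libvpx build config: %s\n", vpx_codec_build_config());
  return 0;
}
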
diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h
index 99f4a7cea0a..0328467f029 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 0
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm/vpx_dsp_rtcd.h
index 20948755101..ea77e14aa19 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm/vpx_dsp_rtcd.h
@@ -18,6 +18,34 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get16x16var vpx_get16x16var_c
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_c
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_c
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x16 vpx_mse16x16_media
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_c
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_c
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_c
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_media(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad16x16 vpx_sad16x16_media
@@ -184,6 +212,47 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
#define vpx_sad8x8x8 vpx_sad8x8x8_c
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x16 vpx_variance16x16_media
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_c
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_c
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x16 vpx_variance32x16_c
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x32 vpx_variance32x32_c
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_c
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_c
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_c
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x32 vpx_variance64x32_c
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x64 vpx_variance64x64_c
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_c
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_c
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_media(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_media
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm64/vp8_rtcd.h
index 60f13cf8812..af969c3617f 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vp8_rtcd.h
@@ -31,8 +31,7 @@ void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst
#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_neon
void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-void vp8_bilinear_predict4x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_neon
+#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c
void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
@@ -124,13 +123,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-unsigned int vp8_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_neon
-
-unsigned int vp8_get_mb_ss_c(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_c
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -178,10 +170,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in
int vp8_mbuverror_c(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_c
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_neon
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
#define vp8_plane_add_noise vp8_plane_add_noise_c
@@ -232,9 +220,6 @@ void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int y
void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_neon
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
#define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c
@@ -262,25 +247,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne
void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
#define vp8_subtract_mby vp8_subtract_mby_neon
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_neon
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_neon
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_c
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_neon
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_neon
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h
index a976f9a7731..1541cf05522 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h
@@ -93,7 +93,8 @@ void vp9_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t
#define vp9_d135_predictor_32x32 vp9_d135_predictor_32x32_c
void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_c
+void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d135_predictor_4x4 vp9_d135_predictor_4x4_neon
void vp9_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_d135_predictor_8x8 vp9_d135_predictor_8x8_c
@@ -123,16 +124,19 @@ void vp9_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *a
#define vp9_d207_predictor_8x8 vp9_d207_predictor_8x8_c
void vp9_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_c
+void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_16x16 vp9_d45_predictor_16x16_neon
void vp9_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_d45_predictor_32x32 vp9_d45_predictor_32x32_c
void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_c
+void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_4x4 vp9_d45_predictor_4x4_neon
void vp9_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_c
+void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_d45_predictor_8x8 vp9_d45_predictor_8x8_neon
void vp9_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_d63_predictor_16x16 vp9_d63_predictor_16x16_c
@@ -147,52 +151,68 @@ void vp9_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *ab
#define vp9_d63_predictor_8x8 vp9_d63_predictor_8x8_c
void vp9_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_c
+void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_16x16 vp9_dc_128_predictor_16x16_neon
void vp9_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_c
+void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_32x32 vp9_dc_128_predictor_32x32_neon
void vp9_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_c
+void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_4x4 vp9_dc_128_predictor_4x4_neon
void vp9_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_c
+void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_128_predictor_8x8 vp9_dc_128_predictor_8x8_neon
void vp9_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_c
+void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_16x16 vp9_dc_left_predictor_16x16_neon
void vp9_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_c
+void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_32x32 vp9_dc_left_predictor_32x32_neon
void vp9_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_c
+void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_4x4 vp9_dc_left_predictor_4x4_neon
void vp9_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_c
+void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_left_predictor_8x8 vp9_dc_left_predictor_8x8_neon
void vp9_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_c
+void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_16x16 vp9_dc_predictor_16x16_neon
void vp9_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_c
+void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_32x32 vp9_dc_predictor_32x32_neon
void vp9_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_c
+void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_4x4 vp9_dc_predictor_4x4_neon
void vp9_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_c
+void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_predictor_8x8 vp9_dc_predictor_8x8_neon
void vp9_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_c
+void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_16x16 vp9_dc_top_predictor_16x16_neon
void vp9_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_c
+void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_32x32 vp9_dc_top_predictor_32x32_neon
void vp9_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_c
+void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_4x4 vp9_dc_top_predictor_4x4_neon
void vp9_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_c
+void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+#define vp9_dc_top_predictor_8x8 vp9_dc_top_predictor_8x8_neon
int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude);
#define vp9_denoiser_filter vp9_denoiser_filter_c
@@ -257,17 +277,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_c
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get16x16var vp9_get16x16var_neon
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_neon
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_c
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_neon
@@ -402,18 +411,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli
void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_c
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_c
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_c
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_c
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_c
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_c
@@ -557,51 +554,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_neon
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x16 vp9_variance16x16_neon
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_c
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_c
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_c
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x32 vp9_variance32x32_neon
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_neon
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_c
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_c
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x32 vp9_variance64x32_neon
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x64 vp9_variance64x64_neon
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_c
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_c
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_neon
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_c
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm
index c7ef4366028..8d1201d83be 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.asm
@@ -23,7 +23,6 @@
.equ HAVE_AVX2 , 0
.equ HAVE_VPX_PORTS , 1
.equ HAVE_STDINT_H , 1
-.equ HAVE_ALT_TREE_LAYOUT , 0
.equ HAVE_PTHREAD_H , 1
.equ HAVE_SYS_MMAN_H , 1
.equ HAVE_UNISTD_H , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.c
index 84cebcdae2f..6674d94aed4 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--force-target=armv8-linux-gcc --enable-pic --enable-realtime-only --disable-edsp --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h
index 0409b886a57..f54e589ef32 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 0
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
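
These HAVE_* macros from vpx_config.h are what the source files test to compile in an ISA-specific path (HAVE_ALT_TREE_LAYOUT is simply dropped from the generated config here). A hedged sketch of the usual gating pattern; copy_block_neon is a hypothetical helper, not a libvpx symbol:

/* Sketch of how vpx_config.h feature flags typically gate ISA-specific code. */
#include <stdint.h>
#include "vpx_config.h"

#if HAVE_NEON
void copy_block_neon(uint8_t *dst, const uint8_t *src, int n);  /* hypothetical */
#endif

void copy_block(uint8_t *dst, const uint8_t *src, int n) {
#if HAVE_NEON
  copy_block_neon(dst, src, n);
#else
  for (int i = 0; i < n; ++i) dst[i] = src[i];  /* portable fallback */
#endif
}
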
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h
index a0b6898b29b..af0741cca13 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vpx_dsp_rtcd.h
@@ -18,6 +18,37 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get16x16var vpx_get16x16var_neon
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_neon
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_neon
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_c
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x16 vpx_mse16x16_neon
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_c
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_c
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_c
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad16x16 vpx_sad16x16_neon
@@ -193,6 +224,53 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
#define vpx_sad8x8x8 vpx_sad8x8x8_c
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x16 vpx_variance16x16_neon
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_c
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_neon
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x16 vpx_variance32x16_c
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x32 vpx_variance32x32_neon
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_neon
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_c
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_c
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x32 vpx_variance64x32_neon
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x64 vpx_variance64x64_neon
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_neon
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_c
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_neon(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_neon
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
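
On this arm64 config the new vpx_dsp macros resolve straight to the _neon variants at compile time, while the paired _c prototypes remain as the reference implementations; configs with run-time CPU detection instead expose RTCD_EXTERN function pointers that vpx_dsp_rtcd() fills in. As a worked note on what these kernels compute: the get*var routines return the raw sum and sum of squared differences, from which variance is derived as sse - sum^2/N. A small sketch under that assumption (the wrapper below is hypothetical):

/* Sketch: deriving an 8x8 block variance from the sse/sum pair produced by
   vpx_get8x8var (which this arm64 config maps to the NEON variant). */
#include <stdint.h>
#include "./vpx_dsp_rtcd.h"

static unsigned int variance8x8_from_parts(const uint8_t *src, int src_stride,
                                           const uint8_t *ref, int ref_stride) {
  unsigned int sse = 0;
  int sum = 0;
  vpx_get8x8var(src, src_stride, ref, ref_stride, &sse, &sum);
  /* variance = SSE - sum^2 / N, with N = 64 samples in an 8x8 block */
  return sse - (unsigned int)(((int64_t)sum * sum) >> 6);
}
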
diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/generic/vp8_rtcd.h
index 1bd7496fe55..7a33ed2effb 100644
--- a/chromium/third_party/libvpx/source/config/linux/generic/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/generic/vp8_rtcd.h
@@ -107,12 +107,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c
-
-unsigned int vp8_get_mb_ss_c(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_c
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -152,9 +146,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in
int vp8_mbuverror_c(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_c
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_c
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
#define vp8_plane_add_noise vp8_plane_add_noise_c
@@ -197,9 +188,6 @@ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int y
void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
#define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c
@@ -224,21 +212,6 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
#define vp8_subtract_mby vp8_subtract_mby_c
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_c
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_c
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_c
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_c
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_c
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/generic/vp9_rtcd.h
index 84bd45f2876..8cf86073577 100644
--- a/chromium/third_party/libvpx/source/config/linux/generic/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/generic/vp9_rtcd.h
@@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_c
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get16x16var vp9_get16x16var_c
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_c
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_c
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c
@@ -368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli
void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_c
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_c
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_c
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_c
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_c
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_c
@@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x16 vp9_variance16x16_c
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_c
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_c
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_c
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x32 vp9_variance32x32_c
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_c
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_c
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_c
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x32 vp9_variance64x32_c
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x64 vp9_variance64x64_c
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_c
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_c
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_c
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_c
diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm
index 20db06465bf..a2a18fdc7fd 100644
--- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.asm
@@ -23,7 +23,6 @@
.equ HAVE_AVX2 , 0
.equ HAVE_VPX_PORTS , 1
.equ HAVE_STDINT_H , 1
-.equ HAVE_ALT_TREE_LAYOUT , 0
.equ HAVE_PTHREAD_H , 1
.equ HAVE_SYS_MMAN_H , 1
.equ HAVE_UNISTD_H , 0
diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.c
index 0e8d7265f36..2a18a45e3e3 100644
--- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=generic-gnu --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h
index ec8222e18c2..290508c1c88 100644
--- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 0
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/generic/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/generic/vpx_dsp_rtcd.h
index 86c5b4425dc..f086946da23 100644
--- a/chromium/third_party/libvpx/source/config/linux/generic/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/generic/vpx_dsp_rtcd.h
@@ -18,6 +18,33 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get16x16var vpx_get16x16var_c
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_c
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_c
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x16 vpx_mse16x16_c
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_c
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_c
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_c
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad16x16 vpx_sad16x16_c
@@ -183,6 +210,45 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
#define vpx_sad8x8x8 vpx_sad8x8x8_c
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x16 vpx_variance16x16_c
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_c
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_c
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x16 vpx_variance32x16_c
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x32 vpx_variance32x32_c
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_c
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_c
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_c
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x32 vpx_variance64x32_c
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x64 vpx_variance64x64_c
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_c
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_c
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/ia32/vp8_rtcd.h
index b60e7ac7301..38fe6d360b0 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vp8_rtcd.h
@@ -74,10 +74,10 @@ void vp8_clear_system_state_c();
void vpx_reset_mmx_state();
RTCD_EXTERN void (*vp8_clear_system_state)();
-void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
+void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
@@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *
int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-
-unsigned int vp8_get_mb_ss_c(const short *);
-unsigned int vp8_get_mb_ss_mmx(const short *);
-unsigned int vp8_get_mb_ss_sse2(const short *);
-RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *);
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb);
int vp8_mbuverror_xmm(struct macroblock *mb);
RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb);
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
@@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in
void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
@@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig
void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
RTCD_EXTERN void (*vp8_subtract_mby)(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
@@ -448,11 +404,6 @@ static void setup_rtcd_internal(void)
vp8_full_search_sad = vp8_full_search_sad_c;
if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3;
if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8;
- vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
- if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx;
- vp8_get_mb_ss = vp8_get_mb_ss_c;
- if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx;
- if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2;
vp8_loop_filter_bh = vp8_loop_filter_bh_c;
if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
@@ -488,9 +439,6 @@ static void setup_rtcd_internal(void)
vp8_mbuverror = vp8_mbuverror_c;
if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx;
if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm;
- vp8_mse16x16 = vp8_mse16x16_c;
- if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx;
- if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt;
vp8_plane_add_noise = vp8_plane_add_noise_c;
if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx;
if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt;
@@ -529,9 +477,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx;
if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3;
- vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c;
- if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx;
- if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt;
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c;
if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx;
if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt;
@@ -558,21 +503,6 @@ static void setup_rtcd_internal(void)
vp8_subtract_mby = vp8_subtract_mby_c;
if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx;
if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2;
- vp8_variance16x16 = vp8_variance16x16_c;
- if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx;
- if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt;
- vp8_variance16x8 = vp8_variance16x8_c;
- if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx;
- if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt;
- vp8_variance4x4 = vp8_variance4x4_c;
- if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx;
- if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt;
- vp8_variance8x16 = vp8_variance8x16_c;
- if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx;
- if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt;
- vp8_variance8x8 = vp8_variance8x8_c;
- if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx;
- if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt;
vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c;
if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx;
if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt;
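
For context, the assignments that remain in setup_rtcd_internal() above follow libvpx's usual run-time CPU detection pattern: every hook starts at the always-available C fallback and is then promoted to the best SIMD variant the host reports. A minimal sketch of that pattern in C (the example_* names and the flag value are illustrative, not part of libvpx):

    /* Hedged sketch of the RTCD dispatch pattern used by these headers. */
    typedef void (*copy32xn_fn)(const unsigned char *src, int src_stride,
                                unsigned char *dst, int dst_stride, int n);

    /* Per-ISA kernels; declarations only, mirroring the prototypes above. */
    void example_copy32xn_c(const unsigned char *src, int src_stride,
                            unsigned char *dst, int dst_stride, int n);
    void example_copy32xn_sse2(const unsigned char *src, int src_stride,
                               unsigned char *dst, int dst_stride, int n);

    copy32xn_fn example_copy32xn;        /* the dispatch pointer */

    #define EXAMPLE_HAS_SSE2 0x08        /* flag value is illustrative */

    static void example_setup_rtcd(int flags) {
      example_copy32xn = example_copy32xn_c;   /* safe C fallback first */
      if (flags & EXAMPLE_HAS_SSE2)
        example_copy32xn = example_copy32xn_sse2;
    }
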
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h
index 7425dc281bd..1da20dc1bcc 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h
@@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint
RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c
+void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -320,19 +321,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride);
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-unsigned int vp9_get_mb_ss_sse2(const int16_t *);
-RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *);
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -484,23 +472,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi
void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
RTCD_EXTERN void (*vp9_plane_add_noise)(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
@@ -700,63 +671,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl);
RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl);
@@ -807,6 +721,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2;
vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
+ vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c;
+ if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3;
vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c;
@@ -907,13 +823,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
vp9_fwht4x4 = vp9_fwht4x4_c;
if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx;
- vp9_get16x16var = vp9_get16x16var_c;
- if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2;
- if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
- vp9_get8x8var = vp9_get8x8var_c;
- if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2;
- vp9_get_mb_ss = vp9_get_mb_ss_c;
- if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2;
vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
@@ -987,15 +896,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vp9_mbpost_proc_down = vp9_mbpost_proc_down_xmm;
vp9_minmax_8x8 = vp9_minmax_8x8_c;
if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2;
- vp9_mse16x16 = vp9_mse16x16_c;
- if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2;
- if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
- vp9_mse16x8 = vp9_mse16x8_c;
- if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2;
- vp9_mse8x16 = vp9_mse8x16_c;
- if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2;
- vp9_mse8x8 = vp9_mse8x8_c;
- if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2;
vp9_plane_add_noise = vp9_plane_add_noise_c;
if (flags & HAS_SSE2) vp9_plane_add_noise = vp9_plane_add_noise_wmt;
vp9_post_proc_down_and_across = vp9_post_proc_down_and_across_c;
@@ -1106,37 +1006,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse;
vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c;
if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse;
- vp9_variance16x16 = vp9_variance16x16_c;
- if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2;
- if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
- vp9_variance16x32 = vp9_variance16x32_c;
- if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2;
- vp9_variance16x8 = vp9_variance16x8_c;
- if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2;
- vp9_variance32x16 = vp9_variance32x16_c;
- if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2;
- if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
- vp9_variance32x32 = vp9_variance32x32_c;
- if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2;
- if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
- vp9_variance32x64 = vp9_variance32x64_c;
- if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2;
- vp9_variance4x4 = vp9_variance4x4_c;
- if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2;
- vp9_variance4x8 = vp9_variance4x8_c;
- if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2;
- vp9_variance64x32 = vp9_variance64x32_c;
- if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2;
- if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
- vp9_variance64x64 = vp9_variance64x64_c;
- if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2;
- if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
- vp9_variance8x16 = vp9_variance8x16_c;
- if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2;
- vp9_variance8x4 = vp9_variance8x4_c;
- if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2;
- vp9_variance8x8 = vp9_variance8x8_c;
- if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2;
vp9_vector_var = vp9_vector_var_c;
if (flags & HAS_SSE2) vp9_vector_var = vp9_vector_var_sse2;
}
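
The change from a plain #define to an RTCD_EXTERN pointer for vp9_d153_predictor_32x32 above follows the convention used throughout these generated headers: RTCD_EXTERN expands to extern in every translation unit except the one that defines RTCD_C, which owns the pointer definitions and runs setup_rtcd_internal(). A sketch of that convention, assumed from the generated-header pattern (example_predictor is illustrative):

    #include <stddef.h>
    #include <stdint.h>

    #ifdef RTCD_C
    #define RTCD_EXTERN              /* the one file that defines the pointers */
    #else
    #define RTCD_EXTERN extern       /* every other file only declares them */
    #endif

    /* One such pointer per hook, e.g. vp9_d153_predictor_32x32 above. */
    RTCD_EXTERN void (*example_predictor)(uint8_t *dst, ptrdiff_t y_stride,
                                          const uint8_t *above,
                                          const uint8_t *left);
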
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm
index b9418dda315..9550f440153 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.asm
@@ -20,7 +20,6 @@
%define HAVE_AVX2 1
%define HAVE_VPX_PORTS 1
%define HAVE_STDINT_H 1
-%define HAVE_ALT_TREE_LAYOUT 0
%define HAVE_PTHREAD_H 1
%define HAVE_SYS_MMAN_H 1
%define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.c
index f254f6d7167..08b58cc7244 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=x86-linux-gcc --disable-ccache --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
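
The added #include presumably pulls in the public declaration of vpx_codec_build_config() from vpx/vpx_codec.h, so the definition in this file no longer lacks a visible prototype (a typical missing-prototype warning cleanup). Roughly, the pairing looks like this (declaration paraphrased, config string truncated for illustration):

    /* Declared in vpx/vpx_codec.h: */
    const char *vpx_codec_build_config(void);

    /* Defined here in vpx_config.c against the baked-in configure line: */
    static const char *const cfg = "--target=x86-linux-gcc ...";
    const char *vpx_codec_build_config(void) { return cfg; }
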
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h
index 3715b4aef3e..c20daf6517f 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 1
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h
index 8cc2fa5b737..32ee77e25ce 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vpx_dsp_rtcd.h
@@ -18,6 +18,45 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+unsigned int vpx_get_mb_ss_mmx(const int16_t *);
+unsigned int vpx_get_mb_ss_sse2(const int16_t *);
+RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *);
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p
void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
void vpx_dsp_rtcd(void);
#ifdef RTCD_C
@@ -261,6 +362,25 @@ static void setup_rtcd_internal(void)
(void)flags;
+ vpx_get16x16var = vpx_get16x16var_c;
+ if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2;
+ if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2;
+ vpx_get8x8var = vpx_get8x8var_c;
+ if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx;
+ if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2;
+ vpx_get_mb_ss = vpx_get_mb_ss_c;
+ if (flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx;
+ if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2;
+ vpx_mse16x16 = vpx_mse16x16_c;
+ if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx;
+ if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2;
+ if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2;
+ vpx_mse16x8 = vpx_mse16x8_c;
+ if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2;
+ vpx_mse8x16 = vpx_mse8x16_c;
+ if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2;
+ vpx_mse8x8 = vpx_mse8x8_c;
+ if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2;
vpx_sad16x16 = vpx_sad16x16_c;
if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx;
if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2;
@@ -378,6 +498,42 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2;
vpx_sad8x8x8 = vpx_sad8x8x8_c;
if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1;
+ vpx_variance16x16 = vpx_variance16x16_c;
+ if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx;
+ if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2;
+ vpx_variance16x32 = vpx_variance16x32_c;
+ if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2;
+ vpx_variance16x8 = vpx_variance16x8_c;
+ if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx;
+ if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2;
+ vpx_variance32x16 = vpx_variance32x16_c;
+ if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2;
+ vpx_variance32x32 = vpx_variance32x32_c;
+ if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2;
+ vpx_variance32x64 = vpx_variance32x64_c;
+ if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2;
+ vpx_variance4x4 = vpx_variance4x4_c;
+ if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx;
+ if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2;
+ vpx_variance4x8 = vpx_variance4x8_c;
+ if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2;
+ vpx_variance64x32 = vpx_variance64x32_c;
+ if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2;
+ vpx_variance64x64 = vpx_variance64x64_c;
+ if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2;
+ vpx_variance8x16 = vpx_variance8x16_c;
+ if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx;
+ if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2;
+ vpx_variance8x4 = vpx_variance8x4_c;
+ if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2;
+ vpx_variance8x8 = vpx_variance8x8_c;
+ if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx;
+ if (flags & HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2;
}
#endif
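
The block above moves the get*var, mse*, and variance* hooks into vpx_dsp_rtcd.h for this target. As a point of reference, libvpx's variance kernels are conventionally built on the sse/sum pair produced by the get*var helpers; a hedged sketch of that relationship for a WxH block (illustrative code, not the library's implementation):

    #include <stdint.h>

    /* Variance of a WxH block from its sse/sum pair, mirroring the
     * vpx_getNxNvar() -> vpx_varianceNxN() split declared above. */
    static unsigned int block_variance(unsigned int sse, int sum,
                                       int width, int height) {
      const int64_t n = (int64_t)width * height;      /* pixel count */
      return (unsigned int)(sse - ((int64_t)sum * sum) / n);
    }
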
diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vp8_rtcd.h
index ec819184a76..3befb52db1e 100644
--- a/chromium/third_party/libvpx/source/config/linux/mips64el/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vp8_rtcd.h
@@ -107,12 +107,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c
-
-unsigned int vp8_get_mb_ss_c(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_c
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -152,9 +146,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in
int vp8_mbuverror_c(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_c
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_c
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
#define vp8_plane_add_noise vp8_plane_add_noise_c
@@ -197,9 +188,6 @@ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int y
void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
#define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c
@@ -227,21 +215,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne
void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count);
#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_c
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_c
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_c
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_c
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_c
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vp9_rtcd.h
index 84bd45f2876..8cf86073577 100644
--- a/chromium/third_party/libvpx/source/config/linux/mips64el/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vp9_rtcd.h
@@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_c
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get16x16var vp9_get16x16var_c
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_c
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_c
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c
@@ -368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli
void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_c
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_c
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_c
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_c
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_c
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_c
@@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x16 vp9_variance16x16_c
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_c
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_c
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_c
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x32 vp9_variance32x32_c
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_c
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_c
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_c
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x32 vp9_variance64x32_c
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x64 vp9_variance64x64_c
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_c
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_c
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_c
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_c
diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.c
index 1000fbb39b3..9e6876b5859 100644
--- a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=mips64-linux-gcc --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h
index 194dfa5d91d..a9e7f3ac8d9 100644
--- a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 0
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_dsp_rtcd.h
index 86c5b4425dc..f086946da23 100644
--- a/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/mips64el/vpx_dsp_rtcd.h
@@ -18,6 +18,33 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get16x16var vpx_get16x16var_c
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_c
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_c
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x16 vpx_mse16x16_c
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_c
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_c
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_c
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad16x16 vpx_sad16x16_c
@@ -183,6 +210,45 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
#define vpx_sad8x8x8 vpx_sad8x8x8_c
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x16 vpx_variance16x16_c
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_c
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_c
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x16 vpx_variance32x16_c
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x32 vpx_variance32x32_c
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_c
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_c
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_c
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x32 vpx_variance64x32_c
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x64 vpx_variance64x64_c
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_c
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_c
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vp8_rtcd.h
index ec819184a76..3befb52db1e 100644
--- a/chromium/third_party/libvpx/source/config/linux/mipsel/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vp8_rtcd.h
@@ -107,12 +107,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c
-
-unsigned int vp8_get_mb_ss_c(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_c
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -152,9 +146,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in
int vp8_mbuverror_c(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_c
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_c
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
#define vp8_plane_add_noise vp8_plane_add_noise_c
@@ -197,9 +188,6 @@ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int y
void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
#define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c
@@ -227,21 +215,6 @@ void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigne
void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count);
#define vp8_temporal_filter_apply vp8_temporal_filter_apply_c
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_c
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_c
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_c
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_c
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_c
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vp9_rtcd.h
index 84bd45f2876..8cf86073577 100644
--- a/chromium/third_party/libvpx/source/config/linux/mipsel/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vp9_rtcd.h
@@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_c
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get16x16var vp9_get16x16var_c
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_c
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_c
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c
@@ -368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli
void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_c
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_c
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_c
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_c
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_c
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_c
@@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x16 vp9_variance16x16_c
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_c
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_c
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_c
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x32 vp9_variance32x32_c
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_c
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_c
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_c
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x32 vp9_variance64x32_c
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x64 vp9_variance64x64_c
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_c
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_c
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_c
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_c
diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.c
index 7f5c1f6e858..2c1375d883a 100644
--- a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=mips32-linux-gcc --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
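(Note, not part of the patch: the per-platform vpx_config.c stubs above now include "vpx/vpx_codec.h" so the definition of vpx_codec_build_config() is compiled against its own prototype. vpx_codec_build_config() is the public libvpx call that returns the baked-in configure line shown in cfg. A rough usage sketch, assuming only that header and function:

    #include <stdio.h>
    #include "vpx/vpx_codec.h"

    int main(void) {
      /* Prints the configure flags this libvpx build was generated with,
       * e.g. "--target=mips32-linux-gcc --enable-external-build ...". */
      printf("libvpx built with: %s\n", vpx_codec_build_config());
      return 0;
    }
)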
diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h
index 1783d40cca5..4210a7c868d 100644
--- a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 0
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_dsp_rtcd.h
index 86c5b4425dc..f086946da23 100644
--- a/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/mipsel/vpx_dsp_rtcd.h
@@ -18,6 +18,33 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get16x16var vpx_get16x16var_c
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_c
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_c
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x16 vpx_mse16x16_c
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_c
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_c
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_c
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad16x16 vpx_sad16x16_c
@@ -183,6 +210,45 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
#define vpx_sad8x8x8 vpx_sad8x8x8_c
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x16 vpx_variance16x16_c
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_c
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_c
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x16 vpx_variance32x16_c
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x32 vpx_variance32x32_c
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_c
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_c
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_c
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x32 vpx_variance64x32_c
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x64 vpx_variance64x64_c
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_c
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_c
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/linux/x64/vp8_rtcd.h
index 9989eb09995..5285ac71055 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vp8_rtcd.h
@@ -74,10 +74,10 @@ void vp8_clear_system_state_c();
void vpx_reset_mmx_state();
#define vp8_clear_system_state vpx_reset_mmx_state
-void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
+void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
@@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *
int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx
-
-unsigned int vp8_get_mb_ss_c(const short *);
-unsigned int vp8_get_mb_ss_mmx(const short *);
-unsigned int vp8_get_mb_ss_sse2(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_sse2
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb);
int vp8_mbuverror_xmm(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_xmm
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_wmt
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
@@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in
void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
@@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig
void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
#define vp8_subtract_mby vp8_subtract_mby_sse2
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_wmt
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_wmt
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_wmt
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_wmt
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_wmt
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h
index 10a6b8401db..0834e762f34 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h
@@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint
RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c
+void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -321,19 +322,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_mmx
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_sse2
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-unsigned int vp9_get_mb_ss_sse2(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_sse2
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -488,23 +476,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi
void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_sse2
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_sse2
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_sse2
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_sse2
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_wmt
@@ -709,63 +680,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_sse2
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_sse2
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_sse2
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_sse2
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_sse2
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_sse2
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_sse2
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_sse2
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_sse2
@@ -799,6 +713,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2;
vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
+ vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c;
+ if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3;
vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c;
@@ -838,8 +754,6 @@ static void setup_rtcd_internal(void)
vp9_full_search_sad = vp9_full_search_sad_c;
if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
- vp9_get16x16var = vp9_get16x16var_sse2;
- if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
@@ -856,8 +770,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3;
vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2;
if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2;
- vp9_mse16x16 = vp9_mse16x16_sse2;
- if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
vp9_quantize_b = vp9_quantize_b_sse2;
if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3;
vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c;
@@ -922,16 +834,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3;
vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2;
if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3;
- vp9_variance16x16 = vp9_variance16x16_sse2;
- if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
- vp9_variance32x16 = vp9_variance32x16_sse2;
- if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
- vp9_variance32x32 = vp9_variance32x32_sse2;
- if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
- vp9_variance64x32 = vp9_variance64x32_sse2;
- if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
- vp9_variance64x64 = vp9_variance64x64_sse2;
- if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
}
#endif
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm
index 68a0d73808d..0c4b550b78a 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.asm
@@ -20,7 +20,6 @@
%define HAVE_AVX2 1
%define HAVE_VPX_PORTS 1
%define HAVE_STDINT_H 1
-%define HAVE_ALT_TREE_LAYOUT 0
%define HAVE_PTHREAD_H 1
%define HAVE_SYS_MMAN_H 1
%define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.c b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.c
index 1221f4849ea..4f190b1958d 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=x86_64-linux-gcc --disable-ccache --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h
index 72f336395ce..27d91efdbd4 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 1
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h
index b5df5e08c57..d93c56eb765 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vpx_dsp_rtcd.h
@@ -18,6 +18,45 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_sse2
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+unsigned int vpx_get_mb_ss_mmx(const int16_t *);
+unsigned int vpx_get_mb_ss_sse2(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_sse2
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_sse2
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_sse2
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_sse2
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p
void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_sse2
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_sse2
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_sse2
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_sse2
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_sse2
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_sse2
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_sse2
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_sse2
+
void vpx_dsp_rtcd(void);
#ifdef RTCD_C
@@ -261,6 +362,10 @@ static void setup_rtcd_internal(void)
(void)flags;
+ vpx_get16x16var = vpx_get16x16var_sse2;
+ if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2;
+ vpx_mse16x16 = vpx_mse16x16_sse2;
+ if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2;
vpx_sad16x16x3 = vpx_sad16x16x3_c;
if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3;
if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3;
@@ -307,6 +412,16 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3;
vpx_sad8x8x8 = vpx_sad8x8x8_c;
if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1;
+ vpx_variance16x16 = vpx_variance16x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2;
+ vpx_variance32x16 = vpx_variance32x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2;
+ vpx_variance32x32 = vpx_variance32x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2;
+ vpx_variance64x32 = vpx_variance64x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2;
+ vpx_variance64x64 = vpx_variance64x64_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2;
}
#endif
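(Note, not part of the patch: the variance/MSE entries moved into vpx_dsp_rtcd.h above follow libvpx's usual run-time CPU detection layout. Each kernel has a plain C baseline plus SIMD variants; the generated header either #defines the symbol to the best variant the build guarantees (e.g. SSE2 on x86_64) or declares an RTCD_EXTERN function pointer that setup_rtcd_internal() retargets when the CPU reports extra features such as AVX2. A minimal standalone sketch of that pattern, using hypothetical kernel names (my_kernel_*) and a simplified flag in place of libvpx's real feature probe:

    #include <stdio.h>

    #define HAS_AVX2 0x1  /* simplified stand-in for libvpx's feature flag */

    /* Baseline C kernel plus an "AVX2" variant (hypothetical names). */
    static unsigned int my_kernel_c(const unsigned char *src, int n) {
      unsigned int acc = 0;
      for (int i = 0; i < n; ++i) acc += src[i];
      return acc;
    }
    static unsigned int my_kernel_avx2(const unsigned char *src, int n) {
      /* A real build would use AVX2 intrinsics; fall back to C here. */
      return my_kernel_c(src, n);
    }

    /* Analogue of the RTCD_EXTERN pointer: callers always go through this symbol. */
    static unsigned int (*my_kernel)(const unsigned char *, int) = my_kernel_c;

    /* Analogue of setup_rtcd_internal(): pick the best variant once, at init. */
    static void setup_rtcd(int flags) {
      my_kernel = my_kernel_c;               /* safe default */
      if (flags & HAS_AVX2) my_kernel = my_kernel_avx2;
    }

    int main(void) {
      unsigned char buf[16] = {0};
      setup_rtcd(HAS_AVX2);                  /* pretend the CPU reports AVX2 */
      printf("%u\n", my_kernel(buf, 16));
      return 0;
    }
)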
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/mac/ia32/vp8_rtcd.h
index b60e7ac7301..38fe6d360b0 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vp8_rtcd.h
@@ -74,10 +74,10 @@ void vp8_clear_system_state_c();
void vpx_reset_mmx_state();
RTCD_EXTERN void (*vp8_clear_system_state)();
-void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
+void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
@@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *
int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-
-unsigned int vp8_get_mb_ss_c(const short *);
-unsigned int vp8_get_mb_ss_mmx(const short *);
-unsigned int vp8_get_mb_ss_sse2(const short *);
-RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *);
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb);
int vp8_mbuverror_xmm(struct macroblock *mb);
RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb);
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
@@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in
void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
@@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig
void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
RTCD_EXTERN void (*vp8_subtract_mby)(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
@@ -448,11 +404,6 @@ static void setup_rtcd_internal(void)
vp8_full_search_sad = vp8_full_search_sad_c;
if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3;
if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8;
- vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
- if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx;
- vp8_get_mb_ss = vp8_get_mb_ss_c;
- if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx;
- if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2;
vp8_loop_filter_bh = vp8_loop_filter_bh_c;
if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
@@ -488,9 +439,6 @@ static void setup_rtcd_internal(void)
vp8_mbuverror = vp8_mbuverror_c;
if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx;
if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm;
- vp8_mse16x16 = vp8_mse16x16_c;
- if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx;
- if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt;
vp8_plane_add_noise = vp8_plane_add_noise_c;
if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx;
if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt;
@@ -529,9 +477,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx;
if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3;
- vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c;
- if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx;
- if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt;
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c;
if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx;
if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt;
@@ -558,21 +503,6 @@ static void setup_rtcd_internal(void)
vp8_subtract_mby = vp8_subtract_mby_c;
if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx;
if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2;
- vp8_variance16x16 = vp8_variance16x16_c;
- if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx;
- if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt;
- vp8_variance16x8 = vp8_variance16x8_c;
- if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx;
- if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt;
- vp8_variance4x4 = vp8_variance4x4_c;
- if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx;
- if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt;
- vp8_variance8x16 = vp8_variance8x16_c;
- if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx;
- if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt;
- vp8_variance8x8 = vp8_variance8x8_c;
- if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx;
- if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt;
vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c;
if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx;
if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt;
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h
index 7425dc281bd..1da20dc1bcc 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h
@@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint
RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c
+void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -320,19 +321,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride);
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-unsigned int vp9_get_mb_ss_sse2(const int16_t *);
-RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *);
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -484,23 +472,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi
void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
RTCD_EXTERN void (*vp9_plane_add_noise)(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
@@ -700,63 +671,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl);
RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl);
@@ -807,6 +721,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2;
vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
+ vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c;
+ if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3;
vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c;
@@ -907,13 +823,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
vp9_fwht4x4 = vp9_fwht4x4_c;
if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx;
- vp9_get16x16var = vp9_get16x16var_c;
- if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2;
- if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
- vp9_get8x8var = vp9_get8x8var_c;
- if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2;
- vp9_get_mb_ss = vp9_get_mb_ss_c;
- if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2;
vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
@@ -987,15 +896,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vp9_mbpost_proc_down = vp9_mbpost_proc_down_xmm;
vp9_minmax_8x8 = vp9_minmax_8x8_c;
if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2;
- vp9_mse16x16 = vp9_mse16x16_c;
- if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2;
- if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
- vp9_mse16x8 = vp9_mse16x8_c;
- if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2;
- vp9_mse8x16 = vp9_mse8x16_c;
- if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2;
- vp9_mse8x8 = vp9_mse8x8_c;
- if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2;
vp9_plane_add_noise = vp9_plane_add_noise_c;
if (flags & HAS_SSE2) vp9_plane_add_noise = vp9_plane_add_noise_wmt;
vp9_post_proc_down_and_across = vp9_post_proc_down_and_across_c;
@@ -1106,37 +1006,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse;
vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c;
if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse;
- vp9_variance16x16 = vp9_variance16x16_c;
- if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2;
- if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
- vp9_variance16x32 = vp9_variance16x32_c;
- if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2;
- vp9_variance16x8 = vp9_variance16x8_c;
- if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2;
- vp9_variance32x16 = vp9_variance32x16_c;
- if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2;
- if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
- vp9_variance32x32 = vp9_variance32x32_c;
- if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2;
- if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
- vp9_variance32x64 = vp9_variance32x64_c;
- if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2;
- vp9_variance4x4 = vp9_variance4x4_c;
- if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2;
- vp9_variance4x8 = vp9_variance4x8_c;
- if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2;
- vp9_variance64x32 = vp9_variance64x32_c;
- if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2;
- if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
- vp9_variance64x64 = vp9_variance64x64_c;
- if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2;
- if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
- vp9_variance8x16 = vp9_variance8x16_c;
- if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2;
- vp9_variance8x4 = vp9_variance8x4_c;
- if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2;
- vp9_variance8x8 = vp9_variance8x8_c;
- if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2;
vp9_vector_var = vp9_vector_var_c;
if (flags & HAS_SSE2) vp9_vector_var = vp9_vector_var_sse2;
}
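
The hunks above all follow the RTCD (run-time CPU detection) pattern used throughout these generated headers: each hooked function is declared once per specialization (_c, _sse2, _avx2, ...), exposed through an RTCD_EXTERN function pointer, and setup_rtcd_internal() rebinds that pointer to the best implementation the detected CPU flags allow, with the plain C version as the fallback. Below is a minimal, self-contained sketch of that pattern only; the flag values and the mse16x16 stand-ins are illustrative, not real libvpx symbols.

    #include <stdint.h>
    #include <stdio.h>

    #define HAS_SSE2 0x01
    #define HAS_AVX2 0x02

    /* Stand-ins for e.g. vpx_mse16x16_c / _sse2 / _avx2; every specialization
     * shares one signature so they are interchangeable behind a pointer. */
    static unsigned int mse16x16_c(const uint8_t *src, int src_stride,
                                   const uint8_t *ref, int ref_stride,
                                   unsigned int *sse) {
      unsigned int s = 0;
      for (int r = 0; r < 16; ++r)
        for (int c = 0; c < 16; ++c) {
          int d = src[r * src_stride + c] - ref[r * ref_stride + c];
          s += (unsigned int)(d * d);
        }
      *sse = s;
      return s;
    }
    static unsigned int mse16x16_sse2(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      unsigned int *sse) {
      return mse16x16_c(src, src_stride, ref, ref_stride, sse); /* pretend SIMD */
    }
    static unsigned int mse16x16_avx2(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      unsigned int *sse) {
      return mse16x16_c(src, src_stride, ref, ref_stride, sse); /* pretend SIMD */
    }

    /* The RTCD_EXTERN pointer: callers always dispatch through this. */
    static unsigned int (*mse16x16)(const uint8_t *, int, const uint8_t *, int,
                                    unsigned int *);

    static void setup_rtcd_internal(int flags) {
      mse16x16 = mse16x16_c;                           /* C fallback first   */
      if (flags & HAS_SSE2) mse16x16 = mse16x16_sse2;  /* then upgrade ...   */
      if (flags & HAS_AVX2) mse16x16 = mse16x16_avx2;  /* ... last match wins */
    }

    int main(void) {
      uint8_t a[16 * 16] = {0}, b[16 * 16] = {0};
      unsigned int sse;
      setup_rtcd_internal(HAS_SSE2);  /* pretend the CPU reported SSE2 only */
      printf("mse=%u\n", mse16x16(a, 16, b, 16, &sse));
      return 0;
    }
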
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm
index b9418dda315..9550f440153 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.asm
@@ -20,7 +20,6 @@
%define HAVE_AVX2 1
%define HAVE_VPX_PORTS 1
%define HAVE_STDINT_H 1
-%define HAVE_ALT_TREE_LAYOUT 0
%define HAVE_PTHREAD_H 1
%define HAVE_SYS_MMAN_H 1
%define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.c b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.c
index d8cb73c99ab..90849ad6fbf 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=x86-darwin9-gcc --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h
index 3715b4aef3e..c20daf6517f 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 1
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h
index 8cc2fa5b737..32ee77e25ce 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vpx_dsp_rtcd.h
@@ -18,6 +18,45 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+unsigned int vpx_get_mb_ss_mmx(const int16_t *);
+unsigned int vpx_get_mb_ss_sse2(const int16_t *);
+RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *);
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p
void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
void vpx_dsp_rtcd(void);
#ifdef RTCD_C
@@ -261,6 +362,25 @@ static void setup_rtcd_internal(void)
(void)flags;
+ vpx_get16x16var = vpx_get16x16var_c;
+ if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2;
+ if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2;
+ vpx_get8x8var = vpx_get8x8var_c;
+ if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx;
+ if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2;
+ vpx_get_mb_ss = vpx_get_mb_ss_c;
+ if (flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx;
+ if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2;
+ vpx_mse16x16 = vpx_mse16x16_c;
+ if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx;
+ if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2;
+ if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2;
+ vpx_mse16x8 = vpx_mse16x8_c;
+ if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2;
+ vpx_mse8x16 = vpx_mse8x16_c;
+ if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2;
+ vpx_mse8x8 = vpx_mse8x8_c;
+ if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2;
vpx_sad16x16 = vpx_sad16x16_c;
if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx;
if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2;
@@ -378,6 +498,42 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2;
vpx_sad8x8x8 = vpx_sad8x8x8_c;
if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1;
+ vpx_variance16x16 = vpx_variance16x16_c;
+ if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx;
+ if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2;
+ vpx_variance16x32 = vpx_variance16x32_c;
+ if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2;
+ vpx_variance16x8 = vpx_variance16x8_c;
+ if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx;
+ if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2;
+ vpx_variance32x16 = vpx_variance32x16_c;
+ if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2;
+ vpx_variance32x32 = vpx_variance32x32_c;
+ if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2;
+ vpx_variance32x64 = vpx_variance32x64_c;
+ if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2;
+ vpx_variance4x4 = vpx_variance4x4_c;
+ if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx;
+ if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2;
+ vpx_variance4x8 = vpx_variance4x8_c;
+ if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2;
+ vpx_variance64x32 = vpx_variance64x32_c;
+ if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2;
+ vpx_variance64x64 = vpx_variance64x64_c;
+ if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2;
+ vpx_variance8x16 = vpx_variance8x16_c;
+ if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx;
+ if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2;
+ vpx_variance8x4 = vpx_variance8x4_c;
+ if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2;
+ vpx_variance8x8 = vpx_variance8x8_c;
+ if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx;
+ if (flags & HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2;
}
#endif
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/mac/x64/vp8_rtcd.h
index 9989eb09995..5285ac71055 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vp8_rtcd.h
@@ -74,10 +74,10 @@ void vp8_clear_system_state_c();
void vpx_reset_mmx_state();
#define vp8_clear_system_state vpx_reset_mmx_state
-void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
+void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
@@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *
int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx
-
-unsigned int vp8_get_mb_ss_c(const short *);
-unsigned int vp8_get_mb_ss_mmx(const short *);
-unsigned int vp8_get_mb_ss_sse2(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_sse2
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb);
int vp8_mbuverror_xmm(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_xmm
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_wmt
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
@@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in
void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
@@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig
void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
#define vp8_subtract_mby vp8_subtract_mby_sse2
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_wmt
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_wmt
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_wmt
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_wmt
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_wmt
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
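
Note how this mac/x64 header differs from the mac/ia32 one earlier in the diff: on ia32 each entry keeps an RTCD_EXTERN pointer that setup_rtcd_internal() rebinds at run time, whereas the x86-64 header collapses the choice to a plain #define (for example vp8_mbuverror resolving directly to vp8_mbuverror_xmm), so the call is bound at compile time where the SIMD level is part of the baseline. The short sketch below contrasts the two binding styles under that assumption; all names are illustrative stand-ins, not libvpx symbols.

    #include <stdio.h>

    static int work_c(int x)    { return x + 1; }
    static int work_sse2(int x) { return work_c(x); }  /* pretend SIMD body */

    #if defined(__x86_64__) || defined(_M_X64)
    /* x64 style: the target guarantees the feature, so bind with a #define. */
    #define work work_sse2
    #else
    /* ia32 style: keep a pointer and let run-time detection fill it in. */
    static int has_sse2 = 1;            /* stand-in for the CPU capability query */
    static int (*work)(int) = work_c;
    static void setup(void) { if (has_sse2) work = work_sse2; }
    #endif

    int main(void) {
    #if !defined(__x86_64__) && !defined(_M_X64)
      setup();
    #endif
      printf("%d\n", work(41));
      return 0;
    }
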
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h
index 10a6b8401db..0834e762f34 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h
@@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint
RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c
+void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -321,19 +322,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_mmx
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_sse2
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-unsigned int vp9_get_mb_ss_sse2(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_sse2
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -488,23 +476,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi
void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_sse2
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_sse2
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_sse2
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_sse2
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_wmt
@@ -709,63 +680,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_sse2
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_sse2
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_sse2
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_sse2
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_sse2
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_sse2
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_sse2
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_sse2
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_sse2
@@ -799,6 +713,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2;
vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
+ vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c;
+ if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3;
vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c;
@@ -838,8 +754,6 @@ static void setup_rtcd_internal(void)
vp9_full_search_sad = vp9_full_search_sad_c;
if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
- vp9_get16x16var = vp9_get16x16var_sse2;
- if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
@@ -856,8 +770,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3;
vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2;
if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2;
- vp9_mse16x16 = vp9_mse16x16_sse2;
- if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
vp9_quantize_b = vp9_quantize_b_sse2;
if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3;
vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c;
@@ -922,16 +834,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3;
vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2;
if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3;
- vp9_variance16x16 = vp9_variance16x16_sse2;
- if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
- vp9_variance32x16 = vp9_variance32x16_sse2;
- if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
- vp9_variance32x32 = vp9_variance32x32_sse2;
- if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
- vp9_variance64x32 = vp9_variance64x32_sse2;
- if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
- vp9_variance64x64 = vp9_variance64x64_sse2;
- if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
}
#endif
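The hunks above only trim entries out of the generated setup_rtcd_internal() body; the pattern those headers follow stays the same: each entry point is either #define'd to a single implementation or declared as an RTCD_EXTERN function pointer that setup_rtcd_internal() fills in from the CPU-feature flags. A minimal, self-contained C sketch of that pattern, with illustrative names that are not from the tree:

    #include <stdio.h>

    #define MY_HAS_AVX2 (1 << 0)       /* stand-in for libvpx's HAS_AVX2 flag bit */

    static unsigned int sum16_sse2(const unsigned char *s)
    {
        unsigned int a = 0;
        for (int i = 0; i < 16; ++i) a += s[i];
        return a;
    }
    static unsigned int sum16_avx2(const unsigned char *s)
    {
        return sum16_sse2(s);          /* pretend this is the wider version */
    }

    /* In the real headers this is RTCD_EXTERN, i.e. extern outside the rtcd unit. */
    static unsigned int (*sum16)(const unsigned char *s);

    static int probe_cpu(void) { return MY_HAS_AVX2; }  /* stand-in for the CPU probe */

    static void setup_rtcd_internal(void)
    {
        int flags = probe_cpu();
        sum16 = sum16_sse2;            /* SSE2 is the assumed baseline on x86-64 */
        if (flags & MY_HAS_AVX2) sum16 = sum16_avx2;
    }

    int main(void)
    {
        unsigned char px[16] = {0};
        px[0] = 7;
        setup_rtcd_internal();
        printf("%u\n", sum16(px));     /* call sites dispatch through the pointer */
        return 0;
    }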
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm
index 68a0d73808d..0c4b550b78a 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.asm
@@ -20,7 +20,6 @@
%define HAVE_AVX2 1
%define HAVE_VPX_PORTS 1
%define HAVE_STDINT_H 1
-%define HAVE_ALT_TREE_LAYOUT 0
%define HAVE_PTHREAD_H 1
%define HAVE_SYS_MMAN_H 1
%define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.c b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.c
index 62f0db7d0c7..cba8a3d5809 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=x86_64-darwin9-gcc --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
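The only change to this vpx_config.c stub is the added #include of vpx/vpx_codec.h, which declares vpx_codec_build_config(); with the header in scope the compiler checks the definition above against its public prototype instead of compiling an unprototyped external function. A tiny stand-alone illustration of the same idea, with made-up names:

    /* Prototype first (this is what the #include supplies in the real file) ... */
    const char *example_build_config(void);

    /* ... so the definition below is verified against it at compile time. */
    static const char *const example_cfg = "--enable-example";
    const char *example_build_config(void) { return example_cfg; }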
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h
index 72f336395ce..27d91efdbd4 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 1
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h
index b5df5e08c57..d93c56eb765 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vpx_dsp_rtcd.h
@@ -18,6 +18,45 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_sse2
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+unsigned int vpx_get_mb_ss_mmx(const int16_t *);
+unsigned int vpx_get_mb_ss_sse2(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_sse2
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_sse2
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_sse2
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_sse2
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p
void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_sse2
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_sse2
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_sse2
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_sse2
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_sse2
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_sse2
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_sse2
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_sse2
+
void vpx_dsp_rtcd(void);
#ifdef RTCD_C
@@ -261,6 +362,10 @@ static void setup_rtcd_internal(void)
(void)flags;
+ vpx_get16x16var = vpx_get16x16var_sse2;
+ if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2;
+ vpx_mse16x16 = vpx_mse16x16_sse2;
+ if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2;
vpx_sad16x16x3 = vpx_sad16x16x3_c;
if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3;
if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3;
@@ -307,6 +412,16 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3;
vpx_sad8x8x8 = vpx_sad8x8x8_c;
if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1;
+ vpx_variance16x16 = vpx_variance16x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2;
+ vpx_variance32x16 = vpx_variance32x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2;
+ vpx_variance32x32 = vpx_variance32x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2;
+ vpx_variance64x32 = vpx_variance64x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2;
+ vpx_variance64x64 = vpx_variance64x64_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2;
}
#endif
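In the vpx_dsp_rtcd.h hunk above, helpers with a single best x86-64 version (vpx_mse8x8, the small variance blocks) collapse to a #define, while those that also have an AVX2 variant (vpx_get16x16var, vpx_mse16x16, the larger variance blocks) stay behind a run-time pointer set in setup_rtcd_internal(); either way a caller just invokes the plain name. For reference, a hedged C sketch of what an mse16x16-style helper computes — the sum of squared differences over a 16x16 block, reported both through *sse and as the return value. This is an illustration, not the tree's implementation:

    #include <stdint.h>

    static unsigned int mse16x16_like(const uint8_t *src, int src_stride,
                                      const uint8_t *ref, int ref_stride,
                                      unsigned int *sse)
    {
        unsigned int acc = 0;
        for (int r = 0; r < 16; ++r)
            for (int c = 0; c < 16; ++c) {
                int d = src[r * src_stride + c] - ref[r * ref_stride + c];
                acc += (unsigned int)(d * d);
            }
        *sse = acc;
        return acc;   /* MSE-style helpers return the same sum they store in *sse */
    }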
diff --git a/chromium/third_party/libvpx/source/config/nacl/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/nacl/vp8_rtcd.h
index 1bd7496fe55..7a33ed2effb 100644
--- a/chromium/third_party/libvpx/source/config/nacl/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/nacl/vp8_rtcd.h
@@ -107,12 +107,6 @@ void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride, unsigned char
int vp8_full_search_sad_c(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
#define vp8_full_search_sad vp8_full_search_sad_c
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_c
-
-unsigned int vp8_get_mb_ss_c(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_c
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -152,9 +146,6 @@ void vp8_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols,in
int vp8_mbuverror_c(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_c
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_c
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
#define vp8_plane_add_noise vp8_plane_add_noise_c
@@ -197,9 +188,6 @@ void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int y
void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_c
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
#define vp8_sub_pixel_variance16x16 vp8_sub_pixel_variance16x16_c
@@ -224,21 +212,6 @@ void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
#define vp8_subtract_mby vp8_subtract_mby_c
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_c
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_c
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_c
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_c
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_c
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
diff --git a/chromium/third_party/libvpx/source/config/nacl/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/nacl/vp9_rtcd.h
index 84bd45f2876..8cf86073577 100644
--- a/chromium/third_party/libvpx/source/config/nacl/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/nacl/vp9_rtcd.h
@@ -245,15 +245,6 @@ int vp9_full_search_sad_c(const struct macroblock *x, const struct mv *ref_mv, i
void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_c
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get16x16var vp9_get16x16var_c
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_c
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_c
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_h_predictor_16x16 vp9_h_predictor_16x16_c
@@ -368,18 +359,6 @@ void vp9_mbpost_proc_down_c(uint8_t *dst, int pitch, int rows, int cols, int fli
void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_c
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x16 vp9_mse16x16_c
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_c
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_c
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_c
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_c
@@ -509,45 +488,6 @@ void vp9_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_c
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x16 vp9_variance16x16_c
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_c
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_c
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x16 vp9_variance32x16_c
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x32 vp9_variance32x32_c
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_c
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_c
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_c
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x32 vp9_variance64x32_c
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance64x64 vp9_variance64x64_c
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_c
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_c
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_c
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_c
diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_config.asm b/chromium/third_party/libvpx/source/config/nacl/vpx_config.asm
index 20db06465bf..a2a18fdc7fd 100644
--- a/chromium/third_party/libvpx/source/config/nacl/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/nacl/vpx_config.asm
@@ -23,7 +23,6 @@
.equ HAVE_AVX2 , 0
.equ HAVE_VPX_PORTS , 1
.equ HAVE_STDINT_H , 1
-.equ HAVE_ALT_TREE_LAYOUT , 0
.equ HAVE_PTHREAD_H , 1
.equ HAVE_SYS_MMAN_H , 1
.equ HAVE_UNISTD_H , 0
diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_config.c b/chromium/third_party/libvpx/source/config/nacl/vpx_config.c
index 0e8d7265f36..2a18a45e3e3 100644
--- a/chromium/third_party/libvpx/source/config/nacl/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/nacl/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=generic-gnu --enable-pic --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h
index ec8222e18c2..290508c1c88 100644
--- a/chromium/third_party/libvpx/source/config/nacl/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/nacl/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 0
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 1
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 1
#define HAVE_SYS_MMAN_H 1
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/nacl/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/nacl/vpx_dsp_rtcd.h
index 86c5b4425dc..f086946da23 100644
--- a/chromium/third_party/libvpx/source/config/nacl/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/nacl/vpx_dsp_rtcd.h
@@ -18,6 +18,33 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get16x16var vpx_get16x16var_c
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_c
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_c
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x16 vpx_mse16x16_c
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_c
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_c
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_c
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad16x16 vpx_sad16x16_c
@@ -183,6 +210,45 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
#define vpx_sad8x8x8 vpx_sad8x8x8_c
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x16 vpx_variance16x16_c
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_c
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_c
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x16 vpx_variance32x16_c
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x32 vpx_variance32x32_c
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_c
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_c
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_c
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x32 vpx_variance64x32_c
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance64x64 vpx_variance64x64_c
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_c
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_c
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_c
+
void vpx_dsp_rtcd(void);
#include "vpx_config.h"
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/win/ia32/vp8_rtcd.h
index b60e7ac7301..38fe6d360b0 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vp8_rtcd.h
@@ -74,10 +74,10 @@ void vp8_clear_system_state_c();
void vpx_reset_mmx_state();
RTCD_EXTERN void (*vp8_clear_system_state)();
-void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
+void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
@@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *
int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-RTCD_EXTERN unsigned int (*vp8_get4x4sse_cs)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-
-unsigned int vp8_get_mb_ss_c(const short *);
-unsigned int vp8_get_mb_ss_mmx(const short *);
-unsigned int vp8_get_mb_ss_sse2(const short *);
-RTCD_EXTERN unsigned int (*vp8_get_mb_ss)(const short *);
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb);
int vp8_mbuverror_xmm(struct macroblock *mb);
RTCD_EXTERN int (*vp8_mbuverror)(struct macroblock *mb);
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_mse16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
@@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in
void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_sub_pixel_mse16x16)(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
@@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig
void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
RTCD_EXTERN void (*vp8_subtract_mby)(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance16x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance16x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance4x4)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance8x16)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp8_variance8x8)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
@@ -448,11 +404,6 @@ static void setup_rtcd_internal(void)
vp8_full_search_sad = vp8_full_search_sad_c;
if (flags & HAS_SSE3) vp8_full_search_sad = vp8_full_search_sadx3;
if (flags & HAS_SSE4_1) vp8_full_search_sad = vp8_full_search_sadx8;
- vp8_get4x4sse_cs = vp8_get4x4sse_cs_c;
- if (flags & HAS_MMX) vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx;
- vp8_get_mb_ss = vp8_get_mb_ss_c;
- if (flags & HAS_MMX) vp8_get_mb_ss = vp8_get_mb_ss_mmx;
- if (flags & HAS_SSE2) vp8_get_mb_ss = vp8_get_mb_ss_sse2;
vp8_loop_filter_bh = vp8_loop_filter_bh_c;
if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
@@ -488,9 +439,6 @@ static void setup_rtcd_internal(void)
vp8_mbuverror = vp8_mbuverror_c;
if (flags & HAS_MMX) vp8_mbuverror = vp8_mbuverror_mmx;
if (flags & HAS_SSE2) vp8_mbuverror = vp8_mbuverror_xmm;
- vp8_mse16x16 = vp8_mse16x16_c;
- if (flags & HAS_MMX) vp8_mse16x16 = vp8_mse16x16_mmx;
- if (flags & HAS_SSE2) vp8_mse16x16 = vp8_mse16x16_wmt;
vp8_plane_add_noise = vp8_plane_add_noise_c;
if (flags & HAS_MMX) vp8_plane_add_noise = vp8_plane_add_noise_mmx;
if (flags & HAS_SSE2) vp8_plane_add_noise = vp8_plane_add_noise_wmt;
@@ -529,9 +477,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx;
if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3;
- vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_c;
- if (flags & HAS_MMX) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_mmx;
- if (flags & HAS_SSE2) vp8_sub_pixel_mse16x16 = vp8_sub_pixel_mse16x16_wmt;
vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c;
if (flags & HAS_MMX) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx;
if (flags & HAS_SSE2) vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt;
@@ -558,21 +503,6 @@ static void setup_rtcd_internal(void)
vp8_subtract_mby = vp8_subtract_mby_c;
if (flags & HAS_MMX) vp8_subtract_mby = vp8_subtract_mby_mmx;
if (flags & HAS_SSE2) vp8_subtract_mby = vp8_subtract_mby_sse2;
- vp8_variance16x16 = vp8_variance16x16_c;
- if (flags & HAS_MMX) vp8_variance16x16 = vp8_variance16x16_mmx;
- if (flags & HAS_SSE2) vp8_variance16x16 = vp8_variance16x16_wmt;
- vp8_variance16x8 = vp8_variance16x8_c;
- if (flags & HAS_MMX) vp8_variance16x8 = vp8_variance16x8_mmx;
- if (flags & HAS_SSE2) vp8_variance16x8 = vp8_variance16x8_wmt;
- vp8_variance4x4 = vp8_variance4x4_c;
- if (flags & HAS_MMX) vp8_variance4x4 = vp8_variance4x4_mmx;
- if (flags & HAS_SSE2) vp8_variance4x4 = vp8_variance4x4_wmt;
- vp8_variance8x16 = vp8_variance8x16_c;
- if (flags & HAS_MMX) vp8_variance8x16 = vp8_variance8x16_mmx;
- if (flags & HAS_SSE2) vp8_variance8x16 = vp8_variance8x16_wmt;
- vp8_variance8x8 = vp8_variance8x8_c;
- if (flags & HAS_MMX) vp8_variance8x8 = vp8_variance8x8_mmx;
- if (flags & HAS_SSE2) vp8_variance8x8 = vp8_variance8x8_wmt;
vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c;
if (flags & HAS_MMX) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx;
if (flags & HAS_SSE2) vp8_variance_halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt;
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h
index 7425dc281bd..1da20dc1bcc 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h
@@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint
RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c
+void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -320,19 +321,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
RTCD_EXTERN void (*vp9_fwht4x4)(const int16_t *input, tran_low_t *output, int stride);
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-unsigned int vp9_get_mb_ss_sse2(const int16_t *);
-RTCD_EXTERN unsigned int (*vp9_get_mb_ss)(const int16_t *);
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -484,23 +472,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi
void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
RTCD_EXTERN void (*vp9_minmax_8x8)(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
RTCD_EXTERN void (*vp9_plane_add_noise)(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
@@ -700,63 +671,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl);
RTCD_EXTERN int (*vp9_vector_var)(int16_t const *ref, int16_t const *src, const int bwl);
@@ -807,6 +721,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vp9_convolve_copy = vp9_convolve_copy_sse2;
vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
+ vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c;
+ if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3;
vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c;
@@ -907,13 +823,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
vp9_fwht4x4 = vp9_fwht4x4_c;
if (flags & HAS_MMX) vp9_fwht4x4 = vp9_fwht4x4_mmx;
- vp9_get16x16var = vp9_get16x16var_c;
- if (flags & HAS_SSE2) vp9_get16x16var = vp9_get16x16var_sse2;
- if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
- vp9_get8x8var = vp9_get8x8var_c;
- if (flags & HAS_SSE2) vp9_get8x8var = vp9_get8x8var_sse2;
- vp9_get_mb_ss = vp9_get_mb_ss_c;
- if (flags & HAS_SSE2) vp9_get_mb_ss = vp9_get_mb_ss_sse2;
vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
@@ -987,15 +896,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vp9_mbpost_proc_down = vp9_mbpost_proc_down_xmm;
vp9_minmax_8x8 = vp9_minmax_8x8_c;
if (flags & HAS_SSE2) vp9_minmax_8x8 = vp9_minmax_8x8_sse2;
- vp9_mse16x16 = vp9_mse16x16_c;
- if (flags & HAS_SSE2) vp9_mse16x16 = vp9_mse16x16_sse2;
- if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
- vp9_mse16x8 = vp9_mse16x8_c;
- if (flags & HAS_SSE2) vp9_mse16x8 = vp9_mse16x8_sse2;
- vp9_mse8x16 = vp9_mse8x16_c;
- if (flags & HAS_SSE2) vp9_mse8x16 = vp9_mse8x16_sse2;
- vp9_mse8x8 = vp9_mse8x8_c;
- if (flags & HAS_SSE2) vp9_mse8x8 = vp9_mse8x8_sse2;
vp9_plane_add_noise = vp9_plane_add_noise_c;
if (flags & HAS_SSE2) vp9_plane_add_noise = vp9_plane_add_noise_wmt;
vp9_post_proc_down_and_across = vp9_post_proc_down_and_across_c;
@@ -1106,37 +1006,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE) vp9_v_predictor_4x4 = vp9_v_predictor_4x4_sse;
vp9_v_predictor_8x8 = vp9_v_predictor_8x8_c;
if (flags & HAS_SSE) vp9_v_predictor_8x8 = vp9_v_predictor_8x8_sse;
- vp9_variance16x16 = vp9_variance16x16_c;
- if (flags & HAS_SSE2) vp9_variance16x16 = vp9_variance16x16_sse2;
- if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
- vp9_variance16x32 = vp9_variance16x32_c;
- if (flags & HAS_SSE2) vp9_variance16x32 = vp9_variance16x32_sse2;
- vp9_variance16x8 = vp9_variance16x8_c;
- if (flags & HAS_SSE2) vp9_variance16x8 = vp9_variance16x8_sse2;
- vp9_variance32x16 = vp9_variance32x16_c;
- if (flags & HAS_SSE2) vp9_variance32x16 = vp9_variance32x16_sse2;
- if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
- vp9_variance32x32 = vp9_variance32x32_c;
- if (flags & HAS_SSE2) vp9_variance32x32 = vp9_variance32x32_sse2;
- if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
- vp9_variance32x64 = vp9_variance32x64_c;
- if (flags & HAS_SSE2) vp9_variance32x64 = vp9_variance32x64_sse2;
- vp9_variance4x4 = vp9_variance4x4_c;
- if (flags & HAS_SSE2) vp9_variance4x4 = vp9_variance4x4_sse2;
- vp9_variance4x8 = vp9_variance4x8_c;
- if (flags & HAS_SSE2) vp9_variance4x8 = vp9_variance4x8_sse2;
- vp9_variance64x32 = vp9_variance64x32_c;
- if (flags & HAS_SSE2) vp9_variance64x32 = vp9_variance64x32_sse2;
- if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
- vp9_variance64x64 = vp9_variance64x64_c;
- if (flags & HAS_SSE2) vp9_variance64x64 = vp9_variance64x64_sse2;
- if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
- vp9_variance8x16 = vp9_variance8x16_c;
- if (flags & HAS_SSE2) vp9_variance8x16 = vp9_variance8x16_sse2;
- vp9_variance8x4 = vp9_variance8x4_c;
- if (flags & HAS_SSE2) vp9_variance8x4 = vp9_variance8x4_sse2;
- vp9_variance8x8 = vp9_variance8x8_c;
- if (flags & HAS_SSE2) vp9_variance8x8 = vp9_variance8x8_sse2;
vp9_vector_var = vp9_vector_var_c;
if (flags & HAS_SSE2) vp9_vector_var = vp9_vector_var_sse2;
}
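
[Editor's note: a minimal sketch, not libvpx source, of the run-time CPU detection (RTCD) pattern these generated headers use. Each function has a C reference version plus optional SIMD versions; an RTCD_EXTERN function pointer is bound once in setup_rtcd_internal() according to the detected CPU flags. All names below are illustrative, not part of the library.]

#include <stdint.h>

#define HAS_SSE2 0x01  /* stand-in CPU-feature flags */
#define HAS_AVX2 0x02

/* C reference kernel: SSE over a 16x16 block (mirrors the prototype shape
 * of the mse16x16 declarations in the diff above). */
static unsigned int mse16x16_c(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               unsigned int *sse) {
  unsigned int total = 0;
  for (int r = 0; r < 16; ++r)
    for (int c = 0; c < 16; ++c) {
      const int d = src[r * src_stride + c] - ref[r * ref_stride + c];
      total += (unsigned int)(d * d);
    }
  *sse = total;
  return total;
}

/* Stand-in for an optimized kernel; a real build would supply SSE2 code. */
static unsigned int mse16x16_sse2(const uint8_t *s, int ss, const uint8_t *r,
                                  int rs, unsigned int *sse) {
  return mse16x16_c(s, ss, r, rs, sse);
}

/* The dispatch pointer, analogous to the RTCD_EXTERN declarations above. */
static unsigned int (*mse16x16)(const uint8_t *, int, const uint8_t *, int,
                                unsigned int *);

/* Analogous to setup_rtcd_internal(): start from the C version, then
 * upgrade to the best kernel the detected flags allow. */
static void setup_dispatch(int flags) {
  mse16x16 = mse16x16_c;
  if (flags & HAS_SSE2) mse16x16 = mse16x16_sse2;
}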
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm
index 2509394a18d..c817164acd9 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.asm
@@ -20,7 +20,6 @@
%define HAVE_AVX2 1
%define HAVE_VPX_PORTS 1
%define HAVE_STDINT_H 0
-%define HAVE_ALT_TREE_LAYOUT 0
%define HAVE_PTHREAD_H 0
%define HAVE_SYS_MMAN_H 0
%define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.c b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.c
index b2271e28dac..d88b6d68609 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=x86-win32-vs12 --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h
index fa8997e86c1..e5c65cf5956 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 1
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 0
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 0
#define HAVE_SYS_MMAN_H 0
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h
index 8cc2fa5b737..32ee77e25ce 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vpx_dsp_rtcd.h
@@ -18,6 +18,45 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get8x8var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+unsigned int vpx_get_mb_ss_mmx(const int16_t *);
+unsigned int vpx_get_mb_ss_sse2(const int16_t *);
+RTCD_EXTERN unsigned int (*vpx_get_mb_ss)(const int16_t *);
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p
void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance4x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance4x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x4)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance8x8)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
void vpx_dsp_rtcd(void);
#ifdef RTCD_C
@@ -261,6 +362,25 @@ static void setup_rtcd_internal(void)
(void)flags;
+ vpx_get16x16var = vpx_get16x16var_c;
+ if (flags & HAS_SSE2) vpx_get16x16var = vpx_get16x16var_sse2;
+ if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2;
+ vpx_get8x8var = vpx_get8x8var_c;
+ if (flags & HAS_MMX) vpx_get8x8var = vpx_get8x8var_mmx;
+ if (flags & HAS_SSE2) vpx_get8x8var = vpx_get8x8var_sse2;
+ vpx_get_mb_ss = vpx_get_mb_ss_c;
+ if (flags & HAS_MMX) vpx_get_mb_ss = vpx_get_mb_ss_mmx;
+ if (flags & HAS_SSE2) vpx_get_mb_ss = vpx_get_mb_ss_sse2;
+ vpx_mse16x16 = vpx_mse16x16_c;
+ if (flags & HAS_MMX) vpx_mse16x16 = vpx_mse16x16_mmx;
+ if (flags & HAS_SSE2) vpx_mse16x16 = vpx_mse16x16_sse2;
+ if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2;
+ vpx_mse16x8 = vpx_mse16x8_c;
+ if (flags & HAS_SSE2) vpx_mse16x8 = vpx_mse16x8_sse2;
+ vpx_mse8x16 = vpx_mse8x16_c;
+ if (flags & HAS_SSE2) vpx_mse8x16 = vpx_mse8x16_sse2;
+ vpx_mse8x8 = vpx_mse8x8_c;
+ if (flags & HAS_SSE2) vpx_mse8x8 = vpx_mse8x8_sse2;
vpx_sad16x16 = vpx_sad16x16_c;
if (flags & HAS_MMX) vpx_sad16x16 = vpx_sad16x16_mmx;
if (flags & HAS_SSE2) vpx_sad16x16 = vpx_sad16x16_sse2;
@@ -378,6 +498,42 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2;
vpx_sad8x8x8 = vpx_sad8x8x8_c;
if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1;
+ vpx_variance16x16 = vpx_variance16x16_c;
+ if (flags & HAS_MMX) vpx_variance16x16 = vpx_variance16x16_mmx;
+ if (flags & HAS_SSE2) vpx_variance16x16 = vpx_variance16x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2;
+ vpx_variance16x32 = vpx_variance16x32_c;
+ if (flags & HAS_SSE2) vpx_variance16x32 = vpx_variance16x32_sse2;
+ vpx_variance16x8 = vpx_variance16x8_c;
+ if (flags & HAS_MMX) vpx_variance16x8 = vpx_variance16x8_mmx;
+ if (flags & HAS_SSE2) vpx_variance16x8 = vpx_variance16x8_sse2;
+ vpx_variance32x16 = vpx_variance32x16_c;
+ if (flags & HAS_SSE2) vpx_variance32x16 = vpx_variance32x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2;
+ vpx_variance32x32 = vpx_variance32x32_c;
+ if (flags & HAS_SSE2) vpx_variance32x32 = vpx_variance32x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2;
+ vpx_variance32x64 = vpx_variance32x64_c;
+ if (flags & HAS_SSE2) vpx_variance32x64 = vpx_variance32x64_sse2;
+ vpx_variance4x4 = vpx_variance4x4_c;
+ if (flags & HAS_MMX) vpx_variance4x4 = vpx_variance4x4_mmx;
+ if (flags & HAS_SSE2) vpx_variance4x4 = vpx_variance4x4_sse2;
+ vpx_variance4x8 = vpx_variance4x8_c;
+ if (flags & HAS_SSE2) vpx_variance4x8 = vpx_variance4x8_sse2;
+ vpx_variance64x32 = vpx_variance64x32_c;
+ if (flags & HAS_SSE2) vpx_variance64x32 = vpx_variance64x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2;
+ vpx_variance64x64 = vpx_variance64x64_c;
+ if (flags & HAS_SSE2) vpx_variance64x64 = vpx_variance64x64_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2;
+ vpx_variance8x16 = vpx_variance8x16_c;
+ if (flags & HAS_MMX) vpx_variance8x16 = vpx_variance8x16_mmx;
+ if (flags & HAS_SSE2) vpx_variance8x16 = vpx_variance8x16_sse2;
+ vpx_variance8x4 = vpx_variance8x4_c;
+ if (flags & HAS_SSE2) vpx_variance8x4 = vpx_variance8x4_sse2;
+ vpx_variance8x8 = vpx_variance8x8_c;
+ if (flags & HAS_MMX) vpx_variance8x8 = vpx_variance8x8_mmx;
+ if (flags & HAS_SSE2) vpx_variance8x8 = vpx_variance8x8_sse2;
}
#endif
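
[Editor's note: an illustrative sketch, under the assumption that the variance functions declared above follow the usual relation variance = SSE - sum^2 / N; the get8x8var-style helpers expose the (sse, sum) pair so larger block variances can be composed from them. Names are hypothetical, not libvpx source.]

#include <stdint.h>

/* Accumulate SSE and the signed sum of differences over an 8x8 block,
 * matching the (sse, sum) output shape of the get8x8var prototypes above. */
static void get8x8var(const uint8_t *src, int src_stride,
                      const uint8_t *ref, int ref_stride,
                      unsigned int *sse, int *sum) {
  unsigned int s2 = 0;
  int s = 0;
  for (int r = 0; r < 8; ++r)
    for (int c = 0; c < 8; ++c) {
      const int d = src[r * src_stride + c] - ref[r * ref_stride + c];
      s += d;
      s2 += (unsigned int)(d * d);
    }
  *sse = s2;
  *sum = s;
}

/* Variance of an 8x8 block: SSE minus the squared-mean term (64 pixels). */
static unsigned int variance8x8(const uint8_t *src, int src_stride,
                                const uint8_t *ref, int ref_stride,
                                unsigned int *sse) {
  int sum;
  get8x8var(src, src_stride, ref, ref_stride, sse, &sum);
  return *sse - (unsigned int)(((int64_t)sum * sum) >> 6);
}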
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vp8_rtcd.h b/chromium/third_party/libvpx/source/config/win/x64/vp8_rtcd.h
index 9989eb09995..5285ac71055 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vp8_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/x64/vp8_rtcd.h
@@ -74,10 +74,10 @@ void vp8_clear_system_state_c();
void vpx_reset_mmx_state();
#define vp8_clear_system_state vpx_reset_mmx_state
-void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
-RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n);
+void vp8_copy32xn_c(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse2(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+void vp8_copy32xn_sse3(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
+RTCD_EXTERN void (*vp8_copy32xn)(const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n);
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
@@ -147,15 +147,6 @@ int vp8_full_search_sadx3(struct macroblock *x, struct block *b, struct blockd *
int vp8_full_search_sadx8(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
RTCD_EXTERN int (*vp8_full_search_sad)(struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int sad_per_bit, int distance, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv);
-unsigned int vp8_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-unsigned int vp8_get4x4sse_cs_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
-#define vp8_get4x4sse_cs vp8_get4x4sse_cs_mmx
-
-unsigned int vp8_get_mb_ss_c(const short *);
-unsigned int vp8_get_mb_ss_mmx(const short *);
-unsigned int vp8_get_mb_ss_sse2(const short *);
-#define vp8_get_mb_ss vp8_get_mb_ss_sse2
-
void vp8_intra4x4_predict_c(unsigned char *Above, unsigned char *yleft, int left_stride, int b_mode, unsigned char *dst, int dst_stride, unsigned char top_left);
#define vp8_intra4x4_predict vp8_intra4x4_predict_c
@@ -218,11 +209,6 @@ int vp8_mbuverror_mmx(struct macroblock *mb);
int vp8_mbuverror_xmm(struct macroblock *mb);
#define vp8_mbuverror vp8_mbuverror_xmm
-unsigned int vp8_mse16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_mse16x16 vp8_mse16x16_wmt
-
void vp8_plane_add_noise_c(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_mmx(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
void vp8_plane_add_noise_wmt(unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch);
@@ -290,11 +276,6 @@ void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, in
void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
-unsigned int vp8_sub_pixel_mse16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
-#define vp8_sub_pixel_mse16x16 vp8_sub_pixel_mse16x16_wmt
-
unsigned int vp8_sub_pixel_variance16x16_c(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse);
@@ -337,31 +318,6 @@ void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride, unsig
void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride);
#define vp8_subtract_mby vp8_subtract_mby_sse2
-unsigned int vp8_variance16x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x16 vp8_variance16x16_wmt
-
-unsigned int vp8_variance16x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance16x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance16x8 vp8_variance16x8_wmt
-
-unsigned int vp8_variance4x4_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance4x4_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance4x4 vp8_variance4x4_wmt
-
-unsigned int vp8_variance8x16_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x16_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x16 vp8_variance8x16_wmt
-
-unsigned int vp8_variance8x8_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp8_variance8x8_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp8_variance8x8 vp8_variance8x8_wmt
-
unsigned int vp8_variance_halfpixvar16x16_h_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_mmx(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse);
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h
index 10a6b8401db..0834e762f34 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h
@@ -116,7 +116,8 @@ void vp9_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint
RTCD_EXTERN void (*vp9_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
-#define vp9_d153_predictor_32x32 vp9_d153_predictor_32x32_c
+void vp9_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
+RTCD_EXTERN void (*vp9_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -321,19 +322,6 @@ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride);
void vp9_fwht4x4_mmx(const int16_t *input, tran_low_t *output, int stride);
#define vp9_fwht4x4 vp9_fwht4x4_mmx
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-RTCD_EXTERN void (*vp9_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-void vp9_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
-#define vp9_get8x8var vp9_get8x8var_sse2
-
-unsigned int vp9_get_mb_ss_c(const int16_t *);
-unsigned int vp9_get_mb_ss_sse2(const int16_t *);
-#define vp9_get_mb_ss vp9_get_mb_ss_sse2
-
void vp9_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vp9_h_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vp9_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
@@ -488,23 +476,6 @@ void vp9_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *mi
void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max);
#define vp9_minmax_8x8 vp9_minmax_8x8_sse2
-unsigned int vp9_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-
-unsigned int vp9_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse16x8 vp9_mse16x8_sse2
-
-unsigned int vp9_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x16 vp9_mse8x16_sse2
-
-unsigned int vp9_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-unsigned int vp9_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
-#define vp9_mse8x8 vp9_mse8x8_sse2
-
void vp9_plane_add_noise_c(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
void vp9_plane_add_noise_wmt(uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch);
#define vp9_plane_add_noise vp9_plane_add_noise_wmt
@@ -709,63 +680,6 @@ void vp9_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *abov
void vp9_v_predictor_8x8_sse(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vp9_v_predictor_8x8 vp9_v_predictor_8x8_sse
-unsigned int vp9_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x32 vp9_variance16x32_sse2
-
-unsigned int vp9_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance16x8 vp9_variance16x8_sse2
-
-unsigned int vp9_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance32x64 vp9_variance32x64_sse2
-
-unsigned int vp9_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x4 vp9_variance4x4_sse2
-
-unsigned int vp9_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance4x8 vp9_variance4x8_sse2
-
-unsigned int vp9_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-RTCD_EXTERN unsigned int (*vp9_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-
-unsigned int vp9_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x16 vp9_variance8x16_sse2
-
-unsigned int vp9_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x4 vp9_variance8x4_sse2
-
-unsigned int vp9_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-unsigned int vp9_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
-#define vp9_variance8x8 vp9_variance8x8_sse2
-
int vp9_vector_var_c(int16_t const *ref, int16_t const *src, const int bwl);
int vp9_vector_var_sse2(int16_t const *ref, int16_t const *src, const int bwl);
#define vp9_vector_var vp9_vector_var_sse2
@@ -799,6 +713,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_AVX2) vp9_convolve8_vert = vp9_convolve8_vert_avx2;
vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_16x16 = vp9_d153_predictor_16x16_ssse3;
+ vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_c;
+ if (flags & HAS_SSSE3) vp9_d153_predictor_32x32 = vp9_d153_predictor_32x32_ssse3;
vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_c;
if (flags & HAS_SSSE3) vp9_d153_predictor_4x4 = vp9_d153_predictor_4x4_ssse3;
vp9_d153_predictor_8x8 = vp9_d153_predictor_8x8_c;
@@ -838,8 +754,6 @@ static void setup_rtcd_internal(void)
vp9_full_search_sad = vp9_full_search_sad_c;
if (flags & HAS_SSE3) vp9_full_search_sad = vp9_full_search_sadx3;
if (flags & HAS_SSE4_1) vp9_full_search_sad = vp9_full_search_sadx8;
- vp9_get16x16var = vp9_get16x16var_sse2;
- if (flags & HAS_AVX2) vp9_get16x16var = vp9_get16x16var_avx2;
vp9_h_predictor_16x16 = vp9_h_predictor_16x16_c;
if (flags & HAS_SSSE3) vp9_h_predictor_16x16 = vp9_h_predictor_16x16_ssse3;
vp9_h_predictor_32x32 = vp9_h_predictor_32x32_c;
@@ -856,8 +770,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSSE3) vp9_idct8x8_64_add = vp9_idct8x8_64_add_ssse3;
vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_sse2;
if (flags & HAS_AVX2) vp9_lpf_horizontal_16 = vp9_lpf_horizontal_16_avx2;
- vp9_mse16x16 = vp9_mse16x16_sse2;
- if (flags & HAS_AVX2) vp9_mse16x16 = vp9_mse16x16_avx2;
vp9_quantize_b = vp9_quantize_b_sse2;
if (flags & HAS_SSSE3) vp9_quantize_b = vp9_quantize_b_ssse3;
vp9_quantize_b_32x32 = vp9_quantize_b_32x32_c;
@@ -922,16 +834,6 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x4 = vp9_sub_pixel_variance8x4_ssse3;
vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_sse2;
if (flags & HAS_SSSE3) vp9_sub_pixel_variance8x8 = vp9_sub_pixel_variance8x8_ssse3;
- vp9_variance16x16 = vp9_variance16x16_sse2;
- if (flags & HAS_AVX2) vp9_variance16x16 = vp9_variance16x16_avx2;
- vp9_variance32x16 = vp9_variance32x16_sse2;
- if (flags & HAS_AVX2) vp9_variance32x16 = vp9_variance32x16_avx2;
- vp9_variance32x32 = vp9_variance32x32_sse2;
- if (flags & HAS_AVX2) vp9_variance32x32 = vp9_variance32x32_avx2;
- vp9_variance64x32 = vp9_variance64x32_sse2;
- if (flags & HAS_AVX2) vp9_variance64x32 = vp9_variance64x32_avx2;
- vp9_variance64x64 = vp9_variance64x64_sse2;
- if (flags & HAS_AVX2) vp9_variance64x64 = vp9_variance64x64_avx2;
}
#endif
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm
index b2d9a3776f4..27abdda9cb1 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm
+++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.asm
@@ -20,7 +20,6 @@
%define HAVE_AVX2 1
%define HAVE_VPX_PORTS 1
%define HAVE_STDINT_H 0
-%define HAVE_ALT_TREE_LAYOUT 0
%define HAVE_PTHREAD_H 0
%define HAVE_SYS_MMAN_H 0
%define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.c b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.c
index 67a4c84a4d2..6dd3005d3c5 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.c
+++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.c
@@ -5,5 +5,6 @@
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
+#include "vpx/vpx_codec.h"
static const char* const cfg = "--target=x86_64-win64-vs12 --enable-realtime-only --enable-external-build --enable-postproc --disable-install-srcs --enable-multi-res-encoding --enable-temporal-denoising --disable-unit-tests --disable-install-docs --disable-examples --enable-vp9-temporal-denoising --enable-vp9-postproc --size-limit=16384x16384";
const char *vpx_codec_build_config(void) {return cfg;}
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h
index 5e6837cbe9a..d97def150e8 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h
+++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_config.h
@@ -32,7 +32,6 @@
#define HAVE_AVX2 1
#define HAVE_VPX_PORTS 1
#define HAVE_STDINT_H 0
-#define HAVE_ALT_TREE_LAYOUT 0
#define HAVE_PTHREAD_H 0
#define HAVE_SYS_MMAN_H 0
#define HAVE_UNISTD_H 0
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h b/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h
index b5df5e08c57..d93c56eb765 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/x64/vpx_dsp_rtcd.h
@@ -18,6 +18,45 @@
extern "C" {
#endif
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride);
+#define vpx_comp_avg_pred vpx_comp_avg_pred_c
+
+void vpx_get16x16var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get16x16var_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+RTCD_EXTERN void (*vpx_get16x16var)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride);
+#define vpx_get4x4sse_cs vpx_get4x4sse_cs_c
+
+void vpx_get8x8var_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+void vpx_get8x8var_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum);
+#define vpx_get8x8var vpx_get8x8var_sse2
+
+unsigned int vpx_get_mb_ss_c(const int16_t *);
+unsigned int vpx_get_mb_ss_mmx(const int16_t *);
+unsigned int vpx_get_mb_ss_sse2(const int16_t *);
+#define vpx_get_mb_ss vpx_get_mb_ss_sse2
+
+unsigned int vpx_mse16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_mse16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+
+unsigned int vpx_mse16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse16x8 vpx_mse16x8_sse2
+
+unsigned int vpx_mse8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x16 vpx_mse8x16_sse2
+
+unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse);
+#define vpx_mse8x8 vpx_mse8x8_sse2
+
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_mmx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@@ -251,6 +290,68 @@ void vpx_sad8x8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_p
void vpx_sad8x8x8_sse4_1(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
RTCD_EXTERN void (*vpx_sad8x8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array);
+unsigned int vpx_variance16x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance16x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance16x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x32 vpx_variance16x32_sse2
+
+unsigned int vpx_variance16x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance16x8 vpx_variance16x8_sse2
+
+unsigned int vpx_variance32x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x16)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance32x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance32x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance32x64 vpx_variance32x64_sse2
+
+unsigned int vpx_variance4x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x4 vpx_variance4x4_sse2
+
+unsigned int vpx_variance4x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance4x8 vpx_variance4x8_sse2
+
+unsigned int vpx_variance64x32_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x32)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance64x64_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+RTCD_EXTERN unsigned int (*vpx_variance64x64)(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+
+unsigned int vpx_variance8x16_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x16 vpx_variance8x16_sse2
+
+unsigned int vpx_variance8x4_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x4 vpx_variance8x4_sse2
+
+unsigned int vpx_variance8x8_c(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_mmx(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse);
+#define vpx_variance8x8 vpx_variance8x8_sse2
+
void vpx_dsp_rtcd(void);
#ifdef RTCD_C
@@ -261,6 +362,10 @@ static void setup_rtcd_internal(void)
(void)flags;
+ vpx_get16x16var = vpx_get16x16var_sse2;
+ if (flags & HAS_AVX2) vpx_get16x16var = vpx_get16x16var_avx2;
+ vpx_mse16x16 = vpx_mse16x16_sse2;
+ if (flags & HAS_AVX2) vpx_mse16x16 = vpx_mse16x16_avx2;
vpx_sad16x16x3 = vpx_sad16x16x3_c;
if (flags & HAS_SSE3) vpx_sad16x16x3 = vpx_sad16x16x3_sse3;
if (flags & HAS_SSSE3) vpx_sad16x16x3 = vpx_sad16x16x3_ssse3;
@@ -307,6 +412,16 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE3) vpx_sad8x8x3 = vpx_sad8x8x3_sse3;
vpx_sad8x8x8 = vpx_sad8x8x8_c;
if (flags & HAS_SSE4_1) vpx_sad8x8x8 = vpx_sad8x8x8_sse4_1;
+ vpx_variance16x16 = vpx_variance16x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance16x16 = vpx_variance16x16_avx2;
+ vpx_variance32x16 = vpx_variance32x16_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x16 = vpx_variance32x16_avx2;
+ vpx_variance32x32 = vpx_variance32x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance32x32 = vpx_variance32x32_avx2;
+ vpx_variance64x32 = vpx_variance64x32_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x32 = vpx_variance64x32_avx2;
+ vpx_variance64x64 = vpx_variance64x64_sse2;
+ if (flags & HAS_AVX2) vpx_variance64x64 = vpx_variance64x64_avx2;
}
#endif
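The setup_rtcd_internal() additions above follow libvpx's run-time CPU detection pattern: each routine is reachable through a function pointer that is seeded with the SSE2 baseline and promoted to the AVX2 version only when the detected CPU flags allow it; entries with a single unconditional specialization are plain #define aliases instead. The stand-alone C sketch below illustrates that dispatch pattern in isolation; the flag value, the cpu_flags() helper and the placeholder implementations are simplified stand-ins for illustration, not the real vpx_ports interface.

    /* Minimal sketch of the RTCD-style dispatch used above: pick a baseline
     * implementation at init time and upgrade it when AVX2 is available. */
    #include <stdint.h>
    #include <stdio.h>

    #define HAS_AVX2 0x08  /* illustrative flag value, not libvpx's */

    static unsigned int variance16x16_sse2_impl(const uint8_t *src, int src_stride,
                                                const uint8_t *ref, int ref_stride,
                                                unsigned int *sse) {
      (void)src; (void)src_stride; (void)ref; (void)ref_stride;
      *sse = 0;
      return 0;  /* placeholder body */
    }

    static unsigned int variance16x16_avx2_impl(const uint8_t *src, int src_stride,
                                                const uint8_t *ref, int ref_stride,
                                                unsigned int *sse) {
      (void)src; (void)src_stride; (void)ref; (void)ref_stride;
      *sse = 0;
      return 0;  /* placeholder body */
    }

    /* Analogous to the RTCD_EXTERN function-pointer declarations above. */
    static unsigned int (*variance16x16)(const uint8_t *, int, const uint8_t *,
                                         int, unsigned int *);

    static int cpu_flags(void) { return HAS_AVX2; /* pretend AVX2 is present */ }

    static void setup_dispatch(void) {
      const int flags = cpu_flags();
      variance16x16 = variance16x16_sse2_impl;                    /* baseline */
      if (flags & HAS_AVX2) variance16x16 = variance16x16_avx2_impl;
    }

    int main(void) {
      unsigned int sse;
      setup_dispatch();
      printf("variance = %u\n", variance16x16(NULL, 0, NULL, 0, &sse));
      return 0;
    }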
diff --git a/chromium/third_party/libvpx/source/libvpx/CHANGELOG b/chromium/third_party/libvpx/source/libvpx/CHANGELOG
index a318784150a..b0d306442b3 100644
--- a/chromium/third_party/libvpx/source/libvpx/CHANGELOG
+++ b/chromium/third_party/libvpx/source/libvpx/CHANGELOG
@@ -1,3 +1,8 @@
+xxxx-yy-zz v1.4.0 "Changes for next release"
+ vpxenc is changed to use VP9 by default.
+ Encoder controls added for 1 pass SVC.
+ Decoder control to toggle on/off loopfilter.
+
2015-04-03 v1.4.0 "Indian Runner Duck"
This release includes significant improvements to the VP9 codec.
diff --git a/chromium/third_party/libvpx/source/libvpx/README b/chromium/third_party/libvpx/source/libvpx/README
index fcd1c2e18cf..7b44ba7a22a 100644
--- a/chromium/third_party/libvpx/source/libvpx/README
+++ b/chromium/third_party/libvpx/source/libvpx/README
@@ -101,13 +101,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
x86_64-win64-vs10
x86_64-win64-vs11
x86_64-win64-vs12
- universal-darwin8-gcc
- universal-darwin9-gcc
- universal-darwin10-gcc
- universal-darwin11-gcc
- universal-darwin12-gcc
- universal-darwin13-gcc
- universal-darwin14-gcc
generic-gnu
The generic-gnu target, in conjunction with the CROSS environment variable,
diff --git a/chromium/third_party/libvpx/source/libvpx/args.c b/chromium/third_party/libvpx/source/libvpx/args.c
index 9dabc9bdd6d..14b031040a4 100644
--- a/chromium/third_party/libvpx/source/libvpx/args.c
+++ b/chromium/third_party/libvpx/source/libvpx/args.c
@@ -14,9 +14,7 @@
#include <limits.h>
#include "args.h"
-#ifdef _MSC_VER
-#define snprintf _snprintf
-#endif
+#include "vpx_ports/msvc.h"
#if defined(__GNUC__) && __GNUC__
extern void die(const char *fmt, ...) __attribute__((noreturn));
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk b/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk
index 0add523f99d..e971c9d1c45 100644
--- a/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk
@@ -163,6 +163,7 @@ ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
endif
# Add a dependency to force generation of the RTCD files.
+define rtcd_dep_template
ifeq ($(CONFIG_VP8), yes)
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
endif
@@ -175,6 +176,9 @@ $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_dsp_rtcd.h
ifeq ($(TARGET_ARCH_ABI),x86)
$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
endif
+endef
+
+$(eval $(call rtcd_dep_template))
.PHONY: clean
clean:
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/Makefile b/chromium/third_party/libvpx/source/libvpx/build/make/Makefile
index fc7749a5519..f1b1cca33bf 100644
--- a/chromium/third_party/libvpx/source/libvpx/build/make/Makefile
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/Makefile
@@ -22,8 +22,10 @@ clean:: .DEFAULT
exampletest: .DEFAULT
install:: .DEFAULT
test:: .DEFAULT
+test-no-data-check:: .DEFAULT
testdata:: .DEFAULT
utiltest: .DEFAULT
+exampletest-no-data-check utiltest-no-data-check: .DEFAULT
# Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
@@ -56,13 +58,10 @@ dist:
fi
endif
+# Since we invoke make recursively for multiple targets, we need to include the
+# .mk file for the correct target, but only when $(target) is non-empty.
ifneq ($(target),)
-# Normally, we want to build the filename from the target and the toolchain.
-# This disambiguates from the $(target).mk file that exists in the source tree.
-# However, the toolchain is part of the target in universal builds, so we
-# don't want to include TOOLCHAIN in that case. FAT_ARCHS is used to test
-# if we're in the universal case.
-include $(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk
+include $(target)-$(TOOLCHAIN).mk
endif
BUILD_ROOT?=.
VPATH=$(SRC_PATH_BARE)
@@ -116,6 +115,9 @@ test::
testdata::
.PHONY: utiltest
utiltest:
+.PHONY: test-no-data-check exampletest-no-data-check utiltest-no-data-check
+test-no-data-check::
+exampletest-no-data-check utiltest-no-data-check:
# Add compiler flags for intrinsic files
ifeq ($(TOOLCHAIN), x86-os2-gcc)
@@ -313,18 +315,15 @@ $(1):
$$(filter %.o,$$^) $$(extralibs)
endef
-
-
-define lipo_lib_template
-$(1): $(addsuffix /$(1),$(FAT_ARCHS))
- $(if $(quiet),@echo " [LIPO] $$@")
- $(qexec)libtool -static -o $$@ $$?
-endef
-
-define lipo_bin_template
-$(1): $(addsuffix /$(1),$(FAT_ARCHS))
- $(if $(quiet),@echo " [LIPO] $$@")
- $(qexec)lipo -output $$@ -create $$?
+define dll_template
+# Not using a pattern rule here because we don't want to generate empty
+# archives when they are listed as a dependency in files not responsible
+# for creating them.
+$(1):
+ $(if $(quiet),@echo " [LD] $$@")
+ $(qexec)$$(LD) -Zdll $$(LDFLAGS) \
+ -o $$@ \
+ $$(filter %.o,$$^) $$(extralibs) $$(EXPORTS_FILE)
endef
@@ -385,6 +384,7 @@ LIBS=$(call enabled,LIBS)
$(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
$(foreach lib,$(filter %so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
+$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dll,$(LIBS)),$(eval $(call dll_template,$(lib))))
INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
ifeq ($(MAKECMDGOALS),dist)
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh
index 68cc8bb4a5d..688fa12c52a 100644
--- a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh
@@ -390,7 +390,7 @@ write_common_config_banner() {
write_common_config_targets() {
for t in ${all_targets}; do
if enabled ${t}; then
- if enabled universal || enabled child; then
+ if enabled child; then
fwrite config.mk "ALL_TARGETS += ${t}-${toolchain}"
else
fwrite config.mk "ALL_TARGETS += ${t}"
@@ -647,14 +647,6 @@ process_common_toolchain() {
# detect tgt_os
case "$gcctarget" in
- *darwin8*)
- tgt_isa=universal
- tgt_os=darwin8
- ;;
- *darwin9*)
- tgt_isa=universal
- tgt_os=darwin9
- ;;
*darwin10*)
tgt_isa=x86_64
tgt_os=darwin10
@@ -736,6 +728,13 @@ process_common_toolchain() {
# Handle darwin variants. Newer SDKs allow targeting older
# platforms, so use the newest one available.
case ${toolchain} in
+ arm*-darwin*)
+ ios_sdk_dir="$(show_darwin_sdk_path iphoneos)"
+ if [ -d "${ios_sdk_dir}" ]; then
+ add_cflags "-isysroot ${ios_sdk_dir}"
+ add_ldflags "-isysroot ${ios_sdk_dir}"
+ fi
+ ;;
*-darwin*)
osx_sdk_dir="$(show_darwin_sdk_path macosx)"
if [ -d "${osx_sdk_dir}" ]; then
@@ -811,7 +810,14 @@ process_common_toolchain() {
if disabled neon && enabled neon_asm; then
die "Disabling neon while keeping neon-asm is not supported"
fi
- soft_enable media
+ case ${toolchain} in
+ *-darwin*)
+ # Neon is guaranteed on iOS 6+ devices, while old media extensions
+ # no longer assemble with iOS 9 SDK
+ ;;
+ *)
+ soft_enable media
+ esac
;;
armv6)
soft_enable media
@@ -1215,7 +1221,7 @@ EOF
;;
esac
;;
- universal*|*-gcc|generic-gnu)
+ *-gcc|generic-gnu)
link_with_cc=gcc
enable_feature gcc
setup_gnu_toolchain
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh b/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh
index 643ebd634be..b653651030e 100755
--- a/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh
@@ -263,8 +263,8 @@ case "$target" in
;;
arm*)
platforms[0]="ARM"
- asm_Debug_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
- asm_Release_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
+ asm_Debug_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
+ asm_Release_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
;;
*) die "Unsupported target $target!"
;;
diff --git a/chromium/third_party/libvpx/source/libvpx/configure b/chromium/third_party/libvpx/source/libvpx/configure
index 98542855a3e..6cac15aeea5 100755
--- a/chromium/third_party/libvpx/source/libvpx/configure
+++ b/chromium/third_party/libvpx/source/libvpx/configure
@@ -148,13 +148,6 @@ all_platforms="${all_platforms} x86_64-win64-vs9"
all_platforms="${all_platforms} x86_64-win64-vs10"
all_platforms="${all_platforms} x86_64-win64-vs11"
all_platforms="${all_platforms} x86_64-win64-vs12"
-all_platforms="${all_platforms} universal-darwin8-gcc"
-all_platforms="${all_platforms} universal-darwin9-gcc"
-all_platforms="${all_platforms} universal-darwin10-gcc"
-all_platforms="${all_platforms} universal-darwin11-gcc"
-all_platforms="${all_platforms} universal-darwin12-gcc"
-all_platforms="${all_platforms} universal-darwin13-gcc"
-all_platforms="${all_platforms} universal-darwin14-gcc"
all_platforms="${all_platforms} generic-gnu"
# all_targets is a list of all targets that can be configured
@@ -191,6 +184,10 @@ if [ ${doxy_major:-0} -ge 1 ]; then
[ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
fi
+# disable codecs when their source directory does not exist
+[ -d "${source_path}/vp8" ] || disable_feature vp8
+[ -d "${source_path}/vp9" ] || disable_feature vp9
+
# install everything except the sources, by default. sources will have
# to be enabled when doing dist builds, since that's no longer a common
# case.
@@ -206,31 +203,16 @@ enable_feature multithread
enable_feature os_support
enable_feature temporal_denoising
-[ -d "${source_path}/../include" ] && enable_feature alt_tree_layout
-for d in vp8 vp9; do
- [ -d "${source_path}/${d}" ] && disable_feature alt_tree_layout;
-done
-
-if ! enabled alt_tree_layout; then
-# development environment
-[ -d "${source_path}/vp8" ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
-[ -d "${source_path}/vp9" ] && CODECS="${CODECS} vp9_encoder vp9_decoder"
-else
-# customer environment
-[ -f "${source_path}/../include/vpx/vp8cx.h" ] && CODECS="${CODECS} vp8_encoder"
-[ -f "${source_path}/../include/vpx/vp8dx.h" ] && CODECS="${CODECS} vp8_decoder"
-[ -f "${source_path}/../include/vpx/vp9cx.h" ] && CODECS="${CODECS} vp9_encoder"
-[ -f "${source_path}/../include/vpx/vp9dx.h" ] && CODECS="${CODECS} vp9_decoder"
-[ -f "${source_path}/../include/vpx/vp8cx.h" ] || disable_feature vp8_encoder
-[ -f "${source_path}/../include/vpx/vp8dx.h" ] || disable_feature vp8_decoder
-[ -f "${source_path}/../include/vpx/vp9cx.h" ] || disable_feature vp9_encoder
-[ -f "${source_path}/../include/vpx/vp9dx.h" ] || disable_feature vp9_decoder
-
-[ -f "${source_path}/../lib/*/*mt.lib" ] && soft_enable static_msvcrt
-fi
-
-CODECS="$(echo ${CODECS} | tr ' ' '\n')"
-CODEC_FAMILIES="$(for c in ${CODECS}; do echo ${c%_*}; done | sort | uniq)"
+CODECS="
+ vp8_encoder
+ vp8_decoder
+ vp9_encoder
+ vp9_decoder
+"
+CODEC_FAMILIES="
+ vp8
+ vp9
+"
ARCH_LIST="
arm
@@ -262,7 +244,6 @@ HAVE_LIST="
${ARCH_EXT_LIST}
vpx_ports
stdint_h
- alt_tree_layout
pthread_h
sys_mman_h
unistd_h
@@ -436,22 +417,8 @@ post_process_cmdline() {
process_targets() {
enabled child || write_common_config_banner
- enabled universal || write_common_target_config_h ${BUILD_PFX}vpx_config.h
-
- # For fat binaries, call configure recursively to configure for each
- # binary architecture to be included.
- if enabled universal; then
- # Call configure (ourselves) for each subarchitecture
- for arch in $fat_bin_archs; do
- BUILD_PFX=${arch}/ toolchain=${arch} $self --child $cmdline_args || exit $?
- done
- fi
-
- # The write_common_config (config.mk) logic is deferred until after the
- # recursive calls to configure complete, because we want our universal
- # targets to be executed last.
+ write_common_target_config_h ${BUILD_PFX}vpx_config.h
write_common_config_targets
- enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk
# Calculate the default distribution name, based on the enabled features
cf=""
@@ -527,11 +494,11 @@ process_detect() {
# Can only build shared libs on a subset of platforms. Doing this check
# here rather than at option parse time because the target auto-detect
# magic happens after the command line has been parsed.
- if ! enabled linux; then
+ if ! enabled linux && ! enabled os2; then
if enabled gnu; then
echo "--enable-shared is only supported on ELF; assuming this is OK"
else
- die "--enable-shared only supported on ELF for now"
+ die "--enable-shared only supported on ELF and OS/2 for now"
fi
fi
fi
@@ -596,24 +563,6 @@ EOF
process_toolchain() {
process_common_toolchain
- # Handle universal binaries for this architecture
- case $toolchain in
- universal-darwin*)
- darwin_ver=${tgt_os##darwin}
-
- # Tiger (10.4/darwin8) brought support for x86
- if [ $darwin_ver -ge 8 ]; then
- fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}"
- fi
-
- # Leopard (10.5/darwin9) brought 64 bit support
- if [ $darwin_ver -ge 9 ]; then
- fat_bin_archs="$fat_bin_archs x86_64-${tgt_os}-${tgt_cc}"
- fi
- ;;
- esac
-
-
# Enable some useful compiler flags
if enabled gcc; then
enabled werror && check_add_cflags -Werror
@@ -701,7 +650,7 @@ process_toolchain() {
esac
# Other toolchain specific defaults
- case $toolchain in x86*|universal*) soft_enable postproc;; esac
+ case $toolchain in x86*) soft_enable postproc;; esac
if enabled postproc_visualizer; then
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
@@ -765,6 +714,7 @@ CONFIGURE_ARGS="$@"
process "$@"
print_webm_license ${BUILD_PFX}vpx_config.c "/*" " */"
cat <<EOF >> ${BUILD_PFX}vpx_config.c
+#include "vpx/vpx_codec.h"
static const char* const cfg = "$CONFIGURE_ARGS";
const char *vpx_codec_build_config(void) {return cfg;}
EOF
diff --git a/chromium/third_party/libvpx/source/libvpx/examples.mk b/chromium/third_party/libvpx/source/libvpx/examples.mk
index 4ff1de4eeae..fad02cfcd2b 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples.mk
+++ b/chromium/third_party/libvpx/source/libvpx/examples.mk
@@ -56,6 +56,7 @@ UTILS-$(CONFIG_DECODERS) += vpxdec.c
vpxdec.SRCS += md5_utils.c md5_utils.h
vpxdec.SRCS += vpx_ports/mem_ops.h
vpxdec.SRCS += vpx_ports/mem_ops_aligned.h
+vpxdec.SRCS += vpx_ports/msvc.h
vpxdec.SRCS += vpx_ports/vpx_timer.h
vpxdec.SRCS += vpx/vpx_integer.h
vpxdec.SRCS += args.c args.h
@@ -80,6 +81,7 @@ vpxenc.SRCS += tools_common.c tools_common.h
vpxenc.SRCS += warnings.c warnings.h
vpxenc.SRCS += vpx_ports/mem_ops.h
vpxenc.SRCS += vpx_ports/mem_ops_aligned.h
+vpxenc.SRCS += vpx_ports/msvc.h
vpxenc.SRCS += vpx_ports/vpx_timer.h
vpxenc.SRCS += vpxstats.c vpxstats.h
ifeq ($(CONFIG_LIBYUV),yes)
@@ -98,6 +100,7 @@ ifeq ($(CONFIG_SPATIAL_SVC),yes)
vp9_spatial_svc_encoder.SRCS += tools_common.c tools_common.h
vp9_spatial_svc_encoder.SRCS += video_common.h
vp9_spatial_svc_encoder.SRCS += video_writer.h video_writer.c
+ vp9_spatial_svc_encoder.SRCS += vpx_ports/msvc.h
vp9_spatial_svc_encoder.SRCS += vpxstats.c vpxstats.h
vp9_spatial_svc_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D
vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
@@ -112,6 +115,7 @@ vpx_temporal_svc_encoder.SRCS += ivfenc.c ivfenc.h
vpx_temporal_svc_encoder.SRCS += tools_common.c tools_common.h
vpx_temporal_svc_encoder.SRCS += video_common.h
vpx_temporal_svc_encoder.SRCS += video_writer.h video_writer.c
+vpx_temporal_svc_encoder.SRCS += vpx_ports/msvc.h
vpx_temporal_svc_encoder.GUID = B18C08F2-A439-4502-A78E-849BE3D60947
vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder
EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c
@@ -122,6 +126,7 @@ simple_decoder.SRCS += video_common.h
simple_decoder.SRCS += video_reader.h video_reader.c
simple_decoder.SRCS += vpx_ports/mem_ops.h
simple_decoder.SRCS += vpx_ports/mem_ops_aligned.h
+simple_decoder.SRCS += vpx_ports/msvc.h
simple_decoder.DESCRIPTION = Simplified decoder loop
EXAMPLES-$(CONFIG_DECODERS) += postproc.c
postproc.SRCS += ivfdec.h ivfdec.c
@@ -130,6 +135,7 @@ postproc.SRCS += video_common.h
postproc.SRCS += video_reader.h video_reader.c
postproc.SRCS += vpx_ports/mem_ops.h
postproc.SRCS += vpx_ports/mem_ops_aligned.h
+postproc.SRCS += vpx_ports/msvc.h
postproc.GUID = 65E33355-F35E-4088-884D-3FD4905881D7
postproc.DESCRIPTION = Decoder postprocessor control
EXAMPLES-$(CONFIG_DECODERS) += decode_to_md5.c
@@ -140,6 +146,7 @@ decode_to_md5.SRCS += video_common.h
decode_to_md5.SRCS += video_reader.h video_reader.c
decode_to_md5.SRCS += vpx_ports/mem_ops.h
decode_to_md5.SRCS += vpx_ports/mem_ops_aligned.h
+decode_to_md5.SRCS += vpx_ports/msvc.h
decode_to_md5.GUID = 59120B9B-2735-4BFE-B022-146CA340FE42
decode_to_md5.DESCRIPTION = Frame by frame MD5 checksum
EXAMPLES-$(CONFIG_ENCODERS) += simple_encoder.c
@@ -147,6 +154,7 @@ simple_encoder.SRCS += ivfenc.h ivfenc.c
simple_encoder.SRCS += tools_common.h tools_common.c
simple_encoder.SRCS += video_common.h
simple_encoder.SRCS += video_writer.h video_writer.c
+simple_encoder.SRCS += vpx_ports/msvc.h
simple_encoder.GUID = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
simple_encoder.DESCRIPTION = Simplified encoder loop
EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_lossless_encoder.c
@@ -154,6 +162,7 @@ vp9_lossless_encoder.SRCS += ivfenc.h ivfenc.c
vp9_lossless_encoder.SRCS += tools_common.h tools_common.c
vp9_lossless_encoder.SRCS += video_common.h
vp9_lossless_encoder.SRCS += video_writer.h video_writer.c
+vp9_lossless_encoder.SRCS += vpx_ports/msvc.h
vp9_lossless_encoder.GUID = B63C7C88-5348-46DC-A5A6-CC151EF93366
vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder
EXAMPLES-$(CONFIG_ENCODERS) += twopass_encoder.c
@@ -161,6 +170,7 @@ twopass_encoder.SRCS += ivfenc.h ivfenc.c
twopass_encoder.SRCS += tools_common.h tools_common.c
twopass_encoder.SRCS += video_common.h
twopass_encoder.SRCS += video_writer.h video_writer.c
+twopass_encoder.SRCS += vpx_ports/msvc.h
twopass_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8
twopass_encoder.DESCRIPTION = Two-pass encoder loop
EXAMPLES-$(CONFIG_DECODERS) += decode_with_drops.c
@@ -170,6 +180,7 @@ decode_with_drops.SRCS += video_common.h
decode_with_drops.SRCS += video_reader.h video_reader.c
decode_with_drops.SRCS += vpx_ports/mem_ops.h
decode_with_drops.SRCS += vpx_ports/mem_ops_aligned.h
+decode_with_drops.SRCS += vpx_ports/msvc.h
decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
decode_with_drops.DESCRIPTION = Drops frames while decoding
EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c
@@ -177,6 +188,7 @@ set_maps.SRCS += ivfenc.h ivfenc.c
set_maps.SRCS += tools_common.h tools_common.c
set_maps.SRCS += video_common.h
set_maps.SRCS += video_writer.h video_writer.c
+set_maps.SRCS += vpx_ports/msvc.h
set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
set_maps.DESCRIPTION = Set active and ROI maps
EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c
@@ -184,6 +196,7 @@ vp8cx_set_ref.SRCS += ivfenc.h ivfenc.c
vp8cx_set_ref.SRCS += tools_common.h tools_common.c
vp8cx_set_ref.SRCS += video_common.h
vp8cx_set_ref.SRCS += video_writer.h video_writer.c
+vp8cx_set_ref.SRCS += vpx_ports/msvc.h
vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame
@@ -194,6 +207,7 @@ EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_multi_resolution_encoder.c
vp8_multi_resolution_encoder.SRCS += ivfenc.h ivfenc.c
vp8_multi_resolution_encoder.SRCS += tools_common.h tools_common.c
vp8_multi_resolution_encoder.SRCS += video_writer.h video_writer.c
+vp8_multi_resolution_encoder.SRCS += vpx_ports/msvc.h
vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS)
vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de
vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
@@ -254,14 +268,6 @@ CODEC_EXTRA_LIBS=$(sort $(call enabled,CODEC_EXTRA_LIBS))
$(foreach ex,$(ALL_EXAMPLES),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) examples.mk))
-# If this is a universal (fat) binary, then all the subarchitectures have
-# already been built and our job is to stitch them together. The
-# BUILD_OBJS variable indicates whether we should be building
-# (compiling, linking) the library. The LIPO_OBJS variable indicates
-# that we're stitching.
-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_OBJS,BUILD_OBJS):=yes)
-
-
# Create build/install dependencies for all examples. The common case
# is handled here. The MSVS case is handled below.
NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
@@ -269,24 +275,28 @@ DIST-BINS-$(NOT_MSVS) += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX)))
INSTALL-BINS-$(NOT_MSVS) += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX)))
DIST-SRCS-yes += $(ALL_SRCS)
INSTALL-SRCS-yes += $(UTIL_SRCS)
-OBJS-$(NOT_MSVS) += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS)))
+OBJS-$(NOT_MSVS) += $(call objs,$(ALL_SRCS))
BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX)))
# Instantiate linker template for all examples.
CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
-SHARED_LIB_SUF=$(if $(filter darwin%,$(TGT_OS)),.dylib,.so)
+ifneq ($(filter darwin%,$(TGT_OS)),)
+SHARED_LIB_SUF=.dylib
+else
+ifneq ($(filter os2%,$(TGT_OS)),)
+SHARED_LIB_SUF=_dll.a
+else
+SHARED_LIB_SUF=.so
+endif
+endif
CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
$(foreach bin,$(BINS-yes),\
- $(if $(BUILD_OBJS),$(eval $(bin):\
- $(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
- $(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\
+ $(eval $(bin):$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF))\
+ $(eval $(call linker_template,$(bin),\
$(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \
-l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\
- )))\
- $(if $(LIPO_OBJS),$(eval $(call lipo_bin_template,$(bin))))\
- )
-
+ )))
# The following pairs define a mapping of locations in the distribution
# tree to locations in the source/build trees.
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/decode_to_md5.c b/chromium/third_party/libvpx/source/libvpx/examples/decode_to_md5.c
index a3843bed336..1ae7a4b57f5 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/decode_to_md5.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/decode_to_md5.c
@@ -71,7 +71,7 @@ static void print_md5(FILE *stream, unsigned char digest[16]) {
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
exit(EXIT_FAILURE);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/decode_with_drops.c b/chromium/third_party/libvpx/source/libvpx/examples/decode_with_drops.c
index 36f7d80e127..2233e473d36 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/decode_with_drops.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/decode_with_drops.c
@@ -65,7 +65,7 @@
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name);
exit(EXIT_FAILURE);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/postproc.c b/chromium/third_party/libvpx/source/libvpx/examples/postproc.c
index e34426a6194..a8ac208d9bd 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/postproc.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/postproc.c
@@ -52,7 +52,7 @@
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
exit(EXIT_FAILURE);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/resize_util.c b/chromium/third_party/libvpx/source/libvpx/examples/resize_util.c
index f8c35255fa2..e6fdd5bb2af 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/resize_util.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/resize_util.c
@@ -15,6 +15,7 @@
#include <stdlib.h>
#include <string.h>
+#include "../tools_common.h"
#include "../vp9/encoder/vp9_resize.h"
static const char *exec_name = NULL;
@@ -26,7 +27,7 @@ static void usage() {
printf("<output_yuv> [<frames>]\n");
}
-void usage_exit() {
+void usage_exit(void) {
usage();
exit(EXIT_FAILURE);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/set_maps.c b/chromium/third_party/libvpx/source/libvpx/examples/set_maps.c
index 5555baac22e..1dc3ac0c98f 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/set_maps.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/set_maps.c
@@ -55,7 +55,7 @@
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
exec_name);
exit(EXIT_FAILURE);
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/simple_decoder.c b/chromium/third_party/libvpx/source/libvpx/examples/simple_decoder.c
index 08a21668542..8ccc81035e3 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/simple_decoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/simple_decoder.c
@@ -88,7 +88,7 @@
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
exit(EXIT_FAILURE);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/simple_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/simple_encoder.c
index e805c258747..a3077297318 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/simple_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/simple_encoder.c
@@ -106,7 +106,7 @@
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr,
"Usage: %s <codec> <width> <height> <infile> <outfile> "
"<keyframe-interval> [<error-resilient>]\nSee comments in "
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/twopass_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/twopass_encoder.c
index 0ec83ddccdf..aecc11d3f4e 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/twopass_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/twopass_encoder.c
@@ -58,7 +58,7 @@
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
exec_name);
exit(EXIT_FAILURE);
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c
index e623567b8fe..2b032049c0b 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/vp8_multi_resolution_encoder.c
@@ -37,15 +37,14 @@
#include <unistd.h>
#endif
#include "vpx_ports/vpx_timer.h"
-#define VPX_CODEC_DISABLE_COMPAT 1
#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"
#include "vpx_ports/mem_ops.h"
-#include "./tools_common.h"
+#include "../tools_common.h"
#define interface (vpx_codec_vp8_cx())
#define fourcc 0x30385056
-void usage_exit() {
+void usage_exit(void) {
exit(EXIT_FAILURE);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp8cx_set_ref.c b/chromium/third_party/libvpx/source/libvpx/examples/vp8cx_set_ref.c
index a2982821a42..8b4cc303d3c 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/vp8cx_set_ref.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/vp8cx_set_ref.c
@@ -58,7 +58,7 @@
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n",
exec_name);
exit(EXIT_FAILURE);
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9_lossless_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9_lossless_encoder.c
index 54275770d5f..82725168304 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/vp9_lossless_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9_lossless_encoder.c
@@ -20,7 +20,7 @@
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr, "vp9_lossless_encoder: Example demonstrating VP9 lossless "
"encoding feature. Supports raw input only.\n");
fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
index f4deb693b2f..5a609766561 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
@@ -14,11 +14,13 @@
* that benefit from a scalable bitstream.
*/
+#include <math.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
+
#include "../args.h"
#include "../tools_common.h"
#include "../video_writer.h"
@@ -27,11 +29,18 @@
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "../vpxstats.h"
+#define OUTPUT_RC_STATS 1
static const arg_def_t skip_frames_arg =
ARG_DEF("s", "skip-frames", 1, "input frames to skip");
static const arg_def_t frames_arg =
ARG_DEF("f", "frames", 1, "number of frames to encode");
+static const arg_def_t threads_arg =
+ ARG_DEF("th", "threads", 1, "number of threads to use");
+#if OUTPUT_RC_STATS
+static const arg_def_t output_rc_stats_arg =
+ ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
+#endif
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
static const arg_def_t timebase_arg =
@@ -42,6 +51,9 @@ static const arg_def_t spatial_layers_arg =
ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
static const arg_def_t temporal_layers_arg =
ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
+static const arg_def_t temporal_layering_mode_arg =
+ ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
+ "VP9E_TEMPORAL_LAYERING_MODE");
static const arg_def_t kf_dist_arg =
ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
static const arg_def_t scale_factors_arg =
@@ -65,6 +77,8 @@ static const arg_def_t lag_in_frame_arg =
"generating any outputs");
static const arg_def_t rc_end_usage_arg =
ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
+static const arg_def_t speed_arg =
+ ARG_DEF("sp", "speed", 1, "speed configuration");
#if CONFIG_VP9_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = {
@@ -85,10 +99,16 @@ static const arg_def_t *svc_args[] = {
&timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg,
&kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg,
&fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg,
- &max_bitrate_arg, &temporal_layers_arg, &lag_in_frame_arg,
+ &max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg,
+ &lag_in_frame_arg, &threads_arg,
+#if OUTPUT_RC_STATS
+ &output_rc_stats_arg,
+#endif
+
#if CONFIG_VP9_HIGHBITDEPTH
&bitdepth_arg,
#endif
+ &speed_arg,
&rc_end_usage_arg, NULL
};
@@ -102,6 +122,10 @@ static const uint32_t default_bitrate = 1000;
static const uint32_t default_spatial_layers = 5;
static const uint32_t default_temporal_layers = 1;
static const uint32_t default_kf_dist = 100;
+static const uint32_t default_temporal_layering_mode = 0;
+static const uint32_t default_output_rc_stats = 0;
+static const int32_t default_speed = -1; // -1 means use library default.
+static const uint32_t default_threads = 0; // zero means use library default.
typedef struct {
const char *input_filename;
@@ -116,7 +140,7 @@ typedef struct {
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
exec_name);
fprintf(stderr, "Options:\n");
@@ -143,6 +167,12 @@ static void parse_command_line(int argc, const char **argv_,
svc_ctx->log_level = SVC_LOG_DEBUG;
svc_ctx->spatial_layers = default_spatial_layers;
svc_ctx->temporal_layers = default_temporal_layers;
+ svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
+#if OUTPUT_RC_STATS
+ svc_ctx->output_rc_stat = default_output_rc_stats;
+#endif
+ svc_ctx->speed = default_speed;
+ svc_ctx->threads = default_threads;
// start with default encoder configuration
res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
@@ -184,6 +214,20 @@ static void parse_command_line(int argc, const char **argv_,
svc_ctx->spatial_layers = arg_parse_uint(&arg);
} else if (arg_match(&arg, &temporal_layers_arg, argi)) {
svc_ctx->temporal_layers = arg_parse_uint(&arg);
+#if OUTPUT_RC_STATS
+ } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
+ svc_ctx->output_rc_stat = arg_parse_uint(&arg);
+#endif
+ } else if (arg_match(&arg, &speed_arg, argi)) {
+ svc_ctx->speed = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &threads_arg, argi)) {
+ svc_ctx->threads = arg_parse_uint(&arg);
+ } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
+ svc_ctx->temporal_layering_mode =
+ enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
+ if (svc_ctx->temporal_layering_mode) {
+ enc_cfg->g_error_resilient = 1;
+ }
} else if (arg_match(&arg, &kf_dist_arg, argi)) {
enc_cfg->kf_min_dist = arg_parse_uint(&arg);
enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
@@ -316,6 +360,185 @@ static void parse_command_line(int argc, const char **argv_,
enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
}
+#if OUTPUT_RC_STATS
+// For rate control encoding stats.
+struct RateControlStats {
+ // Number of input frames per layer.
+ int layer_input_frames[VPX_MAX_LAYERS];
+ // Total (cumulative) number of encoded frames per layer.
+ int layer_tot_enc_frames[VPX_MAX_LAYERS];
+ // Number of encoded non-key frames per layer.
+ int layer_enc_frames[VPX_MAX_LAYERS];
+ // Framerate per layer (cumulative).
+ double layer_framerate[VPX_MAX_LAYERS];
+ // Target average frame size per layer (per-frame-bandwidth per layer).
+ double layer_pfb[VPX_MAX_LAYERS];
+ // Actual average frame size per layer.
+ double layer_avg_frame_size[VPX_MAX_LAYERS];
+ // Average rate mismatch per layer (|target - actual| / target).
+ double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
+ // Actual encoding bitrate per layer (cumulative).
+ double layer_encoding_bitrate[VPX_MAX_LAYERS];
+ // Average of the short-time encoder actual bitrate.
+ // TODO(marpan): Should we add these short-time stats for each layer?
+ double avg_st_encoding_bitrate;
+ // Variance of the short-time encoder actual bitrate.
+ double variance_st_encoding_bitrate;
+ // Window (number of frames) for computing short-time encoding bitrate.
+ int window_size;
+ // Number of window measurements.
+ int window_count;
+};
+
+// Note: these rate control stats assume only 1 key frame in the
+// sequence (i.e., first frame only).
+static void set_rate_control_stats(struct RateControlStats *rc,
+ vpx_codec_enc_cfg_t *cfg) {
+ unsigned int sl, tl;
+ // Set the layer (cumulative) framerate and the target layer (non-cumulative)
+ // per-frame-bandwidth, for the rate control encoding stats below.
+ const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
+
+ for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+ for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
+ const int layer = sl * cfg->ts_number_layers + tl;
+ const int tlayer0 = sl * cfg->ts_number_layers;
+ rc->layer_framerate[layer] =
+ framerate / cfg->ts_rate_decimator[tl];
+ if (tl > 0) {
+ rc->layer_pfb[layer] = 1000.0 *
+ (cfg->layer_target_bitrate[layer] -
+ cfg->layer_target_bitrate[layer - 1]) /
+ (rc->layer_framerate[layer] -
+ rc->layer_framerate[layer - 1]);
+ } else {
+ rc->layer_pfb[tlayer0] = 1000.0 *
+ cfg->layer_target_bitrate[tlayer0] /
+ rc->layer_framerate[tlayer0];
+ }
+ rc->layer_input_frames[layer] = 0;
+ rc->layer_enc_frames[layer] = 0;
+ rc->layer_tot_enc_frames[layer] = 0;
+ rc->layer_encoding_bitrate[layer] = 0.0;
+ rc->layer_avg_frame_size[layer] = 0.0;
+ rc->layer_avg_rate_mismatch[layer] = 0.0;
+ }
+ }
+ rc->window_count = 0;
+ rc->window_size = 15;
+ rc->avg_st_encoding_bitrate = 0.0;
+ rc->variance_st_encoding_bitrate = 0.0;
+}
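The layer_pfb values set above are target per-frame sizes in bits: the base temporal layer spreads its full (cumulative) bitrate over its own framerate, while each enhancement layer only gets the bitrate it adds, spread over the frames it adds. A small worked example of that computation, using invented numbers for a two-layer 15/30 fps stream:

    #include <stdio.h>

    int main(void) {
      /* Two temporal layers on a 30 fps source, cumulative targets in kbps. */
      const double framerate[2]    = { 30.0 / 2.0, 30.0 / 1.0 };  /* 15, 30 fps */
      const double bitrate_kbps[2] = { 200.0, 400.0 };

      /* Base layer: all of its bits over all of its frames. */
      const double pfb0 = 1000.0 * bitrate_kbps[0] / framerate[0];
      /* Enhancement layer: only the extra bits over the extra frames. */
      const double pfb1 = 1000.0 * (bitrate_kbps[1] - bitrate_kbps[0]) /
                          (framerate[1] - framerate[0]);

      printf("layer 0 target frame size: %.0f bits\n", pfb0);  /* ~13333 */
      printf("layer 1 target frame size: %.0f bits\n", pfb1);  /* ~13333 */
      return 0;
    }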
+
+static void printout_rate_control_summary(struct RateControlStats *rc,
+ vpx_codec_enc_cfg_t *cfg,
+ int frame_cnt) {
+ unsigned int sl, tl;
+ int tot_num_frames = 0;
+ double perc_fluctuation = 0.0;
+ printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
+ printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
+ cfg->ss_number_layers, cfg->ts_number_layers);
+ for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+ for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
+ const int layer = sl * cfg->ts_number_layers + tl;
+ const int num_dropped = (tl > 0) ?
+ (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
+ (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
+ if (!sl)
+ tot_num_frames += rc->layer_input_frames[layer];
+ rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
+ rc->layer_encoding_bitrate[layer] / tot_num_frames;
+ rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
+ rc->layer_enc_frames[layer];
+ rc->layer_avg_rate_mismatch[layer] =
+ 100.0 * rc->layer_avg_rate_mismatch[layer] /
+ rc->layer_enc_frames[layer];
+ printf("For layer#: sl%d tl%d \n", sl, tl);
+ printf("Bitrate (target vs actual): %d %f.0 kbps\n",
+ cfg->layer_target_bitrate[layer],
+ rc->layer_encoding_bitrate[layer]);
+ printf("Average frame size (target vs actual): %f %f bits\n",
+ rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
+ printf("Average rate_mismatch: %f\n",
+ rc->layer_avg_rate_mismatch[layer]);
+ printf("Number of input frames, encoded (non-key) frames, "
+ "and percent dropped frames: %d %d %f.0 \n",
+ rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
+ 100.0 * num_dropped / rc->layer_input_frames[layer]);
+ printf("\n");
+ }
+ }
+ rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
+ rc->variance_st_encoding_bitrate =
+ rc->variance_st_encoding_bitrate / rc->window_count -
+ (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
+ perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
+ rc->avg_st_encoding_bitrate;
+ printf("Short-time stats, for window of %d frames: \n", rc->window_size);
+ printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
+ rc->avg_st_encoding_bitrate,
+ sqrt(rc->variance_st_encoding_bitrate),
+ perc_fluctuation);
+ if (frame_cnt != tot_num_frames)
+ die("Error: Number of input frames not equal to output encoded frames != "
+ "%d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
+}
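The short-time summary printed above accumulates the per-window bitrates as a running sum and a running sum of squares, recovers the variance as E[x^2] - (E[x])^2, and reports the fluctuation as the relative standard deviation. A compact worked example of that arithmetic, with invented window measurements:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
      const double window_kbps[4] = { 480.0, 510.0, 495.0, 515.0 };
      const int n = 4;
      double avg = 0.0, var = 0.0;
      int i;

      for (i = 0; i < n; ++i) {
        avg += window_kbps[i];                      /* running sum        */
        var += window_kbps[i] * window_kbps[i];     /* running sum of x^2 */
      }
      avg /= n;
      var = var / n - avg * avg;                    /* E[x^2] - (E[x])^2  */
      printf("avg %.1f kbps, rms-variance %.1f, fluctuation %.2f%%\n",
             avg, sqrt(var), 100.0 * sqrt(var) / avg);
      return 0;
    }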
+
+vpx_codec_err_t parse_superframe_index(const uint8_t *data,
+ size_t data_sz,
+ uint32_t sizes[8], int *count) {
+ // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
+ // it is a super frame index. If the last byte of real video compression
+ // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
+ // not the associated matching marker byte at the front of the index we have
+ // an invalid bitstream and need to return an error.
+
+ uint8_t marker;
+
+ marker = *(data + data_sz - 1);
+ *count = 0;
+
+
+ if ((marker & 0xe0) == 0xc0) {
+ const uint32_t frames = (marker & 0x7) + 1;
+ const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+ const size_t index_sz = 2 + mag * frames;
+
+ // This chunk is marked as having a superframe index but doesn't have
+ // enough data for it, thus it's an invalid superframe index.
+ if (data_sz < index_sz)
+ return VPX_CODEC_CORRUPT_FRAME;
+
+ {
+ const uint8_t marker2 = *(data + data_sz - index_sz);
+
+ // This chunk is marked as having a superframe index but doesn't have
+ // the matching marker byte at the front of the index therefore it's an
+ // invalid chunk.
+ if (marker != marker2)
+ return VPX_CODEC_CORRUPT_FRAME;
+ }
+
+ {
+ // Found a valid superframe index.
+ uint32_t i, j;
+ const uint8_t *x = &data[data_sz - index_sz + 1];
+
+ for (i = 0; i < frames; ++i) {
+ uint32_t this_sz = 0;
+
+ for (j = 0; j < mag; ++j)
+ this_sz |= (*x++) << (j * 8);
+ sizes[i] = this_sz;
+ }
+ *count = frames;
+ }
+ }
+ return VPX_CODEC_OK;
+}
+#endif
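parse_superframe_index() above decodes the VP9 superframe trailer: a marker byte whose top three bits are 110, whose bits 3-4 hold the size-field width minus one, and whose bits 0-2 hold the frame count minus one, followed by the per-frame sizes and the same marker repeated at the end. The sketch below builds a two-frame index over dummy payload bytes and decodes it with the same logic; the buffer contents are invented purely for illustration.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void) {
      /* marker: 110 (superframe) | mag-1 = 0 (1-byte sizes) | frames-1 = 1 */
      const uint8_t marker = 0xc0 | (0 << 3) | 1;
      uint8_t chunk[16];
      size_t sz = 0;

      memset(chunk, 0xAA, 5);      sz += 5;   /* dummy "frame 0", 5 bytes */
      memset(chunk + sz, 0xBB, 3); sz += 3;   /* dummy "frame 1", 3 bytes */

      chunk[sz++] = marker;                   /* index: marker, sizes, marker */
      chunk[sz++] = 5;
      chunk[sz++] = 3;
      chunk[sz++] = marker;

      /* Decode exactly as parse_superframe_index() does. */
      {
        const uint8_t m = chunk[sz - 1];
        if ((m & 0xe0) == 0xc0) {
          const uint32_t frames = (m & 0x7) + 1;
          const uint32_t mag = ((m >> 3) & 0x3) + 1;
          const size_t index_sz = 2 + mag * frames;
          const uint8_t *x = &chunk[sz - index_sz + 1];
          uint32_t i, j;
          for (i = 0; i < frames; ++i) {
            uint32_t this_sz = 0;
            for (j = 0; j < mag; ++j)
              this_sz |= (uint32_t)(*x++) << (j * 8);
            printf("frame %u: %u bytes\n", (unsigned)i, (unsigned)this_sz);
          }
        }
      }
      return 0;
    }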
+
int main(int argc, const char **argv) {
AppInput app_input = {0};
VpxVideoWriter *writer = NULL;
@@ -332,7 +555,15 @@ int main(int argc, const char **argv) {
FILE *infile = NULL;
int end_of_stream = 0;
int frames_received = 0;
-
+#if OUTPUT_RC_STATS
+ VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
+ struct RateControlStats rc;
+ vpx_svc_layer_id_t layer_id;
+ int sl, tl;
+ double sum_bitrate = 0.0;
+ double sum_bitrate2 = 0.0;
+ double framerate = 30.0;
+#endif
memset(&svc_ctx, 0, sizeof(svc_ctx));
svc_ctx.log_print = 1;
exec_name = argv[0];
@@ -359,6 +590,13 @@ int main(int argc, const char **argv) {
VPX_CODEC_OK)
die("Failed to initialize encoder\n");
+#if OUTPUT_RC_STATS
+ if (svc_ctx.output_rc_stat) {
+ set_rate_control_stats(&rc, &enc_cfg);
+ framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
+ }
+#endif
+
info.codec_fourcc = VP9_FOURCC;
info.time_base.numerator = enc_cfg.g_timebase.num;
info.time_base.denominator = enc_cfg.g_timebase.den;
@@ -370,11 +608,31 @@ int main(int argc, const char **argv) {
if (!writer)
die("Failed to open %s for writing\n", app_input.output_filename);
}
+#if OUTPUT_RC_STATS
+ // For now, just write temporal layer streams.
+ // TODO(wonkap): do spatial by re-writing superframe.
+ if (svc_ctx.output_rc_stat) {
+ for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
+ char file_name[PATH_MAX];
+
+ snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
+ app_input.output_filename, tl);
+ outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
+ if (!outfile[tl])
+ die("Failed to open %s for writing", file_name);
+ }
+ }
+#endif
// skip initial frames
for (i = 0; i < app_input.frames_to_skip; ++i)
vpx_img_read(&raw, infile);
+ if (svc_ctx.speed != -1)
+ vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
+ if (svc_ctx.threads)
+ vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
+
// Encode frames
while (!end_of_stream) {
vpx_codec_iter_t iter = NULL;
@@ -386,7 +644,9 @@ int main(int argc, const char **argv) {
}
res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
- pts, frame_duration, VPX_DL_GOOD_QUALITY);
+ pts, frame_duration, svc_ctx.speed >= 5 ?
+ VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
+
printf("%s", vpx_svc_get_message(&svc_ctx));
if (res != VPX_CODEC_OK) {
die_codec(&codec, "Failed to encode frame");
@@ -395,11 +655,90 @@ int main(int argc, const char **argv) {
while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
switch (cx_pkt->kind) {
case VPX_CODEC_CX_FRAME_PKT: {
- if (cx_pkt->data.frame.sz > 0)
+ if (cx_pkt->data.frame.sz > 0) {
+#if OUTPUT_RC_STATS
+ uint32_t sizes[8];
+ int count = 0;
+#endif
vpx_video_writer_write_frame(writer,
cx_pkt->data.frame.buf,
cx_pkt->data.frame.sz,
cx_pkt->data.frame.pts);
+#if OUTPUT_RC_STATS
+ // TODO(marpan/wonkap): Put this (to line728) in separate function.
+ if (svc_ctx.output_rc_stat) {
+ vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
+ parse_superframe_index(cx_pkt->data.frame.buf,
+ cx_pkt->data.frame.sz, sizes, &count);
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+ layer_id.temporal_layer_id];
+ }
+ for (tl = layer_id.temporal_layer_id;
+ tl < enc_cfg.ts_number_layers; ++tl) {
+ vpx_video_writer_write_frame(outfile[tl],
+ cx_pkt->data.frame.buf,
+ cx_pkt->data.frame.sz,
+ cx_pkt->data.frame.pts);
+ }
+
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ for (tl = layer_id.temporal_layer_id;
+ tl < enc_cfg.ts_number_layers; ++tl) {
+ const int layer = sl * enc_cfg.ts_number_layers + tl;
+ ++rc.layer_tot_enc_frames[layer];
+ rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
+ // Keep count of rate control stats per layer, for non-key
+ // frames.
+ if (tl == layer_id.temporal_layer_id &&
+ !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
+ rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
+ rc.layer_avg_rate_mismatch[layer] +=
+ fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
+ rc.layer_pfb[layer];
+ ++rc.layer_enc_frames[layer];
+ }
+ }
+ }
+
+ // Update for short-time encoding bitrate states, for moving
+ // window of size rc->window, shifted by rc->window / 2.
+ // Ignore first window segment, due to key frame.
+ if (frame_cnt > rc.window_size) {
+ tl = layer_id.temporal_layer_id;
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
+ }
+ if (frame_cnt % rc.window_size == 0) {
+ rc.window_count += 1;
+ rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
+ rc.variance_st_encoding_bitrate +=
+ (sum_bitrate / rc.window_size) *
+ (sum_bitrate / rc.window_size);
+ sum_bitrate = 0.0;
+ }
+ }
+
+ // Second shifted window.
+ if (frame_cnt > rc.window_size + rc.window_size / 2) {
+ tl = layer_id.temporal_layer_id;
+ for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
+ }
+
+ if (frame_cnt > 2 * rc.window_size &&
+ frame_cnt % rc.window_size == 0) {
+ rc.window_count += 1;
+ rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
+ rc.variance_st_encoding_bitrate +=
+ (sum_bitrate2 / rc.window_size) *
+ (sum_bitrate2 / rc.window_size);
+ sum_bitrate2 = 0.0;
+ }
+ }
+ }
+#endif
+ }
printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
!!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
@@ -424,25 +763,30 @@ int main(int argc, const char **argv) {
pts += frame_duration;
}
}
-
printf("Processed %d frames\n", frame_cnt);
-
fclose(infile);
+#if OUTPUT_RC_STATS
+ if (svc_ctx.output_rc_stat) {
+ printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
+ printf("\n");
+ }
+#endif
if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
-
if (app_input.passes == 2)
stats_close(&app_input.rc_stats, 1);
-
if (writer) {
vpx_video_writer_close(writer);
}
-
+#if OUTPUT_RC_STATS
+ if (svc_ctx.output_rc_stat) {
+ for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
+ vpx_video_writer_close(outfile[tl]);
+ }
+ }
+#endif
vpx_img_free(&raw);
-
// display average size, psnr
printf("%s", vpx_svc_dump_statistics(&svc_ctx));
-
vpx_svc_release(&svc_ctx);
-
return EXIT_SUCCESS;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c
index 349875997b5..484deb5b360 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c
@@ -28,7 +28,7 @@
static const char *exec_name;
-void usage_exit() {
+void usage_exit(void) {
exit(EXIT_FAILURE);
}
@@ -70,6 +70,7 @@ struct RateControlMetrics {
int window_size;
// Number of window measurements.
int window_count;
+ int layer_target_bitrate[VPX_MAX_LAYERS];
};
// Note: these rate control metrics assume only 1 key frame in the
@@ -85,13 +86,13 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
// per-frame-bandwidth, for the rate control encoding stats below.
const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
- rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] /
+ rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] /
rc->layer_framerate[0];
for (i = 0; i < cfg->ts_number_layers; ++i) {
if (i > 0) {
rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
rc->layer_pfb[i] = 1000.0 *
- (cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) /
+ (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
(rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
}
rc->layer_input_frames[i] = 0;
@@ -128,7 +129,7 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
rc->layer_enc_frames[i];
printf("For layer#: %d \n", i);
- printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i],
+ printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i],
rc->layer_encoding_bitrate[i]);
printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
rc->layer_avg_frame_size[i]);
@@ -597,13 +598,16 @@ int main(int argc, char **argv) {
for (i = min_args_base;
(int)i < min_args_base + mode_to_num_layers[layering_mode];
++i) {
- cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
+ rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
+ if (strncmp(encoder->name, "vp8", 3) == 0)
+ cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
+ else if (strncmp(encoder->name, "vp9", 3) == 0)
+ cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
}
// Real time parameters.
cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
cfg.rc_end_usage = VPX_CBR;
- cfg.rc_resize_allowed = 0;
cfg.rc_min_quantizer = 2;
cfg.rc_max_quantizer = 56;
if (strncmp(encoder->name, "vp9", 3) == 0)
@@ -614,6 +618,9 @@ int main(int argc, char **argv) {
cfg.rc_buf_optimal_sz = 600;
cfg.rc_buf_sz = 1000;
+ // Disable dynamic resizing by default.
+ cfg.rc_resize_allowed = 0;
+
// Use 1 thread as default.
cfg.g_threads = 1;
@@ -625,6 +632,8 @@ int main(int argc, char **argv) {
// Disable automatic keyframe placement.
cfg.kf_min_dist = cfg.kf_max_dist = 3000;
+ cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
set_temporal_layer_pattern(layering_mode,
&cfg,
layer_flags,
@@ -633,8 +642,8 @@ int main(int argc, char **argv) {
set_rate_control_metrics(&rc, &cfg);
// Target bandwidth for the whole stream.
- // Set to ts_target_bitrate for highest layer (total bitrate).
- cfg.rc_target_bitrate = cfg.ts_target_bitrate[cfg.ts_number_layers - 1];
+ // Set to layer_target_bitrate for highest layer (total bitrate).
+ cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1];
// Open input file.
if (!(infile = fopen(argv[1], "rb"))) {
@@ -677,15 +686,22 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
} else if (strncmp(encoder->name, "vp9", 3) == 0) {
- vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
- vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
- vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
- vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
- vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
- vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
- if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) {
- die_codec(&codec, "Failed to set SVC");
+ vpx_svc_extra_cfg_t svc_params;
+ vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
+ vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+ vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
+ vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
+ vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+ vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
+ if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
+ die_codec(&codec, "Failed to set SVC");
+ for (i = 0; i < cfg.ts_number_layers; ++i) {
+ svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
+ svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
}
+ svc_params.scaling_factor_num[0] = cfg.g_h;
+ svc_params.scaling_factor_den[0] = cfg.g_h;
+ vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params);
}
if (strncmp(encoder->name, "vp8", 3) == 0) {
vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0);
diff --git a/chromium/third_party/libvpx/source/libvpx/libs.mk b/chromium/third_party/libvpx/source/libvpx/libs.mk
index 6eee0039c29..6215990c9bf 100644
--- a/chromium/third_party/libvpx/source/libvpx/libs.mk
+++ b/chromium/third_party/libvpx/source/libvpx/libs.mk
@@ -25,7 +25,7 @@ $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
@echo " [CREATE] $$@"
$$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \
--sym=$(1) \
- --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
+ --config=$$(CONFIG_DIR)$$(target)-$$(TOOLCHAIN).mk \
$$(RTCD_OPTIONS) $$^ > $$@
CLEAN-OBJS += $$(BUILD_PFX)$(1).h
RTCD += $$(BUILD_PFX)$(1).h
@@ -34,13 +34,6 @@ endef
CODEC_SRCS-yes += CHANGELOG
CODEC_SRCS-yes += libs.mk
-# If this is a universal (fat) binary, then all the subarchitectures have
-# already been built and our job is to stitch them together. The
-# BUILD_LIBVPX variable indicates whether we should be building
-# (compiling, linking) the library. The LIPO_LIBVPX variable indicates
-# that we're stitching.
-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
-
include $(SRC_PATH_BARE)/vpx/vpx_codec.mk
CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))
CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS))
@@ -140,18 +133,18 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%)
INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%)
endif
-CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
-CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h
-CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
+CODEC_SRCS-yes += build/make/version.sh
+CODEC_SRCS-yes += build/make/rtcd.pl
+CODEC_SRCS-yes += vpx_ports/emmintrin_compat.h
+CODEC_SRCS-yes += vpx_ports/mem_ops.h
+CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h
+CODEC_SRCS-yes += vpx_ports/vpx_once.h
+CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
endif
-CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
+CODEC_EXPORTS-yes += vpx/exports_com
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
@@ -218,7 +211,7 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
$(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \
--src-path-bare="$(SRC_PATH_BARE)" \
-PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX)
+PROJECTS-yes += vpx.$(VCPROJ_SFX)
vpx.$(VCPROJ_SFX): vpx_config.asm
vpx.$(VCPROJ_SFX): $(RTCD)
@@ -226,31 +219,39 @@ vpx.$(VCPROJ_SFX): $(RTCD)
endif
else
LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
-OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS)
-LIBS-$(if $(BUILD_LIBVPX),$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
+OBJS-yes += $(LIBVPX_OBJS)
+LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
-
-BUILD_LIBVPX_SO := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
-
SO_VERSION_MAJOR := 2
SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib
+SHARED_LIB_SUF := .dylib
EXPORT_FILE := libvpx.syms
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.dylib )
else
+ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
+LIBVPX_SO := libvpx$(SO_VERSION_MAJOR).dll
+SHARED_LIB_SUF := _dll.a
+EXPORT_FILE := libvpx.def
+LIBVPX_SO_SYMLINKS :=
+LIBVPX_SO_IMPLIB := libvpx_dll.a
+else
LIBVPX_SO := libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH)
+SHARED_LIB_SUF := .so
EXPORT_FILE := libvpx.ver
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.so libvpx.so.$(SO_VERSION_MAJOR) \
libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
endif
+endif
-LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
- $(notdir $(LIBVPX_SO_SYMLINKS))
+LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
+ $(notdir $(LIBVPX_SO_SYMLINKS)) \
+ $(if $(LIBVPX_SO_IMPLIB), $(BUILD_PFX)$(LIBVPX_SO_IMPLIB))
$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE)
$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR)
@@ -268,6 +269,19 @@ libvpx.syms: $(call enabled,CODEC_EXPORTS)
$(qexec)awk '{print "_"$$2}' $^ >$@
CLEAN-OBJS += libvpx.syms
+libvpx.def: $(call enabled,CODEC_EXPORTS)
+ @echo " [CREATE] $@"
+ $(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@
+ $(qexec)echo "DATA MULTIPLE NONSHARED" >> $@
+ $(qexec)echo "EXPORTS" >> $@
+ $(qexec)awk '!/vpx_svc_*/ {print "_"$$2}' $^ >>$@
+CLEAN-OBJS += libvpx.def
+
+libvpx_dll.a: $(LIBVPX_SO)
+ @echo " [IMPLIB] $@"
+ $(qexec)emximp -o $@ $<
+CLEAN-OBJS += libvpx_dll.a
+
define libvpx_symlink_template
$(1): $(2)
@echo " [LN] $(2) $$@"
@@ -283,11 +297,12 @@ $(eval $(call libvpx_symlink_template,\
$(LIBVPX_SO)))
-INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBVPX_SO_SYMLINKS)
-INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBSUBDIR)/$(LIBVPX_SO)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(if $(LIBVPX_SO_IMPLIB),$(LIBSUBDIR)/$(LIBVPX_SO_IMPLIB))
-LIBS-$(BUILD_LIBVPX) += vpx.pc
+LIBS-yes += vpx.pc
vpx.pc: config.mk libs.mk
@echo " [CREATE] $@"
$(qexec)echo '# pkg-config file from libvpx $(VERSION_STRING)' > $@
@@ -313,9 +328,6 @@ INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
CLEAN-OBJS += vpx.pc
endif
-LIBS-$(LIPO_LIBVPX) += libvpx.a
-$(eval $(if $(LIPO_LIBVPX),$(call lipo_lib_template,libvpx.a)))
-
#
# Rule to make assembler configuration file from C configuration file
#
@@ -354,11 +366,15 @@ LIBVPX_TEST_DATA_PATH ?= .
include $(SRC_PATH_BARE)/test/test.mk
LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
-LIBVPX_TEST_BINS=./test_libvpx$(EXE_SFX)
+LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
$(call enabled,LIBVPX_TEST_DATA))
libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
+TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
+TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
+TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
+
libvpx_test_srcs.txt:
@echo " [CREATE] $@"
@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
@@ -422,7 +438,25 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_
PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX)
-LIBVPX_TEST_BINS := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BINS)))
+LIBVPX_TEST_BIN := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BIN)))
+
+ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
+PROJECTS-$(CONFIG_MSVS) += test_intra_pred_speed.$(VCPROJ_SFX)
+test_intra_pred_speed.$(VCPROJ_SFX): $(TEST_INTRA_PRED_SPEED_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)
+ @echo " [CREATE] $@"
+ $(qexec)$(GEN_VCPROJ) \
+ --exe \
+ --target=$(TOOLCHAIN) \
+ --name=test_intra_pred_speed \
+ -D_VARIADIC_MAX=10 \
+ --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
+ --ver=$(CONFIG_VS_VERSION) \
+ --src-path-bare="$(SRC_PATH_BARE)" \
+ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
+ --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
+ -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
+ -L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^
+endif # TEST_INTRA_PRED_SPEED
endif
else
@@ -433,45 +467,54 @@ ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
# Disabling pthreads globally will cause issues on darwin and possibly elsewhere
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
endif
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
-OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
-LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
+GTEST_INCLUDES := -I$(SRC_PATH_BARE)/third_party/googletest/src
+GTEST_INCLUDES += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
+OBJS-yes += $(GTEST_OBJS)
+LIBS-yes += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
$(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS)
LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS)))
-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
-OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS)
-BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS)
+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
+OBJS-yes += $(LIBVPX_TEST_OBJS)
+BINS-yes += $(LIBVPX_TEST_BIN)
CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
-CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
-$(foreach bin,$(LIBVPX_TEST_BINS),\
- $(if $(BUILD_LIBVPX),$(eval $(bin): \
- lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
- $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
- $(LIBVPX_TEST_OBJS) \
- -L. -lvpx -lgtest $(extralibs) -lm)\
- )))\
- $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\
-
-endif
+CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
+TEST_LIBS := lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a
+$(LIBVPX_TEST_BIN): $(TEST_LIBS)
+$(eval $(call linkerxx_template,$(LIBVPX_TEST_BIN), \
+ $(LIBVPX_TEST_OBJS) \
+ -L. -lvpx -lgtest $(extralibs) -lm))
+
+ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
+$(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
+OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS)
+BINS-yes += $(TEST_INTRA_PRED_SPEED_BIN)
+
+$(TEST_INTRA_PRED_SPEED_BIN): $(TEST_LIBS)
+$(eval $(call linkerxx_template,$(TEST_INTRA_PRED_SPEED_BIN), \
+ $(TEST_INTRA_PRED_SPEED_OBJS) \
+ -L. -lvpx -lgtest $(extralibs) -lm))
+endif # TEST_INTRA_PRED_SPEED
+
+endif # CONFIG_UNIT_TESTS
# Install test sources only if codec source is included
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\
$(shell find $(SRC_PATH_BARE)/third_party/googletest -type f))
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS)
+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(TEST_INTRA_PRED_SPEED_SRCS)
define test_shard_template
test:: test_shard.$(1)
-test_shard.$(1): $(LIBVPX_TEST_BINS) testdata
+test-no-data-check:: test_shard_ndc.$(1)
+test_shard.$(1) test_shard_ndc.$(1): $(LIBVPX_TEST_BIN)
@set -e; \
- for t in $(LIBVPX_TEST_BINS); do \
- export GTEST_SHARD_INDEX=$(1); \
- export GTEST_TOTAL_SHARDS=$(2); \
- $$$$t; \
- done
+ export GTEST_SHARD_INDEX=$(1); \
+ export GTEST_TOTAL_SHARDS=$(2); \
+ $(LIBVPX_TEST_BIN)
+test_shard.$(1): testdata
.PHONY: test_shard.$(1)
endef
@@ -516,15 +559,16 @@ ifeq ($(CONFIG_MSVS),yes)
# TODO(tomfinegan): Support running the debug versions of tools?
TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH))
endif
-utiltest: testdata
+utiltest utiltest-no-data-check:
$(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--bin-path $(TEST_BIN_PATH)
$(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--bin-path $(TEST_BIN_PATH)
+utiltest: testdata
else
-utiltest:
+utiltest utiltest-no-data-check:
@echo Unit tests must be enabled to make the utiltest target.
endif
@@ -542,11 +586,12 @@ ifeq ($(CONFIG_MSVS),yes)
# TODO(tomfinegan): Support running the debug versions of tools?
EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release
endif
-exampletest: examples testdata
+exampletest exampletest-no-data-check: examples
$(qexec)$(SRC_PATH_BARE)/test/examples.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--bin-path $(EXAMPLES_BIN_PATH)
+exampletest: testdata
else
-exampletest:
+exampletest exampletest-no-data-check:
@echo Unit tests must be enabled to make the exampletest target.
endif
diff --git a/chromium/third_party/libvpx/source/libvpx/md5_utils.c b/chromium/third_party/libvpx/source/libvpx/md5_utils.c
index 8fb26e2084b..f4f893a2d67 100644
--- a/chromium/third_party/libvpx/source/libvpx/md5_utils.c
+++ b/chromium/third_party/libvpx/source/libvpx/md5_utils.c
@@ -24,7 +24,7 @@
#include "md5_utils.h"
-void
+static void
byteSwap(UWORD32 *buf, unsigned words) {
md5byte *p;
diff --git a/chromium/third_party/libvpx/source/libvpx/rate_hist.c b/chromium/third_party/libvpx/source/libvpx/rate_hist.c
index 1cef19bdd64..a77222b1618 100644
--- a/chromium/third_party/libvpx/source/libvpx/rate_hist.c
+++ b/chromium/third_party/libvpx/source/libvpx/rate_hist.c
@@ -88,6 +88,9 @@ void update_rate_histogram(struct rate_hist *hist,
if (now < cfg->rc_buf_initial_sz)
return;
+ if (!cfg->rc_target_bitrate)
+ return;
+
then = now;
/* Sum the size over the past rc_buf_sz ms */
diff --git a/chromium/third_party/libvpx/source/libvpx/tools_common.c b/chromium/third_party/libvpx/source/libvpx/tools_common.c
index e243a91575a..901734e0f34 100644
--- a/chromium/third_party/libvpx/source/libvpx/tools_common.c
+++ b/chromium/third_party/libvpx/source/libvpx/tools_common.c
@@ -140,7 +140,7 @@ static const VpxInterface vpx_encoders[] = {
#endif
};
-int get_vpx_encoder_count() {
+int get_vpx_encoder_count(void) {
return sizeof(vpx_encoders) / sizeof(vpx_encoders[0]);
}
@@ -170,7 +170,7 @@ static const VpxInterface vpx_decoders[] = {
#endif
};
-int get_vpx_decoder_count() {
+int get_vpx_decoder_count(void) {
return sizeof(vpx_decoders) / sizeof(vpx_decoders[0]);
}
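The get_vpx_encoder_count/get_vpx_decoder_count changes above tighten empty parameter lists to (void). In C, unlike C++, an empty list leaves the arguments unspecified, so callers can pass anything without a diagnostic; (void) pins the prototype to "no arguments". A small self-contained illustration (names are hypothetical):

    /* count_unchecked() leaves its parameter list unspecified; count_checked(void)
     * is a full prototype, so a call with arguments is rejected at compile time. */
    static int count_unchecked() { return 1; }
    static int count_checked(void) { return 1; }

    int main(void) {
      int a = count_unchecked(42);  /* compiles; the stray argument is ignored */
      int b = count_checked();      /* count_checked(42) would be a compile error */
      return a + b;
    }
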
diff --git a/chromium/third_party/libvpx/source/libvpx/tools_common.h b/chromium/third_party/libvpx/source/libvpx/tools_common.h
index de6c38f0f5b..adccec88bb3 100644
--- a/chromium/third_party/libvpx/source/libvpx/tools_common.h
+++ b/chromium/third_party/libvpx/source/libvpx/tools_common.h
@@ -16,6 +16,7 @@
#include "vpx/vpx_codec.h"
#include "vpx/vpx_image.h"
#include "vpx/vpx_integer.h"
+#include "vpx_ports/msvc.h"
#if CONFIG_ENCODERS
#include "./y4minput.h"
@@ -34,7 +35,6 @@
#if CONFIG_OS_SUPPORT
#if defined(_MSC_VER)
#include <io.h> /* NOLINT */
-#define snprintf _snprintf
#define isatty _isatty
#define fileno _fileno
#else
@@ -89,6 +89,7 @@ struct VpxInputContext {
enum VideoFileType file_type;
uint32_t width;
uint32_t height;
+ struct VpxRational pixel_aspect_ratio;
vpx_img_fmt_t fmt;
vpx_bit_depth_t bit_depth;
int only_i420;
@@ -119,7 +120,7 @@ void warn(const char *fmt, ...);
void die_codec(vpx_codec_ctx_t *ctx, const char *s) VPX_NO_RETURN;
/* The tool including this file must define usage_exit() */
-void usage_exit() VPX_NO_RETURN;
+void usage_exit(void) VPX_NO_RETURN;
#undef VPX_NO_RETURN
@@ -131,11 +132,11 @@ typedef struct VpxInterface {
vpx_codec_iface_t *(*const codec_interface)();
} VpxInterface;
-int get_vpx_encoder_count();
+int get_vpx_encoder_count(void);
const VpxInterface *get_vpx_encoder_by_index(int i);
const VpxInterface *get_vpx_encoder_by_name(const char *name);
-int get_vpx_decoder_count();
+int get_vpx_decoder_count(void);
const VpxInterface *get_vpx_decoder_by_index(int i);
const VpxInterface *get_vpx_decoder_by_name(const char *name);
const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/alloccommon.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/alloccommon.c
index b9d875a2ff7..8dfd4ce203e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/alloccommon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/alloccommon.c
@@ -10,6 +10,7 @@
#include "vpx_config.h"
+#include "alloccommon.h"
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
#include "onyxc_int.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
deleted file mode 100644
index 39919579f80..00000000000
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance16x16_armv6.asm
+++ /dev/null
@@ -1,154 +0,0 @@
-;
-; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_variance16x16_armv6|
-
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0 unsigned char *src_ptr
-; r1 int source_stride
-; r2 unsigned char *ref_ptr
-; r3 int recon_stride
-; stack unsigned int *sse
-|vp8_variance16x16_armv6| PROC
-
- stmfd sp!, {r4-r12, lr}
-
- pld [r0, r1, lsl #0]
- pld [r2, r3, lsl #0]
-
- mov r8, #0 ; initialize sum = 0
- mov r11, #0 ; initialize sse = 0
- mov r12, #16 ; set loop counter to 16 (=block height)
-
-loop
- ; 1st 4 pixels
- ldr r4, [r0, #0] ; load 4 src pixels
- ldr r5, [r2, #0] ; load 4 ref pixels
-
- mov lr, #0 ; constant zero
-
- usub8 r6, r4, r5 ; calculate difference
- pld [r0, r1, lsl #1]
- sel r7, r6, lr ; select bytes with positive difference
- usub8 r9, r5, r4 ; calculate difference with reversed operands
- pld [r2, r3, lsl #1]
- sel r6, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r4, r7, lr ; calculate sum of positive differences
- usad8 r5, r6, lr ; calculate sum of negative differences
- orr r6, r6, r7 ; differences of all 4 pixels
- ; calculate total sum
- adds r8, r8, r4 ; add positive differences to sum
- subs r8, r8, r5 ; subtract negative differences from sum
-
- ; calculate sse
- uxtb16 r5, r6 ; byte (two pixels) to halfwords
- uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
- smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
-
- ; 2nd 4 pixels
- ldr r4, [r0, #4] ; load 4 src pixels
- ldr r5, [r2, #4] ; load 4 ref pixels
- smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
-
- usub8 r6, r4, r5 ; calculate difference
- sel r7, r6, lr ; select bytes with positive difference
- usub8 r9, r5, r4 ; calculate difference with reversed operands
- sel r6, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r4, r7, lr ; calculate sum of positive differences
- usad8 r5, r6, lr ; calculate sum of negative differences
- orr r6, r6, r7 ; differences of all 4 pixels
-
- ; calculate total sum
- add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; subtract negative differences from sum
-
- ; calculate sse
- uxtb16 r5, r6 ; byte (two pixels) to halfwords
- uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
- smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
-
- ; 3rd 4 pixels
- ldr r4, [r0, #8] ; load 4 src pixels
- ldr r5, [r2, #8] ; load 4 ref pixels
- smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
-
- usub8 r6, r4, r5 ; calculate difference
- sel r7, r6, lr ; select bytes with positive difference
- usub8 r9, r5, r4 ; calculate difference with reversed operands
- sel r6, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r4, r7, lr ; calculate sum of positive differences
- usad8 r5, r6, lr ; calculate sum of negative differences
- orr r6, r6, r7 ; differences of all 4 pixels
-
- ; calculate total sum
- add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; subtract negative differences from sum
-
- ; calculate sse
- uxtb16 r5, r6 ; byte (two pixels) to halfwords
- uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
- smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
-
- ; 4th 4 pixels
- ldr r4, [r0, #12] ; load 4 src pixels
- ldr r5, [r2, #12] ; load 4 ref pixels
- smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
-
- usub8 r6, r4, r5 ; calculate difference
- add r0, r0, r1 ; set src_ptr to next row
- sel r7, r6, lr ; select bytes with positive difference
- usub8 r9, r5, r4 ; calculate difference with reversed operands
- add r2, r2, r3 ; set dst_ptr to next row
- sel r6, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r4, r7, lr ; calculate sum of positive differences
- usad8 r5, r6, lr ; calculate sum of negative differences
- orr r6, r6, r7 ; differences of all 4 pixels
-
- ; calculate total sum
- add r8, r8, r4 ; add positive differences to sum
- sub r8, r8, r5 ; subtract negative differences from sum
-
- ; calculate sse
- uxtb16 r5, r6 ; byte (two pixels) to halfwords
- uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
- smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
- smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
-
-
- subs r12, r12, #1
-
- bne loop
-
- ; return stuff
- ldr r6, [sp, #40] ; get address of sse
- mul r0, r8, r8 ; sum * sum
- str r11, [r6] ; store sse
- sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
-
- ldmfd sp!, {r4-r12, pc}
-
- ENDP
-
- END
-
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
deleted file mode 100644
index 915ee499309..00000000000
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/armv6/vp8_variance8x8_armv6.asm
+++ /dev/null
@@ -1,101 +0,0 @@
-;
-; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_variance8x8_armv6|
-
- ARM
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0 unsigned char *src_ptr
-; r1 int source_stride
-; r2 unsigned char *ref_ptr
-; r3 int recon_stride
-; stack unsigned int *sse
-|vp8_variance8x8_armv6| PROC
-
- push {r4-r10, lr}
-
- pld [r0, r1, lsl #0]
- pld [r2, r3, lsl #0]
-
- mov r12, #8 ; set loop counter to 8 (=block height)
- mov r4, #0 ; initialize sum = 0
- mov r5, #0 ; initialize sse = 0
-
-loop
- ; 1st 4 pixels
- ldr r6, [r0, #0x0] ; load 4 src pixels
- ldr r7, [r2, #0x0] ; load 4 ref pixels
-
- mov lr, #0 ; constant zero
-
- usub8 r8, r6, r7 ; calculate difference
- pld [r0, r1, lsl #1]
- sel r10, r8, lr ; select bytes with positive difference
- usub8 r9, r7, r6 ; calculate difference with reversed operands
- pld [r2, r3, lsl #1]
- sel r8, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r6, r10, lr ; calculate sum of positive differences
- usad8 r7, r8, lr ; calculate sum of negative differences
- orr r8, r8, r10 ; differences of all 4 pixels
- ; calculate total sum
- add r4, r4, r6 ; add positive differences to sum
- sub r4, r4, r7 ; subtract negative differences from sum
-
- ; calculate sse
- uxtb16 r7, r8 ; byte (two pixels) to halfwords
- uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
- smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
-
- ; 2nd 4 pixels
- ldr r6, [r0, #0x4] ; load 4 src pixels
- ldr r7, [r2, #0x4] ; load 4 ref pixels
- smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
-
- usub8 r8, r6, r7 ; calculate difference
- add r0, r0, r1 ; set src_ptr to next row
- sel r10, r8, lr ; select bytes with positive difference
- usub8 r9, r7, r6 ; calculate difference with reversed operands
- add r2, r2, r3 ; set dst_ptr to next row
- sel r8, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r6, r10, lr ; calculate sum of positive differences
- usad8 r7, r8, lr ; calculate sum of negative differences
- orr r8, r8, r10 ; differences of all 4 pixels
-
- ; calculate total sum
- add r4, r4, r6 ; add positive differences to sum
- sub r4, r4, r7 ; subtract negative differences from sum
-
- ; calculate sse
- uxtb16 r7, r8 ; byte (two pixels) to halfwords
- uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
- smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
- subs r12, r12, #1 ; next row
- smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
-
- bne loop
-
- ; return stuff
- ldr r8, [sp, #32] ; get address of sse
- mul r1, r4, r4 ; sum * sum
- str r5, [r8] ; store sse
- sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6))
-
- pop {r4-r10, pc}
-
- ENDP
-
- END
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c
deleted file mode 100644
index 1b1979073e5..00000000000
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/variance_neon.c
+++ /dev/null
@@ -1,320 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-#include "vpx_ports/mem.h"
-
-unsigned int vp8_variance16x16_neon(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- int i;
- int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- uint32x2_t d0u32, d10u32;
- int64x1_t d0s64, d1s64;
- uint8x16_t q0u8, q1u8, q2u8, q3u8;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int32x4_t q8s32, q9s32, q10s32;
- int64x2_t q0s64, q1s64, q5s64;
-
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 8; i++) {
- q0u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q1u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- __builtin_prefetch(src_ptr);
-
- q2u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- q3u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- __builtin_prefetch(ref_ptr);
-
- q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
- q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
- q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
- q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
- q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
- q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
- d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
- d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
- q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
- q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
-
- d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
- d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
- q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
- }
-
- q10s32 = vaddq_s32(q10s32, q9s32);
- q0s64 = vpaddlq_s32(q8s32);
- q1s64 = vpaddlq_s32(q10s32);
-
- d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
- d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
- vreinterpret_s32_s64(d0s64));
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
-
- d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 8);
- d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
-
- return vget_lane_u32(d0u32, 0);
-}
-
-unsigned int vp8_variance16x8_neon(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- int i;
- int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- uint32x2_t d0u32, d10u32;
- int64x1_t d0s64, d1s64;
- uint8x16_t q0u8, q1u8, q2u8, q3u8;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int32x4_t q8s32, q9s32, q10s32;
- int64x2_t q0s64, q1s64, q5s64;
-
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 4; i++) { // variance16x8_neon_loop
- q0u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q1u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- __builtin_prefetch(src_ptr);
-
- q2u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- q3u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- __builtin_prefetch(ref_ptr);
-
- q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
- q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
- q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
- q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
- q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
- q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
- d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
- d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
- q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
- q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
-
- d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
- d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
- q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
- }
-
- q10s32 = vaddq_s32(q10s32, q9s32);
- q0s64 = vpaddlq_s32(q8s32);
- q1s64 = vpaddlq_s32(q10s32);
-
- d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
- d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
- vreinterpret_s32_s64(d0s64));
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
-
- d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
- d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
-
- return vget_lane_u32(d0u32, 0);
-}
-
-unsigned int vp8_variance8x16_neon(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- int i;
- uint8x8_t d0u8, d2u8, d4u8, d6u8;
- int16x4_t d22s16, d23s16, d24s16, d25s16;
- uint32x2_t d0u32, d10u32;
- int64x1_t d0s64, d1s64;
- uint16x8_t q11u16, q12u16;
- int32x4_t q8s32, q9s32, q10s32;
- int64x2_t q0s64, q1s64, q5s64;
-
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 8; i++) { // variance8x16_neon_loop
- d0u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d2u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- __builtin_prefetch(src_ptr);
-
- d4u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d6u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- __builtin_prefetch(ref_ptr);
-
- q11u16 = vsubl_u8(d0u8, d4u8);
- q12u16 = vsubl_u8(d2u8, d6u8);
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
- q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
- q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
- }
-
- q10s32 = vaddq_s32(q10s32, q9s32);
- q0s64 = vpaddlq_s32(q8s32);
- q1s64 = vpaddlq_s32(q10s32);
-
- d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
- d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
- vreinterpret_s32_s64(d0s64));
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
-
- d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
- d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
-
- return vget_lane_u32(d0u32, 0);
-}
-
-unsigned int vp8_variance8x8_neon(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- int i;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
- int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- uint32x2_t d0u32, d10u32;
- int64x1_t d0s64, d1s64;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int32x4_t q8s32, q9s32, q10s32;
- int64x2_t q0s64, q1s64, q5s64;
-
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 2; i++) { // variance8x8_neon_loop
- d0u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d1u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d2u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d3u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
-
- d4u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d5u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d6u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d7u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
-
- q11u16 = vsubl_u8(d0u8, d4u8);
- q12u16 = vsubl_u8(d1u8, d5u8);
- q13u16 = vsubl_u8(d2u8, d6u8);
- q14u16 = vsubl_u8(d3u8, d7u8);
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
- q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
- q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
- d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
- d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
- q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
- q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
-
- d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
- d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
- q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
- q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
- }
-
- q10s32 = vaddq_s32(q10s32, q9s32);
- q0s64 = vpaddlq_s32(q8s32);
- q1s64 = vpaddlq_s32(q10s32);
-
- d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
- d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
- vreinterpret_s32_s64(d0s64));
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
-
- d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 6);
- d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
-
- return vget_lane_u32(d0u32, 0);
-}
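The file deleted above held the vp8-specific NEON variance kernels; later hunks in this patch redirect vp8 callers to the shared vpx_variance* functions instead. For reference, a scalar sketch of what the removed 16x16 kernel computed (sum of squared differences minus the squared-mean term, where the >> 8 matches the final vshr by 8 in the deleted code; the function name is illustrative):

    /* Scalar reference for the removed vp8_variance16x16_neon. */
    static unsigned int variance16x16_ref(const unsigned char *src, int src_stride,
                                          const unsigned char *ref, int ref_stride,
                                          unsigned int *sse) {
      int sum = 0;
      unsigned int sq = 0;
      int r, c;
      for (r = 0; r < 16; ++r) {
        for (c = 0; c < 16; ++c) {
          const int d = src[c] - ref[c];
          sum += d;
          sq += (unsigned int)(d * d);
        }
        src += src_stride;
        ref += ref_stride;
      }
      *sse = sq;
      return sq - (((unsigned int)sum * sum) >> 8);  /* 16*16 = 256 pixels => >> 8 */
    }
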
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c
index 974d3b6532b..3c8ed11f070 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/neon/vp8_subpixelvariance_neon.c
@@ -12,7 +12,7 @@
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
-static const uint16_t bilinear_taps_coeff[8][2] = {
+static const uint8_t bilinear_taps_coeff[8][2] = {
{128, 0},
{112, 16},
{ 96, 32},
@@ -972,9 +972,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const uint16_t *vpx_filter) {
- const uint8x8_t f0 = vmov_n_u8((uint8_t)vpx_filter[0]);
- const uint8x8_t f1 = vmov_n_u8((uint8_t)vpx_filter[1]);
+ const uint8_t *vpx_filter) {
+ const uint8x8_t f0 = vmov_n_u8(vpx_filter[0]);
+ const uint8x8_t f1 = vmov_n_u8(vpx_filter[1]);
unsigned int i;
for (i = 0; i < output_height; ++i) {
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c
index 467a509420e..0f293f03d94 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/arm/variance_arm.c
@@ -9,10 +9,14 @@
*/
#include "vpx_config.h"
-#include "vp8_rtcd.h"
+#include "./vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vp8/common/variance.h"
#include "vp8/common/filter.h"
+// TODO(johannkoenig): Move this to vpx_dsp or vp8/encoder
+#if CONFIG_VP8_ENCODER
+
#if HAVE_MEDIA
#include "vp8/common/arm/bilinearfilter_arm.h"
@@ -40,8 +44,8 @@ unsigned int vp8_sub_pixel_variance8x8_armv6
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
8, 8, 8, VFilter);
- return vp8_variance8x8_armv6(second_pass, 8, dst_ptr,
- dst_pixels_per_line, sse);
+ return vpx_variance8x8_media(second_pass, 8, dst_ptr,
+ dst_pixels_per_line, sse);
}
unsigned int vp8_sub_pixel_variance16x16_armv6
@@ -86,13 +90,13 @@ unsigned int vp8_sub_pixel_variance16x16_armv6
vp8_filter_block2d_bil_second_pass_armv6(first_pass, second_pass,
16, 16, 16, VFilter);
- var = vp8_variance16x16_armv6(second_pass, 16, dst_ptr,
- dst_pixels_per_line, sse);
+ var = vpx_variance16x16_media(second_pass, 16, dst_ptr,
+ dst_pixels_per_line, sse);
}
return var;
}
-#endif /* HAVE_MEDIA */
+#endif // HAVE_MEDIA
#if HAVE_NEON
@@ -129,4 +133,5 @@ unsigned int vp8_sub_pixel_variance16x16_neon
return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
}
-#endif
+#endif // HAVE_NEON
+#endif // CONFIG_VP8_ENCODER
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/copy_c.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/copy_c.c
index fd96c863491..e3392913f63 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/copy_c.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/copy_c.c
@@ -11,11 +11,11 @@
#include <string.h>
-#include "vpx_config.h"
+#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"
/* Copy 2 macroblocks to a buffer */
-void vp8_copy32xn_c(unsigned char *src_ptr, int src_stride,
+void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
unsigned char *dst_ptr, int dst_stride,
int height)
{
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/filter.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/filter.c
index 25266f86827..84c608effaa 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/filter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/filter.c
@@ -10,6 +10,7 @@
#include "filter.h"
+#include "./vp8_rtcd.h"
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/generic/systemdependent.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/generic/systemdependent.c
index d84df334810..4393ced48c8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/generic/systemdependent.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/generic/systemdependent.c
@@ -17,6 +17,7 @@
#include "vpx_ports/x86.h"
#endif
#include "vp8/common/onyxc_int.h"
+#include "vp8/common/systemdependent.h"
#if CONFIG_MULTITHREAD
#if HAVE_UNISTD_H && !defined(__OS2__)
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/idctllm.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/idctllm.c
index 47af52f04e7..f5403c5aaf7 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/idctllm.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/idctllm.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vp8_rtcd.h"
/****************************************************************************
* Notes:
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/mfqe.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/mfqe.c
index d12dea19364..5c0680f42d4 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/mfqe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/mfqe.c
@@ -151,14 +151,14 @@ static void multiframe_quality_enhance_block
if (blksize == 16)
{
- actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
- act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
+ actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
+ act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
#ifdef USE_SSD
- vp8_variance16x16(y, y_stride, yd, yd_stride, &sse);
+ vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 128)>>8;
- vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
+ vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 32)>>6;
- vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
+ vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 32)>>6;
#else
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
@@ -168,14 +168,14 @@ static void multiframe_quality_enhance_block
}
else /* if (blksize == 8) */
{
- actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
- act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
+ actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
+ act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
#ifdef USE_SSD
- vp8_variance8x8(y, y_stride, yd, yd_stride, &sse);
+ vpx_variance8x8(y, y_stride, yd, yd_stride, &sse);
sad = (sse + 32)>>6;
- vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
+ vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse);
usad = (sse + 8)>>4;
- vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
+ vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse);
vsad = (sse + 8)>>4;
#else
sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c
index 266431a3240..a4e6ae170c9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/postproc.c
@@ -427,7 +427,7 @@ void vp8_de_noise(VP8_COMMON *cm,
}
}
-double vp8_gaussian(double sigma, double mu, double x)
+static double gaussian(double sigma, double mu, double x)
{
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
(exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
@@ -455,7 +455,7 @@ static void fillrd(struct postproc_state *state, int q, int a)
for (i = -32; i < 32; i++)
{
- const int v = (int)(.5 + 256 * vp8_gaussian(sigma, 0, i));
+ const int v = (int)(.5 + 256 * gaussian(sigma, 0, i));
if (v)
{
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp8/common/rtcd_defs.pl
index 56b7db7ec33..fed20887f8e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/rtcd_defs.pl
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/rtcd_defs.pl
@@ -215,7 +215,7 @@ $vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
-# Disable neon while investigating https://code.google.com/p/webm/issues/detail?id=817
+#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2/;
$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
@@ -233,35 +233,11 @@ specialize qw/vp8_bilinear_predict8x4 mmx media neon/;
$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
-specialize qw/vp8_bilinear_predict4x4 mmx media neon/;
+#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
+specialize qw/vp8_bilinear_predict4x4 mmx media/;
$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
#
-# Whole-pixel Variance
-#
-add_proto qw/unsigned int vp8_variance4x4/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_variance4x4 mmx sse2/;
-$vp8_variance4x4_sse2=vp8_variance4x4_wmt;
-
-add_proto qw/unsigned int vp8_variance8x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_variance8x8 mmx sse2 media neon/;
-$vp8_variance8x8_sse2=vp8_variance8x8_wmt;
-$vp8_variance8x8_media=vp8_variance8x8_armv6;
-
-add_proto qw/unsigned int vp8_variance8x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_variance8x16 mmx sse2 neon/;
-$vp8_variance8x16_sse2=vp8_variance8x16_wmt;
-
-add_proto qw/unsigned int vp8_variance16x8/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_variance16x8 mmx sse2 neon/;
-$vp8_variance16x8_sse2=vp8_variance16x8_wmt;
-
-add_proto qw/unsigned int vp8_variance16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_variance16x16 mmx sse2 media neon/;
-$vp8_variance16x16_sse2=vp8_variance16x16_wmt;
-$vp8_variance16x16_media=vp8_variance16x16_armv6;
-
-#
# Sub-pixel Variance
#
add_proto qw/unsigned int vp8_sub_pixel_variance4x4/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
@@ -269,10 +245,9 @@ specialize qw/vp8_sub_pixel_variance4x4 mmx sse2/;
$vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt;
add_proto qw/unsigned int vp8_sub_pixel_variance8x8/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
-specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media neon_asm/;
+specialize qw/vp8_sub_pixel_variance8x8 mmx sse2 media/;
$vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt;
$vp8_sub_pixel_variance8x8_media=vp8_sub_pixel_variance8x8_armv6;
-$vp8_sub_pixel_variance8x8_neon_asm=vp8_sub_pixel_variance8x8_neon;
add_proto qw/unsigned int vp8_sub_pixel_variance8x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
specialize qw/vp8_sub_pixel_variance8x16 mmx sse2/;
@@ -309,31 +284,10 @@ $vp8_variance_halfpixvar16x16_hv_media=vp8_variance_halfpixvar16x16_hv_armv6;
if (vpx_config("CONFIG_VP8_ENCODER") eq "yes") {
#
-# Sum of squares (vector)
-#
-add_proto qw/unsigned int vp8_get_mb_ss/, "const short *";
-specialize qw/vp8_get_mb_ss mmx sse2/;
-
-#
-# SSE (Sum Squared Error)
-#
-add_proto qw/unsigned int vp8_sub_pixel_mse16x16/, "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse";
-specialize qw/vp8_sub_pixel_mse16x16 mmx sse2/;
-$vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt;
-
-add_proto qw/unsigned int vp8_mse16x16/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp8_mse16x16 mmx sse2 media neon/;
-$vp8_mse16x16_sse2=vp8_mse16x16_wmt;
-$vp8_mse16x16_media=vp8_mse16x16_armv6;
-
-add_proto qw/unsigned int vp8_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
-specialize qw/vp8_get4x4sse_cs mmx neon/;
-
-#
# Block copy
#
if ($opts{arch} =~ /x86/) {
- add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n";
+ add_proto qw/void vp8_copy32xn/, "const unsigned char *src_ptr, int source_stride, unsigned char *dst_ptr, int dst_stride, int n";
specialize qw/vp8_copy32xn sse2 sse3/;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/variance.h b/chromium/third_party/libvpx/source/libvpx/vp8/common/variance.h
index 552a28025e6..c6c9f41bf6a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/variance.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/variance.h
@@ -29,7 +29,7 @@ typedef unsigned int(*vpx_sad_fn_t)(
typedef void (*vp8_copy32xn_fn_t)(
const unsigned char *src_ptr,
int source_stride,
- const unsigned char *ref_ptr,
+ unsigned char *ref_ptr,
int ref_stride,
int n);
@@ -39,6 +39,7 @@ typedef void (*vpx_sad_multi_fn_t)(
const unsigned char *ref_array,
int ref_stride,
unsigned int *sad_array);
+
typedef void (*vpx_sad_multi_d_fn_t)
(
const unsigned char *src_ptr,
@@ -48,7 +49,7 @@ typedef void (*vpx_sad_multi_d_fn_t)
unsigned int *sad_array
);
-typedef unsigned int (*vp8_variance_fn_t)
+typedef unsigned int (*vpx_variance_fn_t)
(
const unsigned char *src_ptr,
int source_stride,
@@ -68,37 +69,14 @@ typedef unsigned int (*vp8_subpixvariance_fn_t)
unsigned int *sse
);
-typedef void (*vp8_ssimpf_fn_t)
- (
- unsigned char *s,
- int sp,
- unsigned char *r,
- int rp,
- unsigned long *sum_s,
- unsigned long *sum_r,
- unsigned long *sum_sq_s,
- unsigned long *sum_sq_r,
- unsigned long *sum_sxr
- );
-
-typedef unsigned int (*vp8_getmbss_fn_t)(const short *);
-
-typedef unsigned int (*vp8_get16x16prederror_fn_t)
- (
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int ref_stride
- );
-
typedef struct variance_vtable
{
vpx_sad_fn_t sdf;
- vp8_variance_fn_t vf;
+ vpx_variance_fn_t vf;
vp8_subpixvariance_fn_t svf;
- vp8_variance_fn_t svf_halfpix_h;
- vp8_variance_fn_t svf_halfpix_v;
- vp8_variance_fn_t svf_halfpix_hv;
+ vpx_variance_fn_t svf_halfpix_h;
+ vpx_variance_fn_t svf_halfpix_v;
+ vpx_variance_fn_t svf_halfpix_hv;
vpx_sad_multi_fn_t sdx3f;
vpx_sad_multi_fn_t sdx8f;
vpx_sad_multi_d_fn_t sdx4df;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/variance_c.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/variance_c.c
index 773b655efc5..02915a4defd 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/variance_c.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/variance_c.c
@@ -8,43 +8,34 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
-#include "variance.h"
+#include "./vp8_rtcd.h"
#include "filter.h"
+#include "variance.h"
-
-unsigned int vp8_get_mb_ss_c
-(
- const short *src_ptr
-)
-{
- unsigned int i = 0, sum = 0;
-
- do
- {
- sum += (src_ptr[i] * src_ptr[i]);
- i++;
- }
- while (i < 256);
-
- return sum;
+/* This is a bad idea.
+ * ctz = count trailing zeros */
+static int ctz(int a) {
+ int b = 0;
+ while (a != 1) {
+ a >>= 1;
+ b++;
+ }
+ return b;
}
-
-static void variance(
+static unsigned int variance(
const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
int w,
int h,
- unsigned int *sse,
- int *sum)
+ unsigned int *sse)
{
int i, j;
- int diff;
+ int diff, sum;
- *sum = 0;
+ sum = 0;
*sse = 0;
for (i = 0; i < h; i++)
@@ -52,114 +43,17 @@ static void variance(
for (j = 0; j < w; j++)
{
diff = src_ptr[j] - ref_ptr[j];
- *sum += diff;
+ sum += diff;
*sse += diff * diff;
}
src_ptr += source_stride;
ref_ptr += recon_stride;
}
-}
-
-
-unsigned int vp8_variance16x16_c(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 8));
-}
-
-unsigned int vp8_variance8x16_c(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
+ return (*sse - (((unsigned int)sum * sum) >> (int)((ctz(w) + ctz(h)))));
}
-unsigned int vp8_variance16x8_c(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
-}
-
-
-unsigned int vp8_variance8x8_c(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 6));
-}
-
-unsigned int vp8_variance4x4_c(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 4));
-}
-
-
-unsigned int vp8_mse16x16_c(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
- variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
- *sse = var;
- return var;
-}
-
-
/****************************************************************************
*
* ROUTINE : filter_block2d_bil_first_pass
@@ -303,7 +197,7 @@ unsigned int vp8_sub_pixel_variance4x4_c
/* Now filter Verticaly */
var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);
- return vp8_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
+ return variance(temp2, 4, dst_ptr, dst_pixels_per_line, 4, 4, sse);
}
@@ -328,7 +222,7 @@ unsigned int vp8_sub_pixel_variance8x8_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 8, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);
- return vp8_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
+ return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 8, sse);
}
unsigned int vp8_sub_pixel_variance16x16_c
@@ -352,7 +246,7 @@ unsigned int vp8_sub_pixel_variance16x16_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);
- return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
+ return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 16, sse);
}
@@ -392,21 +286,6 @@ unsigned int vp8_variance_halfpixvar16x16_hv_c(
}
-unsigned int vp8_sub_pixel_mse16x16_c
-(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-)
-{
- vp8_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
- return *sse;
-}
-
unsigned int vp8_sub_pixel_variance16x8_c
(
const unsigned char *src_ptr,
@@ -428,7 +307,7 @@ unsigned int vp8_sub_pixel_variance16x8_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 9, 16, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);
- return vp8_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
+ return variance(temp2, 16, dst_ptr, dst_pixels_per_line, 16, 8, sse);
}
unsigned int vp8_sub_pixel_variance8x16_c
@@ -454,5 +333,5 @@ unsigned int vp8_sub_pixel_variance8x16_c
var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line, 1, 17, 8, HFilter);
var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);
- return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
+ return variance(temp2, 8, dst_ptr, dst_pixels_per_line, 8, 16, sse);
}
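The variance_c.c rewrite above folds the per-size vp8_variance*_c wrappers into one static variance() helper. Because block widths and heights are powers of two, the mean term sum*sum/(w*h) becomes a right shift by ctz(w)+ctz(h), reproducing the old per-size shift amounts. A small check of that identity against the shifts visible in the removed code:

    #include <assert.h>

    /* Count trailing zeros of a power of two, as in the patched helper. */
    static int ctz(int a) {
      int b = 0;
      while (a != 1) { a >>= 1; ++b; }
      return b;
    }

    int main(void) {
      assert(ctz(16) + ctz(16) == 8);  /* 16x16 previously used ">> 8" */
      assert(ctz(16) + ctz(8)  == 7);  /* 16x8 and 8x16 used ">> 7" */
      assert(ctz(8)  + ctz(8)  == 6);  /* 8x8 used ">> 6" */
      assert(ctz(4)  + ctz(4)  == 4);  /* 4x4 used ">> 4" */
      return 0;
    }
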
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm
index 761433c11ea..26de5e86097 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_sse2.asm
@@ -13,393 +13,6 @@
%define xmm_filter_shift 7
-;unsigned int vp8_get_mb_ss_sse2
-;(
-; short *src_ptr
-;)
-global sym(vp8_get_mb_ss_sse2) PRIVATE
-sym(vp8_get_mb_ss_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 1
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
-
- mov rax, arg(0) ;[src_ptr]
- mov rcx, 8
- pxor xmm4, xmm4
-
-.NEXTROW:
- movdqa xmm0, [rax]
- movdqa xmm1, [rax+16]
- movdqa xmm2, [rax+32]
- movdqa xmm3, [rax+48]
- pmaddwd xmm0, xmm0
- pmaddwd xmm1, xmm1
- pmaddwd xmm2, xmm2
- pmaddwd xmm3, xmm3
-
- paddd xmm0, xmm1
- paddd xmm2, xmm3
- paddd xmm4, xmm0
- paddd xmm4, xmm2
-
- add rax, 0x40
- dec rcx
- ja .NEXTROW
-
- movdqa xmm3,xmm4
- psrldq xmm4,8
- paddd xmm4,xmm3
- movdqa xmm3,xmm4
- psrldq xmm4,4
- paddd xmm4,xmm3
- movq rax,xmm4
-
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-;unsigned int vp8_get16x16var_sse2
-;(
-; unsigned char * src_ptr,
-; int source_stride,
-; unsigned char * ref_ptr,
-; int recon_stride,
-; unsigned int * SSE,
-; int * Sum
-;)
-global sym(vp8_get16x16var_sse2) PRIVATE
-sym(vp8_get16x16var_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- push rbx
- push rsi
- push rdi
- ; end prolog
-
- mov rsi, arg(0) ;[src_ptr]
- mov rdi, arg(2) ;[ref_ptr]
-
- movsxd rax, DWORD PTR arg(1) ;[source_stride]
- movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
-
- ; Prefetch data
- lea rcx, [rax+rax*2]
- prefetcht0 [rsi]
- prefetcht0 [rsi+rax]
- prefetcht0 [rsi+rax*2]
- prefetcht0 [rsi+rcx]
- lea rbx, [rsi+rax*4]
- prefetcht0 [rbx]
- prefetcht0 [rbx+rax]
- prefetcht0 [rbx+rax*2]
- prefetcht0 [rbx+rcx]
-
- lea rcx, [rdx+rdx*2]
- prefetcht0 [rdi]
- prefetcht0 [rdi+rdx]
- prefetcht0 [rdi+rdx*2]
- prefetcht0 [rdi+rcx]
- lea rbx, [rdi+rdx*4]
- prefetcht0 [rbx]
- prefetcht0 [rbx+rdx]
- prefetcht0 [rbx+rdx*2]
- prefetcht0 [rbx+rcx]
-
- pxor xmm0, xmm0 ; clear xmm0 for unpack
- pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
-
- pxor xmm6, xmm6 ; clear xmm6 for accumulating sse
- mov rcx, 16
-
-.var16loop:
- movdqu xmm1, XMMWORD PTR [rsi]
- movdqu xmm2, XMMWORD PTR [rdi]
-
- prefetcht0 [rsi+rax*8]
- prefetcht0 [rdi+rdx*8]
-
- movdqa xmm3, xmm1
- movdqa xmm4, xmm2
-
-
- punpcklbw xmm1, xmm0
- punpckhbw xmm3, xmm0
-
- punpcklbw xmm2, xmm0
- punpckhbw xmm4, xmm0
-
-
- psubw xmm1, xmm2
- psubw xmm3, xmm4
-
- paddw xmm7, xmm1
- pmaddwd xmm1, xmm1
-
- paddw xmm7, xmm3
- pmaddwd xmm3, xmm3
-
- paddd xmm6, xmm1
- paddd xmm6, xmm3
-
- add rsi, rax
- add rdi, rdx
-
- sub rcx, 1
- jnz .var16loop
-
-
- movdqa xmm1, xmm6
- pxor xmm6, xmm6
-
- pxor xmm5, xmm5
- punpcklwd xmm6, xmm7
-
- punpckhwd xmm5, xmm7
- psrad xmm5, 16
-
- psrad xmm6, 16
- paddd xmm6, xmm5
-
- movdqa xmm2, xmm1
- punpckldq xmm1, xmm0
-
- punpckhdq xmm2, xmm0
- movdqa xmm7, xmm6
-
- paddd xmm1, xmm2
- punpckldq xmm6, xmm0
-
- punpckhdq xmm7, xmm0
- paddd xmm6, xmm7
-
- movdqa xmm2, xmm1
- movdqa xmm7, xmm6
-
- psrldq xmm1, 8
- psrldq xmm6, 8
-
- paddd xmm7, xmm6
- paddd xmm1, xmm2
-
- mov rax, arg(5) ;[Sum]
- mov rdi, arg(4) ;[SSE]
-
- movd DWORD PTR [rax], xmm7
- movd DWORD PTR [rdi], xmm1
-
-
- ; begin epilog
- pop rdi
- pop rsi
- pop rbx
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-
-
-;unsigned int vp8_get8x8var_sse2
-;(
-; unsigned char * src_ptr,
-; int source_stride,
-; unsigned char * ref_ptr,
-; int recon_stride,
-; unsigned int * SSE,
-; int * Sum
-;)
-global sym(vp8_get8x8var_sse2) PRIVATE
-sym(vp8_get8x8var_sse2):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
- SAVE_XMM 7
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
- mov rsi, arg(0) ;[src_ptr]
- mov rdi, arg(2) ;[ref_ptr]
-
- movsxd rax, DWORD PTR arg(1) ;[source_stride]
- movsxd rdx, DWORD PTR arg(3) ;[recon_stride]
-
- pxor xmm0, xmm0 ; clear xmm0 for unpack
- pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs
-
- movq xmm1, QWORD PTR [rsi]
- movq xmm2, QWORD PTR [rdi]
-
- punpcklbw xmm1, xmm0
- punpcklbw xmm2, xmm0
-
- psubsw xmm1, xmm2
- paddw xmm7, xmm1
-
- pmaddwd xmm1, xmm1
-
- movq xmm2, QWORD PTR[rsi + rax]
- movq xmm3, QWORD PTR[rdi + rdx]
-
- punpcklbw xmm2, xmm0
- punpcklbw xmm3, xmm0
-
- psubsw xmm2, xmm3
- paddw xmm7, xmm2
-
- pmaddwd xmm2, xmm2
- paddd xmm1, xmm2
-
-
- movq xmm2, QWORD PTR[rsi + rax * 2]
- movq xmm3, QWORD PTR[rdi + rdx * 2]
-
- punpcklbw xmm2, xmm0
- punpcklbw xmm3, xmm0
-
- psubsw xmm2, xmm3
- paddw xmm7, xmm2
-
- pmaddwd xmm2, xmm2
- paddd xmm1, xmm2
-
-
- lea rsi, [rsi + rax * 2]
- lea rdi, [rdi + rdx * 2]
- movq xmm2, QWORD PTR[rsi + rax]
- movq xmm3, QWORD PTR[rdi + rdx]
-
- punpcklbw xmm2, xmm0
- punpcklbw xmm3, xmm0
-
- psubsw xmm2, xmm3
- paddw xmm7, xmm2
-
- pmaddwd xmm2, xmm2
- paddd xmm1, xmm2
-
- movq xmm2, QWORD PTR[rsi + rax *2]
- movq xmm3, QWORD PTR[rdi + rdx *2]
-
- punpcklbw xmm2, xmm0
- punpcklbw xmm3, xmm0
-
- psubsw xmm2, xmm3
- paddw xmm7, xmm2
-
- pmaddwd xmm2, xmm2
- paddd xmm1, xmm2
-
-
- lea rsi, [rsi + rax * 2]
- lea rdi, [rdi + rdx * 2]
-
-
- movq xmm2, QWORD PTR[rsi + rax]
- movq xmm3, QWORD PTR[rdi + rdx]
-
- punpcklbw xmm2, xmm0
- punpcklbw xmm3, xmm0
-
- psubsw xmm2, xmm3
- paddw xmm7, xmm2
-
- pmaddwd xmm2, xmm2
- paddd xmm1, xmm2
-
- movq xmm2, QWORD PTR[rsi + rax *2]
- movq xmm3, QWORD PTR[rdi + rdx *2]
-
- punpcklbw xmm2, xmm0
- punpcklbw xmm3, xmm0
-
- psubsw xmm2, xmm3
- paddw xmm7, xmm2
-
- pmaddwd xmm2, xmm2
- paddd xmm1, xmm2
-
-
- lea rsi, [rsi + rax * 2]
- lea rdi, [rdi + rdx * 2]
-
- movq xmm2, QWORD PTR[rsi + rax]
- movq xmm3, QWORD PTR[rdi + rdx]
-
- punpcklbw xmm2, xmm0
- punpcklbw xmm3, xmm0
-
- psubsw xmm2, xmm3
- paddw xmm7, xmm2
-
- pmaddwd xmm2, xmm2
- paddd xmm1, xmm2
-
-
- movdqa xmm6, xmm7
- punpcklwd xmm6, xmm0
-
- punpckhwd xmm7, xmm0
- movdqa xmm2, xmm1
-
- paddw xmm6, xmm7
- punpckldq xmm1, xmm0
-
- punpckhdq xmm2, xmm0
- movdqa xmm7, xmm6
-
- paddd xmm1, xmm2
- punpckldq xmm6, xmm0
-
- punpckhdq xmm7, xmm0
- paddw xmm6, xmm7
-
- movdqa xmm2, xmm1
- movdqa xmm7, xmm6
-
- psrldq xmm1, 8
- psrldq xmm6, 8
-
- paddw xmm7, xmm6
- paddd xmm1, xmm2
-
- mov rax, arg(5) ;[Sum]
- mov rdi, arg(4) ;[SSE]
-
- movq rdx, xmm7
- movsx rcx, dx
-
- mov dword ptr [rax], ecx
- movd DWORD PTR [rdi], xmm1
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- RESTORE_XMM
- UNSHADOW_ARGS
- pop rbp
- ret
-
;void vp8_filter_block2d_bil_var_sse2
;(
; unsigned char *ref_ptr,
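
The deleted vp8_get_mb_ss_sse2 above reduces a 16x16 macroblock of signed 16-bit residuals to a single sum of squares; a later hunk switches its caller to the shared vpx_get_mb_ss. A scalar sketch of that reduction, for reference while reading the SIMD code (names illustrative):

/* Scalar reference for the sum-of-squares reduction the deleted SSE2 kernel
 * performs over a 16x16 block of short residuals. Illustrative only. */
#include <stdio.h>

static unsigned int mb_sum_squares(const short *src) {
    unsigned int ss = 0;
    int i;
    for (i = 0; i < 256; ++i)   /* 16x16 macroblock of residuals */
        ss += (unsigned int)(src[i] * src[i]);
    return ss;
}

int main(void) {
    short diff[256];
    int i;
    for (i = 0; i < 256; ++i) diff[i] = (short)((i % 5) - 2);
    printf("ss = %u\n", mb_sum_squares(diff));
    return 0;
}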
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_ssse3.c
index 73eb90df61f..2a0df640a90 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_ssse3.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_ssse3.c
@@ -8,19 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
-extern unsigned int vp8_get16x16var_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
extern void vp8_half_horiz_vert_variance16x_h_sse2
(
const unsigned char *ref_ptr,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm
new file mode 100644
index 00000000000..97f25275df2
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_impl_mmx.asm
@@ -0,0 +1,353 @@
+;
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define mmx_filter_shift 7
+
+;void vp8_filter_block2d_bil4x4_var_mmx
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned short *HFilter,
+; unsigned short *VFilter,
+; int *sum,
+; unsigned int *sumsquared
+;)
+global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
+sym(vp8_filter_block2d_bil4x4_var_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 8
+ GET_GOT rbx
+ push rsi
+ push rdi
+ sub rsp, 16
+ ; end prolog
+
+
+ pxor mm6, mm6 ;
+ pxor mm7, mm7 ;
+
+ mov rax, arg(4) ;HFilter ;
+ mov rdx, arg(5) ;VFilter ;
+
+ mov rsi, arg(0) ;ref_ptr ;
+ mov rdi, arg(2) ;src_ptr ;
+
+ mov rcx, 4 ;
+ pxor mm0, mm0 ;
+
+ movd mm1, [rsi] ;
+ movd mm3, [rsi+1] ;
+
+ punpcklbw mm1, mm0 ;
+ pmullw mm1, [rax] ;
+
+ punpcklbw mm3, mm0 ;
+ pmullw mm3, [rax+8] ;
+
+ paddw mm1, mm3 ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+
+ psraw mm1, mmx_filter_shift ;
+ movq mm5, mm1
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
+ add rsi, r8
+%endif
+
+.filter_block2d_bil4x4_var_mmx_loop:
+
+ movd mm1, [rsi] ;
+ movd mm3, [rsi+1] ;
+
+ punpcklbw mm1, mm0 ;
+ pmullw mm1, [rax] ;
+
+ punpcklbw mm3, mm0 ;
+ pmullw mm3, [rax+8] ;
+
+ paddw mm1, mm3 ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+
+ psraw mm1, mmx_filter_shift ;
+ movq mm3, mm5 ;
+
+ movq mm5, mm1 ;
+ pmullw mm3, [rdx] ;
+
+ pmullw mm1, [rdx+8] ;
+ paddw mm1, mm3 ;
+
+
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+ psraw mm1, mmx_filter_shift ;
+
+ movd mm3, [rdi] ;
+ punpcklbw mm3, mm0 ;
+
+ psubw mm1, mm3 ;
+ paddw mm6, mm1 ;
+
+ pmaddwd mm1, mm1 ;
+ paddd mm7, mm1 ;
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
+ add rdi, dword ptr arg(3) ;src_pixels_per_line ;
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line
+ add rsi, r8
+ add rdi, r9
+%endif
+ sub rcx, 1 ;
+ jnz .filter_block2d_bil4x4_var_mmx_loop ;
+
+
+ pxor mm3, mm3 ;
+ pxor mm2, mm2 ;
+
+ punpcklwd mm2, mm6 ;
+ punpckhwd mm3, mm6 ;
+
+ paddd mm2, mm3 ;
+ movq mm6, mm2 ;
+
+ psrlq mm6, 32 ;
+ paddd mm2, mm6 ;
+
+ psrad mm2, 16 ;
+ movq mm4, mm7 ;
+
+ psrlq mm4, 32 ;
+ paddd mm4, mm7 ;
+
+ mov rdi, arg(6) ;sum
+ mov rsi, arg(7) ;sumsquared
+
+ movd dword ptr [rdi], mm2 ;
+ movd dword ptr [rsi], mm4 ;
+
+
+
+ ; begin epilog
+ add rsp, 16
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+
+
+;void vp8_filter_block2d_bil_var_mmx
+;(
+; unsigned char *ref_ptr,
+; int ref_pixels_per_line,
+; unsigned char *src_ptr,
+; int src_pixels_per_line,
+; unsigned int Height,
+; unsigned short *HFilter,
+; unsigned short *VFilter,
+; int *sum,
+; unsigned int *sumsquared
+;)
+global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
+sym(vp8_filter_block2d_bil_var_mmx):
+ push rbp
+ mov rbp, rsp
+ SHADOW_ARGS_TO_STACK 9
+ GET_GOT rbx
+ push rsi
+ push rdi
+ sub rsp, 16
+ ; end prolog
+
+ pxor mm6, mm6 ;
+ pxor mm7, mm7 ;
+ mov rax, arg(5) ;HFilter ;
+
+ mov rdx, arg(6) ;VFilter ;
+ mov rsi, arg(0) ;ref_ptr ;
+
+ mov rdi, arg(2) ;src_ptr ;
+ movsxd rcx, dword ptr arg(4) ;Height ;
+
+ pxor mm0, mm0 ;
+ movq mm1, [rsi] ;
+
+ movq mm3, [rsi+1] ;
+ movq mm2, mm1 ;
+
+ movq mm4, mm3 ;
+ punpcklbw mm1, mm0 ;
+
+ punpckhbw mm2, mm0 ;
+ pmullw mm1, [rax] ;
+
+ pmullw mm2, [rax] ;
+ punpcklbw mm3, mm0 ;
+
+ punpckhbw mm4, mm0 ;
+ pmullw mm3, [rax+8] ;
+
+ pmullw mm4, [rax+8] ;
+ paddw mm1, mm3 ;
+
+ paddw mm2, mm4 ;
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+
+ psraw mm1, mmx_filter_shift ;
+ paddw mm2, [GLOBAL(mmx_bi_rd)] ;
+
+ psraw mm2, mmx_filter_shift ;
+ movq mm5, mm1
+
+ packuswb mm5, mm2 ;
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
+ add rsi, r8
+%endif
+
+.filter_block2d_bil_var_mmx_loop:
+
+ movq mm1, [rsi] ;
+ movq mm3, [rsi+1] ;
+
+ movq mm2, mm1 ;
+ movq mm4, mm3 ;
+
+ punpcklbw mm1, mm0 ;
+ punpckhbw mm2, mm0 ;
+
+ pmullw mm1, [rax] ;
+ pmullw mm2, [rax] ;
+
+ punpcklbw mm3, mm0 ;
+ punpckhbw mm4, mm0 ;
+
+ pmullw mm3, [rax+8] ;
+ pmullw mm4, [rax+8] ;
+
+ paddw mm1, mm3 ;
+ paddw mm2, mm4 ;
+
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+ psraw mm1, mmx_filter_shift ;
+
+ paddw mm2, [GLOBAL(mmx_bi_rd)] ;
+ psraw mm2, mmx_filter_shift ;
+
+ movq mm3, mm5 ;
+ movq mm4, mm5 ;
+
+ punpcklbw mm3, mm0 ;
+ punpckhbw mm4, mm0 ;
+
+ movq mm5, mm1 ;
+ packuswb mm5, mm2 ;
+
+ pmullw mm3, [rdx] ;
+ pmullw mm4, [rdx] ;
+
+ pmullw mm1, [rdx+8] ;
+ pmullw mm2, [rdx+8] ;
+
+ paddw mm1, mm3 ;
+ paddw mm2, mm4 ;
+
+ paddw mm1, [GLOBAL(mmx_bi_rd)] ;
+ paddw mm2, [GLOBAL(mmx_bi_rd)] ;
+
+ psraw mm1, mmx_filter_shift ;
+ psraw mm2, mmx_filter_shift ;
+
+ movq mm3, [rdi] ;
+ movq mm4, mm3 ;
+
+ punpcklbw mm3, mm0 ;
+ punpckhbw mm4, mm0 ;
+
+ psubw mm1, mm3 ;
+ psubw mm2, mm4 ;
+
+ paddw mm6, mm1 ;
+ pmaddwd mm1, mm1 ;
+
+ paddw mm6, mm2 ;
+ pmaddwd mm2, mm2 ;
+
+ paddd mm7, mm1 ;
+ paddd mm7, mm2 ;
+
+%if ABI_IS_32BIT
+ add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
+ add rdi, dword ptr arg(3) ;src_pixels_per_line ;
+%else
+ movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
+ movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
+ add rsi, r8
+ add rdi, r9
+%endif
+ sub rcx, 1 ;
+ jnz .filter_block2d_bil_var_mmx_loop ;
+
+
+ pxor mm3, mm3 ;
+ pxor mm2, mm2 ;
+
+ punpcklwd mm2, mm6 ;
+ punpckhwd mm3, mm6 ;
+
+ paddd mm2, mm3 ;
+ movq mm6, mm2 ;
+
+ psrlq mm6, 32 ;
+ paddd mm2, mm6 ;
+
+ psrad mm2, 16 ;
+ movq mm4, mm7 ;
+
+ psrlq mm4, 32 ;
+ paddd mm4, mm7 ;
+
+ mov rdi, arg(7) ;sum
+ mov rsi, arg(8) ;sumsquared
+
+ movd dword ptr [rdi], mm2 ;
+ movd dword ptr [rsi], mm4 ;
+
+ ; begin epilog
+ add rsp, 16
+ pop rdi
+ pop rsi
+ RESTORE_GOT
+ UNSHADOW_ARGS
+ pop rbp
+ ret
+
+
+SECTION_RODATA
+;short mmx_bi_rd[4] = { 64, 64, 64, 64};
+align 16
+mmx_bi_rd:
+ times 4 dw 64
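
The new MMX file above implements a two-tap bilinear sub-pixel filter with a +64 rounding constant (mmx_bi_rd) and a shift of 7 (mmx_filter_shift). A hedged C sketch of that two-pass filtering, with illustrative names rather than the library's own first/second-pass helpers:

/* Two-tap bilinear filtering as in the MMX kernel above: taps sum to 128,
 * results are rounded with +64 and shifted right by 7. Names are sketches,
 * not the upstream helpers. */
#include <stdio.h>

#define BIL_SHIFT 7
#define BIL_ROUND 64  /* == mmx_bi_rd */

/* Horizontal pass: out[r][c] = (in[r][c]*f0 + in[r][c+1]*f1 + 64) >> 7 */
static void bil_first_pass(const unsigned char *in, unsigned short *out,
                           int in_stride, int width, int height,
                           const unsigned short *filter) {
    int r, c;
    for (r = 0; r < height; ++r) {
        for (c = 0; c < width; ++c)
            out[c] = (unsigned short)((in[c] * filter[0] +
                                       in[c + 1] * filter[1] +
                                       BIL_ROUND) >> BIL_SHIFT);
        in += in_stride;
        out += width;
    }
}

/* Vertical pass over the intermediate rows produced above. */
static void bil_second_pass(const unsigned short *in, unsigned char *out,
                            int width, int height,
                            const unsigned short *filter) {
    int r, c;
    for (r = 0; r < height; ++r) {
        for (c = 0; c < width; ++c)
            out[c] = (unsigned char)((in[c] * filter[0] +
                                      in[c + width] * filter[1] +
                                      BIL_ROUND) >> BIL_SHIFT);
        in += width;
        out += width;
    }
}

int main(void) {
    /* Half-pixel position: both taps 64, so each output is a rounded mean. */
    const unsigned short half[2] = { 64, 64 };
    unsigned char src[5 * 5], dst[4 * 4];
    unsigned short tmp[5 * 4];
    int i;
    for (i = 0; i < 25; ++i) src[i] = (unsigned char)(i * 10);
    bil_first_pass(src, tmp, 5, 4, 5, half);  /* height+1 intermediate rows */
    bil_second_pass(tmp, dst, 4, 4, half);
    printf("dst[0]=%u\n", dst[0]);
    return 0;
}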
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_mmx.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_mmx.c
index 02e02420f46..e594b1e65ee 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_mmx.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_mmx.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
@@ -34,25 +35,6 @@ extern void filter_block1d_v6_mmx
short *filter
);
-extern unsigned int vp8_get_mb_ss_mmx(const short *src_ptr);
-extern unsigned int vp8_get8x8var_mmx
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
-extern unsigned int vp8_get4x4var_mmx
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
extern void vp8_filter_block2d_bil4x4_var_mmx
(
const unsigned char *ref_ptr,
@@ -77,127 +59,6 @@ extern void vp8_filter_block2d_bil_var_mmx
unsigned int *sumsquared
);
-
-unsigned int vp8_variance4x4_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
- vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 4));
-
-}
-
-unsigned int vp8_variance8x8_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
- vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
- *sse = var;
-
- return (var - (((unsigned int)avg * avg) >> 6));
-
-}
-
-unsigned int vp8_mse16x16_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int sse0, sse1, sse2, sse3, var;
- int sum0, sum1, sum2, sum3;
-
-
- vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
- vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
- vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
- vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
-
- var = sse0 + sse1 + sse2 + sse3;
- *sse = var;
- return var;
-}
-
-
-unsigned int vp8_variance16x16_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int sse0, sse1, sse2, sse3, var;
- int sum0, sum1, sum2, sum3, avg;
-
-
- vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
- vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
- vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse2, &sum2) ;
- vp8_get8x8var_mmx(src_ptr + 8 * source_stride + 8, source_stride, ref_ptr + 8 * recon_stride + 8, recon_stride, &sse3, &sum3);
-
- var = sse0 + sse1 + sse2 + sse3;
- avg = sum0 + sum1 + sum2 + sum3;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 8));
-}
-
-unsigned int vp8_variance16x8_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
-
- vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
- vp8_get8x8var_mmx(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
-
- var = sse0 + sse1;
- avg = sum0 + sum1;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
-
-}
-
-
-unsigned int vp8_variance8x16_mmx(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
-
- vp8_get8x8var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
- vp8_get8x8var_mmx(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
-
- var = sse0 + sse1;
- avg = sum0 + sum1;
- *sse = var;
-
- return (var - (((unsigned int)avg * avg) >> 7));
-
-}
-
-
unsigned int vp8_sub_pixel_variance4x4_mmx
(
const unsigned char *src_ptr,
@@ -286,20 +147,6 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
}
-unsigned int vp8_sub_pixel_mse16x16_mmx(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-)
-{
- vp8_sub_pixel_variance16x16_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
- return *sse;
-}
-
unsigned int vp8_sub_pixel_variance16x8_mmx
(
const unsigned char *src_ptr,
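
The deleted MMX wrappers above assembled larger-block statistics from 8x8 partial (SSE, Sum) pairs: four partials for 16x16, two for 16x8 and 8x16. A sketch of that composition, with get8x8var() standing in for the MMX kernel (it is not the library's function):

/* Composition of four 8x8 partials into a 16x16 variance, as in the removed
 * vp8_variance16x16_mmx. Illustrative names only. */
#include <stdio.h>

static void get8x8var(const unsigned char *src, int src_stride,
                      const unsigned char *ref, int ref_stride,
                      unsigned int *sse, int *sum) {
    int r, c;
    *sse = 0;
    *sum = 0;
    for (r = 0; r < 8; ++r) {
        for (c = 0; c < 8; ++c) {
            const int d = src[c] - ref[c];
            *sum += d;
            *sse += d * d;
        }
        src += src_stride;
        ref += ref_stride;
    }
}

static unsigned int variance16x16(const unsigned char *src, int src_stride,
                                  const unsigned char *ref, int ref_stride,
                                  unsigned int *sse) {
    unsigned int sse0, sse1, sse2, sse3;
    int sum0, sum1, sum2, sum3, sum;
    get8x8var(src, src_stride, ref, ref_stride, &sse0, &sum0);
    get8x8var(src + 8, src_stride, ref + 8, ref_stride, &sse1, &sum1);
    get8x8var(src + 8 * src_stride, src_stride,
              ref + 8 * ref_stride, ref_stride, &sse2, &sum2);
    get8x8var(src + 8 * src_stride + 8, src_stride,
              ref + 8 * ref_stride + 8, ref_stride, &sse3, &sum3);
    *sse = sse0 + sse1 + sse2 + sse3;
    sum = sum0 + sum1 + sum2 + sum3;
    /* >> 8 == divide by the 256 pixels of a 16x16 block */
    return *sse - (unsigned int)(((long long)sum * sum) >> 8);
}

int main(void) {
    unsigned char a[16 * 16] = {0}, b[16 * 16];
    unsigned int sse;
    int i;
    for (i = 0; i < 256; ++i) b[i] = (unsigned char)(i & 7);
    printf("var = %u\n", variance16x16(a, 16, b, 16, &sse));
    return 0;
}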
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_sse2.c
index 1fe127bf2c6..1c15ed88097 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_variance_sse2.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vp8_rtcd.h"
#include "vpx_config.h"
#include "vp8/common/variance.h"
#include "vpx_ports/mem.h"
@@ -30,38 +31,6 @@ extern void vp8_filter_block2d_bil4x4_var_mmx
unsigned int *sumsquared
);
-extern unsigned int vp8_get4x4var_mmx
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
-
-unsigned int vp8_get_mb_ss_sse2
-(
- const short *src_ptr
-);
-unsigned int vp8_get16x16var_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
-unsigned int vp8_get8x8var_sse2
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *SSE,
- int *Sum
-);
void vp8_filter_block2d_bil_var_sse2
(
const unsigned char *ref_ptr,
@@ -135,115 +104,6 @@ void vp8_half_vert_variance16x_h_sse2
unsigned int *sumsquared
);
-unsigned int vp8_variance4x4_wmt(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
- vp8_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 4));
-
-}
-
-unsigned int vp8_variance8x8_wmt
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int var;
- int avg;
-
- vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &var, &avg) ;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 6));
-
-}
-
-
-unsigned int vp8_variance16x16_wmt
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int sse0;
- int sum0;
-
-
- vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
- *sse = sse0;
- return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
-}
-unsigned int vp8_mse16x16_wmt(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
-
- unsigned int sse0;
- int sum0;
- vp8_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
- *sse = sse0;
- return sse0;
-
-}
-
-
-unsigned int vp8_variance16x8_wmt
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
-
- vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
- vp8_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride, &sse1, &sum1);
-
- var = sse0 + sse1;
- avg = sum0 + sum1;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
-
-}
-
-unsigned int vp8_variance8x16_wmt
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse)
-{
- unsigned int sse0, sse1, var;
- int sum0, sum1, avg;
-
- vp8_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, &sum0) ;
- vp8_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride, ref_ptr + 8 * recon_stride, recon_stride, &sse1, &sum1) ;
-
- var = sse0 + sse1;
- avg = sum0 + sum1;
- *sse = var;
- return (var - (((unsigned int)avg * avg) >> 7));
-
-}
-
unsigned int vp8_sub_pixel_variance4x4_wmt
(
const unsigned char *src_ptr,
@@ -378,20 +238,6 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
}
-unsigned int vp8_sub_pixel_mse16x16_wmt(
- const unsigned char *src_ptr,
- int src_pixels_per_line,
- int xoffset,
- int yoffset,
- const unsigned char *dst_ptr,
- int dst_pixels_per_line,
- unsigned int *sse
-)
-{
- vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
- return *sse;
-}
-
unsigned int vp8_sub_pixel_variance16x8_wmt
(
const unsigned char *src_ptr,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_if.c b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_if.c
index d7b8c76dc26..9015fcbb496 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_if.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_if.c
@@ -259,7 +259,7 @@ static int swap_frame_buffers (VP8_COMMON *cm)
return err;
}
-int check_fragments_for_errors(VP8D_COMP *pbi)
+static int check_fragments_for_errors(VP8D_COMP *pbi)
{
if (!pbi->ec_active &&
pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0)
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
deleted file mode 100644
index 000805d4fed..00000000000
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/armv6/vp8_mse16x16_armv6.asm
+++ /dev/null
@@ -1,138 +0,0 @@
-;
-; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-
- EXPORT |vp8_mse16x16_armv6|
-
- ARM
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-; r0 unsigned char *src_ptr
-; r1 int source_stride
-; r2 unsigned char *ref_ptr
-; r3 int recon_stride
-; stack unsigned int *sse
-;
-;note: Based on vp8_variance16x16_armv6. In this function, sum is never used.
-; So, we can remove this part of calculation.
-
-|vp8_mse16x16_armv6| PROC
-
- push {r4-r9, lr}
-
- pld [r0, r1, lsl #0]
- pld [r2, r3, lsl #0]
-
- mov r12, #16 ; set loop counter to 16 (=block height)
- mov r4, #0 ; initialize sse = 0
-
-loop
- ; 1st 4 pixels
- ldr r5, [r0, #0x0] ; load 4 src pixels
- ldr r6, [r2, #0x0] ; load 4 ref pixels
-
- mov lr, #0 ; constant zero
-
- usub8 r8, r5, r6 ; calculate difference
- pld [r0, r1, lsl #1]
- sel r7, r8, lr ; select bytes with positive difference
- usub8 r9, r6, r5 ; calculate difference with reversed operands
- pld [r2, r3, lsl #1]
- sel r8, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r5, r7, lr ; calculate sum of positive differences
- usad8 r6, r8, lr ; calculate sum of negative differences
- orr r8, r8, r7 ; differences of all 4 pixels
-
- ldr r5, [r0, #0x4] ; load 4 src pixels
-
- ; calculate sse
- uxtb16 r6, r8 ; byte (two pixels) to halfwords
- uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
- smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
-
- ; 2nd 4 pixels
- ldr r6, [r2, #0x4] ; load 4 ref pixels
- smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
-
- usub8 r8, r5, r6 ; calculate difference
- sel r7, r8, lr ; select bytes with positive difference
- usub8 r9, r6, r5 ; calculate difference with reversed operands
- sel r8, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r5, r7, lr ; calculate sum of positive differences
- usad8 r6, r8, lr ; calculate sum of negative differences
- orr r8, r8, r7 ; differences of all 4 pixels
- ldr r5, [r0, #0x8] ; load 4 src pixels
- ; calculate sse
- uxtb16 r6, r8 ; byte (two pixels) to halfwords
- uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
- smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
-
- ; 3rd 4 pixels
- ldr r6, [r2, #0x8] ; load 4 ref pixels
- smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
-
- usub8 r8, r5, r6 ; calculate difference
- sel r7, r8, lr ; select bytes with positive difference
- usub8 r9, r6, r5 ; calculate difference with reversed operands
- sel r8, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r5, r7, lr ; calculate sum of positive differences
- usad8 r6, r8, lr ; calculate sum of negative differences
- orr r8, r8, r7 ; differences of all 4 pixels
-
- ldr r5, [r0, #0xc] ; load 4 src pixels
-
- ; calculate sse
- uxtb16 r6, r8 ; byte (two pixels) to halfwords
- uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
- smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
-
- ; 4th 4 pixels
- ldr r6, [r2, #0xc] ; load 4 ref pixels
- smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
-
- usub8 r8, r5, r6 ; calculate difference
- add r0, r0, r1 ; set src_ptr to next row
- sel r7, r8, lr ; select bytes with positive difference
- usub8 r9, r6, r5 ; calculate difference with reversed operands
- add r2, r2, r3 ; set dst_ptr to next row
- sel r8, r9, lr ; select bytes with negative difference
-
- ; calculate partial sums
- usad8 r5, r7, lr ; calculate sum of positive differences
- usad8 r6, r8, lr ; calculate sum of negative differences
- orr r8, r8, r7 ; differences of all 4 pixels
-
- subs r12, r12, #1 ; next row
-
- ; calculate sse
- uxtb16 r6, r8 ; byte (two pixels) to halfwords
- uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
- smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
- smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
-
- bne loop
-
- ; return stuff
- ldr r1, [sp, #28] ; get address of sse
- mov r0, r4 ; return sse
- str r4, [r1] ; store sse
-
- pop {r4-r9, pc}
-
- ENDP
-
- END
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c
deleted file mode 100644
index f806809df5b..00000000000
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <arm_neon.h>
-
-unsigned int vp8_mse16x16_neon(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride,
- unsigned int *sse) {
- int i;
- int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- int64x1_t d0s64;
- uint8x16_t q0u8, q1u8, q2u8, q3u8;
- int32x4_t q7s32, q8s32, q9s32, q10s32;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int64x2_t q1s64;
-
- q7s32 = vdupq_n_s32(0);
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 8; i++) { // mse16x16_neon_loop
- q0u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q1u8 = vld1q_u8(src_ptr);
- src_ptr += source_stride;
- q2u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
- q3u8 = vld1q_u8(ref_ptr);
- ref_ptr += recon_stride;
-
- q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
- q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
- q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
- q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
-
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
- q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
-
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
-
- d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
- d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
- q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
- q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
-
- d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
- d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
- q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
- }
-
- q7s32 = vaddq_s32(q7s32, q8s32);
- q9s32 = vaddq_s32(q9s32, q10s32);
- q10s32 = vaddq_s32(q7s32, q9s32);
-
- q1s64 = vpaddlq_s32(q10s32);
- d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
- return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
-
-unsigned int vp8_get4x4sse_cs_neon(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride) {
- int16x4_t d22s16, d24s16, d26s16, d28s16;
- int64x1_t d0s64;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
- int32x4_t q7s32, q8s32, q9s32, q10s32;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int64x2_t q1s64;
-
- d0u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d4u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d1u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d5u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d2u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d6u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
- d3u8 = vld1_u8(src_ptr);
- src_ptr += source_stride;
- d7u8 = vld1_u8(ref_ptr);
- ref_ptr += recon_stride;
-
- q11u16 = vsubl_u8(d0u8, d4u8);
- q12u16 = vsubl_u8(d1u8, d5u8);
- q13u16 = vsubl_u8(d2u8, d6u8);
- q14u16 = vsubl_u8(d3u8, d7u8);
-
- d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
- d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
- d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
- d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
-
- q7s32 = vmull_s16(d22s16, d22s16);
- q8s32 = vmull_s16(d24s16, d24s16);
- q9s32 = vmull_s16(d26s16, d26s16);
- q10s32 = vmull_s16(d28s16, d28s16);
-
- q7s32 = vaddq_s32(q7s32, q8s32);
- q9s32 = vaddq_s32(q9s32, q10s32);
- q9s32 = vaddq_s32(q7s32, q9s32);
-
- q1s64 = vpaddlq_s32(q9s32);
- d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
-}
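
The removed NEON kernels above are superseded by the shared vpx_dsp versions (vpx_mse16x16, vpx_get4x4sse_cs) that later hunks wire into the encoder. A scalar sketch of the 16x16 MSE those intrinsics compute, i.e. the raw sum of squared differences with no mean correction:

/* Scalar reference for the deleted vp8_mse16x16_neon: plain SSE over a
 * 16x16 block, no sum^2 correction. Illustrative only. */
#include <stdio.h>

static unsigned int mse16x16(const unsigned char *src, int src_stride,
                             const unsigned char *ref, int ref_stride,
                             unsigned int *sse) {
    unsigned int acc = 0;
    int r, c;
    for (r = 0; r < 16; ++r) {
        for (c = 0; c < 16; ++c) {
            const int d = src[c] - ref[c];
            acc += (unsigned int)(d * d);
        }
        src += src_stride;
        ref += ref_stride;
    }
    *sse = acc;
    return acc;
}

int main(void) {
    unsigned char a[16 * 16] = {0}, b[16 * 16];
    unsigned int sse;
    int i;
    for (i = 0; i < 256; ++i) b[i] = 2;
    printf("mse = %u (expect 1024)\n", mse16x16(a, 16, b, 16, &sse));
    return 0;
}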
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/dct.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/dct.c
index 091554a5d50..0c7198d5d3a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/dct.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/dct.c
@@ -11,6 +11,8 @@
#include <math.h>
+#include "./vp8_rtcd.h"
+
void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
{
int i;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeframe.c
index 378e902c6a4..d381d8ddf45 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeframe.c
@@ -11,6 +11,7 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "encodemb.h"
#include "encodemv.h"
#include "vp8/common/common.h"
@@ -90,7 +91,7 @@ static unsigned int tt_activity_measure( VP8_COMP *cpi, MACROBLOCK *x )
* lambda using a non-linear combination (e.g., the smallest, or second
* smallest, etc.).
*/
- act = vp8_variance16x16(x->src.y_buffer,
+ act = vpx_variance16x16(x->src.y_buffer,
x->src.y_stride, VP8_VAR_OFFS, 0, &sse);
act = act<<4;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeintra.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeintra.c
index cfa4cb927f6..e2de5eecbc4 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeintra.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/encodeintra.c
@@ -11,6 +11,7 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "quantize.h"
#include "vp8/common/reconintra4x4.h"
#include "encodemb.h"
@@ -44,7 +45,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
}
}
- intra_pred_var = vp8_get_mb_ss(x->src_diff);
+ intra_pred_var = vpx_get_mb_ss(x->src_diff);
return intra_pred_var;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ethreading.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ethreading.c
index 977b0b0321e..4e234ccd58b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ethreading.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ethreading.c
@@ -19,8 +19,6 @@
extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip);
-extern void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
-
static THREAD_FUNCTION thread_loopfilter(void *p_data)
{
VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c
index 75c1362610f..3deb4abb337 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c
@@ -12,6 +12,7 @@
#include <limits.h>
#include <stdio.h>
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "block.h"
#include "onyx_int.h"
@@ -34,8 +35,6 @@
/* #define OUTPUT_FPF 1 */
extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi);
-extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv);
-extern void vp8_alloc_compressor_data(VP8_COMP *cpi);
#define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q]
extern int vp8_kf_boost_qadjustment[QINDEX_RANGE];
@@ -424,14 +423,14 @@ static void zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x,
/* Set up pointers for this macro block raw buffer */
raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset
+ d->offset);
- vp8_mse16x16 ( src_ptr, src_stride, raw_ptr, raw_stride,
- (unsigned int *)(raw_motion_err));
+ vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride,
+ (unsigned int *)(raw_motion_err));
/* Set up pointers for this macro block recon buffer */
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset );
- vp8_mse16x16 ( src_ptr, src_stride, ref_ptr, ref_stride,
- (unsigned int *)(best_motion_err));
+ vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride,
+ (unsigned int *)(best_motion_err));
}
static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
@@ -455,7 +454,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x,
int new_mv_mode_penalty = 256;
/* override the default variance function to use MSE */
- v_fn_ptr.vf = vp8_mse16x16;
+ v_fn_ptr.vf = vpx_mse16x16;
/* Set up pointers for this macro block recon buffer */
xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
@@ -1329,8 +1328,6 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
return Q;
}
-extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
-
void vp8_init_second_pass(VP8_COMP *cpi)
{
FIRSTPASS_STATS this_frame;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.c
index c61563c56f7..ad0e9308dc1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.c
@@ -10,6 +10,7 @@
#include "vp8/common/blockd.h"
+#include "modecosts.h"
#include "onyx_int.h"
#include "treewriter.h"
#include "vp8/common/entropymode.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.h
index 9281551c8d5..9871bfffdf9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/modecosts.h
@@ -16,7 +16,9 @@
extern "C" {
#endif
-void vp8_init_mode_costs(VP8_COMP *x);
+struct VP8_COMP;
+
+void vp8_init_mode_costs(struct VP8_COMP *x);
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c
index 5b452312ed2..40e29e191af 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c
@@ -587,7 +587,8 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
// Turn-off under certain conditions (i.e., away from key frame, and if
// we are at good quality (low Q) and most of the blocks were skipped-encoded
// in previous frame.
- if (Q >= 100) {
+ int qp_thresh = (cpi->oxcf.screen_content_mode == 2) ? 80 : 100;
+ if (Q >= qp_thresh) {
cpi->cyclic_refresh_mode_max_mbs_perframe =
(cpi->common.mb_rows * cpi->common.mb_cols) / 10;
} else if (cpi->frames_since_key > 250 &&
@@ -2011,6 +2012,8 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->source_alt_ref_active = 0;
cpi->common.refresh_alt_ref_frame = 0;
+ cpi->force_maxqp = 0;
+
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
cpi->b_calculate_ssimg = 0;
@@ -2128,7 +2131,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
#endif
cpi->fn_ptr[BLOCK_16X16].sdf = vpx_sad16x16;
- cpi->fn_ptr[BLOCK_16X16].vf = vp8_variance16x16;
+ cpi->fn_ptr[BLOCK_16X16].vf = vpx_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf = vp8_sub_pixel_variance16x16;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = vp8_variance_halfpixvar16x16_h;
cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = vp8_variance_halfpixvar16x16_v;
@@ -2138,7 +2141,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_16X16].sdx4df = vpx_sad16x16x4d;
cpi->fn_ptr[BLOCK_16X8].sdf = vpx_sad16x8;
- cpi->fn_ptr[BLOCK_16X8].vf = vp8_variance16x8;
+ cpi->fn_ptr[BLOCK_16X8].vf = vpx_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf = vp8_sub_pixel_variance16x8;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL;
@@ -2148,7 +2151,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_16X8].sdx4df = vpx_sad16x8x4d;
cpi->fn_ptr[BLOCK_8X16].sdf = vpx_sad8x16;
- cpi->fn_ptr[BLOCK_8X16].vf = vp8_variance8x16;
+ cpi->fn_ptr[BLOCK_8X16].vf = vpx_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf = vp8_sub_pixel_variance8x16;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL;
@@ -2158,7 +2161,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_8X16].sdx4df = vpx_sad8x16x4d;
cpi->fn_ptr[BLOCK_8X8].sdf = vpx_sad8x8;
- cpi->fn_ptr[BLOCK_8X8].vf = vp8_variance8x8;
+ cpi->fn_ptr[BLOCK_8X8].vf = vpx_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf = vp8_sub_pixel_variance8x8;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL;
@@ -2168,7 +2171,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)
cpi->fn_ptr[BLOCK_8X8].sdx4df = vpx_sad8x8x4d;
cpi->fn_ptr[BLOCK_4X4].sdf = vpx_sad4x4;
- cpi->fn_ptr[BLOCK_4X4].vf = vp8_variance4x4;
+ cpi->fn_ptr[BLOCK_4X4].vf = vpx_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf = vp8_sub_pixel_variance4x4;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL;
cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL;
@@ -2555,7 +2558,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride,
{
unsigned int sse;
- vp8_mse16x16(orig + col, orig_stride,
+ vpx_mse16x16(orig + col, orig_stride,
recon + col, recon_stride,
&sse);
total_sse += sse;
@@ -3381,7 +3384,7 @@ static int measure_square_diff_partial(YV12_BUFFER_CONFIG *source,
int index = block_index_row + (j >> 4);
if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
unsigned int sse;
- Total += vp8_mse16x16(src + j,
+ Total += vpx_mse16x16(src + j,
source->y_stride,
dst + j, dest->y_stride,
&sse);
@@ -3445,7 +3448,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
int index = block_index_row + (j >> 4);
if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
unsigned int sse;
- const unsigned int var = vp8_variance16x16(src + j,
+ const unsigned int var = vpx_variance16x16(src + j,
ystride,
dst + j,
ystride,
@@ -3455,7 +3458,7 @@ static void process_denoiser_mode_change(VP8_COMP *cpi) {
// is small (to avoid effects from lighting change).
if ((sse - var) < 128) {
unsigned int sse2;
- const unsigned int act = vp8_variance16x16(src + j,
+ const unsigned int act = vpx_variance16x16(src + j,
ystride,
const_source,
0,
@@ -4184,7 +4187,10 @@ static void encode_frame_to_data_rate
*/
if (cpi->cyclic_refresh_mode_enabled)
{
- if (cpi->current_layer==0)
+ // Special case for screen_content_mode with golden frame updates.
+ int disable_cr_gf = (cpi->oxcf.screen_content_mode == 2 &&
+ cm->refresh_golden_frame);
+ if (cpi->current_layer == 0 && cpi->force_maxqp == 0 && !disable_cr_gf)
cyclic_background_refresh(cpi, Q, 0);
else
disable_segmentation(cpi);
@@ -4406,6 +4412,11 @@ static void encode_frame_to_data_rate
/* transform / motion compensation build reconstruction frame */
vp8_encode_frame(cpi);
+ if (cpi->oxcf.screen_content_mode == 2) {
+ if (vp8_drop_encodedframe_overshoot(cpi, Q))
+ return;
+ }
+
cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi);
cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0;
#endif
@@ -5982,7 +5993,8 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest)
for (j = 0; j < source->y_width; j += 16)
{
unsigned int sse;
- Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse);
+ Total += vpx_mse16x16(src + j, source->y_stride,
+ dst + j, dest->y_stride, &sse);
}
src += 16 * source->y_stride;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h
index 82d7453902c..c48e2f4478b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h
@@ -526,6 +526,8 @@ typedef struct VP8_COMP
// Measure of average squared difference between source and denoised signal.
int mse_source_denoised;
+ int force_maxqp;
+
#if CONFIG_MULTITHREAD
/* multithread data */
int * mt_current_mb_col;
@@ -714,6 +716,11 @@ typedef struct VP8_COMP
} rd_costs;
} VP8_COMP;
+void vp8_alloc_compressor_data(VP8_COMP *cpi);
+int vp8_reverse_trans(int x);
+void vp8_new_framerate(VP8_COMP *cpi, double framerate);
+void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
+
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
unsigned char *dest_end, unsigned long *size);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/pickinter.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/pickinter.c
index c4c0e7e9e23..053bf119aa9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/pickinter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/pickinter.c
@@ -11,6 +11,7 @@
#include <limits.h>
#include "vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "onyx_int.h"
#include "modecosts.h"
#include "encodeintra.h"
@@ -29,8 +30,6 @@
#include "denoising.h"
#endif
-extern int VP8_UVSSE(MACROBLOCK *x);
-
#ifdef SPEEDSTATS
extern unsigned int cnt_pm;
#endif
@@ -38,8 +37,6 @@ extern unsigned int cnt_pm;
extern const int vp8_ref_frame_order[MAX_MODES];
extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES];
-extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
-
// Fixed point implementation of a skin color classifier. Skin color
// is model by a Gaussian distribution in the CbCr color space.
// See ../../test/skin_color_detector_test.cc where the reference
@@ -219,33 +216,6 @@ int vp8_get_inter_mbpred_error(MACROBLOCK *mb,
}
-
-unsigned int vp8_get4x4sse_cs_c
-(
- const unsigned char *src_ptr,
- int source_stride,
- const unsigned char *ref_ptr,
- int recon_stride
-)
-{
- int distortion = 0;
- int r, c;
-
- for (r = 0; r < 4; r++)
- {
- for (c = 0; c < 4; c++)
- {
- int diff = src_ptr[c] - ref_ptr[c];
- distortion += diff * diff;
- }
-
- src_ptr += source_stride;
- ref_ptr += recon_stride;
- }
-
- return distortion;
-}
-
static int get_prediction_error(BLOCK *be, BLOCKD *b)
{
unsigned char *sptr;
@@ -253,7 +223,7 @@ static int get_prediction_error(BLOCK *be, BLOCKD *b)
sptr = (*(be->base_src) + be->src);
dptr = b->predictor;
- return vp8_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
+ return vpx_get4x4sse_cs(sptr, be->src_stride, dptr, 16);
}
@@ -1041,7 +1011,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
else
{
rate2 += rate;
- distortion2 = vp8_variance16x16(
+ distortion2 = vpx_variance16x16(
*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
@@ -1070,7 +1040,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset,
xd->dst.y_stride,
xd->predictor,
16);
- distortion2 = vp8_variance16x16
+ distortion2 = vpx_variance16x16
(*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
@@ -1551,7 +1521,7 @@ void vp8_pick_intra_mode(MACROBLOCK *x, int *rate_)
xd->dst.y_stride,
xd->predictor,
16);
- distortion = vp8_variance16x16
+ distortion = vpx_variance16x16
(*(b->base_src), b->src_stride, xd->predictor, 16, &sse);
rate = x->mbmode_cost[xd->frame_type][mode];
this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/picklpf.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/picklpf.c
index 890053dcfdc..875b37f6841 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/picklpf.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/picklpf.c
@@ -9,6 +9,7 @@
*/
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vp8/common/onyxc_int.h"
#include "onyx_int.h"
@@ -83,7 +84,7 @@ static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source,
for (j = 0; j < source->y_width; j += 16)
{
unsigned int sse;
- Total += vp8_mse16x16(src + j, source->y_stride,
+ Total += vpx_mse16x16(src + j, source->y_stride,
dst + j, dest->y_stride,
&sse);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c
index 25d7a4998cb..e8796a1fcfb 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c
@@ -1215,6 +1215,11 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
{
int Q = cpi->active_worst_quality;
+ if (cpi->force_maxqp == 1) {
+ cpi->active_worst_quality = cpi->worst_quality;
+ return cpi->worst_quality;
+ }
+
/* Reset Zbin OQ value */
cpi->mb.zbin_over_quant = 0;
@@ -1559,3 +1564,46 @@ int vp8_pick_frame_size(VP8_COMP *cpi)
}
return 1;
}
+// If this just encoded frame (mcomp/transform/quant, but before loopfilter and
+// pack_bitstream) has large overshoot, and was not being encoded close to the
+// max QP, then drop this frame and force next frame to be encoded at max QP.
+// Condition this on 1 pass CBR with screen content mode and frame dropper off.
+// TODO(marpan): Should do this exit condition during the encode_frame
+// (i.e., halfway during the encoding of the frame) to save cycles.
+int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) {
+ if (cpi->pass == 0 &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER &&
+ cpi->drop_frames_allowed == 0 &&
+ cpi->common.frame_type != KEY_FRAME) {
+ // Note: the "projected_frame_size" from encode_frame() only gives estimate
+ // of mode/motion vector rate (in non-rd mode): so below we only require
+ // that projected_frame_size is somewhat greater than per-frame-bandwidth,
+ // but add additional condition with high threshold on prediction residual.
+
+ // QP threshold: only allow dropping if we are not close to qp_max.
+ int thresh_qp = 3 * cpi->worst_quality >> 2;
+ // Rate threshold, in bytes.
+ int thresh_rate = 2 * (cpi->av_per_frame_bandwidth >> 3);
+ // Threshold for the average (over all macroblocks) of the pixel-sum
+ // residual error over 16x16 block. Should add QP dependence on threshold?
+ int thresh_pred_err_mb = (256 << 4);
+ int pred_err_mb = (int)(cpi->mb.prediction_error / cpi->common.MBs);
+ if (Q < thresh_qp &&
+ cpi->projected_frame_size > thresh_rate &&
+ pred_err_mb > thresh_pred_err_mb) {
+ // Drop this frame: advance frame counters, and set force_maxqp flag.
+ cpi->common.current_video_frame++;
+ cpi->frames_since_key++;
+ // Flag to indicate we will force next frame to be encoded at max QP.
+ cpi->force_maxqp = 1;
+ return 1;
+ } else {
+ cpi->force_maxqp = 0;
+ return 0;
+ }
+ cpi->force_maxqp = 0;
+ return 0;
+ }
+ cpi->force_maxqp = 0;
+ return 0;
+}
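
The new vp8_drop_encodedframe_overshoot above drops a just-encoded frame and forces max QP on the next one when three thresholds are exceeded. A condensed sketch of that decision rule with the constants from the diff; plain parameters stand in for the VP8_COMP fields:

/* Condensed sketch of the drop decision added above. Not the library's
 * function; constants match the diff. */
#include <stdio.h>

static int should_drop_for_overshoot(int q, int worst_quality,
                                     int projected_frame_size,
                                     int av_per_frame_bandwidth,
                                     long long prediction_error, int num_mbs) {
    const int thresh_qp = 3 * worst_quality >> 2;              /* 3/4 of max QP */
    const int thresh_rate = 2 * (av_per_frame_bandwidth >> 3); /* 1/4 of budget */
    const int thresh_pred_err_mb = 256 << 4;                   /* 4096 per MB */
    const int pred_err_mb = (int)(prediction_error / num_mbs);
    return q < thresh_qp &&
           projected_frame_size > thresh_rate &&
           pred_err_mb > thresh_pred_err_mb;
}

int main(void) {
    /* Frame far below max QP, several times over its per-frame budget, high
     * residual: dropped; the caller then sets force_maxqp for the next frame. */
    printf("%d\n", should_drop_for_overshoot(20, 63, 4000, 1000, 800000, 120));
    return 0;
}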
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.h
index 829697f391f..703de9ff550 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.h
@@ -30,6 +30,8 @@ extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_
/* return of 0 means drop frame */
extern int vp8_pick_frame_size(VP8_COMP *cpi);
+extern int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c
index 9ccd85eb93f..17194f0d449 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c
@@ -15,6 +15,7 @@
#include <assert.h>
#include "vpx_config.h"
#include "vp8_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "tokenize.h"
#include "treewriter.h"
#include "onyx_int.h"
@@ -507,9 +508,9 @@ int VP8_UVSSE(MACROBLOCK *x)
}
else
{
- vp8_variance8x8(uptr, pre_stride,
+ vpx_variance8x8(uptr, pre_stride,
upred_ptr, uv_stride, &sse2);
- vp8_variance8x8(vptr, pre_stride,
+ vpx_variance8x8(vptr, pre_stride,
vpred_ptr, uv_stride, &sse1);
sse2 += sse1;
}
@@ -1783,7 +1784,7 @@ static int evaluate_inter_mode_rd(int mdcounts[4],
if(threshold < x->encode_breakout)
threshold = x->encode_breakout;
- var = vp8_variance16x16
+ var = vpx_variance16x16
(*(b->base_src), b->src_stride,
x->e_mbd.predictor, 16, &sse);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.h
index e0da35e203c..b4fcd10b61e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.h
@@ -136,6 +136,9 @@ extern void vp8_mv_pred
int near_sadidx[]
);
void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]);
+int VP8_UVSSE(MACROBLOCK *x);
+int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
+void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv);
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.c
index 448217ff412..14282db8016 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_ssse3.c
@@ -17,7 +17,7 @@
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)
static int bsr(int mask) {
- int eob;
+ unsigned long eob;
_BitScanReverse(&eob, mask);
eob++;
if (mask == 0)
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_common.mk b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_common.mk
index b4c814075c7..236d8a5d803 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_common.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_common.mk
@@ -86,8 +86,8 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
-VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_mmx.c
-VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/variance_impl_mmx.asm
+VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp8_variance_mmx.c
+VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/vp8_variance_impl_mmx.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/copy_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
@@ -96,7 +96,7 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_wrapper_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
-VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_sse2.c
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp8_variance_sse2.c
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/variance_impl_sse2.asm
VP8_COMMON_SRCS-$(HAVE_SSE3) += common/x86/copy_sse3.asm
VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm
@@ -145,8 +145,6 @@ VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/intra4x4_predict_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequant_idct_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/dequantize_v6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/idct_blk_v6.c
-VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance8x8_armv6$(ASM)
-VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance16x16_armv6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_h_armv6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_v_armv6$(ASM)
VP8_COMMON_SRCS-$(HAVE_MEDIA) += common/arm/armv6/vp8_variance_halfpixvar16x16_hv_armv6$(ASM)
@@ -168,7 +166,6 @@ VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/reconintra_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c
-VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/variance_neon.c
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_subpixelvariance_neon.c
$(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl))
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
index af9cc7320b9..8697377892e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
@@ -135,7 +135,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(cfg, g_w, 1, 16383); /* 14 bits available */
RANGE_CHECK(cfg, g_h, 1, 16383); /* 14 bits available */
RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000);
- RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den);
+ RANGE_CHECK(cfg, g_timebase.num, 1, 1000000000);
RANGE_CHECK_HI(cfg, g_profile, 3);
RANGE_CHECK_HI(cfg, rc_max_quantizer, 63);
RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer);
@@ -199,7 +199,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6);
RANGE_CHECK(vp8_cfg, arnr_type, 1, 3);
RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
- RANGE_CHECK_BOOL(vp8_cfg, screen_content_mode);
+ RANGE_CHECK_HI(vp8_cfg, screen_content_mode, 2);
if (finalize && (cfg->rc_end_usage == VPX_CQ || cfg->rc_end_usage == VPX_Q))
RANGE_CHECK(vp8_cfg, cq_level,
cfg->rc_min_quantizer, cfg->rc_max_quantizer);
@@ -478,8 +478,6 @@ static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx,
return res;
}
-int vp8_reverse_trans(int);
-
static vpx_codec_err_t get_quantizer(vpx_codec_alg_priv_t *ctx, va_list args)
{
int *const arg = va_arg(args, int *);
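
Two validation changes above are worth noting: g_timebase.num is now checked against a fixed 1..1000000000 range instead of being capped at g_timebase.den, and screen_content_mode is widened from a boolean to the range 0..2. A hedged usage sketch of filling the timebase when configuring the VP8 encoder (the 1/30 values are arbitrary; only the public libvpx encoder API is used):

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

/* Illustrative only: start from defaults, then express timestamps in
 * 1/30-second ticks. With the relaxed check the encoder also accepts
 * timebases where num > den, as long as both stay in [1, 1000000000]. */
static int init_cfg_example(vpx_codec_enc_cfg_t *cfg, unsigned int w,
                            unsigned int h) {
  if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), cfg, 0)) return -1;
  cfg->g_w = w;
  cfg->g_h = h;
  cfg->g_timebase.num = 1;
  cfg->g_timebase.den = 30;
  return 0;
}
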
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8cx_arm.mk b/chromium/third_party/libvpx/source/libvpx/vp8/vp8cx_arm.mk
index 05003017982..0b0f6a70a12 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8cx_arm.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8cx_arm.mk
@@ -18,7 +18,6 @@ VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.c
#File list for media
# encoder
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_short_fdct4x4_armv6$(ASM)
-VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM)
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM)
#File list for neon
@@ -27,5 +26,4 @@ VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/fastquantizeb_neon.c
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon.c
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon.c
-VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_mse16x16_neon.c
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon.c
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c
index 3c8c6a9348d..0233877dd38 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct16x16_1_add_neon.c
@@ -9,6 +9,8 @@
*/
#include <arm_neon.h>
+
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_idct.h"
void vp9_idct16x16_1_add_neon(
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c
index d0e4b4f4014..0ce45f2bfa8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct32x32_1_add_neon.c
@@ -9,10 +9,12 @@
*/
#include <arm_neon.h>
-#include "vp9/common/vp9_idct.h"
#include "./vpx_config.h"
+#include "vpx_ports/mem.h"
+#include "vp9/common/vp9_idct.h"
+
static INLINE void LD_16x8(
uint8_t *d,
int d_stride,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c
index 7c8a930b645..f0457358e6c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct4x4_1_add_neon.c
@@ -9,6 +9,8 @@
*/
#include <arm_neon.h>
+
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_idct.h"
void vp9_idct4x4_1_add_neon(
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c
index 24c29fb77f6..5369697c7d1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_idct8x8_1_add_neon.c
@@ -9,6 +9,8 @@
*/
#include <arm_neon.h>
+
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_idct.h"
void vp9_idct8x8_1_add_neon(
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c
index d0beaa7208f..92706bf2c69 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.c
@@ -8,466 +8,815 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include <stddef.h>
#include <arm_neon.h>
-void vp9_v_predictor_4x4_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+//------------------------------------------------------------------------------
+// DC 4x4
+
+// 'do_above' and 'do_left' facilitate branch removal when inlined.
+static INLINE void dc_4x4(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left,
+ int do_above, int do_left) {
+ uint16x8_t sum_top;
+ uint16x8_t sum_left;
+ uint8x8_t dc0;
+
+ if (do_above) {
+ const uint8x8_t A = vld1_u8(above); // top row
+ const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
+ const uint16x4_t p1 = vpadd_u16(p0, p0);
+ sum_top = vcombine_u16(p1, p1);
+ }
+
+ if (do_left) {
+ const uint8x8_t L = vld1_u8(left); // left border
+ const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left
+ const uint16x4_t p1 = vpadd_u16(p0, p0);
+ sum_left = vcombine_u16(p1, p1);
+ }
+
+ if (do_above && do_left) {
+ const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+ dc0 = vrshrn_n_u16(sum, 3);
+ } else if (do_above) {
+ dc0 = vrshrn_n_u16(sum_top, 2);
+ } else if (do_left) {
+ dc0 = vrshrn_n_u16(sum_left, 2);
+ } else {
+ dc0 = vdup_n_u8(0x80);
+ }
+
+ {
+ const uint8x8_t dc = vdup_lane_u8(dc0, 0);
int i;
- uint32x2_t d0u32 = vdup_n_u32(0);
- (void)left;
+ for (i = 0; i < 4; ++i) {
+ vst1_lane_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc), 0);
+ }
+ }
+}
+
+void vp9_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_4x4(dst, stride, above, left, 1, 1);
+}
+
+void vp9_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ dc_4x4(dst, stride, NULL, left, 0, 1);
+}
+
+void vp9_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+ dc_4x4(dst, stride, above, NULL, 1, 0);
+}
- d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0);
- for (i = 0; i < 4; i++, dst += y_stride)
- vst1_lane_u32((uint32_t *)dst, d0u32, 0);
- return;
+void vp9_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+ dc_4x4(dst, stride, NULL, NULL, 0, 0);
}
-void vp9_v_predictor_8x8_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
+//------------------------------------------------------------------------------
+// DC 8x8
+
+// 'do_above' and 'do_left' facilitate branch removal when inlined.
+static INLINE void dc_8x8(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left,
+ int do_above, int do_left) {
+ uint16x8_t sum_top;
+ uint16x8_t sum_left;
+ uint8x8_t dc0;
+
+ if (do_above) {
+ const uint8x8_t A = vld1_u8(above); // top row
+ const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
+ const uint16x4_t p1 = vpadd_u16(p0, p0);
+ const uint16x4_t p2 = vpadd_u16(p1, p1);
+ sum_top = vcombine_u16(p2, p2);
+ }
+
+ if (do_left) {
+ const uint8x8_t L = vld1_u8(left); // left border
+ const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left
+ const uint16x4_t p1 = vpadd_u16(p0, p0);
+ const uint16x4_t p2 = vpadd_u16(p1, p1);
+ sum_left = vcombine_u16(p2, p2);
+ }
+
+ if (do_above && do_left) {
+ const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+ dc0 = vrshrn_n_u16(sum, 4);
+ } else if (do_above) {
+ dc0 = vrshrn_n_u16(sum_top, 3);
+ } else if (do_left) {
+ dc0 = vrshrn_n_u16(sum_left, 3);
+ } else {
+ dc0 = vdup_n_u8(0x80);
+ }
+
+ {
+ const uint8x8_t dc = vdup_lane_u8(dc0, 0);
int i;
- uint8x8_t d0u8 = vdup_n_u8(0);
- (void)left;
+ for (i = 0; i < 8; ++i) {
+ vst1_u32((uint32_t*)(dst + i * stride), vreinterpret_u32_u8(dc));
+ }
+ }
+}
- d0u8 = vld1_u8(above);
- for (i = 0; i < 8; i++, dst += y_stride)
- vst1_u8(dst, d0u8);
- return;
+void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_8x8(dst, stride, above, left, 1, 1);
}
-void vp9_v_predictor_16x16_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
+void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ dc_8x8(dst, stride, NULL, left, 0, 1);
+}
+
+void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+ dc_8x8(dst, stride, above, NULL, 1, 0);
+}
+
+void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+ dc_8x8(dst, stride, NULL, NULL, 0, 0);
+}
+
+//------------------------------------------------------------------------------
+// DC 16x16
+
+// 'do_above' and 'do_left' facilitate branch removal when inlined.
+static INLINE void dc_16x16(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left,
+ int do_above, int do_left) {
+ uint16x8_t sum_top;
+ uint16x8_t sum_left;
+ uint8x8_t dc0;
+
+ if (do_above) {
+ const uint8x16_t A = vld1q_u8(above); // top row
+ const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top
+ const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
+ const uint16x4_t p2 = vpadd_u16(p1, p1);
+ const uint16x4_t p3 = vpadd_u16(p2, p2);
+ sum_top = vcombine_u16(p3, p3);
+ }
+
+ if (do_left) {
+ const uint8x16_t L = vld1q_u8(left); // left row
+ const uint16x8_t p0 = vpaddlq_u8(L); // cascading summation of the left
+ const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
+ const uint16x4_t p2 = vpadd_u16(p1, p1);
+ const uint16x4_t p3 = vpadd_u16(p2, p2);
+ sum_left = vcombine_u16(p3, p3);
+ }
+
+ if (do_above && do_left) {
+ const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+ dc0 = vrshrn_n_u16(sum, 5);
+ } else if (do_above) {
+ dc0 = vrshrn_n_u16(sum_top, 4);
+ } else if (do_left) {
+ dc0 = vrshrn_n_u16(sum_left, 4);
+ } else {
+ dc0 = vdup_n_u8(0x80);
+ }
+
+ {
+ const uint8x16_t dc = vdupq_lane_u8(dc0, 0);
int i;
- uint8x16_t q0u8 = vdupq_n_u8(0);
- (void)left;
+ for (i = 0; i < 16; ++i) {
+ vst1q_u8(dst + i * stride, dc);
+ }
+ }
+}
+
+void vp9_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_16x16(dst, stride, above, left, 1, 1);
+}
+
+void vp9_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ dc_16x16(dst, stride, NULL, left, 0, 1);
+}
- q0u8 = vld1q_u8(above);
- for (i = 0; i < 16; i++, dst += y_stride)
- vst1q_u8(dst, q0u8);
- return;
+void vp9_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)left;
+ dc_16x16(dst, stride, above, NULL, 1, 0);
}
-void vp9_v_predictor_32x32_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
+void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ (void)left;
+ dc_16x16(dst, stride, NULL, NULL, 0, 0);
+}
+
+//------------------------------------------------------------------------------
+// DC 32x32
+
+// 'do_above' and 'do_left' facilitate branch removal when inlined.
+static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left,
+ int do_above, int do_left) {
+ uint16x8_t sum_top;
+ uint16x8_t sum_left;
+ uint8x8_t dc0;
+
+ if (do_above) {
+ const uint8x16_t A0 = vld1q_u8(above); // top row
+ const uint8x16_t A1 = vld1q_u8(above + 16);
+ const uint16x8_t p0 = vpaddlq_u8(A0); // cascading summation of the top
+ const uint16x8_t p1 = vpaddlq_u8(A1);
+ const uint16x8_t p2 = vaddq_u16(p0, p1);
+ const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
+ const uint16x4_t p4 = vpadd_u16(p3, p3);
+ const uint16x4_t p5 = vpadd_u16(p4, p4);
+ sum_top = vcombine_u16(p5, p5);
+ }
+
+ if (do_left) {
+ const uint8x16_t L0 = vld1q_u8(left); // left row
+ const uint8x16_t L1 = vld1q_u8(left + 16);
+ const uint16x8_t p0 = vpaddlq_u8(L0); // cascading summation of the left
+ const uint16x8_t p1 = vpaddlq_u8(L1);
+ const uint16x8_t p2 = vaddq_u16(p0, p1);
+ const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
+ const uint16x4_t p4 = vpadd_u16(p3, p3);
+ const uint16x4_t p5 = vpadd_u16(p4, p4);
+ sum_left = vcombine_u16(p5, p5);
+ }
+
+ if (do_above && do_left) {
+ const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
+ dc0 = vrshrn_n_u16(sum, 6);
+ } else if (do_above) {
+ dc0 = vrshrn_n_u16(sum_top, 5);
+ } else if (do_left) {
+ dc0 = vrshrn_n_u16(sum_left, 5);
+ } else {
+ dc0 = vdup_n_u8(0x80);
+ }
+
+ {
+ const uint8x16_t dc = vdupq_lane_u8(dc0, 0);
int i;
- uint8x16_t q0u8 = vdupq_n_u8(0);
- uint8x16_t q1u8 = vdupq_n_u8(0);
- (void)left;
-
- q0u8 = vld1q_u8(above);
- q1u8 = vld1q_u8(above + 16);
- for (i = 0; i < 32; i++, dst += y_stride) {
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q1u8);
+ for (i = 0; i < 32; ++i) {
+ vst1q_u8(dst + i * stride, dc);
+ vst1q_u8(dst + i * stride + 16, dc);
}
- return;
+ }
}
-void vp9_h_predictor_4x4_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- uint8x8_t d0u8 = vdup_n_u8(0);
- uint32x2_t d1u32 = vdup_n_u32(0);
- (void)above;
+void vp9_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_32x32(dst, stride, above, left, 1, 1);
+}
- d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0);
+void vp9_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ dc_32x32(dst, stride, NULL, left, 0, 1);
+}
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- return;
+void vp9_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)left;
+ dc_32x32(dst, stride, above, NULL, 1, 0);
}
-void vp9_h_predictor_8x8_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- uint8x8_t d0u8 = vdup_n_u8(0);
- uint64x1_t d1u64 = vdup_n_u64(0);
- (void)above;
+void vp9_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+ (void)left;
+ dc_32x32(dst, stride, NULL, NULL, 0, 0);
+}
- d1u64 = vld1_u64((const uint64_t *)left);
+// -----------------------------------------------------------------------------
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0);
- vst1_u8(dst, d0u8);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1);
- vst1_u8(dst, d0u8);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2);
- vst1_u8(dst, d0u8);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3);
- vst1_u8(dst, d0u8);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4);
- vst1_u8(dst, d0u8);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5);
- vst1_u8(dst, d0u8);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6);
- vst1_u8(dst, d0u8);
- dst += y_stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7);
+void vp9_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(above)); // top row
+ const uint64x1_t A1 = vshr_n_u64(A0, 8);
+ const uint64x1_t A2 = vshr_n_u64(A0, 16);
+ const uint8x8_t ABCDEFGH = vreinterpret_u8_u64(A0);
+ const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1);
+ const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2);
+ const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGH00);
+ const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0);
+ const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
+ const uint32x2_t r0 = vreinterpret_u32_u8(avg2);
+ const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
+ const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
+ const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
+ (void)left;
+ vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0);
+ vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0);
+ vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0);
+ vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0);
+ dst[3 * stride + 3] = above[7];
+}
+
+void vp9_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ static const uint8_t shuffle1[8] = { 1, 2, 3, 4, 5, 6, 7, 7 };
+ static const uint8_t shuffle2[8] = { 2, 3, 4, 5, 6, 7, 7, 7 };
+ const uint8x8_t sh_12345677 = vld1_u8(shuffle1);
+ const uint8x8_t sh_23456777 = vld1_u8(shuffle2);
+ const uint8x8_t A0 = vld1_u8(above); // top row
+ const uint8x8_t A1 = vtbl1_u8(A0, sh_12345677);
+ const uint8x8_t A2 = vtbl1_u8(A0, sh_23456777);
+ const uint8x8_t avg1 = vhadd_u8(A0, A2);
+ uint8x8_t row = vrhadd_u8(avg1, A1);
+ int i;
+ (void)left;
+ for (i = 0; i < 7; ++i) {
+ vst1_u8(dst + i * stride, row);
+ row = vtbl1_u8(row, sh_12345677);
+ }
+ vst1_u8(dst + i * stride, row);
+}
+
+void vp9_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const uint8x16_t A0 = vld1q_u8(above); // top row
+ const uint8x16_t above_right = vld1q_dup_u8(above + 15);
+ const uint8x16_t A1 = vextq_u8(A0, above_right, 1);
+ const uint8x16_t A2 = vextq_u8(A0, above_right, 2);
+ const uint8x16_t avg1 = vhaddq_u8(A0, A2);
+ uint8x16_t row = vrhaddq_u8(avg1, A1);
+ int i;
+ (void)left;
+ for (i = 0; i < 15; ++i) {
+ vst1q_u8(dst + i * stride, row);
+ row = vextq_u8(row, above_right, 1);
+ }
+ vst1q_u8(dst + i * stride, row);
+}
+
+// -----------------------------------------------------------------------------
+
+void vp9_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const uint8x8_t XABCD_u8 = vld1_u8(above - 1);
+ const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
+ const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
+ const uint32x2_t zero = vdup_n_u32(0);
+ const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0);
+ const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL);
+ const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8));
+ const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
+ const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
+ const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
+ const uint8_t D = vget_lane_u8(XABCD_u8, 4);
+ const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6);
+ const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC);
+ const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8);
+ const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_);
+ const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
+ const uint32x2_t r3 = vreinterpret_u32_u8(avg2);
+ const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
+ const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
+ const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
+ vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0);
+ vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0);
+ vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0);
+ vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0);
+}
+
+#if !HAVE_NEON_ASM
+
+void vp9_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int i;
+ uint32x2_t d0u32 = vdup_n_u32(0);
+ (void)left;
+
+ d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0);
+ for (i = 0; i < 4; i++, dst += stride)
+ vst1_lane_u32((uint32_t *)dst, d0u32, 0);
+}
+
+void vp9_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int i;
+ uint8x8_t d0u8 = vdup_n_u8(0);
+ (void)left;
+
+ d0u8 = vld1_u8(above);
+ for (i = 0; i < 8; i++, dst += stride)
vst1_u8(dst, d0u8);
- return;
}
-void vp9_h_predictor_16x16_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- int j;
- uint8x8_t d2u8 = vdup_n_u8(0);
- uint8x16_t q0u8 = vdupq_n_u8(0);
- uint8x16_t q1u8 = vdupq_n_u8(0);
- (void)above;
+void vp9_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int i;
+ uint8x16_t q0u8 = vdupq_n_u8(0);
+ (void)left;
+
+ q0u8 = vld1q_u8(above);
+ for (i = 0; i < 16; i++, dst += stride)
+ vst1q_u8(dst, q0u8);
+}
+
+void vp9_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int i;
+ uint8x16_t q0u8 = vdupq_n_u8(0);
+ uint8x16_t q1u8 = vdupq_n_u8(0);
+ (void)left;
+
+ q0u8 = vld1q_u8(above);
+ q1u8 = vld1q_u8(above + 16);
+ for (i = 0; i < 32; i++, dst += stride) {
+ vst1q_u8(dst, q0u8);
+ vst1q_u8(dst + 16, q1u8);
+ }
+}
+
+void vp9_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ uint8x8_t d0u8 = vdup_n_u8(0);
+ uint32x2_t d1u32 = vdup_n_u32(0);
+ (void)above;
+
+ d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0);
+
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0);
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1);
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2);
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3);
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
+}
+
+void vp9_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ uint8x8_t d0u8 = vdup_n_u8(0);
+ uint64x1_t d1u64 = vdup_n_u64(0);
+ (void)above;
+
+ d1u64 = vld1_u64((const uint64_t *)left);
+
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0);
+ vst1_u8(dst, d0u8);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1);
+ vst1_u8(dst, d0u8);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2);
+ vst1_u8(dst, d0u8);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3);
+ vst1_u8(dst, d0u8);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4);
+ vst1_u8(dst, d0u8);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5);
+ vst1_u8(dst, d0u8);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6);
+ vst1_u8(dst, d0u8);
+ dst += stride;
+ d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7);
+ vst1_u8(dst, d0u8);
+}
+
+void vp9_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int j;
+ uint8x8_t d2u8 = vdup_n_u8(0);
+ uint8x16_t q0u8 = vdupq_n_u8(0);
+ uint8x16_t q1u8 = vdupq_n_u8(0);
+ (void)above;
+
+ q1u8 = vld1q_u8(left);
+ d2u8 = vget_low_u8(q1u8);
+ for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) {
+ q0u8 = vdupq_lane_u8(d2u8, 0);
+ vst1q_u8(dst, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 1);
+ vst1q_u8(dst, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 2);
+ vst1q_u8(dst, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 3);
+ vst1q_u8(dst, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 4);
+ vst1q_u8(dst, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 5);
+ vst1q_u8(dst, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 6);
+ vst1q_u8(dst, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 7);
+ vst1q_u8(dst, q0u8);
+ dst += stride;
+ }
+}
+
+void vp9_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int j, k;
+ uint8x8_t d2u8 = vdup_n_u8(0);
+ uint8x16_t q0u8 = vdupq_n_u8(0);
+ uint8x16_t q1u8 = vdupq_n_u8(0);
+ (void)above;
+ for (k = 0; k < 2; k++, left += 16) {
q1u8 = vld1q_u8(left);
d2u8 = vget_low_u8(q1u8);
for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) {
- q0u8 = vdupq_lane_u8(d2u8, 0);
- vst1q_u8(dst, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 1);
- vst1q_u8(dst, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 2);
- vst1q_u8(dst, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 3);
- vst1q_u8(dst, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 4);
- vst1q_u8(dst, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 5);
- vst1q_u8(dst, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 6);
- vst1q_u8(dst, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 7);
- vst1q_u8(dst, q0u8);
- dst += y_stride;
- }
- return;
-}
-
-void vp9_h_predictor_32x32_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- int j, k;
- uint8x8_t d2u8 = vdup_n_u8(0);
- uint8x16_t q0u8 = vdupq_n_u8(0);
- uint8x16_t q1u8 = vdupq_n_u8(0);
- (void)above;
-
- for (k = 0; k < 2; k++, left += 16) {
- q1u8 = vld1q_u8(left);
- d2u8 = vget_low_u8(q1u8);
- for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) {
- q0u8 = vdupq_lane_u8(d2u8, 0);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 1);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 2);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 3);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 4);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 5);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 6);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += y_stride;
- q0u8 = vdupq_lane_u8(d2u8, 7);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += y_stride;
- }
+ q0u8 = vdupq_lane_u8(d2u8, 0);
+ vst1q_u8(dst, q0u8);
+ vst1q_u8(dst + 16, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 1);
+ vst1q_u8(dst, q0u8);
+ vst1q_u8(dst + 16, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 2);
+ vst1q_u8(dst, q0u8);
+ vst1q_u8(dst + 16, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 3);
+ vst1q_u8(dst, q0u8);
+ vst1q_u8(dst + 16, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 4);
+ vst1q_u8(dst, q0u8);
+ vst1q_u8(dst + 16, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 5);
+ vst1q_u8(dst, q0u8);
+ vst1q_u8(dst + 16, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 6);
+ vst1q_u8(dst, q0u8);
+ vst1q_u8(dst + 16, q0u8);
+ dst += stride;
+ q0u8 = vdupq_lane_u8(d2u8, 7);
+ vst1q_u8(dst, q0u8);
+ vst1q_u8(dst + 16, q0u8);
+ dst += stride;
}
- return;
+ }
}
-void vp9_tm_predictor_4x4_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- int i;
- uint16x8_t q1u16, q3u16;
- int16x8_t q1s16;
- uint8x8_t d0u8 = vdup_n_u8(0);
- uint32x2_t d2u32 = vdup_n_u32(0);
-
- d0u8 = vdup_n_u8(above[-1]);
- d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0);
- q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8);
- for (i = 0; i < 4; i++, dst += y_stride) {
- q1u16 = vdupq_n_u16((uint16_t)left[i]);
- q1s16 = vaddq_s16(vreinterpretq_s16_u16(q1u16),
- vreinterpretq_s16_u16(q3u16));
- d0u8 = vqmovun_s16(q1s16);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- }
- return;
-}
-
-void vp9_tm_predictor_8x8_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- int j;
- uint16x8_t q0u16, q3u16, q10u16;
- int16x8_t q0s16;
- uint16x4_t d20u16;
- uint8x8_t d0u8, d2u8, d30u8;
-
- d0u8 = vdup_n_u8(above[-1]);
- d30u8 = vld1_u8(left);
- d2u8 = vld1_u8(above);
- q10u16 = vmovl_u8(d30u8);
- q3u16 = vsubl_u8(d2u8, d0u8);
+void vp9_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int i;
+ uint16x8_t q1u16, q3u16;
+ int16x8_t q1s16;
+ uint8x8_t d0u8 = vdup_n_u8(0);
+ uint32x2_t d2u32 = vdup_n_u32(0);
+
+ d0u8 = vld1_dup_u8(above - 1);
+ d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0);
+ q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8);
+ for (i = 0; i < 4; i++, dst += stride) {
+ q1u16 = vdupq_n_u16((uint16_t)left[i]);
+ q1s16 = vaddq_s16(vreinterpretq_s16_u16(q1u16),
+ vreinterpretq_s16_u16(q3u16));
+ d0u8 = vqmovun_s16(q1s16);
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
+ }
+}
+
+void vp9_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int j;
+ uint16x8_t q0u16, q3u16, q10u16;
+ int16x8_t q0s16;
+ uint16x4_t d20u16;
+ uint8x8_t d0u8, d2u8, d30u8;
+
+ d0u8 = vld1_dup_u8(above - 1);
+ d30u8 = vld1_u8(left);
+ d2u8 = vld1_u8(above);
+ q10u16 = vmovl_u8(d30u8);
+ q3u16 = vsubl_u8(d2u8, d0u8);
+ d20u16 = vget_low_u16(q10u16);
+ for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) {
+ q0u16 = vdupq_lane_u16(d20u16, 0);
+ q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16),
+ vreinterpretq_s16_u16(q0u16));
+ d0u8 = vqmovun_s16(q0s16);
+ vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
+ dst += stride;
+ q0u16 = vdupq_lane_u16(d20u16, 1);
+ q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16),
+ vreinterpretq_s16_u16(q0u16));
+ d0u8 = vqmovun_s16(q0s16);
+ vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
+ dst += stride;
+ q0u16 = vdupq_lane_u16(d20u16, 2);
+ q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16),
+ vreinterpretq_s16_u16(q0u16));
+ d0u8 = vqmovun_s16(q0s16);
+ vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
+ dst += stride;
+ q0u16 = vdupq_lane_u16(d20u16, 3);
+ q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16),
+ vreinterpretq_s16_u16(q0u16));
+ d0u8 = vqmovun_s16(q0s16);
+ vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
+ dst += stride;
+ }
+}
+
+void vp9_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int j, k;
+ uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16;
+ uint8x16_t q0u8, q1u8;
+ int16x8_t q0s16, q1s16, q8s16, q11s16;
+ uint16x4_t d20u16;
+ uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8;
+
+ q0u8 = vld1q_dup_u8(above - 1);
+ q1u8 = vld1q_u8(above);
+ q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8));
+ q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8));
+ for (k = 0; k < 2; k++, left += 8) {
+ d18u8 = vld1_u8(left);
+ q10u16 = vmovl_u8(d18u8);
d20u16 = vget_low_u16(q10u16);
for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) {
- q0u16 = vdupq_lane_u16(d20u16, 0);
- q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16),
- vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += y_stride;
- q0u16 = vdupq_lane_u16(d20u16, 1);
- q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16),
- vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += y_stride;
- q0u16 = vdupq_lane_u16(d20u16, 2);
- q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16),
- vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += y_stride;
- q0u16 = vdupq_lane_u16(d20u16, 3);
- q0s16 = vaddq_s16(vreinterpretq_s16_u16(q3u16),
- vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += y_stride;
- }
- return;
-}
-
-void vp9_tm_predictor_16x16_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- int j, k;
- uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16;
- uint8x16_t q0u8, q1u8;
- int16x8_t q0s16, q1s16, q8s16, q11s16;
- uint16x4_t d20u16;
- uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8;
-
- q0u8 = vdupq_n_u8(above[-1]);
- q1u8 = vld1q_u8(above);
- q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8));
- q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8));
- for (k = 0; k < 2; k++, left += 8) {
- d18u8 = vld1_u8(left);
- q10u16 = vmovl_u8(d18u8);
- d20u16 = vget_low_u16(q10u16);
- for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) {
- q0u16 = vdupq_lane_u16(d20u16, 0);
- q8u16 = vdupq_lane_u16(d20u16, 1);
- q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q2u16));
- q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q3u16));
- q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16),
- vreinterpretq_s16_u16(q2u16));
- q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16),
- vreinterpretq_s16_u16(q3u16));
- d2u8 = vqmovun_s16(q1s16);
- d3u8 = vqmovun_s16(q0s16);
- d22u8 = vqmovun_s16(q11s16);
- d23u8 = vqmovun_s16(q8s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8));
- dst += y_stride;
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8));
- dst += y_stride;
-
- q0u16 = vdupq_lane_u16(d20u16, 2);
- q8u16 = vdupq_lane_u16(d20u16, 3);
- q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q2u16));
- q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q3u16));
- q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16),
- vreinterpretq_s16_u16(q2u16));
- q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16),
- vreinterpretq_s16_u16(q3u16));
- d2u8 = vqmovun_s16(q1s16);
- d3u8 = vqmovun_s16(q0s16);
- d22u8 = vqmovun_s16(q11s16);
- d23u8 = vqmovun_s16(q8s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8));
- dst += y_stride;
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8));
- dst += y_stride;
- }
+ q0u16 = vdupq_lane_u16(d20u16, 0);
+ q8u16 = vdupq_lane_u16(d20u16, 1);
+ q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q2u16));
+ q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q3u16));
+ q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16),
+ vreinterpretq_s16_u16(q2u16));
+ q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16),
+ vreinterpretq_s16_u16(q3u16));
+ d2u8 = vqmovun_s16(q1s16);
+ d3u8 = vqmovun_s16(q0s16);
+ d22u8 = vqmovun_s16(q11s16);
+ d23u8 = vqmovun_s16(q8s16);
+ vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8));
+ vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8));
+ dst += stride;
+ vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8));
+ vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8));
+ dst += stride;
+
+ q0u16 = vdupq_lane_u16(d20u16, 2);
+ q8u16 = vdupq_lane_u16(d20u16, 3);
+ q1s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q2u16));
+ q0s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q3u16));
+ q11s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16),
+ vreinterpretq_s16_u16(q2u16));
+ q8s16 = vaddq_s16(vreinterpretq_s16_u16(q8u16),
+ vreinterpretq_s16_u16(q3u16));
+ d2u8 = vqmovun_s16(q1s16);
+ d3u8 = vqmovun_s16(q0s16);
+ d22u8 = vqmovun_s16(q11s16);
+ d23u8 = vqmovun_s16(q8s16);
+ vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8));
+ vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8));
+ dst += stride;
+ vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8));
+ vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8));
+ dst += stride;
}
- return;
-}
-
-void vp9_tm_predictor_32x32_neon(
- uint8_t *dst,
- ptrdiff_t y_stride,
- const uint8_t *above,
- const uint8_t *left) {
- int j, k;
- uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16;
- uint8x16_t q0u8, q1u8, q2u8;
- int16x8_t q12s16, q13s16, q14s16, q15s16;
- uint16x4_t d6u16;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8;
-
- q0u8 = vdupq_n_u8(above[-1]);
- q1u8 = vld1q_u8(above);
- q2u8 = vld1q_u8(above + 16);
- q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8));
- q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8));
- q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8));
- q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8));
- for (k = 0; k < 4; k++, left += 8) {
- d26u8 = vld1_u8(left);
- q3u16 = vmovl_u8(d26u8);
- d6u16 = vget_low_u16(q3u16);
- for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) {
- q0u16 = vdupq_lane_u16(d6u16, 0);
- q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q8u16));
- q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += y_stride;
-
- q0u16 = vdupq_lane_u16(d6u16, 1);
- q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q8u16));
- q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += y_stride;
-
- q0u16 = vdupq_lane_u16(d6u16, 2);
- q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q8u16));
- q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += y_stride;
-
- q0u16 = vdupq_lane_u16(d6u16, 3);
- q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q8u16));
- q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += y_stride;
- }
+ }
+}
+
+void vp9_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ int j, k;
+ uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16;
+ uint8x16_t q0u8, q1u8, q2u8;
+ int16x8_t q12s16, q13s16, q14s16, q15s16;
+ uint16x4_t d6u16;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8;
+
+ q0u8 = vld1q_dup_u8(above - 1);
+ q1u8 = vld1q_u8(above);
+ q2u8 = vld1q_u8(above + 16);
+ q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8));
+ q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8));
+ q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8));
+ q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8));
+ for (k = 0; k < 4; k++, left += 8) {
+ d26u8 = vld1_u8(left);
+ q3u16 = vmovl_u8(d26u8);
+ d6u16 = vget_low_u16(q3u16);
+ for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) {
+ q0u16 = vdupq_lane_u16(d6u16, 0);
+ q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q8u16));
+ q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q9u16));
+ q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q10u16));
+ q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q11u16));
+ d0u8 = vqmovun_s16(q12s16);
+ d1u8 = vqmovun_s16(q13s16);
+ d2u8 = vqmovun_s16(q14s16);
+ d3u8 = vqmovun_s16(q15s16);
+ q0u8 = vcombine_u8(d0u8, d1u8);
+ q1u8 = vcombine_u8(d2u8, d3u8);
+ vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
+ vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
+ dst += stride;
+
+ q0u16 = vdupq_lane_u16(d6u16, 1);
+ q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q8u16));
+ q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q9u16));
+ q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q10u16));
+ q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q11u16));
+ d0u8 = vqmovun_s16(q12s16);
+ d1u8 = vqmovun_s16(q13s16);
+ d2u8 = vqmovun_s16(q14s16);
+ d3u8 = vqmovun_s16(q15s16);
+ q0u8 = vcombine_u8(d0u8, d1u8);
+ q1u8 = vcombine_u8(d2u8, d3u8);
+ vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
+ vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
+ dst += stride;
+
+ q0u16 = vdupq_lane_u16(d6u16, 2);
+ q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q8u16));
+ q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q9u16));
+ q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q10u16));
+ q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q11u16));
+ d0u8 = vqmovun_s16(q12s16);
+ d1u8 = vqmovun_s16(q13s16);
+ d2u8 = vqmovun_s16(q14s16);
+ d3u8 = vqmovun_s16(q15s16);
+ q0u8 = vcombine_u8(d0u8, d1u8);
+ q1u8 = vcombine_u8(d2u8, d3u8);
+ vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
+ vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
+ dst += stride;
+
+ q0u16 = vdupq_lane_u16(d6u16, 3);
+ q12s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q8u16));
+ q13s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q9u16));
+ q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q10u16));
+ q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
+ vreinterpretq_s16_u16(q11u16));
+ d0u8 = vqmovun_s16(q12s16);
+ d1u8 = vqmovun_s16(q13s16);
+ d2u8 = vqmovun_s16(q14s16);
+ d3u8 = vqmovun_s16(q15s16);
+ q0u8 = vcombine_u8(d0u8, d1u8);
+ q1u8 = vcombine_u8(d2u8, d3u8);
+ vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
+ vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
+ dst += stride;
}
- return;
+ }
}
+#endif // !HAVE_NEON_ASM
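
The rewritten file above adds NEON DC, d45 and d135 predictors and keeps the v/h/tm intrinsics behind #if !HAVE_NEON_ASM so they do not collide with the hand-written assembly versions. The DC helpers round with vrshrn_n_u16 shifts of 3/4/5/6 for 4x4/8x8/16x16/32x32 when both borders are present and fall back to 0x80 when neither is. A scalar cross-check of that rounding, with an assumed helper name (not part of the patch):

#include <stddef.h>
#include <stdint.h>

/* Scalar reference for the DC predictors above (assumption: same
 * round-to-nearest behaviour as the vrshrn_n_u16 shifts). */
static void dc_ref(uint8_t *dst, ptrdiff_t stride, int bs,
                   const uint8_t *above, const uint8_t *left) {
  int i, r, dc;
  if (above || left) {
    int sum = 0, count = 0;
    if (above) { for (i = 0; i < bs; ++i) sum += above[i]; count += bs; }
    if (left)  { for (i = 0; i < bs; ++i) sum += left[i];  count += bs; }
    dc = (sum + (count >> 1)) / count;  /* rounding shift; count is 2^k */
  } else {
    dc = 0x80;  /* no border pixels available */
  }
  for (r = 0; r < bs; ++r)
    for (i = 0; i < bs; ++i)
      dst[r * stride + i] = (uint8_t)dc;
}
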
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm
index dc9856fa887..14f574a50e1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm
@@ -298,8 +298,7 @@ loop_h
|vp9_tm_predictor_4x4_neon| PROC
; Load ytop_left = above[-1];
sub r12, r2, #1
- ldrb r12, [r12]
- vdup.u8 d0, r12
+ vld1.u8 {d0[]}, [r12]
; Load above 4 pixels
vld1.32 {d2[0]}, [r2]
@@ -309,10 +308,10 @@ loop_h
; Load left row by row and compute left + (above - ytop_left)
; 1st row and 2nd row
- ldrb r12, [r3], #1
- ldrb r2, [r3], #1
- vdup.u16 q1, r12
- vdup.u16 q2, r2
+ vld1.u8 {d2[]}, [r3]!
+ vld1.u8 {d4[]}, [r3]!
+ vmovl.u8 q1, d2
+ vmovl.u8 q2, d4
vadd.s16 q1, q1, q3
vadd.s16 q2, q2, q3
vqmovun.s16 d0, q1
@@ -321,10 +320,10 @@ loop_h
vst1.32 {d1[0]}, [r0], r1
; 3rd row and 4th row
- ldrb r12, [r3], #1
- ldrb r2, [r3], #1
- vdup.u16 q1, r12
- vdup.u16 q2, r2
+ vld1.u8 {d2[]}, [r3]!
+ vld1.u8 {d4[]}, [r3]
+ vmovl.u8 q1, d2
+ vmovl.u8 q2, d4
vadd.s16 q1, q1, q3
vadd.s16 q2, q2, q3
vqmovun.s16 d0, q1
@@ -345,8 +344,7 @@ loop_h
|vp9_tm_predictor_8x8_neon| PROC
; Load ytop_left = above[-1];
sub r12, r2, #1
- ldrb r12, [r12]
- vdup.u8 d0, r12
+ vld1.8 {d0[]}, [r12]
; preload 8 left
vld1.8 {d30}, [r3]
@@ -418,8 +416,7 @@ loop_h
|vp9_tm_predictor_16x16_neon| PROC
; Load ytop_left = above[-1];
sub r12, r2, #1
- ldrb r12, [r12]
- vdup.u8 q0, r12
+ vld1.8 {d0[]}, [r12]
; Load above 8 pixels
vld1.8 {q1}, [r2]
@@ -429,7 +426,7 @@ loop_h
; Compute above - ytop_left
vsubl.u8 q2, d2, d0
- vsubl.u8 q3, d3, d1
+ vsubl.u8 q3, d3, d0
vmovl.u8 q10, d18
@@ -512,8 +509,7 @@ loop_16x16_neon
|vp9_tm_predictor_32x32_neon| PROC
; Load ytop_left = above[-1];
sub r12, r2, #1
- ldrb r12, [r12]
- vdup.u8 q0, r12
+ vld1.8 {d0[]}, [r12]
; Load above 32 pixels
vld1.8 {q1}, [r2]!
@@ -524,9 +520,9 @@ loop_16x16_neon
; Compute above - ytop_left
vsubl.u8 q8, d2, d0
- vsubl.u8 q9, d3, d1
+ vsubl.u8 q9, d3, d0
vsubl.u8 q10, d4, d0
- vsubl.u8 q11, d5, d1
+ vsubl.u8 q11, d5, d0
vmovl.u8 q3, d26
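
The assembly hunks above make two small changes: the top-left pixel above[-1] is now broadcast straight from memory with vld1.u8 {d0[]} instead of an ldrb into a core register followed by vdup, and, because the new load only fills d0, the 16x16/32x32 tm predictors subtract d0 everywhere instead of the no-longer-initialized d1. The C intrinsics in vp9_reconintra_neon.c earlier in this patch use the matching idiom; a one-line sketch for reference (the helper name is illustrative):

#include <arm_neon.h>

/* Broadcast-load of above[-1]: one structure load replaces the scalar
 * load + vdup pair, matching the vld1.u8 {d0[]} form in the asm hunk. */
static inline uint8x8_t load_top_left_dup(const uint8_t *above) {
  return vld1_dup_u8(above - 1);  /* previously vdup_n_u8(above[-1]) */
}
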
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
index 19c582fd109..202d9138199 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
index 132d88ce5f7..7ceebb6d88c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_cols_dspr2.c
@@ -12,6 +12,7 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
index 1990348b83a..280190a39ba 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
index fc44ffa311d..04d226663f9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
@@ -13,6 +13,7 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
index 008cf8cacd9..675db654ab9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/dspr2/vp9_loopfilter_filters_dspr2.h
@@ -14,6 +14,7 @@
#include <stdlib.h>
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_onyxc_int.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c
new file mode 100644
index 00000000000..89364cb95cb
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_horiz_msa.c
@@ -0,0 +1,782 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+
+static void common_hz_8t_and_aver_dst_4x4_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter) {
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 dst0, dst1, dst2, dst3, res2, res3;
+ v16u8 mask0, mask1, mask2, mask3;
+ v8i16 filt, res0, res1;
+
+ mask0 = LD_UB(&mc_filt_mask_arr[16]);
+ src -= 3;
+
+ /* rearranging filter */
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ mask1 = mask0 + 2;
+ mask2 = mask0 + 4;
+ mask3 = mask0 + 6;
+
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
+ filt0, filt1, filt2, filt3, res0, res1);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ SRARI_H2_SH(res0, res1, FILTER_BITS);
+ SAT_SH2_SH(res0, res1, 7);
+ PCKEV_B2_UB(res0, res0, res1, res1, res2, res3);
+ ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2);
+ XORI_B2_128_UB(res2, res3);
+ AVER_UB2_UB(res2, dst0, res3, dst2, res2, res3);
+ ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hz_8t_and_aver_dst_4x8_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter) {
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 mask0, mask1, mask2, mask3, res0, res1, res2, res3;
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+ v8i16 filt, vec0, vec1, vec2, vec3;
+
+ mask0 = LD_UB(&mc_filt_mask_arr[16]);
+ src -= 3;
+
+ /* rearranging filter */
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ mask1 = mask0 + 2;
+ mask2 = mask0 + 4;
+ mask3 = mask0 + 6;
+
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ src += (4 * src_stride);
+ LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+ HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
+ filt0, filt1, filt2, filt3, vec0, vec1);
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
+ filt0, filt1, filt2, filt3, vec2, vec3);
+ SRARI_H4_SH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ SAT_SH4_SH(vec0, vec1, vec2, vec3, 7);
+ PCKEV_B4_UB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, res0, res1, res2,
+ res3);
+ ILVR_D2_UB(res1, res0, res3, res2, res0, res2);
+ XORI_B2_128_UB(res0, res2);
+ ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2, dst4,
+ dst6);
+ ILVR_D2_UB(dst2, dst0, dst6, dst4, dst0, dst4);
+ AVER_UB2_UB(res0, dst0, res2, dst4, res0, res2);
+ ST4x8_UB(res0, res2, dst, dst_stride);
+}
+
+static void common_hz_8t_and_aver_dst_4w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ if (4 == height) {
+ common_hz_8t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter);
+ } else if (8 == height) {
+ common_hz_8t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter);
+ }
+}
+
+static void common_hz_8t_and_aver_dst_8w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ int32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 mask0, mask1, mask2, mask3, dst0, dst1, dst2, dst3;
+ v8i16 filt, out0, out1, out2, out3;
+
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
+ src -= 3;
+
+ /* rearranging filter */
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ mask1 = mask0 + 2;
+ mask2 = mask0 + 4;
+ mask3 = mask0 + 6;
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ src += (4 * src_stride);
+ HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+ mask3, filt0, filt1, filt2, filt3, out0, out1,
+ out2, out3);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
+ dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+}
+
+static void common_hz_8t_and_aver_dst_16w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ int32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 mask0, mask1, mask2, mask3, dst0, dst1;
+ v8i16 filt, out0, out1, out2, out3;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
+ src -= 3;
+
+ /* rearranging filter */
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ mask1 = mask0 + 2;
+ mask2 = mask0 + 4;
+ mask3 = mask0 + 6;
+
+ for (loop_cnt = height >> 1; loop_cnt--;) {
+ LD_SB2(src, src_stride, src0, src2);
+ LD_SB2(src + 8, src_stride, src1, src3);
+ src += (2 * src_stride);
+
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, vec12);
+ VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, vec13);
+ VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10,
+ vec14);
+ VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11,
+ vec15);
+ DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8,
+ vec9, vec10, vec11);
+ DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, vec1,
+ vec2, vec3);
+ DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8,
+ vec9, vec10, vec11);
+ ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1,
+ out2, out3);
+ LD_UB2(dst, dst_stride, dst0, dst1);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ PCKEV_XORI128_AVG_ST_UB(out1, out0, dst0, dst);
+ dst += dst_stride;
+ PCKEV_XORI128_AVG_ST_UB(out3, out2, dst1, dst);
+ dst += dst_stride;
+ }
+}
+
+static void common_hz_8t_and_aver_dst_32w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 dst1, dst2, mask0, mask1, mask2, mask3;
+ v8i16 filt, out0, out1, out2, out3;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
+ src -= 3;
+
+ /* rearranging filter */
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ mask1 = mask0 + 2;
+ mask2 = mask0 + 4;
+ mask3 = mask0 + 6;
+
+ for (loop_cnt = height; loop_cnt--;) {
+ src0 = LD_SB(src);
+ src2 = LD_SB(src + 16);
+ src3 = LD_SB(src + 24);
+ src1 = __msa_sldi_b(src2, src0, 8);
+ src += src_stride;
+
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, vec12);
+ VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, vec13);
+ VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10,
+ vec14);
+ VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11,
+ vec15);
+ DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8,
+ vec9, vec10, vec11);
+ DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, vec1,
+ vec2, vec3);
+ DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8,
+ vec9, vec10, vec11);
+ ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1,
+ out2, out3);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ LD_UB2(dst, 16, dst1, dst2);
+ PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, dst);
+ PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, dst + 16);
+ dst += dst_stride;
+ }
+}
+
+static void common_hz_8t_and_aver_dst_64w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt, cnt;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 dst1, dst2, mask0, mask1, mask2, mask3;
+ v8i16 filt, out0, out1, out2, out3;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
+
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
+ src -= 3;
+
+ /* rearranging filter */
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ mask1 = mask0 + 2;
+ mask2 = mask0 + 4;
+ mask3 = mask0 + 6;
+
+ for (loop_cnt = height; loop_cnt--;) {
+ for (cnt = 0; cnt < 2; ++cnt) {
+ src0 = LD_SB(&src[cnt << 5]);
+ src2 = LD_SB(&src[16 + (cnt << 5)]);
+ src3 = LD_SB(&src[24 + (cnt << 5)]);
+ src1 = __msa_sldi_b(src2, src0, 8);
+
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8,
+ vec12);
+ VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9,
+ vec13);
+ VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10,
+ vec14);
+ VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11,
+ vec15);
+ DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0,
+ vec1, vec2, vec3);
+ DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8,
+ vec9, vec10, vec11);
+ DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0,
+ vec1, vec2, vec3);
+ DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8,
+ vec9, vec10, vec11);
+ ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1,
+ out2, out3);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ LD_UB2(&dst[cnt << 5], 16, dst1, dst2);
+ PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, &dst[cnt << 5]);
+ PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, &dst[16 + (cnt << 5)]);
+ }
+
+ src += src_stride;
+ dst += dst_stride;
+ }
+}
+
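+/* 2-tap (bilinear) horizontal filter with destination averaging. These
+ * variants are used when only the two centre filter taps are non-zero. */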
+static void common_hz_2t_and_aver_dst_4x4_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter) {
+ v16i8 src0, src1, src2, src3, mask;
+ v16u8 filt0, dst0, dst1, dst2, dst3, vec0, vec1, res0, res1;
+ v8u16 vec2, vec3, const255, filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[16]);
+
+ /* rearranging filter */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
+ SRARI_H2_UH(vec2, vec3, FILTER_BITS);
+ MIN_UH2_UH(vec2, vec3, const255);
+ PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
+ ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2);
+ AVER_UB2_UB(res0, dst0, res1, dst2, res0, res1);
+ ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hz_2t_and_aver_dst_4x8_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter) {
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+ v16u8 filt0, vec0, vec1, vec2, vec3, res0, res1, res2, res3;
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+ v8u16 vec4, vec5, vec6, vec7, const255, filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[16]);
+
+ /* rearranging filter */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+ VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5,
+ vec6, vec7);
+ SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS);
+ MIN_UH4_UH(vec4, vec5, vec6, vec7, const255);
+ PCKEV_B4_UB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2,
+ res3);
+ ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2, dst4,
+ dst6);
+ AVER_UB4_UB(res0, dst0, res1, dst2, res2, dst4, res3, dst6, res0, res1, res2,
+ res3);
+ ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+ dst += (4 * dst_stride);
+ ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hz_2t_and_aver_dst_4w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ if (4 == height) {
+ common_hz_2t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter);
+ } else if (8 == height) {
+ common_hz_2t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter);
+ }
+}
+
+static void common_hz_2t_and_aver_dst_8x4_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter) {
+ v16i8 src0, src1, src2, src3, mask;
+ v16u8 filt0, dst0, dst1, dst2, dst3;
+ v8u16 vec0, vec1, vec2, vec3, const255, filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[0]);
+
+ /* rearranging filter */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+ dst, dst_stride);
+}
+
+static void common_hz_2t_and_aver_dst_8x8mult_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ v16i8 src0, src1, src2, src3, mask;
+ v16u8 filt0, dst0, dst1, dst2, dst3;
+ v8u16 vec0, vec1, vec2, vec3, const255, filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[0]);
+
+ /* rearranging filter */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ src += (4 * src_stride);
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ src += (4 * src_stride);
+ PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+ dst, dst_stride);
+ dst += (4 * dst_stride);
+
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+ dst, dst_stride);
+ dst += (4 * dst_stride);
+
+ if (16 == height) {
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ src += (4 * src_stride);
+
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+ dst, dst_stride);
+ dst += (4 * dst_stride);
+
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ PCKEV_AVG_ST8x4_UB(vec0, dst0, vec1, dst1, vec2, dst2, vec3, dst3,
+ dst, dst_stride);
+ }
+}
+
+static void common_hz_2t_and_aver_dst_8w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ if (4 == height) {
+ common_hz_2t_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride, filter);
+ } else {
+ common_hz_2t_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride,
+ filter, height);
+ }
+}
+
+static void common_hz_2t_and_aver_dst_16w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+ v16u8 filt0, dst0, dst1, dst2, dst3;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 res0, res1, res2, res3, res4, res5, res6, res7, const255, filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[0]);
+
+ /* rearranging filter */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ LD_SB4(src, src_stride, src0, src2, src4, src6);
+ LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+ src += (4 * src_stride);
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+ VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1,
+ res2, res3);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5,
+ res6, res7);
+ SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS);
+ SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ MIN_UH4_UH(res0, res1, res2, res3, const255);
+ MIN_UH4_UH(res4, res5, res6, res7, const255);
+ PCKEV_AVG_ST_UB(res1, res0, dst0, dst);
+ dst += dst_stride;
+ PCKEV_AVG_ST_UB(res3, res2, dst1, dst);
+ dst += dst_stride;
+ PCKEV_AVG_ST_UB(res5, res4, dst2, dst);
+ dst += dst_stride;
+ PCKEV_AVG_ST_UB(res7, res6, dst3, dst);
+ dst += dst_stride;
+
+ for (loop_cnt = (height >> 2) - 1; loop_cnt--;) {
+ LD_SB4(src, src_stride, src0, src2, src4, src6);
+ LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+ src += (4 * src_stride);
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+ VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1,
+ res2, res3);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5,
+ res6, res7);
+ SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS);
+ SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ MIN_UH4_UH(res0, res1, res2, res3, const255);
+ MIN_UH4_UH(res4, res5, res6, res7, const255);
+ PCKEV_AVG_ST_UB(res1, res0, dst0, dst);
+ dst += dst_stride;
+ PCKEV_AVG_ST_UB(res3, res2, dst1, dst);
+ dst += dst_stride;
+ PCKEV_AVG_ST_UB(res5, res4, dst2, dst);
+ dst += dst_stride;
+ PCKEV_AVG_ST_UB(res7, res6, dst3, dst);
+ dst += dst_stride;
+ }
+}
+
+static void common_hz_2t_and_aver_dst_32w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+ v16u8 filt0, dst0, dst1, dst2, dst3;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 res0, res1, res2, res3, res4, res5, res6, res7, const255, filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[0]);
+
+ /* rearranging filter */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ for (loop_cnt = (height >> 1); loop_cnt--;) {
+ src0 = LD_SB(src);
+ src2 = LD_SB(src + 16);
+ src3 = LD_SB(src + 24);
+ src1 = __msa_sldi_b(src2, src0, 8);
+ src += src_stride;
+ src4 = LD_SB(src);
+ src6 = LD_SB(src + 16);
+ src7 = LD_SB(src + 24);
+ src5 = __msa_sldi_b(src6, src4, 8);
+ src += src_stride;
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+ VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1,
+ res2, res3);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5,
+ res6, res7);
+ SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS);
+ SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS);
+ MIN_UH4_UH(res0, res1, res2, res3, const255);
+ MIN_UH4_UH(res4, res5, res6, res7, const255);
+ LD_UB2(dst, 16, dst0, dst1);
+ PCKEV_AVG_ST_UB(res1, res0, dst0, dst);
+ PCKEV_AVG_ST_UB(res3, res2, dst1, (dst + 16));
+ dst += dst_stride;
+ LD_UB2(dst, 16, dst2, dst3);
+ PCKEV_AVG_ST_UB(res5, res4, dst2, dst);
+ PCKEV_AVG_ST_UB(res7, res6, dst3, (dst + 16));
+ dst += dst_stride;
+ }
+}
+
+static void common_hz_2t_and_aver_dst_64w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+ v16u8 filt0, dst0, dst1, dst2, dst3;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 out0, out1, out2, out3, out4, out5, out6, out7, const255, filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[0]);
+
+ /* rearranging filter */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ const255 = (v8u16)__msa_ldi_h(255);
+
+ for (loop_cnt = height; loop_cnt--;) {
+ LD_SB4(src, 16, src0, src2, src4, src6);
+ src7 = LD_SB(src + 56);
+ SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8);
+ src += src_stride;
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+ VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
+ out2, out3);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
+ out6, out7);
+ SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
+ SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
+ LD_UB4(dst, 16, dst0, dst1, dst2, dst3);
+ MIN_UH4_UH(out0, out1, out2, out3, const255);
+ MIN_UH4_UH(out4, out5, out6, out7, const255);
+ PCKEV_AVG_ST_UB(out1, out0, dst0, dst);
+ PCKEV_AVG_ST_UB(out3, out2, dst1, dst + 16);
+ PCKEV_AVG_ST_UB(out5, out4, dst2, dst + 32);
+ PCKEV_AVG_ST_UB(out7, out6, dst3, dst + 48);
+ dst += dst_stride;
+ }
+}
+
+void vp9_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ int8_t cnt, filt_hor[8];
+
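+  /* x_step_q4 == 16 means one full pixel per output sample; scaled
+     prediction (any other step) is left to the C implementation. */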
+ if (16 != x_step_q4) {
+ vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ return;
+ }
+
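+  /* Detect the all-pass filter (a single centre tap of 128): averaging the
+     unfiltered source into dst is then sufficient. */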
+ if (((const int32_t *)filter_x)[1] == 0x800000) {
+ vp9_convolve_avg(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ return;
+ }
+
+ for (cnt = 0; cnt < 8; ++cnt) {
+ filt_hor[cnt] = filter_x[cnt];
+ }
+
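+  /* Taps 0 and 1 equal to zero identifies the bilinear kernels, which take
+     the faster 2-tap path; otherwise run the full 8-tap filters. */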
+ if (((const int32_t *)filter_x)[0] == 0) {
+ switch (w) {
+ case 4:
+ common_hz_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], h);
+ break;
+ case 8:
+ common_hz_2t_and_aver_dst_8w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], h);
+ break;
+ case 16:
+ common_hz_2t_and_aver_dst_16w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], h);
+ break;
+ case 32:
+ common_hz_2t_and_aver_dst_32w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], h);
+ break;
+ case 64:
+ common_hz_2t_and_aver_dst_64w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], h);
+ break;
+ default:
+ vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ break;
+ }
+ } else {
+ switch (w) {
+ case 4:
+ common_hz_8t_and_aver_dst_4w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, h);
+ break;
+ case 8:
+ common_hz_8t_and_aver_dst_8w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, h);
+ break;
+ case 16:
+ common_hz_8t_and_aver_dst_16w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, h);
+ break;
+ case 32:
+ common_hz_8t_and_aver_dst_32w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, h);
+ break;
+ case 64:
+ common_hz_8t_and_aver_dst_64w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, h);
+ break;
+ default:
+ vp9_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ break;
+ }
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_msa.c
new file mode 100644
index 00000000000..e9f3a9dc3f2
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_msa.c
@@ -0,0 +1,679 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+
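+/* Combined 8-tap horizontal and 8-tap vertical filter with destination
+ * averaging for 4-wide blocks. Horizontal pass results are kept in vector
+ * registers and fed directly into the vertical pass. */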
+static void common_hv_8ht_8vt_and_aver_dst_4w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+ v16u8 dst0, dst1, dst2, dst3, mask0, mask1, mask2, mask3, tmp0, tmp1;
+ v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3;
+ v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+ v8i16 hz_out7, hz_out8, hz_out9, res0, res1, vec0, vec1, vec2, vec3, vec4;
+ v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3;
+
+ mask0 = LD_UB(&mc_filt_mask_arr[16]);
+ src -= (3 + 3 * src_stride);
+
+ /* rearranging filter */
+ filt = LD_SH(filter_horiz);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+
+ mask1 = mask0 + 2;
+ mask2 = mask0 + 4;
+ mask3 = mask0 + 6;
+
+ LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+ src += (7 * src_stride);
+
+ hz_out0 = HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out2 = HORIZ_8TAP_FILT(src2, src3, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out4 = HORIZ_8TAP_FILT(src4, src5, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out5 = HORIZ_8TAP_FILT(src5, src6, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ SLDI_B2_SH(hz_out2, hz_out4, hz_out0, hz_out2, hz_out1, hz_out3, 8);
+
+ filt = LD_SH(filter_vert);
+ SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3);
+
+ ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ vec2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src7, src8, src9, src10);
+ XORI_B4_128_SB(src7, src8, src9, src10);
+ src += (4 * src_stride);
+
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ hz_out7 = HORIZ_8TAP_FILT(src7, src8, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ hz_out6 = (v8i16)__msa_sldi_b((v16i8)hz_out7, (v16i8)hz_out5, 8);
+ vec3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6);
+ res0 = FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+
+ hz_out9 = HORIZ_8TAP_FILT(src9, src10, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ hz_out8 = (v8i16)__msa_sldi_b((v16i8)hz_out9, (v16i8)hz_out7, 8);
+ vec4 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8);
+ res1 = FILT_8TAP_DPADD_S_H(vec1, vec2, vec3, vec4, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+ ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2);
+
+ SRARI_H2_SH(res0, res1, FILTER_BITS);
+ SAT_SH2_SH(res0, res1, 7);
+ PCKEV_B2_UB(res0, res0, res1, res1, tmp0, tmp1);
+ XORI_B2_128_UB(tmp0, tmp1);
+ AVER_UB2_UB(tmp0, dst0, tmp1, dst2, tmp0, tmp1);
+ ST4x4_UB(tmp0, tmp1, 0, 1, 0, 1, dst, dst_stride);
+ dst += (4 * dst_stride);
+
+ hz_out5 = hz_out9;
+ vec0 = vec2;
+ vec1 = vec3;
+ vec2 = vec4;
+ }
+}
+
+static void common_hv_8ht_8vt_and_aver_dst_8w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+ v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3;
+ v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3;
+ v16u8 dst0, dst1, dst2, dst3, mask0, mask1, mask2, mask3;
+ v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+ v8i16 hz_out7, hz_out8, hz_out9, hz_out10, tmp0, tmp1, tmp2, tmp3;
+ v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9;
+
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
+ src -= (3 + 3 * src_stride);
+
+ /* rearranging filter */
+ filt = LD_SH(filter_horiz);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+
+ mask1 = mask0 + 2;
+ mask2 = mask0 + 4;
+ mask3 = mask0 + 6;
+
+ LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ src += (7 * src_stride);
+
+ XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+ hz_out0 = HORIZ_8TAP_FILT(src0, src0, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out1 = HORIZ_8TAP_FILT(src1, src1, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out2 = HORIZ_8TAP_FILT(src2, src2, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out3 = HORIZ_8TAP_FILT(src3, src3, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out4 = HORIZ_8TAP_FILT(src4, src4, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out5 = HORIZ_8TAP_FILT(src5, src5, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out6 = HORIZ_8TAP_FILT(src6, src6, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+
+ filt = LD_SH(filter_vert);
+ SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3);
+
+ ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1);
+ ILVEV_B2_SH(hz_out4, hz_out5, hz_out1, hz_out2, out2, out4);
+ ILVEV_B2_SH(hz_out3, hz_out4, hz_out5, hz_out6, out5, out6);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src7, src8, src9, src10);
+ XORI_B4_128_SB(src7, src8, src9, src10);
+ src += (4 * src_stride);
+
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+
+ hz_out7 = HORIZ_8TAP_FILT(src7, src7, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6);
+ tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+
+ hz_out8 = HORIZ_8TAP_FILT(src8, src8, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ out7 = (v8i16)__msa_ilvev_b((v16i8)hz_out8, (v16i8)hz_out7);
+ tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+
+ hz_out9 = HORIZ_8TAP_FILT(src9, src9, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ out8 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8);
+ tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+
+ hz_out10 = HORIZ_8TAP_FILT(src10, src10, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ out9 = (v8i16)__msa_ilvev_b((v16i8)hz_out10, (v16i8)hz_out9);
+ tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+
+ SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+ CONVERT_UB_AVG_ST8x4_UB(tmp0, tmp1, tmp2, tmp3, dst0, dst1, dst2, dst3,
+ dst, dst_stride);
+ dst += (4 * dst_stride);
+
+ hz_out6 = hz_out10;
+ out0 = out2;
+ out1 = out3;
+ out2 = out8;
+ out4 = out6;
+ out5 = out7;
+ out6 = out9;
+ }
+}
+
+static void common_hv_8ht_8vt_and_aver_dst_16w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ int32_t multiple8_cnt;
+ for (multiple8_cnt = 2; multiple8_cnt--;) {
+ common_hv_8ht_8vt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride,
+ filter_horiz, filter_vert, height);
+ src += 8;
+ dst += 8;
+ }
+}
+
+static void common_hv_8ht_8vt_and_aver_dst_32w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ int32_t multiple8_cnt;
+ for (multiple8_cnt = 4; multiple8_cnt--;) {
+ common_hv_8ht_8vt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride,
+ filter_horiz, filter_vert, height);
+ src += 8;
+ dst += 8;
+ }
+}
+
+static void common_hv_8ht_8vt_and_aver_dst_64w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ int32_t multiple8_cnt;
+ for (multiple8_cnt = 8; multiple8_cnt--;) {
+ common_hv_8ht_8vt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride,
+ filter_horiz, filter_vert, height);
+ src += 8;
+ dst += 8;
+ }
+}
+
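+/* Combined 2-tap (bilinear) horizontal and vertical filter with destination
+ * averaging, 4x4 block. */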
+static void common_hv_2ht_2vt_and_aver_dst_4x4_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert) {
+ v16i8 src0, src1, src2, src3, src4, mask;
+ v16u8 filt_hz, filt_vt, vec0, vec1;
+ v16u8 dst0, dst1, dst2, dst3, res0, res1;
+ v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, tmp0, tmp1, filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[16]);
+
+ /* rearranging filter */
+ filt = LD_UH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ filt = LD_UH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
+ hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
+ hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
+ AVER_UB2_UB(res0, dst0, res1, dst2, res0, res1);
+ ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hv_2ht_2vt_and_aver_dst_4x8_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert) {
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
+ v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3, res0, res1, res2, res3;
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+ v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+ v8u16 hz_out7, hz_out8, tmp0, tmp1, tmp2, tmp3;
+ v8i16 filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[16]);
+
+ /* rearranging filter */
+ filt = LD_SH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h(filt, 0);
+
+ filt = LD_SH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h(filt, 0);
+
+ LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+ src += (8 * src_stride);
+ src8 = LD_SB(src);
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
+ hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, FILTER_BITS);
+ hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, FILTER_BITS);
+ hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, FILTER_BITS);
+ SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1,
+ hz_out3, hz_out5, 8);
+ hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6);
+
+ LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+ ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2,
+ dst4, dst6);
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt,
+ tmp0, tmp1, tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_B4_UB(tmp0, tmp0, tmp1, tmp1, tmp2, tmp2, tmp3, tmp3, res0, res1,
+ res2, res3);
+ AVER_UB4_UB(res0, dst0, res1, dst2, res2, dst4, res3, dst6, res0, res1,
+ res2, res3);
+ ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+ dst += (4 * dst_stride);
+ ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
+}
+
+static void common_hv_2ht_2vt_and_aver_dst_4w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ if (4 == height) {
+ common_hv_2ht_2vt_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride,
+ filter_horiz, filter_vert);
+ } else if (8 == height) {
+ common_hv_2ht_2vt_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride,
+ filter_horiz, filter_vert);
+ }
+}
+
+static void common_hv_2ht_2vt_and_aver_dst_8x4_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert) {
+ v16i8 src0, src1, src2, src3, src4, mask;
+ v16u8 filt_hz, filt_vt, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
+ v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
+ v8i16 filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[0]);
+
+ /* rearranging filter */
+ filt = LD_SH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h(filt, 0);
+
+ filt = LD_SH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h(filt, 0);
+
+ LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+ src += (5 * src_stride);
+
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp0 = __msa_dotp_u_h(vec0, filt_vt);
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
+ vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp1 = __msa_dotp_u_h(vec1, filt_vt);
+
+ hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
+ vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp2 = __msa_dotp_u_h(vec2, filt_vt);
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp3 = __msa_dotp_u_h(vec3, filt_vt);
+
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3,
+ dst, dst_stride);
+}
+
+static void common_hv_2ht_2vt_and_aver_dst_8x8mult_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, mask;
+ v16u8 filt_hz, filt_vt, vec0, dst0, dst1, dst2, dst3;
+ v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
+ v8i16 filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[0]);
+
+ /* rearranging filter */
+ filt = LD_SH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h(filt, 0);
+
+ filt = LD_SH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h(filt, 0);
+
+ src0 = LD_SB(src);
+ src += src_stride;
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src1, src2, src3, src4);
+ src += (4 * src_stride);
+
+ hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp0 = __msa_dotp_u_h(vec0, filt_vt);
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp1 = __msa_dotp_u_h(vec0, filt_vt);
+
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+
+ hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp2 = __msa_dotp_u_h(vec0, filt_vt);
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp3 = __msa_dotp_u_h(vec0, filt_vt);
+
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3,
+ dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+}
+
+static void common_hv_2ht_2vt_and_aver_dst_8w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ if (4 == height) {
+ common_hv_2ht_2vt_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride,
+ filter_horiz, filter_vert);
+ } else {
+ common_hv_2ht_2vt_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride,
+ filter_horiz, filter_vert,
+ height);
+ }
+}
+
+static void common_hv_2ht_2vt_and_aver_dst_16w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
+ v16u8 filt_hz, filt_vt, vec0, vec1, dst0, dst1, dst2, dst3;
+ v8u16 hz_out0, hz_out1, hz_out2, hz_out3, tmp0, tmp1;
+ v8i16 filt;
+
+ mask = LD_SB(&mc_filt_mask_arr[0]);
+
+ /* rearranging filter */
+ filt = LD_SH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h(filt, 0);
+
+ filt = LD_SH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h(filt, 0);
+
+ LD_SB2(src, 8, src0, src1);
+ src += src_stride;
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src0, src2, src4, src6);
+ LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+ src += (4 * src_stride);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+
+ hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+ hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
+ dst += dst_stride;
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst1, dst);
+ dst += dst_stride;
+
+ hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst);
+ dst += dst_stride;
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst3, dst);
+ dst += dst_stride;
+ }
+}
+
+static void common_hv_2ht_2vt_and_aver_dst_32w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ int32_t multiple8_cnt;
+ for (multiple8_cnt = 2; multiple8_cnt--;) {
+ common_hv_2ht_2vt_and_aver_dst_16w_msa(src, src_stride, dst, dst_stride,
+ filter_horiz, filter_vert, height);
+ src += 16;
+ dst += 16;
+ }
+}
+
+static void common_hv_2ht_2vt_and_aver_dst_64w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter_horiz,
+ int8_t *filter_vert,
+ int32_t height) {
+ int32_t multiple8_cnt;
+ for (multiple8_cnt = 4; multiple8_cnt--;) {
+ common_hv_2ht_2vt_and_aver_dst_16w_msa(src, src_stride, dst, dst_stride,
+ filter_horiz, filter_vert, height);
+ src += 16;
+ dst += 16;
+ }
+}
+
+void vp9_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ int8_t cnt, filt_hor[8], filt_ver[8];
+
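+  /* Scaled prediction (a step other than one full pixel per output sample
+     in either direction) is left to the C implementation. */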
+ if (16 != x_step_q4 || 16 != y_step_q4) {
+ vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ return;
+ }
+
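+  /* Both directions use the all-pass filter (single centre tap of 128): a
+     plain averaging copy is sufficient. */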
+ if (((const int32_t *)filter_x)[1] == 0x800000 &&
+ ((const int32_t *)filter_y)[1] == 0x800000) {
+ vp9_convolve_avg(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ return;
+ }
+
+ for (cnt = 0; cnt < 8; ++cnt) {
+ filt_hor[cnt] = filter_x[cnt];
+ filt_ver[cnt] = filter_y[cnt];
+ }
+
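+  /* Bilinear kernels (taps 0 and 1 zero) in both directions take the 2-tap
+     path; mixed 2-tap/8-tap combinations fall back to C; everything else
+     runs the full 8-tap horizontal and vertical filters. */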
+ if (((const int32_t *)filter_x)[0] == 0 &&
+ ((const int32_t *)filter_y)[0] == 0) {
+ switch (w) {
+ case 4:
+ common_hv_2ht_2vt_and_aver_dst_4w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], &filt_ver[3], h);
+ break;
+ case 8:
+ common_hv_2ht_2vt_and_aver_dst_8w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], &filt_ver[3], h);
+ break;
+ case 16:
+ common_hv_2ht_2vt_and_aver_dst_16w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], &filt_ver[3], h);
+ break;
+ case 32:
+ common_hv_2ht_2vt_and_aver_dst_32w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], &filt_ver[3], h);
+ break;
+ case 64:
+ common_hv_2ht_2vt_and_aver_dst_64w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_hor[3], &filt_ver[3], h);
+ break;
+ default:
+ vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ break;
+ }
+ } else if (((const int32_t *)filter_x)[0] == 0 ||
+ ((const int32_t *)filter_y)[0] == 0) {
+ vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ } else {
+ switch (w) {
+ case 4:
+ common_hv_8ht_8vt_and_aver_dst_4w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, filt_ver, h);
+ break;
+ case 8:
+ common_hv_8ht_8vt_and_aver_dst_8w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, filt_ver, h);
+ break;
+ case 16:
+ common_hv_8ht_8vt_and_aver_dst_16w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, filt_ver, h);
+ break;
+ case 32:
+ common_hv_8ht_8vt_and_aver_dst_32w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, filt_ver, h);
+ break;
+ case 64:
+ common_hv_8ht_8vt_and_aver_dst_64w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_hor, filt_ver, h);
+ break;
+ default:
+ vp9_convolve8_avg_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ break;
+ }
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c
new file mode 100644
index 00000000000..85dfd30c7de
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_avg_vert_msa.c
@@ -0,0 +1,753 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_convolve_msa.h"
+
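+/* 8-tap vertical filter with destination averaging for 4-wide blocks. */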
+static void common_vt_8t_and_aver_dst_4w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+ v16u8 dst0, dst1, dst2, dst3, out;
+ v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+ v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776;
+ v16i8 src10998, filt0, filt1, filt2, filt3;
+ v8i16 filt, out10, out32;
+
+ src -= (3 * src_stride);
+
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ src += (7 * src_stride);
+
+ ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+ src54_r, src21_r);
+ ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+ ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110,
+ src4332, src6554);
+ XORI_B3_128_SB(src2110, src4332, src6554);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src7, src8, src9, src10);
+ src += (4 * src_stride);
+
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+ src87_r, src98_r, src109_r);
+ ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998);
+ XORI_B2_128_SB(src8776, src10998);
+ out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0,
+ filt1, filt2, filt3);
+ out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0,
+ filt1, filt2, filt3);
+ SRARI_H2_SH(out10, out32, FILTER_BITS);
+ SAT_SH2_SH(out10, out32, 7);
+ out = PCKEV_XORI128_UB(out10, out32);
+ ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst2);
+
+ dst0 = (v16u8)__msa_ilvr_d((v2i64)dst2, (v2i64)dst0);
+ out = __msa_aver_u_b(out, dst0);
+
+ ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+ dst += (4 * dst_stride);
+
+ src2110 = src6554;
+ src4332 = src8776;
+ src6554 = src10998;
+ src6 = src10;
+ }
+}
+
+static void common_vt_8t_and_aver_dst_8w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+ v16u8 dst0, dst1, dst2, dst3;
+ v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+ v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3;
+ v8i16 filt, out0, out1, out2, out3;
+
+ src -= (3 * src_stride);
+
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ src += (7 * src_stride);
+
+ XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+ ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+ src54_r, src21_r);
+ ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src7, src8, src9, src10);
+ src += (4 * src_stride);
+
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ XORI_B4_128_SB(src7, src8, src9, src10);
+ ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+ src87_r, src98_r, src109_r);
+ out0 = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
+ filt1, filt2, filt3);
+ out1 = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
+ filt1, filt2, filt3);
+ out2 = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
+ filt1, filt2, filt3);
+ out3 = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
+ filt1, filt2, filt3);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
+ dst, dst_stride);
+ dst += (4 * dst_stride);
+
+ src10_r = src54_r;
+ src32_r = src76_r;
+ src54_r = src98_r;
+ src21_r = src65_r;
+ src43_r = src87_r;
+ src65_r = src109_r;
+ src6 = src10;
+ }
+}
+
+static void common_vt_8t_and_aver_dst_16w_mult_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height,
+ int32_t width) {
+ const uint8_t *src_tmp;
+ uint8_t *dst_tmp;
+ uint32_t loop_cnt, cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+ v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+ v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
+ v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
+ v16i8 filt0, filt1, filt2, filt3;
+ v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3;
+ v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l, filt;
+
+ src -= (3 * src_stride);
+
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ for (cnt = (width >> 4); cnt--;) {
+ src_tmp = src;
+ dst_tmp = dst;
+
+ LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+ src_tmp += (7 * src_stride);
+
+ ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+ src54_r, src21_r);
+ ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+ ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l,
+ src54_l, src21_l);
+ ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src_tmp, src_stride, src7, src8, src9, src10);
+ src_tmp += (4 * src_stride);
+
+ LD_UB4(dst_tmp, dst_stride, dst0, dst1, dst2, dst3);
+ XORI_B4_128_SB(src7, src8, src9, src10);
+ ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+ src87_r, src98_r, src109_r);
+ ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
+ src87_l, src98_l, src109_l);
+ out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
+ filt1, filt2, filt3);
+ out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
+ filt1, filt2, filt3);
+ out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
+ filt1, filt2, filt3);
+ out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
+ filt1, filt2, filt3);
+ out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0,
+ filt1, filt2, filt3);
+ out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0,
+ filt1, filt2, filt3);
+ out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0,
+ filt1, filt2, filt3);
+ out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
+ filt1, filt2, filt3);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
+ SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS);
+ SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+ SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+ PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+ out3_r, tmp0, tmp1, tmp2, tmp3);
+ XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
+ AVER_UB4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3, dst0, dst1,
+ dst2, dst3);
+ ST_UB4(dst0, dst1, dst2, dst3, dst_tmp, dst_stride);
+ dst_tmp += (4 * dst_stride);
+
+ src10_r = src54_r;
+ src32_r = src76_r;
+ src54_r = src98_r;
+ src21_r = src65_r;
+ src43_r = src87_r;
+ src65_r = src109_r;
+ src10_l = src54_l;
+ src32_l = src76_l;
+ src54_l = src98_l;
+ src21_l = src65_l;
+ src43_l = src87_l;
+ src65_l = src109_l;
+ src6 = src10;
+ }
+
+ src += 16;
+ dst += 16;
+ }
+}
+
+static void common_vt_8t_and_aver_dst_16w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride,
+ filter, height, 16);
+}
+
+static void common_vt_8t_and_aver_dst_32w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride,
+ filter, height, 32);
+}
+
+static void common_vt_8t_and_aver_dst_64w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride,
+ filter, height, 64);
+}
+
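+/* 2-tap (bilinear) vertical filter with destination averaging, 4x4 block. */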
+static void common_vt_2t_and_aver_dst_4x4_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter) {
+ v16i8 src0, src1, src2, src3, src4;
+ v16u8 dst0, dst1, dst2, dst3, out, filt0, src2110, src4332;
+ v16i8 src10_r, src32_r, src21_r, src43_r;
+ v8i16 filt;
+ v8u16 tmp0, tmp1;
+
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
+
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ src += (4 * src_stride);
+
+ src4 = LD_SB(src);
+ src += src_stride;
+
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ ILVR_W2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
+ dst0 = (v16u8)__msa_ilvr_d((v2i64)dst1, (v2i64)dst0);
+ ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+ src32_r, src43_r);
+ ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+ DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+
+ out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+ out = __msa_aver_u_b(out, dst0);
+
+ ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_vt_2t_and_aver_dst_4x8_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter) {
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src87_r;
+ v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r;
+ v16u8 src2110, src4332, src6554, src8776, filt0;
+ v8u16 tmp0, tmp1, tmp2, tmp3;
+ v8i16 filt;
+
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
+
+ LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+ src += (8 * src_stride);
+ src8 = LD_SB(src);
+
+ LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+ ILVR_W4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst1,
+ dst2, dst3);
+ ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
+ ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+ src32_r, src43_r);
+ ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+ src76_r, src87_r);
+ ILVR_D4_UB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
+ src87_r, src76_r, src2110, src4332, src6554, src8776);
+ DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
+ tmp0, tmp1, tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
+ AVER_UB2_UB(src2110, dst0, src4332, dst1, src2110, src4332);
+ ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst, dst_stride);
+}
+
+static void common_vt_2t_and_aver_dst_4w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ if (4 == height) {
+ common_vt_2t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter);
+ } else if (8 == height) {
+ common_vt_2t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter);
+ }
+}
+
+static void common_vt_2t_and_aver_dst_8x4_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter) {
+ v16u8 src0, src1, src2, src3, src4;
+ v16u8 dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3, filt0;
+ v8u16 tmp0, tmp1, tmp2, tmp3;
+ v8i16 filt;
+
+ /* rearranging filter_y */
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
+
+ LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
+ ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
+ tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_AVG_ST8x4_UB(tmp0, dst0, tmp1, dst1, tmp2, dst2, tmp3, dst3,
+ dst, dst_stride);
+}
+
+static void common_vt_2t_and_aver_dst_8x8mult_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
+ v16u8 dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+ v8u16 tmp0, tmp1, tmp2, tmp3;
+ v8i16 filt;
+
+ /* rearranging filter_y */
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
+
+ src0 = LD_UB(src);
+ src += src_stride;
+
+ for (loop_cnt = (height >> 3); loop_cnt--;) {
+ LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
+ src += (8 * src_stride);
+ LD_UB8(dst, dst_stride, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst8);
+
+ ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1,
+ vec2, vec3);
+ ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5,
+ vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
+ tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_AVG_ST8x4_UB(tmp0, dst1, tmp1, dst2, tmp2, dst3, tmp3, dst4,
+ dst, dst_stride);
+ dst += (4 * dst_stride);
+
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1,
+ tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_AVG_ST8x4_UB(tmp0, dst5, tmp1, dst6, tmp2, dst7, tmp3, dst8,
+ dst, dst_stride);
+ dst += (4 * dst_stride);
+
+ src0 = src8;
+ }
+}
+
+static void common_vt_2t_and_aver_dst_8w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ if (4 == height) {
+ common_vt_2t_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride, filter);
+ } else {
+ common_vt_2t_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride,
+ filter, height);
+ }
+}
+
+static void common_vt_2t_and_aver_dst_16w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2, dst3, filt0;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 tmp0, tmp1, tmp2, tmp3, filt;
+
+ /* rearranging filter_y */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ src0 = LD_UB(src);
+ src += src_stride;
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_UB4(src, src_stride, src1, src2, src3, src4);
+ src += (4 * src_stride);
+
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+ ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
+ dst += dst_stride;
+
+ ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
+ ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst);
+ dst += dst_stride;
+
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst);
+ dst += dst_stride;
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst);
+ dst += dst_stride;
+
+ src0 = src4;
+ }
+}
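
The wide vertical variants (8, 16, 32 and 64 columns) all share the same loop structure: a group of rows is loaded, each row is filtered against the row above it, and the last row of the group is carried over as the first row of the next iteration (src0 = src4, src5 = src9, and so on), so every source row is read only once. A minimal, purely illustrative skeleton of that row-reuse pattern:

#include <stdint.h>

/* Skeleton of the row-reuse pattern (illustrative only): the accumulation
 * below stands in for the real 2-tap dot product and averaging. */
static uint32_t vt_row_reuse_skeleton(const uint8_t *src, int src_stride,
                                      int w, int h) {
  uint32_t acc = 0;
  const uint8_t *above = src;              /* plays the role of src0 */
  src += src_stride;
  for (int cnt = h >> 2; cnt--;) {
    for (int r = 0; r < 4; ++r) {          /* four rows per group */
      const uint8_t *cur = src + r * src_stride;
      for (int x = 0; x < w; ++x)
        acc += (uint32_t)(above[x] + cur[x]);
      above = cur;                         /* carry the row forward */
    }
    src += 4 * src_stride;
  }
  return acc;
}
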
+
+static void common_vt_2t_and_aver_dst_32w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+ v8u16 tmp0, tmp1, tmp2, tmp3, filt;
+
+ /* rearranging filter_y */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ LD_UB2(src, 16, src0, src5);
+ src += src_stride;
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_UB4(src, src_stride, src1, src2, src3, src4);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+ ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+
+ LD_UB4(src + 16, src_stride, src6, src7, src8, src9);
+ LD_UB4(dst + 16, dst_stride, dst4, dst5, dst6, dst7);
+ src += (4 * src_stride);
+
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
+
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride);
+
+ ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
+ ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst + 2 * dst_stride);
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst + 3 * dst_stride);
+
+ ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2);
+ ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 16);
+
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 16 + dst_stride);
+
+ ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6);
+ ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7);
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst6, dst + 16 + 2 * dst_stride);
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_AVG_ST_UB(tmp3, tmp2, dst7, dst + 16 + 3 * dst_stride);
+ dst += (4 * dst_stride);
+
+ src0 = src4;
+ src5 = src9;
+ }
+}
+
+static void common_vt_2t_and_aver_dst_64w_msa(const uint8_t *src,
+ int32_t src_stride,
+ uint8_t *dst,
+ int32_t dst_stride,
+ int8_t *filter,
+ int32_t height) {
+ uint32_t loop_cnt;
+ v16u8 src0, src1, src2, src3, src4, src5;
+ v16u8 src6, src7, src8, src9, src10, src11, filt0;
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+ v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ v8u16 filt;
+
+ /* rearranging filter_y */
+ filt = LD_UH(filter);
+ filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ LD_UB4(src, 16, src0, src3, src6, src9);
+ src += src_stride;
+
+ for (loop_cnt = (height >> 1); loop_cnt--;) {
+ LD_UB2(src, src_stride, src1, src2);
+ LD_UB2(dst, dst_stride, dst0, dst1);
+ LD_UB2(src + 16, src_stride, src4, src5);
+ LD_UB2(dst + 16, dst_stride, dst2, dst3);
+ LD_UB2(src + 32, src_stride, src7, src8);
+ LD_UB2(dst + 32, dst_stride, dst4, dst5);
+ LD_UB2(src + 48, src_stride, src10, src11);
+ LD_UB2(dst + 48, dst_stride, dst6, dst7);
+ src += (2 * src_stride);
+
+ ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+ ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst);
+
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride);
+
+ ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6);
+ ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7);
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
+ SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
+ SAT_UH2_UH(tmp4, tmp5, 7);
+ PCKEV_AVG_ST_UB(tmp5, tmp4, dst2, dst + 16);
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
+ SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
+ SAT_UH2_UH(tmp6, tmp7, 7);
+ PCKEV_AVG_ST_UB(tmp7, tmp6, dst3, dst + 16 + dst_stride);
+
+ ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2);
+ ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 32);
+
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 32 + dst_stride);
+
+ ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6);
+ ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7);
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
+ SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
+ SAT_UH2_UH(tmp4, tmp5, 7);
+ PCKEV_AVG_ST_UB(tmp5, tmp4, dst6, (dst + 48));
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
+ SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
+ SAT_UH2_UH(tmp6, tmp7, 7);
+ PCKEV_AVG_ST_UB(tmp7, tmp6, dst7, dst + 48 + dst_stride);
+ dst += (2 * dst_stride);
+
+ src0 = src2;
+ src3 = src5;
+ src6 = src8;
+ src9 = src11;
+ }
+}
+
+void vp9_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h) {
+ int8_t cnt, filt_ver[8];
+
+ if (16 != y_step_q4) {
+ vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ return;
+ }
+
+ if (((const int32_t *)filter_y)[1] == 0x800000) {
+ vp9_convolve_avg(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ return;
+ }
+
+ for (cnt = 0; cnt < 8; ++cnt) {
+ filt_ver[cnt] = filter_y[cnt];
+ }
+
+ if (((const int32_t *)filter_y)[0] == 0) {
+ switch (w) {
+ case 4:
+ common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_ver[3], h);
+ break;
+ case 8:
+ common_vt_2t_and_aver_dst_8w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_ver[3], h);
+ break;
+ case 16:
+ common_vt_2t_and_aver_dst_16w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_ver[3], h);
+ break;
+ case 32:
+ common_vt_2t_and_aver_dst_32w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_ver[3], h);
+ break;
+ case 64:
+ common_vt_2t_and_aver_dst_64w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ &filt_ver[3], h);
+ break;
+ default:
+ vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ break;
+ }
+ } else {
+ switch (w) {
+ case 4:
+ common_vt_8t_and_aver_dst_4w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_ver, h);
+ break;
+ case 8:
+ common_vt_8t_and_aver_dst_8w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_ver, h);
+ break;
+ case 16:
+ common_vt_8t_and_aver_dst_16w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_ver, h);
+
+ break;
+ case 32:
+ common_vt_8t_and_aver_dst_32w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_ver, h);
+ break;
+ case 64:
+ common_vt_8t_and_aver_dst_64w_msa(src, (int32_t)src_stride,
+ dst, (int32_t)dst_stride,
+ filt_ver, h);
+ break;
+ default:
+ vp9_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h);
+ break;
+ }
+ }
+}
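
vp9_convolve8_avg_vert_msa picks its kernel by inspecting the vertical filter through a 32-bit view of the int16_t tap array: word [1] equal to 0x800000 means taps 2 and 3 are {0, 128}, i.e. the copy filter, so plain vp9_convolve_avg suffices; word [0] equal to 0 means the two outermost taps are zero, which identifies the bilinear filters and routes to the common_vt_2t_* kernels with &filt_ver[3]. A small sketch of those checks, written with memcpy and assuming a little-endian layout; the helper names are illustrative, not part of the patch.

#include <stdint.h>
#include <string.h>

/* Word [1] of the 8-tap filter covers taps 2 and 3; on a little-endian
 * target 0x800000 means filter[2] == 0 and filter[3] == 128 (copy filter). */
static int is_copy_filter(const int16_t *filter) {
  int32_t word1;
  memcpy(&word1, filter + 2, sizeof(word1));
  return word1 == 0x800000;
}

/* Word [0] covers taps 0 and 1; both being zero marks a bilinear filter,
 * whose only non-zero taps are the middle pair. */
static int is_bilinear_filter(const int16_t *filter) {
  int32_t word0;
  memcpy(&word0, filter, sizeof(word0));
  return word0 == 0;
}
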
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c
index e2247435e88..f175bf9b663 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_horiz_msa.c
@@ -14,37 +14,29 @@
static void common_hz_8t_4x4_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter) {
- v16i8 filt0, filt1, filt2, filt3;
- v16i8 src0, src1, src2, src3;
- v16u8 mask0, mask1, mask2, mask3;
+ v16u8 mask0, mask1, mask2, mask3, out;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
v8i16 filt, out0, out1;
- mask0 = LOAD_UB(&mc_filt_mask_arr[16]);
-
+ mask0 = LD_UB(&mc_filt_mask_arr[16]);
src -= 3;
/* rearranging filter */
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
mask1 = mask0 + 2;
mask2 = mask0 + 4;
mask3 = mask0 + 6;
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
-
- XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
-
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
filt0, filt1, filt2, filt3, out0, out1);
-
- out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
- out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
-
- PCKEV_2B_XORI128_STORE_4_BYTES_4(out0, out1, dst, dst_stride);
+ SRARI_H2_SH(out0, out1, FILTER_BITS);
+ SAT_SH2_SH(out0, out1, 7);
+ out = PCKEV_XORI128_UB(out0, out1);
+ ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
}
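
The rewritten 8-tap horizontal kernels keep the existing sign-bias trick: XORI_B4_128_SB flips the top bit of every source byte so unsigned pixels become signed values centred on zero, the filter runs as signed dot products, and PCKEV_XORI128_UB flips the bit back when packing the result. Because the taps sum to 128, this is equivalent to filtering the unsigned pixels directly. A per-pixel scalar model of that sequence, illustrative only and assuming FILTER_BITS == 7:

#include <stdint.h>

static uint8_t hz_8t_px_ref(const uint8_t *src, const int8_t *filter) {
  int sum = 0;
  for (int k = 0; k < 8; ++k)
    sum += (int)(int8_t)(src[k] ^ 0x80) * filter[k];  /* XORI ..128 + signed dot product */
  int v = (sum + 64) >> 7;                            /* SRARI_H by FILTER_BITS */
  if (v < -128) v = -128;                             /* SAT_SH(..., 7) */
  if (v > 127) v = 127;
  return (uint8_t)(v ^ 0x80);                         /* PCKEV_XORI128_UB restores the bias */
}
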
static void common_hz_8t_4x8_msa(const uint8_t *src, int32_t src_stride,
@@ -52,47 +44,36 @@ static void common_hz_8t_4x8_msa(const uint8_t *src, int32_t src_stride,
int8_t *filter) {
v16i8 filt0, filt1, filt2, filt3;
v16i8 src0, src1, src2, src3;
- v16u8 mask0, mask1, mask2, mask3;
+ v16u8 mask0, mask1, mask2, mask3, out;
v8i16 filt, out0, out1, out2, out3;
- mask0 = LOAD_UB(&mc_filt_mask_arr[16]);
-
+ mask0 = LD_UB(&mc_filt_mask_arr[16]);
src -= 3;
/* rearranging filter */
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
mask1 = mask0 + 2;
mask2 = mask0 + 4;
mask3 = mask0 + 6;
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
src += (4 * src_stride);
-
- XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
-
HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
filt0, filt1, filt2, filt3, out0, out1);
-
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
-
- XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
-
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
filt0, filt1, filt2, filt3, out2, out3);
-
- out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
- out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
- out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
- out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
-
- PCKEV_2B_XORI128_STORE_4_BYTES_4(out0, out1, dst, dst_stride);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ out = PCKEV_XORI128_UB(out0, out1);
+ ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
dst += (4 * dst_stride);
- PCKEV_2B_XORI128_STORE_4_BYTES_4(out2, out3, dst, dst_stride);
+ out = PCKEV_XORI128_UB(out2, out3);
+ ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
}
static void common_hz_8t_4w_msa(const uint8_t *src, int32_t src_stride,
@@ -108,82 +89,64 @@ static void common_hz_8t_4w_msa(const uint8_t *src, int32_t src_stride,
static void common_hz_8t_8x4_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter) {
- v16i8 filt0, filt1, filt2, filt3;
- v16i8 src0, src1, src2, src3;
- v16u8 mask0, mask1, mask2, mask3;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
v8i16 filt, out0, out1, out2, out3;
- mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
-
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
/* rearranging filter */
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
mask1 = mask0 + 2;
mask2 = mask0 + 4;
mask3 = mask0 + 6;
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
-
- XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
-
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3,
filt0, filt1, filt2, filt3, out0, out1, out2,
out3);
-
- out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
- out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
- out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
- out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
-
- PCKEV_B_4_XORI128_STORE_8_BYTES_4(out0, out1, out2, out3, dst, dst_stride);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ tmp0 = PCKEV_XORI128_UB(out0, out1);
+ tmp1 = PCKEV_XORI128_UB(out2, out3);
+ ST8x4_UB(tmp0, tmp1, dst, dst_stride);
}
static void common_hz_8t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter, int32_t height) {
uint32_t loop_cnt;
- v16i8 filt0, filt1, filt2, filt3;
- v16i8 src0, src1, src2, src3;
- v16u8 mask0, mask1, mask2, mask3;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1;
v8i16 filt, out0, out1, out2, out3;
- mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
-
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
/* rearranging filter */
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
mask1 = mask0 + 2;
mask2 = mask0 + 4;
mask3 = mask0 + 6;
for (loop_cnt = (height >> 2); loop_cnt--;) {
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
src += (4 * src_stride);
-
- XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
-
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0, out1,
out2, out3);
-
- out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
- out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
- out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
- out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
-
- PCKEV_B_4_XORI128_STORE_8_BYTES_4(out0, out1, out2, out3, dst, dst_stride);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ tmp0 = PCKEV_XORI128_UB(out0, out1);
+ tmp1 = PCKEV_XORI128_UB(out2, out3);
+ ST8x4_UB(tmp0, tmp1, dst, dst_stride);
dst += (4 * dst_stride);
}
}
@@ -202,48 +165,36 @@ static void common_hz_8t_16w_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter, int32_t height) {
uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3;
- v16i8 filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 mask0, mask1, mask2, mask3, out;
v8i16 filt, out0, out1, out2, out3;
- mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
-
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
/* rearranging filter */
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
mask1 = mask0 + 2;
mask2 = mask0 + 4;
mask3 = mask0 + 6;
for (loop_cnt = (height >> 1); loop_cnt--;) {
- src0 = LOAD_SB(src);
- src1 = LOAD_SB(src + 8);
- src += src_stride;
- src2 = LOAD_SB(src);
- src3 = LOAD_SB(src + 8);
- src += src_stride;
-
- XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
-
+ LD_SB2(src, src_stride, src0, src2);
+ LD_SB2(src + 8, src_stride, src1, src3);
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ src += (2 * src_stride);
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0, out1,
out2, out3);
-
- out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
- out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
- out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
- out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
-
- PCKEV_B_XORI128_STORE_VEC(out1, out0, dst);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ out = PCKEV_XORI128_UB(out0, out1);
+ ST_UB(out, dst);
dst += dst_stride;
- PCKEV_B_XORI128_STORE_VEC(out3, out2, dst);
+ out = PCKEV_XORI128_UB(out2, out3);
+ ST_UB(out, dst);
dst += dst_stride;
}
}
@@ -252,68 +203,56 @@ static void common_hz_8t_32w_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter, int32_t height) {
uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3;
- v16i8 filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 mask0, mask1, mask2, mask3, out;
v8i16 filt, out0, out1, out2, out3;
- mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
-
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
/* rearranging filter */
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
mask1 = mask0 + 2;
mask2 = mask0 + 4;
mask3 = mask0 + 6;
for (loop_cnt = (height >> 1); loop_cnt--;) {
- src0 = LOAD_SB(src);
- src2 = LOAD_SB(src + 16);
- src3 = LOAD_SB(src + 24);
- src1 = __msa_sld_b((v16i8)src2, (v16i8)src0, 8);
+ src0 = LD_SB(src);
+ src2 = LD_SB(src + 16);
+ src3 = LD_SB(src + 24);
+ src1 = __msa_sldi_b(src2, src0, 8);
src += src_stride;
-
- XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
-
+ XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0, out1,
out2, out3);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
- out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
- out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
- out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
- out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
-
- src0 = LOAD_SB(src);
- src2 = LOAD_SB(src + 16);
- src3 = LOAD_SB(src + 24);
- src1 = __msa_sld_b((v16i8)src2, (v16i8)src0, 8);
+ src0 = LD_SB(src);
+ src2 = LD_SB(src + 16);
+ src3 = LD_SB(src + 24);
+ src1 = __msa_sldi_b(src2, src0, 8);
+ src += src_stride;
- PCKEV_B_XORI128_STORE_VEC(out1, out0, dst);
- PCKEV_B_XORI128_STORE_VEC(out3, out2, (dst + 16));
+ out = PCKEV_XORI128_UB(out0, out1);
+ ST_UB(out, dst);
+ out = PCKEV_XORI128_UB(out2, out3);
+ ST_UB(out, dst + 16);
dst += dst_stride;
- XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
-
+ XORI_B4_128_SB(src0, src1, src2, src3);
HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
mask3, filt0, filt1, filt2, filt3, out0, out1,
out2, out3);
-
- out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
- out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
- out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
- out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
-
- PCKEV_B_XORI128_STORE_VEC(out1, out0, dst);
- PCKEV_B_XORI128_STORE_VEC(out3, out2, (dst + 16));
-
- src += src_stride;
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ out = PCKEV_XORI128_UB(out0, out1);
+ ST_UB(out, dst);
+ out = PCKEV_XORI128_UB(out2, out3);
+ ST_UB(out, dst + 16);
dst += dst_stride;
}
}
@@ -321,50 +260,55 @@ static void common_hz_8t_32w_msa(const uint8_t *src, int32_t src_stride,
static void common_hz_8t_64w_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter, int32_t height) {
- uint32_t loop_cnt, cnt;
- v16i8 src0, src1, src2, src3;
- v16i8 filt0, filt1, filt2, filt3;
- v16u8 mask0, mask1, mask2, mask3;
+ int32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3;
+ v16u8 mask0, mask1, mask2, mask3, out;
v8i16 filt, out0, out1, out2, out3;
- mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
-
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= 3;
/* rearranging filter */
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
mask1 = mask0 + 2;
mask2 = mask0 + 4;
mask3 = mask0 + 6;
for (loop_cnt = height; loop_cnt--;) {
- for (cnt = 0; cnt < 2; ++cnt) {
- src0 = LOAD_SB(&src[cnt << 5]);
- src2 = LOAD_SB(&src[16 + (cnt << 5)]);
- src3 = LOAD_SB(&src[24 + (cnt << 5)]);
- src1 = __msa_sld_b((v16i8)src2, (v16i8)src0, 8);
-
- XORI_B_4VECS_SB(src0, src1, src2, src3, src0, src1, src2, src3, 128);
-
- HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
- mask3, filt0, filt1, filt2, filt3, out0, out1,
- out2, out3);
-
- out0 = SRARI_SATURATE_SIGNED_H(out0, FILTER_BITS, 7);
- out1 = SRARI_SATURATE_SIGNED_H(out1, FILTER_BITS, 7);
- out2 = SRARI_SATURATE_SIGNED_H(out2, FILTER_BITS, 7);
- out3 = SRARI_SATURATE_SIGNED_H(out3, FILTER_BITS, 7);
-
- PCKEV_B_XORI128_STORE_VEC(out1, out0, &dst[cnt << 5]);
- PCKEV_B_XORI128_STORE_VEC(out3, out2, &dst[16 + (cnt << 5)]);
- }
+ src0 = LD_SB(src);
+ src2 = LD_SB(src + 16);
+ src3 = LD_SB(src + 24);
+ src1 = __msa_sldi_b(src2, src0, 8);
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+ mask3, filt0, filt1, filt2, filt3, out0, out1,
+ out2, out3);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ out = PCKEV_XORI128_UB(out0, out1);
+ ST_UB(out, dst);
+ out = PCKEV_XORI128_UB(out2, out3);
+ ST_UB(out, dst + 16);
+
+ src0 = LD_SB(src + 32);
+ src2 = LD_SB(src + 48);
+ src3 = LD_SB(src + 56);
+ src1 = __msa_sldi_b(src2, src0, 8);
src += src_stride;
+
+ XORI_B4_128_SB(src0, src1, src2, src3);
+ HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2,
+ mask3, filt0, filt1, filt2, filt3, out0, out1,
+ out2, out3);
+ SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS);
+ SAT_SH4_SH(out0, out1, out2, out3, 7);
+ out = PCKEV_XORI128_UB(out0, out1);
+ ST_UB(out, dst + 32);
+ out = PCKEV_XORI128_UB(out2, out3);
+ ST_UB(out, dst + 48);
dst += dst_stride;
}
}
@@ -372,124 +316,55 @@ static void common_hz_8t_64w_msa(const uint8_t *src, int32_t src_stride,
static void common_hz_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter) {
- uint32_t out0, out1, out2, out3;
v16i8 src0, src1, src2, src3, mask;
- v16u8 vec0, vec1, filt0;
- v16i8 res0, res1;
+ v16u8 filt0, vec0, vec1, res0, res1;
v8u16 vec2, vec3, filt, const255;
- mask = LOAD_SB(&mc_filt_mask_arr[16]);
+ mask = LD_SB(&mc_filt_mask_arr[16]);
/* rearranging filter */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- const255 = (v8u16)__msa_ldi_h(255);
-
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
-
- vec0 = (v16u8)__msa_vshf_b(mask, src1, src0);
- vec1 = (v16u8)__msa_vshf_b(mask, src3, src2);
-
- vec2 = __msa_dotp_u_h(vec0, filt0);
- vec3 = __msa_dotp_u_h(vec1, filt0);
-
- vec2 = (v8u16)__msa_srari_h((v8i16)vec2, FILTER_BITS);
- vec3 = (v8u16)__msa_srari_h((v8i16)vec3, FILTER_BITS);
-
- vec2 = __msa_min_u_h(vec2, const255);
- vec3 = __msa_min_u_h(vec3, const255);
-
- res0 = __msa_pckev_b((v16i8)vec2, (v16i8)vec2);
- res1 = __msa_pckev_b((v16i8)vec3, (v16i8)vec3);
-
- out0 = __msa_copy_u_w((v4i32)res0, 0);
- out1 = __msa_copy_u_w((v4i32)res0, 1);
- out2 = __msa_copy_u_w((v4i32)res1, 0);
- out3 = __msa_copy_u_w((v4i32)res1, 1);
-
- STORE_WORD(dst, out0);
- dst += dst_stride;
- STORE_WORD(dst, out1);
- dst += dst_stride;
- STORE_WORD(dst, out2);
- dst += dst_stride;
- STORE_WORD(dst, out3);
+ filt = LD_UH(filter);
+ filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+ const255 = (v8u16) __msa_ldi_h(255);
+
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3);
+ SRARI_H2_UH(vec2, vec3, FILTER_BITS);
+ MIN_UH2_UH(vec2, vec3, const255);
+ PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1);
+ ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
}
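
The 2-tap horizontal kernels stay entirely unsigned: VSHF_B2 gathers each pixel together with its right neighbour, DOTP_UB forms 16-bit unsigned sums, SRARI_H rounds by FILTER_BITS, and MIN_UH clamps against 255 before packing; no sign bias is needed because the bilinear taps are non-negative. A scalar model of one output row, illustrative only and assuming FILTER_BITS == 7:

#include <stdint.h>

static void hz_2t_row_ref(const uint8_t *src, uint8_t *dst,
                          const uint8_t *filter, int w) {
  for (int x = 0; x < w; ++x) {
    unsigned sum = src[x] * filter[0] + src[x + 1] * filter[1];  /* DOTP_UB */
    unsigned v = (sum + 64) >> 7;                                /* SRARI_H */
    dst[x] = (uint8_t)(v > 255 ? 255 : v);                       /* MIN_UH with 255 */
  }
}
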
static void common_hz_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter) {
- uint32_t out0, out1, out2, out3;
- v16u8 filt0;
+ v16u8 vec0, vec1, vec2, vec3, filt0;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 vec0, vec1, vec2, vec3;
- v8u16 vec4, vec5, vec6, vec7;
v16i8 res0, res1, res2, res3;
- v8u16 filt, const255;
+ v8u16 vec4, vec5, vec6, vec7, filt, const255;
- mask = LOAD_SB(&mc_filt_mask_arr[16]);
+ mask = LD_SB(&mc_filt_mask_arr[16]);
/* rearranging filter */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- const255 = (v8u16)__msa_ldi_h(255);
-
- LOAD_8VECS_SB(src, src_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
-
- vec0 = (v16u8)__msa_vshf_b(mask, src1, src0);
- vec1 = (v16u8)__msa_vshf_b(mask, src3, src2);
- vec2 = (v16u8)__msa_vshf_b(mask, src5, src4);
- vec3 = (v16u8)__msa_vshf_b(mask, src7, src6);
-
- vec4 = __msa_dotp_u_h(vec0, filt0);
- vec5 = __msa_dotp_u_h(vec1, filt0);
- vec6 = __msa_dotp_u_h(vec2, filt0);
- vec7 = __msa_dotp_u_h(vec3, filt0);
-
- vec4 = (v8u16)__msa_srari_h((v8i16)vec4, FILTER_BITS);
- vec5 = (v8u16)__msa_srari_h((v8i16)vec5, FILTER_BITS);
- vec6 = (v8u16)__msa_srari_h((v8i16)vec6, FILTER_BITS);
- vec7 = (v8u16)__msa_srari_h((v8i16)vec7, FILTER_BITS);
-
- vec4 = __msa_min_u_h(vec4, const255);
- vec5 = __msa_min_u_h(vec5, const255);
- vec6 = __msa_min_u_h(vec6, const255);
- vec7 = __msa_min_u_h(vec7, const255);
-
- res0 = __msa_pckev_b((v16i8)vec4, (v16i8)vec4);
- res1 = __msa_pckev_b((v16i8)vec5, (v16i8)vec5);
- res2 = __msa_pckev_b((v16i8)vec6, (v16i8)vec6);
- res3 = __msa_pckev_b((v16i8)vec7, (v16i8)vec7);
-
- out0 = __msa_copy_u_w((v4i32)res0, 0);
- out1 = __msa_copy_u_w((v4i32)res0, 1);
- out2 = __msa_copy_u_w((v4i32)res1, 0);
- out3 = __msa_copy_u_w((v4i32)res1, 1);
-
- STORE_WORD(dst, out0);
- dst += dst_stride;
- STORE_WORD(dst, out1);
- dst += dst_stride;
- STORE_WORD(dst, out2);
- dst += dst_stride;
- STORE_WORD(dst, out3);
- dst += dst_stride;
-
- out0 = __msa_copy_u_w((v4i32)res2, 0);
- out1 = __msa_copy_u_w((v4i32)res2, 1);
- out2 = __msa_copy_u_w((v4i32)res3, 0);
- out3 = __msa_copy_u_w((v4i32)res3, 1);
-
- STORE_WORD(dst, out0);
- dst += dst_stride;
- STORE_WORD(dst, out1);
- dst += dst_stride;
- STORE_WORD(dst, out2);
- dst += dst_stride;
- STORE_WORD(dst, out3);
+ filt = LD_UH(filter);
+ filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+ const255 = (v8u16) __msa_ldi_h(255);
+
+ LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+ VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5,
+ vec6, vec7);
+ SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS);
+ MIN_UH4_UH(vec4, vec5, vec6, vec7, const255);
+ PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1,
+ res2, res3);
+ ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+ dst += (4 * dst_stride);
+ ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
}
static void common_hz_2t_4w_msa(const uint8_t *src, int32_t src_stride,
@@ -507,149 +382,93 @@ static void common_hz_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
int8_t *filter) {
v16u8 filt0;
v16i8 src0, src1, src2, src3, mask;
- v8u16 vec0, vec1, vec2, vec3;
- v8u16 out0, out1, out2, out3;
- v8u16 const255, filt;
+ v8u16 vec0, vec1, vec2, vec3, const255, filt;
- mask = LOAD_SB(&mc_filt_mask_arr[0]);
+ mask = LD_SB(&mc_filt_mask_arr[0]);
/* rearranging filter */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- const255 = (v8u16)__msa_ldi_h(255);
-
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
-
- vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
- vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
- vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
- vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
-
- vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
- vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
- vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
- vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
-
- SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3, FILTER_BITS);
-
- out0 = __msa_min_u_h(vec0, const255);
- out1 = __msa_min_u_h(vec1, const255);
- out2 = __msa_min_u_h(vec2, const255);
- out3 = __msa_min_u_h(vec3, const255);
-
- PCKEV_B_STORE_8_BYTES_4(out0, out1, out2, out3, dst, dst_stride);
+ filt = LD_UH(filter);
+ filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+ const255 = (v8u16) __msa_ldi_h(255);
+
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1);
+ ST8x4_UB(src0, src1, dst, dst_stride);
}
static void common_hz_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter, int32_t height) {
v16u8 filt0;
- v16i8 src0, src1, src2, src3, mask;
- v8u16 vec0, vec1, vec2, vec3;
- v8u16 filt, const255;
+ v16i8 src0, src1, src2, src3, mask, out0, out1;
+ v8u16 vec0, vec1, vec2, vec3, filt, const255;
- mask = LOAD_SB(&mc_filt_mask_arr[0]);
+ mask = LD_SB(&mc_filt_mask_arr[0]);
/* rearranging filter */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_UH(filter);
+ filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
- const255 = (v8u16)__msa_ldi_h(255);
+ const255 = (v8u16) __msa_ldi_h(255);
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
- vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
- vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
- vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
- vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
-
- vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
- vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
- vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
- vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
- SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3, FILTER_BITS);
-
- vec0 = __msa_min_u_h(vec0, const255);
- vec1 = __msa_min_u_h(vec1, const255);
- vec2 = __msa_min_u_h(vec2, const255);
- vec3 = __msa_min_u_h(vec3, const255);
-
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
- PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride);
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+ ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
- vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
- vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
- vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
- vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
-
- vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
- vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
- vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
- vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
-
- SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3, vec0, vec1, vec2, vec3, FILTER_BITS);
-
- vec0 = __msa_min_u_h(vec0, const255);
- vec1 = __msa_min_u_h(vec1, const255);
- vec2 = __msa_min_u_h(vec2, const255);
- vec3 = __msa_min_u_h(vec3, const255);
-
- PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride);
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+ ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
if (16 == height) {
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
- vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
- vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
- vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
- vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
-
- vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
- vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
- vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
- vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
-
- SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3,
- vec0, vec1, vec2, vec3, FILTER_BITS);
-
- vec0 = __msa_min_u_h(vec0, const255);
- vec1 = __msa_min_u_h(vec1, const255);
- vec2 = __msa_min_u_h(vec2, const255);
- vec3 = __msa_min_u_h(vec3, const255);
-
- LOAD_4VECS_SB(src, src_stride, src0, src1, src2, src3);
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ LD_SB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
- PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride);
- dst += (4 * dst_stride);
-
- vec0 = (v8u16)__msa_vshf_b(mask, src0, src0);
- vec1 = (v8u16)__msa_vshf_b(mask, src1, src1);
- vec2 = (v8u16)__msa_vshf_b(mask, src2, src2);
- vec3 = (v8u16)__msa_vshf_b(mask, src3, src3);
-
- vec0 = __msa_dotp_u_h((v16u8)vec0, filt0);
- vec1 = __msa_dotp_u_h((v16u8)vec1, filt0);
- vec2 = __msa_dotp_u_h((v16u8)vec2, filt0);
- vec3 = __msa_dotp_u_h((v16u8)vec3, filt0);
-
- SRARI_H_4VECS_UH(vec0, vec1, vec2, vec3,
- vec0, vec1, vec2, vec3, FILTER_BITS);
-
- vec0 = __msa_min_u_h(vec0, const255);
- vec1 = __msa_min_u_h(vec1, const255);
- vec2 = __msa_min_u_h(vec2, const255);
- vec3 = __msa_min_u_h(vec3, const255);
-
- PCKEV_B_STORE_8_BYTES_4(vec0, vec1, vec2, vec3, dst, dst_stride);
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+ ST8x4_UB(out0, out1, dst, dst_stride);
+
+ VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1,
+ vec2, vec3);
+ SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS);
+ MIN_UH4_UH(vec0, vec1, vec2, vec3, const255);
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1);
+ ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride);
}
}
@@ -668,136 +487,68 @@ static void common_hz_2t_16w_msa(const uint8_t *src, int32_t src_stride,
int8_t *filter, int32_t height) {
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt0;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
- v8u16 filt, const255;
+ v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt, const255;
- mask = LOAD_SB(&mc_filt_mask_arr[0]);
+ mask = LD_SB(&mc_filt_mask_arr[0]);
loop_cnt = (height >> 2) - 1;
/* rearranging filter */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- const255 = (v8u16)__msa_ldi_h(255);
-
- src0 = LOAD_SB(src);
- src1 = LOAD_SB(src + 8);
- src += src_stride;
- src2 = LOAD_SB(src);
- src3 = LOAD_SB(src + 8);
- src += src_stride;
- src4 = LOAD_SB(src);
- src5 = LOAD_SB(src + 8);
- src += src_stride;
- src6 = LOAD_SB(src);
- src7 = LOAD_SB(src + 8);
- src += src_stride;
-
- vec0 = (v16u8)__msa_vshf_b(mask, src0, src0);
- vec1 = (v16u8)__msa_vshf_b(mask, src1, src1);
- vec2 = (v16u8)__msa_vshf_b(mask, src2, src2);
- vec3 = (v16u8)__msa_vshf_b(mask, src3, src3);
- vec4 = (v16u8)__msa_vshf_b(mask, src4, src4);
- vec5 = (v16u8)__msa_vshf_b(mask, src5, src5);
- vec6 = (v16u8)__msa_vshf_b(mask, src6, src6);
- vec7 = (v16u8)__msa_vshf_b(mask, src7, src7);
-
- out0 = __msa_dotp_u_h(vec0, filt0);
- out1 = __msa_dotp_u_h(vec1, filt0);
- out2 = __msa_dotp_u_h(vec2, filt0);
- out3 = __msa_dotp_u_h(vec3, filt0);
- out4 = __msa_dotp_u_h(vec4, filt0);
- out5 = __msa_dotp_u_h(vec5, filt0);
- out6 = __msa_dotp_u_h(vec6, filt0);
- out7 = __msa_dotp_u_h(vec7, filt0);
-
- out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS);
- out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS);
- out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS);
- out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS);
- out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS);
- out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS);
- out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS);
- out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS);
-
- out0 = __msa_min_u_h(out0, const255);
- out1 = __msa_min_u_h(out1, const255);
- out2 = __msa_min_u_h(out2, const255);
- out3 = __msa_min_u_h(out3, const255);
- out4 = __msa_min_u_h(out4, const255);
- out5 = __msa_min_u_h(out5, const255);
- out6 = __msa_min_u_h(out6, const255);
- out7 = __msa_min_u_h(out7, const255);
-
- PCKEV_B_STORE_VEC(out1, out0, dst);
+ filt = LD_UH(filter);
+ filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
+
+ const255 = (v8u16) __msa_ldi_h(255);
+
+ LD_SB4(src, src_stride, src0, src2, src4, src6);
+ LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+ src += (4 * src_stride);
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+ VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
+ out2, out3);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
+ out6, out7);
+ SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
+ SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
+ MIN_UH4_UH(out0, out1, out2, out3, const255);
+ MIN_UH4_UH(out4, out5, out6, out7, const255);
+ PCKEV_ST_SB(out0, out1, dst);
dst += dst_stride;
- PCKEV_B_STORE_VEC(out3, out2, dst);
+ PCKEV_ST_SB(out2, out3, dst);
dst += dst_stride;
- PCKEV_B_STORE_VEC(out5, out4, dst);
+ PCKEV_ST_SB(out4, out5, dst);
dst += dst_stride;
- PCKEV_B_STORE_VEC(out7, out6, dst);
+ PCKEV_ST_SB(out6, out7, dst);
dst += dst_stride;
for (; loop_cnt--;) {
- src0 = LOAD_SB(src);
- src1 = LOAD_SB(src + 8);
- src += src_stride;
- src2 = LOAD_SB(src);
- src3 = LOAD_SB(src + 8);
- src += src_stride;
- src4 = LOAD_SB(src);
- src5 = LOAD_SB(src + 8);
- src += src_stride;
- src6 = LOAD_SB(src);
- src7 = LOAD_SB(src + 8);
- src += src_stride;
+ LD_SB4(src, src_stride, src0, src2, src4, src6);
+ LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
+ src += (4 * src_stride);
- vec0 = (v16u8)__msa_vshf_b(mask, src0, src0);
- vec1 = (v16u8)__msa_vshf_b(mask, src1, src1);
- vec2 = (v16u8)__msa_vshf_b(mask, src2, src2);
- vec3 = (v16u8)__msa_vshf_b(mask, src3, src3);
- vec4 = (v16u8)__msa_vshf_b(mask, src4, src4);
- vec5 = (v16u8)__msa_vshf_b(mask, src5, src5);
- vec6 = (v16u8)__msa_vshf_b(mask, src6, src6);
- vec7 = (v16u8)__msa_vshf_b(mask, src7, src7);
-
- out0 = __msa_dotp_u_h(vec0, filt0);
- out1 = __msa_dotp_u_h(vec1, filt0);
- out2 = __msa_dotp_u_h(vec2, filt0);
- out3 = __msa_dotp_u_h(vec3, filt0);
- out4 = __msa_dotp_u_h(vec4, filt0);
- out5 = __msa_dotp_u_h(vec5, filt0);
- out6 = __msa_dotp_u_h(vec6, filt0);
- out7 = __msa_dotp_u_h(vec7, filt0);
-
- out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS);
- out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS);
- out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS);
- out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS);
- out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS);
- out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS);
- out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS);
- out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS);
-
- out0 = __msa_min_u_h(out0, const255);
- out1 = __msa_min_u_h(out1, const255);
- out2 = __msa_min_u_h(out2, const255);
- out3 = __msa_min_u_h(out3, const255);
- out4 = __msa_min_u_h(out4, const255);
- out5 = __msa_min_u_h(out5, const255);
- out6 = __msa_min_u_h(out6, const255);
- out7 = __msa_min_u_h(out7, const255);
-
- PCKEV_B_STORE_VEC(out1, out0, dst);
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+ VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
+ out2, out3);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
+ out6, out7);
+ SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
+ SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
+ MIN_UH4_UH(out0, out1, out2, out3, const255);
+ MIN_UH4_UH(out4, out5, out6, out7, const255);
+ PCKEV_ST_SB(out0, out1, dst);
dst += dst_stride;
- PCKEV_B_STORE_VEC(out3, out2, dst);
+ PCKEV_ST_SB(out2, out3, dst);
dst += dst_stride;
- PCKEV_B_STORE_VEC(out5, out4, dst);
+ PCKEV_ST_SB(out4, out5, dst);
dst += dst_stride;
- PCKEV_B_STORE_VEC(out7, out6, dst);
+ PCKEV_ST_SB(out6, out7, dst);
dst += dst_stride;
}
}
@@ -807,72 +558,46 @@ static void common_hz_2t_32w_msa(const uint8_t *src, int32_t src_stride,
int8_t *filter, int32_t height) {
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt0;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
- v8u16 filt, const255;
+ v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt, const255;
- mask = LOAD_SB(&mc_filt_mask_arr[0]);
+ mask = LD_SB(&mc_filt_mask_arr[0]);
/* rearranging filter */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_UH(filter);
+ filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
- const255 = (v8u16)__msa_ldi_h(255);
+ const255 = (v8u16) __msa_ldi_h(255);
for (loop_cnt = height >> 1; loop_cnt--;) {
- src0 = LOAD_SB(src);
- src2 = LOAD_SB(src + 16);
- src3 = LOAD_SB(src + 24);
- src1 = __msa_sld_b(src2, src0, 8);
+ src0 = LD_SB(src);
+ src2 = LD_SB(src + 16);
+ src3 = LD_SB(src + 24);
+ src1 = __msa_sldi_b(src2, src0, 8);
src += src_stride;
- src4 = LOAD_SB(src);
- src6 = LOAD_SB(src + 16);
- src7 = LOAD_SB(src + 24);
- src5 = __msa_sld_b(src6, src4, 8);
+ src4 = LD_SB(src);
+ src6 = LD_SB(src + 16);
+ src7 = LD_SB(src + 24);
+ src5 = __msa_sldi_b(src6, src4, 8);
src += src_stride;
- vec0 = (v16u8)__msa_vshf_b(mask, src0, src0);
- vec1 = (v16u8)__msa_vshf_b(mask, src1, src1);
- vec2 = (v16u8)__msa_vshf_b(mask, src2, src2);
- vec3 = (v16u8)__msa_vshf_b(mask, src3, src3);
- vec4 = (v16u8)__msa_vshf_b(mask, src4, src4);
- vec5 = (v16u8)__msa_vshf_b(mask, src5, src5);
- vec6 = (v16u8)__msa_vshf_b(mask, src6, src6);
- vec7 = (v16u8)__msa_vshf_b(mask, src7, src7);
-
- out0 = __msa_dotp_u_h(vec0, filt0);
- out1 = __msa_dotp_u_h(vec1, filt0);
- out2 = __msa_dotp_u_h(vec2, filt0);
- out3 = __msa_dotp_u_h(vec3, filt0);
- out4 = __msa_dotp_u_h(vec4, filt0);
- out5 = __msa_dotp_u_h(vec5, filt0);
- out6 = __msa_dotp_u_h(vec6, filt0);
- out7 = __msa_dotp_u_h(vec7, filt0);
-
- out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS);
- out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS);
- out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS);
- out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS);
- out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS);
- out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS);
- out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS);
- out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS);
-
- out0 = __msa_min_u_h(out0, const255);
- out1 = __msa_min_u_h(out1, const255);
- out2 = __msa_min_u_h(out2, const255);
- out3 = __msa_min_u_h(out3, const255);
- out4 = __msa_min_u_h(out4, const255);
- out5 = __msa_min_u_h(out5, const255);
- out6 = __msa_min_u_h(out6, const255);
- out7 = __msa_min_u_h(out7, const255);
-
- PCKEV_B_STORE_VEC(out1, out0, dst);
- PCKEV_B_STORE_VEC(out3, out2, dst + 16);
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+ VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
+ out2, out3);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
+ out6, out7);
+ SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
+ SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
+ MIN_UH4_UH(out0, out1, out2, out3, const255);
+ MIN_UH4_UH(out4, out5, out6, out7, const255);
+ PCKEV_ST_SB(out0, out1, dst);
+ PCKEV_ST_SB(out2, out3, dst + 16);
dst += dst_stride;
- PCKEV_B_STORE_VEC(out5, out4, dst);
- PCKEV_B_STORE_VEC(out7, out6, dst + 16);
+ PCKEV_ST_SB(out4, out5, dst);
+ PCKEV_ST_SB(out6, out7, dst + 16);
dst += dst_stride;
}
}
@@ -882,70 +607,42 @@ static void common_hz_2t_64w_msa(const uint8_t *src, int32_t src_stride,
int8_t *filter, int32_t height) {
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt0;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
- v8u16 out0, out1, out2, out3, out4, out5, out6, out7;
- v8u16 filt, const255;
+ v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt, const255;
- mask = LOAD_SB(&mc_filt_mask_arr[0]);
+ mask = LD_SB(&mc_filt_mask_arr[0]);
/* rearranging filter */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_UH(filter);
+ filt0 = (v16u8) __msa_splati_h((v8i16) filt, 0);
- const255 = (v8u16)__msa_ldi_h(255);
+ const255 = (v8u16) __msa_ldi_h(255);
for (loop_cnt = height; loop_cnt--;) {
- src0 = LOAD_SB(src);
- src2 = LOAD_SB(src + 16);
- src4 = LOAD_SB(src + 32);
- src6 = LOAD_SB(src + 48);
- src7 = LOAD_SB(src + 56);
- src1 = __msa_sld_b(src2, src0, 8);
- src3 = __msa_sld_b(src4, src2, 8);
- src5 = __msa_sld_b(src6, src4, 8);
+ src0 = LD_SB(src);
+ src2 = LD_SB(src + 16);
+ src4 = LD_SB(src + 32);
+ src6 = LD_SB(src + 48);
+ src7 = LD_SB(src + 56);
+ SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8);
src += src_stride;
- vec0 = (v16u8)__msa_vshf_b(mask, src0, src0);
- vec1 = (v16u8)__msa_vshf_b(mask, src1, src1);
- vec2 = (v16u8)__msa_vshf_b(mask, src2, src2);
- vec3 = (v16u8)__msa_vshf_b(mask, src3, src3);
- vec4 = (v16u8)__msa_vshf_b(mask, src4, src4);
- vec5 = (v16u8)__msa_vshf_b(mask, src5, src5);
- vec6 = (v16u8)__msa_vshf_b(mask, src6, src6);
- vec7 = (v16u8)__msa_vshf_b(mask, src7, src7);
-
- out0 = __msa_dotp_u_h(vec0, filt0);
- out1 = __msa_dotp_u_h(vec1, filt0);
- out2 = __msa_dotp_u_h(vec2, filt0);
- out3 = __msa_dotp_u_h(vec3, filt0);
- out4 = __msa_dotp_u_h(vec4, filt0);
- out5 = __msa_dotp_u_h(vec5, filt0);
- out6 = __msa_dotp_u_h(vec6, filt0);
- out7 = __msa_dotp_u_h(vec7, filt0);
-
- out0 = (v8u16)__msa_srari_h((v8i16)out0, FILTER_BITS);
- out1 = (v8u16)__msa_srari_h((v8i16)out1, FILTER_BITS);
- out2 = (v8u16)__msa_srari_h((v8i16)out2, FILTER_BITS);
- out3 = (v8u16)__msa_srari_h((v8i16)out3, FILTER_BITS);
- out4 = (v8u16)__msa_srari_h((v8i16)out4, FILTER_BITS);
- out5 = (v8u16)__msa_srari_h((v8i16)out5, FILTER_BITS);
- out6 = (v8u16)__msa_srari_h((v8i16)out6, FILTER_BITS);
- out7 = (v8u16)__msa_srari_h((v8i16)out7, FILTER_BITS);
-
- out0 = __msa_min_u_h(out0, const255);
- out1 = __msa_min_u_h(out1, const255);
- out2 = __msa_min_u_h(out2, const255);
- out3 = __msa_min_u_h(out3, const255);
- out4 = __msa_min_u_h(out4, const255);
- out5 = __msa_min_u_h(out5, const255);
- out6 = __msa_min_u_h(out6, const255);
- out7 = __msa_min_u_h(out7, const255);
-
- PCKEV_B_STORE_VEC(out1, out0, dst);
- PCKEV_B_STORE_VEC(out3, out2, dst + 16);
- PCKEV_B_STORE_VEC(out5, out4, dst + 32);
- PCKEV_B_STORE_VEC(out7, out6, dst + 48);
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5);
+ VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1,
+ out2, out3);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5,
+ out6, out7);
+ SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS);
+ SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS);
+ MIN_UH4_UH(out0, out1, out2, out3, const255);
+ MIN_UH4_UH(out4, out5, out6, out7, const255);
+ PCKEV_ST_SB(out0, out1, dst);
+ PCKEV_ST_SB(out2, out3, dst + 16);
+ PCKEV_ST_SB(out4, out5, dst + 32);
+ PCKEV_ST_SB(out6, out7, dst + 48);
dst += dst_stride;
}
}
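
The next file reworks the combined horizontal-plus-vertical kernels. The filtering is separable: an 8-tap horizontal pass first produces intermediate rows (seven extra rows beyond the output block, to feed the 8-tap vertical filter), and an 8-tap vertical pass then filters those intermediates. The scalar sketch below mirrors the generic two-pass C behaviour under that assumption; buffer sizes and names are illustrative and FILTER_BITS is assumed to be 7.

#include <stdint.h>

static uint8_t clip_px(int v) { return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v); }

static void convolve8_hv_ref(const uint8_t *src, int src_stride,
                             uint8_t *dst, int dst_stride,
                             const int16_t *filter_x, const int16_t *filter_y,
                             int w, int h) {
  uint8_t tmp[(64 + 7) * 64];                  /* large enough for 64x64 blocks */
  const uint8_t *s = src - 3 * src_stride - 3; /* start 3 rows up, 3 columns left */

  /* horizontal pass: h + 7 intermediate rows for the 8-tap vertical filter */
  for (int y = 0; y < h + 7; ++y) {
    for (int x = 0; x < w; ++x) {
      int sum = 0;
      for (int k = 0; k < 8; ++k) sum += s[x + k] * filter_x[k];
      tmp[y * 64 + x] = clip_px((sum + 64) >> 7);
    }
    s += src_stride;
  }

  /* vertical pass over the intermediate block */
  for (int y = 0; y < h; ++y) {
    for (int x = 0; x < w; ++x) {
      int sum = 0;
      for (int k = 0; k < 8; ++k) sum += tmp[(y + k) * 64 + x] * filter_y[k];
      dst[y * dst_stride + x] = clip_px((sum + 64) >> 7);
    }
  }
}
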
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_msa.c
index d0c374648c9..b1279d97ce5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_msa.c
@@ -26,93 +26,68 @@ static void common_hv_8ht_8vt_4w_msa(const uint8_t *src, int32_t src_stride,
int32_t height) {
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 filt_horiz0, filt_horiz1, filt_horiz2, filt_horiz3;
- v16u8 mask0, mask1, mask2, mask3;
- v8i16 filt_horiz;
- v8i16 horiz_out0, horiz_out1, horiz_out2, horiz_out3, horiz_out4;
- v8i16 horiz_out5, horiz_out6, horiz_out7, horiz_out8, horiz_out9;
- v8i16 tmp0, tmp1, out0, out1, out2, out3, out4;
- v8i16 filt, filt_vert0, filt_vert1, filt_vert2, filt_vert3;
-
- mask0 = LOAD_UB(&mc_filt_mask_arr[16]);
+ v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3;
+ v16u8 mask0, mask1, mask2, mask3, out;
+ v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+ v8i16 hz_out7, hz_out8, hz_out9, tmp0, tmp1, out0, out1, out2, out3, out4;
+ v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3;
+ mask0 = LD_UB(&mc_filt_mask_arr[16]);
src -= (3 + 3 * src_stride);
/* rearranging filter */
- filt_horiz = LOAD_SH(filter_horiz);
- filt_horiz0 = (v16i8)__msa_splati_h(filt_horiz, 0);
- filt_horiz1 = (v16i8)__msa_splati_h(filt_horiz, 1);
- filt_horiz2 = (v16i8)__msa_splati_h(filt_horiz, 2);
- filt_horiz3 = (v16i8)__msa_splati_h(filt_horiz, 3);
+ filt = LD_SH(filter_horiz);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3);
mask1 = mask0 + 2;
mask2 = mask0 + 4;
mask3 = mask0 + 6;
- LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
src += (7 * src_stride);
- XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
- src0, src1, src2, src3, src4, src5, src6, 128);
-
- horiz_out0 = HORIZ_8TAP_FILT_2VECS(src0, src1, mask0, mask1, mask2, mask3,
- filt_horiz0, filt_horiz1, filt_horiz2,
- filt_horiz3);
- horiz_out2 = HORIZ_8TAP_FILT_2VECS(src2, src3, mask0, mask1, mask2, mask3,
- filt_horiz0, filt_horiz1, filt_horiz2,
- filt_horiz3);
- horiz_out4 = HORIZ_8TAP_FILT_2VECS(src4, src5, mask0, mask1, mask2, mask3,
- filt_horiz0, filt_horiz1, filt_horiz2,
- filt_horiz3);
- horiz_out5 = HORIZ_8TAP_FILT_2VECS(src5, src6, mask0, mask1, mask2, mask3,
- filt_horiz0, filt_horiz1, filt_horiz2,
- filt_horiz3);
- horiz_out1 = (v8i16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8);
- horiz_out3 = (v8i16)__msa_sldi_b((v16i8)horiz_out4, (v16i8)horiz_out2, 8);
-
- filt = LOAD_SH(filter_vert);
- filt_vert0 = __msa_splati_h(filt, 0);
- filt_vert1 = __msa_splati_h(filt, 1);
- filt_vert2 = __msa_splati_h(filt, 2);
- filt_vert3 = __msa_splati_h(filt, 3);
-
- out0 = (v8i16)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- out1 = (v8i16)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
- out2 = (v8i16)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4);
-
- for (loop_cnt = (height >> 2); loop_cnt--;) {
- LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
- src += (4 * src_stride);
-
- XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);
+ hz_out0 = HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out2 = HORIZ_8TAP_FILT(src2, src3, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out4 = HORIZ_8TAP_FILT(src4, src5, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out5 = HORIZ_8TAP_FILT(src5, src6, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ SLDI_B2_SH(hz_out2, hz_out4, hz_out0, hz_out2, hz_out1, hz_out3, 8);
- horiz_out7 = HORIZ_8TAP_FILT_2VECS(src7, src8, mask0, mask1, mask2, mask3,
- filt_horiz0, filt_horiz1, filt_horiz2,
- filt_horiz3);
- horiz_out6 = (v8i16)__msa_sldi_b((v16i8)horiz_out7, (v16i8)horiz_out5, 8);
+ filt = LD_SH(filter_vert);
+ SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3);
- out3 = (v8i16)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6);
+ ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1);
+ out2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4);
- tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vert0, filt_vert1,
- filt_vert2, filt_vert3);
-
- horiz_out9 = HORIZ_8TAP_FILT_2VECS(src9, src10, mask0, mask1, mask2, mask3,
- filt_horiz0, filt_horiz1, filt_horiz2,
- filt_horiz3);
- horiz_out8 = (v8i16)__msa_sldi_b((v16i8)horiz_out9, (v16i8)horiz_out7, 8);
-
- out4 = (v8i16)__msa_ilvev_b((v16i8)horiz_out9, (v16i8)horiz_out8);
-
- tmp1 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out4, filt_vert0, filt_vert1,
- filt_vert2, filt_vert3);
- tmp0 = SRARI_SATURATE_SIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_SIGNED_H(tmp1, FILTER_BITS, 7);
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src7, src8, src9, src10);
+ XORI_B4_128_SB(src7, src8, src9, src10);
+ src += (4 * src_stride);
- PCKEV_2B_XORI128_STORE_4_BYTES_4(tmp0, tmp1, dst, dst_stride);
+ hz_out7 = HORIZ_8TAP_FILT(src7, src8, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ hz_out6 = (v8i16)__msa_sldi_b((v16i8)hz_out7, (v16i8)hz_out5, 8);
+ out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6);
+ tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+
+ hz_out9 = HORIZ_8TAP_FILT(src9, src10, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ hz_out8 = (v8i16)__msa_sldi_b((v16i8)hz_out9, (v16i8)hz_out7, 8);
+ out4 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8);
+ tmp1 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out4, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+ SRARI_H2_SH(tmp0, tmp1, FILTER_BITS);
+ SAT_SH2_SH(tmp0, tmp1, 7);
+ out = PCKEV_XORI128_UB(tmp0, tmp1);
+ ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
dst += (4 * dst_stride);
- horiz_out5 = horiz_out9;
-
+ hz_out5 = hz_out9;
out0 = out2;
out1 = out3;
out2 = out4;
@@ -125,108 +100,87 @@ static void common_hv_8ht_8vt_8w_msa(const uint8_t *src, int32_t src_stride,
int32_t height) {
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 filt_horiz0, filt_horiz1, filt_horiz2, filt_horiz3;
- v8i16 filt_horiz, filt, filt_vert0, filt_vert1, filt_vert2, filt_vert3;
- v16u8 mask0, mask1, mask2, mask3;
- v8i16 horiz_out0, horiz_out1, horiz_out2, horiz_out3;
- v8i16 horiz_out4, horiz_out5, horiz_out6, horiz_out7;
- v8i16 horiz_out8, horiz_out9, horiz_out10;
+ v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3;
+ v16u8 mask0, mask1, mask2, mask3, vec0, vec1;
+ v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3;
+ v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+ v8i16 hz_out7, hz_out8, hz_out9, hz_out10, tmp0, tmp1, tmp2, tmp3;
v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9;
- v8i16 tmp0, tmp1, tmp2, tmp3;
-
- mask0 = LOAD_UB(&mc_filt_mask_arr[0]);
+ mask0 = LD_UB(&mc_filt_mask_arr[0]);
src -= (3 + 3 * src_stride);
/* rearranging filter */
- filt_horiz = LOAD_SH(filter_horiz);
- filt_horiz0 = (v16i8)__msa_splati_h(filt_horiz, 0);
- filt_horiz1 = (v16i8)__msa_splati_h(filt_horiz, 1);
- filt_horiz2 = (v16i8)__msa_splati_h(filt_horiz, 2);
- filt_horiz3 = (v16i8)__msa_splati_h(filt_horiz, 3);
+ filt = LD_SH(filter_horiz);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3);
mask1 = mask0 + 2;
mask2 = mask0 + 4;
mask3 = mask0 + 6;
- LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
src += (7 * src_stride);
- XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
- src0, src1, src2, src3, src4, src5, src6, 128);
-
- horiz_out0 = HORIZ_8TAP_FILT(src0, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
- horiz_out1 = HORIZ_8TAP_FILT(src1, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
- horiz_out2 = HORIZ_8TAP_FILT(src2, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
- horiz_out3 = HORIZ_8TAP_FILT(src3, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
- horiz_out4 = HORIZ_8TAP_FILT(src4, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
- horiz_out5 = HORIZ_8TAP_FILT(src5, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
- horiz_out6 = HORIZ_8TAP_FILT(src6, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
-
- filt = LOAD_SH(filter_vert);
- filt_vert0 = __msa_splati_h(filt, 0);
- filt_vert1 = __msa_splati_h(filt, 1);
- filt_vert2 = __msa_splati_h(filt, 2);
- filt_vert3 = __msa_splati_h(filt, 3);
-
- out0 = (v8i16)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- out1 = (v8i16)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
- out2 = (v8i16)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4);
- out4 = (v8i16)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out1);
- out5 = (v8i16)__msa_ilvev_b((v16i8)horiz_out4, (v16i8)horiz_out3);
- out6 = (v8i16)__msa_ilvev_b((v16i8)horiz_out6, (v16i8)horiz_out5);
+ XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+ hz_out0 = HORIZ_8TAP_FILT(src0, src0, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out1 = HORIZ_8TAP_FILT(src1, src1, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out2 = HORIZ_8TAP_FILT(src2, src2, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out3 = HORIZ_8TAP_FILT(src3, src3, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out4 = HORIZ_8TAP_FILT(src4, src4, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out5 = HORIZ_8TAP_FILT(src5, src5, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+ hz_out6 = HORIZ_8TAP_FILT(src6, src6, mask0, mask1, mask2, mask3, filt_hz0,
+ filt_hz1, filt_hz2, filt_hz3);
+
+ filt = LD_SH(filter_vert);
+ SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3);
+
+ ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1);
+ ILVEV_B2_SH(hz_out4, hz_out5, hz_out1, hz_out2, out2, out4);
+ ILVEV_B2_SH(hz_out3, hz_out4, hz_out5, hz_out6, out5, out6);
for (loop_cnt = (height >> 2); loop_cnt--;) {
- LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
+ LD_SB4(src, src_stride, src7, src8, src9, src10);
src += (4 * src_stride);
- XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);
-
- horiz_out7 = HORIZ_8TAP_FILT(src7, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
-
- out3 = (v8i16)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6);
- tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vert0, filt_vert1,
- filt_vert2, filt_vert3);
- tmp0 = SRARI_SATURATE_SIGNED_H(tmp0, FILTER_BITS, 7);
-
- horiz_out8 = HORIZ_8TAP_FILT(src8, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
-
- out7 = (v8i16)__msa_ilvev_b((v16i8)horiz_out8, (v16i8)horiz_out7);
- tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vert0, filt_vert1,
- filt_vert2, filt_vert3);
- tmp1 = SRARI_SATURATE_SIGNED_H(tmp1, FILTER_BITS, 7);
-
- horiz_out9 = HORIZ_8TAP_FILT(src9, mask0, mask1, mask2, mask3, filt_horiz0,
- filt_horiz1, filt_horiz2, filt_horiz3);
-
- out8 = (v8i16)__msa_ilvev_b((v16i8)horiz_out9, (v16i8)horiz_out8);
- tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vert0, filt_vert1,
- filt_vert2, filt_vert3);
- tmp2 = SRARI_SATURATE_SIGNED_H(tmp2, FILTER_BITS, 7);
-
- horiz_out10 = HORIZ_8TAP_FILT(src10, mask0, mask1, mask2, mask3,
- filt_horiz0, filt_horiz1, filt_horiz2,
- filt_horiz3);
-
- out9 = (v8i16)__msa_ilvev_b((v16i8)horiz_out10, (v16i8)horiz_out9);
- tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vert0, filt_vert1,
- filt_vert2, filt_vert3);
- tmp3 = SRARI_SATURATE_SIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_4_XORI128_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+ XORI_B4_128_SB(src7, src8, src9, src10);
+
+ hz_out7 = HORIZ_8TAP_FILT(src7, src7, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6);
+ tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+
+ hz_out8 = HORIZ_8TAP_FILT(src8, src8, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ out7 = (v8i16)__msa_ilvev_b((v16i8)hz_out8, (v16i8)hz_out7);
+ tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+
+ hz_out9 = HORIZ_8TAP_FILT(src9, src9, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ out8 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8);
+ tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+
+ hz_out10 = HORIZ_8TAP_FILT(src10, src10, mask0, mask1, mask2, mask3,
+ filt_hz0, filt_hz1, filt_hz2, filt_hz3);
+ out9 = (v8i16)__msa_ilvev_b((v16i8)hz_out10, (v16i8)hz_out9);
+ tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vt0, filt_vt1,
+ filt_vt2, filt_vt3);
+ SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7);
+ vec0 = PCKEV_XORI128_UB(tmp0, tmp1);
+ vec1 = PCKEV_XORI128_UB(tmp2, tmp3);
+ ST8x4_UB(vec0, vec1, dst, dst_stride);
dst += (4 * dst_stride);
- horiz_out6 = horiz_out10;
-
+ hz_out6 = hz_out10;
out0 = out2;
out1 = out3;
out2 = out8;
@@ -279,175 +233,89 @@ static void common_hv_2ht_2vt_4x4_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter_horiz,
int8_t *filter_vert) {
- uint32_t out0, out1, out2, out3;
v16i8 src0, src1, src2, src3, src4, mask;
- v16u8 res0, res1, horiz_vec;
- v16u8 filt_vert, filt_horiz, vec0, vec1;
- v8u16 filt, tmp0, tmp1;
- v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3, horiz_out4;
+ v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1;
+ v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1;
- mask = LOAD_SB(&mc_filt_mask_arr[16]);
+ mask = LD_SB(&mc_filt_mask_arr[16]);
/* rearranging filter */
- filt = LOAD_UH(filter_horiz);
- filt_horiz = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- filt = LOAD_UH(filter_vert);
- filt_vert = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src0);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src2);
- horiz_out2 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_out2, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
- horiz_out4 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out4 = SRARI_SATURATE_UNSIGNED_H(horiz_out4, FILTER_BITS, 7);
-
- horiz_out1 = (v8u16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8);
- horiz_out3 = (v8u16)__msa_pckod_d((v2i64)horiz_out4, (v2i64)horiz_out2);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
-
- tmp0 = __msa_dotp_u_h(vec0, filt_vert);
- tmp1 = __msa_dotp_u_h(vec1, filt_vert);
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
-
- res0 = (v16u8)__msa_pckev_b((v16i8)tmp0, (v16i8)tmp0);
- res1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp1);
-
- out0 = __msa_copy_u_w((v4i32)res0, 0);
- out1 = __msa_copy_u_w((v4i32)res0, 1);
- out2 = __msa_copy_u_w((v4i32)res1, 0);
- out3 = __msa_copy_u_w((v4i32)res1, 1);
-
- STORE_WORD(dst, out0);
- dst += dst_stride;
- STORE_WORD(dst, out1);
- dst += dst_stride;
- STORE_WORD(dst, out2);
- dst += dst_stride;
- STORE_WORD(dst, out3);
+ filt = LD_UH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ filt = LD_UH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0);
+
+ LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
+ hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8);
+ hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2);
+
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1);
+ ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
}
static void common_hv_2ht_2vt_4x8_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter_horiz,
int8_t *filter_vert) {
- uint32_t out0, out1, out2, out3;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask;
- v16u8 filt_horiz, filt_vert, horiz_vec;
- v16u8 vec0, vec1, vec2, vec3;
- v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3;
- v8u16 vec4, vec5, vec6, vec7, filt;
- v8u16 horiz_out4, horiz_out5, horiz_out6, horiz_out7, horiz_out8;
v16i8 res0, res1, res2, res3;
+ v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
+ v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6;
+ v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt;
- mask = LOAD_SB(&mc_filt_mask_arr[16]);
+ mask = LD_SB(&mc_filt_mask_arr[16]);
/* rearranging filter */
- filt = LOAD_UH(filter_horiz);
- filt_horiz = (v16u8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_UH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0);
- filt = LOAD_UH(filter_vert);
- filt_vert = (v16u8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_UH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0);
- LOAD_8VECS_SB(src, src_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
- src8 = LOAD_SB(src);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src0);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src2);
- horiz_out2 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_out2, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src5, src4);
- horiz_out4 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out4 = SRARI_SATURATE_UNSIGNED_H(horiz_out4, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src7, src6);
- horiz_out6 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out6 = SRARI_SATURATE_UNSIGNED_H(horiz_out6, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src8, src8);
- horiz_out8 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out8 = SRARI_SATURATE_UNSIGNED_H(horiz_out8, FILTER_BITS, 7);
-
- horiz_out1 = (v8u16)__msa_sldi_b((v16i8)horiz_out2, (v16i8)horiz_out0, 8);
- horiz_out3 = (v8u16)__msa_sldi_b((v16i8)horiz_out4, (v16i8)horiz_out2, 8);
- horiz_out5 = (v8u16)__msa_sldi_b((v16i8)horiz_out6, (v16i8)horiz_out4, 8);
- horiz_out7 = (v8u16)__msa_pckod_d((v2i64)horiz_out8, (v2i64)horiz_out6);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
- vec2 = (v16u8)__msa_ilvev_b((v16i8)horiz_out5, (v16i8)horiz_out4);
- vec3 = (v16u8)__msa_ilvev_b((v16i8)horiz_out7, (v16i8)horiz_out6);
-
- vec4 = __msa_dotp_u_h(vec0, filt_vert);
- vec5 = __msa_dotp_u_h(vec1, filt_vert);
- vec6 = __msa_dotp_u_h(vec2, filt_vert);
- vec7 = __msa_dotp_u_h(vec3, filt_vert);
-
- vec4 = SRARI_SATURATE_UNSIGNED_H(vec4, FILTER_BITS, 7);
- vec5 = SRARI_SATURATE_UNSIGNED_H(vec5, FILTER_BITS, 7);
- vec6 = SRARI_SATURATE_UNSIGNED_H(vec6, FILTER_BITS, 7);
- vec7 = SRARI_SATURATE_UNSIGNED_H(vec7, FILTER_BITS, 7);
-
- res0 = __msa_pckev_b((v16i8)vec4, (v16i8)vec4);
- res1 = __msa_pckev_b((v16i8)vec5, (v16i8)vec5);
- res2 = __msa_pckev_b((v16i8)vec6, (v16i8)vec6);
- res3 = __msa_pckev_b((v16i8)vec7, (v16i8)vec7);
-
- out0 = __msa_copy_u_w((v4i32)res0, 0);
- out1 = __msa_copy_u_w((v4i32)res0, 1);
- out2 = __msa_copy_u_w((v4i32)res1, 0);
- out3 = __msa_copy_u_w((v4i32)res1, 1);
-
- STORE_WORD(dst, out0);
- dst += dst_stride;
- STORE_WORD(dst, out1);
- dst += dst_stride;
- STORE_WORD(dst, out2);
- dst += dst_stride;
- STORE_WORD(dst, out3);
- dst += dst_stride;
-
- out0 = __msa_copy_u_w((v4i32)res2, 0);
- out1 = __msa_copy_u_w((v4i32)res2, 1);
- out2 = __msa_copy_u_w((v4i32)res3, 0);
- out3 = __msa_copy_u_w((v4i32)res3, 1);
-
- STORE_WORD(dst, out0);
- dst += dst_stride;
- STORE_WORD(dst, out1);
- dst += dst_stride;
- STORE_WORD(dst, out2);
- dst += dst_stride;
- STORE_WORD(dst, out3);
+ src8 = LD_SB(src);
+
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS);
+ hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, FILTER_BITS);
+ hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, FILTER_BITS);
+ hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, FILTER_BITS);
+ SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1,
+ hz_out3, hz_out5, 8);
+ hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6);
+
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt,
+ vec4, vec5, vec6, vec7);
+ SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS);
+ SAT_UH4_UH(vec4, vec5, vec6, vec7, 7);
+ PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1,
+ res2, res3);
+ ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride);
+ dst += (4 * dst_stride);
+ ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride);
}
static void common_hv_2ht_2vt_4w_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
- int8_t *filter_horiz,
- int8_t *filter_vert,
+ int8_t *filter_horiz, int8_t *filter_vert,
int32_t height) {
if (4 == height) {
- common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride,
- filter_horiz, filter_vert);
+ common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride, filter_horiz,
+ filter_vert);
} else if (8 == height) {
- common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride,
- filter_horiz, filter_vert);
+ common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride, filter_horiz,
+ filter_vert);
}
}
@@ -455,63 +323,43 @@ static void common_hv_2ht_2vt_8x4_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter_horiz,
int8_t *filter_vert) {
- v16i8 src0, src1, src2, src3, src4, mask;
- v16u8 filt_horiz, filt_vert, horiz_vec;
- v16u8 vec0, vec1, vec2, vec3;
- v8u16 horiz_out0, horiz_out1;
- v8u16 tmp0, tmp1, tmp2, tmp3;
+ v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
+ v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3;
+ v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3;
v8i16 filt;
- mask = LOAD_SB(&mc_filt_mask_arr[0]);
+ mask = LD_SB(&mc_filt_mask_arr[0]);
/* rearranging filter */
- filt = LOAD_SH(filter_horiz);
- filt_horiz = (v16u8)__msa_splati_h(filt, 0);
-
- filt = LOAD_SH(filter_vert);
- filt_vert = (v16u8)__msa_splati_h(filt, 0);
-
- LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4);
- src += (5 * src_stride);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
- horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
+ filt = LD_SH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h(filt, 0);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- tmp0 = __msa_dotp_u_h(vec0, filt_vert);
+ filt = LD_SH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h(filt, 0);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+ LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
- vec1 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
- tmp1 = __msa_dotp_u_h(vec1, filt_vert);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp0 = __msa_dotp_u_h(vec0, filt_vt);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
- horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
+ vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp1 = __msa_dotp_u_h(vec1, filt_vt);
- vec2 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- tmp2 = __msa_dotp_u_h(vec2, filt_vert);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
+ vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp2 = __msa_dotp_u_h(vec2, filt_vt);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp3 = __msa_dotp_u_h(vec3, filt_vt);
- vec3 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
- tmp3 = __msa_dotp_u_h(vec3, filt_vert);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+ ST8x4_UB(out0, out1, dst, dst_stride);
}
static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src,
@@ -522,106 +370,76 @@ static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src,
int8_t *filter_vert,
int32_t height) {
uint32_t loop_cnt;
- v16i8 src0, src1, src2, src3, src4, mask;
- v16u8 filt_horiz, filt_vert, vec0, horiz_vec;
- v8u16 horiz_out0, horiz_out1;
- v8u16 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+ v16i8 src0, src1, src2, src3, src4, mask, out0, out1;
+ v16u8 filt_hz, filt_vt, vec0;
+ v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
v8i16 filt;
- mask = LOAD_SB(&mc_filt_mask_arr[0]);
+ mask = LD_SB(&mc_filt_mask_arr[0]);
/* rearranging filter */
- filt = LOAD_SH(filter_horiz);
- filt_horiz = (v16u8)__msa_splati_h(filt, 0);
+ filt = LD_SH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h(filt, 0);
- filt = LOAD_SH(filter_vert);
- filt_vert = (v16u8)__msa_splati_h(filt, 0);
+ filt = LD_SH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h(filt, 0);
- src0 = LOAD_SB(src);
+ src0 = LD_SB(src);
src += src_stride;
- horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
for (loop_cnt = (height >> 3); loop_cnt--;) {
- LOAD_4VECS_SB(src, src_stride, src1, src2, src3, src4);
+ LD_SB4(src, src_stride, src1, src2, src3, src4);
src += (4 * src_stride);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
- horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- tmp1 = __msa_dotp_u_h(vec0, filt_vert);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp1 = __msa_dotp_u_h(vec0, filt_vt);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp2 = __msa_dotp_u_h(vec0, filt_vt);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
- tmp2 = (v8u16)__msa_dotp_u_h(vec0, filt_vert);
+ SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
+ SAT_UH2_UH(tmp1, tmp2, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp3 = __msa_dotp_u_h(vec0, filt_vt);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
- horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- tmp3 = __msa_dotp_u_h(vec0, filt_vert);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
-
- LOAD_4VECS_SB(src, src_stride, src1, src2, src3, src4);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ LD_SB4(src, src_stride, src1, src2, src3, src4);
src += (4 * src_stride);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp4 = __msa_dotp_u_h(vec0, filt_vt);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
- tmp4 = __msa_dotp_u_h(vec0, filt_vert);
-
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
- tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7);
-
- PCKEV_B_STORE_8_BYTES_4(tmp1, tmp2, tmp3, tmp4, dst, dst_stride);
+ SRARI_H2_UH(tmp3, tmp4, FILTER_BITS);
+ SAT_UH2_UH(tmp3, tmp4, 7);
+ PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1);
+ ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
- horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- tmp5 = __msa_dotp_u_h(vec0, filt_vert);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
- tmp6 = __msa_dotp_u_h(vec0, filt_vert);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
- horiz_out1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_out1, FILTER_BITS, 7);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp5 = __msa_dotp_u_h(vec0, filt_vt);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- tmp7 = __msa_dotp_u_h(vec0, filt_vert);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp6 = __msa_dotp_u_h(vec0, filt_vt);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
- horiz_out0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_out0, FILTER_BITS, 7);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0);
+ tmp7 = __msa_dotp_u_h(vec0, filt_vt);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
- tmp8 = __msa_dotp_u_h(vec0, filt_vert);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1);
+ tmp8 = __msa_dotp_u_h(vec0, filt_vt);
- tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7);
- tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 7);
- tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7);
- tmp8 = SRARI_SATURATE_UNSIGNED_H(tmp8, FILTER_BITS, 7);
-
- PCKEV_B_STORE_8_BYTES_4(tmp5, tmp6, tmp7, tmp8, dst, dst_stride);
+ SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, FILTER_BITS);
+ SAT_UH4_UH(tmp5, tmp6, tmp7, tmp8, 7);
+ PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1);
+ ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
}
}
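/* Editor's note: the 2-tap horizontal/vertical functions above replace an
 * explicit shuffle / dot-product / round-saturate sequence with the
 * HORIZ_2TAP_FILT_UH macro.  Its definition is outside this diff, so the
 * helper below is a hedged sketch reconstructed from the removed lines in
 * the hunks above; the function name is illustrative only. */
#include <msa.h>

static inline v8u16 horiz_2tap_filt_uh_sketch(v16i8 in0, v16i8 in1,
                                              v16i8 mask, v16u8 filt) {
  v16u8 vec;
  v8u16 out;

  vec = (v16u8)__msa_vshf_b(mask, in1, in0);   /* gather adjacent pixel pairs */
  out = __msa_dotp_u_h(vec, filt);             /* 2-tap unsigned dot product */
  out = (v8u16)__msa_srari_h((v8i16)out, 7);   /* round by FILTER_BITS (assumed 7) */
  return __msa_sat_u_h(out, 7);                /* saturate to the 8-bit range */
}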
@@ -645,108 +463,64 @@ static void common_hv_2ht_2vt_16w_msa(const uint8_t *src, int32_t src_stride,
int32_t height) {
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask;
- v16u8 filt_horiz, filt_vert, vec0, horiz_vec;
- v8u16 horiz_vec0, horiz_vec1, tmp1, tmp2;
- v8u16 horiz_out0, horiz_out1, horiz_out2, horiz_out3;
+ v16u8 filt_hz, filt_vt, vec0, vec1;
+ v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3;
v8i16 filt;
- mask = LOAD_SB(&mc_filt_mask_arr[0]);
+ mask = LD_SB(&mc_filt_mask_arr[0]);
/* rearranging filter */
- filt = LOAD_SH(filter_horiz);
- filt_horiz = (v16u8)__msa_splati_h(filt, 0);
-
- filt = LOAD_SH(filter_vert);
- filt_vert = (v16u8)__msa_splati_h(filt, 0);
-
- src0 = LOAD_SB(src);
- src1 = LOAD_SB(src + 8);
+ filt = LD_SH(filter_horiz);
+ filt_hz = (v16u8)__msa_splati_h(filt, 0);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
- horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
- horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
+ filt = LD_SH(filter_vert);
+ filt_vt = (v16u8)__msa_splati_h(filt, 0);
+ LD_SB2(src, 8, src0, src1);
src += src_stride;
+ hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+
for (loop_cnt = (height >> 2); loop_cnt--;) {
- LOAD_4VECS_SB(src, src_stride, src0, src2, src4, src6);
- LOAD_4VECS_SB(src + 8, src_stride, src1, src3, src5, src7);
+ LD_SB4(src, src_stride, src0, src2, src4, src6);
+ LD_SB4(src + 8, src_stride, src1, src3, src5, src7);
src += (4 * src_stride);
- horiz_vec = (v16u8)__msa_vshf_b(mask, src0, src0);
- horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src1, src1);
- horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out3 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- tmp1 = __msa_dotp_u_h(vec0, filt_vert);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
- tmp2 = __msa_dotp_u_h(vec0, filt_vert);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS);
+ hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+ SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
+ SAT_UH2_UH(tmp1, tmp2, 7);
+ PCKEV_ST_SB(tmp1, tmp2, dst);
dst += dst_stride;
- horiz_vec = (v16u8)__msa_vshf_b(mask, src2, src2);
- horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src3, src3);
- horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
- tmp1 = __msa_dotp_u_h(vec0, filt_vert);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out3);
- tmp2 = __msa_dotp_u_h(vec0, filt_vert);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+ SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
+ SAT_UH2_UH(tmp1, tmp2, 7);
+ PCKEV_ST_SB(tmp1, tmp2, dst);
dst += dst_stride;
- horiz_vec = (v16u8)__msa_vshf_b(mask, src4, src4);
- horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out1 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src5, src5);
- horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out3 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out1, (v16i8)horiz_out0);
- tmp1 = __msa_dotp_u_h(vec0, filt_vert);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out3, (v16i8)horiz_out2);
- tmp2 = __msa_dotp_u_h(vec0, filt_vert);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
+ hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS);
+ hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+ SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
+ SAT_UH2_UH(tmp1, tmp2, 7);
+ PCKEV_ST_SB(tmp1, tmp2, dst);
dst += dst_stride;
- horiz_vec = (v16u8)__msa_vshf_b(mask, src6, src6);
- horiz_vec0 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out0 = SRARI_SATURATE_UNSIGNED_H(horiz_vec0, FILTER_BITS, 7);
-
- horiz_vec = (v16u8)__msa_vshf_b(mask, src7, src7);
- horiz_vec1 = __msa_dotp_u_h(horiz_vec, filt_horiz);
- horiz_out2 = SRARI_SATURATE_UNSIGNED_H(horiz_vec1, FILTER_BITS, 7);
-
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out0, (v16i8)horiz_out1);
- tmp1 = __msa_dotp_u_h(vec0, filt_vert);
- vec0 = (v16u8)__msa_ilvev_b((v16i8)horiz_out2, (v16i8)horiz_out3);
- tmp2 = __msa_dotp_u_h(vec0, filt_vert);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp2, tmp1, dst);
+ hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS);
+ hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS);
+ ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1);
+ DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2);
+ SRARI_H2_UH(tmp1, tmp2, FILTER_BITS);
+ SAT_UH2_UH(tmp1, tmp2, 7);
+ PCKEV_ST_SB(tmp1, tmp2, dst);
dst += dst_stride;
}
}
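/* Editor's note: the 8-tap paths above and the vertical convolutions in the
 * next file feed interleaved byte vectors through FILT_8TAP_DPADD_S_H.  That
 * macro's body is not shown anywhere in this diff, so the helper below is
 * only a hedged sketch of the assumed dot-product-and-accumulate chain built
 * from the standard MSA builtins __msa_dotp_s_h / __msa_dpadd_s_h. */
#include <msa.h>

static inline v8i16 filt_8tap_dpadd_s_h_sketch(v16i8 vec0, v16i8 vec1,
                                               v16i8 vec2, v16i8 vec3,
                                               v16i8 filt0, v16i8 filt1,
                                               v16i8 filt2, v16i8 filt3) {
  v8i16 out;

  out = __msa_dotp_s_h(vec0, filt0);         /* taps 0-1 */
  out = __msa_dpadd_s_h(out, vec1, filt1);   /* accumulate taps 2-3 */
  out = __msa_dpadd_s_h(out, vec2, filt2);   /* accumulate taps 4-5 */
  out = __msa_dpadd_s_h(out, vec3, filt3);   /* accumulate taps 6-7 */
  return out;                                /* rounding/saturation done by caller */
}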
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_vert_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_vert_msa.c
index 6b71ec1c0e4..e9ec2507a0a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_vert_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve8_vert_msa.c
@@ -16,58 +16,48 @@ static void common_vt_8t_4w_msa(const uint8_t *src, int32_t src_stride,
int8_t *filter, int32_t height) {
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
- v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
- v16i8 src2110, src4332, src6554, src8776, src10998;
+ v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+ v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776;
+ v16i8 src10998, filt0, filt1, filt2, filt3;
+ v16u8 out;
v8i16 filt, out10, out32;
- v16i8 filt0, filt1, filt2, filt3;
src -= (3 * src_stride);
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
- LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
src += (7 * src_stride);
- ILVR_B_6VECS_SB(src0, src2, src4, src1, src3, src5,
- src1, src3, src5, src2, src4, src6,
- src10_r, src32_r, src54_r, src21_r, src43_r, src65_r);
-
- ILVR_D_3VECS_SB(src2110, src21_r, src10_r, src4332, src43_r, src32_r,
- src6554, src65_r, src54_r);
-
- XORI_B_3VECS_SB(src2110, src4332, src6554, src2110, src4332, src6554, 128);
+ ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+ src54_r, src21_r);
+ ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+ ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110,
+ src4332, src6554);
+ XORI_B3_128_SB(src2110, src4332, src6554);
for (loop_cnt = (height >> 2); loop_cnt--;) {
- LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
+ LD_SB4(src, src_stride, src7, src8, src9, src10);
src += (4 * src_stride);
- ILVR_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10,
- src76_r, src87_r, src98_r, src109_r);
-
- ILVR_D_2VECS_SB(src8776, src87_r, src76_r, src10998, src109_r, src98_r);
-
- XORI_B_2VECS_SB(src8776, src10998, src8776, src10998, 128);
-
- out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776,
- filt0, filt1, filt2, filt3);
- out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998,
- filt0, filt1, filt2, filt3);
-
- out10 = SRARI_SATURATE_SIGNED_H(out10, FILTER_BITS, 7);
- out32 = SRARI_SATURATE_SIGNED_H(out32, FILTER_BITS, 7);
-
- PCKEV_2B_XORI128_STORE_4_BYTES_4(out10, out32, dst, dst_stride);
+ ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+ src87_r, src98_r, src109_r);
+ ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998);
+ XORI_B2_128_SB(src8776, src10998);
+ out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0,
+ filt1, filt2, filt3);
+ out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0,
+ filt1, filt2, filt3);
+ SRARI_H2_SH(out10, out32, FILTER_BITS);
+ SAT_SH2_SH(out10, out32, 7);
+ out = PCKEV_XORI128_UB(out10, out32);
+ ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride);
dst += (4 * dst_stride);
src2110 = src6554;
src4332 = src8776;
src6554 = src10998;
-
src6 = src10;
}
}
@@ -77,54 +67,115 @@ static void common_vt_8t_8w_msa(const uint8_t *src, int32_t src_stride,
int8_t *filter, int32_t height) {
uint32_t loop_cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
- v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
- v16i8 filt0, filt1, filt2, filt3;
+ v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+ v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3;
+ v16u8 tmp0, tmp1;
v8i16 filt, out0_r, out1_r, out2_r, out3_r;
src -= (3 * src_stride);
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
- LOAD_7VECS_SB(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
src += (7 * src_stride);
-
- XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
- src0, src1, src2, src3, src4, src5, src6, 128);
-
- ILVR_B_6VECS_SB(src0, src2, src4, src1, src3, src5,
- src1, src3, src5, src2, src4, src6,
- src10_r, src32_r, src54_r, src21_r, src43_r, src65_r);
+ ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+ src54_r, src21_r);
+ ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
for (loop_cnt = (height >> 2); loop_cnt--;) {
- LOAD_4VECS_SB(src, src_stride, src7, src8, src9, src10);
+ LD_SB4(src, src_stride, src7, src8, src9, src10);
+ XORI_B4_128_SB(src7, src8, src9, src10);
src += (4 * src_stride);
- XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);
+ ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+ src87_r, src98_r, src109_r);
+ out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
+ filt1, filt2, filt3);
+ out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
+ filt1, filt2, filt3);
+ out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
+ filt1, filt2, filt3);
+ out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
+ filt1, filt2, filt3);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
+ SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+ tmp0 = PCKEV_XORI128_UB(out0_r, out1_r);
+ tmp1 = PCKEV_XORI128_UB(out2_r, out3_r);
+ ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+ dst += (4 * dst_stride);
- ILVR_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10,
- src76_r, src87_r, src98_r, src109_r);
+ src10_r = src54_r;
+ src32_r = src76_r;
+ src54_r = src98_r;
+ src21_r = src65_r;
+ src43_r = src87_r;
+ src65_r = src109_r;
+ src6 = src10;
+ }
+}
- out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r,
- filt0, filt1, filt2, filt3);
- out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r,
- filt0, filt1, filt2, filt3);
- out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r,
- filt0, filt1, filt2, filt3);
- out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r,
- filt0, filt1, filt2, filt3);
+static void common_vt_8t_16w_msa(const uint8_t *src, int32_t src_stride,
+ uint8_t *dst, int32_t dst_stride,
+ int8_t *filter, int32_t height) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+ v16i8 filt0, filt1, filt2, filt3;
+ v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+ v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
+ v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
+ v16u8 tmp0, tmp1, tmp2, tmp3;
+ v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
+
+ src -= (3 * src_stride);
- out0_r = SRARI_SATURATE_SIGNED_H(out0_r, FILTER_BITS, 7);
- out1_r = SRARI_SATURATE_SIGNED_H(out1_r, FILTER_BITS, 7);
- out2_r = SRARI_SATURATE_SIGNED_H(out2_r, FILTER_BITS, 7);
- out3_r = SRARI_SATURATE_SIGNED_H(out3_r, FILTER_BITS, 7);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
+
+ LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
+ src += (7 * src_stride);
+ ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r,
+ src54_r, src21_r);
+ ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+ ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l,
+ src54_l, src21_l);
+ ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
+
+ for (loop_cnt = (height >> 2); loop_cnt--;) {
+ LD_SB4(src, src_stride, src7, src8, src9, src10);
+ XORI_B4_128_SB(src7, src8, src9, src10);
+ src += (4 * src_stride);
- PCKEV_B_4_XORI128_STORE_8_BYTES_4(out0_r, out1_r, out2_r, out3_r,
- dst, dst_stride);
+ ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+ src87_r, src98_r, src109_r);
+ ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
+ src87_l, src98_l, src109_l);
+ out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
+ filt1, filt2, filt3);
+ out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
+ filt1, filt2, filt3);
+ out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
+ filt1, filt2, filt3);
+ out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
+ filt1, filt2, filt3);
+ out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0,
+ filt1, filt2, filt3);
+ out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0,
+ filt1, filt2, filt3);
+ out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0,
+ filt1, filt2, filt3);
+ out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
+ filt1, filt2, filt3);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
+ SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS);
+ SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+ SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+ PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, out3_r,
+ tmp0, tmp1, tmp2, tmp3);
+ XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
+ ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
dst += (4 * dst_stride);
src10_r = src54_r;
@@ -133,7 +184,12 @@ static void common_vt_8t_8w_msa(const uint8_t *src, int32_t src_stride,
src21_r = src65_r;
src43_r = src87_r;
src65_r = src109_r;
-
+ src10_l = src54_l;
+ src32_l = src76_l;
+ src54_l = src98_l;
+ src21_l = src65_l;
+ src43_l = src87_l;
+ src65_l = src109_l;
src6 = src10;
}
}
@@ -147,89 +203,63 @@ static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride,
uint32_t loop_cnt, cnt;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
v16i8 filt0, filt1, filt2, filt3;
- v16i8 src10_r, src32_r, src54_r, src76_r, src98_r;
- v16i8 src21_r, src43_r, src65_r, src87_r, src109_r;
- v16i8 src10_l, src32_l, src54_l, src76_l, src98_l;
- v16i8 src21_l, src43_l, src65_l, src87_l, src109_l;
- v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
- v8i16 filt;
+ v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r;
+ v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l;
+ v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l;
v16u8 tmp0, tmp1, tmp2, tmp3;
+ v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l;
src -= (3 * src_stride);
- filt = LOAD_SH(filter);
- filt0 = (v16i8)__msa_splati_h(filt, 0);
- filt1 = (v16i8)__msa_splati_h(filt, 1);
- filt2 = (v16i8)__msa_splati_h(filt, 2);
- filt3 = (v16i8)__msa_splati_h(filt, 3);
+ filt = LD_SH(filter);
+ SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3);
for (cnt = (width >> 4); cnt--;) {
src_tmp = src;
dst_tmp = dst;
- LOAD_7VECS_SB(src_tmp, src_stride,
- src0, src1, src2, src3, src4, src5, src6);
+ LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6);
+ XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6);
src_tmp += (7 * src_stride);
-
- XORI_B_7VECS_SB(src0, src1, src2, src3, src4, src5, src6,
- src0, src1, src2, src3, src4, src5, src6, 128);
-
- ILVR_B_6VECS_SB(src0, src2, src4, src1, src3, src5,
- src1, src3, src5, src2, src4, src6,
- src10_r, src32_r, src54_r, src21_r, src43_r, src65_r);
-
- ILVL_B_6VECS_SB(src0, src2, src4, src1, src3, src5,
- src1, src3, src5, src2, src4, src6,
- src10_l, src32_l, src54_l, src21_l, src43_l, src65_l);
+ ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r,
+ src32_r, src54_r, src21_r);
+ ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r);
+ ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l,
+ src32_l, src54_l, src21_l);
+ ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l);
for (loop_cnt = (height >> 2); loop_cnt--;) {
- LOAD_4VECS_SB(src_tmp, src_stride, src7, src8, src9, src10);
+ LD_SB4(src_tmp, src_stride, src7, src8, src9, src10);
+ XORI_B4_128_SB(src7, src8, src9, src10);
src_tmp += (4 * src_stride);
-
- XORI_B_4VECS_SB(src7, src8, src9, src10, src7, src8, src9, src10, 128);
-
- ILVR_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10,
- src76_r, src87_r, src98_r, src109_r);
-
- ILVL_B_4VECS_SB(src6, src7, src8, src9, src7, src8, src9, src10,
- src76_l, src87_l, src98_l, src109_l);
-
- out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r,
- filt0, filt1, filt2, filt3);
- out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r,
- filt0, filt1, filt2, filt3);
- out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r,
- filt0, filt1, filt2, filt3);
- out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r,
- filt0, filt1, filt2, filt3);
-
- out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l,
- filt0, filt1, filt2, filt3);
- out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l,
- filt0, filt1, filt2, filt3);
- out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l,
- filt0, filt1, filt2, filt3);
- out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l,
- filt0, filt1, filt2, filt3);
-
- out0_r = SRARI_SATURATE_SIGNED_H(out0_r, FILTER_BITS, 7);
- out1_r = SRARI_SATURATE_SIGNED_H(out1_r, FILTER_BITS, 7);
- out2_r = SRARI_SATURATE_SIGNED_H(out2_r, FILTER_BITS, 7);
- out3_r = SRARI_SATURATE_SIGNED_H(out3_r, FILTER_BITS, 7);
- out0_l = SRARI_SATURATE_SIGNED_H(out0_l, FILTER_BITS, 7);
- out1_l = SRARI_SATURATE_SIGNED_H(out1_l, FILTER_BITS, 7);
- out2_l = SRARI_SATURATE_SIGNED_H(out2_l, FILTER_BITS, 7);
- out3_l = SRARI_SATURATE_SIGNED_H(out3_l, FILTER_BITS, 7);
-
- out0_r = (v8i16)__msa_pckev_b((v16i8)out0_l, (v16i8)out0_r);
- out1_r = (v8i16)__msa_pckev_b((v16i8)out1_l, (v16i8)out1_r);
- out2_r = (v8i16)__msa_pckev_b((v16i8)out2_l, (v16i8)out2_r);
- out3_r = (v8i16)__msa_pckev_b((v16i8)out3_l, (v16i8)out3_r);
-
- XORI_B_4VECS_UB(out0_r, out1_r, out2_r, out3_r,
- tmp0, tmp1, tmp2, tmp3, 128);
-
- STORE_4VECS_UB(dst_tmp, dst_stride, tmp0, tmp1, tmp2, tmp3);
+ ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r,
+ src87_r, src98_r, src109_r);
+ ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l,
+ src87_l, src98_l, src109_l);
+ out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0,
+ filt1, filt2, filt3);
+ out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0,
+ filt1, filt2, filt3);
+ out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0,
+ filt1, filt2, filt3);
+ out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0,
+ filt1, filt2, filt3);
+ out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0,
+ filt1, filt2, filt3);
+ out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0,
+ filt1, filt2, filt3);
+ out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0,
+ filt1, filt2, filt3);
+ out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0,
+ filt1, filt2, filt3);
+ SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS);
+ SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS);
+ SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7);
+ SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7);
+ PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l,
+ out3_r, tmp0, tmp1, tmp2, tmp3);
+ XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3);
+ ST_UB4(tmp0, tmp1, tmp2, tmp3, dst_tmp, dst_stride);
dst_tmp += (4 * dst_stride);
src10_r = src54_r;
@@ -238,14 +268,12 @@ static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride,
src21_r = src65_r;
src43_r = src87_r;
src65_r = src109_r;
-
src10_l = src54_l;
src32_l = src76_l;
src54_l = src98_l;
src21_l = src65_l;
src43_l = src87_l;
src65_l = src109_l;
-
src6 = src10;
}
@@ -254,134 +282,77 @@ static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride,
}
}
-static void common_vt_8t_16w_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int8_t *filter, int32_t height) {
- common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride,
- filter, height, 16);
-}
-
static void common_vt_8t_32w_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter, int32_t height) {
- common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride,
- filter, height, 32);
+ common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
+ 32);
}
static void common_vt_8t_64w_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter, int32_t height) {
- common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride,
- filter, height, 64);
+ common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height,
+ 64);
}
static void common_vt_2t_4x4_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter) {
- uint32_t out0, out1, out2, out3;
v16i8 src0, src1, src2, src3, src4;
v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332;
- v16i8 filt0;
- v8u16 filt;
+ v16u8 filt0;
+ v8i16 filt;
+ v8u16 tmp0, tmp1;
- filt = LOAD_UH(filter);
- filt0 = (v16i8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
- LOAD_5VECS_SB(src, src_stride, src0, src1, src2, src3, src4);
+ LD_SB5(src, src_stride, src0, src1, src2, src3, src4);
src += (5 * src_stride);
- ILVR_B_4VECS_SB(src0, src1, src2, src3, src1, src2, src3, src4,
- src10_r, src21_r, src32_r, src43_r);
-
- ILVR_D_2VECS_SB(src2110, src21_r, src10_r, src4332, src43_r, src32_r);
-
- src2110 = (v16i8)__msa_dotp_u_h((v16u8)src2110, (v16u8)filt0);
- src4332 = (v16i8)__msa_dotp_u_h((v16u8)src4332, (v16u8)filt0);
-
- src2110 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src2110, FILTER_BITS, 7);
- src4332 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src4332, FILTER_BITS, 7);
-
- src2110 = (v16i8)__msa_pckev_b((v16i8)src4332, (v16i8)src2110);
-
- out0 = __msa_copy_u_w((v4i32)src2110, 0);
- out1 = __msa_copy_u_w((v4i32)src2110, 1);
- out2 = __msa_copy_u_w((v4i32)src2110, 2);
- out3 = __msa_copy_u_w((v4i32)src2110, 3);
-
- STORE_WORD(dst, out0);
- dst += dst_stride;
- STORE_WORD(dst, out1);
- dst += dst_stride;
- STORE_WORD(dst, out2);
- dst += dst_stride;
- STORE_WORD(dst, out3);
+ ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+ src32_r, src43_r);
+ ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332);
+ DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0);
+ ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
}
static void common_vt_2t_4x8_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter) {
- uint32_t out0, out1, out2, out3, out4, out5, out6, out7;
v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r;
v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776;
- v16i8 filt0;
- v8u16 filt;
+ v8u16 tmp0, tmp1, tmp2, tmp3;
+ v16u8 filt0;
+ v8i16 filt;
- filt = LOAD_UH(filter);
- filt0 = (v16i8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
- LOAD_8VECS_SB(src, src_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
- src8 = LOAD_SB(src);
+ src8 = LD_SB(src);
src += src_stride;
- ILVR_B_8VECS_SB(src0, src1, src2, src3, src4, src5, src6, src7,
- src1, src2, src3, src4, src5, src6, src7, src8,
- src10_r, src21_r, src32_r, src43_r,
- src54_r, src65_r, src76_r, src87_r);
-
- ILVR_D_4VECS_SB(src2110, src21_r, src10_r, src4332, src43_r, src32_r,
- src6554, src65_r, src54_r, src8776, src87_r, src76_r);
-
- src2110 = (v16i8)__msa_dotp_u_h((v16u8)src2110, (v16u8)filt0);
- src4332 = (v16i8)__msa_dotp_u_h((v16u8)src4332, (v16u8)filt0);
- src6554 = (v16i8)__msa_dotp_u_h((v16u8)src6554, (v16u8)filt0);
- src8776 = (v16i8)__msa_dotp_u_h((v16u8)src8776, (v16u8)filt0);
-
- src2110 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src2110, FILTER_BITS, 7);
- src4332 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src4332, FILTER_BITS, 7);
- src6554 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src6554, FILTER_BITS, 7);
- src8776 = (v16i8)SRARI_SATURATE_UNSIGNED_H(src8776, FILTER_BITS, 7);
-
- src2110 = (v16i8)__msa_pckev_b((v16i8)src4332, (v16i8)src2110);
- src4332 = (v16i8)__msa_pckev_b((v16i8)src8776, (v16i8)src6554);
-
- out0 = __msa_copy_u_w((v4i32)src2110, 0);
- out1 = __msa_copy_u_w((v4i32)src2110, 1);
- out2 = __msa_copy_u_w((v4i32)src2110, 2);
- out3 = __msa_copy_u_w((v4i32)src2110, 3);
- out4 = __msa_copy_u_w((v4i32)src4332, 0);
- out5 = __msa_copy_u_w((v4i32)src4332, 1);
- out6 = __msa_copy_u_w((v4i32)src4332, 2);
- out7 = __msa_copy_u_w((v4i32)src4332, 3);
-
- STORE_WORD(dst, out0);
- dst += dst_stride;
- STORE_WORD(dst, out1);
- dst += dst_stride;
- STORE_WORD(dst, out2);
- dst += dst_stride;
- STORE_WORD(dst, out3);
- dst += dst_stride;
- STORE_WORD(dst, out4);
- dst += dst_stride;
- STORE_WORD(dst, out5);
- dst += dst_stride;
- STORE_WORD(dst, out6);
- dst += dst_stride;
- STORE_WORD(dst, out7);
+ ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r,
+ src32_r, src43_r);
+ ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r,
+ src76_r, src87_r);
+ ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r,
+ src87_r, src76_r, src2110, src4332, src6554, src8776);
+ DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0,
+ tmp0, tmp1, tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332);
+ ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride);
+ ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride);
}
static void common_vt_2t_4w_msa(const uint8_t *src, int32_t src_stride,
@@ -397,32 +368,24 @@ static void common_vt_2t_4w_msa(const uint8_t *src, int32_t src_stride,
static void common_vt_2t_8x4_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter) {
- v16u8 src0, src1, src2, src3, src4;
- v16u8 vec0, vec1, vec2, vec3, filt0;
+ v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0;
+ v16i8 out0, out1;
v8u16 tmp0, tmp1, tmp2, tmp3;
- v8u16 filt;
+ v8i16 filt;
/* rearranging filter_y */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
-
- LOAD_5VECS_UB(src, src_stride, src0, src1, src2, src3, src4);
-
- ILVR_B_2VECS_UB(src0, src1, src1, src2, vec0, vec1);
- ILVR_B_2VECS_UB(src2, src3, src3, src4, vec2, vec3);
-
- /* filter calc */
- tmp0 = __msa_dotp_u_h(vec0, filt0);
- tmp1 = __msa_dotp_u_h(vec1, filt0);
- tmp2 = __msa_dotp_u_h(vec2, filt0);
- tmp3 = __msa_dotp_u_h(vec3, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
+
+ LD_UB5(src, src_stride, src0, src1, src2, src3, src4);
+ ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1);
+ ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
+ tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+ ST8x4_UB(out0, out1, dst, dst_stride);
}
static void common_vt_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
@@ -431,51 +394,39 @@ static void common_vt_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride,
uint32_t loop_cnt;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+ v16i8 out0, out1;
v8u16 tmp0, tmp1, tmp2, tmp3;
- v8u16 filt;
+ v8i16 filt;
/* rearranging filter_y */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
- src0 = LOAD_UB(src);
+ src0 = LD_UB(src);
src += src_stride;
for (loop_cnt = (height >> 3); loop_cnt--;) {
- LOAD_8VECS_UB(src, src_stride,
- src1, src2, src3, src4, src5, src6, src7, src8);
+ LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8);
src += (8 * src_stride);
- ILVR_B_4VECS_UB(src0, src1, src2, src3, src1, src2, src3, src4,
- vec0, vec1, vec2, vec3);
-
- ILVR_B_4VECS_UB(src4, src5, src6, src7, src5, src6, src7, src8,
- vec4, vec5, vec6, vec7);
-
- tmp0 = __msa_dotp_u_h(vec0, filt0);
- tmp1 = __msa_dotp_u_h(vec1, filt0);
- tmp2 = __msa_dotp_u_h(vec2, filt0);
- tmp3 = __msa_dotp_u_h(vec3, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+ ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1,
+ vec2, vec3);
+ ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5,
+ vec6, vec7);
+ DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1,
+ tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+ ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
- tmp0 = __msa_dotp_u_h(vec4, filt0);
- tmp1 = __msa_dotp_u_h(vec5, filt0);
- tmp2 = __msa_dotp_u_h(vec6, filt0);
- tmp3 = __msa_dotp_u_h(vec7, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_8_BYTES_4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+ DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1,
+ tmp2, tmp3);
+ SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS);
+ SAT_UH4_UH(tmp0, tmp1, tmp2, tmp3, 7);
+ PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1);
+ ST8x4_UB(out0, out1, dst, dst_stride);
dst += (4 * dst_stride);
src0 = src8;
@@ -499,57 +450,45 @@ static void common_vt_2t_16w_msa(const uint8_t *src, int32_t src_stride,
v16u8 src0, src1, src2, src3, src4;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
v8u16 tmp0, tmp1, tmp2, tmp3;
- v8u16 filt;
+ v8i16 filt;
/* rearranging filter_y */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
- src0 = LOAD_UB(src);
+ src0 = LD_UB(src);
src += src_stride;
for (loop_cnt = (height >> 2); loop_cnt--;) {
- LOAD_4VECS_UB(src, src_stride, src1, src2, src3, src4);
+ LD_UB4(src, src_stride, src1, src2, src3, src4);
src += (4 * src_stride);
- ILV_B_LRLR_UB(src0, src1, src1, src2, vec1, vec0, vec3, vec2);
-
- tmp0 = __msa_dotp_u_h(vec0, filt0);
- tmp1 = __msa_dotp_u_h(vec1, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp1, tmp0, dst);
+ ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+ ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_ST_SB(tmp0, tmp1, dst);
dst += dst_stride;
- ILV_B_LRLR_UB(src2, src3, src3, src4, vec5, vec4, vec7, vec6);
-
- tmp2 = __msa_dotp_u_h(vec2, filt0);
- tmp3 = __msa_dotp_u_h(vec3, filt0);
-
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp3, tmp2, dst);
+ ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
+ ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_ST_SB(tmp2, tmp3, dst);
dst += dst_stride;
- tmp0 = __msa_dotp_u_h(vec4, filt0);
- tmp1 = __msa_dotp_u_h(vec5, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp1, tmp0, dst);
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_ST_SB(tmp0, tmp1, dst);
dst += dst_stride;
- tmp2 = __msa_dotp_u_h(vec6, filt0);
- tmp3 = __msa_dotp_u_h(vec7, filt0);
-
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp3, tmp2, dst);
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_ST_SB(tmp2, tmp3, dst);
dst += dst_stride;
src0 = src4;
@@ -563,93 +502,68 @@ static void common_vt_2t_32w_msa(const uint8_t *src, int32_t src_stride,
v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9;
v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
v8u16 tmp0, tmp1, tmp2, tmp3;
- v8u16 filt;
+ v8i16 filt;
/* rearranging filter_y */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
- src0 = LOAD_UB(src);
- src5 = LOAD_UB(src + 16);
+ src0 = LD_UB(src);
+ src5 = LD_UB(src + 16);
src += src_stride;
for (loop_cnt = (height >> 2); loop_cnt--;) {
- LOAD_4VECS_UB(src, src_stride, src1, src2, src3, src4);
+ LD_UB4(src, src_stride, src1, src2, src3, src4);
+ ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+ ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
- ILV_B_LRLR_UB(src0, src1, src1, src2, vec1, vec0, vec3, vec2);
-
- LOAD_4VECS_UB(src + 16, src_stride, src6, src7, src8, src9);
+ LD_UB4(src + 16, src_stride, src6, src7, src8, src9);
src += (4 * src_stride);
- tmp0 = __msa_dotp_u_h(vec0, filt0);
- tmp1 = __msa_dotp_u_h(vec1, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp1, tmp0, dst);
-
- tmp2 = __msa_dotp_u_h(vec2, filt0);
- tmp3 = __msa_dotp_u_h(vec3, filt0);
-
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp3, tmp2, dst + dst_stride);
-
- ILV_B_LRLR_UB(src2, src3, src3, src4, vec5, vec4, vec7, vec6);
-
- tmp0 = __msa_dotp_u_h(vec4, filt0);
- tmp1 = __msa_dotp_u_h(vec5, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 2 * dst_stride);
-
- tmp2 = __msa_dotp_u_h(vec6, filt0);
- tmp3 = __msa_dotp_u_h(vec7, filt0);
-
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 3 * dst_stride);
-
- ILV_B_LRLR_UB(src5, src6, src6, src7, vec1, vec0, vec3, vec2);
-
- tmp0 = __msa_dotp_u_h(vec0, filt0);
- tmp1 = __msa_dotp_u_h(vec1, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 16);
-
- tmp2 = __msa_dotp_u_h(vec2, filt0);
- tmp3 = __msa_dotp_u_h(vec3, filt0);
-
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 16 + dst_stride);
-
- ILV_B_LRLR_UB(src7, src8, src8, src9, vec5, vec4, vec7, vec6);
-
- tmp0 = __msa_dotp_u_h(vec4, filt0);
- tmp1 = __msa_dotp_u_h(vec5, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 16 + 2 * dst_stride);
-
- tmp2 = __msa_dotp_u_h(vec6, filt0);
- tmp3 = __msa_dotp_u_h(vec7, filt0);
-
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 16 + 3 * dst_stride);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_ST_SB(tmp0, tmp1, dst);
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride);
+
+ ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6);
+ ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7);
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_ST_SB(tmp0, tmp1, dst + 2 * dst_stride);
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_ST_SB(tmp2, tmp3, dst + 3 * dst_stride);
+
+ ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2);
+ ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_ST_SB(tmp0, tmp1, dst + 16);
+
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_ST_SB(tmp2, tmp3, dst + 16 + dst_stride);
+
+ ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6);
+ ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7);
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_ST_SB(tmp0, tmp1, dst + 16 + 2 * dst_stride);
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_ST_SB(tmp2, tmp3, dst + 16 + 3 * dst_stride);
dst += (4 * dst_stride);
src0 = src4;
@@ -661,97 +575,72 @@ static void common_vt_2t_64w_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride,
int8_t *filter, int32_t height) {
uint32_t loop_cnt;
- v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
- v16u8 src8, src9, src10, src11;
- v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10;
+ v16u8 src11, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0;
v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- v8u16 filt;
+ v8i16 filt;
/* rearranging filter_y */
- filt = LOAD_UH(filter);
- filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0);
+ filt = LD_SH(filter);
+ filt0 = (v16u8)__msa_splati_h(filt, 0);
- LOAD_4VECS_UB(src, 16, src0, src3, src6, src9);
+ LD_UB4(src, 16, src0, src3, src6, src9);
src += src_stride;
for (loop_cnt = (height >> 1); loop_cnt--;) {
- LOAD_2VECS_UB(src, src_stride, src1, src2);
- LOAD_2VECS_UB(src + 16, src_stride, src4, src5);
- LOAD_2VECS_UB(src + 32, src_stride, src7, src8);
- LOAD_2VECS_UB(src + 48, src_stride, src10, src11);
+ LD_UB2(src, src_stride, src1, src2);
+ LD_UB2(src + 16, src_stride, src4, src5);
+ LD_UB2(src + 32, src_stride, src7, src8);
+ LD_UB2(src + 48, src_stride, src10, src11);
src += (2 * src_stride);
- ILV_B_LRLR_UB(src0, src1, src1, src2, vec1, vec0, vec3, vec2);
-
- tmp0 = __msa_dotp_u_h(vec0, filt0);
- tmp1 = __msa_dotp_u_h(vec1, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp1, tmp0, dst);
-
- tmp2 = __msa_dotp_u_h(vec2, filt0);
- tmp3 = __msa_dotp_u_h(vec3, filt0);
-
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp3, tmp2, dst + dst_stride);
-
- ILV_B_LRLR_UB(src3, src4, src4, src5, vec5, vec4, vec7, vec6);
-
- tmp4 = __msa_dotp_u_h(vec4, filt0);
- tmp5 = __msa_dotp_u_h(vec5, filt0);
-
- tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7);
- tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp5, tmp4, dst + 16);
-
- tmp6 = __msa_dotp_u_h(vec6, filt0);
- tmp7 = __msa_dotp_u_h(vec7, filt0);
-
- tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 7);
- tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp7, tmp6, dst + 16 + dst_stride);
-
- ILV_B_LRLR_UB(src6, src7, src7, src8, vec1, vec0, vec3, vec2);
-
- tmp0 = __msa_dotp_u_h(vec0, filt0);
- tmp1 = __msa_dotp_u_h(vec1, filt0);
-
- tmp0 = SRARI_SATURATE_UNSIGNED_H(tmp0, FILTER_BITS, 7);
- tmp1 = SRARI_SATURATE_UNSIGNED_H(tmp1, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp1, tmp0, dst + 32);
-
- tmp2 = __msa_dotp_u_h(vec2, filt0);
- tmp3 = __msa_dotp_u_h(vec3, filt0);
-
- tmp2 = SRARI_SATURATE_UNSIGNED_H(tmp2, FILTER_BITS, 7);
- tmp3 = SRARI_SATURATE_UNSIGNED_H(tmp3, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp3, tmp2, dst + 32 + dst_stride);
-
- ILV_B_LRLR_UB(src9, src10, src10, src11, vec5, vec4, vec7, vec6);
-
- tmp4 = __msa_dotp_u_h(vec4, filt0);
- tmp5 = __msa_dotp_u_h(vec5, filt0);
-
- tmp4 = SRARI_SATURATE_UNSIGNED_H(tmp4, FILTER_BITS, 7);
- tmp5 = SRARI_SATURATE_UNSIGNED_H(tmp5, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp5, tmp4, dst + 48);
-
- tmp6 = __msa_dotp_u_h(vec6, filt0);
- tmp7 = __msa_dotp_u_h(vec7, filt0);
-
- tmp6 = SRARI_SATURATE_UNSIGNED_H(tmp6, FILTER_BITS, 7);
- tmp7 = SRARI_SATURATE_UNSIGNED_H(tmp7, FILTER_BITS, 7);
-
- PCKEV_B_STORE_VEC(tmp7, tmp6, dst + 48 + dst_stride);
+ ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2);
+ ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_ST_SB(tmp0, tmp1, dst);
+
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride);
+
+ ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6);
+ ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7);
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
+ SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
+ SAT_UH2_UH(tmp4, tmp5, 7);
+ PCKEV_ST_SB(tmp4, tmp5, dst + 16);
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
+ SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
+ SAT_UH2_UH(tmp6, tmp7, 7);
+ PCKEV_ST_SB(tmp6, tmp7, dst + 16 + dst_stride);
+
+ ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2);
+ ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3);
+ DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1);
+ SRARI_H2_UH(tmp0, tmp1, FILTER_BITS);
+ SAT_UH2_UH(tmp0, tmp1, 7);
+ PCKEV_ST_SB(tmp0, tmp1, dst + 32);
+
+ DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3);
+ SRARI_H2_UH(tmp2, tmp3, FILTER_BITS);
+ SAT_UH2_UH(tmp2, tmp3, 7);
+ PCKEV_ST_SB(tmp2, tmp3, dst + 32 + dst_stride);
+
+ ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6);
+ ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7);
+ DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5);
+ SRARI_H2_UH(tmp4, tmp5, FILTER_BITS);
+ SAT_UH2_UH(tmp4, tmp5, 7);
+ PCKEV_ST_SB(tmp4, tmp5, dst + 48);
+
+ DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7);
+ SRARI_H2_UH(tmp6, tmp7, FILTER_BITS);
+ SAT_UH2_UH(tmp6, tmp7, 7);
+ PCKEV_ST_SB(tmp6, tmp7, dst + 48 + dst_stride);
dst += (2 * dst_stride);
src0 = src2;
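
Note: the rewritten 2-tap vertical paths above all share one pipeline — interleave two adjacent rows (ILVR_B*/ILVL_B*), take an unsigned byte dot product with the splatted filter pair (DOTP_UB*), round-shift (SRARI_H*), saturate (SAT_UH*), then pack and store (PCKEV/ST). A minimal scalar sketch of what one output row computes is given below; it assumes FILTER_BITS is 7, that `filter` points at the two active (non-negative) bilinear taps, and that the unsigned saturation clamps to a byte — assumptions for illustration, not taken from this diff.

#include <stdint.h>

/* Hedged scalar sketch of the 2-tap vertical filter the MSA code vectorizes:
 * dot product of two vertically adjacent pixels with the filter pair,
 * rounded shift by FILTER_BITS, then saturation to the 8-bit output range. */
static void vert_2t_row_sketch(const uint8_t *src, int32_t src_stride,
                               uint8_t *dst, const int8_t *filter,
                               int32_t width) {
  int32_t x;
  for (x = 0; x < width; ++x) {
    int32_t sum = src[x] * filter[0] + src[x + src_stride] * filter[1];
    sum = (sum + (1 << 6)) >> 7;               /* like SRARI_H with FILTER_BITS == 7 */
    dst[x] = (sum > 255) ? 255 : (uint8_t)sum; /* like SAT_UH + byte pack */
  }
}
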
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_avg_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_avg_msa.c
index 72b8ab71397..eb8776078b2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_avg_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_avg_msa.c
@@ -19,46 +19,35 @@ static void avg_width4_msa(const uint8_t *src, int32_t src_stride,
if (0 == (height % 4)) {
for (cnt = (height / 4); cnt--;) {
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
- LOAD_4VECS_UB(dst, dst_stride, dst0, dst1, dst2, dst3);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
- dst0 = __msa_aver_u_b(src0, dst0);
- dst1 = __msa_aver_u_b(src1, dst1);
- dst2 = __msa_aver_u_b(src2, dst2);
- dst3 = __msa_aver_u_b(src3, dst3);
+ AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
+ dst0, dst1, dst2, dst3);
out0 = __msa_copy_u_w((v4i32)dst0, 0);
out1 = __msa_copy_u_w((v4i32)dst1, 0);
out2 = __msa_copy_u_w((v4i32)dst2, 0);
out3 = __msa_copy_u_w((v4i32)dst3, 0);
-
- STORE_WORD(dst, out0);
- dst += dst_stride;
- STORE_WORD(dst, out1);
- dst += dst_stride;
- STORE_WORD(dst, out2);
- dst += dst_stride;
- STORE_WORD(dst, out3);
- dst += dst_stride;
+ SW4(out0, out1, out2, out3, dst, dst_stride);
+ dst += (4 * dst_stride);
}
} else if (0 == (height % 2)) {
for (cnt = (height / 2); cnt--;) {
- LOAD_2VECS_UB(src, src_stride, src0, src1);
+ LD_UB2(src, src_stride, src0, src1);
src += (2 * src_stride);
- LOAD_2VECS_UB(dst, dst_stride, dst0, dst1);
+ LD_UB2(dst, dst_stride, dst0, dst1);
- dst0 = __msa_aver_u_b(src0, dst0);
- dst1 = __msa_aver_u_b(src1, dst1);
+ AVER_UB2_UB(src0, dst0, src1, dst1, dst0, dst1);
out0 = __msa_copy_u_w((v4i32)dst0, 0);
out1 = __msa_copy_u_w((v4i32)dst1, 0);
-
- STORE_WORD(dst, out0);
+ SW(out0, dst);
dst += dst_stride;
- STORE_WORD(dst, out1);
+ SW(out1, dst);
dst += dst_stride;
}
}
@@ -72,29 +61,19 @@ static void avg_width8_msa(const uint8_t *src, int32_t src_stride,
v16u8 dst0, dst1, dst2, dst3;
for (cnt = (height / 4); cnt--;) {
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
- LOAD_4VECS_UB(dst, dst_stride, dst0, dst1, dst2, dst3);
-
- dst0 = __msa_aver_u_b(src0, dst0);
- dst1 = __msa_aver_u_b(src1, dst1);
- dst2 = __msa_aver_u_b(src2, dst2);
- dst3 = __msa_aver_u_b(src3, dst3);
+ AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
+ dst0, dst1, dst2, dst3);
out0 = __msa_copy_u_d((v2i64)dst0, 0);
out1 = __msa_copy_u_d((v2i64)dst1, 0);
out2 = __msa_copy_u_d((v2i64)dst2, 0);
out3 = __msa_copy_u_d((v2i64)dst3, 0);
-
- STORE_DWORD(dst, out0);
- dst += dst_stride;
- STORE_DWORD(dst, out1);
- dst += dst_stride;
- STORE_DWORD(dst, out2);
- dst += dst_stride;
- STORE_DWORD(dst, out3);
- dst += dst_stride;
+ SD4(out0, out1, out2, out3, dst, dst_stride);
+ dst += (4 * dst_stride);
}
}
@@ -105,24 +84,15 @@ static void avg_width16_msa(const uint8_t *src, int32_t src_stride,
v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
for (cnt = (height / 8); cnt--;) {
- LOAD_8VECS_UB(src, src_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
+ LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
- LOAD_8VECS_UB(dst, dst_stride,
- dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
-
- dst0 = __msa_aver_u_b(src0, dst0);
- dst1 = __msa_aver_u_b(src1, dst1);
- dst2 = __msa_aver_u_b(src2, dst2);
- dst3 = __msa_aver_u_b(src3, dst3);
- dst4 = __msa_aver_u_b(src4, dst4);
- dst5 = __msa_aver_u_b(src5, dst5);
- dst6 = __msa_aver_u_b(src6, dst6);
- dst7 = __msa_aver_u_b(src7, dst7);
-
- STORE_8VECS_UB(dst, dst_stride,
- dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7);
+ AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
+ dst0, dst1, dst2, dst3);
+ AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7,
+ dst4, dst5, dst6, dst7);
+ ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, dst_stride);
dst += (8 * dst_stride);
}
}
@@ -137,99 +107,34 @@ static void avg_width32_msa(const uint8_t *src, int32_t src_stride,
v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15;
for (cnt = (height / 8); cnt--;) {
- src0 = LOAD_UB(src);
- src1 = LOAD_UB(src + 16);
- src += src_stride;
- src2 = LOAD_UB(src);
- src3 = LOAD_UB(src + 16);
- src += src_stride;
- src4 = LOAD_UB(src);
- src5 = LOAD_UB(src + 16);
- src += src_stride;
- src6 = LOAD_UB(src);
- src7 = LOAD_UB(src + 16);
- src += src_stride;
-
- dst0 = LOAD_UB(dst_dup);
- dst1 = LOAD_UB(dst_dup + 16);
- dst_dup += dst_stride;
- dst2 = LOAD_UB(dst_dup);
- dst3 = LOAD_UB(dst_dup + 16);
- dst_dup += dst_stride;
- dst4 = LOAD_UB(dst_dup);
- dst5 = LOAD_UB(dst_dup + 16);
- dst_dup += dst_stride;
- dst6 = LOAD_UB(dst_dup);
- dst7 = LOAD_UB(dst_dup + 16);
- dst_dup += dst_stride;
-
- src8 = LOAD_UB(src);
- src9 = LOAD_UB(src + 16);
- src += src_stride;
- src10 = LOAD_UB(src);
- src11 = LOAD_UB(src + 16);
- src += src_stride;
- src12 = LOAD_UB(src);
- src13 = LOAD_UB(src + 16);
- src += src_stride;
- src14 = LOAD_UB(src);
- src15 = LOAD_UB(src + 16);
- src += src_stride;
-
- dst8 = LOAD_UB(dst_dup);
- dst9 = LOAD_UB(dst_dup + 16);
- dst_dup += dst_stride;
- dst10 = LOAD_UB(dst_dup);
- dst11 = LOAD_UB(dst_dup + 16);
- dst_dup += dst_stride;
- dst12 = LOAD_UB(dst_dup);
- dst13 = LOAD_UB(dst_dup + 16);
- dst_dup += dst_stride;
- dst14 = LOAD_UB(dst_dup);
- dst15 = LOAD_UB(dst_dup + 16);
- dst_dup += dst_stride;
-
- dst0 = __msa_aver_u_b(src0, dst0);
- dst1 = __msa_aver_u_b(src1, dst1);
- dst2 = __msa_aver_u_b(src2, dst2);
- dst3 = __msa_aver_u_b(src3, dst3);
- dst4 = __msa_aver_u_b(src4, dst4);
- dst5 = __msa_aver_u_b(src5, dst5);
- dst6 = __msa_aver_u_b(src6, dst6);
- dst7 = __msa_aver_u_b(src7, dst7);
- dst8 = __msa_aver_u_b(src8, dst8);
- dst9 = __msa_aver_u_b(src9, dst9);
- dst10 = __msa_aver_u_b(src10, dst10);
- dst11 = __msa_aver_u_b(src11, dst11);
- dst12 = __msa_aver_u_b(src12, dst12);
- dst13 = __msa_aver_u_b(src13, dst13);
- dst14 = __msa_aver_u_b(src14, dst14);
- dst15 = __msa_aver_u_b(src15, dst15);
-
- STORE_UB(dst0, dst);
- STORE_UB(dst1, dst + 16);
- dst += dst_stride;
- STORE_UB(dst2, dst);
- STORE_UB(dst3, dst + 16);
- dst += dst_stride;
- STORE_UB(dst4, dst);
- STORE_UB(dst5, dst + 16);
- dst += dst_stride;
- STORE_UB(dst6, dst);
- STORE_UB(dst7, dst + 16);
- dst += dst_stride;
- STORE_UB(dst8, dst);
- STORE_UB(dst9, dst + 16);
- dst += dst_stride;
- STORE_UB(dst10, dst);
- STORE_UB(dst11, dst + 16);
- dst += dst_stride;
- STORE_UB(dst12, dst);
- STORE_UB(dst13, dst + 16);
- dst += dst_stride;
- STORE_UB(dst14, dst);
- STORE_UB(dst15, dst + 16);
- dst += dst_stride;
+ LD_UB4(src, src_stride, src0, src2, src4, src6);
+ LD_UB4(src + 16, src_stride, src1, src3, src5, src7);
+ src += (4 * src_stride);
+ LD_UB4(dst_dup, dst_stride, dst0, dst2, dst4, dst6);
+ LD_UB4(dst_dup + 16, dst_stride, dst1, dst3, dst5, dst7);
+ dst_dup += (4 * dst_stride);
+ LD_UB4(src, src_stride, src8, src10, src12, src14);
+ LD_UB4(src + 16, src_stride, src9, src11, src13, src15);
+ src += (4 * src_stride);
+ LD_UB4(dst_dup, dst_stride, dst8, dst10, dst12, dst14);
+ LD_UB4(dst_dup + 16, dst_stride, dst9, dst11, dst13, dst15);
+ dst_dup += (4 * dst_stride);
+
+ AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
+ dst0, dst1, dst2, dst3);
+ AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7,
+ dst4, dst5, dst6, dst7);
+ AVER_UB4_UB(src8, dst8, src9, dst9, src10, dst10, src11, dst11,
+ dst8, dst9, dst10, dst11);
+ AVER_UB4_UB(src12, dst12, src13, dst13, src14, dst14, src15, dst15,
+ dst12, dst13, dst14, dst15);
+
+ ST_UB4(dst0, dst2, dst4, dst6, dst, dst_stride);
+ ST_UB4(dst1, dst3, dst5, dst7, dst + 16, dst_stride);
+ dst += (4 * dst_stride);
+ ST_UB4(dst8, dst10, dst12, dst14, dst, dst_stride);
+ ST_UB4(dst9, dst11, dst13, dst15, dst + 16, dst_stride);
+ dst += (4 * dst_stride);
}
}
@@ -243,48 +148,40 @@ static void avg_width64_msa(const uint8_t *src, int32_t src_stride,
v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15;
for (cnt = (height / 4); cnt--;) {
- LOAD_4VECS_UB(src, 16, src0, src1, src2, src3);
+ LD_UB4(src, 16, src0, src1, src2, src3);
src += src_stride;
- LOAD_4VECS_UB(src, 16, src4, src5, src6, src7);
+ LD_UB4(src, 16, src4, src5, src6, src7);
src += src_stride;
- LOAD_4VECS_UB(src, 16, src8, src9, src10, src11);
+ LD_UB4(src, 16, src8, src9, src10, src11);
src += src_stride;
- LOAD_4VECS_UB(src, 16, src12, src13, src14, src15);
+ LD_UB4(src, 16, src12, src13, src14, src15);
src += src_stride;
- LOAD_4VECS_UB(dst_dup, 16, dst0, dst1, dst2, dst3);
+ LD_UB4(dst_dup, 16, dst0, dst1, dst2, dst3);
dst_dup += dst_stride;
- LOAD_4VECS_UB(dst_dup, 16, dst4, dst5, dst6, dst7);
+ LD_UB4(dst_dup, 16, dst4, dst5, dst6, dst7);
dst_dup += dst_stride;
- LOAD_4VECS_UB(dst_dup, 16, dst8, dst9, dst10, dst11);
+ LD_UB4(dst_dup, 16, dst8, dst9, dst10, dst11);
dst_dup += dst_stride;
- LOAD_4VECS_UB(dst_dup, 16, dst12, dst13, dst14, dst15);
+ LD_UB4(dst_dup, 16, dst12, dst13, dst14, dst15);
dst_dup += dst_stride;
- dst0 = __msa_aver_u_b(src0, dst0);
- dst1 = __msa_aver_u_b(src1, dst1);
- dst2 = __msa_aver_u_b(src2, dst2);
- dst3 = __msa_aver_u_b(src3, dst3);
- dst4 = __msa_aver_u_b(src4, dst4);
- dst5 = __msa_aver_u_b(src5, dst5);
- dst6 = __msa_aver_u_b(src6, dst6);
- dst7 = __msa_aver_u_b(src7, dst7);
- dst8 = __msa_aver_u_b(src8, dst8);
- dst9 = __msa_aver_u_b(src9, dst9);
- dst10 = __msa_aver_u_b(src10, dst10);
- dst11 = __msa_aver_u_b(src11, dst11);
- dst12 = __msa_aver_u_b(src12, dst12);
- dst13 = __msa_aver_u_b(src13, dst13);
- dst14 = __msa_aver_u_b(src14, dst14);
- dst15 = __msa_aver_u_b(src15, dst15);
+ AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3,
+ dst0, dst1, dst2, dst3);
+ AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7,
+ dst4, dst5, dst6, dst7);
+ AVER_UB4_UB(src8, dst8, src9, dst9, src10, dst10, src11, dst11,
+ dst8, dst9, dst10, dst11);
+ AVER_UB4_UB(src12, dst12, src13, dst13, src14, dst14, src15, dst15,
+ dst12, dst13, dst14, dst15);
- STORE_4VECS_UB(dst, 16, dst0, dst1, dst2, dst3);
+ ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
dst += dst_stride;
- STORE_4VECS_UB(dst, 16, dst4, dst5, dst6, dst7);
+ ST_UB4(dst4, dst5, dst6, dst7, dst, 16);
dst += dst_stride;
- STORE_4VECS_UB(dst, 16, dst8, dst9, dst10, dst11);
+ ST_UB4(dst8, dst9, dst10, dst11, dst, 16);
dst += dst_stride;
- STORE_4VECS_UB(dst, 16, dst12, dst13, dst14, dst15);
+ ST_UB4(dst12, dst13, dst14, dst15, dst, 16);
dst += dst_stride;
}
}
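
Note: the AVER_UB*_UB macros introduced above average source and destination pixels in place of the per-vector __msa_aver_u_b calls. A scalar sketch of one averaged row follows; the rounding-up behaviour shown is an assumption about the averaging instruction, not something stated in this diff.

#include <stdint.h>

/* Hedged scalar sketch of the avg_width* kernels: average each source byte
 * with the existing destination byte, rounding up, and write it back. */
static void avg_row_sketch(const uint8_t *src, uint8_t *dst, int32_t width) {
  int32_t x;
  for (x = 0; x < width; ++x) {
    dst[x] = (uint8_t)((src[x] + dst[x] + 1) >> 1);  /* rounding average */
  }
}
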
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_copy_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_copy_msa.c
index 064ba762fa0..7a292c5ce16 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_copy_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_copy_msa.c
@@ -12,16 +12,14 @@
#include "vp9/common/mips/msa/vp9_macros_msa.h"
static void copy_width8_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int32_t height) {
+ uint8_t *dst, int32_t dst_stride, int32_t height) {
int32_t cnt;
uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
if (0 == height % 12) {
for (cnt = (height / 12); cnt--;) {
- LOAD_8VECS_UB(src, src_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
out0 = __msa_copy_u_d((v2i64)src0, 0);
@@ -33,44 +31,24 @@ static void copy_width8_msa(const uint8_t *src, int32_t src_stride,
out6 = __msa_copy_u_d((v2i64)src6, 0);
out7 = __msa_copy_u_d((v2i64)src7, 0);
- STORE_DWORD(dst, out0);
- dst += dst_stride;
- STORE_DWORD(dst, out1);
- dst += dst_stride;
- STORE_DWORD(dst, out2);
- dst += dst_stride;
- STORE_DWORD(dst, out3);
- dst += dst_stride;
- STORE_DWORD(dst, out4);
- dst += dst_stride;
- STORE_DWORD(dst, out5);
- dst += dst_stride;
- STORE_DWORD(dst, out6);
- dst += dst_stride;
- STORE_DWORD(dst, out7);
- dst += dst_stride;
+ SD4(out0, out1, out2, out3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(out4, out5, out6, out7, dst, dst_stride);
+ dst += (4 * dst_stride);
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
out0 = __msa_copy_u_d((v2i64)src0, 0);
out1 = __msa_copy_u_d((v2i64)src1, 0);
out2 = __msa_copy_u_d((v2i64)src2, 0);
out3 = __msa_copy_u_d((v2i64)src3, 0);
-
- STORE_DWORD(dst, out0);
- dst += dst_stride;
- STORE_DWORD(dst, out1);
- dst += dst_stride;
- STORE_DWORD(dst, out2);
- dst += dst_stride;
- STORE_DWORD(dst, out3);
- dst += dst_stride;
+ SD4(out0, out1, out2, out3, dst, dst_stride);
+ dst += (4 * dst_stride);
}
} else if (0 == height % 8) {
for (cnt = height >> 3; cnt--;) {
- LOAD_8VECS_UB(src, src_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
out0 = __msa_copy_u_d((v2i64)src0, 0);
@@ -82,53 +60,33 @@ static void copy_width8_msa(const uint8_t *src, int32_t src_stride,
out6 = __msa_copy_u_d((v2i64)src6, 0);
out7 = __msa_copy_u_d((v2i64)src7, 0);
- STORE_DWORD(dst, out0);
- dst += dst_stride;
- STORE_DWORD(dst, out1);
- dst += dst_stride;
- STORE_DWORD(dst, out2);
- dst += dst_stride;
- STORE_DWORD(dst, out3);
- dst += dst_stride;
- STORE_DWORD(dst, out4);
- dst += dst_stride;
- STORE_DWORD(dst, out5);
- dst += dst_stride;
- STORE_DWORD(dst, out6);
- dst += dst_stride;
- STORE_DWORD(dst, out7);
- dst += dst_stride;
+ SD4(out0, out1, out2, out3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(out4, out5, out6, out7, dst, dst_stride);
+ dst += (4 * dst_stride);
}
} else if (0 == height % 4) {
for (cnt = (height / 4); cnt--;) {
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
-
out0 = __msa_copy_u_d((v2i64)src0, 0);
out1 = __msa_copy_u_d((v2i64)src1, 0);
out2 = __msa_copy_u_d((v2i64)src2, 0);
out3 = __msa_copy_u_d((v2i64)src3, 0);
- STORE_DWORD(dst, out0);
- dst += dst_stride;
- STORE_DWORD(dst, out1);
- dst += dst_stride;
- STORE_DWORD(dst, out2);
- dst += dst_stride;
- STORE_DWORD(dst, out3);
- dst += dst_stride;
+ SD4(out0, out1, out2, out3, dst, dst_stride);
+ dst += (4 * dst_stride);
}
} else if (0 == height % 2) {
for (cnt = (height / 2); cnt--;) {
- LOAD_2VECS_UB(src, src_stride, src0, src1);
+ LD_UB2(src, src_stride, src0, src1);
src += (2 * src_stride);
-
out0 = __msa_copy_u_d((v2i64)src0, 0);
out1 = __msa_copy_u_d((v2i64)src1, 0);
- STORE_DWORD(dst, out0);
+ SD(out0, dst);
dst += dst_stride;
- STORE_DWORD(dst, out1);
+ SD(out1, dst);
dst += dst_stride;
}
}
@@ -147,12 +105,12 @@ static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
dst_tmp = dst;
for (loop_cnt = (height >> 3); loop_cnt--;) {
- LOAD_8VECS_UB(src_tmp, src_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB8(src_tmp, src_stride,
+ src0, src1, src2, src3, src4, src5, src6, src7);
src_tmp += (8 * src_stride);
- STORE_8VECS_UB(dst_tmp, dst_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
+ ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
+ dst_tmp, dst_stride);
dst_tmp += (8 * dst_stride);
}
@@ -162,90 +120,79 @@ static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
}
static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int32_t height) {
+ uint8_t *dst, int32_t dst_stride, int32_t height) {
int32_t cnt;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
if (0 == height % 12) {
for (cnt = (height / 12); cnt--;) {
- LOAD_8VECS_UB(src, src_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
+ LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
src += (8 * src_stride);
-
- STORE_8VECS_UB(dst, dst_stride,
- src0, src1, src2, src3, src4, src5, src6, src7);
+ ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride);
dst += (8 * dst_stride);
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
-
- STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
dst += (4 * dst_stride);
}
} else if (0 == height % 8) {
copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
} else if (0 == height % 4) {
for (cnt = (height >> 2); cnt--;) {
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
src += (4 * src_stride);
- STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
dst += (4 * dst_stride);
}
}
}
static void copy_width32_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int32_t height) {
+ uint8_t *dst, int32_t dst_stride, int32_t height) {
int32_t cnt;
v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
if (0 == height % 12) {
for (cnt = (height / 12); cnt--;) {
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
- LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
src += (4 * src_stride);
-
- STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
- STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7);
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
dst += (4 * dst_stride);
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
- LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
src += (4 * src_stride);
-
- STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
- STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7);
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
dst += (4 * dst_stride);
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
- LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
src += (4 * src_stride);
-
- STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
- STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7);
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
dst += (4 * dst_stride);
}
} else if (0 == height % 8) {
copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32);
} else if (0 == height % 4) {
for (cnt = (height >> 2); cnt--;) {
- LOAD_4VECS_UB(src, src_stride, src0, src1, src2, src3);
- LOAD_4VECS_UB(src + 16, src_stride, src4, src5, src6, src7);
+ LD_UB4(src, src_stride, src0, src1, src2, src3);
+ LD_UB4(src + 16, src_stride, src4, src5, src6, src7);
src += (4 * src_stride);
-
- STORE_4VECS_UB(dst, dst_stride, src0, src1, src2, src3);
- STORE_4VECS_UB(dst + 16, dst_stride, src4, src5, src6, src7);
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride);
dst += (4 * dst_stride);
}
}
}
static void copy_width64_msa(const uint8_t *src, int32_t src_stride,
- uint8_t *dst, int32_t dst_stride,
- int32_t height) {
+ uint8_t *dst, int32_t dst_stride, int32_t height) {
copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 64);
}
@@ -264,8 +211,8 @@ void vp9_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
uint32_t cnt, tmp;
/* 1 word storage */
for (cnt = h; cnt--;) {
- tmp = LOAD_WORD(src);
- STORE_WORD(dst, tmp);
+ tmp = LW(src);
+ SW(tmp, dst);
src += src_stride;
dst += dst_stride;
}
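
Note: the copy_width*_msa kernels only move pixels; the vector loads/stores (LD_UB*/ST_UB*, SD4/SW) are an optimisation of a plain per-row copy. A scalar sketch of the equivalent operation, for reference only:

#include <stdint.h>
#include <string.h>

/* Hedged scalar sketch of copy_width*_msa: copy `width` bytes per row. */
static void copy_rows_sketch(const uint8_t *src, int32_t src_stride,
                             uint8_t *dst, int32_t dst_stride,
                             int32_t width, int32_t height) {
  int32_t y;
  for (y = 0; y < height; ++y) {
    memcpy(dst, src, (size_t)width);
    src += src_stride;
    dst += dst_stride;
  }
}
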
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_msa.h
index b109a4014f8..40fe94d3be5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_msa.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_convolve_msa.h
@@ -16,142 +16,104 @@
extern const uint8_t mc_filt_mask_arr[16 * 3];
-#define HORIZ_8TAP_FILT(src, mask0, mask1, mask2, mask3, \
- filt_h0, filt_h1, filt_h2, filt_h3) ({ \
- v8i16 vec0, vec1, vec2, vec3, horiz_out; \
- \
- vec0 = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src), (v16i8)(src)); \
- vec0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)(filt_h0)); \
- vec1 = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src), (v16i8)(src)); \
- vec0 = __msa_dpadd_s_h(vec0, (v16i8)(filt_h1), (v16i8)vec1); \
- vec2 = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src), (v16i8)(src)); \
- vec2 = __msa_dotp_s_h((v16i8)vec2, (v16i8)(filt_h2)); \
- vec3 = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src), (v16i8)(src)); \
- vec2 = __msa_dpadd_s_h(vec2, (v16i8)(filt_h3), (v16i8)vec3); \
- vec0 = __msa_adds_s_h(vec0, vec2); \
- horiz_out = SRARI_SATURATE_SIGNED_H(vec0, FILTER_BITS, 7); \
- \
- horiz_out; \
+#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, \
+ filt0, filt1, filt2, filt3) ({ \
+ v8i16 tmp0, tmp1; \
+ \
+ tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
+ tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)vec1, (v16i8)filt1); \
+ tmp1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \
+ tmp1 = __msa_dpadd_s_h(tmp1, (v16i8)vec3, (v16i8)filt3); \
+ tmp0 = __msa_adds_s_h(tmp0, tmp1); \
+ \
+ tmp0; \
})
-#define HORIZ_8TAP_FILT_2VECS(src0, src1, mask0, mask1, mask2, mask3, \
- filt_h0, filt_h1, filt_h2, filt_h3) ({ \
- v8i16 vec0, vec1, vec2, vec3, horiz_out; \
- \
- vec0 = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src1), (v16i8)(src0)); \
- vec0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)(filt_h0)); \
- vec1 = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src1), (v16i8)(src0)); \
- vec0 = __msa_dpadd_s_h(vec0, (v16i8)(filt_h1), (v16i8)vec1); \
- vec2 = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src1), (v16i8)(src0)); \
- vec2 = __msa_dotp_s_h((v16i8)vec2, (v16i8)(filt_h2)); \
- vec3 = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src1), (v16i8)(src0)); \
- vec2 = __msa_dpadd_s_h(vec2, ((v16i8)filt_h3), (v16i8)vec3); \
- vec0 = __msa_adds_s_h(vec0, vec2); \
- horiz_out = (v8i16)SRARI_SATURATE_SIGNED_H(vec0, FILTER_BITS, 7); \
- \
- horiz_out; \
+#define HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, \
+ filt_h0, filt_h1, filt_h2, filt_h3) ({ \
+ v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \
+ v8i16 hz_out_m; \
+ \
+ VSHF_B4_SB(src0, src1, mask0, mask1, mask2, mask3, \
+ vec0_m, vec1_m, vec2_m, vec3_m); \
+ hz_out_m = FILT_8TAP_DPADD_S_H(vec0_m, vec1_m, vec2_m, vec3_m, \
+ filt_h0, filt_h1, filt_h2, filt_h3); \
+ \
+ hz_out_m = __msa_srari_h(hz_out_m, FILTER_BITS); \
+ hz_out_m = __msa_sat_s_h(hz_out_m, 7); \
+ \
+ hz_out_m; \
})
-#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, \
- filt0, filt1, filt2, filt3) ({ \
- v8i16 tmp0, tmp1; \
- \
- tmp0 = __msa_dotp_s_h((v16i8)(vec0), (v16i8)(filt0)); \
- tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)(vec1), (v16i8)(filt1)); \
- tmp1 = __msa_dotp_s_h((v16i8)(vec2), (v16i8)(filt2)); \
- tmp1 = __msa_dpadd_s_h(tmp1, (v16i8)(vec3), ((v16i8)filt3)); \
- tmp0 = __msa_adds_s_h(tmp0, tmp1); \
- \
- tmp0; \
-})
+#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, \
+ mask0, mask1, mask2, mask3, \
+ filt0, filt1, filt2, filt3, \
+ out0, out1) { \
+ v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
+ v8i16 res0_m, res1_m, res2_m, res3_m; \
+ \
+ VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \
+ DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m); \
+ VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \
+ DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m); \
+ VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \
+ DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m); \
+ VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m); \
+ DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, res3_m); \
+ ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1); \
+}
+
+#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, \
+ mask0, mask1, mask2, mask3, \
+ filt0, filt1, filt2, filt3, \
+ out0, out1, out2, out3) { \
+ v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
+ v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m; \
+ \
+ VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \
+ VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \
+ DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \
+ res0_m, res1_m, res2_m, res3_m); \
+ VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m); \
+ VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m); \
+ DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2, \
+ res4_m, res5_m, res6_m, res7_m); \
+ VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m); \
+ VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m); \
+ DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1, \
+ res0_m, res1_m, res2_m, res3_m); \
+ VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m); \
+ VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m); \
+ DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3, \
+ res4_m, res5_m, res6_m, res7_m); \
+ ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m, \
+ res7_m, out0, out1, out2, out3); \
+}
+
+#define PCKEV_XORI128_AVG_ST_UB(in0, in1, dst, pdst) { \
+ v16u8 tmp_m; \
+ \
+ tmp_m = PCKEV_XORI128_UB(in1, in0); \
+ tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst); \
+ ST_UB(tmp_m, (pdst)); \
+}
-#define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, \
- mask0, mask1, mask2, mask3, \
- filt0, filt1, filt2, filt3, \
- out0, out1) { \
- v8i16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
- v8i16 res0_m, res1_m, res2_m, res3_m; \
- \
- vec0_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src1), (v16i8)(src0)); \
- vec1_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src3), (v16i8)(src2)); \
- \
- res0_m = __msa_dotp_s_h((v16i8)vec0_m, (v16i8)(filt0)); \
- res1_m = __msa_dotp_s_h((v16i8)vec1_m, (v16i8)(filt0)); \
- \
- vec2_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src1), (v16i8)(src0)); \
- vec3_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src3), (v16i8)(src2)); \
- \
- res0_m = __msa_dpadd_s_h(res0_m, (filt1), (v16i8)vec2_m); \
- res1_m = __msa_dpadd_s_h(res1_m, (filt1), (v16i8)vec3_m); \
- \
- vec4_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src1), (v16i8)(src0)); \
- vec5_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src3), (v16i8)(src2)); \
- \
- res2_m = __msa_dotp_s_h((v16i8)(filt2), (v16i8)vec4_m); \
- res3_m = __msa_dotp_s_h((v16i8)(filt2), (v16i8)vec5_m); \
- \
- vec6_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src1), (v16i8)(src0)); \
- vec7_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src3), (v16i8)(src2)); \
- \
- res2_m = __msa_dpadd_s_h(res2_m, (v16i8)(filt3), (v16i8)vec6_m); \
- res3_m = __msa_dpadd_s_h(res3_m, (v16i8)(filt3), (v16i8)vec7_m); \
- \
- out0 = __msa_adds_s_h(res0_m, res2_m); \
- out1 = __msa_adds_s_h(res1_m, res3_m); \
+#define PCKEV_AVG_ST_UB(in0, in1, dst, pdst) { \
+ v16u8 tmp_m; \
+ \
+ tmp_m = (v16u8)__msa_pckev_b((v16i8)in0, (v16i8)in1); \
+ tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst); \
+ ST_UB(tmp_m, (pdst)); \
}
-#define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, \
- mask0, mask1, mask2, mask3, \
- filt0, filt1, filt2, filt3, \
- out0, out1, out2, out3) { \
- v8i16 vec0_m, vec1_m, vec2_m, vec3_m; \
- v8i16 vec4_m, vec5_m, vec6_m, vec7_m; \
- v8i16 res0_m, res1_m, res2_m, res3_m; \
- v8i16 res4_m, res5_m, res6_m, res7_m; \
- \
- vec0_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src0), (v16i8)(src0)); \
- vec1_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src1), (v16i8)(src1)); \
- vec2_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src2), (v16i8)(src2)); \
- vec3_m = (v8i16)__msa_vshf_b((v16i8)(mask0), (v16i8)(src3), (v16i8)(src3)); \
- \
- res0_m = __msa_dotp_s_h((v16i8)vec0_m, (v16i8)(filt0)); \
- res1_m = __msa_dotp_s_h((v16i8)vec1_m, (v16i8)(filt0)); \
- res2_m = __msa_dotp_s_h((v16i8)vec2_m, (v16i8)(filt0)); \
- res3_m = __msa_dotp_s_h((v16i8)vec3_m, (v16i8)(filt0)); \
- \
- vec0_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src0), (v16i8)(src0)); \
- vec1_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src1), (v16i8)(src1)); \
- vec2_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src2), (v16i8)(src2)); \
- vec3_m = (v8i16)__msa_vshf_b((v16i8)(mask2), (v16i8)(src3), (v16i8)(src3)); \
- \
- res4_m = __msa_dotp_s_h((v16i8)vec0_m, (v16i8)(filt2)); \
- res5_m = __msa_dotp_s_h((v16i8)vec1_m, (v16i8)(filt2)); \
- res6_m = __msa_dotp_s_h((v16i8)vec2_m, (v16i8)(filt2)); \
- res7_m = __msa_dotp_s_h((v16i8)vec3_m, (v16i8)(filt2)); \
- \
- vec4_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src0), (v16i8)(src0)); \
- vec5_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src1), (v16i8)(src1)); \
- vec6_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src2), (v16i8)(src2)); \
- vec7_m = (v8i16)__msa_vshf_b((v16i8)(mask1), (v16i8)(src3), (v16i8)(src3)); \
- \
- res0_m = __msa_dpadd_s_h(res0_m, (v16i8)(filt1), (v16i8)vec4_m); \
- res1_m = __msa_dpadd_s_h(res1_m, (v16i8)(filt1), (v16i8)vec5_m); \
- res2_m = __msa_dpadd_s_h(res2_m, (v16i8)(filt1), (v16i8)vec6_m); \
- res3_m = __msa_dpadd_s_h(res3_m, (v16i8)(filt1), (v16i8)vec7_m); \
- \
- vec4_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src0), (v16i8)(src0)); \
- vec5_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src1), (v16i8)(src1)); \
- vec6_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src2), (v16i8)(src2)); \
- vec7_m = (v8i16)__msa_vshf_b((v16i8)(mask3), (v16i8)(src3), (v16i8)(src3)); \
- \
- res4_m = __msa_dpadd_s_h(res4_m, (v16i8)(filt3), (v16i8)vec4_m); \
- res5_m = __msa_dpadd_s_h(res5_m, (v16i8)(filt3), (v16i8)vec5_m); \
- res6_m = __msa_dpadd_s_h(res6_m, (v16i8)(filt3), (v16i8)vec6_m); \
- res7_m = __msa_dpadd_s_h(res7_m, (v16i8)(filt3), (v16i8)vec7_m); \
- \
- out0 = __msa_adds_s_h(res0_m, res4_m); \
- out1 = __msa_adds_s_h(res1_m, res5_m); \
- out2 = __msa_adds_s_h(res2_m, res6_m); \
- out3 = __msa_adds_s_h(res3_m, res7_m); \
+#define PCKEV_AVG_ST8x4_UB(in1, dst0, in2, dst1, in3, dst2, in4, dst3, \
+ pdst, stride) { \
+ v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ uint8_t *pdst_m = (uint8_t *)(pdst); \
+ \
+ PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m); \
+ PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
+ AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
+ ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
}
#endif /* VP9_COMMON_MIPS_MSA_VP9_CONVOLVE_MSA_H_ */
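
Note: the FILT_8TAP_DPADD_S_H / HORIZ_8TAP_FILT macros above compute a signed 8-tap dot product followed by a rounded shift and saturation; the surrounding code handles the XOR-by-128 signed/unsigned conversion and packing. The net per-pixel effect is sketched below in scalar C, assuming FILTER_BITS is 7 and that the final result is clamped to [0, 255] — assumptions for illustration, not a restatement of the macro internals.

#include <stdint.h>

/* Hedged scalar sketch of the overall 8-tap horizontal filter result:
 * dot product over eight neighbouring pixels, rounded shift by FILTER_BITS,
 * then clamp to the 8-bit output range. */
static uint8_t horiz_8tap_pixel_sketch(const uint8_t *src,
                                       const int8_t *filter) {
  int32_t k, sum = 0;
  for (k = 0; k < 8; ++k) {
    sum += src[k] * filter[k];   /* DOTP/DPADD accumulation */
  }
  sum = (sum + (1 << 6)) >> 7;   /* like __msa_srari_h with FILTER_BITS == 7 */
  if (sum < 0) sum = 0;
  if (sum > 255) sum = 255;
  return (uint8_t)sum;
}
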
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c
index e5c0eaa32f3..dd7ca35d3bd 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct16x16_msa.c
@@ -9,265 +9,7 @@
*/
#include <assert.h>
-
-#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_idct.h"
-#include "vp9/common/mips/msa/vp9_macros_msa.h"
-
-#define SET_COSPI_PAIR(c0_h, c1_h) ({ \
- v8i16 out0, r0_m, r1_m; \
- \
- r0_m = __msa_fill_h(c0_h); \
- r1_m = __msa_fill_h(c1_h); \
- out0 = __msa_ilvev_h(r1_m, r0_m); \
- \
- out0; \
-})
-
-#define DOTP_CONST_PAIR(reg0, reg1, const0, const1, out0, out1) { \
- v8i16 k0_m = __msa_fill_h(const0); \
- v8i16 s0_m, s1_m, s2_m, s3_m; \
- \
- s0_m = __msa_fill_h(const1); \
- k0_m = __msa_ilvev_h(s0_m, k0_m); \
- \
- s0_m = __msa_ilvl_h(-reg1, reg0); \
- s1_m = __msa_ilvr_h(-reg1, reg0); \
- s2_m = __msa_ilvl_h(reg0, reg1); \
- s3_m = __msa_ilvr_h(reg0, reg1); \
- s1_m = (v8i16)__msa_dotp_s_w(s1_m, k0_m); \
- s0_m = (v8i16)__msa_dotp_s_w(s0_m, k0_m); \
- s1_m = (v8i16)__msa_srari_w((v4i32)s1_m, DCT_CONST_BITS); \
- s0_m = (v8i16)__msa_srari_w((v4i32)s0_m, DCT_CONST_BITS); \
- out0 = __msa_pckev_h(s0_m, s1_m); \
- \
- s1_m = (v8i16)__msa_dotp_s_w(s3_m, k0_m); \
- s0_m = (v8i16)__msa_dotp_s_w(s2_m, k0_m); \
- s1_m = (v8i16)__msa_srari_w((v4i32)s1_m, DCT_CONST_BITS); \
- s0_m = (v8i16)__msa_srari_w((v4i32)s0_m, DCT_CONST_BITS); \
- out1 = __msa_pckev_h(s0_m, s1_m); \
-}
-
-#define VP9_MADD_SHORT(m0, m1, c0, c1, res0, res1) { \
- v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \
- v8i16 madd_s0_m, madd_s1_m; \
- \
- ILV_H_LR_SH(m0, m1, madd_s1_m, madd_s0_m); \
- \
- DOTP_S_W_4VECS_SW(madd_s0_m, c0, madd_s1_m, c0, \
- madd_s0_m, c1, madd_s1_m, c1, \
- madd0_m, madd1_m, madd2_m, madd3_m); \
- \
- SRARI_W_4VECS_SW(madd0_m, madd1_m, madd2_m, madd3_m, \
- madd0_m, madd1_m, madd2_m, madd3_m, \
- DCT_CONST_BITS); \
- \
- PCKEV_H_2VECS_SH(madd1_m, madd0_m, madd3_m, madd2_m, \
- res0, res1); \
-}
-
-#define VP9_MADD_BF(inp0, inp1, inp2, inp3, \
- cst0, cst1, cst2, cst3, \
- out0, out1, out2, out3) { \
- v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \
- v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v4i32 m4_m, m5_m; \
- \
- ILV_H_LRLR_SH(inp0, inp1, inp2, inp3, \
- madd_s1_m, madd_s0_m, madd_s3_m, madd_s2_m); \
- \
- DOTP_S_W_4VECS_SW(madd_s0_m, cst0, madd_s1_m, cst0, \
- madd_s2_m, cst2, madd_s3_m, cst2, \
- tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
- \
- m4_m = tmp0_m + tmp2_m; \
- m5_m = tmp1_m + tmp3_m; \
- tmp3_m = tmp1_m - tmp3_m; \
- tmp2_m = tmp0_m - tmp2_m; \
- \
- SRARI_W_4VECS_SW(m4_m, m5_m, tmp2_m, tmp3_m, \
- m4_m, m5_m, tmp2_m, tmp3_m, \
- DCT_CONST_BITS); \
- \
- PCKEV_H_2VECS_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \
- \
- DOTP_S_W_4VECS_SW(madd_s0_m, cst1, madd_s1_m, cst1, \
- madd_s2_m, cst3, madd_s3_m, cst3, \
- tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
- \
- m4_m = tmp0_m + tmp2_m; \
- m5_m = tmp1_m + tmp3_m; \
- tmp3_m = tmp1_m - tmp3_m; \
- tmp2_m = tmp0_m - tmp2_m; \
- \
- SRARI_W_4VECS_SW(m4_m, m5_m, tmp2_m, tmp3_m, \
- m4_m, m5_m, tmp2_m, tmp3_m, \
- DCT_CONST_BITS); \
- \
- PCKEV_H_2VECS_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \
-}
-
-#define TRANSPOSE8x8_H1(in0, in1, in2, in3, \
- in4, in5, in6, in7, \
- out0, out1, out2, out3, \
- out4, out5, out6, out7) { \
- v8i16 loc0_m, loc1_m; \
- v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
- \
- loc0_m = __msa_ilvr_h((in6), (in4)); \
- loc1_m = __msa_ilvr_h((in7), (in5)); \
- tmp0_m = __msa_ilvr_h(loc1_m, loc0_m); \
- tmp1_m = __msa_ilvl_h(loc1_m, loc0_m); \
- \
- loc0_m = __msa_ilvl_h((in6), (in4)); \
- loc1_m = __msa_ilvl_h((in7), (in5)); \
- tmp2_m = __msa_ilvr_h(loc1_m, loc0_m); \
- tmp3_m = __msa_ilvl_h(loc1_m, loc0_m); \
- \
- loc0_m = __msa_ilvr_h((in2), (in0)); \
- loc1_m = __msa_ilvr_h((in3), (in1)); \
- tmp4_m = __msa_ilvr_h(loc1_m, loc0_m); \
- tmp5_m = __msa_ilvl_h(loc1_m, loc0_m); \
- \
- loc0_m = __msa_ilvl_h((in2), (in0)); \
- loc1_m = __msa_ilvl_h((in3), (in1)); \
- tmp6_m = __msa_ilvr_h(loc1_m, loc0_m); \
- tmp7_m = __msa_ilvl_h(loc1_m, loc0_m); \
- \
- out0 = (v8i16)__msa_pckev_d((v2i64)tmp0_m, (v2i64)tmp4_m); \
- out1 = (v8i16)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \
- out2 = (v8i16)__msa_pckev_d((v2i64)tmp1_m, (v2i64)tmp5_m); \
- out3 = (v8i16)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \
- out4 = (v8i16)__msa_pckev_d((v2i64)tmp2_m, (v2i64)tmp6_m); \
- out5 = (v8i16)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \
- out6 = (v8i16)__msa_pckev_d((v2i64)tmp3_m, (v2i64)tmp7_m); \
- out7 = (v8i16)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \
-}
-
-#define VP9_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, \
- r8, r9, r10, r11, r12, r13, r14, r15, \
- out0, out1, out2, out3, out4, out5, out6, out7, \
- out8, out9, out10, out11, \
- out12, out13, out14, out15) { \
- v8i16 g0_m, g1_m, g2_m, g3_m, g4_m, g5_m, g6_m, g7_m; \
- v8i16 g8_m, g9_m, g10_m, g11_m, g12_m, g13_m, g14_m, g15_m; \
- v8i16 h0_m, h1_m, h2_m, h3_m, h4_m, h5_m, h6_m, h7_m; \
- v8i16 h8_m, h9_m, h10_m, h11_m; \
- v8i16 k0_m, k1_m, k2_m, k3_m; \
- \
- /* stage 1 */ \
- k0_m = SET_COSPI_PAIR(cospi_1_64, cospi_31_64); \
- k1_m = SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); \
- k2_m = SET_COSPI_PAIR(cospi_17_64, cospi_15_64); \
- k3_m = SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); \
- VP9_MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m, \
- g0_m, g1_m, g2_m, g3_m); \
- \
- k0_m = SET_COSPI_PAIR(cospi_5_64, cospi_27_64); \
- k1_m = SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); \
- k2_m = SET_COSPI_PAIR(cospi_21_64, cospi_11_64); \
- k3_m = SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); \
- VP9_MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m, \
- g4_m, g5_m, g6_m, g7_m); \
- \
- k0_m = SET_COSPI_PAIR(cospi_9_64, cospi_23_64); \
- k1_m = SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); \
- k2_m = SET_COSPI_PAIR(cospi_25_64, cospi_7_64); \
- k3_m = SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); \
- VP9_MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m, \
- g8_m, g9_m, g10_m, g11_m); \
- \
- k0_m = SET_COSPI_PAIR(cospi_13_64, cospi_19_64); \
- k1_m = SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); \
- k2_m = SET_COSPI_PAIR(cospi_29_64, cospi_3_64); \
- k3_m = SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); \
- VP9_MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m, \
- g12_m, g13_m, g14_m, g15_m); \
- \
- /* stage 2 */ \
- k0_m = SET_COSPI_PAIR(cospi_4_64, cospi_28_64); \
- k1_m = SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); \
- k2_m = SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); \
- VP9_MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m, \
- h0_m, h1_m, h2_m, h3_m); \
- \
- k0_m = SET_COSPI_PAIR(cospi_12_64, cospi_20_64); \
- k1_m = SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); \
- k2_m = SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); \
- VP9_MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m, \
- h4_m, h5_m, h6_m, h7_m); \
- \
- BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10); \
- \
- BUTTERFLY_8(g0_m, g2_m, g4_m, g6_m, g14_m, g12_m, g10_m, g8_m, \
- h8_m, h9_m, h10_m, h11_m, h6_m, h4_m, h2_m, h0_m); \
- \
- /* stage 3 */ \
- BUTTERFLY_4(h8_m, h9_m, h11_m, h10_m, out0, out1, h11_m, h10_m); \
- \
- k0_m = SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \
- k1_m = SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \
- k2_m = SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); \
- VP9_MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m, \
- out4, out6, out5, out7); \
- VP9_MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m, \
- out12, out14, out13, out15); \
- \
- /* stage 4 */ \
- k0_m = SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \
- k1_m = SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); \
- k2_m = SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \
- k3_m = SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); \
- VP9_MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3); \
- VP9_MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7); \
- VP9_MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11); \
- VP9_MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \
-}
-
-#define VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4(dest, dest_stride, \
- in0, in1, in2, in3) { \
- uint64_t out0_m, out1_m, out2_m, out3_m; \
- v8i16 res0_m, res1_m, res2_m, res3_m; \
- v16u8 dest0_m, dest1_m, dest2_m, dest3_m; \
- v16i8 tmp0_m, tmp1_m; \
- v16i8 zero_m = { 0 }; \
- uint8_t *dst_m = (uint8_t *)(dest); \
- \
- LOAD_4VECS_UB(dst_m, (dest_stride), \
- dest0_m, dest1_m, dest2_m, dest3_m); \
- \
- res0_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest0_m); \
- res1_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest1_m); \
- res2_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest2_m); \
- res3_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest3_m); \
- \
- res0_m += (v8i16)(in0); \
- res1_m += (v8i16)(in1); \
- res2_m += (v8i16)(in2); \
- res3_m += (v8i16)(in3); \
- \
- res0_m = CLIP_UNSIGNED_CHAR_H(res0_m); \
- res1_m = CLIP_UNSIGNED_CHAR_H(res1_m); \
- res2_m = CLIP_UNSIGNED_CHAR_H(res2_m); \
- res3_m = CLIP_UNSIGNED_CHAR_H(res3_m); \
- \
- tmp0_m = __msa_pckev_b((v16i8)res1_m, (v16i8)res0_m); \
- tmp1_m = __msa_pckev_b((v16i8)res3_m, (v16i8)res2_m); \
- \
- out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \
- out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \
- out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \
- out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \
- \
- STORE_DWORD(dst_m, out0_m); \
- dst_m += (dest_stride); \
- STORE_DWORD(dst_m, out1_m); \
- dst_m += (dest_stride); \
- STORE_DWORD(dst_m, out2_m); \
- dst_m += (dest_stride); \
- STORE_DWORD(dst_m, out3_m); \
-}
+#include "vp9/common/mips/msa/vp9_idct_msa.h"
void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) {
v8i16 loc0, loc1, loc2, loc3;
@@ -275,63 +17,38 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) {
v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15;
v8i16 tmp5, tmp6, tmp7;
- /* load left top 8x8 */
- LOAD_8VECS_SH(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
-
- /* load right top 8x8 */
- LOAD_8VECS_SH((input + 8), 16,
- reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15);
-
- /* transpose block */
- TRANSPOSE8x8_H1(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7,
- reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
-
- /* transpose block */
- TRANSPOSE8x8_H1(reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15,
- reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15);
-
- DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14);
- DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6);
-
- loc0 = reg2 + reg10;
- reg2 = reg2 - reg10;
- loc1 = reg14 + reg6;
- reg14 = reg14 - reg6;
-
- DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3);
- DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8);
- DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12);
-
- reg14 = reg8 - reg12;
- reg2 = reg8 + reg12;
- reg10 = reg0 - reg4;
- reg6 = reg0 + reg4;
-
- reg0 = reg2 - loc1;
- reg2 = reg2 + loc1;
- reg12 = reg14 - loc0;
- reg14 = reg14 + loc0;
- reg4 = reg6 - loc3;
- reg6 = reg6 + loc3;
- reg8 = reg10 - loc2;
- reg10 = reg10 + loc2;
+ LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+ input += 8;
+ LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15);
+
+ TRANSPOSE8x8_SH_SH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7,
+ reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+ TRANSPOSE8x8_SH_SH(reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15,
+ reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15);
+ VP9_DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14);
+ VP9_DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6);
+ BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2);
+ VP9_DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3);
+ VP9_DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8);
+ VP9_DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12);
+ BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14);
+ SUB4(reg2, loc1, reg14, loc0, reg6, loc3, reg10, loc2, reg0, reg12, reg4,
+ reg8);
+ ADD4(reg2, loc1, reg14, loc0, reg6, loc3, reg10, loc2, reg2, reg14, reg6,
+ reg10);
/* stage 2 */
- DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15);
- DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3);
+ VP9_DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15);
+ VP9_DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3);
reg9 = reg1 - loc2;
reg1 = reg1 + loc2;
reg7 = reg15 - loc3;
reg15 = reg15 + loc3;
- DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11);
- DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1);
-
- reg13 = loc0 + reg5;
- reg5 = loc0 - reg5;
- reg3 = loc1 + reg11;
- reg11 = loc1 - reg11;
+ VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11);
+ VP9_DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1);
+ BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5);
loc1 = reg15 + reg3;
reg3 = reg15 - reg3;
@@ -346,8 +63,8 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) {
tmp7 = loc1;
reg0 = loc2;
- DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9);
- DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11);
+ VP9_DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9);
+ VP9_DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11);
loc0 = reg9 + reg5;
reg5 = reg9 - reg5;
@@ -360,21 +77,15 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) {
loc2 = reg4 - loc0;
tmp5 = loc1;
- DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11);
+ VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11);
+ BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1);
- loc0 = reg8 + reg5;
- loc1 = reg8 - reg5;
- reg4 = reg10 + reg11;
- reg9 = reg10 - reg11;
reg10 = loc0;
reg11 = loc1;
- DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13);
+ VP9_DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13);
+ BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5);
- reg8 = reg12 + reg3;
- reg5 = reg12 - reg3;
- reg6 = reg14 + reg13;
- reg7 = reg14 - reg13;
reg13 = loc2;
/* Transpose and store the output */
@@ -383,49 +94,36 @@ void vp9_idct16_1d_rows_msa(const int16_t *input, int16_t *output) {
reg3 = tmp7;
/* transpose block */
- TRANSPOSE8x8_H1(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14,
- reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14);
-
- STORE_8VECS_SH(output, 16, reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14);
+ TRANSPOSE8x8_SH_SH(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14,
+ reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14);
+ ST_SH8(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14, output, 16);
/* transpose block */
- TRANSPOSE8x8_H1(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15,
- reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15);
-
- STORE_8VECS_SH((output + 8), 16,
- reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15);
+ TRANSPOSE8x8_SH_SH(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15,
+ reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15);
+ ST_SH8(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, (output + 8), 16);
}
-void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest,
- int32_t dest_stride) {
+void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
v8i16 loc0, loc1, loc2, loc3;
v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14;
v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15;
v8i16 tmp5, tmp6, tmp7;
/* load up 8x8 */
- LOAD_8VECS_SH(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
-
+ LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+ input += 8 * 16;
/* load bottom 8x8 */
- LOAD_8VECS_SH((input + 8 * 16), 16,
- reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15);
+ LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15);
- DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14);
- DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6);
-
- loc0 = reg2 + reg10;
- reg2 = reg2 - reg10;
- loc1 = reg14 + reg6;
- reg14 = reg14 - reg6;
-
- DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3);
- DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8);
- DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12);
-
- reg14 = reg8 - reg12;
- reg2 = reg8 + reg12;
- reg10 = reg0 - reg4;
- reg6 = reg0 + reg4;
+ VP9_DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14);
+ VP9_DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6);
+ BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2);
+ VP9_DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3);
+ VP9_DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8);
+ VP9_DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12);
+ BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14);
reg0 = reg2 - loc1;
reg2 = reg2 + loc1;
@@ -437,21 +135,17 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest,
reg10 = reg10 + loc2;
/* stage 2 */
- DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15);
- DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3);
+ VP9_DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15);
+ VP9_DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3);
reg9 = reg1 - loc2;
reg1 = reg1 + loc2;
reg7 = reg15 - loc3;
reg15 = reg15 + loc3;
- DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11);
- DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1);
-
- reg13 = loc0 + reg5;
- reg5 = loc0 - reg5;
- reg3 = loc1 + reg11;
- reg11 = loc1 - reg11;
+ VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11);
+ VP9_DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1);
+ BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5);
loc1 = reg15 + reg3;
reg3 = reg15 - reg3;
@@ -466,8 +160,8 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest,
tmp7 = loc1;
reg0 = loc2;
- DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9);
- DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11);
+ VP9_DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9);
+ VP9_DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11);
loc0 = reg9 + reg5;
reg5 = reg9 - reg5;
@@ -480,21 +174,14 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest,
loc2 = reg4 - loc0;
tmp5 = loc1;
- DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11);
+ VP9_DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11);
+ BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1);
- loc0 = reg8 + reg5;
- loc1 = reg8 - reg5;
- reg4 = reg10 + reg11;
- reg9 = reg10 - reg11;
reg10 = loc0;
reg11 = loc1;
- DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13);
-
- reg8 = reg12 + reg3;
- reg5 = reg12 - reg3;
- reg6 = reg14 + reg13;
- reg7 = reg14 - reg13;
+ VP9_DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13);
+ BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5);
reg13 = loc2;
/* Transpose and store the output */
@@ -502,22 +189,21 @@ void vp9_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest,
reg14 = tmp6;
reg3 = tmp7;
- SRARI_H_4VECS_SH(reg0, reg2, reg4, reg6, reg0, reg2, reg4, reg6, 6);
- VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4(dest, dest_stride,
- reg0, reg2, reg4, reg6);
- SRARI_H_4VECS_SH(reg8, reg10, reg12, reg14, reg8, reg10, reg12, reg14, 6);
- VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4((dest + (4 * dest_stride)),
- dest_stride, reg8, reg10, reg12, reg14);
- SRARI_H_4VECS_SH(reg3, reg13, reg11, reg5, reg3, reg13, reg11, reg5, 6);
- VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4((dest + (8 * dest_stride)),
- dest_stride, reg3, reg13, reg11, reg5);
- SRARI_H_4VECS_SH(reg7, reg9, reg1, reg15, reg7, reg9, reg1, reg15, 6);
- VP9_ADDBLK_CLIP_AND_STORE_8_BYTES_4((dest + (12 * dest_stride)),
- dest_stride, reg7, reg9, reg1, reg15);
+ SRARI_H4_SH(reg0, reg2, reg4, reg6, 6);
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg0, reg2, reg4, reg6);
+ dst += (4 * dst_stride);
+ SRARI_H4_SH(reg8, reg10, reg12, reg14, 6);
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg8, reg10, reg12, reg14);
+ dst += (4 * dst_stride);
+ SRARI_H4_SH(reg3, reg13, reg11, reg5, 6);
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg3, reg13, reg11, reg5);
+ dst += (4 * dst_stride);
+ SRARI_H4_SH(reg7, reg9, reg1, reg15, 6);
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15);
}
-void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dest,
- int32_t dest_stride) {
+void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]);
int16_t *out = out_arr;
@@ -531,13 +217,13 @@ void vp9_idct16x16_256_add_msa(const int16_t *input, uint8_t *dest,
/* transform columns */
for (i = 0; i < 2; ++i) {
/* process 8 * 16 block */
- vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dest + (i << 3)),
- dest_stride);
+ vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),
+ dst_stride);
}
}
-void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dest,
- int32_t dest_stride) {
+void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
uint8_t i;
DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]);
int16_t *out = out_arr;
@@ -570,64 +256,38 @@ void vp9_idct16x16_10_add_msa(const int16_t *input, uint8_t *dest,
/* transform columns */
for (i = 0; i < 2; ++i) {
/* process 8 * 16 block */
- vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dest + (i << 3)),
- dest_stride);
+ vp9_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)),
+ dst_stride);
}
}
-void vp9_idct16x16_1_add_msa(const int16_t *input, uint8_t *dest,
- int32_t dest_stride) {
+void vp9_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
uint8_t i;
- int32_t const1;
int16_t out;
- v8i16 const2, res0, res1, res2, res3, res4, res5, res6, res7;
- v16u8 dest0, dest1, dest2, dest3;
- v16u8 tmp0, tmp1, tmp2, tmp3;
- v16i8 zero = { 0 };
-
- out = dct_const_round_shift(input[0] * cospi_16_64);
- out = dct_const_round_shift(out * cospi_16_64);
- const1 = ROUND_POWER_OF_TWO(out, 6);
-
- const2 = __msa_fill_h(const1);
-
- for (i = 0; i < 4; ++i) {
- LOAD_4VECS_UB(dest, dest_stride, dest0, dest1, dest2, dest3);
-
- res0 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest0);
- res1 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest1);
- res2 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest2);
- res3 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest3);
- res4 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest0);
- res5 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest1);
- res6 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest2);
- res7 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest3);
-
- res0 += const2;
- res1 += const2;
- res2 += const2;
- res3 += const2;
- res4 += const2;
- res5 += const2;
- res6 += const2;
- res7 += const2;
-
- res0 = CLIP_UNSIGNED_CHAR_H(res0);
- res1 = CLIP_UNSIGNED_CHAR_H(res1);
- res2 = CLIP_UNSIGNED_CHAR_H(res2);
- res3 = CLIP_UNSIGNED_CHAR_H(res3);
- res4 = CLIP_UNSIGNED_CHAR_H(res4);
- res5 = CLIP_UNSIGNED_CHAR_H(res5);
- res6 = CLIP_UNSIGNED_CHAR_H(res6);
- res7 = CLIP_UNSIGNED_CHAR_H(res7);
-
- tmp0 = (v16u8)__msa_pckev_b((v16i8)res4, (v16i8)res0);
- tmp1 = (v16u8)__msa_pckev_b((v16i8)res5, (v16i8)res1);
- tmp2 = (v16u8)__msa_pckev_b((v16i8)res6, (v16i8)res2);
- tmp3 = (v16u8)__msa_pckev_b((v16i8)res7, (v16i8)res3);
-
- STORE_4VECS_UB(dest, dest_stride, tmp0, tmp1, tmp2, tmp3);
- dest += (4 * dest_stride);
+ v8i16 vec, res0, res1, res2, res3, res4, res5, res6, res7;
+ v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3;
+
+ out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);
+ out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);
+ out = ROUND_POWER_OF_TWO(out, 6);
+
+ vec = __msa_fill_h(out);
+
+ for (i = 4; i--;) {
+ LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
+ UNPCK_UB_SH(dst0, res0, res4);
+ UNPCK_UB_SH(dst1, res1, res5);
+ UNPCK_UB_SH(dst2, res2, res6);
+ UNPCK_UB_SH(dst3, res3, res7);
+ ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
+ ADD4(res4, vec, res5, vec, res6, vec, res7, vec, res4, res5, res6, res7);
+ CLIP_SH4_0_255(res0, res1, res2, res3);
+ CLIP_SH4_0_255(res4, res5, res6, res7);
+ PCKEV_B4_UB(res4, res0, res5, res1, res6, res2, res7, res3,
+ tmp0, tmp1, tmp2, tmp3);
+ ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride);
+ dst += (4 * dst_stride);
}
}
@@ -636,15 +296,12 @@ static void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) {
v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
/* load input data */
- LOAD_16VECS_SH(input, 8,
- l0, l8, l1, l9, l2, l10, l3, l11,
- l4, l12, l5, l13, l6, l14, l7, l15);
-
- TRANSPOSE8x8_H_SH(l0, l1, l2, l3, l4, l5, l6, l7,
- l0, l1, l2, l3, l4, l5, l6, l7);
-
- TRANSPOSE8x8_H_SH(l8, l9, l10, l11, l12, l13, l14, l15,
- l8, l9, l10, l11, l12, l13, l14, l15);
+ LD_SH16(input, 8,
+ l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14, l7, l15);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
+ l0, l1, l2, l3, l4, l5, l6, l7);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
+ l8, l9, l10, l11, l12, l13, l14, l15);
/* ADST in horizontal */
VP9_IADST8x16_1D(l0, l1, l2, l3, l4, l5, l6, l7,
@@ -657,19 +314,16 @@ static void vp9_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) {
l13 = -r13;
l15 = -r1;
- TRANSPOSE8x8_H_SH(r0, l1, r12, l3, r6, r14, r10, r2,
- l0, l1, l2, l3, l4, l5, l6, l7);
-
- STORE_8VECS_SH(output, 16, l0, l1, l2, l3, l4, l5, l6, l7);
-
- TRANSPOSE8x8_H_SH(r3, r11, r15, r7, r5, l13, r9, l15,
- l8, l9, l10, l11, l12, l13, l14, l15);
-
- STORE_8VECS_SH((output + 8), 16, l8, l9, l10, l11, l12, l13, l14, l15);
+ TRANSPOSE8x8_SH_SH(r0, l1, r12, l3, r6, r14, r10, r2,
+ l0, l1, l2, l3, l4, l5, l6, l7);
+ ST_SH8(l0, l1, l2, l3, l4, l5, l6, l7, output, 16);
+ TRANSPOSE8x8_SH_SH(r3, r11, r15, r7, r5, l13, r9, l15,
+ l8, l9, l10, l11, l12, l13, l14, l15);
+ ST_SH8(l8, l9, l10, l11, l12, l13, l14, l15, (output + 8), 16);
}
-static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest,
- int32_t dest_stride) {
+static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
v8i16 v0, v2, v4, v6, k0, k1, k2, k3;
v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
@@ -678,210 +332,163 @@ static void vp9_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dest,
v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11;
v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
v8i16 res8, res9, res10, res11, res12, res13, res14, res15;
- v16u8 dest0, dest1, dest2, dest3, dest4, dest5, dest6, dest7;
- v16u8 dest8, dest9, dest10, dest11, dest12, dest13, dest14, dest15;
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
+ v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15;
v16i8 zero = { 0 };
- r0 = LOAD_SH(input + 0 * 16);
- r3 = LOAD_SH(input + 3 * 16);
- r4 = LOAD_SH(input + 4 * 16);
- r7 = LOAD_SH(input + 7 * 16);
- r8 = LOAD_SH(input + 8 * 16);
- r11 = LOAD_SH(input + 11 * 16);
- r12 = LOAD_SH(input + 12 * 16);
- r15 = LOAD_SH(input + 15 * 16);
+ r0 = LD_SH(input + 0 * 16);
+ r3 = LD_SH(input + 3 * 16);
+ r4 = LD_SH(input + 4 * 16);
+ r7 = LD_SH(input + 7 * 16);
+ r8 = LD_SH(input + 8 * 16);
+ r11 = LD_SH(input + 11 * 16);
+ r12 = LD_SH(input + 12 * 16);
+ r15 = LD_SH(input + 15 * 16);
/* stage 1 */
- k0 = SET_COSPI_PAIR(cospi_1_64, cospi_31_64);
- k1 = SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);
- k2 = SET_COSPI_PAIR(cospi_17_64, cospi_15_64);
- k3 = SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);
+ k0 = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64);
+ k1 = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64);
+ k2 = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64);
+ k3 = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64);
VP9_MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3);
-
- k0 = SET_COSPI_PAIR(cospi_9_64, cospi_23_64);
- k1 = SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);
- k2 = SET_COSPI_PAIR(cospi_25_64, cospi_7_64);
- k3 = SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);
+ k0 = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64);
+ k1 = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64);
+ k2 = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64);
+ k3 = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64);
VP9_MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11);
-
BUTTERFLY_4(g0, g2, g10, g8, h8, h9, v2, v0);
-
- k0 = SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
- k1 = SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
- k2 = SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);
+ k0 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
+ k1 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
+ k2 = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64);
VP9_MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3);
- r1 = LOAD_SH(input + 1 * 16);
- r2 = LOAD_SH(input + 2 * 16);
- r5 = LOAD_SH(input + 5 * 16);
- r6 = LOAD_SH(input + 6 * 16);
- r9 = LOAD_SH(input + 9 * 16);
- r10 = LOAD_SH(input + 10 * 16);
- r13 = LOAD_SH(input + 13 * 16);
- r14 = LOAD_SH(input + 14 * 16);
-
- k0 = SET_COSPI_PAIR(cospi_5_64, cospi_27_64);
- k1 = SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);
- k2 = SET_COSPI_PAIR(cospi_21_64, cospi_11_64);
- k3 = SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);
+ r1 = LD_SH(input + 1 * 16);
+ r2 = LD_SH(input + 2 * 16);
+ r5 = LD_SH(input + 5 * 16);
+ r6 = LD_SH(input + 6 * 16);
+ r9 = LD_SH(input + 9 * 16);
+ r10 = LD_SH(input + 10 * 16);
+ r13 = LD_SH(input + 13 * 16);
+ r14 = LD_SH(input + 14 * 16);
+
+ k0 = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64);
+ k1 = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64);
+ k2 = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64);
+ k3 = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64);
VP9_MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, g4, g5, g6, g7);
-
- k0 = SET_COSPI_PAIR(cospi_13_64, cospi_19_64);
- k1 = SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);
- k2 = SET_COSPI_PAIR(cospi_29_64, cospi_3_64);
- k3 = SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);
+ k0 = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64);
+ k1 = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64);
+ k2 = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64);
+ k3 = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64);
VP9_MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g12, g13, g14, g15);
-
BUTTERFLY_4(g4, g6, g14, g12, h10, h11, v6, v4);
-
BUTTERFLY_4(h8, h9, h11, h10, out0, out1, h11, h10);
out1 = -out1;
- out0 = __msa_srari_h(out0, 6);
- out1 = __msa_srari_h(out1, 6);
- dest0 = LOAD_UB(dest + 0 * dest_stride);
- dest1 = LOAD_UB(dest + 15 * dest_stride);
- res0 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest0);
- res1 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest1);
- res0 += out0;
- res1 += out1;
- res0 = CLIP_UNSIGNED_CHAR_H(res0);
- res1 = CLIP_UNSIGNED_CHAR_H(res1);
- res0 = (v8i16)__msa_pckev_b((v16i8)res0, (v16i8)res0);
- res1 = (v8i16)__msa_pckev_b((v16i8)res1, (v16i8)res1);
- STORE_DWORD(dest, __msa_copy_u_d((v2i64)res0, 0));
- STORE_DWORD(dest + 15 * dest_stride, __msa_copy_u_d((v2i64)res1, 0));
-
- k0 = SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
- k1 = SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
- k2 = SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);
+ SRARI_H2_SH(out0, out1, 6);
+ dst0 = LD_UB(dst + 0 * dst_stride);
+ dst1 = LD_UB(dst + 15 * dst_stride);
+ ILVR_B2_SH(zero, dst0, zero, dst1, res0, res1);
+ ADD2(res0, out0, res1, out1, res0, res1);
+ CLIP_SH2_0_255(res0, res1);
+ PCKEV_B2_SH(res0, res0, res1, res1, res0, res1);
+ ST8x1_UB(res0, dst);
+ ST8x1_UB(res1, dst + 15 * dst_stride);
+
+ k0 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
+ k1 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
+ k2 = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64);
VP9_MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7);
-
BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10);
out8 = -out8;
- out8 = __msa_srari_h(out8, 6);
- out9 = __msa_srari_h(out9, 6);
- dest8 = LOAD_UB(dest + 1 * dest_stride);
- dest9 = LOAD_UB(dest + 14 * dest_stride);
- res8 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest8);
- res9 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest9);
- res8 += out8;
- res9 += out9;
- res8 = CLIP_UNSIGNED_CHAR_H(res8);
- res9 = CLIP_UNSIGNED_CHAR_H(res9);
- res8 = (v8i16)__msa_pckev_b((v16i8)res8, (v16i8)res8);
- res9 = (v8i16)__msa_pckev_b((v16i8)res9, (v16i8)res9);
- STORE_DWORD(dest + dest_stride, __msa_copy_u_d((v2i64)res8, 0));
- STORE_DWORD(dest + 14 * dest_stride, __msa_copy_u_d((v2i64)res9, 0));
-
- k0 = SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
- k1 = SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
- k2 = SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);
+ SRARI_H2_SH(out8, out9, 6);
+ dst8 = LD_UB(dst + 1 * dst_stride);
+ dst9 = LD_UB(dst + 14 * dst_stride);
+ ILVR_B2_SH(zero, dst8, zero, dst9, res8, res9);
+ ADD2(res8, out8, res9, out9, res8, res9);
+ CLIP_SH2_0_255(res8, res9);
+ PCKEV_B2_SH(res8, res8, res9, res9, res8, res9);
+ ST8x1_UB(res8, dst + dst_stride);
+ ST8x1_UB(res9, dst + 14 * dst_stride);
+
+ k0 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
+ k1 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
+ k2 = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64);
VP9_MADD_BF(v0, v2, v4, v6, k0, k1, k2, k0, out4, out6, out5, out7);
out4 = -out4;
- out4 = __msa_srari_h(out4, 6);
- out5 = __msa_srari_h(out5, 6);
- dest4 = LOAD_UB(dest + 3 * dest_stride);
- dest5 = LOAD_UB(dest + 12 * dest_stride);
- res4 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest4);
- res5 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest5);
- res4 += out4;
- res5 += out5;
- res4 = CLIP_UNSIGNED_CHAR_H(res4);
- res5 = CLIP_UNSIGNED_CHAR_H(res5);
- res4 = (v8i16)__msa_pckev_b((v16i8)res4, (v16i8)res4);
- res5 = (v8i16)__msa_pckev_b((v16i8)res5, (v16i8)res5);
- STORE_DWORD(dest + 3 * dest_stride, __msa_copy_u_d((v2i64)res4, 0));
- STORE_DWORD(dest + 12 * dest_stride, __msa_copy_u_d((v2i64)res5, 0));
+ SRARI_H2_SH(out4, out5, 6);
+ dst4 = LD_UB(dst + 3 * dst_stride);
+ dst5 = LD_UB(dst + 12 * dst_stride);
+ ILVR_B2_SH(zero, dst4, zero, dst5, res4, res5);
+ ADD2(res4, out4, res5, out5, res4, res5);
+ CLIP_SH2_0_255(res4, res5);
+ PCKEV_B2_SH(res4, res4, res5, res5, res4, res5);
+ ST8x1_UB(res4, dst + 3 * dst_stride);
+ ST8x1_UB(res5, dst + 12 * dst_stride);
VP9_MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15);
out13 = -out13;
- out12 = __msa_srari_h(out12, 6);
- out13 = __msa_srari_h(out13, 6);
- dest12 = LOAD_UB(dest + 2 * dest_stride);
- dest13 = LOAD_UB(dest + 13 * dest_stride);
- res12 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest12);
- res13 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest13);
- res12 += out12;
- res13 += out13;
- res12 = CLIP_UNSIGNED_CHAR_H(res12);
- res13 = CLIP_UNSIGNED_CHAR_H(res13);
- res12 = (v8i16)__msa_pckev_b((v16i8)res12, (v16i8)res12);
- res13 = (v8i16)__msa_pckev_b((v16i8)res13, (v16i8)res13);
- STORE_DWORD(dest + 2 * dest_stride, __msa_copy_u_d((v2i64)res12, 0));
- STORE_DWORD(dest + 13 * dest_stride, __msa_copy_u_d((v2i64)res13, 0));
-
- k0 = SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
- k3 = SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
+ SRARI_H2_SH(out12, out13, 6);
+ dst12 = LD_UB(dst + 2 * dst_stride);
+ dst13 = LD_UB(dst + 13 * dst_stride);
+ ILVR_B2_SH(zero, dst12, zero, dst13, res12, res13);
+ ADD2(res12, out12, res13, out13, res12, res13);
+ CLIP_SH2_0_255(res12, res13);
+ PCKEV_B2_SH(res12, res12, res13, res13, res12, res13);
+ ST8x1_UB(res12, dst + 2 * dst_stride);
+ ST8x1_UB(res13, dst + 13 * dst_stride);
+
+ k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
+ k3 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
VP9_MADD_SHORT(out6, out7, k0, k3, out6, out7);
- out6 = __msa_srari_h(out6, 6);
- out7 = __msa_srari_h(out7, 6);
- dest6 = LOAD_UB(dest + 4 * dest_stride);
- dest7 = LOAD_UB(dest + 11 * dest_stride);
- res6 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest6);
- res7 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest7);
- res6 += out6;
- res7 += out7;
- res6 = CLIP_UNSIGNED_CHAR_H(res6);
- res7 = CLIP_UNSIGNED_CHAR_H(res7);
- res6 = (v8i16)__msa_pckev_b((v16i8)res6, (v16i8)res6);
- res7 = (v8i16)__msa_pckev_b((v16i8)res7, (v16i8)res7);
- STORE_DWORD(dest + 4 * dest_stride, __msa_copy_u_d((v2i64)res6, 0));
- STORE_DWORD(dest + 11 * dest_stride, __msa_copy_u_d((v2i64)res7, 0));
+ SRARI_H2_SH(out6, out7, 6);
+ dst6 = LD_UB(dst + 4 * dst_stride);
+ dst7 = LD_UB(dst + 11 * dst_stride);
+ ILVR_B2_SH(zero, dst6, zero, dst7, res6, res7);
+ ADD2(res6, out6, res7, out7, res6, res7);
+ CLIP_SH2_0_255(res6, res7);
+ PCKEV_B2_SH(res6, res6, res7, res7, res6, res7);
+ ST8x1_UB(res6, dst + 4 * dst_stride);
+ ST8x1_UB(res7, dst + 11 * dst_stride);
VP9_MADD_SHORT(out10, out11, k0, k3, out10, out11);
- out10 = __msa_srari_h(out10, 6);
- out11 = __msa_srari_h(out11, 6);
- dest10 = LOAD_UB(dest + 6 * dest_stride);
- dest11 = LOAD_UB(dest + 9 * dest_stride);
- res10 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest10);
- res11 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest11);
- res10 += out10;
- res11 += out11;
- res10 = CLIP_UNSIGNED_CHAR_H(res10);
- res11 = CLIP_UNSIGNED_CHAR_H(res11);
- res10 = (v8i16)__msa_pckev_b((v16i8)res10, (v16i8)res10);
- res11 = (v8i16)__msa_pckev_b((v16i8)res11, (v16i8)res11);
- STORE_DWORD(dest + 6 * dest_stride, __msa_copy_u_d((v2i64)res10, 0));
- STORE_DWORD(dest + 9 * dest_stride, __msa_copy_u_d((v2i64)res11, 0));
-
- k1 = SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);
- k2 = SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
+ SRARI_H2_SH(out10, out11, 6);
+ dst10 = LD_UB(dst + 6 * dst_stride);
+ dst11 = LD_UB(dst + 9 * dst_stride);
+ ILVR_B2_SH(zero, dst10, zero, dst11, res10, res11);
+ ADD2(res10, out10, res11, out11, res10, res11);
+ CLIP_SH2_0_255(res10, res11);
+ PCKEV_B2_SH(res10, res10, res11, res11, res10, res11);
+ ST8x1_UB(res10, dst + 6 * dst_stride);
+ ST8x1_UB(res11, dst + 9 * dst_stride);
+
+ k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64);
+ k2 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
VP9_MADD_SHORT(h10, h11, k1, k2, out2, out3);
- out2 = __msa_srari_h(out2, 6);
- out3 = __msa_srari_h(out3, 6);
- dest2 = LOAD_UB(dest + 7 * dest_stride);
- dest3 = LOAD_UB(dest + 8 * dest_stride);
- res2 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest2);
- res3 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest3);
- res2 += out2;
- res3 += out3;
- res2 = CLIP_UNSIGNED_CHAR_H(res2);
- res3 = CLIP_UNSIGNED_CHAR_H(res3);
- res2 = (v8i16)__msa_pckev_b((v16i8)res2, (v16i8)res2);
- res3 = (v8i16)__msa_pckev_b((v16i8)res3, (v16i8)res3);
- STORE_DWORD(dest + 7 * dest_stride, __msa_copy_u_d((v2i64)res2, 0));
- STORE_DWORD(dest + 8 * dest_stride, __msa_copy_u_d((v2i64)res3, 0));
+ SRARI_H2_SH(out2, out3, 6);
+ dst2 = LD_UB(dst + 7 * dst_stride);
+ dst3 = LD_UB(dst + 8 * dst_stride);
+ ILVR_B2_SH(zero, dst2, zero, dst3, res2, res3);
+ ADD2(res2, out2, res3, out3, res2, res3);
+ CLIP_SH2_0_255(res2, res3);
+ PCKEV_B2_SH(res2, res2, res3, res3, res2, res3);
+ ST8x1_UB(res2, dst + 7 * dst_stride);
+ ST8x1_UB(res3, dst + 8 * dst_stride);
VP9_MADD_SHORT(out14, out15, k1, k2, out14, out15);
- out14 = __msa_srari_h(out14, 6);
- out15 = __msa_srari_h(out15, 6);
- dest14 = LOAD_UB(dest + 5 * dest_stride);
- dest15 = LOAD_UB(dest + 10 * dest_stride);
- res14 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest14);
- res15 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest15);
- res14 += out14;
- res15 += out15;
- res14 = CLIP_UNSIGNED_CHAR_H(res14);
- res15 = CLIP_UNSIGNED_CHAR_H(res15);
- res14 = (v8i16)__msa_pckev_b((v16i8)res14, (v16i8)res14);
- res15 = (v8i16)__msa_pckev_b((v16i8)res15, (v16i8)res15);
- STORE_DWORD(dest + 5 * dest_stride, __msa_copy_u_d((v2i64)res14, 0));
- STORE_DWORD(dest + 10 * dest_stride, __msa_copy_u_d((v2i64)res15, 0));
+ SRARI_H2_SH(out14, out15, 6);
+ dst14 = LD_UB(dst + 5 * dst_stride);
+ dst15 = LD_UB(dst + 10 * dst_stride);
+ ILVR_B2_SH(zero, dst14, zero, dst15, res14, res15);
+ ADD2(res14, out14, res15, out15, res14, res15);
+ CLIP_SH2_0_255(res14, res15);
+ PCKEV_B2_SH(res14, res14, res15, res15, res14, res15);
+ ST8x1_UB(res14, dst + 5 * dst_stride);
+ ST8x1_UB(res15, dst + 10 * dst_stride);
}
-void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest,
- int32_t dest_stride, int32_t tx_type) {
+void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride, int32_t tx_type) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
int16_t *out_ptr = &out[0];
@@ -897,8 +504,8 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest,
/* transform columns */
for (i = 0; i < 2; ++i) {
/* process 8 * 16 block */
- vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)),
- (dest + (i << 3)), dest_stride);
+ vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ dst_stride);
}
break;
case ADST_DCT:
@@ -911,7 +518,7 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest,
/* transform columns */
for (i = 0; i < 2; ++i) {
vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
- (dest + (i << 3)), dest_stride);
+ (dst + (i << 3)), dst_stride);
}
break;
case DCT_ADST:
@@ -924,8 +531,8 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest,
/* transform columns */
for (i = 0; i < 2; ++i) {
/* process 8 * 16 block */
- vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)),
- (dest + (i << 3)), dest_stride);
+ vp9_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ dst_stride);
}
break;
case ADST_ADST:
@@ -938,7 +545,7 @@ void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dest,
/* transform columns */
for (i = 0; i < 2; ++i) {
vp9_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
- (dest + (i << 3)), dest_stride);
+ (dst + (i << 3)), dst_stride);
}
break;
default:
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct32x32_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct32x32_msa.c
index f576b50ea07..77d53a4c35f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct32x32_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct32x32_msa.c
@@ -8,108 +8,34 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include "vpx_ports/mem.h"
-#include "vp9/common/vp9_idct.h"
-#include "vp9/common/mips/msa/vp9_macros_msa.h"
-
-#define DOTP_CONST_PAIR(reg0, reg1, const0, const1, out0, out1) { \
- v8i16 k0_m = __msa_fill_h(const0); \
- v8i16 s0_m, s1_m, s2_m, s3_m; \
- \
- s0_m = __msa_fill_h(const1); \
- k0_m = __msa_ilvev_h(s0_m, k0_m); \
- \
- s0_m = __msa_ilvl_h(-reg1, reg0); \
- s1_m = __msa_ilvr_h(-reg1, reg0); \
- s2_m = __msa_ilvl_h(reg0, reg1); \
- s3_m = __msa_ilvr_h(reg0, reg1); \
- s1_m = (v8i16)__msa_dotp_s_w(s1_m, k0_m); \
- s0_m = (v8i16)__msa_dotp_s_w(s0_m, k0_m); \
- s1_m = (v8i16)__msa_srari_w((v4i32)s1_m, DCT_CONST_BITS); \
- s0_m = (v8i16)__msa_srari_w((v4i32)s0_m, DCT_CONST_BITS); \
- out0 = __msa_pckev_h(s0_m, s1_m); \
- \
- s1_m = (v8i16)__msa_dotp_s_w(s3_m, k0_m); \
- s0_m = (v8i16)__msa_dotp_s_w(s2_m, k0_m); \
- s1_m = (v8i16)__msa_srari_w((v4i32)s1_m, DCT_CONST_BITS); \
- s0_m = (v8i16)__msa_srari_w((v4i32)s0_m, DCT_CONST_BITS); \
- out1 = __msa_pckev_h(s0_m, s1_m); \
-}
-
-#define VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS(dest, dest_stride, \
- in0, in1, in2, in3) { \
- uint64_t out0_m, out1_m, out2_m, out3_m; \
- v8i16 res0_m, res1_m, res2_m, res3_m; \
- v16u8 dest0_m, dest1_m, dest2_m, dest3_m; \
- v16i8 tmp0_m, tmp1_m; \
- v16i8 zero_m = { 0 }; \
- uint8_t *dst_m = (uint8_t *)(dest); \
- \
- dest0_m = LOAD_UB(dst_m); \
- dest1_m = LOAD_UB(dst_m + 4 * dest_stride); \
- dest2_m = LOAD_UB(dst_m + 8 * dest_stride); \
- dest3_m = LOAD_UB(dst_m + 12 * dest_stride); \
- \
- res0_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest0_m); \
- res1_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest1_m); \
- res2_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest2_m); \
- res3_m = (v8i16)__msa_ilvr_b(zero_m, (v16i8)dest3_m); \
- \
- res0_m += (v8i16)(in0); \
- res1_m += (v8i16)(in1); \
- res2_m += (v8i16)(in2); \
- res3_m += (v8i16)(in3); \
- \
- res0_m = CLIP_UNSIGNED_CHAR_H(res0_m); \
- res1_m = CLIP_UNSIGNED_CHAR_H(res1_m); \
- res2_m = CLIP_UNSIGNED_CHAR_H(res2_m); \
- res3_m = CLIP_UNSIGNED_CHAR_H(res3_m); \
- \
- tmp0_m = __msa_pckev_b((v16i8)res1_m, (v16i8)res0_m); \
- tmp1_m = __msa_pckev_b((v16i8)res3_m, (v16i8)res2_m); \
- \
- out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \
- out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \
- out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \
- out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \
- \
- STORE_DWORD(dst_m, out0_m); \
- dst_m += (4 * dest_stride); \
- STORE_DWORD(dst_m, out1_m); \
- dst_m += (4 * dest_stride); \
- STORE_DWORD(dst_m, out2_m); \
- dst_m += (4 * dest_stride); \
- STORE_DWORD(dst_m, out3_m); \
-}
+#include "vp9/common/mips/msa/vp9_idct_msa.h"
static void vp9_idct32x8_row_transpose_store(const int16_t *input,
int16_t *tmp_buf) {
- v8i16 m0, m1, m2, m3, m4, m5, m6, m7;
- v8i16 n0, n1, n2, n3, n4, n5, n6, n7;
+ v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;
/* 1st & 2nd 8x8 */
- LOAD_8VECS_SH(input, 32, m0, n0, m1, n1, m2, n2, m3, n3);
- LOAD_8VECS_SH((input + 8), 32, m4, n4, m5, n5, m6, n6, m7, n7);
- TRANSPOSE8x8_H_SH(m0, n0, m1, n1, m2, n2, m3, n3,
- m0, n0, m1, n1, m2, n2, m3, n3);
- TRANSPOSE8x8_H_SH(m4, n4, m5, n5, m6, n6, m7, n7,
- m4, n4, m5, n5, m6, n6, m7, n7);
- STORE_4VECS_SH((tmp_buf), 8, m0, n0, m1, n1);
- STORE_4VECS_SH((tmp_buf + 4 * 8), 8, m2, n2, m3, n3);
- STORE_4VECS_SH((tmp_buf + 8 * 8), 8, m4, n4, m5, n5);
- STORE_4VECS_SH((tmp_buf + 12 * 8), 8, m6, n6, m7, n7);
+ LD_SH8(input, 32, m0, n0, m1, n1, m2, n2, m3, n3);
+ LD_SH8((input + 8), 32, m4, n4, m5, n5, m6, n6, m7, n7);
+ TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3,
+ m0, n0, m1, n1, m2, n2, m3, n3);
+ TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7,
+ m4, n4, m5, n5, m6, n6, m7, n7);
+ ST_SH8(m0, n0, m1, n1, m2, n2, m3, n3, (tmp_buf), 8);
+ ST_SH4(m4, n4, m5, n5, (tmp_buf + 8 * 8), 8);
+ ST_SH4(m6, n6, m7, n7, (tmp_buf + 12 * 8), 8);
/* 3rd & 4th 8x8 */
- LOAD_8VECS_SH((input + 16), 32, m0, n0, m1, n1, m2, n2, m3, n3);
- LOAD_8VECS_SH((input + 24), 32, m4, n4, m5, n5, m6, n6, m7, n7);
- TRANSPOSE8x8_H_SH(m0, n0, m1, n1, m2, n2, m3, n3,
- m0, n0, m1, n1, m2, n2, m3, n3);
- TRANSPOSE8x8_H_SH(m4, n4, m5, n5, m6, n6, m7, n7,
- m4, n4, m5, n5, m6, n6, m7, n7);
- STORE_4VECS_SH((tmp_buf + 16 * 8), 8, m0, n0, m1, n1);
- STORE_4VECS_SH((tmp_buf + 20 * 8), 8, m2, n2, m3, n3);
- STORE_4VECS_SH((tmp_buf + 24 * 8), 8, m4, n4, m5, n5);
- STORE_4VECS_SH((tmp_buf + 28 * 8), 8, m6, n6, m7, n7);
+ LD_SH8((input + 16), 32, m0, n0, m1, n1, m2, n2, m3, n3);
+ LD_SH8((input + 24), 32, m4, n4, m5, n5, m6, n6, m7, n7);
+ TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3,
+ m0, n0, m1, n1, m2, n2, m3, n3);
+ TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7,
+ m4, n4, m5, n5, m6, n6, m7, n7);
+ ST_SH4(m0, n0, m1, n1, (tmp_buf + 16 * 8), 8);
+ ST_SH4(m2, n2, m3, n3, (tmp_buf + 20 * 8), 8);
+ ST_SH4(m4, n4, m5, n5, (tmp_buf + 24 * 8), 8);
+ ST_SH4(m6, n6, m7, n7, (tmp_buf + 28 * 8), 8);
}
static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf,
@@ -119,46 +45,28 @@ static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf,
v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7;
/* Even stage 1 */
- LOAD_8VECS_SH(tmp_buf, 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+ LD_SH8(tmp_buf, 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
- DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7);
- DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3);
-
- vec0 = reg1 - reg5;
- vec1 = reg1 + reg5;
- vec2 = reg7 - reg3;
- vec3 = reg7 + reg3;
-
- DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
+ VP9_DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7);
+ VP9_DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3);
+ BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0);
+ VP9_DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
loc1 = vec3;
loc0 = vec1;
- DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4);
- DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6);
-
- vec0 = reg4 - reg6;
- vec1 = reg4 + reg6;
- vec2 = reg0 - reg2;
- vec3 = reg0 + reg2;
-
- stp4 = vec0 - loc0;
- stp3 = vec0 + loc0;
- stp7 = vec1 - loc1;
- stp0 = vec1 + loc1;
- stp5 = vec2 - loc2;
- stp2 = vec2 + loc2;
- stp6 = vec3 - loc3;
- stp1 = vec3 + loc3;
+ VP9_DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4);
+ VP9_DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6);
+ BUTTERFLY_4(reg4, reg0, reg2, reg6, vec1, vec3, vec2, vec0);
+ BUTTERFLY_4(vec0, vec1, loc1, loc0, stp3, stp0, stp7, stp4);
+ BUTTERFLY_4(vec2, vec3, loc3, loc2, stp2, stp1, stp6, stp5);
/* Even stage 2 */
- LOAD_8VECS_SH((tmp_buf + 16), 32,
- reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
-
- DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7);
- DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3);
- DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5);
- DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1);
+ LD_SH8((tmp_buf + 16), 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+ VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7);
+ VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3);
+ VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5);
+ VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1);
vec0 = reg0 + reg4;
reg0 = reg0 - reg4;
@@ -176,54 +84,42 @@ static void vp9_idct32x8_row_even_process_store(int16_t *tmp_buf,
reg4 = reg5 - vec1;
reg5 = reg5 + vec1;
- DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7);
- DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1);
+ VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7);
+ VP9_DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1);
vec0 = reg0 - reg6;
reg0 = reg0 + reg6;
vec1 = reg7 - reg1;
reg7 = reg7 + reg1;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1);
- DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1);
+ VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4);
/* Even stage 3 : Dependency on Even stage 1 & Even stage 2 */
- loc0 = stp0 - reg5;
- loc1 = stp0 + reg5;
- loc2 = stp1 - reg7;
- loc3 = stp1 + reg7;
- STORE_SH(loc0, (tmp_eve_buf + 15 * 8));
- STORE_SH(loc1, (tmp_eve_buf));
- STORE_SH(loc2, (tmp_eve_buf + 14 * 8));
- STORE_SH(loc3, (tmp_eve_buf + 8));
-
- loc0 = stp2 - reg1;
- loc1 = stp2 + reg1;
- loc2 = stp3 - reg4;
- loc3 = stp3 + reg4;
- STORE_SH(loc0, (tmp_eve_buf + 13 * 8));
- STORE_SH(loc1, (tmp_eve_buf + 2 * 8));
- STORE_SH(loc2, (tmp_eve_buf + 12 * 8));
- STORE_SH(loc3, (tmp_eve_buf + 3 * 8));
+ BUTTERFLY_4(stp0, stp1, reg7, reg5, loc1, loc3, loc2, loc0);
+ ST_SH(loc0, (tmp_eve_buf + 15 * 8));
+ ST_SH(loc1, (tmp_eve_buf));
+ ST_SH(loc2, (tmp_eve_buf + 14 * 8));
+ ST_SH(loc3, (tmp_eve_buf + 8));
+
+ BUTTERFLY_4(stp2, stp3, reg4, reg1, loc1, loc3, loc2, loc0);
+ ST_SH(loc0, (tmp_eve_buf + 13 * 8));
+ ST_SH(loc1, (tmp_eve_buf + 2 * 8));
+ ST_SH(loc2, (tmp_eve_buf + 12 * 8));
+ ST_SH(loc3, (tmp_eve_buf + 3 * 8));
/* Store 8 */
- loc0 = stp4 - reg3;
- loc1 = stp4 + reg3;
- loc2 = stp5 - reg6;
- loc3 = stp5 + reg6;
- STORE_SH(loc0, (tmp_eve_buf + 11 * 8));
- STORE_SH(loc1, (tmp_eve_buf + 4 * 8));
- STORE_SH(loc2, (tmp_eve_buf + 10 * 8));
- STORE_SH(loc3, (tmp_eve_buf + 5 * 8));
-
- loc0 = stp6 - reg0;
- loc1 = stp6 + reg0;
- loc2 = stp7 - reg2;
- loc3 = stp7 + reg2;
- STORE_SH(loc0, (tmp_eve_buf + 9 * 8));
- STORE_SH(loc1, (tmp_eve_buf + 6 * 8));
- STORE_SH(loc2, (tmp_eve_buf + 8 * 8));
- STORE_SH(loc3, (tmp_eve_buf + 7 * 8));
+ BUTTERFLY_4(stp4, stp5, reg6, reg3, loc1, loc3, loc2, loc0);
+ ST_SH(loc0, (tmp_eve_buf + 11 * 8));
+ ST_SH(loc1, (tmp_eve_buf + 4 * 8));
+ ST_SH(loc2, (tmp_eve_buf + 10 * 8));
+ ST_SH(loc3, (tmp_eve_buf + 5 * 8));
+
+ BUTTERFLY_4(stp6, stp7, reg2, reg0, loc1, loc3, loc2, loc0);
+ ST_SH(loc0, (tmp_eve_buf + 9 * 8));
+ ST_SH(loc1, (tmp_eve_buf + 6 * 8));
+ ST_SH(loc2, (tmp_eve_buf + 8 * 8));
+ ST_SH(loc3, (tmp_eve_buf + 7 * 8));
}
static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf,
@@ -232,19 +128,19 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf,
v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
/* Odd stage 1 */
- reg0 = LOAD_SH(tmp_buf + 8);
- reg1 = LOAD_SH(tmp_buf + 7 * 8);
- reg2 = LOAD_SH(tmp_buf + 9 * 8);
- reg3 = LOAD_SH(tmp_buf + 15 * 8);
- reg4 = LOAD_SH(tmp_buf + 17 * 8);
- reg5 = LOAD_SH(tmp_buf + 23 * 8);
- reg6 = LOAD_SH(tmp_buf + 25 * 8);
- reg7 = LOAD_SH(tmp_buf + 31 * 8);
-
- DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7);
- DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4);
- DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5);
- DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6);
+ reg0 = LD_SH(tmp_buf + 8);
+ reg1 = LD_SH(tmp_buf + 7 * 8);
+ reg2 = LD_SH(tmp_buf + 9 * 8);
+ reg3 = LD_SH(tmp_buf + 15 * 8);
+ reg4 = LD_SH(tmp_buf + 17 * 8);
+ reg5 = LD_SH(tmp_buf + 23 * 8);
+ reg6 = LD_SH(tmp_buf + 25 * 8);
+ reg7 = LD_SH(tmp_buf + 31 * 8);
+
+ VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7);
+ VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4);
+ VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5);
+ VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6);
vec0 = reg0 + reg3;
reg0 = reg0 - reg3;
@@ -257,262 +153,192 @@ static void vp9_idct32x8_row_odd_process_store(int16_t *tmp_buf,
reg5 = vec0;
/* 4 Stores */
- vec0 = reg5 + reg4;
- vec1 = reg3 + reg2;
- STORE_SH(vec0, (tmp_odd_buf + 4 * 8));
- STORE_SH(vec1, (tmp_odd_buf + 5 * 8));
+ ADD2(reg5, reg4, reg3, reg2, vec0, vec1);
+ ST_SH2(vec0, vec1, (tmp_odd_buf + 4 * 8), 8);
- vec0 = reg5 - reg4;
- vec1 = reg3 - reg2;
- DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1);
- STORE_SH(vec0, (tmp_odd_buf));
- STORE_SH(vec1, (tmp_odd_buf + 8));
+ SUB2(reg5, reg4, reg3, reg2, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1);
+ ST_SH2(vec0, vec1, (tmp_odd_buf), 8);
/* 4 Stores */
- DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7);
- DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6);
-
- vec0 = reg0 + reg1;
- vec2 = reg7 - reg6;
- vec1 = reg7 + reg6;
- vec3 = reg0 - reg1;
- STORE_SH(vec0, (tmp_odd_buf + 6 * 8));
- STORE_SH(vec1, (tmp_odd_buf + 7 * 8));
+ VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7);
+ VP9_DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6);
+ BUTTERFLY_4(reg0, reg7, reg6, reg1, vec0, vec1, vec2, vec3);
+ ST_SH2(vec0, vec1, (tmp_odd_buf + 6 * 8), 8);
- DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3);
- STORE_SH(vec2, (tmp_odd_buf + 2 * 8));
- STORE_SH(vec3, (tmp_odd_buf + 3 * 8));
+ VP9_DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3);
+ ST_SH2(vec2, vec3, (tmp_odd_buf + 2 * 8), 8);
/* Odd stage 2 */
-
/* 8 loads */
- reg0 = LOAD_SH(tmp_buf + 3 * 8);
- reg1 = LOAD_SH(tmp_buf + 5 * 8);
- reg2 = LOAD_SH(tmp_buf + 11 * 8);
- reg3 = LOAD_SH(tmp_buf + 13 * 8);
- reg4 = LOAD_SH(tmp_buf + 19 * 8);
- reg5 = LOAD_SH(tmp_buf + 21 * 8);
- reg6 = LOAD_SH(tmp_buf + 27 * 8);
- reg7 = LOAD_SH(tmp_buf + 29 * 8);
-
- DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6);
- DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5);
- DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4);
- DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7);
+ reg0 = LD_SH(tmp_buf + 3 * 8);
+ reg1 = LD_SH(tmp_buf + 5 * 8);
+ reg2 = LD_SH(tmp_buf + 11 * 8);
+ reg3 = LD_SH(tmp_buf + 13 * 8);
+ reg4 = LD_SH(tmp_buf + 19 * 8);
+ reg5 = LD_SH(tmp_buf + 21 * 8);
+ reg6 = LD_SH(tmp_buf + 27 * 8);
+ reg7 = LD_SH(tmp_buf + 29 * 8);
+
+ VP9_DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6);
+ VP9_DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5);
+ VP9_DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4);
+ VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7);
/* 4 Stores */
- vec0 = reg1 - reg2;
- vec1 = reg6 - reg5;
- vec2 = reg0 - reg3;
- vec3 = reg7 - reg4;
- DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1);
- DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3);
-
- vec2 = loc2 - loc0;
- vec3 = loc3 - loc1;
- vec0 = loc2 + loc0;
- vec1 = loc3 + loc1;
- STORE_SH(vec0, (tmp_odd_buf + 12 * 8));
- STORE_SH(vec1, (tmp_odd_buf + 15 * 8));
+ SUB4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4,
+ vec0, vec1, vec2, vec3);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1);
+ VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3);
- DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1);
+ BUTTERFLY_4(loc3, loc2, loc0, loc1, vec1, vec0, vec2, vec3);
+ ST_SH2(vec0, vec1, (tmp_odd_buf + 12 * 8), 3 * 8);
- STORE_SH(vec0, (tmp_odd_buf + 10 * 8));
- STORE_SH(vec1, (tmp_odd_buf + 11 * 8));
+ VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1);
+ ST_SH2(vec0, vec1, (tmp_odd_buf + 10 * 8), 8);
/* 4 Stores */
- vec0 = reg0 + reg3;
- vec1 = reg1 + reg2;
- vec2 = reg6 + reg5;
- vec3 = reg7 + reg4;
- reg0 = vec0 + vec1;
- reg1 = vec3 + vec2;
- reg2 = vec0 - vec1;
- reg3 = vec3 - vec2;
- STORE_SH(reg0, (tmp_odd_buf + 13 * 8));
- STORE_SH(reg1, (tmp_odd_buf + 14 * 8));
-
- DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1);
+ ADD4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4,
+ vec1, vec2, vec0, vec3);
+ BUTTERFLY_4(vec0, vec3, vec2, vec1, reg0, reg1, reg3, reg2);
+ ST_SH(reg0, (tmp_odd_buf + 13 * 8));
+ ST_SH(reg1, (tmp_odd_buf + 14 * 8));
- STORE_SH(reg0, (tmp_odd_buf + 8 * 8));
- STORE_SH(reg1, (tmp_odd_buf + 9 * 8));
+ VP9_DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1);
+ ST_SH2(reg0, reg1, (tmp_odd_buf + 8 * 8), 8);
/* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */
/* Load 8 & Store 8 */
- reg0 = LOAD_SH(tmp_odd_buf);
- reg1 = LOAD_SH(tmp_odd_buf + 1 * 8);
- reg2 = LOAD_SH(tmp_odd_buf + 2 * 8);
- reg3 = LOAD_SH(tmp_odd_buf + 3 * 8);
- reg4 = LOAD_SH(tmp_odd_buf + 8 * 8);
- reg5 = LOAD_SH(tmp_odd_buf + 9 * 8);
- reg6 = LOAD_SH(tmp_odd_buf + 10 * 8);
- reg7 = LOAD_SH(tmp_odd_buf + 11 * 8);
-
- loc0 = reg0 + reg4;
- loc1 = reg1 + reg5;
- loc2 = reg2 + reg6;
- loc3 = reg3 + reg7;
- STORE_SH(loc0, (tmp_odd_buf));
- STORE_SH(loc1, (tmp_odd_buf + 1 * 8));
- STORE_SH(loc2, (tmp_odd_buf + 2 * 8));
- STORE_SH(loc3, (tmp_odd_buf + 3 * 8));
-
- vec0 = reg0 - reg4;
- vec1 = reg1 - reg5;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
-
- vec0 = reg2 - reg6;
- vec1 = reg3 - reg7;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
-
- STORE_SH(loc0, (tmp_odd_buf + 8 * 8));
- STORE_SH(loc1, (tmp_odd_buf + 9 * 8));
- STORE_SH(loc2, (tmp_odd_buf + 10 * 8));
- STORE_SH(loc3, (tmp_odd_buf + 11 * 8));
+ LD_SH4(tmp_odd_buf, 8, reg0, reg1, reg2, reg3);
+ LD_SH4((tmp_odd_buf + 8 * 8), 8, reg4, reg5, reg6, reg7);
+
+ ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7,
+ loc0, loc1, loc2, loc3);
+ ST_SH4(loc0, loc1, loc2, loc3, tmp_odd_buf, 8);
+
+ SUB2(reg0, reg4, reg1, reg5, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
+
+ SUB2(reg2, reg6, reg3, reg7, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
+ ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 8 * 8), 8);
/* Load 8 & Store 8 */
- reg1 = LOAD_SH(tmp_odd_buf + 4 * 8);
- reg2 = LOAD_SH(tmp_odd_buf + 5 * 8);
- reg0 = LOAD_SH(tmp_odd_buf + 6 * 8);
- reg3 = LOAD_SH(tmp_odd_buf + 7 * 8);
- reg4 = LOAD_SH(tmp_odd_buf + 12 * 8);
- reg5 = LOAD_SH(tmp_odd_buf + 13 * 8);
- reg6 = LOAD_SH(tmp_odd_buf + 14 * 8);
- reg7 = LOAD_SH(tmp_odd_buf + 15 * 8);
-
- loc0 = reg0 + reg4;
- loc1 = reg1 + reg5;
- loc2 = reg2 + reg6;
- loc3 = reg3 + reg7;
- STORE_SH(loc0, (tmp_odd_buf + 4 * 8));
- STORE_SH(loc1, (tmp_odd_buf + 5 * 8));
- STORE_SH(loc2, (tmp_odd_buf + 6 * 8));
- STORE_SH(loc3, (tmp_odd_buf + 7 * 8));
-
- vec0 = reg0 - reg4;
- vec1 = reg3 - reg7;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
-
- vec0 = reg1 - reg5;
- vec1 = reg2 - reg6;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
-
- STORE_SH(loc0, (tmp_odd_buf + 12 * 8));
- STORE_SH(loc1, (tmp_odd_buf + 13 * 8));
- STORE_SH(loc2, (tmp_odd_buf + 14 * 8));
- STORE_SH(loc3, (tmp_odd_buf + 15 * 8));
+ LD_SH4((tmp_odd_buf + 4 * 8), 8, reg1, reg2, reg0, reg3);
+ LD_SH4((tmp_odd_buf + 12 * 8), 8, reg4, reg5, reg6, reg7);
+
+ ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7,
+ loc0, loc1, loc2, loc3);
+ ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 4 * 8), 8);
+
+ SUB2(reg0, reg4, reg3, reg7, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
+
+ SUB2(reg1, reg5, reg2, reg6, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
+ ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8);
}
static void vp9_idct_butterfly_transpose_store(int16_t *tmp_buf,
int16_t *tmp_eve_buf,
int16_t *tmp_odd_buf,
- int16_t *dest) {
+ int16_t *dst) {
v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
- v8i16 m0, m1, m2, m3, m4, m5, m6, m7;
- v8i16 n0, n1, n2, n3, n4, n5, n6, n7;
+ v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;
/* FINAL BUTTERFLY : Dependency on Even & Odd */
- /* Total: 32 loads, 32 stores */
- vec0 = LOAD_SH(tmp_odd_buf);
- vec1 = LOAD_SH(tmp_odd_buf + 9 * 8);
- vec2 = LOAD_SH(tmp_odd_buf + 14 * 8);
- vec3 = LOAD_SH(tmp_odd_buf + 6 * 8);
- loc0 = LOAD_SH(tmp_eve_buf);
- loc1 = LOAD_SH(tmp_eve_buf + 8 * 8);
- loc2 = LOAD_SH(tmp_eve_buf + 4 * 8);
- loc3 = LOAD_SH(tmp_eve_buf + 12 * 8);
-
- m0 = (loc0 + vec3);
- STORE_SH((loc0 - vec3), (tmp_buf + 31 * 8));
- STORE_SH((loc1 - vec2), (tmp_buf + 23 * 8));
- m4 = (loc1 + vec2);
- STORE_SH((loc2 - vec1), (tmp_buf + 27 * 8));
- m2 = (loc2 + vec1);
- STORE_SH((loc3 - vec0), (tmp_buf + 19 * 8));
- m6 = (loc3 + vec0);
+ vec0 = LD_SH(tmp_odd_buf);
+ vec1 = LD_SH(tmp_odd_buf + 9 * 8);
+ vec2 = LD_SH(tmp_odd_buf + 14 * 8);
+ vec3 = LD_SH(tmp_odd_buf + 6 * 8);
+ loc0 = LD_SH(tmp_eve_buf);
+ loc1 = LD_SH(tmp_eve_buf + 8 * 8);
+ loc2 = LD_SH(tmp_eve_buf + 4 * 8);
+ loc3 = LD_SH(tmp_eve_buf + 12 * 8);
+
+ ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6);
+
+ ST_SH((loc0 - vec3), (tmp_buf + 31 * 8));
+ ST_SH((loc1 - vec2), (tmp_buf + 23 * 8));
+ ST_SH((loc2 - vec1), (tmp_buf + 27 * 8));
+ ST_SH((loc3 - vec0), (tmp_buf + 19 * 8));
/* Load 8 & Store 8 */
- vec0 = LOAD_SH(tmp_odd_buf + 4 * 8);
- vec1 = LOAD_SH(tmp_odd_buf + 13 * 8);
- vec2 = LOAD_SH(tmp_odd_buf + 10 * 8);
- vec3 = LOAD_SH(tmp_odd_buf + 3 * 8);
- loc0 = LOAD_SH(tmp_eve_buf + 2 * 8);
- loc1 = LOAD_SH(tmp_eve_buf + 10 * 8);
- loc2 = LOAD_SH(tmp_eve_buf + 6 * 8);
- loc3 = LOAD_SH(tmp_eve_buf + 14 * 8);
-
- m1 = (loc0 + vec3);
- STORE_SH((loc0 - vec3), (tmp_buf + 29 * 8));
- STORE_SH((loc1 - vec2), (tmp_buf + 21 * 8));
- m5 = (loc1 + vec2);
- STORE_SH((loc2 - vec1), (tmp_buf + 25 * 8));
- m3 = (loc2 + vec1);
- STORE_SH((loc3 - vec0), (tmp_buf + 17 * 8));
- m7 = (loc3 + vec0);
+ vec0 = LD_SH(tmp_odd_buf + 4 * 8);
+ vec1 = LD_SH(tmp_odd_buf + 13 * 8);
+ vec2 = LD_SH(tmp_odd_buf + 10 * 8);
+ vec3 = LD_SH(tmp_odd_buf + 3 * 8);
+ loc0 = LD_SH(tmp_eve_buf + 2 * 8);
+ loc1 = LD_SH(tmp_eve_buf + 10 * 8);
+ loc2 = LD_SH(tmp_eve_buf + 6 * 8);
+ loc3 = LD_SH(tmp_eve_buf + 14 * 8);
+
+ ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7);
+
+ ST_SH((loc0 - vec3), (tmp_buf + 29 * 8));
+ ST_SH((loc1 - vec2), (tmp_buf + 21 * 8));
+ ST_SH((loc2 - vec1), (tmp_buf + 25 * 8));
+ ST_SH((loc3 - vec0), (tmp_buf + 17 * 8));
/* Load 8 & Store 8 */
- vec0 = LOAD_SH(tmp_odd_buf + 2 * 8);
- vec1 = LOAD_SH(tmp_odd_buf + 11 * 8);
- vec2 = LOAD_SH(tmp_odd_buf + 12 * 8);
- vec3 = LOAD_SH(tmp_odd_buf + 7 * 8);
- loc0 = LOAD_SH(tmp_eve_buf + 1 * 8);
- loc1 = LOAD_SH(tmp_eve_buf + 9 * 8);
- loc2 = LOAD_SH(tmp_eve_buf + 5 * 8);
- loc3 = LOAD_SH(tmp_eve_buf + 13 * 8);
-
- n0 = (loc0 + vec3);
- STORE_SH((loc0 - vec3), (tmp_buf + 30 * 8));
- STORE_SH((loc1 - vec2), (tmp_buf + 22 * 8));
- n4 = (loc1 + vec2);
- STORE_SH((loc2 - vec1), (tmp_buf + 26 * 8));
- n2 = (loc2 + vec1);
- STORE_SH((loc3 - vec0), (tmp_buf + 18 * 8));
- n6 = (loc3 + vec0);
+ vec0 = LD_SH(tmp_odd_buf + 2 * 8);
+ vec1 = LD_SH(tmp_odd_buf + 11 * 8);
+ vec2 = LD_SH(tmp_odd_buf + 12 * 8);
+ vec3 = LD_SH(tmp_odd_buf + 7 * 8);
+ loc0 = LD_SH(tmp_eve_buf + 1 * 8);
+ loc1 = LD_SH(tmp_eve_buf + 9 * 8);
+ loc2 = LD_SH(tmp_eve_buf + 5 * 8);
+ loc3 = LD_SH(tmp_eve_buf + 13 * 8);
+
+ ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6);
+
+ ST_SH((loc0 - vec3), (tmp_buf + 30 * 8));
+ ST_SH((loc1 - vec2), (tmp_buf + 22 * 8));
+ ST_SH((loc2 - vec1), (tmp_buf + 26 * 8));
+ ST_SH((loc3 - vec0), (tmp_buf + 18 * 8));
/* Load 8 & Store 8 */
- vec0 = LOAD_SH(tmp_odd_buf + 5 * 8);
- vec1 = LOAD_SH(tmp_odd_buf + 15 * 8);
- vec2 = LOAD_SH(tmp_odd_buf + 8 * 8);
- vec3 = LOAD_SH(tmp_odd_buf + 1 * 8);
- loc0 = LOAD_SH(tmp_eve_buf + 3 * 8);
- loc1 = LOAD_SH(tmp_eve_buf + 11 * 8);
- loc2 = LOAD_SH(tmp_eve_buf + 7 * 8);
- loc3 = LOAD_SH(tmp_eve_buf + 15 * 8);
-
- n1 = (loc0 + vec3);
- STORE_SH((loc0 - vec3), (tmp_buf + 28 * 8));
- STORE_SH((loc1 - vec2), (tmp_buf + 20 * 8));
- n5 = (loc1 + vec2);
- STORE_SH((loc2 - vec1), (tmp_buf + 24 * 8));
- n3 = (loc2 + vec1);
- STORE_SH((loc3 - vec0), (tmp_buf + 16 * 8));
- n7 = (loc3 + vec0);
+ vec0 = LD_SH(tmp_odd_buf + 5 * 8);
+ vec1 = LD_SH(tmp_odd_buf + 15 * 8);
+ vec2 = LD_SH(tmp_odd_buf + 8 * 8);
+ vec3 = LD_SH(tmp_odd_buf + 1 * 8);
+ loc0 = LD_SH(tmp_eve_buf + 3 * 8);
+ loc1 = LD_SH(tmp_eve_buf + 11 * 8);
+ loc2 = LD_SH(tmp_eve_buf + 7 * 8);
+ loc3 = LD_SH(tmp_eve_buf + 15 * 8);
+
+ ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7);
+
+ ST_SH((loc0 - vec3), (tmp_buf + 28 * 8));
+ ST_SH((loc1 - vec2), (tmp_buf + 20 * 8));
+ ST_SH((loc2 - vec1), (tmp_buf + 24 * 8));
+ ST_SH((loc3 - vec0), (tmp_buf + 16 * 8));
/* Transpose : 16 vectors */
/* 1st & 2nd 8x8 */
- TRANSPOSE8x8_H_SH(m0, n0, m1, n1, m2, n2, m3, n3,
- m0, n0, m1, n1, m2, n2, m3, n3);
- STORE_4VECS_SH((dest + 0), 32, m0, n0, m1, n1);
- STORE_4VECS_SH((dest + 4 * 32), 32, m2, n2, m3, n3);
+ TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3,
+ m0, n0, m1, n1, m2, n2, m3, n3);
+ ST_SH4(m0, n0, m1, n1, (dst + 0), 32);
+ ST_SH4(m2, n2, m3, n3, (dst + 4 * 32), 32);
- TRANSPOSE8x8_H_SH(m4, n4, m5, n5, m6, n6, m7, n7,
- m4, n4, m5, n5, m6, n6, m7, n7);
- STORE_4VECS_SH((dest + 8), 32, m4, n4, m5, n5);
- STORE_4VECS_SH((dest + 8 + 4 * 32), 32, m6, n6, m7, n7);
+ TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7,
+ m4, n4, m5, n5, m6, n6, m7, n7);
+ ST_SH4(m4, n4, m5, n5, (dst + 8), 32);
+ ST_SH4(m6, n6, m7, n7, (dst + 8 + 4 * 32), 32);
/* 3rd & 4th 8x8 */
- LOAD_8VECS_SH((tmp_buf + 8 * 16), 8, m0, n0, m1, n1, m2, n2, m3, n3);
- LOAD_8VECS_SH((tmp_buf + 12 * 16), 8, m4, n4, m5, n5, m6, n6, m7, n7);
- TRANSPOSE8x8_H_SH(m0, n0, m1, n1, m2, n2, m3, n3,
- m0, n0, m1, n1, m2, n2, m3, n3);
- STORE_4VECS_SH((dest + 16), 32, m0, n0, m1, n1);
- STORE_4VECS_SH((dest + 16 + 4 * 32), 32, m2, n2, m3, n3);
-
- TRANSPOSE8x8_H_SH(m4, n4, m5, n5, m6, n6, m7, n7,
- m4, n4, m5, n5, m6, n6, m7, n7);
- STORE_4VECS_SH((dest + 24), 32, m4, n4, m5, n5);
- STORE_4VECS_SH((dest + 24 + 4 * 32), 32, m6, n6, m7, n7);
+ LD_SH8((tmp_buf + 8 * 16), 8, m0, n0, m1, n1, m2, n2, m3, n3);
+ LD_SH8((tmp_buf + 12 * 16), 8, m4, n4, m5, n5, m6, n6, m7, n7);
+ TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3,
+ m0, n0, m1, n1, m2, n2, m3, n3);
+ ST_SH4(m0, n0, m1, n1, (dst + 16), 32);
+ ST_SH4(m2, n2, m3, n3, (dst + 16 + 4 * 32), 32);
+
+ TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7,
+ m4, n4, m5, n5, m6, n6, m7, n7);
+ ST_SH4(m4, n4, m5, n5, (dst + 24), 32);
+ ST_SH4(m6, n6, m7, n7, (dst + 24 + 4 * 32), 32);
}
static void vp9_idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) {
@@ -521,11 +347,8 @@ static void vp9_idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) {
DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]);
vp9_idct32x8_row_transpose_store(input, &tmp_buf[0]);
-
vp9_idct32x8_row_even_process_store(&tmp_buf[0], &tmp_eve_buf[0]);
-
vp9_idct32x8_row_odd_process_store(&tmp_buf[0], &tmp_odd_buf[0]);
-
vp9_idct_butterfly_transpose_store(&tmp_buf[0], &tmp_eve_buf[0],
&tmp_odd_buf[0], output);
}
@@ -537,48 +360,31 @@ static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf,
v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7;
/* Even stage 1 */
- LOAD_8VECS_SH(tmp_buf, (4 * 32),
- reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+ LD_SH8(tmp_buf, (4 * 32), reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+ tmp_buf += (2 * 32);
- DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7);
- DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3);
-
- vec0 = reg1 - reg5;
- vec1 = reg1 + reg5;
- vec2 = reg7 - reg3;
- vec3 = reg7 + reg3;
-
- DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
+ VP9_DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7);
+ VP9_DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3);
+ BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0);
+ VP9_DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
loc1 = vec3;
loc0 = vec1;
- DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4);
- DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6);
-
- vec0 = reg4 - reg6;
- vec1 = reg4 + reg6;
- vec2 = reg0 - reg2;
- vec3 = reg0 + reg2;
-
- stp4 = vec0 - loc0;
- stp3 = vec0 + loc0;
- stp7 = vec1 - loc1;
- stp0 = vec1 + loc1;
- stp5 = vec2 - loc2;
- stp2 = vec2 + loc2;
- stp6 = vec3 - loc3;
- stp1 = vec3 + loc3;
+ VP9_DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4);
+ VP9_DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6);
+ BUTTERFLY_4(reg4, reg0, reg2, reg6, vec1, vec3, vec2, vec0);
+ BUTTERFLY_4(vec0, vec1, loc1, loc0, stp3, stp0, stp7, stp4);
+ BUTTERFLY_4(vec2, vec3, loc3, loc2, stp2, stp1, stp6, stp5);
/* Even stage 2 */
/* Load 8 */
- LOAD_8VECS_SH((tmp_buf + 2 * 32), (4 * 32),
- reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
+ LD_SH8(tmp_buf, (4 * 32), reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7);
- DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7);
- DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3);
- DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5);
- DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1);
+ VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7);
+ VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3);
+ VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5);
+ VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1);
vec0 = reg0 + reg4;
reg0 = reg0 - reg4;
@@ -596,55 +402,35 @@ static void vp9_idct8x32_column_even_process_store(int16_t *tmp_buf,
reg4 = reg5 - vec1;
reg5 = reg5 + vec1;
- DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7);
- DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1);
+ VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7);
+ VP9_DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1);
vec0 = reg0 - reg6;
reg0 = reg0 + reg6;
vec1 = reg7 - reg1;
reg7 = reg7 + reg1;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1);
- DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1);
+ VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4);
/* Even stage 3 : Dependency on Even stage 1 & Even stage 2 */
/* Store 8 */
- loc0 = stp0 - reg5;
- loc1 = stp0 + reg5;
- loc2 = stp1 - reg7;
- loc3 = stp1 + reg7;
- STORE_SH(loc0, (tmp_eve_buf + 15 * 8));
- STORE_SH(loc1, (tmp_eve_buf));
- STORE_SH(loc2, (tmp_eve_buf + 14 * 8));
- STORE_SH(loc3, (tmp_eve_buf + 1 * 8));
-
- loc0 = stp2 - reg1;
- loc1 = stp2 + reg1;
- loc2 = stp3 - reg4;
- loc3 = stp3 + reg4;
- STORE_SH(loc0, (tmp_eve_buf + 13 * 8));
- STORE_SH(loc1, (tmp_eve_buf + 2 * 8));
- STORE_SH(loc2, (tmp_eve_buf + 12 * 8));
- STORE_SH(loc3, (tmp_eve_buf + 3 * 8));
+ BUTTERFLY_4(stp0, stp1, reg7, reg5, loc1, loc3, loc2, loc0);
+ ST_SH2(loc1, loc3, tmp_eve_buf, 8);
+ ST_SH2(loc2, loc0, (tmp_eve_buf + 14 * 8), 8);
+
+ BUTTERFLY_4(stp2, stp3, reg4, reg1, loc1, loc3, loc2, loc0);
+ ST_SH2(loc1, loc3, (tmp_eve_buf + 2 * 8), 8);
+ ST_SH2(loc2, loc0, (tmp_eve_buf + 12 * 8), 8);
/* Store 8 */
- loc0 = stp4 - reg3;
- loc1 = stp4 + reg3;
- loc2 = stp5 - reg6;
- loc3 = stp5 + reg6;
- STORE_SH(loc0, (tmp_eve_buf + 11 * 8));
- STORE_SH(loc1, (tmp_eve_buf + 4 * 8));
- STORE_SH(loc2, (tmp_eve_buf + 10 * 8));
- STORE_SH(loc3, (tmp_eve_buf + 5 * 8));
-
- loc0 = stp6 - reg0;
- loc1 = stp6 + reg0;
- loc2 = stp7 - reg2;
- loc3 = stp7 + reg2;
- STORE_SH(loc0, (tmp_eve_buf + 9 * 8));
- STORE_SH(loc1, (tmp_eve_buf + 6 * 8));
- STORE_SH(loc2, (tmp_eve_buf + 8 * 8));
- STORE_SH(loc3, (tmp_eve_buf + 7 * 8));
+ BUTTERFLY_4(stp4, stp5, reg6, reg3, loc1, loc3, loc2, loc0);
+ ST_SH2(loc1, loc3, (tmp_eve_buf + 4 * 8), 8);
+ ST_SH2(loc2, loc0, (tmp_eve_buf + 10 * 8), 8);
+
+ BUTTERFLY_4(stp6, stp7, reg2, reg0, loc1, loc3, loc2, loc0);
+ ST_SH2(loc1, loc3, (tmp_eve_buf + 6 * 8), 8);
+ ST_SH2(loc2, loc0, (tmp_eve_buf + 8 * 8), 8);
}
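The BUTTERFLY_4 calls that replace the explicit add/sub chains in this hunk expand, per element, to two sums and two differences; the removed lines show the same arithmetic written out, which is how the mapping below was inferred. A minimal scalar sketch, with the helper name butterfly4_sketch chosen purely for illustration:

#include <stdint.h>

/* e.g. BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0) stands in
 * for vec1 = reg1 + reg5, vec3 = reg7 + reg3, vec2 = reg7 - reg3,
 * vec0 = reg1 - reg5 in the removed code. */
static void butterfly4_sketch(int16_t in0, int16_t in1, int16_t in2, int16_t in3,
                              int16_t *out0, int16_t *out1,
                              int16_t *out2, int16_t *out3) {
  *out0 = in0 + in3;
  *out1 = in1 + in2;
  *out2 = in1 - in2;
  *out3 = in0 - in3;
}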
static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf,
@@ -653,19 +439,19 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf,
v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
/* Odd stage 1 */
- reg0 = LOAD_SH(tmp_buf + 32);
- reg1 = LOAD_SH(tmp_buf + 7 * 32);
- reg2 = LOAD_SH(tmp_buf + 9 * 32);
- reg3 = LOAD_SH(tmp_buf + 15 * 32);
- reg4 = LOAD_SH(tmp_buf + 17 * 32);
- reg5 = LOAD_SH(tmp_buf + 23 * 32);
- reg6 = LOAD_SH(tmp_buf + 25 * 32);
- reg7 = LOAD_SH(tmp_buf + 31 * 32);
-
- DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7);
- DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4);
- DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5);
- DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6);
+ reg0 = LD_SH(tmp_buf + 32);
+ reg1 = LD_SH(tmp_buf + 7 * 32);
+ reg2 = LD_SH(tmp_buf + 9 * 32);
+ reg3 = LD_SH(tmp_buf + 15 * 32);
+ reg4 = LD_SH(tmp_buf + 17 * 32);
+ reg5 = LD_SH(tmp_buf + 23 * 32);
+ reg6 = LD_SH(tmp_buf + 25 * 32);
+ reg7 = LD_SH(tmp_buf + 31 * 32);
+
+ VP9_DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7);
+ VP9_DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4);
+ VP9_DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5);
+ VP9_DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6);
vec0 = reg0 + reg3;
reg0 = reg0 - reg3;
@@ -678,278 +464,182 @@ static void vp9_idct8x32_column_odd_process_store(int16_t *tmp_buf,
reg5 = vec0;
/* 4 Stores */
- vec0 = reg5 + reg4;
- vec1 = reg3 + reg2;
- STORE_SH(vec0, (tmp_odd_buf + 4 * 8));
- STORE_SH(vec1, (tmp_odd_buf + 5 * 8));
-
- vec0 = reg5 - reg4;
- vec1 = reg3 - reg2;
- DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1);
- STORE_SH(vec0, (tmp_odd_buf));
- STORE_SH(vec1, (tmp_odd_buf + 1 * 8));
+ ADD2(reg5, reg4, reg3, reg2, vec0, vec1);
+ ST_SH2(vec0, vec1, (tmp_odd_buf + 4 * 8), 8);
+ SUB2(reg5, reg4, reg3, reg2, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1);
+ ST_SH2(vec0, vec1, tmp_odd_buf, 8);
/* 4 Stores */
- DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7);
- DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6);
-
- vec0 = reg0 + reg1;
- vec2 = reg7 - reg6;
- vec1 = reg7 + reg6;
- vec3 = reg0 - reg1;
- STORE_SH(vec0, (tmp_odd_buf + 6 * 8));
- STORE_SH(vec1, (tmp_odd_buf + 7 * 8));
-
- DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3);
- STORE_SH(vec2, (tmp_odd_buf + 2 * 8));
- STORE_SH(vec3, (tmp_odd_buf + 3 * 8));
+ VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7);
+ VP9_DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6);
+ BUTTERFLY_4(reg0, reg7, reg6, reg1, vec0, vec1, vec2, vec3);
+ ST_SH2(vec0, vec1, (tmp_odd_buf + 6 * 8), 8);
+ VP9_DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3);
+ ST_SH2(vec2, vec3, (tmp_odd_buf + 2 * 8), 8);
/* Odd stage 2 */
/* 8 loads */
- reg0 = LOAD_SH(tmp_buf + 3 * 32);
- reg1 = LOAD_SH(tmp_buf + 5 * 32);
- reg2 = LOAD_SH(tmp_buf + 11 * 32);
- reg3 = LOAD_SH(tmp_buf + 13 * 32);
- reg4 = LOAD_SH(tmp_buf + 19 * 32);
- reg5 = LOAD_SH(tmp_buf + 21 * 32);
- reg6 = LOAD_SH(tmp_buf + 27 * 32);
- reg7 = LOAD_SH(tmp_buf + 29 * 32);
-
- DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6);
- DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5);
- DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4);
- DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7);
+ reg0 = LD_SH(tmp_buf + 3 * 32);
+ reg1 = LD_SH(tmp_buf + 5 * 32);
+ reg2 = LD_SH(tmp_buf + 11 * 32);
+ reg3 = LD_SH(tmp_buf + 13 * 32);
+ reg4 = LD_SH(tmp_buf + 19 * 32);
+ reg5 = LD_SH(tmp_buf + 21 * 32);
+ reg6 = LD_SH(tmp_buf + 27 * 32);
+ reg7 = LD_SH(tmp_buf + 29 * 32);
+
+ VP9_DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6);
+ VP9_DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5);
+ VP9_DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4);
+ VP9_DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7);
/* 4 Stores */
- vec0 = reg1 - reg2;
- vec1 = reg6 - reg5;
- vec2 = reg0 - reg3;
- vec3 = reg7 - reg4;
- DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1);
- DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3);
+ SUB4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4, vec0, vec1, vec2, vec3);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1);
+ VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3);
+ BUTTERFLY_4(loc2, loc3, loc1, loc0, vec0, vec1, vec3, vec2);
+ ST_SH2(vec0, vec1, (tmp_odd_buf + 12 * 8), 3 * 8);
+ VP9_DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1);
+ ST_SH2(vec0, vec1, (tmp_odd_buf + 10 * 8), 8);
- vec2 = loc2 - loc0;
- vec3 = loc3 - loc1;
- vec0 = loc2 + loc0;
- vec1 = loc3 + loc1;
- STORE_SH(vec0, (tmp_odd_buf + 12 * 8));
- STORE_SH(vec1, (tmp_odd_buf + 15 * 8));
-
- DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1);
+ /* 4 Stores */
+ ADD4(reg0, reg3, reg1, reg2, reg5, reg6, reg4, reg7, vec0, vec1, vec2, vec3);
+ BUTTERFLY_4(vec0, vec3, vec2, vec1, reg0, reg1, reg3, reg2);
+ ST_SH2(reg0, reg1, (tmp_odd_buf + 13 * 8), 8);
+ VP9_DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1);
+ ST_SH2(reg0, reg1, (tmp_odd_buf + 8 * 8), 8);
- STORE_SH(vec0, (tmp_odd_buf + 10 * 8));
- STORE_SH(vec1, (tmp_odd_buf + 11 * 8));
+ /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */
+ /* Load 8 & Store 8 */
+ LD_SH4(tmp_odd_buf, 8, reg0, reg1, reg2, reg3);
+ LD_SH4((tmp_odd_buf + 8 * 8), 8, reg4, reg5, reg6, reg7);
- /* 4 Stores */
- vec0 = reg0 + reg3;
- vec1 = reg1 + reg2;
- vec2 = reg6 + reg5;
- vec3 = reg7 + reg4;
- reg0 = vec0 + vec1;
- reg1 = vec3 + vec2;
- reg2 = vec0 - vec1;
- reg3 = vec3 - vec2;
- STORE_SH(reg0, (tmp_odd_buf + 13 * 8));
- STORE_SH(reg1, (tmp_odd_buf + 14 * 8));
+ ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, loc0, loc1, loc2, loc3);
+ ST_SH4(loc0, loc1, loc2, loc3, tmp_odd_buf, 8);
- DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1);
+ SUB2(reg0, reg4, reg1, reg5, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
- STORE_SH(reg0, (tmp_odd_buf + 8 * 8));
- STORE_SH(reg1, (tmp_odd_buf + 9 * 8));
+ SUB2(reg2, reg6, reg3, reg7, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
+ ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 8 * 8), 8);
- /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */
/* Load 8 & Store 8 */
- reg0 = LOAD_SH(tmp_odd_buf);
- reg1 = LOAD_SH(tmp_odd_buf + 1 * 8);
- reg2 = LOAD_SH(tmp_odd_buf + 2 * 8);
- reg3 = LOAD_SH(tmp_odd_buf + 3 * 8);
- reg4 = LOAD_SH(tmp_odd_buf + 8 * 8);
- reg5 = LOAD_SH(tmp_odd_buf + 9 * 8);
- reg6 = LOAD_SH(tmp_odd_buf + 10 * 8);
- reg7 = LOAD_SH(tmp_odd_buf + 11 * 8);
-
- loc0 = reg0 + reg4;
- loc1 = reg1 + reg5;
- loc2 = reg2 + reg6;
- loc3 = reg3 + reg7;
- STORE_SH(loc0, (tmp_odd_buf));
- STORE_SH(loc1, (tmp_odd_buf + 1 * 8));
- STORE_SH(loc2, (tmp_odd_buf + 2 * 8));
- STORE_SH(loc3, (tmp_odd_buf + 3 * 8));
-
- vec0 = reg0 - reg4;
- vec1 = reg1 - reg5;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
-
- vec0 = reg2 - reg6;
- vec1 = reg3 - reg7;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
-
- STORE_SH(loc0, (tmp_odd_buf + 8 * 8));
- STORE_SH(loc1, (tmp_odd_buf + 9 * 8));
- STORE_SH(loc2, (tmp_odd_buf + 10 * 8));
- STORE_SH(loc3, (tmp_odd_buf + 11 * 8));
+ LD_SH4((tmp_odd_buf + 4 * 8), 8, reg1, reg2, reg0, reg3);
+ LD_SH4((tmp_odd_buf + 12 * 8), 8, reg4, reg5, reg6, reg7);
- /* Load 8 & Store 8 */
- reg1 = LOAD_SH(tmp_odd_buf + 4 * 8);
- reg2 = LOAD_SH(tmp_odd_buf + 5 * 8);
- reg0 = LOAD_SH(tmp_odd_buf + 6 * 8);
- reg3 = LOAD_SH(tmp_odd_buf + 7 * 8);
- reg4 = LOAD_SH(tmp_odd_buf + 12 * 8);
- reg5 = LOAD_SH(tmp_odd_buf + 13 * 8);
- reg6 = LOAD_SH(tmp_odd_buf + 14 * 8);
- reg7 = LOAD_SH(tmp_odd_buf + 15 * 8);
-
- loc0 = reg0 + reg4;
- loc1 = reg1 + reg5;
- loc2 = reg2 + reg6;
- loc3 = reg3 + reg7;
- STORE_SH(loc0, (tmp_odd_buf + 4 * 8));
- STORE_SH(loc1, (tmp_odd_buf + 5 * 8));
- STORE_SH(loc2, (tmp_odd_buf + 6 * 8));
- STORE_SH(loc3, (tmp_odd_buf + 7 * 8));
-
- vec0 = reg0 - reg4;
- vec1 = reg3 - reg7;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
-
- vec0 = reg1 - reg5;
- vec1 = reg2 - reg6;
- DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
-
- STORE_SH(loc0, (tmp_odd_buf + 12 * 8));
- STORE_SH(loc1, (tmp_odd_buf + 13 * 8));
- STORE_SH(loc2, (tmp_odd_buf + 14 * 8));
- STORE_SH(loc3, (tmp_odd_buf + 15 * 8));
+ ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, loc0, loc1, loc2, loc3);
+ ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 4 * 8), 8);
+
+ SUB2(reg0, reg4, reg3, reg7, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1);
+
+ SUB2(reg1, reg5, reg2, reg6, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3);
+ ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8);
}
static void vp9_idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf,
int16_t *tmp_odd_buf,
- uint8_t *dest,
- int32_t dest_stride) {
+ uint8_t *dst,
+ int32_t dst_stride) {
v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3;
- v8i16 m0, m1, m2, m3, m4, m5, m6, m7;
- v8i16 n0, n1, n2, n3, n4, n5, n6, n7;
+ v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7;
/* FINAL BUTTERFLY : Dependency on Even & Odd */
- vec0 = LOAD_SH(tmp_odd_buf);
- vec1 = LOAD_SH(tmp_odd_buf + 9 * 8);
- vec2 = LOAD_SH(tmp_odd_buf + 14 * 8);
- vec3 = LOAD_SH(tmp_odd_buf + 6 * 8);
- loc0 = LOAD_SH(tmp_eve_buf);
- loc1 = LOAD_SH(tmp_eve_buf + 8 * 8);
- loc2 = LOAD_SH(tmp_eve_buf + 4 * 8);
- loc3 = LOAD_SH(tmp_eve_buf + 12 * 8);
-
- m0 = (loc0 + vec3);
- m4 = (loc1 + vec2);
- m2 = (loc2 + vec1);
- m6 = (loc3 + vec0);
- SRARI_H_4VECS_SH(m0, m2, m4, m6, m0, m2, m4, m6, 6);
- VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS(dest, dest_stride, m0, m2, m4, m6);
-
- m6 = (loc0 - vec3);
- m2 = (loc1 - vec2);
- m4 = (loc2 - vec1);
- m0 = (loc3 - vec0);
- SRARI_H_4VECS_SH(m0, m2, m4, m6, m0, m2, m4, m6, 6);
- VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 19 * dest_stride),
- dest_stride, m0, m2, m4, m6);
+ vec0 = LD_SH(tmp_odd_buf);
+ vec1 = LD_SH(tmp_odd_buf + 9 * 8);
+ vec2 = LD_SH(tmp_odd_buf + 14 * 8);
+ vec3 = LD_SH(tmp_odd_buf + 6 * 8);
+ loc0 = LD_SH(tmp_eve_buf);
+ loc1 = LD_SH(tmp_eve_buf + 8 * 8);
+ loc2 = LD_SH(tmp_eve_buf + 4 * 8);
+ loc3 = LD_SH(tmp_eve_buf + 12 * 8);
+
+ ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6);
+ SRARI_H4_SH(m0, m2, m4, m6, 6);
+ VP9_ADDBLK_ST8x4_UB(dst, (4 * dst_stride), m0, m2, m4, m6);
+
+ SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m6, m2, m4, m0);
+ SRARI_H4_SH(m0, m2, m4, m6, 6);
+ VP9_ADDBLK_ST8x4_UB((dst + 19 * dst_stride), (4 * dst_stride),
+ m0, m2, m4, m6);
/* Load 8 & Store 8 */
- vec0 = LOAD_SH(tmp_odd_buf + 4 * 8);
- vec1 = LOAD_SH(tmp_odd_buf + 13 * 8);
- vec2 = LOAD_SH(tmp_odd_buf + 10 * 8);
- vec3 = LOAD_SH(tmp_odd_buf + 3 * 8);
- loc0 = LOAD_SH(tmp_eve_buf + 2 * 8);
- loc1 = LOAD_SH(tmp_eve_buf + 10 * 8);
- loc2 = LOAD_SH(tmp_eve_buf + 6 * 8);
- loc3 = LOAD_SH(tmp_eve_buf + 14 * 8);
-
- m1 = (loc0 + vec3);
- m5 = (loc1 + vec2);
- m3 = (loc2 + vec1);
- m7 = (loc3 + vec0);
- SRARI_H_4VECS_SH(m1, m3, m5, m7, m1, m3, m5, m7, 6);
- VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 2 * dest_stride),
- dest_stride, m1, m3, m5, m7);
-
- m7 = (loc0 - vec3);
- m3 = (loc1 - vec2);
- m5 = (loc2 - vec1);
- m1 = (loc3 - vec0);
- SRARI_H_4VECS_SH(m1, m3, m5, m7, m1, m3, m5, m7, 6);
- VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 17 * dest_stride),
- dest_stride, m1, m3, m5, m7);
+ vec0 = LD_SH(tmp_odd_buf + 4 * 8);
+ vec1 = LD_SH(tmp_odd_buf + 13 * 8);
+ vec2 = LD_SH(tmp_odd_buf + 10 * 8);
+ vec3 = LD_SH(tmp_odd_buf + 3 * 8);
+ loc0 = LD_SH(tmp_eve_buf + 2 * 8);
+ loc1 = LD_SH(tmp_eve_buf + 10 * 8);
+ loc2 = LD_SH(tmp_eve_buf + 6 * 8);
+ loc3 = LD_SH(tmp_eve_buf + 14 * 8);
+
+ ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7);
+ SRARI_H4_SH(m1, m3, m5, m7, 6);
+ VP9_ADDBLK_ST8x4_UB((dst + 2 * dst_stride), (4 * dst_stride),
+ m1, m3, m5, m7);
+
+ SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m7, m3, m5, m1);
+ SRARI_H4_SH(m1, m3, m5, m7, 6);
+ VP9_ADDBLK_ST8x4_UB((dst + 17 * dst_stride), (4 * dst_stride),
+ m1, m3, m5, m7);
/* Load 8 & Store 8 */
- vec0 = LOAD_SH(tmp_odd_buf + 2 * 8);
- vec1 = LOAD_SH(tmp_odd_buf + 11 * 8);
- vec2 = LOAD_SH(tmp_odd_buf + 12 * 8);
- vec3 = LOAD_SH(tmp_odd_buf + 7 * 8);
- loc0 = LOAD_SH(tmp_eve_buf + 1 * 8);
- loc1 = LOAD_SH(tmp_eve_buf + 9 * 8);
- loc2 = LOAD_SH(tmp_eve_buf + 5 * 8);
- loc3 = LOAD_SH(tmp_eve_buf + 13 * 8);
-
- n0 = (loc0 + vec3);
- n4 = (loc1 + vec2);
- n2 = (loc2 + vec1);
- n6 = (loc3 + vec0);
- SRARI_H_4VECS_SH(n0, n2, n4, n6, n0, n2, n4, n6, 6);
- VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 1 * dest_stride),
- dest_stride, n0, n2, n4, n6);
-
- n6 = (loc0 - vec3);
- n2 = (loc1 - vec2);
- n4 = (loc2 - vec1);
- n0 = (loc3 - vec0);
- SRARI_H_4VECS_SH(n0, n2, n4, n6, n0, n2, n4, n6, 6);
- VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 18 * dest_stride),
- dest_stride, n0, n2, n4, n6);
+ vec0 = LD_SH(tmp_odd_buf + 2 * 8);
+ vec1 = LD_SH(tmp_odd_buf + 11 * 8);
+ vec2 = LD_SH(tmp_odd_buf + 12 * 8);
+ vec3 = LD_SH(tmp_odd_buf + 7 * 8);
+ loc0 = LD_SH(tmp_eve_buf + 1 * 8);
+ loc1 = LD_SH(tmp_eve_buf + 9 * 8);
+ loc2 = LD_SH(tmp_eve_buf + 5 * 8);
+ loc3 = LD_SH(tmp_eve_buf + 13 * 8);
+
+ ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6);
+ SRARI_H4_SH(n0, n2, n4, n6, 6);
+ VP9_ADDBLK_ST8x4_UB((dst + 1 * dst_stride), (4 * dst_stride),
+ n0, n2, n4, n6);
+
+ SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n6, n2, n4, n0);
+ SRARI_H4_SH(n0, n2, n4, n6, 6);
+ VP9_ADDBLK_ST8x4_UB((dst + 18 * dst_stride), (4 * dst_stride),
+ n0, n2, n4, n6);
/* Load 8 & Store 8 */
- vec0 = LOAD_SH(tmp_odd_buf + 5 * 8);
- vec1 = LOAD_SH(tmp_odd_buf + 15 * 8);
- vec2 = LOAD_SH(tmp_odd_buf + 8 * 8);
- vec3 = LOAD_SH(tmp_odd_buf + 1 * 8);
- loc0 = LOAD_SH(tmp_eve_buf + 3 * 8);
- loc1 = LOAD_SH(tmp_eve_buf + 11 * 8);
- loc2 = LOAD_SH(tmp_eve_buf + 7 * 8);
- loc3 = LOAD_SH(tmp_eve_buf + 15 * 8);
-
- n1 = (loc0 + vec3);
- n5 = (loc1 + vec2);
- n3 = (loc2 + vec1);
- n7 = (loc3 + vec0);
- SRARI_H_4VECS_SH(n1, n3, n5, n7, n1, n3, n5, n7, 6);
- VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 3 * dest_stride),
- dest_stride, n1, n3, n5, n7);
-
- n7 = (loc0 - vec3);
- n3 = (loc1 - vec2);
- n5 = (loc2 - vec1);
- n1 = (loc3 - vec0);
- SRARI_H_4VECS_SH(n1, n3, n5, n7, n1, n3, n5, n7, 6);
- VP9_ADDBLK_CLIP_AND_STORE_OFF_4H_VECS((dest + 16 * dest_stride),
- dest_stride, n1, n3, n5, n7);
+ vec0 = LD_SH(tmp_odd_buf + 5 * 8);
+ vec1 = LD_SH(tmp_odd_buf + 15 * 8);
+ vec2 = LD_SH(tmp_odd_buf + 8 * 8);
+ vec3 = LD_SH(tmp_odd_buf + 1 * 8);
+ loc0 = LD_SH(tmp_eve_buf + 3 * 8);
+ loc1 = LD_SH(tmp_eve_buf + 11 * 8);
+ loc2 = LD_SH(tmp_eve_buf + 7 * 8);
+ loc3 = LD_SH(tmp_eve_buf + 15 * 8);
+
+ ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7);
+ SRARI_H4_SH(n1, n3, n5, n7, 6);
+ VP9_ADDBLK_ST8x4_UB((dst + 3 * dst_stride), (4 * dst_stride),
+ n1, n3, n5, n7);
+
+ SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n7, n3, n5, n1);
+ SRARI_H4_SH(n1, n3, n5, n7, 6);
+ VP9_ADDBLK_ST8x4_UB((dst + 16 * dst_stride), (4 * dst_stride),
+ n1, n3, n5, n7);
}
-static void vp9_idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dest,
- int32_t dest_stride) {
+static void vp9_idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]);
DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]);
vp9_idct8x32_column_even_process_store(input, &tmp_eve_buf[0]);
-
vp9_idct8x32_column_odd_process_store(input, &tmp_odd_buf[0]);
-
vp9_idct8x32_column_butterfly_addblk(&tmp_eve_buf[0], &tmp_odd_buf[0],
- dest, dest_stride);
+ dst, dst_stride);
}
-void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dest,
- int32_t dest_stride) {
+void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]);
int16_t *out_ptr = out_arr;
@@ -963,13 +653,13 @@ void vp9_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dest,
/* transform columns */
for (i = 0; i < 4; ++i) {
/* process 8 * 32 block */
- vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dest + (i << 3)),
- dest_stride);
+ vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ dst_stride);
}
}
-void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dest,
- int32_t dest_stride) {
+void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
int32_t i;
DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]);
int16_t *out_ptr = out_arr;
@@ -1008,70 +698,42 @@ void vp9_idct32x32_34_add_msa(const int16_t *input, uint8_t *dest,
/* transform columns */
for (i = 0; i < 4; ++i) {
/* process 8 * 32 block */
- vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dest + (i << 3)),
- dest_stride);
+ vp9_idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
+ dst_stride);
}
}
-void vp9_idct32x32_1_add_msa(const int16_t *input, uint8_t *dest,
- int32_t dest_stride) {
- int32_t i, const1;
- v8i16 const2;
+void vp9_idct32x32_1_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
+ int32_t i;
int16_t out;
- v8i16 res0, res1, res2, res3, res4, res5, res6, res7;
- v16u8 dest0, dest1, dest2, dest3;
- v16u8 tmp0, tmp1, tmp2, tmp3;
- v16i8 zero = { 0 };
-
- out = dct_const_round_shift(input[0] * cospi_16_64);
- out = dct_const_round_shift(out * cospi_16_64);
- const1 = ROUND_POWER_OF_TWO(out, 6);
-
- const2 = __msa_fill_h(const1);
-
- for (i = 0; i < 16; ++i) {
- dest0 = LOAD_UB(dest);
- dest1 = LOAD_UB(dest + 16);
- dest2 = LOAD_UB(dest + dest_stride);
- dest3 = LOAD_UB(dest + dest_stride + 16);
-
- res0 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest0);
- res1 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest1);
- res2 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest2);
- res3 = (v8i16)__msa_ilvr_b(zero, (v16i8)dest3);
- res4 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest0);
- res5 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest1);
- res6 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest2);
- res7 = (v8i16)__msa_ilvl_b(zero, (v16i8)dest3);
-
- res0 += const2;
- res1 += const2;
- res2 += const2;
- res3 += const2;
- res4 += const2;
- res5 += const2;
- res6 += const2;
- res7 += const2;
-
- res0 = CLIP_UNSIGNED_CHAR_H(res0);
- res1 = CLIP_UNSIGNED_CHAR_H(res1);
- res2 = CLIP_UNSIGNED_CHAR_H(res2);
- res3 = CLIP_UNSIGNED_CHAR_H(res3);
- res4 = CLIP_UNSIGNED_CHAR_H(res4);
- res5 = CLIP_UNSIGNED_CHAR_H(res5);
- res6 = CLIP_UNSIGNED_CHAR_H(res6);
- res7 = CLIP_UNSIGNED_CHAR_H(res7);
-
- tmp0 = (v16u8)__msa_pckev_b((v16i8)res4, (v16i8)res0);
- tmp1 = (v16u8)__msa_pckev_b((v16i8)res5, (v16i8)res1);
- tmp2 = (v16u8)__msa_pckev_b((v16i8)res6, (v16i8)res2);
- tmp3 = (v16u8)__msa_pckev_b((v16i8)res7, (v16i8)res3);
-
- STORE_UB(tmp0, dest);
- STORE_UB(tmp1, dest + 16);
- dest += dest_stride;
- STORE_UB(tmp2, dest);
- STORE_UB(tmp3, dest + 16);
- dest += dest_stride;
+ v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3;
+ v8i16 res0, res1, res2, res3, res4, res5, res6, res7, vec;
+
+ out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);
+ out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);
+ out = ROUND_POWER_OF_TWO(out, 6);
+
+ vec = __msa_fill_h(out);
+
+ for (i = 16; i--;) {
+ LD_UB2(dst, 16, dst0, dst1);
+ LD_UB2(dst + dst_stride, 16, dst2, dst3);
+
+ UNPCK_UB_SH(dst0, res0, res4);
+ UNPCK_UB_SH(dst1, res1, res5);
+ UNPCK_UB_SH(dst2, res2, res6);
+ UNPCK_UB_SH(dst3, res3, res7);
+ ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
+ ADD4(res4, vec, res5, vec, res6, vec, res7, vec, res4, res5, res6, res7);
+ CLIP_SH4_0_255(res0, res1, res2, res3);
+ CLIP_SH4_0_255(res4, res5, res6, res7);
+ PCKEV_B4_UB(res4, res0, res5, res1, res6, res2, res7, res3,
+ tmp0, tmp1, tmp2, tmp3);
+
+ ST_UB2(tmp0, tmp1, dst, 16);
+ dst += dst_stride;
+ ST_UB2(tmp2, tmp3, dst, 16);
+ dst += dst_stride;
}
}
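vp9_idct32x32_1_add_msa vectorizes a DC-only path that is easiest to read in scalar form: input[0] is scaled twice by cospi_16_64 with DCT_CONST_BITS rounding, rounded once more by 2^6, and the resulting constant is added to every destination pixel with clipping. A scalar sketch follows; the constant values (cospi_16_64 == 11585, DCT_CONST_BITS == 14) and the clip255 helper are assumptions made for the sketch, not taken from this patch.

#include <stdint.h>

#define SK_DCT_CONST_BITS 14                 /* assumed, mirrors vp9_idct.h */
#define SK_COSPI_16_64 11585                 /* assumed, mirrors vp9_idct.h */
#define SK_ROUND_POW2(v, n) (((v) + (1 << ((n) - 1))) >> (n))

static uint8_t clip255(int32_t v) {          /* illustrative pixel clamp */
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

static void idct32x32_dc_sketch(const int16_t *input, uint8_t *dst,
                                int32_t dst_stride) {
  int32_t r, c;
  int32_t out = SK_ROUND_POW2(input[0] * SK_COSPI_16_64, SK_DCT_CONST_BITS);
  out = SK_ROUND_POW2(out * SK_COSPI_16_64, SK_DCT_CONST_BITS);
  out = SK_ROUND_POW2(out, 6);               /* same final rounding as above */

  for (r = 0; r < 32; ++r) {
    for (c = 0; c < 32; ++c) dst[c] = clip255(dst[c] + out);
    dst += dst_stride;
  }
}

The MSA version does the same work 32 pixels per row at a time via UNPCK_UB_SH, ADD4, CLIP_SH4_0_255 and PCKEV_B4_UB.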
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c
new file mode 100644
index 00000000000..a3df1a4d320
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct4x4_msa.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include "vp9/common/mips/msa/vp9_idct_msa.h"
+
+void vp9_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
+ v8i16 in0, in1, in2, in3;
+ v4i32 in0_r, in1_r, in2_r, in3_r, in4_r;
+
+ /* load vector elements of 4x4 block */
+ LD4x4_SH(input, in0, in2, in3, in1);
+ TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);
+ UNPCK_R_SH_SW(in0, in0_r);
+ UNPCK_R_SH_SW(in2, in2_r);
+ UNPCK_R_SH_SW(in3, in3_r);
+ UNPCK_R_SH_SW(in1, in1_r);
+ SRA_4V(in0_r, in1_r, in2_r, in3_r, UNIT_QUANT_SHIFT);
+
+ in0_r += in2_r;
+ in3_r -= in1_r;
+ in4_r = (in0_r - in3_r) >> 1;
+ in1_r = in4_r - in1_r;
+ in2_r = in4_r - in2_r;
+ in0_r -= in1_r;
+ in3_r += in2_r;
+
+ TRANSPOSE4x4_SW_SW(in0_r, in1_r, in2_r, in3_r, in0_r, in1_r, in2_r, in3_r);
+
+ in0_r += in1_r;
+ in2_r -= in3_r;
+ in4_r = (in0_r - in2_r) >> 1;
+ in3_r = in4_r - in3_r;
+ in1_r = in4_r - in1_r;
+ in0_r -= in3_r;
+ in2_r += in1_r;
+
+ PCKEV_H4_SH(in0_r, in0_r, in1_r, in1_r, in2_r, in2_r, in3_r, in3_r,
+ in0, in1, in2, in3);
+ ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride);
+}
+
+void vp9_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
+ int16_t a1, e1;
+ v8i16 in1, in0 = { 0 };
+
+ a1 = input[0] >> UNIT_QUANT_SHIFT;
+ e1 = a1 >> 1;
+ a1 -= e1;
+
+ in0 = __msa_insert_h(in0, 0, a1);
+ in0 = __msa_insert_h(in0, 1, e1);
+ in0 = __msa_insert_h(in0, 2, e1);
+ in0 = __msa_insert_h(in0, 3, e1);
+
+ in1 = in0 >> 1;
+ in0 -= in1;
+
+ ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride);
+}
+
+void vp9_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
+ v8i16 in0, in1, in2, in3;
+
+ /* load vector elements of 4x4 block */
+ LD4x4_SH(input, in0, in1, in2, in3);
+ /* rows */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* columns */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* rounding (add 2^3, divide by 2^4) */
+ SRARI_H4_SH(in0, in1, in2, in3, 4);
+ ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
+}
+
+void vp9_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
+ int16_t out;
+ v8i16 vec;
+
+ out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);
+ out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);
+ out = ROUND_POWER_OF_TWO(out, 4);
+ vec = __msa_fill_h(out);
+
+ ADDBLK_ST4x4_UB(vec, vec, vec, vec, dst, dst_stride);
+}
+
+void vp9_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride, int32_t tx_type) {
+ v8i16 in0, in1, in2, in3;
+
+ /* load vector elements of 4x4 block */
+ LD4x4_SH(input, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ /* DCT in horizontal */
+ VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* DCT in vertical */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case ADST_DCT:
+ /* DCT in horizontal */
+ VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* ADST in vertical */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case DCT_ADST:
+ /* ADST in horizontal */
+ VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* DCT in vertical */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case ADST_ADST:
+ /* ADST in horizontal */
+ VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ /* ADST in vertical */
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* final rounding (add 2^3, divide by 2^4) and shift */
+ SRARI_H4_SH(in0, in1, in2, in3, 4);
+ /* add block and store 4x4 */
+ ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
+}
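The tail shared by the 4x4 functions in this file, SRARI_H4_SH(..., 4) followed by ADDBLK_ST4x4_UB, is a rounding shift of the residual, (x + 8) >> 4 per element, followed by an add to the destination block with clipping to [0, 255]. A scalar sketch of that tail, using the illustrative names clamp_u8 and addblk_st4x4_sketch (not the patch's macros):

#include <stdint.h>

static uint8_t clamp_u8(int32_t v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}

/* res holds the 4x4 residual produced by the inverse transform. */
static void addblk_st4x4_sketch(const int16_t res[4][4], uint8_t *dst,
                                int32_t dst_stride) {
  int32_t r, c;
  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      const int32_t rounded = (res[r][c] + 8) >> 4;  /* SRARI_H4_SH(..., 4) */
      dst[c] = clamp_u8(dst[c] + rounded);
    }
    dst += dst_stride;
  }
}

The 8x8 functions in the next file use the same pattern with a shift of 5 and VP9_ADDBLK_ST8x4_UB.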
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c
new file mode 100644
index 00000000000..a4a9c212401
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct8x8_msa.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include "vp9/common/mips/msa/vp9_idct_msa.h"
+
+void vp9_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+
+ /* load vector elements of 8x8 block */
+ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+
+ /* rows transform */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ /* 1D idct8x8 */
+ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ /* columns transform */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ /* 1D idct8x8 */
+ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ /* final rounding (add 2^4, divide by 2^5) and shift */
+ SRARI_H4_SH(in0, in1, in2, in3, 5);
+ SRARI_H4_SH(in4, in5, in6, in7, 5);
+ /* add block and store 8x8 */
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+ dst += (4 * dst_stride);
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+}
+
+void vp9_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3;
+ v4i32 tmp0, tmp1, tmp2, tmp3;
+ v8i16 zero = { 0 };
+
+ /* load vector elements of 8x8 block */
+ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+
+ /* stage1 */
+ ILVL_H2_SH(in3, in0, in2, in1, s0, s1);
+ k0 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64);
+ k1 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64);
+ k2 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64);
+ k3 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64);
+ DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
+ SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
+ PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
+ PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3);
+ BUTTERFLY_4(s0, s1, s3, s2, s4, s7, s6, s5);
+
+ /* stage2 */
+ ILVR_H2_SH(in3, in1, in2, in0, s1, s0);
+ k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64);
+ k1 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64);
+ k2 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64);
+ k3 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64);
+ DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3);
+ SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS);
+ PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1);
+ PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3);
+ BUTTERFLY_4(s0, s1, s2, s3, m0, m1, m2, m3);
+
+ /* stage3 */
+ s0 = __msa_ilvr_h(s6, s5);
+
+ k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64);
+ DOTP_SH2_SW(s0, s0, k1, k0, tmp0, tmp1);
+ SRARI_W2_SW(tmp0, tmp1, DCT_CONST_BITS);
+ PCKEV_H2_SH(zero, tmp0, zero, tmp1, s2, s3);
+
+ /* stage4 */
+ BUTTERFLY_8(m0, m1, m2, m3, s4, s2, s3, s7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+
+ /* final rounding (add 2^4, divide by 2^5) and shift */
+ SRARI_H4_SH(in0, in1, in2, in3, 5);
+ SRARI_H4_SH(in4, in5, in6, in7, 5);
+
+ /* add block and store 8x8 */
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+ dst += (4 * dst_stride);
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+}
+
+void vp9_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride) {
+ int16_t out;
+ int32_t val;
+ v8i16 vec;
+
+ out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS);
+ out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS);
+ val = ROUND_POWER_OF_TWO(out, 5);
+ vec = __msa_fill_h(val);
+
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
+ dst += (4 * dst_stride);
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec);
+}
+
+void vp9_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
+ int32_t dst_stride, int32_t tx_type) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+
+ /* load vector elements of 8x8 block */
+ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ /* DCT in horizontal */
+ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ /* DCT in vertical */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ break;
+ case ADST_DCT:
+ /* DCT in horizontal */
+ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ /* ADST in vertical */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ break;
+ case DCT_ADST:
+ /* ADST in horizontal */
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ /* DCT in vertical */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ break;
+ case ADST_ADST:
+ /* ADST in horizontal */
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ /* ADST in vertical */
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ /* final rounding (add 2^4, divide by 2^5) and shift */
+ SRARI_H4_SH(in0, in1, in2, in3, 5);
+ SRARI_H4_SH(in4, in5, in6, in7, 5);
+
+ /* add block and store 8x8 */
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
+ dst += (4 * dst_stride);
+ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
+}
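The cospi-pair multiplies used throughout these files (VP9_SET_COSPI_PAIR or VP9_DOTP_CONST_PAIR feeding DOTP_SH*_SW, then SRARI_W*_SW by DCT_CONST_BITS) implement, per element, a fixed-point rotation of two inputs by a pair of cosine constants. A scalar sketch of one such rotation is below; DCT_CONST_BITS == 14 is assumed to match vp9_idct.h, and the sign convention is read off the VP9_DOTP_CONST_PAIR macro defined in the header that follows.

#include <stdint.h>

static int32_t round_shift14(int32_t x) {    /* DCT_CONST_BITS == 14 assumed */
  return (x + (1 << 13)) >> 14;
}

/* One lane of VP9_DOTP_CONST_PAIR(in0, in1, c0, c1, out0, out1):
 * out0 = in0*c0 - in1*c1 and out1 = in1*c0 + in0*c1, each rounded back
 * down by DCT_CONST_BITS. */
static void dotp_const_pair_sketch(int16_t in0, int16_t in1,
                                   int16_t c0, int16_t c1,
                                   int16_t *out0, int16_t *out1) {
  *out0 = (int16_t)round_shift14(in0 * c0 - in1 * c1);
  *out1 = (int16_t)round_shift14(in1 * c0 + in0 * c1);
}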
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct_msa.h
new file mode 100644
index 00000000000..60e27fc1107
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_idct_msa.h
@@ -0,0 +1,481 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_MIPS_MSA_VP9_IDCT_MSA_H_
+#define VP9_COMMON_MIPS_MSA_VP9_IDCT_MSA_H_
+
+#include "vpx_ports/mem.h"
+#include "vp9/common/vp9_idct.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
+#define VP9_DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) { \
+ v8i16 k0_m = __msa_fill_h(cnst0); \
+ v4i32 s0_m, s1_m, s2_m, s3_m; \
+ \
+ s0_m = (v4i32)__msa_fill_h(cnst1); \
+ k0_m = __msa_ilvev_h((v8i16)s0_m, k0_m); \
+ \
+ ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m); \
+ ILVRL_H2_SW(reg0, reg1, s3_m, s2_m); \
+ DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m); \
+ SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \
+ out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \
+ \
+ DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m); \
+ SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \
+ out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \
+}
+
+#define VP9_DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, \
+ dst0, dst1, dst2, dst3) { \
+ v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m; \
+ v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m; \
+ \
+ DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, \
+ tp0_m, tp2_m, tp3_m, tp4_m); \
+ DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, \
+ tp5_m, tp6_m, tp7_m, tp8_m); \
+ BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m); \
+ BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m); \
+ SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS); \
+ SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS); \
+ PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, \
+ dst0, dst1, dst2, dst3); \
+}
+
+#define VP9_DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2) ({ \
+ v8i16 dst_m; \
+ v4i32 tp0_m, tp1_m; \
+ \
+ DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m); \
+ SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS); \
+ dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \
+ \
+ dst_m; \
+})
+
+#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
+ v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
+ v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
+ cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
+ v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, \
+ -cospi_16_64, cospi_24_64, -cospi_24_64, 0, 0 }; \
+ \
+ SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
+ cnst2_m = -cnst0_m; \
+ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
+ SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
+ cnst4_m = -cnst2_m; \
+ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
+ \
+ ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
+ VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
+ cnst1_m, cnst2_m, cnst3_m, in7, in0, \
+ in4, in3); \
+ \
+ SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
+ cnst2_m = -cnst0_m; \
+ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
+ SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
+ cnst4_m = -cnst2_m; \
+ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
+ \
+ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
+ \
+ VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
+ cnst1_m, cnst2_m, cnst3_m, in5, in2, \
+ in6, in1); \
+ BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
+ out7 = -s0_m; \
+ out0 = s1_m; \
+ \
+ SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, \
+ cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
+ \
+ ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
+ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ cnst1_m = cnst0_m; \
+ \
+ ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
+ VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
+ cnst2_m, cnst3_m, cnst1_m, out1, out6, \
+ s0_m, s1_m); \
+ \
+ SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
+ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ \
+ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
+ ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
+ out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+ out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
+ out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
+ out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
+ \
+ out1 = -out1; \
+ out3 = -out3; \
+ out5 = -out5; \
+}
+
+#define VP9_MADD_SHORT(m0, m1, c0, c1, res0, res1) { \
+ v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \
+ v8i16 madd_s0_m, madd_s1_m; \
+ \
+ ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m); \
+ DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, \
+ c0, c0, c1, c1, madd0_m, madd1_m, madd2_m, madd3_m); \
+ SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1); \
+}
+
+#define VP9_MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, \
+ out0, out1, out2, out3) { \
+ v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \
+ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m; \
+ \
+ ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m); \
+ ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m); \
+ DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \
+ cst0, cst0, cst2, cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
+ BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \
+ m4_m, m5_m, tmp3_m, tmp2_m); \
+ SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \
+ DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \
+ cst1, cst1, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
+ BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \
+ m4_m, m5_m, tmp3_m, tmp2_m); \
+ SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \
+}
+
+#define VP9_SET_COSPI_PAIR(c0_h, c1_h) ({ \
+ v8i16 out0_m, r0_m, r1_m; \
+ \
+ r0_m = __msa_fill_h(c0_h); \
+ r1_m = __msa_fill_h(c1_h); \
+ out0_m = __msa_ilvev_h(r1_m, r0_m); \
+ \
+ out0_m; \
+})
+
+#define VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3) { \
+ uint8_t *dst_m = (uint8_t *) (dst); \
+ v16u8 dst0_m, dst1_m, dst2_m, dst3_m; \
+ v16i8 tmp0_m, tmp1_m; \
+ v16i8 zero_m = { 0 }; \
+ v8i16 res0_m, res1_m, res2_m, res3_m; \
+ \
+ LD_UB4(dst_m, dst_stride, dst0_m, dst1_m, dst2_m, dst3_m); \
+ ILVR_B4_SH(zero_m, dst0_m, zero_m, dst1_m, zero_m, dst2_m, \
+ zero_m, dst3_m, res0_m, res1_m, res2_m, res3_m); \
+ ADD4(res0_m, in0, res1_m, in1, res2_m, in2, res3_m, in3, \
+ res0_m, res1_m, res2_m, res3_m); \
+ CLIP_SH4_0_255(res0_m, res1_m, res2_m, res3_m); \
+ PCKEV_B2_SB(res1_m, res0_m, res3_m, res2_m, tmp0_m, tmp1_m); \
+ ST8x4_UB(tmp0_m, tmp1_m, dst_m, dst_stride); \
+}
+
+#define VP9_IDCT4x4(in0, in1, in2, in3, out0, out1, out2, out3) { \
+ v8i16 c0_m, c1_m, c2_m, c3_m; \
+ v8i16 step0_m, step1_m; \
+ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ \
+ c0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \
+ c1_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \
+ step0_m = __msa_ilvr_h(in2, in0); \
+ DOTP_SH2_SW(step0_m, step0_m, c0_m, c1_m, tmp0_m, tmp1_m); \
+ \
+ c2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \
+ c3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \
+ step1_m = __msa_ilvr_h(in3, in1); \
+ DOTP_SH2_SW(step1_m, step1_m, c2_m, c3_m, tmp2_m, tmp3_m); \
+ SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
+ \
+ PCKEV_H2_SW(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp0_m, tmp2_m); \
+ SLDI_B2_0_SW(tmp0_m, tmp2_m, tmp1_m, tmp3_m, 8); \
+ BUTTERFLY_4((v8i16)tmp0_m, (v8i16)tmp1_m, \
+ (v8i16)tmp2_m, (v8i16)tmp3_m, \
+ out0, out1, out2, out3); \
+}
+
+#define VP9_IADST4x4(in0, in1, in2, in3, out0, out1, out2, out3) { \
+ v8i16 res0_m, res1_m, c0_m, c1_m; \
+ v8i16 k1_m, k2_m, k3_m, k4_m; \
+ v8i16 zero_m = { 0 }; \
+ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ v4i32 int0_m, int1_m, int2_m, int3_m; \
+ v8i16 mask_m = { sinpi_1_9, sinpi_2_9, sinpi_3_9, \
+ sinpi_4_9, -sinpi_1_9, -sinpi_2_9, -sinpi_3_9, \
+ -sinpi_4_9 }; \
+ \
+ SPLATI_H4_SH(mask_m, 3, 0, 1, 2, c0_m, c1_m, k1_m, k2_m); \
+ ILVEV_H2_SH(c0_m, c1_m, k1_m, k2_m, c0_m, c1_m); \
+ ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m); \
+ DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp2_m, tmp1_m); \
+ int0_m = tmp2_m + tmp1_m; \
+ \
+ SPLATI_H2_SH(mask_m, 4, 7, k4_m, k3_m); \
+ ILVEV_H2_SH(k4_m, k1_m, k3_m, k2_m, c0_m, c1_m); \
+ DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m); \
+ int1_m = tmp0_m + tmp1_m; \
+ \
+ c0_m = __msa_splati_h(mask_m, 6); \
+ ILVL_H2_SH(k2_m, c0_m, zero_m, k2_m, c0_m, c1_m); \
+ ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m); \
+ DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m); \
+ int2_m = tmp0_m + tmp1_m; \
+ \
+ c0_m = __msa_splati_h(mask_m, 6); \
+ c0_m = __msa_ilvev_h(c0_m, k1_m); \
+ \
+ res0_m = __msa_ilvr_h((in1), (in3)); \
+ tmp0_m = __msa_dotp_s_w(res0_m, c0_m); \
+ int3_m = tmp2_m + tmp0_m; \
+ \
+ res0_m = __msa_ilvr_h((in2), (in3)); \
+ c1_m = __msa_ilvev_h(k4_m, k3_m); \
+ \
+ tmp2_m = __msa_dotp_s_w(res0_m, c1_m); \
+ res1_m = __msa_ilvr_h((in0), (in2)); \
+ c1_m = __msa_ilvev_h(k1_m, zero_m); \
+ \
+ tmp3_m = __msa_dotp_s_w(res1_m, c1_m); \
+ int3_m += tmp2_m; \
+ int3_m += tmp3_m; \
+ \
+ SRARI_W4_SW(int0_m, int1_m, int2_m, int3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(int0_m, int0_m, int1_m, int1_m, out0, out1); \
+ PCKEV_H2_SH(int2_m, int2_m, int3_m, int3_m, out2, out3); \
+}
+
+#define VP9_SET_CONST_PAIR(mask_h, idx1_h, idx2_h) ({ \
+ v8i16 c0_m, c1_m; \
+ \
+ SPLATI_H2_SH(mask_h, idx1_h, idx2_h, c0_m, c1_m); \
+ c0_m = __msa_ilvev_h(c1_m, c0_m); \
+ \
+ c0_m; \
+})
+
+/* multiply and add macro */
+#define VP9_MADD(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, \
+ out0, out1, out2, out3) { \
+ v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \
+ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ \
+ ILVRL_H2_SH(inp1, inp0, madd_s1_m, madd_s0_m); \
+ ILVRL_H2_SH(inp3, inp2, madd_s3_m, madd_s2_m); \
+ DOTP_SH4_SW(madd_s1_m, madd_s0_m, madd_s1_m, madd_s0_m, \
+ cst0, cst0, cst1, cst1, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
+ SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out1); \
+ DOTP_SH4_SW(madd_s3_m, madd_s2_m, madd_s3_m, madd_s2_m, \
+ cst2, cst2, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
+ SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out2, out3); \
+}
+
+/* idct 8x8 macro */
+#define VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v8i16 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m; \
+ v8i16 k0_m, k1_m, k2_m, k3_m, res0_m, res1_m, res2_m, res3_m; \
+ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ v8i16 mask_m = { cospi_28_64, cospi_4_64, cospi_20_64, cospi_12_64, \
+ cospi_16_64, -cospi_4_64, -cospi_20_64, -cospi_16_64 }; \
+ \
+ k0_m = VP9_SET_CONST_PAIR(mask_m, 0, 5); \
+ k1_m = VP9_SET_CONST_PAIR(mask_m, 1, 0); \
+ k2_m = VP9_SET_CONST_PAIR(mask_m, 6, 3); \
+ k3_m = VP9_SET_CONST_PAIR(mask_m, 3, 2); \
+ VP9_MADD(in1, in7, in3, in5, k0_m, k1_m, k2_m, k3_m, in1, in7, in3, in5); \
+ SUB2(in1, in3, in7, in5, res0_m, res1_m); \
+ k0_m = VP9_SET_CONST_PAIR(mask_m, 4, 7); \
+ k1_m = __msa_splati_h(mask_m, 4); \
+ \
+ ILVRL_H2_SH(res0_m, res1_m, res2_m, res3_m); \
+ DOTP_SH4_SW(res2_m, res3_m, res2_m, res3_m, k0_m, k0_m, k1_m, k1_m, \
+ tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
+ SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
+ tp4_m = in1 + in3; \
+ PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tp5_m, tp6_m); \
+ tp7_m = in7 + in5; \
+ k2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \
+ k3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \
+ VP9_MADD(in0, in4, in2, in6, k1_m, k0_m, k2_m, k3_m, \
+ in0, in4, in2, in6); \
+ BUTTERFLY_4(in0, in4, in2, in6, tp0_m, tp1_m, tp2_m, tp3_m); \
+ BUTTERFLY_8(tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m, \
+ out0, out1, out2, out3, out4, out5, out6, out7); \
+}
+
+#define VP9_IADST8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v4i32 r0_m, r1_m, r2_m, r3_m, r4_m, r5_m, r6_m, r7_m; \
+ v4i32 m0_m, m1_m, m2_m, m3_m, t0_m, t1_m; \
+ v8i16 res0_m, res1_m, res2_m, res3_m, k0_m, k1_m, in_s0, in_s1; \
+ v8i16 mask1_m = { cospi_2_64, cospi_30_64, -cospi_2_64, \
+ cospi_10_64, cospi_22_64, -cospi_10_64, cospi_18_64, cospi_14_64 }; \
+ v8i16 mask2_m = { cospi_14_64, -cospi_18_64, cospi_26_64, \
+ cospi_6_64, -cospi_26_64, cospi_8_64, cospi_24_64, -cospi_8_64 }; \
+ v8i16 mask3_m = { -cospi_24_64, cospi_8_64, cospi_16_64, \
+ -cospi_16_64, 0, 0, 0, 0 }; \
+ \
+ k0_m = VP9_SET_CONST_PAIR(mask1_m, 0, 1); \
+ k1_m = VP9_SET_CONST_PAIR(mask1_m, 1, 2); \
+ ILVRL_H2_SH(in1, in0, in_s1, in_s0); \
+ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \
+ r0_m, r1_m, r2_m, r3_m); \
+ k0_m = VP9_SET_CONST_PAIR(mask1_m, 6, 7); \
+ k1_m = VP9_SET_CONST_PAIR(mask2_m, 0, 1); \
+ ILVRL_H2_SH(in5, in4, in_s1, in_s0); \
+ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \
+ r4_m, r5_m, r6_m, r7_m); \
+ ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, \
+ m0_m, m1_m, m2_m, m3_m); \
+ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res0_m, res1_m); \
+ SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, \
+ m0_m, m1_m, m2_m, m3_m); \
+ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, t0_m, t1_m); \
+ k0_m = VP9_SET_CONST_PAIR(mask1_m, 3, 4); \
+ k1_m = VP9_SET_CONST_PAIR(mask1_m, 4, 5); \
+ ILVRL_H2_SH(in3, in2, in_s1, in_s0); \
+ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \
+ r0_m, r1_m, r2_m, r3_m); \
+ k0_m = VP9_SET_CONST_PAIR(mask2_m, 2, 3); \
+ k1_m = VP9_SET_CONST_PAIR(mask2_m, 3, 4); \
+ ILVRL_H2_SH(in7, in6, in_s1, in_s0); \
+ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \
+ r4_m, r5_m, r6_m, r7_m); \
+ ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, \
+ m0_m, m1_m, m2_m, m3_m); \
+ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res2_m, res3_m); \
+ SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, \
+ m0_m, m1_m, m2_m, m3_m); \
+ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, r2_m, r3_m); \
+ ILVRL_H2_SW(r3_m, r2_m, m2_m, m3_m); \
+ BUTTERFLY_4(res0_m, res1_m, res3_m, res2_m, out0, in7, in4, in3); \
+ k0_m = VP9_SET_CONST_PAIR(mask2_m, 5, 6); \
+ k1_m = VP9_SET_CONST_PAIR(mask2_m, 6, 7); \
+ ILVRL_H2_SH(t1_m, t0_m, in_s1, in_s0); \
+ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \
+ r0_m, r1_m, r2_m, r3_m); \
+ k1_m = VP9_SET_CONST_PAIR(mask3_m, 0, 1); \
+ DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, \
+ r4_m, r5_m, r6_m, r7_m); \
+ ADD4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, \
+ m0_m, m1_m, m2_m, m3_m); \
+ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in1, out6); \
+ SUB4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, \
+ m0_m, m1_m, m2_m, m3_m); \
+ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in2, in5); \
+ k0_m = VP9_SET_CONST_PAIR(mask3_m, 2, 2); \
+ k1_m = VP9_SET_CONST_PAIR(mask3_m, 2, 3); \
+ ILVRL_H2_SH(in4, in3, in_s1, in_s0); \
+ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, \
+ m0_m, m1_m, m2_m, m3_m); \
+ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in3, out4); \
+ ILVRL_H2_SW(in5, in2, m2_m, m3_m); \
+ DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, \
+ m0_m, m1_m, m2_m, m3_m); \
+ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, out2, in5); \
+ \
+ out1 = -in1; \
+ out3 = -in3; \
+ out5 = -in5; \
+ out7 = -in7; \
+}
+
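+/* one 1-D pass of the 16-point inverse ADST used by the 16x16 hybrid transforms */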
+#define VP9_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, r8, \
+ r9, r10, r11, r12, r13, r14, r15, \
+ out0, out1, out2, out3, out4, out5, \
+ out6, out7, out8, out9, out10, out11, \
+ out12, out13, out14, out15) { \
+ v8i16 g0_m, g1_m, g2_m, g3_m, g4_m, g5_m, g6_m, g7_m; \
+ v8i16 g8_m, g9_m, g10_m, g11_m, g12_m, g13_m, g14_m, g15_m; \
+ v8i16 h0_m, h1_m, h2_m, h3_m, h4_m, h5_m, h6_m, h7_m; \
+ v8i16 h8_m, h9_m, h10_m, h11_m; \
+ v8i16 k0_m, k1_m, k2_m, k3_m; \
+ \
+ /* stage 1 */ \
+ k0_m = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64); \
+ k1_m = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); \
+ k2_m = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64); \
+ k3_m = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); \
+ VP9_MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m, \
+ g0_m, g1_m, g2_m, g3_m); \
+ k0_m = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64); \
+ k1_m = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); \
+ k2_m = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64); \
+ k3_m = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); \
+ VP9_MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m, \
+ g4_m, g5_m, g6_m, g7_m); \
+ k0_m = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64); \
+ k1_m = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); \
+ k2_m = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64); \
+ k3_m = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); \
+ VP9_MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m, \
+ g8_m, g9_m, g10_m, g11_m); \
+ k0_m = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64); \
+ k1_m = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); \
+ k2_m = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64); \
+ k3_m = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); \
+ VP9_MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m, \
+ g12_m, g13_m, g14_m, g15_m); \
+ \
+ /* stage 2 */ \
+ k0_m = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); \
+ k1_m = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); \
+ k2_m = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); \
+ VP9_MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m, \
+ h0_m, h1_m, h2_m, h3_m); \
+ k0_m = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); \
+ k1_m = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); \
+ k2_m = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); \
+ VP9_MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m, \
+ h4_m, h5_m, h6_m, h7_m); \
+ BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10); \
+ BUTTERFLY_8(g0_m, g2_m, g4_m, g6_m, g14_m, g12_m, g10_m, g8_m, \
+ h8_m, h9_m, h10_m, h11_m, h6_m, h4_m, h2_m, h0_m); \
+ \
+ /* stage 3 */ \
+ BUTTERFLY_4(h8_m, h9_m, h11_m, h10_m, out0, out1, h11_m, h10_m); \
+ k0_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \
+ k1_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \
+ k2_m = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); \
+ VP9_MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m, \
+ out4, out6, out5, out7); \
+ VP9_MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m, \
+ out12, out14, out13, out15); \
+ \
+ /* stage 4 */ \
+ k0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \
+ k1_m = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); \
+ k2_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \
+ k3_m = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); \
+ VP9_MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3); \
+ VP9_MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7); \
+ VP9_MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11); \
+ VP9_MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \
+}
+#endif /* VP9_COMMON_MIPS_MSA_VP9_IDCT_MSA_H_ */
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_intra_predict_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_intra_predict_msa.c
new file mode 100644
index 00000000000..2fc610505a8
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_intra_predict_msa.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
+#define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) { \
+ out0 = __msa_subs_u_h(out0, in0); \
+ out1 = __msa_subs_u_h(out1, in1); \
+}
+
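+/* vertical prediction copies the above row into every row of the block; */
+/* horizontal prediction replicates each left pixel across its row */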
+static void intra_predict_vert_4x4_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t src_data;
+
+ src_data = LW(src);
+
+ SW4(src_data, src_data, src_data, src_data, dst, dst_stride);
+}
+
+static void intra_predict_vert_8x8_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ uint32_t src_data1, src_data2;
+
+ src_data1 = LW(src);
+ src_data2 = LW(src + 4);
+
+ for (row = 8; row--;) {
+ SW(src_data1, dst);
+ SW(src_data2, (dst + 4));
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ v16u8 src0;
+
+ src0 = LD_UB(src);
+
+ for (row = 16; row--;) {
+ ST_UB(src0, dst);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ v16u8 src1, src2;
+
+ src1 = LD_UB(src);
+ src2 = LD_UB(src + 16);
+
+ for (row = 32; row--;) {
+ ST_UB2(src1, src2, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_horiz_4x4_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t out0, out1, out2, out3;
+
+ out0 = src[0] * 0x01010101;
+ out1 = src[1] * 0x01010101;
+ out2 = src[2] * 0x01010101;
+ out3 = src[3] * 0x01010101;
+
+ SW4(out0, out1, out2, out3, dst, dst_stride);
+}
+
+static void intra_predict_horiz_8x8_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ out0 = src[0] * 0x0101010101010101ull;
+ out1 = src[1] * 0x0101010101010101ull;
+ out2 = src[2] * 0x0101010101010101ull;
+ out3 = src[3] * 0x0101010101010101ull;
+ out4 = src[4] * 0x0101010101010101ull;
+ out5 = src[5] * 0x0101010101010101ull;
+ out6 = src[6] * 0x0101010101010101ull;
+ out7 = src[7] * 0x0101010101010101ull;
+
+ SD4(out0, out1, out2, out3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(out4, out5, out6, out7, dst, dst_stride);
+}
+
+static void intra_predict_horiz_16x16_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ uint8_t inp0, inp1, inp2, inp3;
+ v16u8 src0, src1, src2, src3;
+
+ for (row = 4; row--;) {
+ inp0 = src[0];
+ inp1 = src[1];
+ inp2 = src[2];
+ inp3 = src[3];
+ src += 4;
+
+ src0 = (v16u8)__msa_fill_b(inp0);
+ src1 = (v16u8)__msa_fill_b(inp1);
+ src2 = (v16u8)__msa_fill_b(inp2);
+ src3 = (v16u8)__msa_fill_b(inp3);
+
+ ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+}
+
+static void intra_predict_horiz_32x32_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ uint8_t inp0, inp1, inp2, inp3;
+ v16u8 src0, src1, src2, src3;
+
+ for (row = 8; row--;) {
+ inp0 = src[0];
+ inp1 = src[1];
+ inp2 = src[2];
+ inp3 = src[3];
+ src += 4;
+
+ src0 = (v16u8)__msa_fill_b(inp0);
+ src1 = (v16u8)__msa_fill_b(inp1);
+ src2 = (v16u8)__msa_fill_b(inp2);
+ src3 = (v16u8)__msa_fill_b(inp3);
+
+ ST_UB2(src0, src0, dst, 16);
+ dst += dst_stride;
+ ST_UB2(src1, src1, dst, 16);
+ dst += dst_stride;
+ ST_UB2(src2, src2, dst, 16);
+ dst += dst_stride;
+ ST_UB2(src3, src3, dst, 16);
+ dst += dst_stride;
+ }
+}
+
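+/* DC prediction: fill the block with the rounded average of the above and */
+/* left edge pixels; _tl variants use a single edge, 128dc variants use 128 */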
+static void intra_predict_dc_4x4_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint32_t val0, val1;
+ v16i8 store, src = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ val0 = LW(src_top);
+ val1 = LW(src_left);
+ INSERT_W2_SB(val0, val1, src);
+ sum_h = __msa_hadd_u_h((v16u8)src, (v16u8)src);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
+ store = __msa_splati_b((v16i8)sum_w, 0);
+ val0 = __msa_copy_u_w((v4i32)store, 0);
+
+ SW4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_dc_tl_4x4_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t val0;
+ v16i8 store, data = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+
+ val0 = LW(src);
+ data = (v16i8)__msa_insert_w((v4i32)data, 0, val0);
+ sum_h = __msa_hadd_u_h((v16u8)data, (v16u8)data);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_w, 2);
+ store = __msa_splati_b((v16i8)sum_w, 0);
+ val0 = __msa_copy_u_w((v4i32)store, 0);
+
+ SW4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_128dc_4x4_msa(uint8_t *dst, int32_t dst_stride) {
+ uint32_t out;
+ const v16i8 store = __msa_ldi_b(128);
+
+ out = __msa_copy_u_w((v4i32)store, 0);
+
+ SW4(out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_8x8_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint64_t val0, val1;
+ v16i8 store;
+ v16u8 src = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ val0 = LD(src_top);
+ val1 = LD(src_left);
+ INSERT_D2_UB(val0, val1, src);
+ sum_h = __msa_hadd_u_h(src, src);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
+ store = __msa_splati_b((v16i8)sum_w, 0);
+ val0 = __msa_copy_u_d((v2i64)store, 0);
+
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_dc_tl_8x8_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint64_t val0;
+ v16i8 store;
+ v16u8 data = { 0 };
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ val0 = LD(src);
+ data = (v16u8)__msa_insert_d((v2i64)data, 0, val0);
+ sum_h = __msa_hadd_u_h(data, data);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3);
+ store = __msa_splati_b((v16i8)sum_w, 0);
+ val0 = __msa_copy_u_d((v2i64)store, 0);
+
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(val0, val0, val0, val0, dst, dst_stride);
+}
+
+static void intra_predict_128dc_8x8_msa(uint8_t *dst, int32_t dst_stride) {
+ uint64_t out;
+ const v16i8 store = __msa_ldi_b(128);
+
+ out = __msa_copy_u_d((v2i64)store, 0);
+
+ SD4(out, out, out, out, dst, dst_stride);
+ dst += (4 * dst_stride);
+ SD4(out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_16x16_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ v16u8 top, left, out;
+ v8u16 sum_h, sum_top, sum_left;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ top = LD_UB(src_top);
+ left = LD_UB(src_left);
+ HADD_UB2_UH(top, left, sum_top, sum_left);
+ sum_h = sum_top + sum_left;
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+ dst += (8 * dst_stride);
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ v16u8 data, out;
+ v8u16 sum_h;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ data = LD_UB(src);
+ sum_h = __msa_hadd_u_h(data, data);
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+ dst += (8 * dst_stride);
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride) {
+ const v16u8 out = (v16u8)__msa_ldi_b(128);
+
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+ dst += (8 * dst_stride);
+ ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride);
+}
+
+static void intra_predict_dc_32x32_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint32_t row;
+ v16u8 top0, top1, left0, left1, out;
+ v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ LD_UB2(src_top, 16, top0, top1);
+ LD_UB2(src_left, 16, left0, left1);
+ HADD_UB2_UH(top0, top1, sum_top0, sum_top1);
+ HADD_UB2_UH(left0, left1, sum_left0, sum_left1);
+ sum_h = sum_top0 + sum_top1;
+ sum_h += sum_left0 + sum_left1;
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 6);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ for (row = 16; row--;) {
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_dc_tl_32x32_msa(const uint8_t *src, uint8_t *dst,
+ int32_t dst_stride) {
+ uint32_t row;
+ v16u8 data0, data1, out;
+ v8u16 sum_h, sum_data0, sum_data1;
+ v4u32 sum_w;
+ v2u64 sum_d;
+
+ LD_UB2(src, 16, data0, data1);
+ HADD_UB2_UH(data0, data1, sum_data0, sum_data1);
+ sum_h = sum_data0 + sum_data1;
+ sum_w = __msa_hadd_u_w(sum_h, sum_h);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d);
+ sum_d = __msa_hadd_u_d(sum_w, sum_w);
+ sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5);
+ out = (v16u8)__msa_splati_b((v16i8)sum_w, 0);
+
+ for (row = 16; row--;) {
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) {
+ uint32_t row;
+ const v16u8 out = (v16u8)__msa_ldi_b(128);
+
+ for (row = 16; row--;) {
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ ST_UB2(out, out, dst, 16);
+ dst += dst_stride;
+ }
+}
+
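+/* TM (TrueMotion) prediction: each pixel is clip8(left + above - top_left), */
+/* computed with saturating unsigned subtract and an 8-bit saturation */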
+static void intra_predict_tm_4x4_msa(const uint8_t *src_top_ptr,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint32_t val;
+ uint8_t top_left = src_top_ptr[-1];
+ v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
+ v16u8 src0, src1, src2, src3;
+ v8u16 src_top_left, vec0, vec1, vec2, vec3;
+
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+ val = LW(src_top_ptr);
+ src_top = (v16i8)__msa_insert_w((v4i32)src_top, 0, val);
+
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+
+ ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
+ src_left3, src_top, src0, src1, src2, src3);
+ HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
+ SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
+ ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride);
+}
+
+static void intra_predict_tm_8x8_msa(const uint8_t *src_top_ptr,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint64_t val;
+ uint8_t top_left = src_top_ptr[-1];
+ uint32_t loop_cnt;
+ v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 };
+ v8u16 src_top_left, vec0, vec1, vec2, vec3;
+ v16u8 src0, src1, src2, src3;
+
+ val = LD(src_top_ptr);
+ src_top = (v16i8)__msa_insert_d((v2i64)src_top, 0, val);
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+
+ for (loop_cnt = 2; loop_cnt--;) {
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ src_left += 4;
+
+ ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top,
+ src_left3, src_top, src0, src1, src2, src3);
+ HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3);
+ SAT_UH4_UH(vec0, vec1, vec2, vec3, 7);
+ PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1);
+ ST8x4_UB(tmp0, tmp1, dst, dst_stride);
+ dst += (4 * dst_stride);
+ }
+}
+
+static void intra_predict_tm_16x16_msa(const uint8_t *src_top_ptr,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint8_t top_left = src_top_ptr[-1];
+ uint32_t loop_cnt;
+ v16i8 src_top, src_left0, src_left1, src_left2, src_left3;
+ v8u16 src_top_left, res_r, res_l;
+
+ src_top = LD_SB(src_top_ptr);
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+
+ for (loop_cnt = 4; loop_cnt--;) {
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ src_left += 4;
+
+ ILVRL_B2_UH(src_left0, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+
+ ILVRL_B2_UH(src_left1, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+
+ ILVRL_B2_UH(src_left2, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+
+ ILVRL_B2_UH(src_left3, src_top, res_r, res_l);
+ HADD_UB2_UH(res_r, res_l, res_r, res_l);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l);
+ SAT_UH2_UH(res_r, res_l, 7);
+ PCKEV_ST_SB(res_r, res_l, dst);
+ dst += dst_stride;
+ }
+}
+
+static void intra_predict_tm_32x32_msa(const uint8_t *src_top,
+ const uint8_t *src_left,
+ uint8_t *dst, int32_t dst_stride) {
+ uint8_t top_left = src_top[-1];
+ uint32_t loop_cnt;
+ v16i8 src_top0, src_top1, src_left0, src_left1, src_left2, src_left3;
+ v8u16 src_top_left, res_r0, res_r1, res_l0, res_l1;
+
+ LD_SB2(src_top, 16, src_top0, src_top1);
+ src_top_left = (v8u16)__msa_fill_h(top_left);
+
+ for (loop_cnt = 8; loop_cnt--;) {
+ src_left0 = __msa_fill_b(src_left[0]);
+ src_left1 = __msa_fill_b(src_left[1]);
+ src_left2 = __msa_fill_b(src_left[2]);
+ src_left3 = __msa_fill_b(src_left[3]);
+ src_left += 4;
+
+ ILVR_B2_UH(src_left0, src_top0, src_left0, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left0, src_top0, src_left0, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+
+ ILVR_B2_UH(src_left1, src_top0, src_left1, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left1, src_top0, src_left1, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+
+ ILVR_B2_UH(src_left2, src_top0, src_left2, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left2, src_top0, src_left2, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+
+ ILVR_B2_UH(src_left3, src_top0, src_left3, src_top1, res_r0, res_r1);
+ ILVL_B2_UH(src_left3, src_top0, src_left3, src_top1, res_l0, res_l1);
+ HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0);
+ IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1);
+ SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7);
+ PCKEV_ST_SB(res_r0, res_l0, dst);
+ PCKEV_ST_SB(res_r1, res_l1, dst + 16);
+ dst += dst_stride;
+ }
+}
+
+void vp9_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_4x4_msa(above, dst, y_stride);
+}
+
+void vp9_v_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_8x8_msa(above, dst, y_stride);
+}
+
+void vp9_v_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_16x16_msa(above, dst, y_stride);
+}
+
+void vp9_v_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_vert_32x32_msa(above, dst, y_stride);
+}
+
+void vp9_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_4x4_msa(left, dst, y_stride);
+}
+
+void vp9_h_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_8x8_msa(left, dst, y_stride);
+}
+
+void vp9_h_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_16x16_msa(left, dst, y_stride);
+}
+
+void vp9_h_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_horiz_32x32_msa(left, dst, y_stride);
+}
+
+void vp9_dc_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_4x4_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_8x8_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_16x16_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_dc_32x32_msa(above, left, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_4x4_msa(above, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_8x8_msa(above, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_16x16_msa(above, dst, y_stride);
+}
+
+void vp9_dc_top_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)left;
+
+ intra_predict_dc_tl_32x32_msa(above, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_4x4_msa(left, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_8x8_msa(left, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_16x16_msa(left, dst, y_stride);
+}
+
+void vp9_dc_left_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above,
+ const uint8_t *left) {
+ (void)above;
+
+ intra_predict_dc_tl_32x32_msa(left, dst, y_stride);
+}
+
+void vp9_dc_128_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_4x4_msa(dst, y_stride);
+}
+
+void vp9_dc_128_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_8x8_msa(dst, y_stride);
+}
+
+void vp9_dc_128_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_16x16_msa(dst, y_stride);
+}
+
+void vp9_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ (void)above;
+ (void)left;
+
+ intra_predict_128dc_32x32_msa(dst, y_stride);
+}
+
+void vp9_tm_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_4x4_msa(above, left, dst, y_stride);
+}
+
+void vp9_tm_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_8x8_msa(above, left, dst, y_stride);
+}
+
+void vp9_tm_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_16x16_msa(above, left, dst, y_stride);
+}
+
+void vp9_tm_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride,
+ const uint8_t *above, const uint8_t *left) {
+ intra_predict_tm_32x32_msa(above, left, dst, y_stride);
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_16_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_16_msa.c
new file mode 100644
index 00000000000..aeaa48e4e60
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_16_msa.c
@@ -0,0 +1,1480 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/mem.h"
+#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
+
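+/* 4-tap/8-tap horizontal loop filter for a 16-pixel wide edge; the filter8 */
+/* results and the flat mask are stored in filter48 for the wide-filter pass. */
+/* Returns 1 when only the 4-tap filter is needed (early exit). */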
+int32_t vp9_hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch,
+ uint8_t *filter48,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr) {
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+ v16u8 flat, mask, hev, thresh, b_limit, limit;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+ v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+ v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
+ v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
+ v16u8 zero = { 0 };
+
+ /* load vector elements */
+ LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ /* mask and hev */
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ if (__msa_test_bz_v(flat)) {
+ ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
+
+ return 1;
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
+ q2_r, q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+ p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+
+ ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
+ ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
+ VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+ p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+ p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+ p0_filt8_r, q0_filt8_r);
+ PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
+ q2_filt8_r);
+
+ /* store pixel values */
+ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
+ p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
+ p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
+ q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
+ q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
+ q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
+
+ ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
+ filter48 += (4 * 16);
+ ST_UB2(q1_out, q2_out, filter48, 16);
+ filter48 += (2 * 16);
+ ST_UB(flat, filter48);
+
+ return 0;
+ }
+}
+
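+/* wide (16) horizontal loop filter: where flat2 is set, apply the wide */
+/* filter; otherwise write back the filter8 results saved in filter48 */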
+void vp9_hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) {
+ v16u8 flat, flat2, filter8;
+ v16i8 zero = { 0 };
+ v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+ v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in;
+ v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in;
+ v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in;
+ v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in;
+ v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l;
+ v8i16 l_out, r_out;
+
+ flat = LD_UB(filter48 + 96);
+
+ LD_UB8((src - 8 * pitch), pitch, p7, p6, p5, p4, p3, p2, p1, p0);
+ LD_UB8(src, pitch, q0, q1, q2, q3, q4, q5, q6, q7);
+ VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+
+ if (__msa_test_bz_v(flat2)) {
+ LD_UB4(filter48, 16, p2, p1, p0, q0);
+ LD_UB2(filter48 + 4 * 16, 16, q1, q2);
+
+ src -= 3 * pitch;
+ ST_UB4(p2, p1, p0, q0, src, pitch);
+ src += (4 * pitch);
+ ST_UB2(q1, q2, src, pitch);
+ } else {
+ src -= 7 * pitch;
+
+ ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2,
+ zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in,
+ p2_r_in, p1_r_in, p0_r_in);
+
+ q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0);
+
+ tmp0_r = p7_r_in << 3;
+ tmp0_r -= p7_r_in;
+ tmp0_r += p6_r_in;
+ tmp0_r += q0_r_in;
+ tmp1_r = p6_r_in + p5_r_in;
+ tmp1_r += p4_r_in;
+ tmp1_r += p3_r_in;
+ tmp1_r += p2_r_in;
+ tmp1_r += p1_r_in;
+ tmp1_r += p0_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in,
+ p5_l_in, p4_l_in);
+ ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in,
+ p1_l_in, p0_l_in);
+ q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0);
+
+ tmp0_l = p7_l_in << 3;
+ tmp0_l -= p7_l_in;
+ tmp0_l += p6_l_in;
+ tmp0_l += q0_l_in;
+ tmp1_l = p6_l_in + p5_l_in;
+ tmp1_l += p4_l_in;
+ tmp1_l += p3_l_in;
+ tmp1_l += p2_l_in;
+ tmp1_l += p1_l_in;
+ tmp1_l += p0_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2);
+ ST_UB(p6, src);
+ src += pitch;
+
+ /* p5 */
+ q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1);
+ tmp0_r = p5_r_in - p6_r_in;
+ tmp0_r += q1_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1);
+ tmp0_l = p5_l_in - p6_l_in;
+ tmp0_l += q1_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2);
+ ST_UB(p5, src);
+ src += pitch;
+
+ /* p4 */
+ q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2);
+ tmp0_r = p4_r_in - p5_r_in;
+ tmp0_r += q2_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = (v8i16)__msa_srari_h((v8i16)tmp1_r, 4);
+
+ q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2);
+ tmp0_l = p4_l_in - p5_l_in;
+ tmp0_l += q2_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2);
+ ST_UB(p4, src);
+ src += pitch;
+
+ /* p3 */
+ q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3);
+ tmp0_r = p3_r_in - p4_r_in;
+ tmp0_r += q3_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3);
+ tmp0_l = p3_l_in - p4_l_in;
+ tmp0_l += q3_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2);
+ ST_UB(p3, src);
+ src += pitch;
+
+ /* p2 */
+ q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4);
+ filter8 = LD_UB(filter48);
+ tmp0_r = p2_r_in - p3_r_in;
+ tmp0_r += q4_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4);
+ tmp0_l = p2_l_in - p3_l_in;
+ tmp0_l += q4_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += pitch;
+
+ /* p1 */
+ q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5);
+ filter8 = LD_UB(filter48 + 16);
+ tmp0_r = p1_r_in - p2_r_in;
+ tmp0_r += q5_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5);
+ tmp0_l = p1_l_in - p2_l_in;
+ tmp0_l += q5_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += pitch;
+
+ /* p0 */
+ q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6);
+ filter8 = LD_UB(filter48 + 32);
+ tmp0_r = p0_r_in - p1_r_in;
+ tmp0_r += q6_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6);
+ tmp0_l = p0_l_in - p1_l_in;
+ tmp0_l += q6_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += pitch;
+
+ /* q0 */
+ q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7);
+ filter8 = LD_UB(filter48 + 48);
+ tmp0_r = q7_r_in - p0_r_in;
+ tmp0_r += q0_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7);
+ tmp0_l = q7_l_in - p0_l_in;
+ tmp0_l += q0_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += pitch;
+
+ /* q1 */
+ filter8 = LD_UB(filter48 + 64);
+ tmp0_r = q7_r_in - q0_r_in;
+ tmp0_r += q1_r_in;
+ tmp0_r -= p6_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ tmp0_l = q7_l_in - q0_l_in;
+ tmp0_l += q1_l_in;
+ tmp0_l -= p6_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += pitch;
+
+ /* q2 */
+ filter8 = LD_UB(filter48 + 80);
+ tmp0_r = q7_r_in - q1_r_in;
+ tmp0_r += q2_r_in;
+ tmp0_r -= p5_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ tmp0_l = q7_l_in - q1_l_in;
+ tmp0_l += q2_l_in;
+ tmp0_l -= p5_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += pitch;
+
+ /* q3 */
+ tmp0_r = q7_r_in - q2_r_in;
+ tmp0_r += q3_r_in;
+ tmp0_r -= p4_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ tmp0_l = q7_l_in - q2_l_in;
+ tmp0_l += q3_l_in;
+ tmp0_l -= p4_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2);
+ ST_UB(q3, src);
+ src += pitch;
+
+ /* q4 */
+ tmp0_r = q7_r_in - q3_r_in;
+ tmp0_r += q4_r_in;
+ tmp0_r -= p3_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ tmp0_l = q7_l_in - q3_l_in;
+ tmp0_l += q4_l_in;
+ tmp0_l -= p3_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2);
+ ST_UB(q4, src);
+ src += pitch;
+
+ /* q5 */
+ tmp0_r = q7_r_in - q4_r_in;
+ tmp0_r += q5_r_in;
+ tmp0_r -= p2_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ tmp0_l = q7_l_in - q4_l_in;
+ tmp0_l += q5_l_in;
+ tmp0_l -= p2_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2);
+ ST_UB(q5, src);
+ src += pitch;
+
+ /* q6 */
+ tmp0_r = q7_r_in - q5_r_in;
+ tmp0_r += q6_r_in;
+ tmp0_r -= p1_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ tmp0_l = q7_l_in - q5_l_in;
+ tmp0_l += q6_l_in;
+ tmp0_l -= p1_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2);
+ ST_UB(q6, src);
+ }
+}
+
+void vp9_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]);
+ uint8_t early_exit = 0;
+
+ (void)count;
+
+ early_exit = vp9_hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr,
+ limit_ptr, thresh_ptr);
+
+ if (0 == early_exit) {
+ vp9_hz_lpf_t16_16w(src, pitch, filter48);
+ }
+}
+
+void vp9_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ if (1 == count) {
+ uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
+ uint64_t dword0, dword1;
+ v16u8 flat2, mask, hev, flat, thresh, b_limit, limit;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p7, p6, p5, p4, q4, q5, q6, q7;
+ v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+ v16u8 p0_filter16, p1_filter16;
+ v8i16 p2_filter8, p1_filter8, p0_filter8;
+ v8i16 q0_filter8, q1_filter8, q2_filter8;
+ v8u16 p7_r, p6_r, p5_r, p4_r, q7_r, q6_r, q5_r, q4_r;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
+ v16i8 zero = { 0 };
+ v8u16 tmp0, tmp1, tmp2;
+
+ /* load vector elements */
+ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+ q1_out);
+
+ flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
+
+ if (__msa_test_bz_v(flat)) {
+ p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
+ p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
+ q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
+ q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
+ SD4(p1_d, p0_d, q0_d, q1_d, src - 2 * pitch, pitch);
+ } else {
+ /* convert 8 bit input data into 16 bit */
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+ q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
+ p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
+
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8,
+ zero, q0_filter8, p2_filter8, p1_filter8, p0_filter8,
+ q0_filter8);
+ PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8);
+
+ /* store pixel values */
+ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat);
+ p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat);
+ p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat);
+ q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat);
+ q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat);
+ q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat);
+
+ /* load 16 vector elements */
+ LD_UB4((src - 8 * pitch), pitch, p7, p6, p5, p4);
+ LD_UB4(src + (4 * pitch), pitch, q4, q5, q6, q7);
+
+ VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+
+ if (__msa_test_bz_v(flat2)) {
+ p2_d = __msa_copy_u_d((v2i64)p2_out, 0);
+ p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
+ p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
+ q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
+ q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
+ q2_d = __msa_copy_u_d((v2i64)q2_out, 0);
+
+ SD4(p2_d, p1_d, p0_d, q0_d, src - 3 * pitch, pitch);
+ SD(q1_d, src + pitch);
+ SD(q2_d, src + 2 * pitch);
+ } else {
+ /* LSB(right) 8 pixel operation */
+ ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, q4, zero, q5,
+ zero, q6, zero, q7, p7_r, p6_r, p5_r, p4_r, q4_r, q5_r, q6_r,
+ q7_r);
+
+ tmp0 = p7_r << 3;
+ tmp0 -= p7_r;
+ tmp0 += p6_r;
+ tmp0 += q0_r;
+
+ src -= 7 * pitch;
+
+ /* calculation of p6 and p5 */
+ tmp1 = p6_r + p5_r + p4_r + p3_r;
+ tmp1 += (p2_r + p1_r + p0_r);
+ tmp1 += tmp0;
+ p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ tmp0 = p5_r - p6_r + q1_r - p7_r;
+ tmp1 += tmp0;
+ p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
+ p1_filter16);
+ p0_filter16 = __msa_bmnz_v(p6, p0_filter16, flat2);
+ p1_filter16 = __msa_bmnz_v(p5, p1_filter16, flat2);
+ dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
+ dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
+ SD(dword0, src);
+ src += pitch;
+ SD(dword1, src);
+ src += pitch;
+
+ /* calculation of p4 and p3 */
+ tmp0 = p4_r - p5_r + q2_r - p7_r;
+ tmp2 = p3_r - p4_r + q3_r - p7_r;
+ tmp1 += tmp0;
+ p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ tmp1 += tmp2;
+ p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
+ p1_filter16);
+ p0_filter16 = __msa_bmnz_v(p4, p0_filter16, flat2);
+ p1_filter16 = __msa_bmnz_v(p3, p1_filter16, flat2);
+ dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
+ dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
+ SD(dword0, src);
+ src += pitch;
+ SD(dword1, src);
+ src += pitch;
+
+ /* calculation of p2 and p1 */
+ tmp0 = p2_r - p3_r + q4_r - p7_r;
+ tmp2 = p1_r - p2_r + q5_r - p7_r;
+ tmp1 += tmp0;
+ p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ tmp1 += tmp2;
+ p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
+ p1_filter16);
+ p0_filter16 = __msa_bmnz_v(p2_out, p0_filter16, flat2);
+ p1_filter16 = __msa_bmnz_v(p1_out, p1_filter16, flat2);
+ dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
+ dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
+ SD(dword0, src);
+ src += pitch;
+ SD(dword1, src);
+ src += pitch;
+
+ /* calculation of p0 and q0 */
+ tmp0 = (p0_r - p1_r) + (q6_r - p7_r);
+ tmp2 = (q7_r - p0_r) + (q0_r - p7_r);
+ tmp1 += tmp0;
+ p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ tmp1 += tmp2;
+ p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
+ p1_filter16);
+ p0_filter16 = __msa_bmnz_v(p0_out, p0_filter16, flat2);
+ p1_filter16 = __msa_bmnz_v(q0_out, p1_filter16, flat2);
+ dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
+ dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
+ SD(dword0, src);
+ src += pitch;
+ SD(dword1, src);
+ src += pitch;
+
+ /* calculation of q1 and q2 */
+ tmp0 = q7_r - q0_r + q1_r - p6_r;
+ tmp2 = q7_r - q1_r + q2_r - p5_r;
+ tmp1 += tmp0;
+ p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ tmp1 += tmp2;
+ p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
+ p1_filter16);
+ p0_filter16 = __msa_bmnz_v(q1_out, p0_filter16, flat2);
+ p1_filter16 = __msa_bmnz_v(q2_out, p1_filter16, flat2);
+ dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
+ dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
+ SD(dword0, src);
+ src += pitch;
+ SD(dword1, src);
+ src += pitch;
+
+ /* calculation of q3 and q4 */
+ tmp0 = (q7_r - q2_r) + (q3_r - p4_r);
+ tmp2 = (q7_r - q3_r) + (q4_r - p3_r);
+ tmp1 += tmp0;
+ p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ tmp1 += tmp2;
+ p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
+ p1_filter16);
+ p0_filter16 = __msa_bmnz_v(q3, p0_filter16, flat2);
+ p1_filter16 = __msa_bmnz_v(q4, p1_filter16, flat2);
+ dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
+ dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
+ SD(dword0, src);
+ src += pitch;
+ SD(dword1, src);
+ src += pitch;
+
+ /* calculation of q5 and q6 */
+ tmp0 = (q7_r - q4_r) + (q5_r - p2_r);
+ tmp2 = (q7_r - q5_r) + (q6_r - p1_r);
+ tmp1 += tmp0;
+ p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ tmp1 += tmp2;
+ p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4);
+ PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16,
+ p1_filter16);
+ p0_filter16 = __msa_bmnz_v(q5, p0_filter16, flat2);
+ p1_filter16 = __msa_bmnz_v(q6, p1_filter16, flat2);
+ dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0);
+ dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0);
+ SD(dword0, src);
+ src += pitch;
+ SD(dword1, src);
+ }
+ }
+ } else {
+ vp9_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
+ thresh_ptr, count);
+ }
+}
+
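+/* transpose helpers: rotate pixel blocks so the vertical-edge filters can */
+/* reuse the horizontal-edge filtering code on a 16-byte pitch scratch area */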
+static void vp9_transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
+ uint8_t *output, int32_t out_pitch) {
+ v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org;
+ v16i8 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+
+ LD_UB8(input, in_pitch,
+ p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org);
+ /* 8x8 transpose */
+ TRANSPOSE8x8_UB_UB(p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org,
+ p0_org, p7, p6, p5, p4, p3, p2, p1, p0);
+ /* 8x8 transpose */
+ ILVL_B4_SB(p5_org, p7_org, p4_org, p6_org, p1_org, p3_org, p0_org, p2_org,
+ tmp0, tmp1, tmp2, tmp3);
+ ILVR_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp4, tmp6);
+ ILVL_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp5, tmp7);
+ ILVR_W2_UB(tmp6, tmp4, tmp7, tmp5, q0, q4);
+ ILVL_W2_UB(tmp6, tmp4, tmp7, tmp5, q2, q6);
+ SLDI_B4_0_UB(q0, q2, q4, q6, q1, q3, q5, q7, 8);
+
+ ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch);
+ output += (8 * out_pitch);
+ ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
+}
+
+static void vp9_transpose_8x16_to_16x8(uint8_t *input, int32_t in_pitch,
+ uint8_t *output, int32_t out_pitch) {
+ v16u8 p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o;
+ v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+
+ LD_UB8(input, in_pitch, p7, p6, p5, p4, p3, p2, p1, p0);
+ LD_UB8(input + (8 * in_pitch), in_pitch, q0, q1, q2, q3, q4, q5, q6, q7);
+ TRANSPOSE16x8_UB_UB(p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5,
+ q6, q7, p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o);
+ ST_UB8(p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o, output, out_pitch);
+}
+
+static void vp9_transpose_16x16(uint8_t *input, int32_t in_pitch,
+ uint8_t *output, int32_t out_pitch) {
+ v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+ v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
+ v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+ v8i16 tmp0, tmp1, tmp4, tmp5, tmp6, tmp7;
+ v4i32 tmp2, tmp3;
+
+ LD_UB8(input, in_pitch, row0, row1, row2, row3, row4, row5, row6, row7);
+ input += (8 * in_pitch);
+ LD_UB8(input, in_pitch,
+ row8, row9, row10, row11, row12, row13, row14, row15);
+
+ TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+ row8, row9, row10, row11, row12, row13, row14, row15,
+ p7, p6, p5, p4, p3, p2, p1, p0);
+
+ /* transpose 16x8 matrix into 8x16 */
+ /* total 8 intermediate registers and 32 instructions */
+ q7 = (v16u8)__msa_ilvod_d((v2i64)row8, (v2i64)row0);
+ q6 = (v16u8)__msa_ilvod_d((v2i64)row9, (v2i64)row1);
+ q5 = (v16u8)__msa_ilvod_d((v2i64)row10, (v2i64)row2);
+ q4 = (v16u8)__msa_ilvod_d((v2i64)row11, (v2i64)row3);
+ q3 = (v16u8)__msa_ilvod_d((v2i64)row12, (v2i64)row4);
+ q2 = (v16u8)__msa_ilvod_d((v2i64)row13, (v2i64)row5);
+ q1 = (v16u8)__msa_ilvod_d((v2i64)row14, (v2i64)row6);
+ q0 = (v16u8)__msa_ilvod_d((v2i64)row15, (v2i64)row7);
+
+ ILVEV_B2_SH(q7, q6, q5, q4, tmp0, tmp1);
+ tmp4 = (v8i16)__msa_ilvod_b((v16i8)q6, (v16i8)q7);
+ tmp5 = (v8i16)__msa_ilvod_b((v16i8)q4, (v16i8)q5);
+
+ ILVEV_B2_UB(q3, q2, q1, q0, q5, q7);
+ tmp6 = (v8i16)__msa_ilvod_b((v16i8)q2, (v16i8)q3);
+ tmp7 = (v8i16)__msa_ilvod_b((v16i8)q0, (v16i8)q1);
+
+ ILVEV_H2_SW(tmp0, tmp1, q5, q7, tmp2, tmp3);
+ q0 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
+ q4 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
+
+ tmp2 = (v4i32)__msa_ilvod_h(tmp1, tmp0);
+ tmp3 = (v4i32)__msa_ilvod_h((v8i16)q7, (v8i16)q5);
+ q2 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
+ q6 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
+
+ ILVEV_H2_SW(tmp4, tmp5, tmp6, tmp7, tmp2, tmp3);
+ q1 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
+ q5 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
+
+ tmp2 = (v4i32)__msa_ilvod_h(tmp5, tmp4);
+ tmp3 = (v4i32)__msa_ilvod_h(tmp7, tmp6);
+ q3 = (v16u8)__msa_ilvev_w(tmp3, tmp2);
+ q7 = (v16u8)__msa_ilvod_w(tmp3, tmp2);
+
+ ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch);
+ output += (8 * out_pitch);
+ ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch);
+}
+
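+/* vertical-edge counterpart of the t4/t8 filter: works on the transposed */
+/* block (pitch 16) and writes the 4-tap result back to src_org on early exit */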
+int32_t vp9_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
+ uint8_t *src_org, int32_t pitch_org,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr) {
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+ v16u8 flat, mask, hev, thresh, b_limit, limit;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+ v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
+ v16i8 zero = { 0 };
+ v8i16 vec0, vec1, vec2, vec3;
+
+ /* load vector elements */
+ LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ /* mask and hev */
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ /* flat4 */
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ /* filter4 */
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
+
+ if (__msa_test_bz_v(flat)) {
+ ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+ ST4x8_UB(vec2, vec3, (src_org - 2), pitch_org);
+ return 1;
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+ q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+ p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+
+ /* convert 16 bit output data into 8 bit */
+ p2_r = (v8u16)__msa_pckev_b((v16i8)p2_filt8_r, (v16i8)p2_filt8_r);
+ p1_r = (v8u16)__msa_pckev_b((v16i8)p1_filt8_r, (v16i8)p1_filt8_r);
+ p0_r = (v8u16)__msa_pckev_b((v16i8)p0_filt8_r, (v16i8)p0_filt8_r);
+ q0_r = (v8u16)__msa_pckev_b((v16i8)q0_filt8_r, (v16i8)q0_filt8_r);
+ q1_r = (v8u16)__msa_pckev_b((v16i8)q1_filt8_r, (v16i8)q1_filt8_r);
+ q2_r = (v8u16)__msa_pckev_b((v16i8)q2_filt8_r, (v16i8)q2_filt8_r);
+
+ /* store pixel values */
+ p2_out = __msa_bmnz_v(p2, (v16u8)p2_r, flat);
+ p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_r, flat);
+ p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_r, flat);
+ q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_r, flat);
+ q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_r, flat);
+ q2_out = __msa_bmnz_v(q2, (v16u8)q2_r, flat);
+
+ ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
+ filter48 += (4 * 16);
+ ST_UB2(q1_out, q2_out, filter48, 16);
+ filter48 += (2 * 16);
+ ST_UB(flat, filter48);
+
+ return 0;
+ }
+}
+
+int32_t vp9_vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch,
+ uint8_t *filter48) {
+ v16i8 zero = { 0 };
+ v16u8 filter8, flat, flat2;
+ v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+ v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in;
+ v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in;
+ v8u16 tmp0_r, tmp1_r;
+ v8i16 r_out;
+
+ flat = LD_UB(filter48 + 6 * 16);
+
+ LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
+ LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
+
+ VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+
+ if (__msa_test_bz_v(flat2)) {
+ v8i16 vec0, vec1, vec2, vec3, vec4;
+
+ LD_UB4(filter48, 16, p2, p1, p0, q0);
+ LD_UB2(filter48 + 4 * 16, 16, q1, q2);
+
+ ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec3, vec4);
+ vec2 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1);
+
+ src_org -= 3;
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch);
+ ST2x4_UB(vec2, 0, (src_org + 4), pitch);
+ src_org += (4 * pitch);
+ ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch);
+ ST2x4_UB(vec2, 4, (src_org + 4), pitch);
+
+ return 1;
+ } else {
+ src -= 7 * 16;
+
+ ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2,
+ zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in,
+ p3_r_in, p2_r_in, p1_r_in, p0_r_in);
+ q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0);
+
+ tmp0_r = p7_r_in << 3;
+ tmp0_r -= p7_r_in;
+ tmp0_r += p6_r_in;
+ tmp0_r += q0_r_in;
+ tmp1_r = p6_r_in + p5_r_in;
+ tmp1_r += p4_r_in;
+ tmp1_r += p3_r_in;
+ tmp1_r += p2_r_in;
+ tmp1_r += p1_r_in;
+ tmp1_r += p0_r_in;
+ tmp1_r += tmp0_r;
+
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2);
+ ST8x1_UB(p6, src);
+ src += 16;
+
+ /* p5 */
+ q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1);
+ tmp0_r = p5_r_in - p6_r_in;
+ tmp0_r += q1_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2);
+ ST8x1_UB(p5, src);
+ src += 16;
+
+ /* p4 */
+ q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2);
+ tmp0_r = p4_r_in - p5_r_in;
+ tmp0_r += q2_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2);
+ ST8x1_UB(p4, src);
+ src += 16;
+
+ /* p3 */
+ q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3);
+ tmp0_r = p3_r_in - p4_r_in;
+ tmp0_r += q3_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2);
+ ST8x1_UB(p3, src);
+ src += 16;
+
+ /* p2 */
+ q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4);
+ filter8 = LD_UB(filter48);
+ tmp0_r = p2_r_in - p3_r_in;
+ tmp0_r += q4_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST8x1_UB(filter8, src);
+ src += 16;
+
+ /* p1 */
+ q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5);
+ filter8 = LD_UB(filter48 + 16);
+ tmp0_r = p1_r_in - p2_r_in;
+ tmp0_r += q5_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST8x1_UB(filter8, src);
+ src += 16;
+
+ /* p0 */
+ q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6);
+ filter8 = LD_UB(filter48 + 32);
+ tmp0_r = p0_r_in - p1_r_in;
+ tmp0_r += q6_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST8x1_UB(filter8, src);
+ src += 16;
+
+ /* q0 */
+ q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7);
+ filter8 = LD_UB(filter48 + 48);
+ tmp0_r = q7_r_in - p0_r_in;
+ tmp0_r += q0_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST8x1_UB(filter8, src);
+ src += 16;
+
+ /* q1 */
+ filter8 = LD_UB(filter48 + 64);
+ tmp0_r = q7_r_in - q0_r_in;
+ tmp0_r += q1_r_in;
+ tmp0_r -= p6_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST8x1_UB(filter8, src);
+ src += 16;
+
+ /* q2 */
+ filter8 = LD_UB(filter48 + 80);
+ tmp0_r = q7_r_in - q1_r_in;
+ tmp0_r += q2_r_in;
+ tmp0_r -= p5_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST8x1_UB(filter8, src);
+ src += 16;
+
+ /* q3 */
+ tmp0_r = q7_r_in - q2_r_in;
+ tmp0_r += q3_r_in;
+ tmp0_r -= p4_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2);
+ ST8x1_UB(q3, src);
+ src += 16;
+
+ /* q4 */
+ tmp0_r = q7_r_in - q3_r_in;
+ tmp0_r += q4_r_in;
+ tmp0_r -= p3_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2);
+ ST8x1_UB(q4, src);
+ src += 16;
+
+ /* q5 */
+ tmp0_r = q7_r_in - q4_r_in;
+ tmp0_r += q5_r_in;
+ tmp0_r -= p2_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2);
+ ST8x1_UB(q5, src);
+ src += 16;
+
+ /* q6 */
+ tmp0_r = q7_r_in - q5_r_in;
+ tmp0_r += q6_r_in;
+ tmp0_r -= p1_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out);
+ q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2);
+ ST8x1_UB(q6, src);
+
+ return 0;
+ }
+}
+
+void vp9_lpf_vertical_16_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr) {
+ uint8_t early_exit = 0;
+ DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]);
+ uint8_t *filter48 = &transposed_input[16 * 16];
+
+ vp9_transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16);
+
+ early_exit = vp9_vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8),
+ &filter48[0], src, pitch, b_limit_ptr,
+ limit_ptr, thresh_ptr);
+
+ if (0 == early_exit) {
+ early_exit = vp9_vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch,
+ &filter48[0]);
+
+ if (0 == early_exit) {
+ vp9_transpose_8x16_to_16x8(transposed_input, 16, src - 8, pitch);
+ }
+ }
+}
+
+int32_t vp9_vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48,
+ uint8_t *src_org, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr) {
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+ v16u8 flat, mask, hev, thresh, b_limit, limit;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+ v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+ v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
+ v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
+ v16i8 zero = { 0 };
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
+
+ /* load vector elements */
+ LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ /* mask and hev */
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ /* flat4 */
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ /* filter4 */
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ if (__msa_test_bz_v(flat)) {
+ ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+ ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec4, vec5);
+
+ src_org -= 2;
+ ST4x8_UB(vec2, vec3, src_org, pitch);
+ src_org += 8 * pitch;
+ ST4x8_UB(vec4, vec5, src_org, pitch);
+
+ return 1;
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+ q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+ p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+ ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
+ ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
+ VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+ p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+ p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+ p0_filt8_r, q0_filt8_r);
+ PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
+ q2_filt8_r);
+
+ /* store pixel values */
+ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
+ p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
+ p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
+ q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
+ q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
+ q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
+
+ ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16);
+ filter48 += (4 * 16);
+ ST_UB2(q1_out, q2_out, filter48, 16);
+ filter48 += (2 * 16);
+ ST_UB(flat, filter48);
+
+ return 0;
+ }
+}
+
+int32_t vp9_vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch,
+ uint8_t *filter48) {
+ v16u8 flat, flat2, filter8;
+ v16i8 zero = { 0 };
+ v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7;
+ v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in;
+ v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in;
+ v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in;
+ v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in;
+ v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l;
+ v8i16 l_out, r_out;
+
+ flat = LD_UB(filter48 + 6 * 16);
+
+ LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0);
+ LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7);
+
+ VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2);
+
+ if (__msa_test_bz_v(flat2)) {
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+
+ LD_UB4(filter48, 16, p2, p1, p0, q0);
+ LD_UB2(filter48 + 4 * 16, 16, q1, q2);
+
+ ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec3, vec4);
+ ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec6, vec7);
+ ILVRL_B2_SH(q2, q1, vec2, vec5);
+
+ src_org -= 3;
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch);
+ ST2x4_UB(vec2, 0, (src_org + 4), pitch);
+ src_org += (4 * pitch);
+ ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch);
+ ST2x4_UB(vec2, 4, (src_org + 4), pitch);
+ src_org += (4 * pitch);
+ ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src_org, pitch);
+ ST2x4_UB(vec5, 0, (src_org + 4), pitch);
+ src_org += (4 * pitch);
+ ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src_org, pitch);
+ ST2x4_UB(vec5, 4, (src_org + 4), pitch);
+
+ return 1;
+ } else {
+ src -= 7 * 16;
+
+ ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2,
+ zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in,
+ p3_r_in, p2_r_in, p1_r_in, p0_r_in);
+ q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0);
+
+ tmp0_r = p7_r_in << 3;
+ tmp0_r -= p7_r_in;
+ tmp0_r += p6_r_in;
+ tmp0_r += q0_r_in;
+ tmp1_r = p6_r_in + p5_r_in;
+ tmp1_r += p4_r_in;
+ tmp1_r += p3_r_in;
+ tmp1_r += p2_r_in;
+ tmp1_r += p1_r_in;
+ tmp1_r += p0_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+
+ ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in,
+ p5_l_in, p4_l_in);
+ ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in,
+ p1_l_in, p0_l_in);
+ q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0);
+
+ tmp0_l = p7_l_in << 3;
+ tmp0_l -= p7_l_in;
+ tmp0_l += p6_l_in;
+ tmp0_l += q0_l_in;
+ tmp1_l = p6_l_in + p5_l_in;
+ tmp1_l += p4_l_in;
+ tmp1_l += p3_l_in;
+ tmp1_l += p2_l_in;
+ tmp1_l += p1_l_in;
+ tmp1_l += p0_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2);
+ ST_UB(p6, src);
+ src += 16;
+
+ /* p5 */
+ q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1);
+ tmp0_r = p5_r_in - p6_r_in;
+ tmp0_r += q1_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1);
+ tmp0_l = p5_l_in - p6_l_in;
+ tmp0_l += q1_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2);
+ ST_UB(p5, src);
+ src += 16;
+
+ /* p4 */
+ q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2);
+ tmp0_r = p4_r_in - p5_r_in;
+ tmp0_r += q2_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2);
+ tmp0_l = p4_l_in - p5_l_in;
+ tmp0_l += q2_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2);
+ ST_UB(p4, src);
+ src += 16;
+
+ /* p3 */
+ q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3);
+ tmp0_r = p3_r_in - p4_r_in;
+ tmp0_r += q3_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3);
+ tmp0_l = p3_l_in - p4_l_in;
+ tmp0_l += q3_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2);
+ ST_UB(p3, src);
+ src += 16;
+
+ /* p2 */
+ q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4);
+ filter8 = LD_UB(filter48);
+ tmp0_r = p2_r_in - p3_r_in;
+ tmp0_r += q4_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4);
+ tmp0_l = p2_l_in - p3_l_in;
+ tmp0_l += q4_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += 16;
+
+ /* p1 */
+ q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5);
+ filter8 = LD_UB(filter48 + 16);
+ tmp0_r = p1_r_in - p2_r_in;
+ tmp0_r += q5_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5);
+ tmp0_l = p1_l_in - p2_l_in;
+ tmp0_l += q5_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)(tmp1_l), 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += 16;
+
+ /* p0 */
+ q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6);
+ filter8 = LD_UB(filter48 + 32);
+ tmp0_r = p0_r_in - p1_r_in;
+ tmp0_r += q6_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6);
+ tmp0_l = p0_l_in - p1_l_in;
+ tmp0_l += q6_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += 16;
+
+ /* q0 */
+ q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7);
+ filter8 = LD_UB(filter48 + 48);
+ tmp0_r = q7_r_in - p0_r_in;
+ tmp0_r += q0_r_in;
+ tmp0_r -= p7_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7);
+ tmp0_l = q7_l_in - p0_l_in;
+ tmp0_l += q0_l_in;
+ tmp0_l -= p7_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += 16;
+
+ /* q1 */
+ filter8 = LD_UB(filter48 + 64);
+ tmp0_r = q7_r_in - q0_r_in;
+ tmp0_r += q1_r_in;
+ tmp0_r -= p6_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ tmp0_l = q7_l_in - q0_l_in;
+ tmp0_l += q1_l_in;
+ tmp0_l -= p6_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += 16;
+
+ /* q2 */
+ filter8 = LD_UB(filter48 + 80);
+ tmp0_r = q7_r_in - q1_r_in;
+ tmp0_r += q2_r_in;
+ tmp0_r -= p5_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ tmp0_l = q7_l_in - q1_l_in;
+ tmp0_l += q2_l_in;
+ tmp0_l -= p5_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2);
+ ST_UB(filter8, src);
+ src += 16;
+
+ /* q3 */
+ tmp0_r = q7_r_in - q2_r_in;
+ tmp0_r += q3_r_in;
+ tmp0_r -= p4_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ tmp0_l = q7_l_in - q2_l_in;
+ tmp0_l += q3_l_in;
+ tmp0_l -= p4_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2);
+ ST_UB(q3, src);
+ src += 16;
+
+ /* q4 */
+ tmp0_r = q7_r_in - q3_r_in;
+ tmp0_r += q4_r_in;
+ tmp0_r -= p3_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ tmp0_l = q7_l_in - q3_l_in;
+ tmp0_l += q4_l_in;
+ tmp0_l -= p3_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2);
+ ST_UB(q4, src);
+ src += 16;
+
+ /* q5 */
+ tmp0_r = q7_r_in - q4_r_in;
+ tmp0_r += q5_r_in;
+ tmp0_r -= p2_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ tmp0_l = q7_l_in - q4_l_in;
+ tmp0_l += q5_l_in;
+ tmp0_l -= p2_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2);
+ ST_UB(q5, src);
+ src += 16;
+
+ /* q6 */
+ tmp0_r = q7_r_in - q5_r_in;
+ tmp0_r += q6_r_in;
+ tmp0_r -= p1_r_in;
+ tmp1_r += tmp0_r;
+ r_out = __msa_srari_h((v8i16)tmp1_r, 4);
+ tmp0_l = q7_l_in - q5_l_in;
+ tmp0_l += q6_l_in;
+ tmp0_l -= p1_l_in;
+ tmp1_l += tmp0_l;
+ l_out = __msa_srari_h((v8i16)tmp1_l, 4);
+ r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out);
+ q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2);
+ ST_UB(q6, src);
+
+ return 0;
+ }
+}
+
+void vp9_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr) {
+ uint8_t early_exit = 0;
+ DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]);
+ uint8_t *filter48 = &transposed_input[16 * 16];
+
+ vp9_transpose_16x16((src - 8), pitch, &transposed_input[0], 16);
+
+ early_exit = vp9_vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8),
+ &filter48[0], src, pitch, b_limit_ptr,
+ limit_ptr, thresh_ptr);
+
+ if (0 == early_exit) {
+ early_exit = vp9_vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch,
+ &filter48[0]);
+
+ if (0 == early_exit) {
+ vp9_transpose_16x16(transposed_input, 16, (src - 8), pitch);
+ }
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_4_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_4_msa.c
new file mode 100644
index 00000000000..7f691355a0f
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_4_msa.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
+
+void vp9_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ uint64_t p1_d, p0_d, q0_d, q1_d;
+ v16u8 mask, hev, flat, thresh, b_limit, limit;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
+
+ (void)count;
+
+ /* load vector elements */
+ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
+ p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
+ q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
+ q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
+ SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
+}
+
+void vp9_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit0_ptr,
+ const uint8_t *limit0_ptr,
+ const uint8_t *thresh0_ptr,
+ const uint8_t *b_limit1_ptr,
+ const uint8_t *limit1_ptr,
+ const uint8_t *thresh1_ptr) {
+ v16u8 mask, hev, flat, thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+
+ /* load vector elements */
+ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
+ thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
+ thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
+
+ b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
+ b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
+ b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
+
+ limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
+ limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
+ limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
+
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
+ hev, mask, flat);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+
+ ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch);
+}
+
+void vp9_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ v16u8 mask, hev, flat, limit, thresh, b_limit;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v8i16 vec0, vec1, vec2, vec3;
+
+ (void)count;
+
+ LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+ ILVR_B2_SH(p0, p1, q1, q0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+
+ src -= 2;
+ ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
+ src += 4 * pitch;
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+}
+
+void vp9_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit0_ptr,
+ const uint8_t *limit0_ptr,
+ const uint8_t *thresh0_ptr,
+ const uint8_t *b_limit1_ptr,
+ const uint8_t *limit1_ptr,
+ const uint8_t *thresh1_ptr) {
+ v16u8 mask, hev, flat;
+ v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 row0, row1, row2, row3, row4, row5, row6, row7;
+ v16u8 row8, row9, row10, row11, row12, row13, row14, row15;
+ v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+
+ LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7);
+ LD_UB8(src - 4 + (8 * pitch), pitch,
+ row8, row9, row10, row11, row12, row13, row14, row15);
+
+ TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7,
+ row8, row9, row10, row11, row12, row13, row14, row15,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr);
+ thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr);
+ thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0);
+
+ b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr);
+ b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr);
+ b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0);
+
+ limit0 = (v16u8)__msa_fill_b(*limit0_ptr);
+ limit1 = (v16u8)__msa_fill_b(*limit1_ptr);
+ limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0);
+
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0,
+ hev, mask, flat);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+ ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
+ ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3);
+ ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1);
+ ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5);
+
+ src -= 2;
+
+ ST4x8_UB(tmp2, tmp3, src, pitch);
+ src += (8 * pitch);
+ ST4x8_UB(tmp4, tmp5, src, pitch);
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_8_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_8_msa.c
new file mode 100644
index 00000000000..26a858d6e62
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_8_msa.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vp9/common/mips/msa/vp9_loopfilter_msa.h"
+
+void vp9_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
+ v16u8 mask, hev, flat, thresh, b_limit, limit;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+ v8i16 p2_filter8, p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
+ v16i8 zero = { 0 };
+
+ (void)count;
+
+ /* load vector elements */
+ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
+
+ if (__msa_test_bz_v(flat)) {
+ p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
+ p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
+ q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
+ q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
+ SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch);
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
+ q2_r, q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8,
+ p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8);
+
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8,
+ zero, q0_filter8, p2_filter8, p1_filter8, p0_filter8,
+ q0_filter8);
+ PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8);
+
+ /* store pixel values */
+ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat);
+ p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat);
+ p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat);
+ q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat);
+ q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat);
+ q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat);
+
+ p2_d = __msa_copy_u_d((v2i64)p2_out, 0);
+ p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
+ p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
+ q0_d = __msa_copy_u_d((v2i64)q0_out, 0);
+ q1_d = __msa_copy_u_d((v2i64)q1_out, 0);
+ q2_d = __msa_copy_u_d((v2i64)q2_out, 0);
+
+ src -= 3 * pitch;
+
+ SD4(p2_d, p1_d, p0_d, q0_d, src, pitch);
+ src += (4 * pitch);
+ SD(q1_d, src);
+ src += pitch;
+ SD(q2_d, src);
+ }
+}
+
+void vp9_lpf_horizontal_8_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *b_limit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out;
+ v16u8 flat, mask, hev, tmp, thresh, b_limit, limit;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+ v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+ v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
+ v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
+ v16u8 zero = { 0 };
+
+ /* load vector elements */
+ LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh0);
+ tmp = (v16u8)__msa_fill_b(*thresh1);
+ thresh = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)thresh);
+
+ b_limit = (v16u8)__msa_fill_b(*b_limit0);
+ tmp = (v16u8)__msa_fill_b(*b_limit1);
+ b_limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)b_limit);
+
+ limit = (v16u8)__msa_fill_b(*limit0);
+ tmp = (v16u8)__msa_fill_b(*limit1);
+ limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)limit);
+
+ /* mask and hev */
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ if (__msa_test_bz_v(flat)) {
+ ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch);
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r,
+ q2_r, q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+ p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+
+ ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
+ ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
+ VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+ p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+ p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+ p0_filt8_r, q0_filt8_r);
+ PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
+ q2_filt8_r);
+
+ /* store pixel values */
+ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
+ p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
+ p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
+ q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
+ q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
+ q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
+
+ src -= 3 * pitch;
+
+ ST_UB4(p2_out, p1_out, p0_out, q0_out, src, pitch);
+ src += (4 * pitch);
+ ST_UB2(q1_out, q2_out, src, pitch);
+ src += (2 * pitch);
+ }
+}
+
+void vp9_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+ int32_t count) {
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p1_out, p0_out, q0_out, q1_out;
+ v16u8 flat, mask, hev, thresh, b_limit, limit;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+ v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
+ v16u8 zero = { 0 };
+ v8i16 vec0, vec1, vec2, vec3, vec4;
+
+ (void)count;
+
+ /* load vector elements */
+ LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
+
+ TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh_ptr);
+ b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
+ limit = (v16u8)__msa_fill_b(*limit_ptr);
+
+ /* mask and hev */
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ /* flat4 */
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ /* filter4 */
+ VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
+
+ if (__msa_test_bz_v(flat)) {
+    /* Store 4 pixels p1 - q1 */
+ ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+
+ src -= 2;
+ ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
+ src += 4 * pitch;
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+ q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+ p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(p2_filt8_r, p2_filt8_r, p1_filt8_r, p1_filt8_r, p0_filt8_r,
+ p0_filt8_r, q0_filt8_r, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+ p0_filt8_r, q0_filt8_r);
+ PCKEV_B2_SH(q1_filt8_r, q1_filt8_r, q2_filt8_r, q2_filt8_r, q1_filt8_r,
+ q2_filt8_r);
+
+ /* store pixel values */
+ p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
+ p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
+ p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
+ q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
+ q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
+ q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
+
+    /* Store 6 pixels p2 - q2 */
+ ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+ vec4 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1);
+
+ src -= 3;
+ ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec4, 0, src + 4, pitch);
+ src += (4 * pitch);
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec4, 4, src + 4, pitch);
+ }
+}
+
+void vp9_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit0,
+ const uint8_t *limit0,
+ const uint8_t *thresh0,
+ const uint8_t *b_limit1,
+ const uint8_t *limit1,
+ const uint8_t *thresh1) {
+ uint8_t *temp_src;
+ v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
+ v16u8 p1_out, p0_out, q0_out, q1_out;
+ v16u8 flat, mask, hev, thresh, b_limit, limit;
+ v16u8 row4, row5, row6, row7, row12, row13, row14, row15;
+ v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
+ v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
+ v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
+ v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
+ v16u8 zero = { 0 };
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+
+ temp_src = src - 4;
+
+ LD_UB8(temp_src, pitch, p0, p1, p2, p3, row4, row5, row6, row7);
+ temp_src += (8 * pitch);
+ LD_UB8(temp_src, pitch, q3, q2, q1, q0, row12, row13, row14, row15);
+
+ /* transpose 16x8 matrix into 8x16 */
+ TRANSPOSE16x8_UB_UB(p0, p1, p2, p3, row4, row5, row6, row7,
+ q3, q2, q1, q0, row12, row13, row14, row15,
+ p3, p2, p1, p0, q0, q1, q2, q3);
+
+ thresh = (v16u8)__msa_fill_b(*thresh0);
+ vec0 = (v8i16)__msa_fill_b(*thresh1);
+ thresh = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)thresh);
+
+ b_limit = (v16u8)__msa_fill_b(*b_limit0);
+ vec0 = (v8i16)__msa_fill_b(*b_limit1);
+ b_limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)b_limit);
+
+ limit = (v16u8)__msa_fill_b(*limit0);
+ vec0 = (v8i16)__msa_fill_b(*limit1);
+ limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)limit);
+
+ /* mask and hev */
+ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh,
+ hev, mask, flat);
+ /* flat4 */
+ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
+ /* filter4 */
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+
+ if (__msa_test_bz_v(flat)) {
+ ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec2, vec3);
+ ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec4, vec5);
+
+ src -= 2;
+ ST4x8_UB(vec2, vec3, src, pitch);
+ src += 8 * pitch;
+ ST4x8_UB(vec4, vec5, src, pitch);
+ } else {
+ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
+ zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
+ q3_r);
+ VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
+ p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);
+
+ ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l);
+ ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l);
+
+ /* filter8 */
+ VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l,
+ p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l);
+
+ /* convert 16 bit output data into 8 bit */
+ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l,
+ p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r,
+ p0_filt8_r, q0_filt8_r);
+ PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r,
+ q2_filt8_r);
+
+ /* store pixel values */
+ p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
+ p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
+ p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
+ q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
+ q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
+ q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);
+
+ ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec3, vec4);
+ ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1);
+ ILVRL_H2_SH(vec1, vec0, vec6, vec7);
+ ILVRL_B2_SH(q2, q1, vec2, vec5);
+
+ src -= 3;
+ ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec2, 0, src + 4, pitch);
+ src += (4 * pitch);
+ ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec2, 4, src + 4, pitch);
+ src += (4 * pitch);
+ ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec5, 0, src + 4, pitch);
+ src += (4 * pitch);
+ ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src, pitch);
+ ST2x4_UB(vec5, 4, src + 4, pitch);
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_msa.h
new file mode 100644
index 00000000000..0643e41a520
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_loopfilter_msa.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_
+#define VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_
+
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
+#define VP9_LPF_FILTER4_8W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
+ p1_out, p0_out, q0_out, q1_out) { \
+ v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
+ v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
+ v8i16 q0_sub_p0_r, filt_r, cnst3h; \
+ \
+ p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
+ p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
+ q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
+ q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
+ \
+ filt = __msa_subs_s_b(p1_m, q1_m); \
+ filt = filt & (v16i8)hev_in; \
+ q0_sub_p0 = q0_m - p0_m; \
+ filt_sign = __msa_clti_s_b(filt, 0); \
+ \
+ cnst3h = __msa_ldi_h(3); \
+ q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
+ q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
+ filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
+ filt_r += q0_sub_p0_r; \
+ filt_r = __msa_sat_s_h(filt_r, 7); \
+ \
+  /* only the right half is computed; replicate it into both halves */      \
+ filt = __msa_pckev_b((v16i8)filt_r, (v16i8)filt_r); \
+ \
+ filt = filt & (v16i8)mask_in; \
+ cnst4b = __msa_ldi_b(4); \
+ filt1 = __msa_adds_s_b(filt, cnst4b); \
+ filt1 >>= 3; \
+ \
+ cnst3b = __msa_ldi_b(3); \
+ filt2 = __msa_adds_s_b(filt, cnst3b); \
+ filt2 >>= 3; \
+ \
+ q0_m = __msa_subs_s_b(q0_m, filt1); \
+ q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
+ p0_m = __msa_adds_s_b(p0_m, filt2); \
+ p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
+ \
+ filt = __msa_srari_b(filt1, 1); \
+ hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
+ filt = filt & (v16i8)hev_in; \
+ \
+ q1_m = __msa_subs_s_b(q1_m, filt); \
+ q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
+ p1_m = __msa_adds_s_b(p1_m, filt); \
+ p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
+}
+
+#define VP9_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
+ p1_out, p0_out, q0_out, q1_out) { \
+ v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
+ v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
+ v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
+ \
+ p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
+ p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
+ q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
+ q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
+ \
+ filt = __msa_subs_s_b(p1_m, q1_m); \
+ \
+ filt = filt & (v16i8)hev_in; \
+ \
+ q0_sub_p0 = q0_m - p0_m; \
+ filt_sign = __msa_clti_s_b(filt, 0); \
+ \
+ cnst3h = __msa_ldi_h(3); \
+ q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
+ q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
+ filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
+ filt_r += q0_sub_p0_r; \
+ filt_r = __msa_sat_s_h(filt_r, 7); \
+ \
+ q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \
+ q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \
+ filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
+ filt_l += q0_sub_p0_l; \
+ filt_l = __msa_sat_s_h(filt_l, 7); \
+ \
+ filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
+ filt = filt & (v16i8)mask_in; \
+ \
+ cnst4b = __msa_ldi_b(4); \
+ filt1 = __msa_adds_s_b(filt, cnst4b); \
+ filt1 >>= 3; \
+ \
+ cnst3b = __msa_ldi_b(3); \
+ filt2 = __msa_adds_s_b(filt, cnst3b); \
+ filt2 >>= 3; \
+ \
+ q0_m = __msa_subs_s_b(q0_m, filt1); \
+ q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
+ p0_m = __msa_adds_s_b(p0_m, filt2); \
+ p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
+ \
+ filt = __msa_srari_b(filt1, 1); \
+ hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
+ filt = filt & (v16i8)hev_in; \
+ \
+ q1_m = __msa_subs_s_b(q1_m, filt); \
+ q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
+ p1_m = __msa_adds_s_b(p1_m, filt); \
+ p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
+}
+
+#define VP9_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) { \
+ v16u8 tmp, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
+ v16u8 zero_in = { 0 }; \
+ \
+ tmp = __msa_ori_b(zero_in, 1); \
+ p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \
+ q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \
+ p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \
+ q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \
+ \
+ p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \
+ flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \
+ p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \
+ flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \
+ \
+ flat_out = (tmp < (v16u8)flat_out); \
+ flat_out = __msa_xori_b(flat_out, 0xff); \
+ flat_out = flat_out & (mask); \
+}
+
+#define VP9_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, \
+ q5_in, q6_in, q7_in, flat_in, flat2_out) { \
+ v16u8 tmp, zero_in = { 0 }; \
+ v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0; \
+ v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0; \
+ \
+ tmp = __msa_ori_b(zero_in, 1); \
+ p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in); \
+ q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in); \
+ p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in); \
+ q5_a_sub_q0 = __msa_asub_u_b(q5_in, q0_in); \
+ p6_a_sub_p0 = __msa_asub_u_b(p6_in, p0_in); \
+ q6_a_sub_q0 = __msa_asub_u_b(q6_in, q0_in); \
+ p7_a_sub_p0 = __msa_asub_u_b(p7_in, p0_in); \
+ q7_a_sub_q0 = __msa_asub_u_b(q7_in, q0_in); \
+ \
+ p4_a_sub_p0 = __msa_max_u_b(p4_a_sub_p0, q4_a_sub_q0); \
+ flat2_out = __msa_max_u_b(p5_a_sub_p0, q5_a_sub_q0); \
+ flat2_out = __msa_max_u_b(p4_a_sub_p0, flat2_out); \
+ p6_a_sub_p0 = __msa_max_u_b(p6_a_sub_p0, q6_a_sub_q0); \
+ flat2_out = __msa_max_u_b(p6_a_sub_p0, flat2_out); \
+ p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0); \
+ flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out); \
+ \
+ flat2_out = (tmp < (v16u8)flat2_out); \
+ flat2_out = __msa_xori_b(flat2_out, 0xff); \
+ flat2_out = flat2_out & flat_in; \
+}
+
+#define VP9_FILTER8(p3_in, p2_in, p1_in, p0_in, \
+ q0_in, q1_in, q2_in, q3_in, \
+ p2_filt8_out, p1_filt8_out, p0_filt8_out, \
+ q0_filt8_out, q1_filt8_out, q2_filt8_out) { \
+ v8u16 tmp0, tmp1, tmp2; \
+ \
+ tmp2 = p2_in + p1_in + p0_in; \
+ tmp0 = p3_in << 1; \
+ \
+ tmp0 = tmp0 + tmp2 + q0_in; \
+ tmp1 = tmp0 + p3_in + p2_in; \
+ p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ \
+ tmp1 = tmp0 + p1_in + q1_in; \
+ p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ \
+ tmp1 = q2_in + q1_in + q0_in; \
+ tmp2 = tmp2 + tmp1; \
+ tmp0 = tmp2 + (p0_in); \
+ tmp0 = tmp0 + (p3_in); \
+ p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp0, 3); \
+ \
+ tmp0 = q2_in + q3_in; \
+ tmp0 = p0_in + tmp1 + tmp0; \
+ tmp1 = q3_in + q3_in; \
+ tmp1 = tmp1 + tmp0; \
+ q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ \
+ tmp0 = tmp2 + q3_in; \
+ tmp1 = tmp0 + q0_in; \
+ q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ \
+ tmp1 = tmp0 - p2_in; \
+ tmp0 = q1_in + q3_in; \
+ tmp1 = tmp0 + tmp1; \
+ q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+}
+
+#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, \
+ q0_in, q1_in, q2_in, q3_in, \
+ limit_in, b_limit_in, thresh_in, \
+ hev_out, mask_out, flat_out) { \
+ v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \
+ v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \
+ \
+ /* absolute subtraction of pixel values */ \
+ p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in); \
+ p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in); \
+ p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in); \
+ q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in); \
+ q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in); \
+ q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in); \
+ p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in); \
+ p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in); \
+ \
+ /* calculation of hev */ \
+ flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \
+ hev_out = thresh_in < (v16u8)flat_out; \
+ \
+ /* calculation of mask */ \
+ p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
+ p1_asub_q1_m >>= 1; \
+ p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
+ \
+ mask_out = b_limit_in < p0_asub_q0_m; \
+ mask_out = __msa_max_u_b(flat_out, mask_out); \
+ p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
+ mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
+ q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
+ mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
+ \
+ mask_out = limit_in < (v16u8)mask_out; \
+ mask_out = __msa_xori_b(mask_out, 0xff); \
+}
+#endif /* VP9_COMMON_MIPS_MSA_VP9_LOOPFILTER_MSA_H_ */
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h
index d7aabbb8898..43850758c24 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_macros_msa.h
@@ -16,852 +16,1971 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-#if HAVE_MSA
-/* load macros */
-#define LOAD_UB(psrc) *((const v16u8 *)(psrc))
-#define LOAD_SB(psrc) *((const v16i8 *)(psrc))
-#define LOAD_UH(psrc) *((const v8u16 *)(psrc))
-#define LOAD_SH(psrc) *((const v8i16 *)(psrc))
-#define LOAD_UW(psrc) *((const v4u32 *)(psrc))
-#define LOAD_SW(psrc) *((const v4i32 *)(psrc))
-#define LOAD_UD(psrc) *((const v2u64 *)(psrc))
-#define LOAD_SD(psrc) *((const v2i64 *)(psrc))
-
-/* store macros */
-#define STORE_UB(vec, pdest) *((v16u8 *)(pdest)) = (vec)
-#define STORE_SB(vec, pdest) *((v16i8 *)(pdest)) = (vec)
-#define STORE_UH(vec, pdest) *((v8u16 *)(pdest)) = (vec)
-#define STORE_SH(vec, pdest) *((v8i16 *)(pdest)) = (vec)
-#define STORE_UW(vec, pdest) *((v4u32 *)(pdest)) = (vec)
-#define STORE_SW(vec, pdest) *((v4i32 *)(pdest)) = (vec)
-#define STORE_UD(vec, pdest) *((v2u64 *)(pdest)) = (vec)
-#define STORE_SD(vec, pdest) *((v2i64 *)(pdest)) = (vec)
+#define LD_B(RTYPE, psrc) *((const RTYPE *)(psrc))
+#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
+#define LD_SB(...) LD_B(v16i8, __VA_ARGS__)
+
+#define LD_H(RTYPE, psrc) *((const RTYPE *)(psrc))
+#define LD_UH(...) LD_H(v8u16, __VA_ARGS__)
+#define LD_SH(...) LD_H(v8i16, __VA_ARGS__)
+
+#define LD_W(RTYPE, psrc) *((const RTYPE *)(psrc))
+#define LD_SW(...) LD_W(v4i32, __VA_ARGS__)
+
+#define ST_B(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
+#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
+#define ST_SB(...) ST_B(v16i8, __VA_ARGS__)
+
+#define ST_H(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
+#define ST_SH(...) ST_H(v8i16, __VA_ARGS__)
+
+#define ST_W(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
+#define ST_SW(...) ST_W(v4i32, __VA_ARGS__)
#if (__mips_isa_rev >= 6)
-#define LOAD_WORD(psrc) ({ \
- const uint8_t *src_m = (const uint8_t *)(psrc); \
- uint32_t val_m; \
- \
- __asm__ __volatile__ ( \
- "lw %[val_m], %[src_m] \n\t" \
- \
- : [val_m] "=r" (val_m) \
- : [src_m] "m" (*src_m) \
- ); \
- \
- val_m; \
+#define LH(psrc) ({ \
+ const uint8_t *psrc_m = (const uint8_t *)(psrc); \
+ uint16_t val_m; \
+ \
+ __asm__ __volatile__ ( \
+ "lh %[val_m], %[psrc_m] \n\t" \
+ \
+ : [val_m] "=r" (val_m) \
+ : [psrc_m] "m" (*psrc_m) \
+ ); \
+ \
+ val_m; \
+})
+
+#define LW(psrc) ({ \
+ const uint8_t *psrc_m = (const uint8_t *)(psrc); \
+ uint32_t val_m; \
+ \
+ __asm__ __volatile__ ( \
+ "lw %[val_m], %[psrc_m] \n\t" \
+ \
+ : [val_m] "=r" (val_m) \
+ : [psrc_m] "m" (*psrc_m) \
+ ); \
+ \
+ val_m; \
})
#if (__mips == 64)
-#define LOAD_DWORD(psrc) ({ \
- const uint8_t *src_m = (const uint8_t *)(psrc); \
- uint64_t val_m = 0; \
- \
- __asm__ __volatile__ ( \
- "ld %[val_m], %[src_m] \n\t" \
- \
- : [val_m] "=r" (val_m) \
- : [src_m] "m" (*src_m) \
- ); \
- \
- val_m; \
+#define LD(psrc) ({ \
+ const uint8_t *psrc_m = (const uint8_t *)(psrc); \
+ uint64_t val_m = 0; \
+ \
+ __asm__ __volatile__ ( \
+ "ld %[val_m], %[psrc_m] \n\t" \
+ \
+ : [val_m] "=r" (val_m) \
+ : [psrc_m] "m" (*psrc_m) \
+ ); \
+ \
+ val_m; \
})
#else // !(__mips == 64)
-#define LOAD_DWORD(psrc) ({ \
- const uint8_t *src1_m = (const uint8_t *)(psrc); \
- const uint8_t *src2_m = ((const uint8_t *)(psrc)) + 4; \
- uint32_t val0_m, val1_m; \
- uint64_t genval_m = 0; \
- \
- __asm__ __volatile__ ( \
- "lw %[val0_m], %[src1_m] \n\t" \
- \
- : [val0_m] "=r" (val0_m) \
- : [src1_m] "m" (*src1_m) \
- ); \
- \
- __asm__ __volatile__ ( \
- "lw %[val1_m], %[src2_m] \n\t" \
- \
- : [val1_m] "=r" (val1_m) \
- : [src2_m] "m" (*src2_m) \
- ); \
- \
- genval_m = (uint64_t)(val1_m); \
- genval_m = (uint64_t)((genval_m << 32) & 0xFFFFFFFF00000000); \
- genval_m = (uint64_t)(genval_m | (uint64_t)val0_m); \
- \
- genval_m; \
+#define LD(psrc) ({ \
+ const uint8_t *psrc_m = (const uint8_t *)(psrc); \
+ uint32_t val0_m, val1_m; \
+ uint64_t val_m = 0; \
+ \
+ val0_m = LW(psrc_m); \
+ val1_m = LW(psrc_m + 4); \
+ \
+ val_m = (uint64_t)(val1_m); \
+ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \
+ val_m = (uint64_t)(val_m | (uint64_t)val0_m); \
+ \
+ val_m; \
})
#endif // (__mips == 64)
-#define STORE_WORD_WITH_OFFSET_1(pdst, val) { \
- uint8_t *dst_ptr_m = ((uint8_t *)(pdst)) + 1; \
- const uint32_t val_m = (val); \
- \
- __asm__ __volatile__ ( \
- "sw %[val_m], %[dst_ptr_m] \n\t" \
- \
- : [dst_ptr_m] "=m" (*dst_ptr_m) \
- : [val_m] "r" (val_m) \
- ); \
-}
-
-#define STORE_WORD(pdst, val) { \
- uint8_t *dst_ptr_m = (uint8_t *)(pdst); \
- const uint32_t val_m = (val); \
- \
- __asm__ __volatile__ ( \
- "sw %[val_m], %[dst_ptr_m] \n\t" \
- \
- : [dst_ptr_m] "=m" (*dst_ptr_m) \
- : [val_m] "r" (val_m) \
- ); \
-}
-
-#define STORE_DWORD(pdst, val) { \
- uint8_t *dst_ptr_m = (uint8_t *)(pdst); \
- const uint64_t val_m = (val); \
- \
- __asm__ __volatile__ ( \
- "sd %[val_m], %[dst_ptr_m] \n\t" \
- \
- : [dst_ptr_m] "=m" (*dst_ptr_m) \
- : [val_m] "r" (val_m) \
- ); \
+
+#define SH(val, pdst) { \
+ uint8_t *pdst_m = (uint8_t *)(pdst); \
+ const uint16_t val_m = (val); \
+ \
+ __asm__ __volatile__ ( \
+ "sh %[val_m], %[pdst_m] \n\t" \
+ \
+ : [pdst_m] "=m" (*pdst_m) \
+ : [val_m] "r" (val_m) \
+ ); \
+}
+
+#define SW(val, pdst) { \
+ uint8_t *pdst_m = (uint8_t *)(pdst); \
+ const uint32_t val_m = (val); \
+ \
+ __asm__ __volatile__ ( \
+ "sw %[val_m], %[pdst_m] \n\t" \
+ \
+ : [pdst_m] "=m" (*pdst_m) \
+ : [val_m] "r" (val_m) \
+ ); \
+}
+
+#define SD(val, pdst) { \
+ uint8_t *pdst_m = (uint8_t *)(pdst); \
+ const uint64_t val_m = (val); \
+ \
+ __asm__ __volatile__ ( \
+ "sd %[val_m], %[pdst_m] \n\t" \
+ \
+ : [pdst_m] "=m" (*pdst_m) \
+ : [val_m] "r" (val_m) \
+ ); \
}
#else // !(__mips_isa_rev >= 6)
-#define LOAD_WORD(psrc) ({ \
- const uint8_t *src_m = (const uint8_t *)(psrc); \
- uint32_t val_m; \
- \
- __asm__ __volatile__ ( \
- "ulw %[val_m], %[src_m] \n\t" \
- \
- : [val_m] "=r" (val_m) \
- : [src_m] "m" (*src_m) \
- ); \
- \
- val_m; \
+#define LH(psrc) ({ \
+ const uint8_t *psrc_m = (const uint8_t *)(psrc); \
+ uint16_t val_m; \
+ \
+ __asm__ __volatile__ ( \
+ "ulh %[val_m], %[psrc_m] \n\t" \
+ \
+ : [val_m] "=r" (val_m) \
+ : [psrc_m] "m" (*psrc_m) \
+ ); \
+ \
+ val_m; \
+})
+
+#define LW(psrc) ({ \
+ const uint8_t *psrc_m = (const uint8_t *)(psrc); \
+ uint32_t val_m; \
+ \
+ __asm__ __volatile__ ( \
+ "ulw %[val_m], %[psrc_m] \n\t" \
+ \
+ : [val_m] "=r" (val_m) \
+ : [psrc_m] "m" (*psrc_m) \
+ ); \
+ \
+ val_m; \
})
#if (__mips == 64)
-#define LOAD_DWORD(psrc) ({ \
- const uint8_t *src_m = (const uint8_t *)(psrc); \
- uint64_t val_m = 0; \
- \
- __asm__ __volatile__ ( \
- "uld %[val_m], %[src_m] \n\t" \
- \
- : [val_m] "=r" (val_m) \
- : [src_m] "m" (*src_m) \
- ); \
- \
- val_m; \
+#define LD(psrc) ({ \
+ const uint8_t *psrc_m = (const uint8_t *)(psrc); \
+ uint64_t val_m = 0; \
+ \
+ __asm__ __volatile__ ( \
+ "uld %[val_m], %[psrc_m] \n\t" \
+ \
+ : [val_m] "=r" (val_m) \
+ : [psrc_m] "m" (*psrc_m) \
+ ); \
+ \
+ val_m; \
})
#else // !(__mips == 64)
-#define LOAD_DWORD(psrc) ({ \
- const uint8_t *src1_m = (const uint8_t *)(psrc); \
- const uint8_t *src2_m = ((const uint8_t *)(psrc)) + 4; \
- uint32_t val0_m, val1_m; \
- uint64_t genval_m = 0; \
- \
- __asm__ __volatile__ ( \
- "ulw %[val0_m], %[src1_m] \n\t" \
- \
- : [val0_m] "=r" (val0_m) \
- : [src1_m] "m" (*src1_m) \
- ); \
- \
- __asm__ __volatile__ ( \
- "ulw %[val1_m], %[src2_m] \n\t" \
- \
- : [val1_m] "=r" (val1_m) \
- : [src2_m] "m" (*src2_m) \
- ); \
- \
- genval_m = (uint64_t)(val1_m); \
- genval_m = (uint64_t)((genval_m << 32) & 0xFFFFFFFF00000000); \
- genval_m = (uint64_t)(genval_m | (uint64_t)val0_m); \
- \
- genval_m; \
+#define LD(psrc) ({ \
+ const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \
+ uint32_t val0_m, val1_m; \
+ uint64_t val_m = 0; \
+ \
+ val0_m = LW(psrc_m1); \
+ val1_m = LW(psrc_m1 + 4); \
+ \
+ val_m = (uint64_t)(val1_m); \
+ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \
+ val_m = (uint64_t)(val_m | (uint64_t)val0_m); \
+ \
+ val_m; \
})
#endif // (__mips == 64)
-#define STORE_WORD_WITH_OFFSET_1(pdst, val) { \
- uint8_t *dst_ptr_m = ((uint8_t *)(pdst)) + 1; \
- const uint32_t val_m = (val); \
- \
- __asm__ __volatile__ ( \
- "usw %[val_m], %[dst_ptr_m] \n\t" \
- \
- : [dst_ptr_m] "=m" (*dst_ptr_m) \
- : [val_m] "r" (val_m) \
- ); \
+#define SH(val, pdst) { \
+ uint8_t *pdst_m = (uint8_t *)(pdst); \
+ const uint16_t val_m = (val); \
+ \
+ __asm__ __volatile__ ( \
+ "ush %[val_m], %[pdst_m] \n\t" \
+ \
+ : [pdst_m] "=m" (*pdst_m) \
+ : [val_m] "r" (val_m) \
+ ); \
}
-#define STORE_WORD(pdst, val) { \
- uint8_t *dst_ptr_m = (uint8_t *)(pdst); \
- const uint32_t val_m = (val); \
- \
- __asm__ __volatile__ ( \
- "usw %[val_m], %[dst_ptr_m] \n\t" \
- \
- : [dst_ptr_m] "=m" (*dst_ptr_m) \
- : [val_m] "r" (val_m) \
- ); \
+#define SW(val, pdst) { \
+ uint8_t *pdst_m = (uint8_t *)(pdst); \
+ const uint32_t val_m = (val); \
+ \
+ __asm__ __volatile__ ( \
+ "usw %[val_m], %[pdst_m] \n\t" \
+ \
+ : [pdst_m] "=m" (*pdst_m) \
+ : [val_m] "r" (val_m) \
+ ); \
}
-#define STORE_DWORD(pdst, val) { \
- uint8_t *dst1_m = (uint8_t *)(pdst); \
- uint8_t *dst2_m = ((uint8_t *)(pdst)) + 4; \
+#define SD(val, pdst) { \
+ uint8_t *pdst_m1 = (uint8_t *)(pdst); \
uint32_t val0_m, val1_m; \
\
val0_m = (uint32_t)((val) & 0x00000000FFFFFFFF); \
val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
\
- __asm__ __volatile__ ( \
- "usw %[val0_m], %[dst1_m] \n\t" \
- "usw %[val1_m], %[dst2_m] \n\t" \
- \
- : [dst1_m] "=m" (*dst1_m), [dst2_m] "=m" (*dst2_m) \
- : [val0_m] "r" (val0_m), [val1_m] "r" (val1_m) \
- ); \
+ SW(val0_m, pdst_m1); \
+ SW(val1_m, pdst_m1 + 4); \
}
#endif // (__mips_isa_rev >= 6)
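For reference, a minimal usage sketch of the unaligned access helpers above (a hypothetical caller, assuming this header is available as macros_msa.h): LD performs a possibly misaligned 64-bit load (one uld on MIPS64, or two ulw loads combined on MIPS32) and SD the matching store, so a short unaligned copy needs no explicit alignment handling.

#include <stdint.h>
#include "macros_msa.h"  /* assumed name of this header */

/* Copy 8 bytes between possibly unaligned buffers using the LD/SD helpers. */
static void copy8_unaligned(uint8_t *dst, const uint8_t *src) {
  uint64_t val;

  val = LD(src);  /* unaligned 64-bit load  */
  SD(val, dst);   /* unaligned 64-bit store */
}
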
-#define LOAD_2VECS_UB(psrc, stride, \
- val0, val1) { \
- val0 = LOAD_UB(psrc + 0 * stride); \
- val1 = LOAD_UB(psrc + 1 * stride); \
-}
-
-#define LOAD_4VECS_UB(psrc, stride, \
- val0, val1, val2, val3) { \
- val0 = LOAD_UB(psrc + 0 * stride); \
- val1 = LOAD_UB(psrc + 1 * stride); \
- val2 = LOAD_UB(psrc + 2 * stride); \
- val3 = LOAD_UB(psrc + 3 * stride); \
-}
-
-#define LOAD_4VECS_SB(psrc, stride, \
- val0, val1, val2, val3) { \
- val0 = LOAD_SB(psrc + 0 * stride); \
- val1 = LOAD_SB(psrc + 1 * stride); \
- val2 = LOAD_SB(psrc + 2 * stride); \
- val3 = LOAD_SB(psrc + 3 * stride); \
-}
-
-#define LOAD_5VECS_UB(psrc, stride, \
- out0, out1, out2, out3, out4) { \
- LOAD_4VECS_UB((psrc), (stride), \
- (out0), (out1), (out2), (out3)); \
- out4 = LOAD_UB(psrc + 4 * stride); \
+/* Description : Load 4 words with stride
+ Arguments : Inputs - psrc (source pointer to load from)
+ - stride
+ Outputs - out0, out1, out2, out3
+ Details : Loads word in 'out0' from (psrc)
+ Loads word in 'out1' from (psrc + stride)
+ Loads word in 'out2' from (psrc + 2 * stride)
+ Loads word in 'out3' from (psrc + 3 * stride)
+*/
+#define LW4(psrc, stride, out0, out1, out2, out3) { \
+ out0 = LW((psrc)); \
+ out1 = LW((psrc) + stride); \
+ out2 = LW((psrc) + 2 * stride); \
+ out3 = LW((psrc) + 3 * stride); \
+}
+
+/* Description : Load double words with stride
+ Arguments : Inputs - psrc (source pointer to load from)
+ - stride
+ Outputs - out0, out1
+ Details : Loads double word in 'out0' from (psrc)
+ Loads double word in 'out1' from (psrc + stride)
+*/
+#define LD2(psrc, stride, out0, out1) { \
+ out0 = LD((psrc)); \
+ out1 = LD((psrc) + stride); \
+}
+#define LD4(psrc, stride, out0, out1, out2, out3) { \
+ LD2((psrc), stride, out0, out1); \
+ LD2((psrc) + 2 * stride, stride, out2, out3); \
+}
+
+/* Description : Store 4 words with stride
+ Arguments : Inputs - in0, in1, in2, in3, pdst, stride
+ Details : Stores word from 'in0' to (pdst)
+ Stores word from 'in1' to (pdst + stride)
+ Stores word from 'in2' to (pdst + 2 * stride)
+ Stores word from 'in3' to (pdst + 3 * stride)
+*/
+#define SW4(in0, in1, in2, in3, pdst, stride) { \
+ SW(in0, (pdst)) \
+ SW(in1, (pdst) + stride); \
+ SW(in2, (pdst) + 2 * stride); \
+ SW(in3, (pdst) + 3 * stride); \
+}
+
+/* Description : Store 4 double words with stride
+ Arguments : Inputs - in0, in1, in2, in3, pdst, stride
+ Details : Stores double word from 'in0' to (pdst)
+ Stores double word from 'in1' to (pdst + stride)
+ Stores double word from 'in2' to (pdst + 2 * stride)
+ Stores double word from 'in3' to (pdst + 3 * stride)
+*/
+#define SD4(in0, in1, in2, in3, pdst, stride) { \
+ SD(in0, (pdst)) \
+ SD(in1, (pdst) + stride); \
+ SD(in2, (pdst) + 2 * stride); \
+ SD(in3, (pdst) + 3 * stride); \
+}
+
+/* Description : Load vectors with 16 byte elements with stride
+ Arguments : Inputs - psrc (source pointer to load from)
+ - stride
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Loads 16 byte elements in 'out0' from (psrc)
+ Loads 16 byte elements in 'out1' from (psrc + stride)
+*/
+#define LD_B2(RTYPE, psrc, stride, out0, out1) { \
+ out0 = LD_B(RTYPE, (psrc)); \
+ out1 = LD_B(RTYPE, (psrc) + stride); \
+}
+#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
+#define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__)
+
+#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
+ LD_B2(RTYPE, (psrc), stride, out0, out1); \
+  LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3);    \
+}
+#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
+#define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__)
+
+#define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) { \
+ LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
+ out4 = LD_B(RTYPE, (psrc) + 4 * stride); \
+}
+#define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__)
+#define LD_SB5(...) LD_B5(v16i8, __VA_ARGS__)
+
+#define LD_B7(RTYPE, psrc, stride, \
+ out0, out1, out2, out3, out4, out5, out6) { \
+ LD_B5(RTYPE, (psrc), stride, out0, out1, out2, out3, out4); \
+ LD_B2(RTYPE, (psrc) + 5 * stride, stride, out5, out6); \
+}
+#define LD_SB7(...) LD_B7(v16i8, __VA_ARGS__)
+
+#define LD_B8(RTYPE, psrc, stride, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
+ LD_B4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \
+}
+#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__)
+#define LD_SB8(...) LD_B8(v16i8, __VA_ARGS__)
+
+/* Description : Load vectors with 8 halfword elements with stride
+ Arguments : Inputs - psrc (source pointer to load from)
+ - stride
+ Outputs - out0, out1
+ Details : Loads 8 halfword elements in 'out0' from (psrc)
+ Loads 8 halfword elements in 'out1' from (psrc + stride)
+*/
+#define LD_H2(RTYPE, psrc, stride, out0, out1) { \
+ out0 = LD_H(RTYPE, (psrc)); \
+ out1 = LD_H(RTYPE, (psrc) + (stride)); \
+}
+#define LD_SH2(...) LD_H2(v8i16, __VA_ARGS__)
+
+#define LD_H4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
+ LD_H2(RTYPE, (psrc), stride, out0, out1); \
+ LD_H2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
+}
+#define LD_SH4(...) LD_H4(v8i16, __VA_ARGS__)
+
+#define LD_H8(RTYPE, psrc, stride, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ LD_H4(RTYPE, (psrc), stride, out0, out1, out2, out3); \
+ LD_H4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \
+}
+#define LD_SH8(...) LD_H8(v8i16, __VA_ARGS__)
+
+#define LD_H16(RTYPE, psrc, stride, \
+ out0, out1, out2, out3, out4, out5, out6, out7, \
+ out8, out9, out10, out11, out12, out13, out14, out15) { \
+ LD_H8(RTYPE, (psrc), stride, \
+ out0, out1, out2, out3, out4, out5, out6, out7); \
+ LD_H8(RTYPE, (psrc) + 8 * stride, stride, \
+ out8, out9, out10, out11, out12, out13, out14, out15); \
+}
+#define LD_SH16(...) LD_H16(v8i16, __VA_ARGS__)
+
+/* Description : Load a 4x4 block of signed halfword elements from 1D source
+                 data into 4 vectors (each vector holding 4 signed halfwords)
+ Arguments : Inputs - psrc
+ Outputs - out0, out1, out2, out3
+*/
+#define LD4x4_SH(psrc, out0, out1, out2, out3) { \
+ out0 = LD_SH(psrc); \
+ out2 = LD_SH(psrc + 8); \
+ out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
+ out3 = (v8i16)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \
+}
+
+/* Description : Load 2 vectors of signed word elements with stride
+ Arguments : Inputs - psrc (source pointer to load from)
+ - stride
+ Outputs - out0, out1
+ Return Type - signed word
+*/
+#define LD_SW2(psrc, stride, out0, out1) { \
+ out0 = LD_SW((psrc)); \
+ out1 = LD_SW((psrc) + stride); \
+}
+
+/* Description : Store vectors of 16 byte elements with stride
+ Arguments : Inputs - in0, in1, stride
+ Outputs - pdst (destination pointer to store to)
+ Details : Stores 16 byte elements from 'in0' to (pdst)
+ Stores 16 byte elements from 'in1' to (pdst + stride)
+*/
+#define ST_B2(RTYPE, in0, in1, pdst, stride) { \
+ ST_B(RTYPE, in0, (pdst)); \
+ ST_B(RTYPE, in1, (pdst) + stride); \
+}
+#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
+
+#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \
+ ST_B2(RTYPE, in0, in1, (pdst), stride); \
+ ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
+}
+#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
+
+#define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ pdst, stride) { \
+ ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride); \
+ ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \
+}
+#define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__)
+
+/* Description : Store vectors of 8 halfword elements with stride
+ Arguments : Inputs - in0, in1, stride
+ Outputs - pdst (destination pointer to store to)
+ Details : Stores 8 halfword elements from 'in0' to (pdst)
+ Stores 8 halfword elements from 'in1' to (pdst + stride)
+*/
+#define ST_H2(RTYPE, in0, in1, pdst, stride) { \
+ ST_H(RTYPE, in0, (pdst)); \
+ ST_H(RTYPE, in1, (pdst) + stride); \
+}
+#define ST_SH2(...) ST_H2(v8i16, __VA_ARGS__)
+
+#define ST_H4(RTYPE, in0, in1, in2, in3, pdst, stride) { \
+ ST_H2(RTYPE, in0, in1, (pdst), stride); \
+ ST_H2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
+}
+#define ST_SH4(...) ST_H4(v8i16, __VA_ARGS__)
+
+#define ST_H8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) { \
+ ST_H4(RTYPE, in0, in1, in2, in3, (pdst), stride); \
+ ST_H4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \
+}
+#define ST_SH8(...) ST_H8(v8i16, __VA_ARGS__)
+
+/* Description : Store vectors of word elements with stride
+ Arguments : Inputs - in0, in1, stride
+ - pdst (destination pointer to store to)
+ Details : Store 4 word elements from 'in0' to (pdst)
+ Store 4 word elements from 'in1' to (pdst + stride)
+*/
+#define ST_SW2(in0, in1, pdst, stride) { \
+ ST_SW(in0, (pdst)); \
+ ST_SW(in1, (pdst) + stride); \
+}
+
+/* Description : Store 2x4 byte block to destination memory from input vector
+ Arguments : Inputs - in, stidx, pdst, stride
+ Return Type - unsigned byte
+ Details : Index stidx halfword element from 'in' vector is copied and
+ stored on first line
+ Index stidx+1 halfword element from 'in' vector is copied and
+ stored on second line
+ Index stidx+2 halfword element from 'in' vector is copied and
+ stored on third line
+ Index stidx+3 halfword element from 'in' vector is copied and
+ stored on fourth line
+*/
+#define ST2x4_UB(in, stidx, pdst, stride) { \
+ uint16_t out0_m, out1_m, out2_m, out3_m; \
+ uint8_t *pblk_2x4_m = (uint8_t *)(pdst); \
+ \
+ out0_m = __msa_copy_u_h((v8i16)in, (stidx)); \
+ out1_m = __msa_copy_u_h((v8i16)in, (stidx + 1)); \
+ out2_m = __msa_copy_u_h((v8i16)in, (stidx + 2)); \
+ out3_m = __msa_copy_u_h((v8i16)in, (stidx + 3)); \
+ \
+ SH(out0_m, pblk_2x4_m); \
+ SH(out1_m, pblk_2x4_m + stride); \
+ SH(out2_m, pblk_2x4_m + 2 * stride); \
+ SH(out3_m, pblk_2x4_m + 3 * stride); \
+}
+
+/* Description : Store 4x2 byte block to destination memory from input vector
+ Arguments : Inputs - in, pdst, stride
+ Details : Index 0 word element from 'in' vector is copied to a GP
+ register and stored to (pdst)
+ Index 1 word element from 'in' vector is copied to a GP
+ register and stored to (pdst + stride)
+*/
+#define ST4x2_UB(in, pdst, stride) { \
+ uint32_t out0_m, out1_m; \
+ uint8_t *pblk_4x2_m = (uint8_t *)(pdst); \
+ \
+ out0_m = __msa_copy_u_w((v4i32)in, 0); \
+ out1_m = __msa_copy_u_w((v4i32)in, 1); \
+ \
+ SW(out0_m, pblk_4x2_m); \
+ SW(out1_m, pblk_4x2_m + stride); \
+}
+
+/* Description : Store 4x4 byte block to destination memory from input vectors
+ Arguments : Inputs - in0, in1, pdst, stride
+ Return Type - unsigned byte
+ Details : Idx0 word element from input vector 'in0' is copied and stored
+ on first line
+ Idx1 word element from input vector 'in0' is copied and stored
+ on second line
+ Idx2 word element from input vector 'in1' is copied and stored
+ on third line
+ Idx3 word element from input vector 'in1' is copied and stored
+ on fourth line
+*/
+#define ST4x4_UB(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) { \
+ uint32_t out0_m, out1_m, out2_m, out3_m; \
+ uint8_t *pblk_4x4_m = (uint8_t *)(pdst); \
+ \
+ out0_m = __msa_copy_u_w((v4i32)in0, idx0); \
+ out1_m = __msa_copy_u_w((v4i32)in0, idx1); \
+ out2_m = __msa_copy_u_w((v4i32)in1, idx2); \
+ out3_m = __msa_copy_u_w((v4i32)in1, idx3); \
+ \
+ SW4(out0_m, out1_m, out2_m, out3_m, pblk_4x4_m, stride); \
+}
+#define ST4x8_UB(in0, in1, pdst, stride) { \
+ uint8_t *pblk_4x8 = (uint8_t *)(pdst); \
+ \
+ ST4x4_UB(in0, in0, 0, 1, 2, 3, pblk_4x8, stride); \
+ ST4x4_UB(in1, in1, 0, 1, 2, 3, pblk_4x8 + 4 * stride, stride); \
}
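A short usage sketch for the 4xN store helpers above (hypothetical function; assumes <msa.h> and this header are included as in the earlier sketch): a 4x4 block packed one row per 32-bit word into a single vector is written to a strided destination.

/* Write a 4x4 block held in one v16u8 (one row per word) to 'dst'. */
static void store_block_4x4(v16u8 blk, uint8_t *dst, int32_t stride) {
  /* Words 0..3 of 'blk' go to rows 0..3 of the destination. */
  ST4x4_UB(blk, blk, 0, 1, 2, 3, dst, stride);
}
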
-#define LOAD_5VECS_SB(psrc, stride, \
- out0, out1, out2, out3, out4) { \
- LOAD_4VECS_SB((psrc), (stride), \
- (out0), (out1), (out2), (out3)); \
- out4 = LOAD_SB(psrc + 4 * stride); \
-}
+/* Description : Store 8x1 byte block to destination memory from input vector
+ Arguments : Inputs - in, pdst
+ Details : Index 0 double word element from input vector 'in' is copied
+ and stored to destination memory at (pdst)
+*/
+#define ST8x1_UB(in, pdst) { \
+ uint64_t out0_m; \
+ \
+ out0_m = __msa_copy_u_d((v2i64)in, 0); \
+ SD(out0_m, pdst); \
+}
-#define LOAD_7VECS_SB(psrc, stride, \
- val0, val1, val2, val3, \
- val4, val5, val6) { \
- val0 = LOAD_SB((psrc) + 0 * (stride)); \
- val1 = LOAD_SB((psrc) + 1 * (stride)); \
- val2 = LOAD_SB((psrc) + 2 * (stride)); \
- val3 = LOAD_SB((psrc) + 3 * (stride)); \
- val4 = LOAD_SB((psrc) + 4 * (stride)); \
- val5 = LOAD_SB((psrc) + 5 * (stride)); \
- val6 = LOAD_SB((psrc) + 6 * (stride)); \
-}
-
-#define LOAD_8VECS_UB(psrc, stride, \
- out0, out1, out2, out3, \
- out4, out5, out6, out7) { \
- LOAD_4VECS_UB((psrc), (stride), \
- (out0), (out1), (out2), (out3)); \
- LOAD_4VECS_UB((psrc + 4 * stride), (stride), \
- (out4), (out5), (out6), (out7)); \
-}
-
-#define LOAD_8VECS_SB(psrc, stride, \
- out0, out1, out2, out3, \
- out4, out5, out6, out7) { \
- LOAD_4VECS_SB((psrc), (stride), \
- (out0), (out1), (out2), (out3)); \
- LOAD_4VECS_SB((psrc + 4 * stride), (stride), \
- (out4), (out5), (out6), (out7)); \
-}
-
-#define LOAD_2VECS_SH(psrc, stride, \
- val0, val1) { \
- val0 = LOAD_SH((psrc) + 0 * (stride)); \
- val1 = LOAD_SH((psrc) + 1 * (stride)); \
-}
-
-#define LOAD_4VECS_SH(psrc, stride, \
- val0, val1, val2, val3) { \
- LOAD_2VECS_SH((psrc), (stride), val0, val1); \
- LOAD_2VECS_SH((psrc + 2 * stride), (stride), val2, val3); \
-}
-
-#define LOAD_8VECS_SH(psrc, stride, \
- val0, val1, val2, val3, \
- val4, val5, val6, val7) { \
- LOAD_4VECS_SH((psrc), (stride), \
- val0, val1, val2, val3); \
- LOAD_4VECS_SH((psrc + 4 * stride), (stride), \
- val4, val5, val6, val7); \
-}
-
-#define LOAD_16VECS_SH(psrc, stride, \
- val0, val1, val2, val3, \
- val4, val5, val6, val7, \
- val8, val9, val10, val11, \
- val12, val13, val14, val15) { \
- LOAD_8VECS_SH((psrc), (stride), \
- val0, val1, val2, val3, \
- val4, val5, val6, val7); \
- LOAD_8VECS_SH((psrc + 8 * (stride)), (stride), \
- val8, val9, val10, val11, \
- val12, val13, val14, val15); \
-}
-
-#define STORE_4VECS_UB(dst_out, pitch, \
- in0, in1, in2, in3) { \
- STORE_UB((in0), (dst_out)); \
- STORE_UB((in1), ((dst_out) + (pitch))); \
- STORE_UB((in2), ((dst_out) + 2 * (pitch))); \
- STORE_UB((in3), ((dst_out) + 3 * (pitch))); \
-}
-
-#define STORE_8VECS_UB(dst_out, pitch_in, \
- in0, in1, in2, in3, \
- in4, in5, in6, in7) { \
- STORE_4VECS_UB(dst_out, pitch_in, \
- in0, in1, in2, in3); \
- STORE_4VECS_UB((dst_out + 4 * (pitch_in)), pitch_in, \
- in4, in5, in6, in7); \
-}
-
-#define VEC_INSERT_4W_UB(src, src0, src1, src2, src3) { \
- src = (v16u8)__msa_insert_w((v4i32)(src), 0, (src0)); \
- src = (v16u8)__msa_insert_w((v4i32)(src), 1, (src1)); \
- src = (v16u8)__msa_insert_w((v4i32)(src), 2, (src2)); \
- src = (v16u8)__msa_insert_w((v4i32)(src), 3, (src3)); \
-}
-
-#define VEC_INSERT_2DW_UB(src, src0, src1) { \
- src = (v16u8)__msa_insert_d((v2i64)(src), 0, (src0)); \
- src = (v16u8)__msa_insert_d((v2i64)(src), 1, (src1)); \
-}
-
-#define STORE_4VECS_SH(ptr, stride, \
- in0, in1, in2, in3) { \
- STORE_SH(in0, ((ptr) + 0 * stride)); \
- STORE_SH(in1, ((ptr) + 1 * stride)); \
- STORE_SH(in2, ((ptr) + 2 * stride)); \
- STORE_SH(in3, ((ptr) + 3 * stride)); \
-}
-
-#define STORE_8VECS_SH(ptr, stride, \
- in0, in1, in2, in3, \
- in4, in5, in6, in7) { \
- STORE_SH(in0, ((ptr) + 0 * stride)); \
- STORE_SH(in1, ((ptr) + 1 * stride)); \
- STORE_SH(in2, ((ptr) + 2 * stride)); \
- STORE_SH(in3, ((ptr) + 3 * stride)); \
- STORE_SH(in4, ((ptr) + 4 * stride)); \
- STORE_SH(in5, ((ptr) + 5 * stride)); \
- STORE_SH(in6, ((ptr) + 6 * stride)); \
- STORE_SH(in7, ((ptr) + 7 * stride)); \
-}
-
-#define CLIP_UNSIGNED_CHAR_H(in) ({ \
+/* Description : Store 8x2 byte block to destination memory from input vector
+ Arguments : Inputs - in, pdst, stride
+ Details : Index 0 double word element from input vector 'in' is copied
+ and stored to destination memory at (pdst)
+ Index 1 double word element from input vector 'in' is copied
+ and stored to destination memory at (pdst + stride)
+*/
+#define ST8x2_UB(in, pdst, stride) { \
+ uint64_t out0_m, out1_m; \
+ uint8_t *pblk_8x2_m = (uint8_t *)(pdst); \
+ \
+ out0_m = __msa_copy_u_d((v2i64)in, 0); \
+ out1_m = __msa_copy_u_d((v2i64)in, 1); \
+ \
+ SD(out0_m, pblk_8x2_m); \
+ SD(out1_m, pblk_8x2_m + stride); \
+}
+
+/* Description : Store 8x4 byte block to destination memory from input
+ vectors
+ Arguments : Inputs - in0, in1, pdst, stride
+ Details : Index 0 double word element from input vector 'in0' is copied
+ and stored to destination memory at (pblk_8x4_m)
+ Index 1 double word element from input vector 'in0' is copied
+ and stored to destination memory at (pblk_8x4_m + stride)
+ Index 0 double word element from input vector 'in1' is copied
+ and stored to destination memory at (pblk_8x4_m + 2 * stride)
+ Index 1 double word element from input vector 'in1' is copied
+ and stored to destination memory at (pblk_8x4_m + 3 * stride)
+*/
+#define ST8x4_UB(in0, in1, pdst, stride) { \
+ uint64_t out0_m, out1_m, out2_m, out3_m; \
+ uint8_t *pblk_8x4_m = (uint8_t *)(pdst); \
+ \
+ out0_m = __msa_copy_u_d((v2i64)in0, 0); \
+ out1_m = __msa_copy_u_d((v2i64)in0, 1); \
+ out2_m = __msa_copy_u_d((v2i64)in1, 0); \
+ out3_m = __msa_copy_u_d((v2i64)in1, 1); \
+ \
+ SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, stride); \
+}
+
+/* Description : Average with rounding (in0 + in1 + 1) / 2
+   Arguments : Inputs - in0, in1, in2, in3
+               Outputs - out0, out1
+               Return Type - as per RTYPE
+   Details : Each byte element from the 'in0' vector is added to the
+             corresponding byte element from the 'in1' vector. The sum plus 1
+             (for rounding) is computed unsigned with full precision,
+             i.e. the result has one extra bit. Unsigned division by 2
+             (or logical shift right by one bit) is performed before writing
+             the result to vector 'out0'
+             Similar for the pair of 'in2' and 'in3'
+*/
+#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_aver_u_b((v16u8)in0, (v16u8)in1); \
+ out1 = (RTYPE)__msa_aver_u_b((v16u8)in2, (v16u8)in3); \
+}
+#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
+
+#define AVER_UB4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \
+ AVER_UB2(RTYPE, in4, in5, in6, in7, out2, out3) \
+}
+#define AVER_UB4_UB(...) AVER_UB4(v16u8, __VA_ARGS__)
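As a usage sketch (hypothetical helper, assuming <msa.h> and this header are included), the rounding-average macros combine two 16-pixel prediction rows at a time: dst = (a + b + 1) >> 1 per byte element.

/* Average two pairs of 16-pixel rows with rounding. */
static void avg_rows_2x16(const uint8_t *pred0, const uint8_t *pred1,
                          uint8_t *dst, int32_t stride) {
  v16u8 a0, a1, b0, b1, avg0, avg1;

  LD_UB2(pred0, stride, a0, a1);            /* rows 0/1 of first prediction  */
  LD_UB2(pred1, stride, b0, b1);            /* rows 0/1 of second prediction */
  AVER_UB2_UB(a0, b0, a1, b1, avg0, avg1);  /* per-byte (a + b + 1) >> 1     */
  ST_UB2(avg0, avg1, dst, stride);
}
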
+
+/* Description : Immediate number of columns to slide with zero
+ Arguments : Inputs - in0, in1, slide_val
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+   Details : Byte elements from the 'zero_m' vector are slid into 'in0' by
+             the number of elements specified by 'slide_val'
+*/
+#define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) { \
+ v16i8 zero_m = { 0 }; \
+ out0 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in0, slide_val); \
+ out1 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in1, slide_val); \
+}
+#define SLDI_B2_0_SW(...) SLDI_B2_0(v4i32, __VA_ARGS__)
+
+#define SLDI_B4_0(RTYPE, in0, in1, in2, in3, \
+ out0, out1, out2, out3, slide_val) { \
+ SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \
+ SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \
+}
+#define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__)
+
+/* Description : Immediate number of columns to slide
+ Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+   Details : Byte elements from the 'in0_0' vector are slid into 'in1_0' by
+             the number of elements specified by 'slide_val'
+*/
+#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) { \
+ out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \
+ out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \
+}
+#define SLDI_B2_SH(...) SLDI_B2(v8i16, __VA_ARGS__)
+
+#define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2, \
+ out0, out1, out2, slide_val) { \
+ SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \
+ out2 = (RTYPE)__msa_sldi_b((v16i8)in0_2, (v16i8)in1_2, slide_val); \
+}
+#define SLDI_B3_SB(...) SLDI_B3(v16i8, __VA_ARGS__)
+#define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__)
+
+/* Description : Shuffle byte vector elements as per mask vector
+ Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+   Details : Selected byte elements from in0 & in1 are copied to out0 as
+             per control vector mask0
+             Selected byte elements from in2 & in3 are copied to out1 as
+             per control vector mask1
+*/
+#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
+ out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
+ out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
+}
+#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
+#define VSHF_B2_SB(...) VSHF_B2(v16i8, __VA_ARGS__)
+#define VSHF_B2_UH(...) VSHF_B2(v8u16, __VA_ARGS__)
+
+#define VSHF_B4(RTYPE, in0, in1, mask0, mask1, mask2, mask3, \
+ out0, out1, out2, out3) { \
+ VSHF_B2(RTYPE, in0, in1, in0, in1, mask0, mask1, out0, out1); \
+ VSHF_B2(RTYPE, in0, in1, in0, in1, mask2, mask3, out2, out3); \
+}
+#define VSHF_B4_SB(...) VSHF_B4(v16i8, __VA_ARGS__)
+#define VSHF_B4_SH(...) VSHF_B4(v8i16, __VA_ARGS__)
+
+/* Description : Dot product of byte vector elements
+ Arguments : Inputs - mult0, mult1
+ cnst0, cnst1
+ Outputs - out0, out1
+ Return Type - unsigned halfword
+   Details : Unsigned byte elements from mult0 are multiplied with
+             unsigned byte elements from cnst0, producing results twice the
+             size of the input, i.e. unsigned halfwords.
+             The products of adjacent odd-even element pairs are then added
+             together and written to the out vector
+             (8 unsigned halfword results per output vector)
+*/
+#define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
+ out0 = (RTYPE)__msa_dotp_u_h((v16u8)mult0, (v16u8)cnst0); \
+ out1 = (RTYPE)__msa_dotp_u_h((v16u8)mult1, (v16u8)cnst1); \
+}
+#define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__)
+
+#define DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, \
+ cnst0, cnst1, cnst2, cnst3, \
+ out0, out1, out2, out3) { \
+ DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+ DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+#define DOTP_UB4_UH(...) DOTP_UB4(v8u16, __VA_ARGS__)
+
+/* Description : Dot product of byte vector elements
+ Arguments : Inputs - mult0, mult1
+ cnst0, cnst1
+ Outputs - out0, out1
+ Return Type - signed halfword
+   Details : Signed byte elements from mult0 are multiplied with
+             signed byte elements from cnst0, producing results twice the
+             size of the input, i.e. signed halfwords.
+             The products of adjacent odd-even element pairs are then added
+             together and written to the out vector
+             (8 signed halfword results per output vector)
+*/
+#define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
+ out0 = (RTYPE)__msa_dotp_s_h((v16i8)mult0, (v16i8)cnst0); \
+ out1 = (RTYPE)__msa_dotp_s_h((v16i8)mult1, (v16i8)cnst1); \
+}
+#define DOTP_SB2_SH(...) DOTP_SB2(v8i16, __VA_ARGS__)
+
+#define DOTP_SB4(RTYPE, mult0, mult1, mult2, mult3, \
+ cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) { \
+ DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+ DOTP_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+#define DOTP_SB4_SH(...) DOTP_SB4(v8i16, __VA_ARGS__)
+
+/* Description : Dot product of halfword vector elements
+ Arguments : Inputs - mult0, mult1
+ cnst0, cnst1
+ Outputs - out0, out1
+ Return Type - signed word
+   Details : Signed halfword elements from mult0 are multiplied with
+             signed halfword elements from cnst0, producing results twice
+             the size of the input, i.e. signed words.
+             The products of adjacent odd-even element pairs are then added
+             together and written to the out vector
+             (4 signed word results per output vector)
+*/
+#define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
+ out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \
+ out1 = (RTYPE)__msa_dotp_s_w((v8i16)mult1, (v8i16)cnst1); \
+}
+#define DOTP_SH2_SW(...) DOTP_SH2(v4i32, __VA_ARGS__)
+
+#define DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, \
+ cnst0, cnst1, cnst2, cnst3, \
+ out0, out1, out2, out3) { \
+ DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+ DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+#define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__)
+
+/* Description : Dot product of word vector elements
+ Arguments : Inputs - mult0, mult1
+ cnst0, cnst1
+ Outputs - out0, out1
+ Return Type - signed word
+   Details : Signed word elements from mult0 are multiplied with
+             signed word elements from cnst0, producing results twice the
+             size of the input, i.e. signed double words.
+             The products of adjacent odd-even element pairs are then added
+             together and written to the out vector
+             (2 signed double word results per output vector)
+*/
+#define DOTP_SW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
+ out0 = (RTYPE)__msa_dotp_s_d((v4i32)mult0, (v4i32)cnst0); \
+ out1 = (RTYPE)__msa_dotp_s_d((v4i32)mult1, (v4i32)cnst1); \
+}
+#define DOTP_SW2_SD(...) DOTP_SW2(v2i64, __VA_ARGS__)
+
+/* Description : Dot product & addition of byte vector elements
+ Arguments : Inputs - mult0, mult1
+ cnst0, cnst1
+ Outputs - out0, out1
+ Return Type - signed halfword
+   Details : Signed byte elements from mult0 are multiplied with
+             signed byte elements from cnst0, producing results twice the
+             size of the input, i.e. signed halfwords.
+             The products of adjacent odd-even element pairs are then added
+             to the out vector
+             (8 signed halfword results per output vector)
+*/
+#define DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
+ out0 = (RTYPE)__msa_dpadd_s_h((v8i16)out0, (v16i8)mult0, (v16i8)cnst0); \
+ out1 = (RTYPE)__msa_dpadd_s_h((v8i16)out1, (v16i8)mult1, (v16i8)cnst1); \
+}
+#define DPADD_SB2_SH(...) DPADD_SB2(v8i16, __VA_ARGS__)
+
+#define DPADD_SB4(RTYPE, mult0, mult1, mult2, mult3, \
+ cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) { \
+ DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \
+ DPADD_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \
+}
+#define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__)
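A sketch of how the dot-product and dot-product-accumulate macros are usually chained for a 4-tap filter (hypothetical helper; vec0/vec2 are assumed to hold the pre-shuffled byte pairs for taps 0-1, vec1/vec3 the pairs for taps 2-3, and filt0/filt1 the replicated tap coefficients):

/* 4-tap filter for two groups of 8 output samples: DOTP seeds the sums,
 * DPADD folds in the remaining taps. */
static void filt_4tap_2x8(v16i8 vec0, v16i8 vec1, v16i8 vec2, v16i8 vec3,
                          v16i8 filt0, v16i8 filt1,
                          v8i16 *out0, v8i16 *out1) {
  v8i16 sum0, sum1;

  DOTP_SB2_SH(vec0, vec2, filt0, filt0, sum0, sum1);   /* taps 0-1            */
  DPADD_SB2_SH(vec1, vec3, filt1, filt1, sum0, sum1);  /* accumulate taps 2-3 */
  *out0 = sum0;
  *out1 = sum1;
}
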
+
+/* Description : Dot product & addition of halfword vector elements
+ Arguments : Inputs - mult0, mult1
+ cnst0, cnst1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+   Details : Signed halfword elements from 'mult0' are multiplied with
+             signed halfword elements from 'cnst0', producing results twice
+             the size of the input, i.e. signed words.
+             The products of adjacent odd-even element pairs are then added
+             to the 'out0' vector
+*/
+#define DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) { \
+ out0 = (RTYPE)__msa_dpadd_s_w((v4i32)out0, (v8i16)mult0, (v8i16)cnst0); \
+ out1 = (RTYPE)__msa_dpadd_s_w((v4i32)out1, (v8i16)mult1, (v8i16)cnst1); \
+}
+#define DPADD_SH2_SW(...) DPADD_SH2(v4i32, __VA_ARGS__)
+
+/* Description : Dot product & addition of double word vector elements
+ Arguments : Inputs - mult0, mult1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+   Details : Each signed word element from 'mult0' is multiplied with itself,
+             producing an intermediate result twice the size of the input,
+             i.e. a signed double word
+             The products of adjacent odd-even element pairs are then added
+             to the 'out0' vector
+*/
+#define DPADD_SD2(RTYPE, mult0, mult1, out0, out1) { \
+ out0 = (RTYPE)__msa_dpadd_s_d((v2i64)out0, (v4i32)mult0, (v4i32)mult0); \
+ out1 = (RTYPE)__msa_dpadd_s_d((v2i64)out1, (v4i32)mult1, (v4i32)mult1); \
+}
+#define DPADD_SD2_SD(...) DPADD_SD2(v2i64, __VA_ARGS__)
+
+/* Description : Minimum values of unsigned halfword elements between two
+                 vectors are written to the output vector
+   Arguments : Inputs - in0, in1, min_vec
+                 Outputs - in0, in1 (in place)
+                 Return Type - unsigned halfword
+   Details : The element-wise minimum of unsigned halfword values from 'in0'
+             and 'min_vec' is written back to vector 'in0'; similarly for 'in1'
+*/
+#define MIN_UH2(RTYPE, in0, in1, min_vec) { \
+ in0 = (RTYPE)__msa_min_u_h((v8u16)in0, min_vec); \
+ in1 = (RTYPE)__msa_min_u_h((v8u16)in1, min_vec); \
+}
+#define MIN_UH2_UH(...) MIN_UH2(v8u16, __VA_ARGS__)
+
+#define MIN_UH4(RTYPE, in0, in1, in2, in3, min_vec) { \
+ MIN_UH2(RTYPE, in0, in1, min_vec); \
+ MIN_UH2(RTYPE, in2, in3, min_vec); \
+}
+#define MIN_UH4_UH(...) MIN_UH4(v8u16, __VA_ARGS__)
+
+/* Description : Clips all signed halfword elements of input vector
+ between 0 & 255
+ Arguments : Inputs - in (input vector)
+ Outputs - out_m (output vector with clipped elements)
+ Return Type - signed halfword
+*/
+#define CLIP_SH_0_255(in) ({ \
v8i16 max_m = __msa_ldi_h(255); \
v8i16 out_m; \
\
- out_m = __msa_maxi_s_h((v8i16)(in), 0); \
+ out_m = __msa_maxi_s_h((v8i16)in, 0); \
out_m = __msa_min_s_h((v8i16)max_m, (v8i16)out_m); \
out_m; \
})
+#define CLIP_SH2_0_255(in0, in1) { \
+ in0 = CLIP_SH_0_255(in0); \
+ in1 = CLIP_SH_0_255(in1); \
+}
+#define CLIP_SH4_0_255(in0, in1, in2, in3) { \
+ CLIP_SH2_0_255(in0, in1); \
+ CLIP_SH2_0_255(in2, in3); \
+}
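A reconstruction sketch showing where the 0..255 clip fits (hypothetical helper): a signed halfword residual is added to the zero-extended prediction, clipped back to the pixel range, and repacked to bytes.

/* Reconstruct 8 pixels: pack(clip(pred + residual, 0, 255)). */
static v16u8 recon_8_pixels(v16u8 pred, v8i16 resid) {
  const v16i8 zero = { 0 };
  v8i16 pix;

  pix = (v8i16)__msa_ilvr_b(zero, (v16i8)pred);  /* zero-extend 8 bytes */
  pix += resid;                                  /* add the residual    */
  pix = CLIP_SH_0_255(pix);                      /* clamp to 0..255     */
  return (v16u8)__msa_pckev_b((v16i8)pix, (v16i8)pix);  /* pack to bytes */
}
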
-/* halfword 8x8 transpose macro */
-#define TRANSPOSE8x8_H_SH(in0, in1, in2, in3, \
- in4, in5, in6, in7, \
- out0, out1, out2, out3, \
- out4, out5, out6, out7) { \
- v8i16 s0_m, s1_m; \
- v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
- \
- s0_m = __msa_ilvr_h((v8i16)(in6), (v8i16)(in4)); \
- s1_m = __msa_ilvr_h((v8i16)(in7), (v8i16)(in5)); \
- tmp0_m = __msa_ilvr_h((v8i16)s1_m, (v8i16)s0_m); \
- tmp1_m = __msa_ilvl_h((v8i16)s1_m, (v8i16)s0_m); \
- \
- s0_m = __msa_ilvl_h((v8i16)(in6), (v8i16)(in4)); \
- s1_m = __msa_ilvl_h((v8i16)(in7), (v8i16)(in5)); \
- tmp2_m = __msa_ilvr_h((v8i16)s1_m, (v8i16)s0_m); \
- tmp3_m = __msa_ilvl_h((v8i16)s1_m, (v8i16)s0_m); \
- \
- s0_m = __msa_ilvr_h((v8i16)(in2), (v8i16)(in0)); \
- s1_m = __msa_ilvr_h((v8i16)(in3), (v8i16)(in1)); \
- tmp4_m = __msa_ilvr_h((v8i16)s1_m, (v8i16)s0_m); \
- tmp5_m = __msa_ilvl_h((v8i16)s1_m, (v8i16)s0_m); \
- \
- s0_m = __msa_ilvl_h((v8i16)(in2), (v8i16)(in0)); \
- s1_m = __msa_ilvl_h((v8i16)(in3), (v8i16)(in1)); \
- tmp6_m = __msa_ilvr_h((v8i16)s1_m, (v8i16)s0_m); \
- tmp7_m = __msa_ilvl_h((v8i16)s1_m, (v8i16)s0_m); \
- \
- out0 = (v8i16)__msa_pckev_d((v2i64)tmp0_m, (v2i64)tmp4_m); \
- out1 = (v8i16)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \
- out2 = (v8i16)__msa_pckev_d((v2i64)tmp1_m, (v2i64)tmp5_m); \
- out3 = (v8i16)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \
- out4 = (v8i16)__msa_pckev_d((v2i64)tmp2_m, (v2i64)tmp6_m); \
- out5 = (v8i16)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \
- out6 = (v8i16)__msa_pckev_d((v2i64)tmp3_m, (v2i64)tmp7_m); \
- out7 = (v8i16)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \
-}
-
-/* interleave macros */
-/* no in-place support */
-#define ILV_B_LRLR_UB(in0, in1, in2, in3, \
- out0, out1, out2, out3) { \
- out0 = (v16u8)__msa_ilvl_b((v16i8)(in1), (v16i8)(in0)); \
- out1 = (v16u8)__msa_ilvr_b((v16i8)(in1), (v16i8)(in0)); \
- out2 = (v16u8)__msa_ilvl_b((v16i8)(in3), (v16i8)(in2)); \
- out3 = (v16u8)__msa_ilvr_b((v16i8)(in3), (v16i8)(in2)); \
-}
-
-#define ILV_H_LRLR_SH(in0, in1, in2, in3, \
- out0, out1, out2, out3) { \
- out0 = __msa_ilvl_h((v8i16)(in1), (v8i16)(in0)); \
- out1 = __msa_ilvr_h((v8i16)(in1), (v8i16)(in0)); \
- out2 = __msa_ilvl_h((v8i16)(in3), (v8i16)(in2)); \
- out3 = __msa_ilvr_h((v8i16)(in3), (v8i16)(in2)); \
-}
-
-#define ILV_H_LR_SH(in0, in1, out0, out1) { \
- out0 = __msa_ilvl_h((v8i16)(in1), (v8i16)(in0)); \
- out1 = __msa_ilvr_h((v8i16)(in1), (v8i16)(in0)); \
-}
-
-#define ILVR_B_2VECS_UB(in0_r, in1_r, in0_l, in1_l, \
- out0, out1) { \
- out0 = (v16u8)__msa_ilvr_b((v16i8)(in0_l), (v16i8)(in0_r)); \
- out1 = (v16u8)__msa_ilvr_b((v16i8)(in1_l), (v16i8)(in1_r)); \
-}
-
-#define ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \
- out0, out1) { \
- out0 = __msa_ilvr_b((v16i8)(in0_l), (v16i8)(in0_r)); \
- out1 = __msa_ilvr_b((v16i8)(in1_l), (v16i8)(in1_r)); \
-}
-
-#define ILVR_B_4VECS_UB(in0_r, in1_r, in2_r, in3_r, \
- in0_l, in1_l, in2_l, in3_l, \
- out0, out1, out2, out3) { \
- ILVR_B_2VECS_UB(in0_r, in1_r, in0_l, in1_l, \
- out0, out1); \
- ILVR_B_2VECS_UB(in2_r, in3_r, in2_l, in3_l, \
- out2, out3); \
-}
-
-#define ILVR_B_4VECS_SB(in0_r, in1_r, in2_r, in3_r, \
- in0_l, in1_l, in2_l, in3_l, \
- out0, out1, out2, out3) { \
- ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \
- out0, out1); \
- ILVR_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \
- out2, out3); \
-}
-
-#define ILVR_B_6VECS_SB(in0_r, in1_r, in2_r, \
- in3_r, in4_r, in5_r, \
- in0_l, in1_l, in2_l, \
- in3_l, in4_l, in5_l, \
- out0, out1, out2, \
- out3, out4, out5) { \
- ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \
- out0, out1); \
- ILVR_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \
- out2, out3); \
- ILVR_B_2VECS_SB(in4_r, in5_r, in4_l, in5_l, \
- out4, out5); \
-}
-
-#define ILVR_B_8VECS_SB(in0_r, in1_r, in2_r, in3_r, \
- in4_r, in5_r, in6_r, in7_r, \
- in0_l, in1_l, in2_l, in3_l, \
- in4_l, in5_l, in6_l, in7_l, \
- out0, out1, out2, out3, \
- out4, out5, out6, out7) { \
- ILVR_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \
- out0, out1); \
- ILVR_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \
- out2, out3); \
- ILVR_B_2VECS_SB(in4_r, in5_r, in4_l, in5_l, \
- out4, out5); \
- ILVR_B_2VECS_SB(in6_r, in7_r, in6_l, in7_l, \
- out6, out7); \
-}
-
-#define ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \
- out0, out1) { \
- out0 = __msa_ilvl_b((v16i8)(in0_l), (v16i8)(in0_r)); \
- out1 = __msa_ilvl_b((v16i8)(in1_l), (v16i8)(in1_r)); \
-}
-
-#define ILVL_B_4VECS_SB(in0_r, in1_r, in2_r, in3_r, \
- in0_l, in1_l, in2_l, in3_l, \
- out0, out1, out2, out3) { \
- ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \
- out0, out1); \
- ILVL_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \
- out2, out3); \
-}
-
-#define ILVL_B_6VECS_SB(in0_r, in1_r, in2_r, \
- in3_r, in4_r, in5_r, \
- in0_l, in1_l, in2_l, \
- in3_l, in4_l, in5_l, \
- out0, out1, out2, \
- out3, out4, out5) { \
- ILVL_B_2VECS_SB(in0_r, in1_r, in0_l, in1_l, \
- out0, out1); \
- ILVL_B_2VECS_SB(in2_r, in3_r, in2_l, in3_l, \
- out2, out3); \
- ILVL_B_2VECS_SB(in4_r, in5_r, in4_l, in5_l, \
- out4, out5); \
-}
-
-#define ILVR_D_2VECS_SB(out0, in0_l, in0_r, \
- out1, in1_l, in1_r) { \
- out0 = (v16i8)__msa_ilvr_d((v2i64)(in0_l), (v2i64)(in0_r)); \
- out1 = (v16i8)__msa_ilvr_d((v2i64)(in1_l), (v2i64)(in1_r)); \
-}
-
-#define ILVR_D_3VECS_SB(out0, in0_l, in0_r, \
- out1, in1_l, in1_r, \
- out2, in2_l, in2_r) { \
- ILVR_D_2VECS_SB(out0, in0_l, in0_r, \
- out1, in1_l, in1_r); \
- out2 = (v16i8)__msa_ilvr_d((v2i64)(in2_l), (v2i64)(in2_r)); \
-}
-
-#define ILVR_D_4VECS_SB(out0, in0_l, in0_r, \
- out1, in1_l, in1_r, \
- out2, in2_l, in2_r, \
- out3, in3_l, in3_r) { \
- ILVR_D_2VECS_SB(out0, in0_l, in0_r, \
- out1, in1_l, in1_r); \
- ILVR_D_2VECS_SB(out2, in2_l, in2_r, \
- out3, in3_l, in3_r); \
-}
-
-#define DOTP_S_W_4VECS_SW(m0, c0, m1, c1, \
- m2, c2, m3, c3, \
- out0, out1, out2, out3) { \
- out0 = __msa_dotp_s_w((v8i16)(m0), (v8i16)(c0)); \
- out1 = __msa_dotp_s_w((v8i16)(m1), (v8i16)(c1)); \
- out2 = __msa_dotp_s_w((v8i16)(m2), (v8i16)(c2)); \
- out3 = __msa_dotp_s_w((v8i16)(m3), (v8i16)(c3)); \
-}
-
-#define PCKEV_H_2VECS_SH(in0_l, in0_r, in1_l, in1_r, \
- out0, out1) { \
- out0 = __msa_pckev_h((v8i16)(in0_l), (v8i16)(in0_r)); \
- out1 = __msa_pckev_h((v8i16)(in1_l), (v8i16)(in1_r)); \
-}
-
-#define XORI_B_2VECS_UB(val0, val1, \
- out0, out1, xor_val) { \
- out0 = __msa_xori_b((v16u8)(val0), (xor_val)); \
- out1 = __msa_xori_b((v16u8)(val1), (xor_val)); \
-}
-
-#define XORI_B_2VECS_SB(val0, val1, \
- out0, out1, xor_val) { \
- out0 = (v16i8)__msa_xori_b((v16u8)(val0), (xor_val)); \
- out1 = (v16i8)__msa_xori_b((v16u8)(val1), (xor_val)); \
-}
-
-#define XORI_B_3VECS_SB(val0, val1, val2, \
- out0, out1, out2, xor_val) { \
- XORI_B_2VECS_SB(val0, val1, out0, out1, xor_val); \
- out2 = (v16i8)__msa_xori_b((v16u8)(val2), (xor_val)); \
-}
-
-#define XORI_B_4VECS_UB(val0, val1, val2, val3, \
- out0, out1, out2, out3, \
- xor_val) { \
- XORI_B_2VECS_UB(val0, val1, out0, out1, xor_val); \
- XORI_B_2VECS_UB(val2, val3, out2, out3, xor_val); \
-}
-
-#define XORI_B_4VECS_SB(val0, val1, val2, val3, \
- out0, out1, out2, out3, \
- xor_val) { \
- XORI_B_2VECS_SB(val0, val1, out0, out1, xor_val); \
- XORI_B_2VECS_SB(val2, val3, out2, out3, xor_val); \
+/* Description : Addition of 4 signed word elements
+ 4 signed word elements of input vector are added together and
+ the resulting integer sum is returned
+ Arguments : Inputs - in (signed word vector)
+ Outputs - sum_m (i32 sum)
+ Return Type - signed word
+*/
+#define HADD_SW_S32(in) ({ \
+ v2i64 res0_m, res1_m; \
+ int32_t sum_m; \
+ \
+ res0_m = __msa_hadd_s_d((v4i32)in, (v4i32)in); \
+ res1_m = __msa_splati_d(res0_m, 1); \
+ res0_m = res0_m + res1_m; \
+ sum_m = __msa_copy_s_w((v4i32)res0_m, 0); \
+ sum_m; \
+})
+
+/* Description : Horizontal addition of unsigned byte vector elements
+ Arguments : Inputs - in0, in1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Each unsigned odd byte element from 'in0' is added to
+ even unsigned byte element from 'in0' (pairwise) and the
+ halfword result is stored in 'out0'
+*/
+#define HADD_UB2(RTYPE, in0, in1, out0, out1) { \
+ out0 = (RTYPE)__msa_hadd_u_h((v16u8)in0, (v16u8)in0); \
+ out1 = (RTYPE)__msa_hadd_u_h((v16u8)in1, (v16u8)in1); \
}
+#define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__)
-#define XORI_B_7VECS_SB(val0, val1, val2, val3, \
- val4, val5, val6, \
- out0, out1, out2, out3, \
- out4, out5, out6, \
- xor_val) { \
- XORI_B_4VECS_SB(val0, val1, val2, val3, \
- out0, out1, out2, out3, xor_val); \
- XORI_B_3VECS_SB(val4, val5, val6, \
- out4, out5, out6, xor_val); \
+#define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) { \
+ HADD_UB2(RTYPE, in0, in1, out0, out1); \
+ HADD_UB2(RTYPE, in2, in3, out2, out3); \
+}
+#define HADD_UB4_UH(...) HADD_UB4(v8u16, __VA_ARGS__)
+
+/* Description : Horizontal subtraction of unsigned byte vector elements
+ Arguments : Inputs - in0, in1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+   Details : Each even unsigned byte element from 'in0' is subtracted from
+             the adjacent odd byte element (pairwise) and the halfword
+             result is written to 'out0'
+*/
+#define HSUB_UB2(RTYPE, in0, in1, out0, out1) { \
+ out0 = (RTYPE)__msa_hsub_u_h((v16u8)in0, (v16u8)in0); \
+ out1 = (RTYPE)__msa_hsub_u_h((v16u8)in1, (v16u8)in1); \
+}
+#define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__)
+
+/* Description : Horizontal subtraction of signed halfword vector elements
+ Arguments : Inputs - in0, in1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+   Details : Each even signed halfword element from 'in0' is subtracted from
+             the adjacent odd halfword element (pairwise) and the word
+             result is written to 'out0'
+*/
+#define HSUB_UH2(RTYPE, in0, in1, out0, out1) { \
+ out0 = (RTYPE)__msa_hsub_s_w((v8i16)in0, (v8i16)in0); \
+ out1 = (RTYPE)__msa_hsub_s_w((v8i16)in1, (v8i16)in1); \
+}
+#define HSUB_UH2_SW(...) HSUB_UH2(v4i32, __VA_ARGS__)
+
+/* Description : Insert specified word elements from GP registers into one
+                 destination vector
+   Arguments : Inputs - in0, in1 (and in2, in3 for the 4-word variant)
+ Outputs - out (output vector)
+ Return Type - as per RTYPE
+*/
+#define INSERT_W2(RTYPE, in0, in1, out) { \
+ out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \
+ out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \
}
+#define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__)
-#define SRARI_H_4VECS_UH(val0, val1, val2, val3, \
- out0, out1, out2, out3, \
- shift_right_val) { \
- out0 = (v8u16)__msa_srari_h((v8i16)(val0), (shift_right_val)); \
- out1 = (v8u16)__msa_srari_h((v8i16)(val1), (shift_right_val)); \
- out2 = (v8u16)__msa_srari_h((v8i16)(val2), (shift_right_val)); \
- out3 = (v8u16)__msa_srari_h((v8i16)(val3), (shift_right_val)); \
+#define INSERT_W4(RTYPE, in0, in1, in2, in3, out) { \
+ out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \
+ out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \
+ out = (RTYPE)__msa_insert_w((v4i32)out, 2, in2); \
+ out = (RTYPE)__msa_insert_w((v4i32)out, 3, in3); \
+}
+#define INSERT_W4_UB(...) INSERT_W4(v16u8, __VA_ARGS__)
+#define INSERT_W4_SB(...) INSERT_W4(v16i8, __VA_ARGS__)
+
+/* Description : Insert specified double word elements from GP registers into
+                 one destination vector
+   Arguments : Inputs - in0, in1 (2 double word values)
+ Outputs - out (output vector)
+ Return Type - as per RTYPE
+*/
+#define INSERT_D2(RTYPE, in0, in1, out) { \
+ out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \
+ out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \
+}
+#define INSERT_D2_UB(...) INSERT_D2(v16u8, __VA_ARGS__)
+#define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__)
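A sketch combining the scalar unaligned loads with the insert macros (hypothetical helper, same assumed includes as the earlier sketches): two possibly unaligned 8-byte rows are gathered into one vector, the usual way narrow strided rows are fed to the vector code.

/* Gather two 8-byte rows (possibly unaligned) into one 16-byte vector. */
static v16u8 gather_rows_2x8(const uint8_t *src, int32_t stride) {
  uint64_t row0, row1;
  v16u8 out = { 0 };

  row0 = LD(src);            /* unaligned 8-byte load of row 0 */
  row1 = LD(src + stride);   /* unaligned 8-byte load of row 1 */
  INSERT_D2_UB(row0, row1, out);
  return out;
}
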
+
+/* Description : Interleave even byte elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Even byte elements of 'in0' and even byte
+ elements of 'in1' are interleaved and copied to 'out0'
+ Even byte elements of 'in2' and even byte
+ elements of 'in3' are interleaved and copied to 'out1'
+*/
+#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \
+ out1 = (RTYPE)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \
+}
+#define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__)
+#define ILVEV_B2_SH(...) ILVEV_B2(v8i16, __VA_ARGS__)
+
+/* Description : Interleave even halfword elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Even halfword elements of 'in0' and even halfword
+ elements of 'in1' are interleaved and copied to 'out0'
+ Even halfword elements of 'in2' and even halfword
+ elements of 'in3' are interleaved and copied to 'out1'
+*/
+#define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \
+ out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \
}
+#define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__)
+#define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__)
+#define ILVEV_H2_SW(...) ILVEV_H2(v4i32, __VA_ARGS__)
+
+/* Description : Interleave even word elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Even word elements of 'in0' and 'in1' are interleaved
+ and written to 'out0'
+*/
+#define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvev_w((v4i32)in1, (v4i32)in0); \
+ out1 = (RTYPE)__msa_ilvev_w((v4i32)in3, (v4i32)in2); \
+}
+#define ILVEV_W2_SB(...) ILVEV_W2(v16i8, __VA_ARGS__)
+
+/* Description : Interleave even double word elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Even double word elements of 'in0' and even double word
+ elements of 'in1' are interleaved and copied to 'out0'
+ Even double word elements of 'in2' and even double word
+ elements of 'in3' are interleaved and copied to 'out1'
+*/
+#define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvev_d((v2i64)in1, (v2i64)in0); \
+ out1 = (RTYPE)__msa_ilvev_d((v2i64)in3, (v2i64)in2); \
+}
+#define ILVEV_D2_UB(...) ILVEV_D2(v16u8, __VA_ARGS__)
+
+/* Description : Interleave left half of byte elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Left half of byte elements of in0 and left half of byte
+ elements of in1 are interleaved and copied to out0.
+ Left half of byte elements of in2 and left half of byte
+ elements of in3 are interleaved and copied to out1.
+*/
+#define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
+ out1 = (RTYPE)__msa_ilvl_b((v16i8)in2, (v16i8)in3); \
+}
+#define ILVL_B2_UB(...) ILVL_B2(v16u8, __VA_ARGS__)
+#define ILVL_B2_SB(...) ILVL_B2(v16i8, __VA_ARGS__)
+#define ILVL_B2_UH(...) ILVL_B2(v8u16, __VA_ARGS__)
+#define ILVL_B2_SH(...) ILVL_B2(v8i16, __VA_ARGS__)
+
+#define ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define ILVL_B4_SB(...) ILVL_B4(v16i8, __VA_ARGS__)
+#define ILVL_B4_UH(...) ILVL_B4(v8u16, __VA_ARGS__)
+
+/* Description : Interleave left half of halfword elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Left half of halfword elements of in0 and left half of halfword
+ elements of in1 are interleaved and copied to out0.
+ Left half of halfword elements of in2 and left half of halfword
+ elements of in3 are interleaved and copied to out1.
+*/
+#define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
+ out1 = (RTYPE)__msa_ilvl_h((v8i16)in2, (v8i16)in3); \
+}
+#define ILVL_H2_SH(...) ILVL_H2(v8i16, __VA_ARGS__)
+
+/* Description : Interleave left half of word elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Left half of word elements of in0 and left half of word
+ elements of in1 are interleaved and copied to out0.
+ Left half of word elements of in2 and left half of word
+ elements of in3 are interleaved and copied to out1.
+*/
+#define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \
+ out1 = (RTYPE)__msa_ilvl_w((v4i32)in2, (v4i32)in3); \
+}
+#define ILVL_W2_UB(...) ILVL_W2(v16u8, __VA_ARGS__)
+#define ILVL_W2_SH(...) ILVL_W2(v8i16, __VA_ARGS__)
+
+/* Description : Interleave right half of byte elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ Outputs - out0, out1, out2, out3
+ Return Type - as per RTYPE
+ Details : Right half of byte elements of in0 and right half of byte
+ elements of in1 are interleaved and copied to out0.
+ Right half of byte elements of in2 and right half of byte
+ elements of in3 are interleaved and copied to out1.
+ Similar for other pairs
+*/
+#define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
+ out1 = (RTYPE)__msa_ilvr_b((v16i8)in2, (v16i8)in3); \
+}
+#define ILVR_B2_UB(...) ILVR_B2(v16u8, __VA_ARGS__)
+#define ILVR_B2_SB(...) ILVR_B2(v16i8, __VA_ARGS__)
+#define ILVR_B2_UH(...) ILVR_B2(v8u16, __VA_ARGS__)
+#define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__)
+
+#define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__)
+#define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__)
+#define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__)
+#define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__)
+
+#define ILVR_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ in8, in9, in10, in11, in12, in13, in14, in15, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3); \
+ ILVR_B4(RTYPE, in8, in9, in10, in11, in12, in13, in14, in15, \
+ out4, out5, out6, out7); \
+}
+#define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__)
+
+/* Description : Interleave right half of halfword elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ Outputs - out0, out1, out2, out3
+ Return Type - signed halfword
+ Details : Right half of halfword elements of in0 and right half of
+ halfword elements of in1 are interleaved and copied to out0.
+ Right half of halfword elements of in2 and right half of
+ halfword elements of in3 are interleaved and copied to out1.
+ Similar for other pairs
+*/
+#define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
+ out1 = (RTYPE)__msa_ilvr_h((v8i16)in2, (v8i16)in3); \
+}
+#define ILVR_H2_SH(...) ILVR_H2(v8i16, __VA_ARGS__)
+
+#define ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__)
-#define SRARI_H_4VECS_SH(val0, val1, val2, val3, \
- out0, out1, out2, out3, \
- shift_right_val) { \
- out0 = __msa_srari_h((v8i16)(val0), (shift_right_val)); \
- out1 = __msa_srari_h((v8i16)(val1), (shift_right_val)); \
- out2 = __msa_srari_h((v8i16)(val2), (shift_right_val)); \
- out3 = __msa_srari_h((v8i16)(val3), (shift_right_val)); \
+#define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \
+ out1 = (RTYPE)__msa_ilvr_w((v4i32)in2, (v4i32)in3); \
}
+#define ILVR_W2_UB(...) ILVR_W2(v16u8, __VA_ARGS__)
+#define ILVR_W2_SH(...) ILVR_W2(v8i16, __VA_ARGS__)
-#define SRARI_W_4VECS_SW(val0, val1, val2, val3, \
- out0, out1, out2, out3, \
- shift_right_val) { \
- out0 = __msa_srari_w((v4i32)(val0), (shift_right_val)); \
- out1 = __msa_srari_w((v4i32)(val1), (shift_right_val)); \
- out2 = __msa_srari_w((v4i32)(val2), (shift_right_val)); \
- out3 = __msa_srari_w((v4i32)(val3), (shift_right_val)); \
+#define ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3); \
}
+#define ILVR_W4_UB(...) ILVR_W4(v16u8, __VA_ARGS__)
+
+/* Description : Interleave right half of double word elements from vectors
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ Outputs - out0, out1, out2, out3
+ Return Type - unsigned double word
+ Details : Right half of double word elements of in0 and right half of
+ double word elements of in1 are interleaved and copied to out0.
+ Right half of double word elements of in2 and right half of
+ double word elements of in3 are interleaved and copied to out1.
+*/
+#define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvr_d((v2i64)(in0), (v2i64)(in1)); \
+ out1 = (RTYPE)__msa_ilvr_d((v2i64)(in2), (v2i64)(in3)); \
+}
+#define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__)
+#define ILVR_D2_SB(...) ILVR_D2(v16i8, __VA_ARGS__)
+#define ILVR_D2_SH(...) ILVR_D2(v8i16, __VA_ARGS__)
+
+#define ILVR_D3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) { \
+ ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ out2 = (RTYPE)__msa_ilvr_d((v2i64)(in4), (v2i64)(in5)); \
+}
+#define ILVR_D3_SB(...) ILVR_D3(v16i8, __VA_ARGS__)
-#define SRARI_SATURATE_UNSIGNED_H(input, right_shift_val, sat_val) ({ \
- v8u16 out_m; \
+#define ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define ILVR_D4_SB(...) ILVR_D4(v16i8, __VA_ARGS__)
+#define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__)
+
+/* Description : Interleave both left and right half of input vectors
+ Arguments : Inputs - in0, in1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Right half of byte elements from 'in0' and 'in1' are
+ interleaved and stored to 'out0'
+ Left half of byte elements from 'in0' and 'in1' are
+ interleaved and stored to 'out1'
+*/
+#define ILVRL_B2(RTYPE, in0, in1, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
+ out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
+}
+#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
+#define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__)
+#define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__)
+#define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__)
+
+#define ILVRL_H2(RTYPE, in0, in1, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
+ out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
+}
+#define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__)
+#define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__)
+
+#define ILVRL_W2(RTYPE, in0, in1, out0, out1) { \
+ out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \
+ out1 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \
+}
+#define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__)
+#define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__)
+
+/* Description : Saturate the halfword element values to the max
+                 unsigned value of (sat_val+1) bits
+ The element data width remains unchanged
+ Arguments : Inputs - in0, in1, in2, in3, sat_val
+ Outputs - in0, in1, in2, in3 (in place)
+ Return Type - unsigned halfword
+ Details : Each unsigned halfword element from 'in0' is saturated to the
+ value generated with (sat_val+1) bit range.
+ The results are stored in place
+*/
+#define SAT_UH2(RTYPE, in0, in1, sat_val) { \
+ in0 = (RTYPE)__msa_sat_u_h((v8u16)in0, sat_val); \
+ in1 = (RTYPE)__msa_sat_u_h((v8u16)in1, sat_val); \
+}
+#define SAT_UH2_UH(...) SAT_UH2(v8u16, __VA_ARGS__)
+
+#define SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val) { \
+ SAT_UH2(RTYPE, in0, in1, sat_val); \
+ SAT_UH2(RTYPE, in2, in3, sat_val) \
+}
+#define SAT_UH4_UH(...) SAT_UH4(v8u16, __VA_ARGS__)
+
+/* Description : Saturate the signed halfword element values to the max
+                 signed value of (sat_val+1) bits
+                 The element data width remains unchanged
+   Arguments : Inputs - in0, in1, in2, in3, sat_val
+                 Outputs - in0, in1, in2, in3 (in place)
+                 Return Type - signed halfword
+   Details : Each signed halfword element from 'in0' is saturated to the
+                 value generated with (sat_val+1) bit range
+                 The results are stored in place
+*/
+#define SAT_SH2(RTYPE, in0, in1, sat_val) { \
+ in0 = (RTYPE)__msa_sat_s_h((v8i16)in0, sat_val); \
+ in1 = (RTYPE)__msa_sat_s_h((v8i16)in1, sat_val); \
+}
+#define SAT_SH2_SH(...) SAT_SH2(v8i16, __VA_ARGS__)
+
+#define SAT_SH4(RTYPE, in0, in1, in2, in3, sat_val) { \
+ SAT_SH2(RTYPE, in0, in1, sat_val); \
+ SAT_SH2(RTYPE, in2, in3, sat_val); \
+}
+#define SAT_SH4_SH(...) SAT_SH4(v8i16, __VA_ARGS__)
+
+/* Description : Indexed halfword element values are replicated to all
+ elements in output vector
+ Arguments : Inputs - in, idx0, idx1
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : 'idx0' element value from 'in' vector is replicated to all
+ elements in 'out0' vector
+ Valid index range for halfword operation is 0-7
+*/
+#define SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) { \
+ out0 = (RTYPE)__msa_splati_h((v8i16)in, idx0); \
+ out1 = (RTYPE)__msa_splati_h((v8i16)in, idx1); \
+}
+#define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__)
+
+#define SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, \
+ out0, out1, out2, out3) { \
+ SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \
+ SPLATI_H2(RTYPE, in, idx2, idx3, out2, out3); \
+}
+#define SPLATI_H4_SB(...) SPLATI_H4(v16i8, __VA_ARGS__)
+#define SPLATI_H4_SH(...) SPLATI_H4(v8i16, __VA_ARGS__)
+
+/* Description : Pack even byte elements of vector pairs
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Even byte elements of in0 are copied to the left half of
+ out0 & even byte elements of in1 are copied to the right
+ half of out0.
+ Even byte elements of in2 are copied to the left half of
+ out1 & even byte elements of in3 are copied to the right
+ half of out1.
+*/
+#define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_pckev_b((v16i8)in0, (v16i8)in1); \
+ out1 = (RTYPE)__msa_pckev_b((v16i8)in2, (v16i8)in3); \
+}
+#define PCKEV_B2_SB(...) PCKEV_B2(v16i8, __VA_ARGS__)
+#define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__)
+#define PCKEV_B2_SH(...) PCKEV_B2(v8i16, __VA_ARGS__)
+
+#define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define PCKEV_B4_SB(...) PCKEV_B4(v16i8, __VA_ARGS__)
+#define PCKEV_B4_UB(...) PCKEV_B4(v16u8, __VA_ARGS__)
+#define PCKEV_B4_SH(...) PCKEV_B4(v8i16, __VA_ARGS__)
+
+/* Description : Pack even halfword elements of vector pairs
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Even halfword elements of in0 are copied to the left half of
+ out0 & even halfword elements of in1 are copied to the right
+ half of out0.
+ Even halfword elements of in2 are copied to the left half of
+ out1 & even halfword elements of in3 are copied to the right
+ half of out1.
+*/
+#define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \
+ out1 = (RTYPE)__msa_pckev_h((v8i16)in2, (v8i16)in3); \
+}
+#define PCKEV_H2_SH(...) PCKEV_H2(v8i16, __VA_ARGS__)
+#define PCKEV_H2_SW(...) PCKEV_H2(v4i32, __VA_ARGS__)
+
+#define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define PCKEV_H4_SH(...) PCKEV_H4(v8i16, __VA_ARGS__)
+
+/* Description : Pack even double word elements of vector pairs
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - unsigned byte
+ Details : Even double elements of in0 are copied to the left half of
+ out0 & even double elements of in1 are copied to the right
+ half of out0.
+ Even double elements of in2 are copied to the left half of
+ out1 & even double elements of in3 are copied to the right
+ half of out1.
+*/
+#define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_pckev_d((v2i64)in0, (v2i64)in1); \
+ out1 = (RTYPE)__msa_pckev_d((v2i64)in2, (v2i64)in3); \
+}
+#define PCKEV_D2_UB(...) PCKEV_D2(v16u8, __VA_ARGS__)
+#define PCKEV_D2_SH(...) PCKEV_D2(v8i16, __VA_ARGS__)
+
+#define PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ PCKEV_D2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define PCKEV_D4_UB(...) PCKEV_D4(v16u8, __VA_ARGS__)
+
+/* Description : Each byte element is logically xor'ed with immediate 128
+ Arguments : Inputs - in0, in1
+ Outputs - in0, in1 (in-place)
+ Return Type - as per RTYPE
+ Details : Each unsigned byte element from input vector 'in0' is
+ logically xor'ed with 128 and the result is in-place stored in
+ 'in0' vector
+ Each unsigned byte element from input vector 'in1' is
+ logically xor'ed with 128 and the result is in-place stored in
+ 'in1' vector
+ Similar for other pairs
+*/
+#define XORI_B2_128(RTYPE, in0, in1) { \
+ in0 = (RTYPE)__msa_xori_b((v16u8)in0, 128); \
+ in1 = (RTYPE)__msa_xori_b((v16u8)in1, 128); \
+}
+#define XORI_B2_128_UB(...) XORI_B2_128(v16u8, __VA_ARGS__)
+#define XORI_B2_128_SB(...) XORI_B2_128(v16i8, __VA_ARGS__)
+
+#define XORI_B3_128(RTYPE, in0, in1, in2) { \
+ XORI_B2_128(RTYPE, in0, in1); \
+ in2 = (RTYPE)__msa_xori_b((v16u8)in2, 128); \
+}
+#define XORI_B3_128_SB(...) XORI_B3_128(v16i8, __VA_ARGS__)
+
+#define XORI_B4_128(RTYPE, in0, in1, in2, in3) { \
+ XORI_B2_128(RTYPE, in0, in1); \
+ XORI_B2_128(RTYPE, in2, in3); \
+}
+#define XORI_B4_128_UB(...) XORI_B4_128(v16u8, __VA_ARGS__)
+#define XORI_B4_128_SB(...) XORI_B4_128(v16i8, __VA_ARGS__)
+
+#define XORI_B7_128(RTYPE, in0, in1, in2, in3, in4, in5, in6) { \
+ XORI_B4_128(RTYPE, in0, in1, in2, in3); \
+ XORI_B3_128(RTYPE, in4, in5, in6); \
+}
+#define XORI_B7_128_SB(...) XORI_B7_128(v16i8, __VA_ARGS__)
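The xori-with-128 trick is easier to see in scalar form: flipping the top bit maps an unsigned pixel in [0, 255] onto the signed range [-128, 127], and applying it again undoes the mapping. A minimal sketch (hypothetical helper name, not part of the patch):

#include <stdint.h>

/* Scalar model of XORI_B2_128 applied to one vector's worth of bytes. */
static void xori_b_128_model(uint8_t v[16]) {
  int i;
  for (i = 0; i < 16; ++i)
    v[i] ^= 128;  /* flip the sign bit of each byte */
}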
+
+/* Description : Average of signed halfword elements -> (a + b) / 2
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ Outputs - out0, out1, out2, out3
+ Return Type - as per RTYPE
+ Details : Each signed halfword element from 'in0' is added to each
+ signed halfword element of 'in1' with full precision resulting
+ in one extra bit in the result. The result is then divided by
+ 2 and written to 'out0'
+*/
+#define AVE_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ out0 = (RTYPE)__msa_ave_s_h((v8i16)in0, (v8i16)in1); \
+ out1 = (RTYPE)__msa_ave_s_h((v8i16)in2, (v8i16)in3); \
+ out2 = (RTYPE)__msa_ave_s_h((v8i16)in4, (v8i16)in5); \
+ out3 = (RTYPE)__msa_ave_s_h((v8i16)in6, (v8i16)in7); \
+}
+#define AVE_SH4_SH(...) AVE_SH4(v8i16, __VA_ARGS__)
+
+/* Description : Addition of signed halfword elements and signed saturation
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Return Type - as per RTYPE
+ Details : Signed halfword elements from 'in0' are added to signed
+ halfword elements of 'in1'. The result is then signed saturated
+ between -32768 to +32767 (as per halfword data type)
+ Similar for other pairs
+*/
+#define ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1) { \
+ out0 = (RTYPE)__msa_adds_s_h((v8i16)in0, (v8i16)in1); \
+ out1 = (RTYPE)__msa_adds_s_h((v8i16)in2, (v8i16)in3); \
+}
+#define ADDS_SH2_SH(...) ADDS_SH2(v8i16, __VA_ARGS__)
+
+#define ADDS_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ ADDS_SH2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define ADDS_SH4_SH(...) ADDS_SH4(v8i16, __VA_ARGS__)
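A single-lane scalar sketch of the saturating addition used by ADDS_SH2/ADDS_SH4 (illustrative helper name):

#include <stdint.h>

/* Scalar model of one lane of __msa_adds_s_h: 16-bit addition clamped to
 * the int16_t range, as described in the comment above. */
static int16_t adds_s_h_model(int16_t a, int16_t b) {
  int32_t sum = (int32_t)a + (int32_t)b;
  if (sum > INT16_MAX) sum = INT16_MAX;
  if (sum < INT16_MIN) sum = INT16_MIN;
  return (int16_t)sum;
}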
+
+/* Description : Shift left all elements of vector (generic for all data types)
+ Arguments : Inputs - in0, in1, in2, in3, shift
+ Outputs - in0, in1, in2, in3 (in place)
+ Return Type - as per input vector RTYPE
+ Details : Each element of vector 'in0' is left shifted by 'shift' and
+ the result is in place written to 'in0'
+ Similar for other pairs
+*/
+#define SLLI_4V(in0, in1, in2, in3, shift) { \
+ in0 = in0 << shift; \
+ in1 = in1 << shift; \
+ in2 = in2 << shift; \
+ in3 = in3 << shift; \
+}
+
+/* Description : Arithmetic shift right all elements of vector
+ (generic for all data types)
+ Arguments : Inputs - in0, in1, in2, in3, shift
+ Outputs - in0, in1, in2, in3 (in place)
+ Return Type - as per input vector RTYPE
+ Details : Each element of vector 'in0' is right shifted by 'shift' and
+ the result is in place written to 'in0'
+ Here, 'shift' is GP variable passed in
+ Similar for other pairs
+*/
+#define SRA_4V(in0, in1, in2, in3, shift) { \
+ in0 = in0 >> shift; \
+ in1 = in1 >> shift; \
+ in2 = in2 >> shift; \
+ in3 = in3 >> shift; \
+}
+
+/* Description : Shift right arithmetic rounded words
+ Arguments : Inputs - in0, in1, shift
+ Outputs - in place operation
+ Return Type - as per RTYPE
+ Details : Each element of vector 'in0' is shifted right arithmetically by
+ the number of bits in the corresponding element in the vector
+ 'shift'. The last discarded bit is added to shifted value for
+ rounding and the result is written in-place.
+ 'shift' is a vector.
+*/
+#define SRAR_W2(RTYPE, in0, in1, shift) { \
+ in0 = (RTYPE)__msa_srar_w((v4i32)in0, (v4i32)shift); \
+ in1 = (RTYPE)__msa_srar_w((v4i32)in1, (v4i32)shift); \
+}
+
+#define SRAR_W4(RTYPE, in0, in1, in2, in3, shift) { \
+ SRAR_W2(RTYPE, in0, in1, shift) \
+ SRAR_W2(RTYPE, in2, in3, shift) \
+}
+#define SRAR_W4_SW(...) SRAR_W4(v4i32, __VA_ARGS__)
+
+/* Description : Shift right arithmetic rounded (immediate)
+ Arguments : Inputs - in0, in1, in2, in3, shift
+ Outputs - in0, in1, in2, in3 (in place)
+ Return Type - as per RTYPE
+ Details : Each element of vector 'in0' is shifted right arithmetic by
+ value in 'shift'.
+ The last discarded bit is added to shifted value for rounding
+ and the result is in place written to 'in0'
+ Similar for other pairs
+*/
+#define SRARI_H2(RTYPE, in0, in1, shift) { \
+ in0 = (RTYPE)__msa_srari_h((v8i16)in0, shift); \
+ in1 = (RTYPE)__msa_srari_h((v8i16)in1, shift); \
+}
+#define SRARI_H2_UH(...) SRARI_H2(v8u16, __VA_ARGS__)
+#define SRARI_H2_SH(...) SRARI_H2(v8i16, __VA_ARGS__)
+
+#define SRARI_H4(RTYPE, in0, in1, in2, in3, shift) { \
+ SRARI_H2(RTYPE, in0, in1, shift); \
+ SRARI_H2(RTYPE, in2, in3, shift); \
+}
+#define SRARI_H4_UH(...) SRARI_H4(v8u16, __VA_ARGS__)
+#define SRARI_H4_SH(...) SRARI_H4(v8i16, __VA_ARGS__)
+
+/* Description : Shift right arithmetic rounded (immediate)
+ Arguments : Inputs - in0, in1, shift
+ Outputs - in0, in1 (in place)
+ Return Type - as per RTYPE
+ Details : Each element of vector 'in0' is shifted right arithmetic by
+ value in 'shift'.
+ The last discarded bit is added to shifted value for rounding
+ and the result is in place written to 'in0'
+ Similar for other pairs
+*/
+#define SRARI_W2(RTYPE, in0, in1, shift) { \
+ in0 = (RTYPE)__msa_srari_w((v4i32)in0, shift); \
+ in1 = (RTYPE)__msa_srari_w((v4i32)in1, shift); \
+}
+#define SRARI_W2_SW(...) SRARI_W2(v4i32, __VA_ARGS__)
+
+#define SRARI_W4(RTYPE, in0, in1, in2, in3, shift) { \
+ SRARI_W2(RTYPE, in0, in1, shift); \
+ SRARI_W2(RTYPE, in2, in3, shift); \
+}
+#define SRARI_W4_SW(...) SRARI_W4(v4i32, __VA_ARGS__)
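The "last discarded bit is added" rounding used by SRARI_H*/SRARI_W* is the same rounding-to-nearest that libvpx writes elsewhere as (x + (1 << (shift - 1))) >> shift. A one-lane scalar sketch, assuming shift > 0 and an arithmetic right shift on signed values (which holds on the targets libvpx supports); the helper name is illustrative:

#include <stdint.h>

/* Scalar model of one lane of SRARI_W2 / SRARI_H2. */
static int32_t srari_model(int32_t x, int shift) {
  return (x >> shift) + ((x >> (shift - 1)) & 1);  /* add back last discarded bit */
}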
+
+/* Description : Logical shift right all elements of vector (immediate)
+ Arguments : Inputs - in0, in1, in2, in3, shift
+ Outputs - out0, out1, out2, out3
+ Return Type - as per RTYPE
+ Details : Each element of vector 'in0' is right shifted by 'shift' and
+ the result is written in-place. 'shift' is an immediate value.
+*/
+#define SRLI_H4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3, shift) { \
+ out0 = (RTYPE)__msa_srli_h((v8i16)in0, shift); \
+ out1 = (RTYPE)__msa_srli_h((v8i16)in1, shift); \
+ out2 = (RTYPE)__msa_srli_h((v8i16)in2, shift); \
+ out3 = (RTYPE)__msa_srli_h((v8i16)in3, shift); \
+}
+#define SRLI_H4_SH(...) SRLI_H4(v8i16, __VA_ARGS__)
+
+/* Description : Multiplication of pairs of vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+               Details     : Each element of 'in0' is multiplied by the corresponding
+                             element of 'in1' and the result is written to 'out0'
+*/
+#define MUL2(in0, in1, in2, in3, out0, out1) { \
+ out0 = in0 * in1; \
+ out1 = in2 * in3; \
+}
+#define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ MUL2(in0, in1, in2, in3, out0, out1); \
+ MUL2(in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Addition of 2 pairs of vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+               Details     : Each element of the two input vector pairs is added and
+                             two result vectors are produced
+*/
+#define ADD2(in0, in1, in2, in3, out0, out1) { \
+ out0 = in0 + in1; \
+ out1 = in2 + in3; \
+}
+#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ ADD2(in0, in1, in2, in3, out0, out1); \
+ ADD2(in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Subtraction of 2 pairs of vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+               Details     : Each element of the two input vector pairs is subtracted
+                             and two result vectors are produced
+*/
+#define SUB2(in0, in1, in2, in3, out0, out1) { \
+ out0 = in0 - in1; \
+ out1 = in2 - in3; \
+}
+#define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3) { \
+ out0 = in0 - in1; \
+ out1 = in2 - in3; \
+ out2 = in4 - in5; \
+ out3 = in6 - in7; \
+}
+
+/* Description : Sign extend halfword elements from right half of the vector
+ Arguments : Inputs - in (input halfword vector)
+ Outputs - out (sign extended word vectors)
+ Return Type - signed word
+ Details : Sign bit of halfword elements from input vector 'in' is
+ extracted and interleaved with same vector 'in0' to generate
+ 4 word elements keeping sign intact
+*/
+#define UNPCK_R_SH_SW(in, out) { \
+ v8i16 sign_m; \
+ \
+ sign_m = __msa_clti_s_h((v8i16)in, 0); \
+ out = (v4i32)__msa_ilvr_h(sign_m, (v8i16)in); \
+}
+
+/* Description : Zero extend unsigned byte elements to halfword elements
+ Arguments : Inputs - in (1 input unsigned byte vector)
+ Outputs - out0, out1 (unsigned 2 halfword vectors)
+ Return Type - signed halfword
+ Details : Zero extended right half of vector is returned in 'out0'
+ Zero extended left half of vector is returned in 'out1'
+*/
+#define UNPCK_UB_SH(in, out0, out1) { \
+ v16i8 zero_m = { 0 }; \
+ \
+ ILVRL_B2_SH(zero_m, in, out0, out1); \
+}
+
+/* Description : Sign extend halfword elements from input vector and return
+ result in pair of vectors
+ Arguments : Inputs - in (1 input halfword vector)
+ Outputs - out0, out1 (sign extended 2 word vectors)
+ Return Type - signed word
+ Details : Sign bit of halfword elements from input vector 'in' is
+ extracted and interleaved right with same vector 'in0' to
+ generate 4 signed word elements in 'out0'
+ Then interleaved left with same vector 'in0' to
+ generate 4 signed word elements in 'out1'
+*/
+#define UNPCK_SH_SW(in, out0, out1) { \
+ v8i16 tmp_m; \
+ \
+ tmp_m = __msa_clti_s_h((v8i16)in, 0); \
+ ILVRL_H2_SW(tmp_m, in, out0, out1); \
+}
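The clti/ilvr-ilvl pair in UNPCK_SH_SW is simply a vectorised sign extension; in scalar terms (illustrative helper name, lane 0 taken as the right-most element):

#include <stdint.h>

/* Scalar model of UNPCK_SH_SW: lanes 0-3 of the halfword input become the
 * sign-extended words of out0, lanes 4-7 become out1. */
static void unpck_sh_sw_model(const int16_t in[8],
                              int32_t out0[4], int32_t out1[4]) {
  int i;
  for (i = 0; i < 4; ++i) {
    out0[i] = in[i];
    out1[i] = in[4 + i];
  }
}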
+
+/* Description : Butterfly of 4 input vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1, out2, out3
+ Details : Butterfly operation
+*/
+#define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) { \
+ out0 = in0 + in3; \
+ out1 = in1 + in2; \
+ \
+ out2 = in1 - in2; \
+ out3 = in0 - in3; \
+}
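BUTTERFLY_4 is the usual add/subtract stage of the transform code, pairing the outermost inputs with each other and the innermost inputs with each other. On plain integers it reduces to the following (illustrative helper name, not part of the patch):

/* Scalar model of BUTTERFLY_4. */
static void butterfly4_model(int in0, int in1, int in2, int in3,
                             int *out0, int *out1, int *out2, int *out3) {
  *out0 = in0 + in3;
  *out1 = in1 + in2;
  *out2 = in1 - in2;
  *out3 = in0 - in3;
}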
+
+/* Description : Butterfly of 8 input vectors
+ Arguments : Inputs - in0 ... in7
+ Outputs - out0 .. out7
+ Details : Butterfly operation
+*/
+#define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ out0 = in0 + in7; \
+ out1 = in1 + in6; \
+ out2 = in2 + in5; \
+ out3 = in3 + in4; \
\
- out_m = (v8u16)__msa_srari_h((v8i16)(input), (right_shift_val)); \
- out_m = __msa_sat_u_h(out_m, (sat_val)); \
- out_m; \
-})
+ out4 = in3 - in4; \
+ out5 = in2 - in5; \
+ out6 = in1 - in6; \
+ out7 = in0 - in7; \
+}
-#define SRARI_SATURATE_SIGNED_H(input, right_shift_val, sat_val) ({ \
- v8i16 out_m; \
- \
- out_m = __msa_srari_h((v8i16)(input), (right_shift_val)); \
- out_m = __msa_sat_s_h(out_m, (sat_val)); \
- out_m; \
-})
+/* Description : Butterfly of 16 input vectors
+ Arguments : Inputs - in0 ... in15
+ Outputs - out0 .. out15
+ Details : Butterfly operation
+*/
+#define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, \
+ in8, in9, in10, in11, in12, in13, in14, in15, \
+ out0, out1, out2, out3, out4, out5, out6, out7, \
+ out8, out9, out10, out11, out12, out13, out14, out15) { \
+ out0 = in0 + in15; \
+ out1 = in1 + in14; \
+ out2 = in2 + in13; \
+ out3 = in3 + in12; \
+ out4 = in4 + in11; \
+ out5 = in5 + in10; \
+ out6 = in6 + in9; \
+ out7 = in7 + in8; \
+ \
+ out8 = in7 - in8; \
+ out9 = in6 - in9; \
+ out10 = in5 - in10; \
+ out11 = in4 - in11; \
+ out12 = in3 - in12; \
+ out13 = in2 - in13; \
+ out14 = in1 - in14; \
+ out15 = in0 - in15; \
+}
-#define PCKEV_2B_XORI128_STORE_4_BYTES_4(in1, in2, \
- pdst, stride) { \
- uint32_t out0_m, out1_m, out2_m, out3_m; \
- v16i8 tmp0_m; \
- uint8_t *dst_m = (uint8_t *)(pdst); \
- \
- tmp0_m = __msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \
- tmp0_m = (v16i8)__msa_xori_b((v16u8)tmp0_m, 128); \
- \
- out0_m = __msa_copy_u_w((v4i32)tmp0_m, 0); \
- out1_m = __msa_copy_u_w((v4i32)tmp0_m, 1); \
- out2_m = __msa_copy_u_w((v4i32)tmp0_m, 2); \
- out3_m = __msa_copy_u_w((v4i32)tmp0_m, 3); \
- \
- STORE_WORD(dst_m, out0_m); \
- dst_m += stride; \
- STORE_WORD(dst_m, out1_m); \
- dst_m += stride; \
- STORE_WORD(dst_m, out2_m); \
- dst_m += stride; \
- STORE_WORD(dst_m, out3_m); \
-}
-
-#define PCKEV_B_4_XORI128_STORE_8_BYTES_4(in1, in2, \
- in3, in4, \
- pdst, stride) { \
- uint64_t out0_m, out1_m, out2_m, out3_m; \
- v16i8 tmp0_m, tmp1_m; \
- uint8_t *dst_m = (uint8_t *)(pdst); \
- \
- tmp0_m = __msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \
- tmp1_m = __msa_pckev_b((v16i8)(in4), (v16i8)(in3)); \
- \
- tmp0_m = (v16i8)__msa_xori_b((v16u8)tmp0_m, 128); \
- tmp1_m = (v16i8)__msa_xori_b((v16u8)tmp1_m, 128); \
- \
- out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \
- out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \
- out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \
- out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \
- \
- STORE_DWORD(dst_m, out0_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out1_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out2_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out3_m); \
-}
-
-/* Only for signed vecs */
-#define PCKEV_B_XORI128_STORE_VEC(in1, in2, pdest) { \
- v16i8 tmp_m; \
- \
- tmp_m = __msa_pckev_b((v16i8)(in1), (v16i8)(in2)); \
- tmp_m = (v16i8)__msa_xori_b((v16u8)tmp_m, 128); \
- STORE_SB(tmp_m, (pdest)); \
-}
-
-/* Only for signed vecs */
-#define PCKEV_B_4_XORI128_AVG_STORE_8_BYTES_4(in1, dst0, \
- in2, dst1, \
- in3, dst2, \
- in4, dst3, \
- pdst, stride) { \
- uint64_t out0_m, out1_m, out2_m, out3_m; \
- v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- uint8_t *dst_m = (uint8_t *)(pdst); \
- \
- tmp0_m = (v16u8)__msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \
- tmp1_m = (v16u8)__msa_pckev_b((v16i8)(in4), (v16i8)(in3)); \
- \
- tmp2_m = (v16u8)__msa_ilvr_d((v2i64)(dst1), (v2i64)(dst0)); \
- tmp3_m = (v16u8)__msa_ilvr_d((v2i64)(dst3), (v2i64)(dst2)); \
- \
- tmp0_m = __msa_xori_b(tmp0_m, 128); \
- tmp1_m = __msa_xori_b(tmp1_m, 128); \
- \
- tmp0_m = __msa_aver_u_b(tmp0_m, tmp2_m); \
- tmp1_m = __msa_aver_u_b(tmp1_m, tmp3_m); \
- \
- out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \
- out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \
- out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \
- out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \
- \
- STORE_DWORD(dst_m, out0_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out1_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out2_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out3_m); \
-}
-
-/* Only for signed vecs */
-#define PCKEV_B_XORI128_AVG_STORE_VEC(in1, in2, dst, pdest) { \
- v16u8 tmp_m; \
- \
- tmp_m = (v16u8)__msa_pckev_b((v16i8)(in1), (v16i8)(in2)); \
- tmp_m = __msa_xori_b(tmp_m, 128); \
- tmp_m = __msa_aver_u_b(tmp_m, (v16u8)(dst)); \
- STORE_UB(tmp_m, (pdest)); \
-}
-
-#define PCKEV_B_STORE_8_BYTES_4(in1, in2, in3, in4, \
- pdst, stride) { \
- uint64_t out0_m, out1_m, out2_m, out3_m; \
- v16i8 tmp0_m, tmp1_m; \
- uint8_t *dst_m = (uint8_t *)(pdst); \
- \
- tmp0_m = __msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \
- tmp1_m = __msa_pckev_b((v16i8)(in4), (v16i8)(in3)); \
- \
- out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \
- out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \
- out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \
- out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \
- \
- STORE_DWORD(dst_m, out0_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out1_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out2_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out3_m); \
-}
-
-/* Only for unsigned vecs */
-#define PCKEV_B_STORE_VEC(in1, in2, pdest) { \
- v16i8 tmp_m; \
- \
- tmp_m = __msa_pckev_b((v16i8)(in1), (v16i8)(in2)); \
- STORE_SB(tmp_m, (pdest)); \
+/* Description : Transposes input 8x8 byte block
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ (input 8x8 byte block)
+ Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+ (output 8x8 byte block)
+ Return Type - unsigned byte
+ Details :
+*/
+#define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ v16i8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
+ \
+ ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5, \
+ tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
+ ILVRL_B2_SB(tmp1_m, tmp0_m, tmp4_m, tmp5_m); \
+ ILVRL_B2_SB(tmp3_m, tmp2_m, tmp6_m, tmp7_m); \
+ ILVRL_W2(RTYPE, tmp6_m, tmp4_m, out0, out2); \
+ ILVRL_W2(RTYPE, tmp7_m, tmp5_m, out4, out6); \
+ SLDI_B2_0(RTYPE, out0, out2, out1, out3, 8); \
+ SLDI_B2_0(RTYPE, out4, out6, out5, out7, 8); \
+}
+#define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__)
+
+/* Description : Transposes 16x8 block into 8x16 with byte elements in vectors
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7,
+ in8, in9, in10, in11, in12, in13, in14, in15
+ Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+ Return Type - unsigned byte
+ Details :
+*/
+#define TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, \
+ in8, in9, in10, in11, in12, in13, in14, in15, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ v16u8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
+ \
+ ILVEV_D2_UB(in0, in8, in1, in9, out7, out6); \
+ ILVEV_D2_UB(in2, in10, in3, in11, out5, out4); \
+ ILVEV_D2_UB(in4, in12, in5, in13, out3, out2); \
+ ILVEV_D2_UB(in6, in14, in7, in15, out1, out0); \
+ \
+ tmp0_m = (v16u8)__msa_ilvev_b((v16i8)out6, (v16i8)out7); \
+ tmp4_m = (v16u8)__msa_ilvod_b((v16i8)out6, (v16i8)out7); \
+ tmp1_m = (v16u8)__msa_ilvev_b((v16i8)out4, (v16i8)out5); \
+ tmp5_m = (v16u8)__msa_ilvod_b((v16i8)out4, (v16i8)out5); \
+ out5 = (v16u8)__msa_ilvev_b((v16i8)out2, (v16i8)out3); \
+ tmp6_m = (v16u8)__msa_ilvod_b((v16i8)out2, (v16i8)out3); \
+ out7 = (v16u8)__msa_ilvev_b((v16i8)out0, (v16i8)out1); \
+ tmp7_m = (v16u8)__msa_ilvod_b((v16i8)out0, (v16i8)out1); \
+ \
+ ILVEV_H2_UB(tmp0_m, tmp1_m, out5, out7, tmp2_m, tmp3_m); \
+ out0 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
+ out4 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
+ \
+ tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp1_m, (v8i16)tmp0_m); \
+ tmp3_m = (v16u8)__msa_ilvod_h((v8i16)out7, (v8i16)out5); \
+ out2 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
+ out6 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
+ \
+ ILVEV_H2_UB(tmp4_m, tmp5_m, tmp6_m, tmp7_m, tmp2_m, tmp3_m); \
+ out1 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
+ out5 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
+ \
+ tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \
+ tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \
+ out3 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
+ out7 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \
}
-#define PCKEV_B_AVG_STORE_8_BYTES_4(in1, dst0, in2, dst1, \
- in3, dst2, in4, dst3, \
- pdst, stride) { \
- uint64_t out0_m, out1_m, out2_m, out3_m; \
- v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- uint8_t *dst_m = (uint8_t *)(pdst); \
- \
- tmp0_m = (v16u8)__msa_pckev_b((v16i8)(in2), (v16i8)(in1)); \
- tmp1_m = (v16u8)__msa_pckev_b((v16i8)(in4), (v16i8)(in3)); \
- \
- tmp2_m = (v16u8)__msa_pckev_d((v2i64)(dst1), (v2i64)(dst0)); \
- tmp3_m = (v16u8)__msa_pckev_d((v2i64)(dst3), (v2i64)(dst2)); \
- \
- tmp0_m = __msa_aver_u_b(tmp0_m, tmp2_m); \
- tmp1_m = __msa_aver_u_b(tmp1_m, tmp3_m); \
+/* Description : Transposes 4x4 block with half word elements in vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1, out2, out3
+ Return Type - signed halfword
+ Details :
+*/
+#define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) { \
+ v8i16 s0_m, s1_m; \
+ \
+ ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m); \
+ ILVRL_W2_SH(s1_m, s0_m, out0, out2); \
+ out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
+ out3 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out2); \
+}
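The interleave sequence above amounts to a plain 4x4 transpose of the low four halfword lanes of the inputs. A scalar sketch under that assumption (illustrative helper name; each 4-element row stands for the low half of one input vector):

#include <stdint.h>

/* Scalar model of TRANSPOSE4x4_SH_SH on the low four lanes. */
static void transpose4x4_model(const int16_t in[4][4], int16_t out[4][4]) {
  int r, c;
  for (r = 0; r < 4; ++r)
    for (c = 0; c < 4; ++c)
      out[r][c] = in[c][r];
}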
+
+/* Description : Transposes 4x8 block with half word elements in vectors
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+ Return Type - signed halfword
+ Details :
+*/
+#define TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ v8i16 tmp0_n, tmp1_n, tmp2_n, tmp3_n; \
+ v8i16 zero_m = { 0 }; \
+ \
+ ILVR_H4_SH(in1, in0, in3, in2, in5, in4, in7, in6, \
+ tmp0_n, tmp1_n, tmp2_n, tmp3_n); \
+ ILVRL_W2_SH(tmp1_n, tmp0_n, tmp0_m, tmp2_m); \
+ ILVRL_W2_SH(tmp3_n, tmp2_n, tmp1_m, tmp3_m); \
+ \
+ out0 = (v8i16)__msa_ilvr_d((v2i64)tmp1_m, (v2i64)tmp0_m); \
+ out1 = (v8i16)__msa_ilvl_d((v2i64)tmp1_m, (v2i64)tmp0_m); \
+ out2 = (v8i16)__msa_ilvr_d((v2i64)tmp3_m, (v2i64)tmp2_m); \
+ out3 = (v8i16)__msa_ilvl_d((v2i64)tmp3_m, (v2i64)tmp2_m); \
+ \
+ out4 = zero_m; \
+ out5 = zero_m; \
+ out6 = zero_m; \
+ out7 = zero_m; \
+}
+
+/* Description : Transposes 8x4 block with half word elements in vectors
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+ Return Type - signed halfword
+ Details :
+*/
+#define TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) { \
+ v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ \
+ ILVR_H2_SH(in1, in0, in3, in2, tmp0_m, tmp1_m); \
+ ILVL_H2_SH(in1, in0, in3, in2, tmp2_m, tmp3_m); \
+ ILVR_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out2); \
+ ILVL_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out1, out3); \
+}
+
+/* Description : Transposes 8x8 block with half word elements in vectors
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ Outputs - out0, out1, out2, out3, out4, out5, out6, out7
+ Return Type - signed halfword
+ Details :
+*/
+#define TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v8i16 s0_m, s1_m; \
+ v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \
+ \
+ ILVR_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \
+ ILVRL_H2_SH(s1_m, s0_m, tmp0_m, tmp1_m); \
+ ILVL_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \
+ ILVRL_H2_SH(s1_m, s0_m, tmp2_m, tmp3_m); \
+ ILVR_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
+ ILVRL_H2_SH(s1_m, s0_m, tmp4_m, tmp5_m); \
+ ILVL_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \
+ ILVRL_H2_SH(s1_m, s0_m, tmp6_m, tmp7_m); \
+ PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \
+ tmp3_m, tmp7_m, out0, out2, out4, out6); \
+ out1 = (RTYPE)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \
+ out3 = (RTYPE)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \
+ out5 = (RTYPE)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \
+ out7 = (RTYPE)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \
+}
+#define TRANSPOSE8x8_SH_SH(...) TRANSPOSE8x8_H(v8i16, __VA_ARGS__)
+
+/* Description : Transposes 4x4 block with word elements in vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1, out2, out3
+ Return Type - signed word
+ Details :
+*/
+#define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) { \
+ v4i32 s0_m, s1_m, s2_m, s3_m; \
+ \
+ ILVRL_W2_SW(in1, in0, s0_m, s1_m); \
+ ILVRL_W2_SW(in3, in2, s2_m, s3_m); \
+ \
+ out0 = (v4i32)__msa_ilvr_d((v2i64)s2_m, (v2i64)s0_m); \
+ out1 = (v4i32)__msa_ilvl_d((v2i64)s2_m, (v2i64)s0_m); \
+ out2 = (v4i32)__msa_ilvr_d((v2i64)s3_m, (v2i64)s1_m); \
+ out3 = (v4i32)__msa_ilvl_d((v2i64)s3_m, (v2i64)s1_m); \
+}
+
+/* Description : Add block 4x4
+ Arguments : Inputs - in0, in1, in2, in3, pdst, stride
+ Outputs -
+ Return Type - unsigned bytes
+ Details : Least significant 4 bytes from each input vector are added to
+ the destination bytes, clipped between 0-255 and then stored.
+*/
+#define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride) { \
+ uint32_t src0_m, src1_m, src2_m, src3_m; \
+ uint32_t out0_m, out1_m, out2_m, out3_m; \
+ v8i16 inp0_m, inp1_m, res0_m, res1_m; \
+ v16i8 dst0_m = { 0 }; \
+ v16i8 dst1_m = { 0 }; \
+ v16i8 zero_m = { 0 }; \
\
- out0_m = __msa_copy_u_d((v2i64)tmp0_m, 0); \
- out1_m = __msa_copy_u_d((v2i64)tmp0_m, 1); \
- out2_m = __msa_copy_u_d((v2i64)tmp1_m, 0); \
- out3_m = __msa_copy_u_d((v2i64)tmp1_m, 1); \
+ ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m) \
+ LW4(pdst, stride, src0_m, src1_m, src2_m, src3_m); \
+ INSERT_W2_SB(src0_m, src1_m, dst0_m); \
+ INSERT_W2_SB(src2_m, src3_m, dst1_m); \
+ ILVR_B2_SH(zero_m, dst0_m, zero_m, dst1_m, res0_m, res1_m); \
+ ADD2(res0_m, inp0_m, res1_m, inp1_m, res0_m, res1_m); \
+ CLIP_SH2_0_255(res0_m, res1_m); \
+ PCKEV_B2_SB(res0_m, res0_m, res1_m, res1_m, dst0_m, dst1_m); \
\
- STORE_DWORD(dst_m, out0_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out1_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out2_m); \
- dst_m += stride; \
- STORE_DWORD(dst_m, out3_m); \
-}
-
-#define PCKEV_B_AVG_STORE_VEC(in1, in2, dst, pdest) { \
- v16u8 tmp_m; \
- \
- tmp_m = (v16u8)__msa_pckev_b((v16i8)(in1), (v16i8)(in2)); \
- tmp_m = __msa_aver_u_b(tmp_m, (v16u8)(dst)); \
- STORE_UB(tmp_m, (pdest)); \
-}
-
-/* Generic for Vector types and GP operations */
-#define BUTTERFLY_4(in0, in1, in2, in3, \
- out0, out1, out2, out3) { \
- out0 = (in0) + (in3); \
- out1 = (in1) + (in2); \
- \
- out2 = (in1) - (in2); \
- out3 = (in0) - (in3); \
-}
-
-/* Generic for Vector types and GP operations */
-#define BUTTERFLY_8(in0, in1, in2, in3, \
- in4, in5, in6, in7, \
- out0, out1, out2, out3, \
- out4, out5, out6, out7) { \
- out0 = (in0) + (in7); \
- out1 = (in1) + (in6); \
- out2 = (in2) + (in5); \
- out3 = (in3) + (in4); \
- \
- out4 = (in3) - (in4); \
- out5 = (in2) - (in5); \
- out6 = (in1) - (in6); \
- out7 = (in0) - (in7); \
-}
-#endif /* HAVE_MSA */
+ out0_m = __msa_copy_u_w((v4i32)dst0_m, 0); \
+ out1_m = __msa_copy_u_w((v4i32)dst0_m, 1); \
+ out2_m = __msa_copy_u_w((v4i32)dst1_m, 0); \
+ out3_m = __msa_copy_u_w((v4i32)dst1_m, 1); \
+ SW4(out0_m, out1_m, out2_m, out3_m, pdst, stride); \
+}
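In scalar terms, ADDBLK_ST4x4_UB is the usual reconstruction step: add a 4x4 block of residuals to the predicted pixels, clip to 8 bits and store. A rough sketch, assuming a hypothetical row-major int16_t residual layout (helper name illustrative):

#include <stdint.h>

/* Scalar model of ADDBLK_ST4x4_UB. */
static void addblk_4x4_model(const int16_t res[16], uint8_t *dst, int stride) {
  int r, c;
  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      int v = dst[r * stride + c] + res[r * 4 + c];
      if (v < 0) v = 0;
      if (v > 255) v = 255;
      dst[r * stride + c] = (uint8_t)v;
    }
  }
}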
+
+/* Description : Pack even elements of input vectors & xor with 128
+ Arguments : Inputs - in0, in1
+ Outputs - out_m
+ Return Type - unsigned byte
+ Details : Signed byte even elements from 'in0' and 'in1' are packed
+ together in one vector and the resulting vector is xor'ed with
+ 128 to shift the range from signed to unsigned byte
+*/
+#define PCKEV_XORI128_UB(in0, in1) ({ \
+ v16u8 out_m; \
+ \
+ out_m = (v16u8)__msa_pckev_b((v16i8)in1, (v16i8)in0); \
+ out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \
+ out_m; \
+})
+
+/* Description : Converts inputs to unsigned bytes, interleave, average & store
+ as 8x4 unsigned byte block
+ Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, dst2, dst3,
+ pdst, stride
+*/
+#define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, \
+ dst0, dst1, dst2, dst3, pdst, stride) { \
+ v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ uint8_t *pdst_m = (uint8_t *)(pdst); \
+ \
+ tmp0_m = PCKEV_XORI128_UB(in0, in1); \
+ tmp1_m = PCKEV_XORI128_UB(in2, in3); \
+ ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
+ AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
+ ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
+}
+
+/* Description : Pack even byte elements and store byte vector in destination
+ memory
+ Arguments : Inputs - in0, in1, pdst
+*/
+#define PCKEV_ST_SB(in0, in1, pdst) { \
+ v16i8 tmp_m; \
+ \
+ tmp_m = __msa_pckev_b((v16i8)in1, (v16i8)in0); \
+ ST_SB(tmp_m, (pdst)); \
+}
+
+/* Description : Horizontal 2 tap filter kernel code
+ Arguments : Inputs - in0, in1, mask, coeff, shift
+*/
+#define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) ({ \
+ v16i8 tmp0_m; \
+ v8u16 tmp1_m; \
+ \
+ tmp0_m = __msa_vshf_b((v16i8)mask, (v16i8)in1, (v16i8)in0); \
+ tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)coeff); \
+ tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, shift); \
+ tmp1_m = __msa_sat_u_h(tmp1_m, shift); \
+ \
+ tmp1_m; \
+})
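A one-pixel scalar sketch of the 2-tap kernel above, assuming it is used with FILTER_BITS == 7 as the shift (so the final saturation clamps to 255); c0/c1 stand for the two unsigned taps selected by 'coeff', and the helper name is illustrative:

#include <stdint.h>

/* Scalar model of one output lane of HORIZ_2TAP_FILT_UH. */
static uint16_t horiz_2tap_model(uint8_t p0, uint8_t p1, uint8_t c0, uint8_t c1) {
  uint32_t acc = (uint32_t)p0 * c0 + (uint32_t)p1 * c1;
  acc = (acc + 64) >> 7;                   /* rounded shift by FILTER_BITS */
  return acc > 255 ? 255 : (uint16_t)acc;  /* saturate like __msa_sat_u_h(.., 7) */
}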
#endif /* VP9_COMMON_MIPS_MSA_VP9_MACROS_MSA_H_ */
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_mfqe_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_mfqe_msa.c
new file mode 100644
index 00000000000..64cb9a81835
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/mips/msa/vp9_mfqe_msa.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
+static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride,
+ uint8_t *dst_ptr, int32_t dst_stride,
+ int32_t src_weight) {
+ int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
+ int32_t row;
+ uint64_t src0_d, src1_d, dst0_d, dst1_d;
+ v16i8 src0 = { 0 };
+ v16i8 src1 = { 0 };
+ v16i8 dst0 = { 0 };
+ v16i8 dst1 = { 0 };
+ v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
+
+ src_wt = __msa_fill_h(src_weight);
+ dst_wt = __msa_fill_h(dst_weight);
+
+ for (row = 2; row--;) {
+ LD2(src_ptr, src_stride, src0_d, src1_d);
+ src_ptr += (2 * src_stride);
+ LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
+ INSERT_D2_SB(src0_d, src1_d, src0);
+ INSERT_D2_SB(dst0_d, dst1_d, dst0);
+
+ LD2(src_ptr, src_stride, src0_d, src1_d);
+ src_ptr += (2 * src_stride);
+ LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
+ INSERT_D2_SB(src0_d, src1_d, src1);
+ INSERT_D2_SB(dst0_d, dst1_d, dst1);
+
+ UNPCK_UB_SH(src0, src_r, src_l);
+ UNPCK_UB_SH(dst0, dst_r, dst_l);
+ res_h_r = (src_r * src_wt);
+ res_h_r += (dst_r * dst_wt);
+ res_h_l = (src_l * src_wt);
+ res_h_l += (dst_l * dst_wt);
+ SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
+ dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
+ ST8x2_UB(dst0, dst_ptr, dst_stride);
+ dst_ptr += (2 * dst_stride);
+
+ UNPCK_UB_SH(src1, src_r, src_l);
+ UNPCK_UB_SH(dst1, dst_r, dst_l);
+ res_h_r = (src_r * src_wt);
+ res_h_r += (dst_r * dst_wt);
+ res_h_l = (src_l * src_wt);
+ res_h_l += (dst_l * dst_wt);
+ SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
+ dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
+ ST8x2_UB(dst1, dst_ptr, dst_stride);
+ dst_ptr += (2 * dst_stride);
+ }
+}
+
+static void filter_by_weight16x16_msa(const uint8_t *src_ptr,
+ int32_t src_stride,
+ uint8_t *dst_ptr,
+ int32_t dst_stride,
+ int32_t src_weight) {
+ int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
+ int32_t row;
+ v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
+ v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
+
+ src_wt = __msa_fill_h(src_weight);
+ dst_wt = __msa_fill_h(dst_weight);
+
+ for (row = 4; row--;) {
+ LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
+ src_ptr += (4 * src_stride);
+ LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
+
+ UNPCK_UB_SH(src0, src_r, src_l);
+ UNPCK_UB_SH(dst0, dst_r, dst_l);
+ res_h_r = (src_r * src_wt);
+ res_h_r += (dst_r * dst_wt);
+ res_h_l = (src_l * src_wt);
+ res_h_l += (dst_l * dst_wt);
+ SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
+ PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
+ dst_ptr += dst_stride;
+
+ UNPCK_UB_SH(src1, src_r, src_l);
+ UNPCK_UB_SH(dst1, dst_r, dst_l);
+ res_h_r = (src_r * src_wt);
+ res_h_r += (dst_r * dst_wt);
+ res_h_l = (src_l * src_wt);
+ res_h_l += (dst_l * dst_wt);
+ SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
+ PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
+ dst_ptr += dst_stride;
+
+ UNPCK_UB_SH(src2, src_r, src_l);
+ UNPCK_UB_SH(dst2, dst_r, dst_l);
+ res_h_r = (src_r * src_wt);
+ res_h_r += (dst_r * dst_wt);
+ res_h_l = (src_l * src_wt);
+ res_h_l += (dst_l * dst_wt);
+ SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
+ PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
+ dst_ptr += dst_stride;
+
+ UNPCK_UB_SH(src3, src_r, src_l);
+ UNPCK_UB_SH(dst3, dst_r, dst_l);
+ res_h_r = (src_r * src_wt);
+ res_h_r += (dst_r * dst_wt);
+ res_h_l = (src_l * src_wt);
+ res_h_l += (dst_l * dst_wt);
+ SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
+ PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
+ dst_ptr += dst_stride;
+ }
+}
+
+void vp9_filter_by_weight8x8_msa(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ int src_weight) {
+ filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight);
+}
+
+void vp9_filter_by_weight16x16_msa(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ int src_weight) {
+ filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight);
+}
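For reference, the weighted blend that both MSA routines above vectorise, written out in scalar form. This is a sketch, not part of the patch; it assumes MFQE_PRECISION is 4 (its value in vp9_mfqe.h) and uses an illustrative helper name:

#include <stdint.h>

/* Scalar model of filter_by_weight: dst = (src * w + dst * (16 - w) + 8) >> 4. */
static void filter_by_weight_model(const uint8_t *src, int src_stride,
                                   uint8_t *dst, int dst_stride,
                                   int width, int height, int src_weight) {
  const int precision = 4;  /* assumed MFQE_PRECISION */
  const int dst_weight = (1 << precision) - src_weight;
  const int round = 1 << (precision - 1);
  int r, c;
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c) {
      dst[r * dst_stride + c] =
          (uint8_t)((src[r * src_stride + c] * src_weight +
                     dst[r * dst_stride + c] * dst_weight + round) >> precision);
    }
  }
}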
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c
index 222b88eff5d..8eda491de93 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c
@@ -11,6 +11,7 @@
#include "./vpx_config.h"
#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_entropymv.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h
index 018a9c2b975..64d379cab34 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.h
@@ -18,74 +18,28 @@
#include "vpx_scale/yv12config.h"
#include "vp9/common/vp9_common_data.h"
-#include "vp9/common/vp9_filter.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_mv.h"
#include "vp9/common/vp9_scale.h"
+#include "vp9/common/vp9_seg_common.h"
#ifdef __cplusplus
extern "C" {
#endif
-#define BLOCK_SIZE_GROUPS 4
-#define SKIP_CONTEXTS 3
-#define INTER_MODE_CONTEXTS 7
-
-/* Segment Feature Masks */
-#define MAX_MV_REF_CANDIDATES 2
-
-#define INTRA_INTER_CONTEXTS 4
-#define COMP_INTER_CONTEXTS 5
-#define REF_CONTEXTS 5
-
-typedef enum {
- PLANE_TYPE_Y = 0,
- PLANE_TYPE_UV = 1,
- PLANE_TYPES
-} PLANE_TYPE;
-
#define MAX_MB_PLANE 3
-typedef char ENTROPY_CONTEXT;
-
-static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
- ENTROPY_CONTEXT b) {
- return (a != 0) + (b != 0);
-}
-
typedef enum {
KEY_FRAME = 0,
INTER_FRAME = 1,
FRAME_TYPES,
} FRAME_TYPE;
-typedef enum {
- DC_PRED, // Average of above and left pixels
- V_PRED, // Vertical
- H_PRED, // Horizontal
- D45_PRED, // Directional 45 deg = round(arctan(1/1) * 180/pi)
- D135_PRED, // Directional 135 deg = 180 - 45
- D117_PRED, // Directional 117 deg = 180 - 63
- D153_PRED, // Directional 153 deg = 180 - 27
- D207_PRED, // Directional 207 deg = 180 + 27
- D63_PRED, // Directional 63 deg = round(arctan(2/1) * 180/pi)
- TM_PRED, // True-motion
- NEARESTMV,
- NEARMV,
- ZEROMV,
- NEWMV,
- MB_MODE_COUNT
-} PREDICTION_MODE;
-
static INLINE int is_inter_mode(PREDICTION_MODE mode) {
return mode >= NEARESTMV && mode <= NEWMV;
}
-#define INTRA_MODES (TM_PRED + 1)
-
-#define INTER_MODES (1 + NEWMV - NEARESTMV)
-
-#define INTER_OFFSET(mode) ((mode) - NEARESTMV)
-
/* For keyframes, intra block modes are predicted by the (already decoded)
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
@@ -96,16 +50,16 @@ typedef struct {
} b_mode_info;
// Note that the rate-distortion optimization loop, bit-stream writer, and
-// decoder implementation modules critically rely on the enum entry values
+// decoder implementation modules critically rely on the defined entry values
// specified herein. They should be refactored concurrently.
-typedef enum {
- NONE = -1,
- INTRA_FRAME = 0,
- LAST_FRAME = 1,
- GOLDEN_FRAME = 2,
- ALTREF_FRAME = 3,
- MAX_REF_FRAMES = 4
-} MV_REFERENCE_FRAME;
+
+#define NONE -1
+#define INTRA_FRAME 0
+#define LAST_FRAME 1
+#define GOLDEN_FRAME 2
+#define ALTREF_FRAME 3
+#define MAX_REF_FRAMES 4
+typedef int8_t MV_REFERENCE_FRAME;
// This structure now relates to 8x8 block regions.
typedef struct {
@@ -121,12 +75,17 @@ typedef struct {
PREDICTION_MODE uv_mode;
// Only for INTER blocks
+ INTERP_FILTER interp_filter;
MV_REFERENCE_FRAME ref_frame[2];
+
+ // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead.
int_mv mv[2];
+
+#if CONFIG_VP9_ENCODER
+ // TODO(slavarnway): Move to encoder
int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
uint8_t mode_context[MAX_REF_FRAMES];
- INTERP_FILTER interp_filter;
-
+#endif
} MB_MODE_INFO;
typedef struct MODE_INFO {
@@ -170,9 +129,12 @@ struct macroblockd_plane {
int subsampling_y;
struct buf_2d dst;
struct buf_2d pre[2];
- const int16_t *dequant;
ENTROPY_CONTEXT *above_context;
ENTROPY_CONTEXT *left_context;
+ int16_t seg_dequant[MAX_SEGMENTS][2];
+
+ // encoder
+ const int16_t *dequant;
};
#define BLOCK_OFFSET(x, i) ((x) + (i) * 16)
@@ -187,7 +149,7 @@ typedef struct RefBuffer {
typedef struct macroblockd {
struct macroblockd_plane plane[MAX_MB_PLANE];
-
+ FRAME_COUNTS *counts;
int mi_stride;
MODE_INFO **mi;
@@ -199,12 +161,17 @@ typedef struct macroblockd {
int up_available;
int left_available;
+ const vp9_prob (*partition_probs)[PARTITION_TYPES - 1];
+
/* Distance of MB away from frame edges */
int mb_to_left_edge;
int mb_to_right_edge;
int mb_to_top_edge;
int mb_to_bottom_edge;
+ FRAME_CONTEXT *fc;
+ int frame_parallel_decoding_mode;
+
/* pointers to reference frames */
RefBuffer *block_refs[2];
@@ -217,13 +184,9 @@ typedef struct macroblockd {
PARTITION_CONTEXT *above_seg_context;
PARTITION_CONTEXT left_seg_context[8];
- /* mc buffer */
- DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
-
#if CONFIG_VP9_HIGHBITDEPTH
/* Bit depth: 8, 10, 12 */
int bd;
- DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]);
#endif
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
@@ -285,6 +248,27 @@ static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize,
return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
}
+static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++) {
+ struct macroblockd_plane *const pd = &xd->plane[i];
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+ memset(pd->above_context, 0,
+ sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]);
+ memset(pd->left_context, 0,
+ sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]);
+ }
+}
+
+static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi,
+ const MODE_INFO *above_mi,
+ const MODE_INFO *left_mi,
+ int block) {
+ const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block);
+ const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block);
+ return vp9_kf_y_mode_prob[above][left];
+}
+
typedef void (*foreach_transformed_block_visitor)(int plane, int block,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_common.h
index d06b8e0405e..9c2d7791e75 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_common.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_common.h
@@ -27,12 +27,6 @@ extern "C" {
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
-#define ROUND_POWER_OF_TWO(value, n) \
- (((value) + (1 << ((n) - 1))) >> (n))
-
-#define ALIGN_POWER_OF_TWO(value, n) \
- (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
-
// Only need this for fixed-size arrays, for structs just assign.
#define vp9_copy(dest, src) { \
assert(sizeof(dest) == sizeof(src)); \
@@ -83,9 +77,6 @@ static INLINE uint16_t clip_pixel_highbd(int val, int bd) {
typedef int64_t tran_high_t;
typedef int32_t tran_low_t;
-#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1))
-#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1 ))
-
#else
// Note:
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c
index a2584e8da5b..ad6c04bcc46 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c
@@ -133,12 +133,6 @@ const uint8_t vp9_pt_energy_class[ENTROPY_TOKENS] = {
0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5
};
-const vp9_tree_index vp9_coefmodel_tree[TREE_SIZE(UNCONSTRAINED_NODES + 1)] = {
- -EOB_MODEL_TOKEN, 2,
- -ZERO_TOKEN, 4,
- -ONE_TOKEN, -TWO_TOKEN,
-};
-
// Model obtained from a 2-sided zero-centerd distribuition derived
// from a Pareto distribution. The cdf of the distribution is:
// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta]
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h
index 5a9007b5417..2fc97c3f9ed 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h
@@ -14,8 +14,8 @@
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_enums.h"
#include "vp9/common/vp9_prob.h"
-#include "vp9/common/vp9_scan.h"
#ifdef __cplusplus
extern "C" {
@@ -74,7 +74,6 @@ DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high12[18]);
#endif // CONFIG_VP9_HIGHBITDEPTH
#define EOB_MODEL_TOKEN 3
-extern const vp9_tree_index vp9_coefmodel_tree[];
typedef struct {
const vp9_tree_index *tree;
@@ -137,18 +136,6 @@ struct VP9Common;
void vp9_default_coef_probs(struct VP9Common *cm);
void vp9_adapt_coef_probs(struct VP9Common *cm);
-static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) {
- int i;
- for (i = 0; i < MAX_MB_PLANE; i++) {
- struct macroblockd_plane *const pd = &xd->plane[i];
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
- memset(pd->above_context, 0,
- sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]);
- memset(pd->left_context, 0,
- sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]);
- }
-}
-
// This is the index in the scan order beyond which all coefficients for
// 8x8 transform and above are in the top band.
// This macro is currently unused but may be used by certain implementations
@@ -185,6 +172,13 @@ typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS]
void vp9_model_to_full_probs(const vp9_prob *model, vp9_prob *full);
+typedef char ENTROPY_CONTEXT;
+
+static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a,
+ ENTROPY_CONTEXT b) {
+ return (a != 0) + (b != 0);
+}
+
static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
const ENTROPY_CONTEXT *l) {
ENTROPY_CONTEXT above_ec = 0, left_ec = 0;
@@ -214,18 +208,6 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
return combine_entropy_contexts(above_ec, left_ec);
}
-static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
- PLANE_TYPE type, int block_idx) {
- const MODE_INFO *const mi = xd->mi[0];
-
- if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) {
- return &vp9_default_scan_orders[tx_size];
- } else {
- const PREDICTION_MODE mode = get_y_mode(mi, block_idx);
- return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]];
- }
-}
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c
index 424451fee39..22d431bb22d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c
@@ -314,7 +314,7 @@ static const vp9_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS]
{ 149, 144, },
};
-void vp9_init_mode_probs(FRAME_CONTEXT *fc) {
+static void init_mode_probs(FRAME_CONTEXT *fc) {
vp9_copy(fc->uv_mode_prob, default_if_uv_probs);
vp9_copy(fc->y_mode_prob, default_if_y_probs);
vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob);
@@ -444,7 +444,7 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
lf->last_sharpness_level = -1;
vp9_default_coef_probs(cm);
- vp9_init_mode_probs(cm->fc);
+ init_mode_probs(cm->fc);
vp9_init_mv_probs(cm);
cm->fc->initialized = 1;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.h
index f4e20e1af8b..8c9e6a7319d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.h
@@ -11,7 +11,7 @@
#ifndef VP9_COMMON_VP9_ENTROPYMODE_H_
#define VP9_COMMON_VP9_ENTROPYMODE_H_
-#include "vp9/common/vp9_blockd.h"
+#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymv.h"
@@ -19,8 +19,12 @@
extern "C" {
#endif
+#define BLOCK_SIZE_GROUPS 4
+
#define TX_SIZE_CONTEXTS 2
+#define INTER_OFFSET(mode) ((mode) - NEARESTMV)
+
struct VP9Common;
struct tx_probs {
@@ -86,8 +90,6 @@ extern const vp9_tree_index vp9_switchable_interp_tree
void vp9_setup_past_independence(struct VP9Common *cm);
-void vp9_init_mode_probs(FRAME_CONTEXT *fc);
-
void vp9_adapt_mode_probs(struct VP9Common *cm);
void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p,
@@ -97,15 +99,6 @@ void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
unsigned int (*ct_8x8p)[2]);
-static INLINE const vp9_prob *get_y_mode_probs(const MODE_INFO *mi,
- const MODE_INFO *above_mi,
- const MODE_INFO *left_mi,
- int block) {
- const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block);
- const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block);
- return vp9_kf_y_mode_prob[above][left];
-}
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h
index 7938fc10a11..d089f23f970 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_enums.h
@@ -12,6 +12,7 @@
#define VP9_COMMON_VP9_ENUMS_H_
#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
#ifdef __cplusplus
extern "C" {
@@ -40,23 +41,22 @@ typedef enum BITSTREAM_PROFILE {
MAX_PROFILES
} BITSTREAM_PROFILE;
-typedef enum BLOCK_SIZE {
- BLOCK_4X4,
- BLOCK_4X8,
- BLOCK_8X4,
- BLOCK_8X8,
- BLOCK_8X16,
- BLOCK_16X8,
- BLOCK_16X16,
- BLOCK_16X32,
- BLOCK_32X16,
- BLOCK_32X32,
- BLOCK_32X64,
- BLOCK_64X32,
- BLOCK_64X64,
- BLOCK_SIZES,
- BLOCK_INVALID = BLOCK_SIZES
-} BLOCK_SIZE;
+#define BLOCK_4X4 0
+#define BLOCK_4X8 1
+#define BLOCK_8X4 2
+#define BLOCK_8X8 3
+#define BLOCK_8X16 4
+#define BLOCK_16X8 5
+#define BLOCK_16X16 6
+#define BLOCK_16X32 7
+#define BLOCK_32X16 8
+#define BLOCK_32X32 9
+#define BLOCK_32X64 10
+#define BLOCK_64X32 11
+#define BLOCK_64X64 12
+#define BLOCK_SIZES 13
+#define BLOCK_INVALID BLOCK_SIZES
+typedef uint8_t BLOCK_SIZE;
typedef enum PARTITION_TYPE {
PARTITION_NONE,
@@ -72,13 +72,12 @@ typedef char PARTITION_CONTEXT;
#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET)
// block transform size
-typedef enum {
- TX_4X4 = 0, // 4x4 transform
- TX_8X8 = 1, // 8x8 transform
- TX_16X16 = 2, // 16x16 transform
- TX_32X32 = 3, // 32x32 transform
- TX_SIZES
-} TX_SIZE;
+typedef uint8_t TX_SIZE;
+#define TX_4X4 ((TX_SIZE)0) // 4x4 transform
+#define TX_8X8 ((TX_SIZE)1) // 8x8 transform
+#define TX_16X16 ((TX_SIZE)2) // 16x16 transform
+#define TX_32X32 ((TX_SIZE)3) // 32x32 transform
+#define TX_SIZES ((TX_SIZE)4)
// frame transform mode
typedef enum {
@@ -104,6 +103,43 @@ typedef enum {
VP9_ALT_FLAG = 1 << 2,
} VP9_REFFRAME;
+typedef enum {
+ PLANE_TYPE_Y = 0,
+ PLANE_TYPE_UV = 1,
+ PLANE_TYPES
+} PLANE_TYPE;
+
+#define DC_PRED 0 // Average of above and left pixels
+#define V_PRED 1 // Vertical
+#define H_PRED 2 // Horizontal
+#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi)
+#define D135_PRED 4 // Directional 135 deg = 180 - 45
+#define D117_PRED 5 // Directional 117 deg = 180 - 63
+#define D153_PRED 6 // Directional 153 deg = 180 - 27
+#define D207_PRED 7 // Directional 207 deg = 180 + 27
+#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi)
+#define TM_PRED 9 // True-motion
+#define NEARESTMV 10
+#define NEARMV 11
+#define ZEROMV 12
+#define NEWMV 13
+#define MB_MODE_COUNT 14
+typedef uint8_t PREDICTION_MODE;
+
+#define INTRA_MODES (TM_PRED + 1)
+
+#define INTER_MODES (1 + NEWMV - NEARESTMV)
+
+#define SKIP_CONTEXTS 3
+#define INTER_MODE_CONTEXTS 7
+
+/* Segment Feature Masks */
+#define MAX_MV_REF_CANDIDATES 2
+
+#define INTRA_INTER_CONTEXTS 4
+#define COMP_INTER_CONTEXTS 5
+#define REF_CONTEXTS 5
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c
index afcdf22ec63..14654a5ab2a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.c
@@ -12,7 +12,8 @@
#include "vp9/common/vp9_filter.h"
-const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS] = {
+DECLARE_ALIGNED(256, static const InterpKernel,
+ bilinear_filters[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 },
{ 0, 0, 0, 120, 8, 0, 0, 0 },
{ 0, 0, 0, 112, 16, 0, 0, 0 },
@@ -32,8 +33,8 @@ const InterpKernel vp9_bilinear_filters[SUBPEL_SHIFTS] = {
};
// Lagrangian interpolation filter
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0},
{ 0, 1, -5, 126, 8, -3, 1, 0},
{ -1, 3, -10, 122, 18, -6, 2, 0},
@@ -53,8 +54,8 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
// DCT based filter
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8s[SUBPEL_SHIFTS]) = {
{0, 0, 0, 128, 0, 0, 0, 0},
{-1, 3, -7, 127, 8, -3, 1, 0},
{-2, 5, -13, 125, 17, -6, 3, -1},
@@ -74,8 +75,8 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
// freqmultiplier = 0.5
-DECLARE_ALIGNED(256, const InterpKernel,
- vp9_sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_8lp[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0},
{-3, -1, 32, 64, 38, 1, -3, 0},
{-2, -2, 29, 63, 41, 2, -3, 0},
@@ -95,15 +96,15 @@ DECLARE_ALIGNED(256, const InterpKernel,
};
-static const InterpKernel* vp9_filter_kernels[4] = {
- vp9_sub_pel_filters_8,
- vp9_sub_pel_filters_8lp,
- vp9_sub_pel_filters_8s,
- vp9_bilinear_filters
+static const InterpKernel* filter_kernels[4] = {
+ sub_pel_filters_8,
+ sub_pel_filters_8lp,
+ sub_pel_filters_8s,
+ bilinear_filters
};
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter) {
assert(filter != SWITCHABLE);
- return vp9_filter_kernels[filter];
+ return filter_kernels[filter];
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h
index d963ee23569..13d38affbaa 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_filter.h
@@ -27,30 +27,21 @@ extern "C" {
#define SUBPEL_SHIFTS (1 << SUBPEL_BITS)
#define SUBPEL_TAPS 8
-typedef enum {
- EIGHTTAP = 0,
- EIGHTTAP_SMOOTH = 1,
- EIGHTTAP_SHARP = 2,
- SWITCHABLE_FILTERS = 3, /* Number of switchable filters */
- BILINEAR = 3,
- // The codec can operate in four possible inter prediction filter mode:
- // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
- SWITCHABLE_FILTER_CONTEXTS = SWITCHABLE_FILTERS + 1,
- SWITCHABLE = 4 /* should be the last one */
-} INTERP_FILTER;
+#define EIGHTTAP 0
+#define EIGHTTAP_SMOOTH 1
+#define EIGHTTAP_SHARP 2
+#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
+#define BILINEAR 3
+// The codec can operate in four possible inter prediction filter mode:
+// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
+#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
+#define SWITCHABLE 4 /* should be the last one */
+typedef uint8_t INTERP_FILTER;
typedef int16_t InterpKernel[SUBPEL_TAPS];
const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
-DECLARE_ALIGNED(256, extern const InterpKernel,
- vp9_bilinear_filters[SUBPEL_SHIFTS]);
-
-// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
-// filter kernel as a 2 tap filter.
-#define BILINEAR_FILTERS_2TAP(x) \
- (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c
index 3b214371c47..174b96e21ad 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c
@@ -11,6 +11,7 @@
#include <math.h>
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.h
index 6e2551dd4bc..cee1682a67f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.h
@@ -14,6 +14,7 @@
#include <assert.h>
#include "./vpx_config.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c
index 69d393ef469..9816728364a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c
@@ -13,6 +13,7 @@
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_reconinter.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_seg_common.h"
@@ -266,8 +267,8 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
int lvl_seg = default_filt_lvl;
- if (vp9_segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
- const int data = vp9_get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
+ if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
+ const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ?
data : default_filt_lvl + data,
0, MAX_LOOP_FILTER);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter_filters.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter_filters.c
index 2e32c40b85e..3cf4c32253e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter_filters.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter_filters.c
@@ -9,6 +9,7 @@
*/
#include "./vpx_config.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mfqe.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mfqe.c
index 57189df16ec..bebb37eda07 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mfqe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mfqe.c
@@ -171,13 +171,13 @@ static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
get_thr(bs, qdiff, &sad_thr, &vdiff_thr);
if (bs == BLOCK_16X16) {
- vdiff = (vp9_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
+ vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
} else if (bs == BLOCK_32X32) {
- vdiff = (vp9_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
+ vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
} else /* if (bs == BLOCK_64X64) */ {
- vdiff = (vp9_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
+ vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
}
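
Note on the MFQE hunk above: the variance helpers become the shared vpx_variance* versions, and each SAD/variance result is reduced to a rounded per-pixel average before thresholding. A sketch of that normalization (the function name is illustrative):

    #include <stdint.h>

    /* A 16x16 block has 256 pixels, so (value + 128) >> 8 is the rounded
     * per-pixel average; 32x32 uses shift 10 and 64x64 uses shift 12. */
    static uint32_t per_pixel_avg(uint32_t block_total, int log2_pixels) {
      return (block_total + (1u << (log2_pixels - 1))) >> log2_pixels;
    }
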
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.c
index 51e147e0056..5f8ee0fcc50 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.c
@@ -18,7 +18,8 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int block, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data) {
+ find_mv_refs_sync sync, void *const data,
+ uint8_t *mode_context) {
const int *ref_sign_bias = cm->ref_frame_sign_bias;
int i, refmv_count = 0;
const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
@@ -138,7 +139,7 @@ static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd,
Done:
- mi->mbmi.mode_context[ref_frame] = counter_to_context[context_counter];
+ mode_context[ref_frame] = counter_to_context[context_counter];
// Clamp vectors
for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i)
@@ -150,9 +151,10 @@ void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list,
int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data) {
+ find_mv_refs_sync sync, void *const data,
+ uint8_t *mode_context) {
find_mv_refs_idx(cm, xd, tile, mi, ref_frame, mv_ref_list, -1,
- mi_row, mi_col, sync, data);
+ mi_row, mi_col, sync, data, mode_context);
}
static void lower_mv_precision(MV *mv, int allow_hp) {
@@ -181,7 +183,8 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *const tile,
int block, int ref, int mi_row, int mi_col,
- int_mv *nearest_mv, int_mv *near_mv) {
+ int_mv *nearest_mv, int_mv *near_mv,
+ uint8_t *mode_context) {
int_mv mv_list[MAX_MV_REF_CANDIDATES];
MODE_INFO *const mi = xd->mi[0];
b_mode_info *bmi = mi->bmi;
@@ -190,7 +193,7 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
assert(MAX_MV_REF_CANDIDATES == 2);
find_mv_refs_idx(cm, xd, tile, mi, mi->mbmi.ref_frame[ref], mv_list, block,
- mi_row, mi_col, NULL, NULL);
+ mi_row, mi_col, NULL, NULL, mode_context);
near_mv->as_int = 0;
switch (block) {
@@ -223,6 +226,6 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
break;
}
default:
- assert("Invalid block index.");
+ assert(0 && "Invalid block index.");
}
}
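
Note on the assert fix above: assert("Invalid block index.") always passes, because a string literal decays to a non-null pointer and is therefore truthy; assert(0 && "...") actually fires while keeping the message in the diagnostic output. A tiny self-contained sketch:

    #include <assert.h>

    static void check_block(int block) {
      switch (block) {
        case 0: case 1: case 2: case 3:
          break;
        default:
          assert(0 && "Invalid block index.");  /* fails and prints the text */
      }
    }
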
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.h
index f1df521468f..621dc14be65 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_mvref_common.h
@@ -212,7 +212,8 @@ void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
const TileInfo *const tile,
MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
int_mv *mv_ref_list, int mi_row, int mi_col,
- find_mv_refs_sync sync, void *const data);
+ find_mv_refs_sync sync, void *const data,
+ uint8_t *mode_context);
// check a list of motion vectors by sad score using a number rows of pixels
// above and a number cols of pixels in the left to select the one with best
@@ -223,7 +224,8 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
const TileInfo *const tile,
int block, int ref, int mi_row, int mi_col,
- int_mv *nearest_mv, int_mv *near_mv);
+ int_mv *nearest_mv, int_mv *near_mv,
+ uint8_t *mode_context);
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h
index f710f810618..1811d76dfd3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_onyxc_int.h
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h"
#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_entropymv.h"
#include "vp9/common/vp9_entropy.h"
@@ -161,7 +162,8 @@ typedef struct VP9Common {
int show_existing_frame;
// Flag signaling that the frame is encoded using only INTRA modes.
- int intra_only;
+ uint8_t intra_only;
+ uint8_t last_intra_only;
int allow_high_precision_mv;
@@ -263,6 +265,7 @@ typedef struct VP9Common {
int log2_tile_cols, log2_tile_rows;
int byte_alignment;
+ int skip_loop_filter;
// Private data associated with the frame buffer callbacks.
void *cb_priv;
@@ -307,8 +310,13 @@ static INLINE int get_free_fb(VP9_COMMON *cm) {
if (frame_bufs[i].ref_count == 0)
break;
- assert(i < FRAME_BUFFERS);
- frame_bufs[i].ref_count = 1;
+ if (i != FRAME_BUFFERS) {
+ frame_bufs[i].ref_count = 1;
+ } else {
+ // Reset i to be INVALID_IDX to indicate no free buffer found.
+ i = INVALID_IDX;
+ }
+
unlock_buffer_pool(cm->buffer_pool);
return i;
}
@@ -328,6 +336,18 @@ static INLINE int mi_cols_aligned_to_sb(int n_mis) {
return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2);
}
+static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) {
+ return cm->frame_type == KEY_FRAME || cm->intra_only;
+}
+
+static INLINE void set_partition_probs(const VP9_COMMON *const cm,
+ MACROBLOCKD *const xd) {
+ xd->partition_probs =
+ frame_is_intra_only(cm) ?
+ &vp9_kf_partition_probs[0] :
+ (const vp9_prob (*)[PARTITION_TYPES - 1])cm->fc->partition_prob;
+}
+
static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) {
int i;
@@ -335,21 +355,26 @@ static INLINE void init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd) {
xd->plane[i].dqcoeff = xd->dqcoeff;
xd->above_context[i] = cm->above_context +
i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols);
+
+ if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
+ memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant));
+ } else {
+ memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant));
+ }
+ xd->fc = cm->fc;
+ xd->frame_parallel_decoding_mode = cm->frame_parallel_decoding_mode;
}
xd->above_seg_context = cm->above_seg_context;
xd->mi_stride = cm->mi_stride;
xd->error_info = &cm->error;
-}
-static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) {
- return cm->frame_type == KEY_FRAME || cm->intra_only;
+ set_partition_probs(cm, xd);
}
-static INLINE const vp9_prob* get_partition_probs(const VP9_COMMON *cm,
+static INLINE const vp9_prob* get_partition_probs(const MACROBLOCKD *xd,
int ctx) {
- return frame_is_intra_only(cm) ? vp9_kf_partition_probs[ctx]
- : cm->fc->partition_prob[ctx];
+ return xd->partition_probs[ctx];
}
static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) {
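
Note on get_free_fb above: instead of asserting when every frame buffer is still referenced, the index is set to INVALID_IDX so callers can treat pool exhaustion as a recoverable error. A self-contained sketch of the same search pattern (the struct and constants here are illustrative, not the real pool types):

    #define NUM_BUFS 12
    #define INVALID_IDX (-1)

    typedef struct { int ref_count; } FrameBuf;

    static int find_free_buf(FrameBuf *bufs) {
      int i;
      for (i = 0; i < NUM_BUFS; ++i)
        if (bufs[i].ref_count == 0) break;
      if (i == NUM_BUFS) return INVALID_IDX;  /* no free buffer available */
      bufs[i].ref_count = 1;                  /* claim it for the caller */
      return i;
    }
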
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.c
index 983a4744dd6..d26a6eb5c88 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_postproc.c
@@ -16,12 +16,10 @@
#include "./vpx_scale_rtcd.h"
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vpx_scale/vpx_scale.h"
#include "vpx_scale/yv12config.h"
-#if CONFIG_VP9_HIGHBITDEPTH
-#include "vp9/common/vp9_common.h"
-#endif
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_postproc.h"
#include "vp9/common/vp9_systemdependent.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_quant_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_quant_common.c
index 564a3eb0ce3..d83f3c1a2f5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_quant_common.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_quant_common.c
@@ -266,8 +266,8 @@ int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) {
int vp9_get_qindex(const struct segmentation *seg, int segment_id,
int base_qindex) {
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
- const int data = vp9_get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
+ if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) {
+ const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ?
data : base_qindex + data;
return clamp(seg_qindex, 0, MAXQ);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.c
index 825d03d69b2..1e9acb8d5d3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.c
@@ -12,6 +12,7 @@
#include "./vp9_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_once.h"
#include "vp9/common/vp9_reconintra.h"
@@ -78,6 +79,15 @@ static const uint8_t extend_modes[INTRA_MODES] = {
intra_pred_highbd_sized(type, 16) \
intra_pred_highbd_sized(type, 32)
+#define intra_pred_no_4x4(type) \
+ intra_pred_sized(type, 8) \
+ intra_pred_sized(type, 16) \
+ intra_pred_sized(type, 32) \
+ intra_pred_highbd_sized(type, 4) \
+ intra_pred_highbd_sized(type, 8) \
+ intra_pred_highbd_sized(type, 16) \
+ intra_pred_highbd_sized(type, 32)
+
#else
#define intra_pred_allsizes(type) \
@@ -85,8 +95,17 @@ static const uint8_t extend_modes[INTRA_MODES] = {
intra_pred_sized(type, 8) \
intra_pred_sized(type, 16) \
intra_pred_sized(type, 32)
+
+#define intra_pred_no_4x4(type) \
+ intra_pred_sized(type, 8) \
+ intra_pred_sized(type, 16) \
+ intra_pred_sized(type, 32)
#endif // CONFIG_VP9_HIGHBITDEPTH
+#define DST(x, y) dst[(x) + (y) * stride]
+#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
+#define AVG2(a, b) (((a) + (b) + 1) >> 1)
+
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride,
int bs, const uint16_t *above,
@@ -97,18 +116,16 @@ static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride,
// First column.
for (r = 0; r < bs - 1; ++r) {
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1);
+ dst[r * stride] = AVG2(left[r], left[r + 1]);
}
dst[(bs - 1) * stride] = left[bs - 1];
dst++;
// Second column.
for (r = 0; r < bs - 2; ++r) {
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1] * 2 +
- left[r + 2], 2);
+ dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]);
}
- dst[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left[bs - 2] +
- left[bs - 1] * 3, 2);
+ dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]);
dst[(bs - 1) * stride] = left[bs - 1];
dst++;
@@ -130,11 +147,9 @@ static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride,
(void) bd;
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c) {
- dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] +
- above[r/2 + c + 1] * 2 +
- above[r/2 + c + 2], 2)
- : ROUND_POWER_OF_TWO(above[r/2 + c] +
- above[r/2 + c + 1], 1);
+ dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1],
+ above[(r >> 1) + c + 2])
+ : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]);
}
dst += stride;
}
@@ -148,9 +163,8 @@ static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs,
(void) bd;
for (r = 0; r < bs; ++r) {
for (c = 0; c < bs; ++c) {
- dst[c] = r + c + 2 < bs * 2 ? ROUND_POWER_OF_TWO(above[r + c] +
- above[r + c + 1] * 2 +
- above[r + c + 2], 2)
+ dst[c] = r + c + 2 < bs * 2 ? AVG3(above[r + c], above[r + c + 1],
+ above[r + c + 2])
: above[bs * 2 - 1];
}
dst += stride;
@@ -165,20 +179,19 @@ static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride,
// first row
for (c = 0; c < bs; c++)
- dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c], 1);
+ dst[c] = AVG2(above[c - 1], above[c]);
dst += stride;
// second row
- dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
+ dst[0] = AVG3(left[0], above[-1], above[0]);
for (c = 1; c < bs; c++)
- dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2);
+ dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
dst += stride;
// the rest of first col
- dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
+ dst[0] = AVG3(above[-1], left[0], left[1]);
for (r = 3; r < bs; ++r)
- dst[(r - 2) * stride] = ROUND_POWER_OF_TWO(left[r - 3] + left[r - 2] * 2 +
- left[r - 1], 2);
+ dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]);
// the rest of the block
for (r = 2; r < bs; ++r) {
@@ -193,14 +206,13 @@ static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride,
const uint16_t *left, int bd) {
int r, c;
(void) bd;
- dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
+ dst[0] = AVG3(left[0], above[-1], above[0]);
for (c = 1; c < bs; c++)
- dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2);
+ dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
- dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
+ dst[stride] = AVG3(above[-1], left[0], left[1]);
for (r = 2; r < bs; ++r)
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 +
- left[r], 2);
+ dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
dst += stride;
for (r = 1; r < bs; ++r) {
@@ -215,20 +227,19 @@ static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride,
const uint16_t *left, int bd) {
int r, c;
(void) bd;
- dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1);
+ dst[0] = AVG2(above[-1], left[0]);
for (r = 1; r < bs; r++)
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 1] + left[r], 1);
+ dst[r * stride] = AVG2(left[r - 1], left[r]);
dst++;
- dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
- dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
+ dst[0] = AVG3(left[0], above[-1], above[0]);
+ dst[stride] = AVG3(above[-1], left[0], left[1]);
for (r = 2; r < bs; r++)
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 +
- left[r], 2);
+ dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
dst++;
for (c = 0; c < bs - 2; c++)
- dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c] * 2 + above[c + 1], 2);
+ dst[c] = AVG3(above[c - 1], above[c], above[c + 1]);
dst += stride;
for (r = 1; r < bs; ++r) {
@@ -344,22 +355,37 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
+void vp9_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+ (void)above;
+ DST(0, 0) = AVG2(I, J);
+ DST(2, 0) = DST(0, 1) = AVG2(J, K);
+ DST(2, 1) = DST(0, 2) = AVG2(K, L);
+ DST(1, 0) = AVG3(I, J, K);
+ DST(3, 0) = DST(1, 1) = AVG3(J, K, L);
+ DST(3, 1) = DST(1, 2) = AVG3(K, L, L);
+ DST(3, 2) = DST(2, 2) =
+ DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L;
+}
+
static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
(void) above;
// first column
for (r = 0; r < bs - 1; ++r)
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1], 1);
+ dst[r * stride] = AVG2(left[r], left[r + 1]);
dst[(bs - 1) * stride] = left[bs - 1];
dst++;
// second column
for (r = 0; r < bs - 2; ++r)
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r] + left[r + 1] * 2 +
- left[r + 2], 2);
- dst[(bs - 2) * stride] = ROUND_POWER_OF_TWO(left[bs - 2] +
- left[bs - 1] * 3, 2);
+ dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]);
+ dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]);
dst[(bs - 1) * stride] = left[bs - 1];
dst++;
@@ -371,38 +397,112 @@ static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
for (c = 0; c < bs - 2; ++c)
dst[r * stride + c] = dst[(r + 1) * stride + c - 2];
}
-intra_pred_allsizes(d207)
+intra_pred_no_4x4(d207)
+
+void vp9_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ const int E = above[4];
+ const int F = above[5];
+ const int G = above[6];
+ (void)left;
+ DST(0, 0) = AVG2(A, B);
+ DST(1, 0) = DST(0, 2) = AVG2(B, C);
+ DST(2, 0) = DST(1, 2) = AVG2(C, D);
+ DST(3, 0) = DST(2, 2) = AVG2(D, E);
+ DST(3, 2) = AVG2(E, F); // differs from vp8
+
+ DST(0, 1) = AVG3(A, B, C);
+ DST(1, 1) = DST(0, 3) = AVG3(B, C, D);
+ DST(2, 1) = DST(1, 3) = AVG3(C, D, E);
+ DST(3, 1) = DST(2, 3) = AVG3(D, E, F);
+ DST(3, 3) = AVG3(E, F, G); // differs from vp8
+}
static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
- (void) left;
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
- dst[c] = r & 1 ? ROUND_POWER_OF_TWO(above[r/2 + c] +
- above[r/2 + c + 1] * 2 +
- above[r/2 + c + 2], 2)
- : ROUND_POWER_OF_TWO(above[r/2 + c] +
- above[r/2 + c + 1], 1);
- dst += stride;
+ int size;
+ (void)left;
+ for (c = 0; c < bs; ++c) {
+ dst[c] = AVG2(above[c], above[c + 1]);
+ dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]);
+ }
+ for (r = 2, size = bs - 2; r < bs; r += 2, --size) {
+ memcpy(dst + (r + 0) * stride, dst + (r >> 1), size);
+ memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size);
+ memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size);
+ memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size);
}
}
-intra_pred_allsizes(d63)
+intra_pred_no_4x4(d63)
+
+void vp9_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ const int E = above[4];
+ const int F = above[5];
+ const int G = above[6];
+ const int H = above[7];
+ (void)stride;
+ (void)left;
+ DST(0, 0) = AVG3(A, B, C);
+ DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
+ DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
+ DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
+ DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
+ DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
+ DST(3, 3) = H; // differs from vp8
+}
static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
- int r, c;
- (void) left;
- for (r = 0; r < bs; ++r) {
- for (c = 0; c < bs; ++c)
- dst[c] = r + c + 2 < bs * 2 ? ROUND_POWER_OF_TWO(above[r + c] +
- above[r + c + 1] * 2 +
- above[r + c + 2], 2)
- : above[bs * 2 - 1];
+ const uint8_t above_right = above[bs - 1];
+ const uint8_t *const dst_row0 = dst;
+ int x, size;
+ (void)left;
+
+ for (x = 0; x < bs - 1; ++x) {
+ dst[x] = AVG3(above[x], above[x + 1], above[x + 2]);
+ }
+ dst[bs - 1] = above_right;
+ dst += stride;
+ for (x = 1, size = bs - 2; x < bs; ++x, --size) {
+ memcpy(dst, dst_row0 + x, size);
+ memset(dst + size, above_right, x + 1);
dst += stride;
}
}
-intra_pred_allsizes(d45)
+intra_pred_no_4x4(d45)
+
+void vp9_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int X = above[-1];
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ DST(0, 0) = DST(1, 2) = AVG2(X, A);
+ DST(1, 0) = DST(2, 2) = AVG2(A, B);
+ DST(2, 0) = DST(3, 2) = AVG2(B, C);
+ DST(3, 0) = AVG2(C, D);
+
+ DST(0, 3) = AVG3(K, J, I);
+ DST(0, 2) = AVG3(J, I, X);
+ DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
+ DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
+ DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
+ DST(3, 1) = AVG3(B, C, D);
+}
static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
@@ -410,20 +510,19 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
// first row
for (c = 0; c < bs; c++)
- dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c], 1);
+ dst[c] = AVG2(above[c - 1], above[c]);
dst += stride;
// second row
- dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
+ dst[0] = AVG3(left[0], above[-1], above[0]);
for (c = 1; c < bs; c++)
- dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2);
+ dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
dst += stride;
// the rest of first col
- dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
+ dst[0] = AVG3(above[-1], left[0], left[1]);
for (r = 3; r < bs; ++r)
- dst[(r - 2) * stride] = ROUND_POWER_OF_TWO(left[r - 3] + left[r - 2] * 2 +
- left[r - 1], 2);
+ dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]);
// the rest of the block
for (r = 2; r < bs; ++r) {
@@ -432,19 +531,39 @@ static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride;
}
}
-intra_pred_allsizes(d117)
+intra_pred_no_4x4(d117)
+
+void vp9_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+ const int X = above[-1];
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+ const int D = above[3];
+ (void)stride;
+ DST(0, 3) = AVG3(J, K, L);
+ DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
+ DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
+ DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
+ DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
+ DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
+ DST(3, 0) = AVG3(D, C, B);
+}
static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
- dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
+ dst[0] = AVG3(left[0], above[-1], above[0]);
for (c = 1; c < bs; c++)
- dst[c] = ROUND_POWER_OF_TWO(above[c - 2] + above[c - 1] * 2 + above[c], 2);
+ dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
- dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
+ dst[stride] = AVG3(above[-1], left[0], left[1]);
for (r = 2; r < bs; ++r)
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 +
- left[r], 2);
+ dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
dst += stride;
for (r = 1; r < bs; ++r) {
@@ -453,25 +572,48 @@ static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride;
}
}
-intra_pred_allsizes(d135)
+intra_pred_no_4x4(d135)
+
+void vp9_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ const int I = left[0];
+ const int J = left[1];
+ const int K = left[2];
+ const int L = left[3];
+ const int X = above[-1];
+ const int A = above[0];
+ const int B = above[1];
+ const int C = above[2];
+
+ DST(0, 0) = DST(2, 1) = AVG2(I, X);
+ DST(0, 1) = DST(2, 2) = AVG2(J, I);
+ DST(0, 2) = DST(2, 3) = AVG2(K, J);
+ DST(0, 3) = AVG2(L, K);
+
+ DST(3, 0) = AVG3(A, B, C);
+ DST(2, 0) = AVG3(X, A, B);
+ DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
+ DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
+ DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
+ DST(1, 3) = AVG3(L, K, J);
+}
static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
int r, c;
- dst[0] = ROUND_POWER_OF_TWO(above[-1] + left[0], 1);
+ dst[0] = AVG2(above[-1], left[0]);
for (r = 1; r < bs; r++)
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 1] + left[r], 1);
+ dst[r * stride] = AVG2(left[r - 1], left[r]);
dst++;
- dst[0] = ROUND_POWER_OF_TWO(left[0] + above[-1] * 2 + above[0], 2);
- dst[stride] = ROUND_POWER_OF_TWO(above[-1] + left[0] * 2 + left[1], 2);
+ dst[0] = AVG3(left[0], above[-1], above[0]);
+ dst[stride] = AVG3(above[-1], left[0], left[1]);
for (r = 2; r < bs; r++)
- dst[r * stride] = ROUND_POWER_OF_TWO(left[r - 2] + left[r - 1] * 2 +
- left[r], 2);
+ dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
dst++;
for (c = 0; c < bs - 2; c++)
- dst[c] = ROUND_POWER_OF_TWO(above[c - 1] + above[c] * 2 + above[c + 1], 2);
+ dst[c] = AVG3(above[c - 1], above[c], above[c + 1]);
dst += stride;
for (r = 1; r < bs; ++r) {
@@ -480,7 +622,7 @@ static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
dst += stride;
}
}
-intra_pred_allsizes(d153)
+intra_pred_no_4x4(d153)
static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs,
const uint8_t *above, const uint8_t *left) {
@@ -658,7 +800,7 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
DECLARE_ALIGNED(16, uint16_t, left_col[32]);
- DECLARE_ALIGNED(16, uint16_t, above_data[128 + 16]);
+ DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]);
uint16_t *above_row = above_data + 16;
const uint16_t *const_above_row = above_row;
const int bs = 4 << tx_size;
@@ -781,7 +923,7 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
int plane) {
int i;
DECLARE_ALIGNED(16, uint8_t, left_col[32]);
- DECLARE_ALIGNED(16, uint8_t, above_data[128 + 16]);
+ DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]);
uint8_t *above_row = above_data + 16;
const uint8_t *const_above_row = above_row;
const int bs = 4 << tx_size;
@@ -943,6 +1085,6 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
have_top, have_left, have_right, x, y, plane);
}
-void vp9_init_intra_predictors() {
+void vp9_init_intra_predictors(void) {
once(vp9_init_intra_predictors_internal);
}
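
Note on the vp9_reconintra.c hunks above: the repeated ROUND_POWER_OF_TWO expressions are replaced by the AVG2/AVG3 rounding-average macros plus a DST(x, y) addressing macro, and dedicated 4x4 predictors are split out of the generic loops. A standalone sketch of how those macros express a 4x4 d45-style prediction, assuming eight valid "above" pixels, mirroring the generic d45_predictor() loop for bs = 4:

    #include <stddef.h>
    #include <stdint.h>

    #define DST(x, y) dst[(x) + (y) * stride]
    #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
    #define AVG2(a, b) (((a) + (b) + 1) >> 1)

    static void d45_4x4_sketch(uint8_t *dst, ptrdiff_t stride,
                               const uint8_t *above) {
      int r, c;
      for (r = 0; r < 4; ++r)
        for (c = 0; c < 4; ++c)
          DST(c, r) = (r + c + 2 < 8)
              ? (uint8_t)AVG3(above[r + c], above[r + c + 1], above[r + c + 2])
              : above[7];  /* past the end, repeat the last above pixel */
    }
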
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.h
index 845f3bcaac7..da5e435b132 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_reconintra.h
@@ -18,7 +18,7 @@
extern "C" {
#endif
-void vp9_init_intra_predictors();
+void vp9_init_intra_predictors(void);
void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in,
TX_SIZE tx_size, PREDICTION_MODE mode,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
index d05afa525c3..22a5efdd5b2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -60,52 +60,52 @@ add_proto qw/void vp9_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vp9_d207_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d45_predictor_4x4/, "$ssse3_x86inc";
+specialize qw/vp9_d45_predictor_4x4 neon/, "$ssse3_x86inc";
add_proto qw/void vp9_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_4x4 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_4x4 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_4x4/;
add_proto qw/void vp9_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d135_predictor_4x4/;
+specialize qw/vp9_d135_predictor_4x4 neon/;
add_proto qw/void vp9_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d153_predictor_4x4/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_4x4 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_4x4 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_tm_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_4x4 neon dspr2/, "$sse_x86inc";
+specialize qw/vp9_tm_predictor_4x4 neon dspr2 msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_4x4 dspr2/, "$sse_x86inc";
+specialize qw/vp9_dc_predictor_4x4 dspr2 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_dc_top_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_4x4/, "$sse_x86inc";
+specialize qw/vp9_dc_top_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_dc_left_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_4x4/, "$sse_x86inc";
+specialize qw/vp9_dc_left_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_4x4/, "$sse_x86inc";
+specialize qw/vp9_dc_128_predictor_4x4 msa neon/, "$sse_x86inc";
add_proto qw/void vp9_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vp9_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d45_predictor_8x8/, "$ssse3_x86inc";
+specialize qw/vp9_d45_predictor_8x8 neon/, "$ssse3_x86inc";
add_proto qw/void vp9_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_8x8 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_8x8 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_8x8/;
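
Note on the specialize lines in vp9_rtcd_defs.pl: each add_proto/specialize pair makes the build generate a function pointer that is bound at setup time to the best implementation available on the running CPU, so the msa and neon entries added here simply become extra candidates. A hedged C sketch of that dispatch idea, using stand-in names rather than the generated ones:

    #include <stddef.h>
    #include <stdint.h>

    typedef void (*predictor_fn)(uint8_t *dst, ptrdiff_t stride,
                                 const uint8_t *above, const uint8_t *left);

    static void d45_4x4_c(uint8_t *dst, ptrdiff_t stride,
                          const uint8_t *above, const uint8_t *left) {
      (void)dst; (void)stride; (void)above; (void)left;  /* plain C fallback */
    }

    static void d45_4x4_neon_stub(uint8_t *dst, ptrdiff_t stride,
                                  const uint8_t *above, const uint8_t *left) {
      d45_4x4_c(dst, stride, above, left);  /* stands in for the SIMD version */
    }

    static predictor_fn d45_predictor_4x4_ptr = d45_4x4_c;

    static void rtcd_setup_sketch(int cpu_has_neon) {
      if (cpu_has_neon)
        d45_predictor_4x4_ptr = d45_4x4_neon_stub;  /* pick the best candidate */
    }
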
@@ -117,34 +117,34 @@ add_proto qw/void vp9_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize qw/vp9_d153_predictor_8x8/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_8x8 neon/, "$sse_x86inc";
+specialize qw/vp9_v_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_8x8 neon dspr2/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_8x8 neon dspr2 msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_8x8 dspr2/, "$sse_x86inc";
+specialize qw/vp9_dc_predictor_8x8 dspr2 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_top_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_8x8/, "$sse_x86inc";
+specialize qw/vp9_dc_top_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_left_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_8x8/, "$sse_x86inc";
+specialize qw/vp9_dc_left_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_8x8/, "$sse_x86inc";
+specialize qw/vp9_dc_128_predictor_8x8 neon msa/, "$sse_x86inc";
add_proto qw/void vp9_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d45_predictor_16x16/, "$ssse3_x86inc";
+specialize qw/vp9_d45_predictor_16x16 neon/, "$ssse3_x86inc";
add_proto qw/void vp9_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d63_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_16x16 neon dspr2/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_16x16 neon dspr2 msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_16x16/;
@@ -156,22 +156,22 @@ add_proto qw/void vp9_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride,
specialize qw/vp9_d153_predictor_16x16/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_tm_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_16x16 neon/, "$sse2_x86inc";
+specialize qw/vp9_tm_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_16x16 dspr2/, "$sse2_x86inc";
+specialize qw/vp9_dc_predictor_16x16 dspr2 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_top_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_16x16/, "$sse2_x86inc";
+specialize qw/vp9_dc_top_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_left_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_16x16/, "$sse2_x86inc";
+specialize qw/vp9_dc_left_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_16x16/, "$sse2_x86inc";
+specialize qw/vp9_dc_128_predictor_16x16 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d207_predictor_32x32/, "$ssse3_x86inc";
@@ -183,7 +183,7 @@ add_proto qw/void vp9_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, c
specialize qw/vp9_d63_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vp9_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_h_predictor_32x32 neon/, "$ssse3_x86inc";
+specialize qw/vp9_h_predictor_32x32 neon msa/, "$ssse3_x86inc";
add_proto qw/void vp9_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
specialize qw/vp9_d117_predictor_32x32/;
@@ -192,68 +192,68 @@ add_proto qw/void vp9_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride,
specialize qw/vp9_d135_predictor_32x32/;
add_proto qw/void vp9_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_d153_predictor_32x32/;
+specialize qw/vp9_d153_predictor_32x32/, "$ssse3_x86inc";
add_proto qw/void vp9_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_v_predictor_32x32 neon/, "$sse2_x86inc";
+specialize qw/vp9_v_predictor_32x32 neon msa/, "$sse2_x86inc";
add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_tm_predictor_32x32 neon/, "$sse2_x86_64";
+specialize qw/vp9_tm_predictor_32x32 neon msa/, "$sse2_x86_64";
add_proto qw/void vp9_dc_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vp9_dc_top_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_top_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_top_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vp9_dc_left_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_left_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_left_predictor_32x32 msa neon/, "$sse2_x86inc";
add_proto qw/void vp9_dc_128_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vp9_dc_128_predictor_32x32/, "$sse2_x86inc";
+specialize qw/vp9_dc_128_predictor_32x32 msa neon/, "$sse2_x86inc";
#
# Loopfilter
#
add_proto qw/void vp9_lpf_vertical_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_vertical_16 sse2 neon_asm dspr2 msa/;
$vp9_lpf_vertical_16_neon_asm=vp9_lpf_vertical_16_neon;
add_proto qw/void vp9_lpf_vertical_16_dual/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
-specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
$vp9_lpf_vertical_16_dual_neon_asm=vp9_lpf_vertical_16_dual_neon;
add_proto qw/void vp9_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_vertical_8 sse2 neon_asm dspr2 msa/;
$vp9_lpf_vertical_8_neon_asm=vp9_lpf_vertical_8_neon;
add_proto qw/void vp9_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
$vp9_lpf_vertical_8_dual_neon_asm=vp9_lpf_vertical_8_dual_neon;
add_proto qw/void vp9_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_vertical_4 mmx neon dspr2/;
+specialize qw/vp9_lpf_vertical_4 mmx neon dspr2 msa/;
add_proto qw/void vp9_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_vertical_4_dual sse2 neon dspr2 msa/;
add_proto qw/void vp9_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2/;
+specialize qw/vp9_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
$vp9_lpf_horizontal_16_neon_asm=vp9_lpf_horizontal_16_neon;
add_proto qw/void vp9_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_horizontal_8 sse2 neon_asm dspr2 msa/;
$vp9_lpf_horizontal_8_neon_asm=vp9_lpf_horizontal_8_neon;
add_proto qw/void vp9_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2/;
+specialize qw/vp9_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
$vp9_lpf_horizontal_8_dual_neon_asm=vp9_lpf_horizontal_8_dual_neon;
add_proto qw/void vp9_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2/;
+specialize qw/vp9_lpf_horizontal_4 mmx neon dspr2 msa/;
add_proto qw/void vp9_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
-specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2/;
+specialize qw/vp9_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
#
# post proc
@@ -276,10 +276,10 @@ specialize qw/vp9_plane_add_noise sse2/;
$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
-specialize qw/vp9_filter_by_weight16x16 sse2/;
+specialize qw/vp9_filter_by_weight16x16 sse2 msa/;
add_proto qw/void vp9_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
-specialize qw/vp9_filter_by_weight8x8 sse2/;
+specialize qw/vp9_filter_by_weight8x8 sse2 msa/;
}
#
@@ -301,13 +301,13 @@ add_proto qw/void vp9_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride
specialize qw/vp9_convolve8_vert sse2 ssse3 neon dspr2 msa/, "$avx2_ssse3";
add_proto qw/void vp9_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2/;
+specialize qw/vp9_convolve8_avg sse2 ssse3 neon dspr2 msa/;
add_proto qw/void vp9_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2/;
+specialize qw/vp9_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa/;
add_proto qw/void vp9_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
-specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2/;
+specialize qw/vp9_convolve8_avg_vert sse2 ssse3 neon dspr2 msa/;
#
# dct
@@ -419,19 +419,19 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_iwht4x4_16_add/;
} else {
add_proto qw/void vp9_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct4x4_1_add sse2 neon dspr2/;
+ specialize qw/vp9_idct4x4_1_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct4x4_16_add sse2 neon dspr2/;
+ specialize qw/vp9_idct4x4_16_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_1_add sse2 neon dspr2/;
+ specialize qw/vp9_idct8x8_1_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_64_add sse2 neon dspr2/, "$ssse3_x86_64";
+ specialize qw/vp9_idct8x8_64_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
add_proto qw/void vp9_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_idct8x8_12_add sse2 neon dspr2/, "$ssse3_x86_64";
+ specialize qw/vp9_idct8x8_12_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
add_proto qw/void vp9_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vp9_idct16x16_1_add sse2 neon dspr2 msa/;
@@ -454,10 +454,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_idct32x32_1_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
- specialize qw/vp9_iht4x4_16_add sse2 neon dspr2/;
+ specialize qw/vp9_iht4x4_16_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
- specialize qw/vp9_iht8x8_64_add sse2 neon dspr2/;
+ specialize qw/vp9_iht8x8_64_add sse2 neon dspr2 msa/;
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp9_iht16x16_256_add sse2 dspr2 msa/;
@@ -465,10 +465,10 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# dct and add
add_proto qw/void vp9_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_iwht4x4_1_add/;
+ specialize qw/vp9_iwht4x4_1_add msa/;
add_proto qw/void vp9_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vp9_iwht4x4_16_add/;
+ specialize qw/vp9_iwht4x4_16_add msa/;
}
}
@@ -797,51 +797,6 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
# variance
-add_proto qw/unsigned int vp9_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x16 avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance16x32/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x32 avx2 neon/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x64 neon/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance32x32 avx2 neon/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance64x64 avx2 neon/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance16x16 avx2 neon/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance16x8/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance8x16/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance8x8 neon/, "$sse2_x86inc";
-
-add_proto qw/void vp9_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-specialize qw/vp9_get8x8var neon/, "$sse2_x86inc";
-
-add_proto qw/void vp9_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
-specialize qw/vp9_get16x16var avx2 neon/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance8x4/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance4x8/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
-specialize qw/vp9_variance4x4/, "$sse2_x86inc";
-
add_proto qw/unsigned int vp9_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_sub_pixel_variance64x64 avx2 neon/, "$sse2_x86inc", "$ssse3_x86inc";
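
Note on the removals above: the block variance and get*var prototypes (and the MSE ones a bit further down) leave vp9_rtcd_defs.pl because the shared vpx_dsp versions, as used in the vp9_mfqe.c hunk earlier, take over. For reference, a hedged sketch of the quantity these helpers compute: SSE minus the squared mean difference over the block, all in integer math (the per-size vpx_dsp routines are specialized; this is only the generic form):

    #include <stdint.h>

    static uint32_t variance_wxh_sketch(const uint8_t *src, int src_stride,
                                        const uint8_t *ref, int ref_stride,
                                        int w, int h, uint32_t *sse_out) {
      int64_t sum = 0;
      uint64_t sse = 0;
      int r, c;
      for (r = 0; r < h; ++r) {
        for (c = 0; c < w; ++c) {
          const int d = src[c] - ref[c];
          sum += d;                   /* accumulate signed differences */
          sse += (uint64_t)(d * d);   /* accumulate squared differences */
        }
        src += src_stride;
        ref += ref_stride;
      }
      *sse_out = (uint32_t)sse;
      return (uint32_t)(sse - (uint64_t)((sum * sum) / (w * h)));
    }
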
@@ -922,26 +877,11 @@ specialize qw/vp9_sub_pixel_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
add_proto qw/unsigned int vp9_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_sub_pixel_avg_variance4x4/, "$sse_x86inc", "$ssse3_x86inc";
-add_proto qw/unsigned int vp9_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-specialize qw/vp9_mse16x16 avx2/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-specialize qw/vp9_mse8x16/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-specialize qw/vp9_mse16x8/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
-specialize qw/vp9_mse8x8/, "$sse2_x86inc";
-
-add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
-specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
-
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
-specialize qw/vp9_avg_8x8 sse2 neon/;
+specialize qw/vp9_avg_8x8 sse2 neon msa/;
add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
-specialize qw/vp9_avg_4x4 sse2/;
+specialize qw/vp9_avg_4x4 sse2 msa/;
add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vp9_minmax_8x8 sse2/;
@@ -969,14 +909,14 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_highbd_avg_8x8/;
add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_highbd_avg_4x4/;
- add_proto qw/unsigned int vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
+ add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vp9_highbd_minmax_8x8/;
}
# ENCODEMB INVOKE
add_proto qw/void vp9_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
-specialize qw/vp9_subtract_block neon/, "$sse2_x86inc";
+specialize qw/vp9_subtract_block neon msa/, "$sse2_x86inc";
#
# Denoiser
@@ -1008,7 +948,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fdct8x8_quant/;
} else {
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
- specialize qw/vp9_block_error avx2/, "$sse2_x86inc";
+ specialize qw/vp9_block_error avx2 msa/, "$sse2_x86inc";
add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
specialize qw/vp9_block_error_fp sse2/;
@@ -1083,43 +1023,43 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fdct32x32_rd sse2/;
} else {
add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
- specialize qw/vp9_fht4x4 sse2/;
+ specialize qw/vp9_fht4x4 sse2 msa/;
add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
- specialize qw/vp9_fht8x8 sse2/;
+ specialize qw/vp9_fht8x8 sse2 msa/;
add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
- specialize qw/vp9_fht16x16 sse2/;
+ specialize qw/vp9_fht16x16 sse2 msa/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
+ specialize qw/vp9_fwht4x4 msa/, "$mmx_x86inc";
add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct4x4_1 sse2/;
add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct4x4 sse2/;
+ specialize qw/vp9_fdct4x4 sse2 msa/;
add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct8x8_1 sse2 neon/;
+ specialize qw/vp9_fdct8x8_1 sse2 neon msa/;
add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct8x8 sse2 neon/, "$ssse3_x86_64";
+ specialize qw/vp9_fdct8x8 sse2 neon msa/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct16x16_1 sse2/;
+ specialize qw/vp9_fdct16x16_1 sse2 msa/;
add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct16x16 sse2/;
+ specialize qw/vp9_fdct16x16 sse2 msa/;
add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32_1 sse2/;
+ specialize qw/vp9_fdct32x32_1 sse2 msa/;
add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32 sse2 avx2/;
+ specialize qw/vp9_fdct32x32 sse2 avx2 msa/;
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vp9_fdct32x32_rd sse2 avx2/;
+ specialize qw/vp9_fdct32x32_rd sse2 avx2 msa/;
}
#
@@ -1137,146 +1077,10 @@ add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const stru
specialize qw/vp9_full_range_search/;
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
-specialize qw/vp9_temporal_filter_apply sse2/;
+specialize qw/vp9_temporal_filter_apply sse2 msa/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- # variance
- add_proto qw/unsigned int vp9_highbd_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance8x4/;
-
- add_proto qw/unsigned int vp9_highbd_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance4x8/;
-
- add_proto qw/unsigned int vp9_highbd_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_variance4x4/;
-
- add_proto qw/void vp9_highbd_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_get8x8var/, "$sse2_x86inc";
-
- add_proto qw/void vp9_highbd_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_get16x16var/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance8x4/;
-
- add_proto qw/unsigned int vp9_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance4x8/;
-
- add_proto qw/unsigned int vp9_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_variance4x4/;
-
- add_proto qw/void vp9_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_10_get8x8var/, "$sse2_x86inc";
-
- add_proto qw/void vp9_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_10_get16x16var/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance32x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance16x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance64x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance32x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance32x32/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance64x64/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance16x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance16x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance8x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance8x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance8x4/;
-
- add_proto qw/unsigned int vp9_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance4x8/;
-
- add_proto qw/unsigned int vp9_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_variance4x4/;
-
- add_proto qw/void vp9_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_12_get8x8var/, "$sse2_x86inc";
-
- add_proto qw/void vp9_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vp9_highbd_12_get16x16var/, "$sse2_x86inc";
-
add_proto qw/unsigned int vp9_highbd_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_highbd_sub_pixel_variance64x64/, "$sse2_x86inc";
@@ -1511,41 +1315,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int vp9_highbd_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_highbd_12_sub_pixel_avg_variance4x4/;
- add_proto qw/unsigned int vp9_highbd_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_mse16x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_mse8x16/;
-
- add_proto qw/unsigned int vp9_highbd_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_mse16x8/;
-
- add_proto qw/unsigned int vp9_highbd_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_mse8x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_mse16x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_mse8x16/;
-
- add_proto qw/unsigned int vp9_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_mse16x8/;
-
- add_proto qw/unsigned int vp9_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_10_mse8x8/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_mse16x16/, "$sse2_x86inc";
-
- add_proto qw/unsigned int vp9_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_mse8x16/;
-
- add_proto qw/unsigned int vp9_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_mse16x8/;
-
- add_proto qw/unsigned int vp9_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
- specialize qw/vp9_highbd_12_mse8x8/, "$sse2_x86inc";
# ENCODEMB INVOKE
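The specialize lines above feed libvpx's run-time CPU dispatch (rtcd.pl): each ISA suffix listed after a prototype registers an optimized variant, and the generated vp9_rtcd.h picks the best one the CPU supports at init time. A minimal stand-alone sketch of that pattern, assuming hypothetical my_* names (this is not the generated code itself):

/* Hedged sketch of the rtcd dispatch pattern; my_* names are illustrative. */
#include <stdint.h>

typedef void (*fdct_fn)(const int16_t *input, int16_t *output, int stride);

static void my_fdct8x8_c(const int16_t *in, int16_t *out, int stride)    { (void)in; (void)out; (void)stride; }
static void my_fdct8x8_sse2(const int16_t *in, int16_t *out, int stride) { (void)in; (void)out; (void)stride; }
static void my_fdct8x8_msa(const int16_t *in, int16_t *out, int stride)  { (void)in; (void)out; (void)stride; }

fdct_fn my_fdct8x8 = my_fdct8x8_c;  /* C fallback is always available */

static void my_rtcd_init(int have_sse2, int have_msa) {
  if (have_sse2) my_fdct8x8 = my_fdct8x8_sse2;
  if (have_msa)  my_fdct8x8 = my_fdct8x8_msa;  /* MSA variants added by this change */
}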
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scan.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scan.h
index 65e2aa69a54..1d86b5cfe2c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scan.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_scan.h
@@ -38,6 +38,18 @@ static INLINE int get_coef_context(const int16_t *neighbors,
token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1;
}
+static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
+ PLANE_TYPE type, int block_idx) {
+ const MODE_INFO *const mi = xd->mi[0];
+
+ if (is_inter_block(&mi->mbmi) || type != PLANE_TYPE_Y || xd->lossless) {
+ return &vp9_default_scan_orders[tx_size];
+ } else {
+ const PREDICTION_MODE mode = get_y_mode(mi, block_idx);
+ return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]];
+ }
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
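The new inline helper centralizes a rule shared by encoder and decoder: inter blocks, non-luma planes and lossless blocks always use the default scan for the transform size, while intra luma blocks pick a scan keyed off the transform type implied by the prediction mode. A minimal self-contained restatement of that rule, with hypothetical toy_* names rather than the libvpx types:

/* Toy restatement of the selection rule in get_scan(); not libvpx API. */
enum { TOY_DEFAULT_SCAN = 0 };

static int toy_pick_scan(int is_inter, int is_luma_plane, int lossless,
                         int mode_tx_type /* derived from the intra mode */) {
  if (is_inter || !is_luma_plane || lossless)
    return TOY_DEFAULT_SCAN;   /* maps to vp9_default_scan_orders[tx_size]  */
  return mode_tx_type;         /* maps to vp9_scan_orders[tx_size][tx_type] */
}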
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.c
index 910200ecc9c..471e238ccff 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.c
@@ -25,12 +25,6 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = {
// the coding mechanism is still subject to change so these provide a
// convenient single point of change.
-int vp9_segfeature_active(const struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- return seg->enabled &&
- (seg->feature_mask[segment_id] & (1 << feature_id));
-}
-
void vp9_clearall_segfeatures(struct segmentation *seg) {
vp9_zero(seg->feature_data);
vp9_zero(seg->feature_mask);
@@ -60,12 +54,6 @@ void vp9_set_segdata(struct segmentation *seg, int segment_id,
seg->feature_data[segment_id][feature_id] = seg_data;
}
-int vp9_get_segdata(const struct segmentation *seg, int segment_id,
- SEG_LVL_FEATURES feature_id) {
- return seg->feature_data[segment_id][feature_id];
-}
-
-
const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
2, 4, 6, 8, 10, 12,
0, -1, -2, -3, -4, -5, -6, -7
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.h
index ff2d66a3658..95c9918303d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_seg_common.h
@@ -49,9 +49,12 @@ struct segmentation {
unsigned int feature_mask[MAX_SEGMENTS];
};
-int vp9_segfeature_active(const struct segmentation *seg,
- int segment_id,
- SEG_LVL_FEATURES feature_id);
+static INLINE int segfeature_active(const struct segmentation *seg,
+ int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ return seg->enabled &&
+ (seg->feature_mask[segment_id] & (1 << feature_id));
+}
void vp9_clearall_segfeatures(struct segmentation *seg);
@@ -68,9 +71,10 @@ void vp9_set_segdata(struct segmentation *seg,
SEG_LVL_FEATURES feature_id,
int seg_data);
-int vp9_get_segdata(const struct segmentation *seg,
- int segment_id,
- SEG_LVL_FEATURES feature_id);
+static INLINE int get_segdata(const struct segmentation *seg, int segment_id,
+ SEG_LVL_FEATURES feature_id) {
+ return seg->feature_data[segment_id][feature_id];
+}
extern const vp9_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
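segfeature_active and get_segdata are now static inline because they are trivial accessors: a feature is active for a segment when segmentation is enabled and the corresponding bit is set in that segment's feature mask, and the feature data is a plain table lookup. A small self-contained sketch of the bit-mask test, using hypothetical toy_* names:

/* Minimal sketch of the mask test segfeature_active inlines; toy_* names
 * are illustrative, not libvpx symbols. */
#include <stdio.h>

enum { TOY_SEG_LVL_ALT_Q = 0, TOY_SEG_LVL_MAX = 4 };

struct toy_segmentation {
  int enabled;
  unsigned int feature_mask[8];
  int feature_data[8][TOY_SEG_LVL_MAX];
};

static int toy_segfeature_active(const struct toy_segmentation *seg,
                                 int segment_id, int feature_id) {
  return seg->enabled && (seg->feature_mask[segment_id] & (1 << feature_id));
}

int main(void) {
  struct toy_segmentation seg = { 1, { 0 }, { { 0 } } };
  seg.feature_mask[2] |= 1 << TOY_SEG_LVL_ALT_Q;  /* enable alt-Q on segment 2 */
  seg.feature_data[2][TOY_SEG_LVL_ALT_Q] = -12;   /* quantizer delta           */
  printf("%d\n", toy_segfeature_active(&seg, 2, TOY_SEG_LVL_ALT_Q));  /* 1 */
  printf("%d\n", toy_segfeature_active(&seg, 1, TOY_SEG_LVL_ALT_Q));  /* 0 */
  return 0;
}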
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_systemdependent.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_systemdependent.h
index 161c381ad08..fc77762def5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_systemdependent.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_systemdependent.h
@@ -11,15 +11,14 @@
#ifndef VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
#define VP9_COMMON_VP9_SYSTEMDEPENDENT_H_
+#include "vpx_ports/msvc.h"
+
#ifdef _MSC_VER
# include <math.h> // the ceil() definition must precede intrin.h
# if _MSC_VER > 1310 && (defined(_M_X64) || defined(_M_IX86))
# include <intrin.h>
-# define USE_MSC_INTRIN
+# define USE_MSC_INTRINSICS
# endif
-#if _MSC_VER < 1900
-# define snprintf _snprintf
-#endif
#endif
#ifdef __cplusplus
@@ -50,7 +49,7 @@ static INLINE int round(double x) {
static INLINE int get_msb(unsigned int n) {
return 31 ^ __builtin_clz(n);
}
-#elif defined(USE_MSC_INTRIN)
+#elif defined(USE_MSC_INTRINSICS)
#pragma intrinsic(_BitScanReverse)
static INLINE int get_msb(unsigned int n) {
@@ -58,7 +57,7 @@ static INLINE int get_msb(unsigned int n) {
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
}
-#undef USE_MSC_INTRIN
+#undef USE_MSC_INTRINSICS
#else
// Returns (int)floor(log2(n)). n must be > 0.
static INLINE int get_msb(unsigned int n) {
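All three get_msb paths return the same value, (int)floor(log2(n)) for n > 0: GCC via 31 ^ __builtin_clz(n), MSVC via _BitScanReverse, and the portable fallback by shifting. A small stand-alone sketch of the fallback behaviour, with a hypothetical toy_ name:

/* Sketch of the portable get_msb fallback; toy_get_msb is illustrative. */
#include <assert.h>
#include <stdio.h>

static int toy_get_msb(unsigned int n) {
  int log = 0;
  unsigned int value = n;
  assert(n > 0);                       /* n must be > 0, as noted above */
  for (; value > 1; value >>= 1) ++log;
  return log;
}

int main(void) {
  printf("%d %d %d\n", toy_get_msb(1), toy_get_msb(8), toy_get_msb(1000));
  /* prints: 0 3 9 */
  return 0;
}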
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h
new file mode 100644
index 00000000000..de2df47e5e5
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/convolve.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VP9_COMMON_X86_CONVOLVE_H_
+#define VP9_COMMON_X86_CONVOLVE_H_
+
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+typedef void filter8_1dfunction (
+ const uint8_t *src_ptr,
+ ptrdiff_t src_pitch,
+ uint8_t *output_ptr,
+ ptrdiff_t out_pitch,
+ uint32_t output_height,
+ const int16_t *filter
+);
+
+#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+ void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
+ uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *filter_x, int x_step_q4, \
+ const int16_t *filter_y, int y_step_q4, \
+ int w, int h) { \
+ if (step_q4 == 16 && filter[3] != 128) { \
+ if (filter[0] || filter[1] || filter[2]) { \
+ while (w >= 16) { \
+ vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
+ src += 16; \
+ dst += 16; \
+ w -= 16; \
+ } \
+ while (w >= 8) { \
+ vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
+ src += 8; \
+ dst += 8; \
+ w -= 8; \
+ } \
+ while (w >= 4) { \
+ vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
+ src += 4; \
+ dst += 4; \
+ w -= 4; \
+ } \
+ } else { \
+ while (w >= 16) { \
+ vp9_filter_block1d16_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
+ src += 16; \
+ dst += 16; \
+ w -= 16; \
+ } \
+ while (w >= 8) { \
+ vp9_filter_block1d8_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
+ src += 8; \
+ dst += 8; \
+ w -= 8; \
+ } \
+ while (w >= 4) { \
+ vp9_filter_block1d4_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter); \
+ src += 4; \
+ dst += 4; \
+ w -= 4; \
+ } \
+ } \
+ } \
+ if (w) { \
+ vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h); \
+ } \
+}
+
+#define FUN_CONV_2D(avg, opt) \
+void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+ uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *filter_x, int x_step_q4, \
+ const int16_t *filter_y, int y_step_q4, \
+ int w, int h) { \
+ assert(w <= 64); \
+ assert(h <= 64); \
+ if (x_step_q4 == 16 && y_step_q4 == 16) { \
+ if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
+ filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
+ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
+ vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h + 7); \
+ vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, \
+ y_step_q4, w, h); \
+ } else { \
+ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
+ vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h + 1); \
+ vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, \
+ y_step_q4, w, h); \
+ } \
+ } else { \
+ vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
+ } \
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+
+typedef void highbd_filter8_1dfunction (
+ const uint16_t *src_ptr,
+ const ptrdiff_t src_pitch,
+ uint16_t *output_ptr,
+ ptrdiff_t out_pitch,
+ unsigned int output_height,
+ const int16_t *filter,
+ int bd
+);
+
+#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+ void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
+ ptrdiff_t src_stride, \
+ uint8_t *dst8, \
+ ptrdiff_t dst_stride, \
+ const int16_t *filter_x, \
+ int x_step_q4, \
+ const int16_t *filter_y, \
+ int y_step_q4, \
+ int w, int h, int bd) { \
+ if (step_q4 == 16 && filter[3] != 128) { \
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
+ if (filter[0] || filter[1] || filter[2]) { \
+ while (w >= 16) { \
+ vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter, \
+ bd); \
+ src += 16; \
+ dst += 16; \
+ w -= 16; \
+ } \
+ while (w >= 8) { \
+ vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter, \
+ bd); \
+ src += 8; \
+ dst += 8; \
+ w -= 8; \
+ } \
+ while (w >= 4) { \
+ vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter, \
+ bd); \
+ src += 4; \
+ dst += 4; \
+ w -= 4; \
+ } \
+ } else { \
+ while (w >= 16) { \
+ vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter, \
+ bd); \
+ src += 16; \
+ dst += 16; \
+ w -= 16; \
+ } \
+ while (w >= 8) { \
+ vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter, \
+ bd); \
+ src += 8; \
+ dst += 8; \
+ w -= 8; \
+ } \
+ while (w >= 4) { \
+ vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \
+ src_stride, \
+ dst, \
+ dst_stride, \
+ h, \
+ filter, \
+ bd); \
+ src += 4; \
+ dst += 4; \
+ w -= 4; \
+ } \
+ } \
+ } \
+ if (w) { \
+ vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, \
+ w, h, bd); \
+ } \
+}
+
+#define HIGH_FUN_CONV_2D(avg, opt) \
+void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
+ uint8_t *dst, ptrdiff_t dst_stride, \
+ const int16_t *filter_x, int x_step_q4, \
+ const int16_t *filter_y, int y_step_q4, \
+ int w, int h, int bd) { \
+ assert(w <= 64); \
+ assert(h <= 64); \
+ if (x_step_q4 == 16 && y_step_q4 == 16) { \
+ if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
+ filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
+ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
+ vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
+ CONVERT_TO_BYTEPTR(fdata2), 64, \
+ filter_x, x_step_q4, \
+ filter_y, y_step_q4, \
+ w, h + 7, bd); \
+ vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \
+ 64, dst, dst_stride, \
+ filter_x, x_step_q4, \
+ filter_y, y_step_q4, \
+ w, h, bd); \
+ } else { \
+ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
+ vp9_highbd_convolve8_horiz_##opt(src, src_stride, \
+ CONVERT_TO_BYTEPTR(fdata2), 64, \
+ filter_x, x_step_q4, \
+ filter_y, y_step_q4, \
+ w, h + 1, bd); \
+ vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \
+ dst, dst_stride, \
+ filter_x, x_step_q4, \
+ filter_y, y_step_q4, \
+ w, h, bd); \
+ } \
+ } else { \
+ vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
+ filter_x, x_step_q4, filter_y, y_step_q4, w, \
+ h, bd); \
+ } \
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#endif // VP9_COMMON_X86_CONVOLVE_H_
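The FUN_CONV_1D / FUN_CONV_2D macros moved into this new header expand to wrappers that tile the block width into 16-, 8- and 4-pixel strips handled by the SIMD kernels and hand any remainder (or unsupported step_q4/filter combinations) to the C reference function. A minimal toy of that tiling idea, in plain C with hypothetical toy_* names and a trivial copy standing in for the real filter kernels:

/* Hedged toy of the column-tiling pattern FUN_CONV_1D generates. */
#include <stdint.h>
#include <string.h>

static void toy_copy_strip(const uint8_t *src, uint8_t *dst, int strip, int h,
                           int stride) {
  int y;
  for (y = 0; y < h; ++y)
    memcpy(dst + y * stride, src + y * stride, strip);  /* stand-in "kernel" */
}

static void toy_convolve_row(const uint8_t *src, uint8_t *dst, int w, int h,
                             int stride) {
  while (w >= 16) { toy_copy_strip(src, dst, 16, h, stride); src += 16; dst += 16; w -= 16; }
  while (w >= 8)  { toy_copy_strip(src, dst, 8,  h, stride); src += 8;  dst += 8;  w -= 8;  }
  while (w >= 4)  { toy_copy_strip(src, dst, 4,  h, stride); src += 4;  dst += 4;  w -= 4;  }
  if (w) toy_copy_strip(src, dst, w, h, stride);          /* scalar fallback */
}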
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c
index 963023c53b1..fd55fb8c664 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_asm_stubs.c
@@ -8,421 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#include <assert.h>
-
-#include "./vpx_config.h"
#include "./vp9_rtcd.h"
-#include "vpx_ports/mem.h"
-
-typedef void filter8_1dfunction (
- const unsigned char *src_ptr,
- const ptrdiff_t src_pitch,
- unsigned char *output_ptr,
- ptrdiff_t out_pitch,
- unsigned int output_height,
- const short *filter
-);
-
-#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
- void vp9_convolve8_##name##_##opt(const uint8_t *src, ptrdiff_t src_stride, \
- uint8_t *dst, ptrdiff_t dst_stride, \
- const int16_t *filter_x, int x_step_q4, \
- const int16_t *filter_y, int y_step_q4, \
- int w, int h) { \
- if (step_q4 == 16 && filter[3] != 128) { \
- if (filter[0] || filter[1] || filter[2]) { \
- while (w >= 16) { \
- vp9_filter_block1d16_##dir##8_##avg##opt(src_start, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
- while (w >= 8) { \
- vp9_filter_block1d8_##dir##8_##avg##opt(src_start, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
- vp9_filter_block1d4_##dir##8_##avg##opt(src_start, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } else { \
- while (w >= 16) { \
- vp9_filter_block1d16_##dir##2_##avg##opt(src, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
- while (w >= 8) { \
- vp9_filter_block1d8_##dir##2_##avg##opt(src, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
- vp9_filter_block1d4_##dir##2_##avg##opt(src, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } \
- } \
- if (w) { \
- vp9_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, y_step_q4, \
- w, h); \
- } \
-}
-
-#define FUN_CONV_2D(avg, opt) \
-void vp9_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
- uint8_t *dst, ptrdiff_t dst_stride, \
- const int16_t *filter_x, int x_step_q4, \
- const int16_t *filter_y, int y_step_q4, \
- int w, int h) { \
- assert(w <= 64); \
- assert(h <= 64); \
- if (x_step_q4 == 16 && y_step_q4 == 16) { \
- if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
- filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
- DECLARE_ALIGNED(16, unsigned char, fdata2[64 * 71]); \
- vp9_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
- filter_x, x_step_q4, filter_y, y_step_q4, \
- w, h + 7); \
- vp9_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, \
- y_step_q4, w, h); \
- } else { \
- DECLARE_ALIGNED(16, unsigned char, fdata2[64 * 65]); \
- vp9_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
- filter_x, x_step_q4, filter_y, y_step_q4, \
- w, h + 1); \
- vp9_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, \
- y_step_q4, w, h); \
- } \
- } else { \
- vp9_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, y_step_q4, w, h); \
- } \
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-
-typedef void highbd_filter8_1dfunction (
- const uint16_t *src_ptr,
- const ptrdiff_t src_pitch,
- uint16_t *output_ptr,
- ptrdiff_t out_pitch,
- unsigned int output_height,
- const int16_t *filter,
- int bd
-);
-
-#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
- void vp9_highbd_convolve8_##name##_##opt(const uint8_t *src8, \
- ptrdiff_t src_stride, \
- uint8_t *dst8, \
- ptrdiff_t dst_stride, \
- const int16_t *filter_x, \
- int x_step_q4, \
- const int16_t *filter_y, \
- int y_step_q4, \
- int w, int h, int bd) { \
- if (step_q4 == 16 && filter[3] != 128) { \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); \
- if (filter[0] || filter[1] || filter[2]) { \
- while (w >= 16) { \
- vp9_highbd_filter_block1d16_##dir##8_##avg##opt(src_start, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter, \
- bd); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
- while (w >= 8) { \
- vp9_highbd_filter_block1d8_##dir##8_##avg##opt(src_start, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter, \
- bd); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
- vp9_highbd_filter_block1d4_##dir##8_##avg##opt(src_start, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter, \
- bd); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } else { \
- while (w >= 16) { \
- vp9_highbd_filter_block1d16_##dir##2_##avg##opt(src, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter, \
- bd); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
- while (w >= 8) { \
- vp9_highbd_filter_block1d8_##dir##2_##avg##opt(src, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter, \
- bd); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
- while (w >= 4) { \
- vp9_highbd_filter_block1d4_##dir##2_##avg##opt(src, \
- src_stride, \
- dst, \
- dst_stride, \
- h, \
- filter, \
- bd); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } \
- } \
- if (w) { \
- vp9_highbd_convolve8_##name##_c(src8, src_stride, dst8, dst_stride, \
- filter_x, x_step_q4, filter_y, y_step_q4, \
- w, h, bd); \
- } \
-}
-
-#define HIGH_FUN_CONV_2D(avg, opt) \
-void vp9_highbd_convolve8_##avg##opt(const uint8_t *src, ptrdiff_t src_stride, \
- uint8_t *dst, ptrdiff_t dst_stride, \
- const int16_t *filter_x, int x_step_q4, \
- const int16_t *filter_y, int y_step_q4, \
- int w, int h, int bd) { \
- assert(w <= 64); \
- assert(h <= 64); \
- if (x_step_q4 == 16 && y_step_q4 == 16) { \
- if (filter_x[0] || filter_x[1] || filter_x[2] || filter_x[3] == 128 || \
- filter_y[0] || filter_y[1] || filter_y[2] || filter_y[3] == 128) { \
- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
- vp9_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
- CONVERT_TO_BYTEPTR(fdata2), 64, \
- filter_x, x_step_q4, \
- filter_y, y_step_q4, \
- w, h + 7, bd); \
- vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2) + 192, \
- 64, dst, dst_stride, \
- filter_x, x_step_q4, \
- filter_y, y_step_q4, \
- w, h, bd); \
- } else { \
- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
- vp9_highbd_convolve8_horiz_##opt(src, src_stride, \
- CONVERT_TO_BYTEPTR(fdata2), 64, \
- filter_x, x_step_q4, \
- filter_y, y_step_q4, \
- w, h + 1, bd); \
- vp9_highbd_convolve8_##avg##vert_##opt(CONVERT_TO_BYTEPTR(fdata2), 64, \
- dst, dst_stride, \
- filter_x, x_step_q4, \
- filter_y, y_step_q4, \
- w, h, bd); \
- } \
- } else { \
- vp9_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, y_step_q4, w, \
- h, bd); \
- } \
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-#if HAVE_AVX2 && HAVE_SSSE3
-filter8_1dfunction vp9_filter_block1d16_v8_avx2;
-filter8_1dfunction vp9_filter_block1d16_h8_avx2;
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
-#if ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
-#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3
-#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3
-#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3
-#else // ARCH_X86
-filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
-#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
-#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
-#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
-#endif // ARCH_X86_64 / ARCH_X86
-filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
-#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3
-#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3
-#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3
-#define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3
-#define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3
-#define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3
-#define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3
-// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
-
-// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-FUN_CONV_2D(, avx2);
-#endif // HAVE_AX2 && HAVE_SSSE3
-#if HAVE_SSSE3
-#if ARCH_X86_64
-filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
-#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3
-#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3
-#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3
-#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3
-#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3
-#else // ARCH_X86
-filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
-#endif // ARCH_X86_64 / ARCH_X86
-filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
-
-filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
-filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;
-filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;
-
-// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
-FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
- ssse3);
-
-// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
-// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-FUN_CONV_2D(, ssse3);
-FUN_CONV_2D(avg_ , ssse3);
-#endif // HAVE_SSSE3
+#include "./vpx_config.h"
+#include "vp9/common/x86/convolve.h"
#if HAVE_SSE2
filter8_1dfunction vp9_filter_block1d16_v8_sse2;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
index 1637f0e545a..b40669c6375 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_high_loopfilter_intrin_sse2.c
@@ -11,6 +11,7 @@
#include <emmintrin.h> // SSE2
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vpx_ports/emmintrin_compat.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
index 0385c7955c9..ce010df3b8a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
#include "vp9/common/vp9_idct.h"
@@ -3203,34 +3205,20 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
// idct constants for each stage
const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64);
const __m128i stg1_1 = pair_set_epi16(cospi_1_64, cospi_31_64);
- const __m128i stg1_2 = pair_set_epi16(cospi_15_64, -cospi_17_64);
- const __m128i stg1_3 = pair_set_epi16(cospi_17_64, cospi_15_64);
- const __m128i stg1_4 = pair_set_epi16(cospi_23_64, -cospi_9_64);
- const __m128i stg1_5 = pair_set_epi16(cospi_9_64, cospi_23_64);
const __m128i stg1_6 = pair_set_epi16(cospi_7_64, -cospi_25_64);
const __m128i stg1_7 = pair_set_epi16(cospi_25_64, cospi_7_64);
const __m128i stg1_8 = pair_set_epi16(cospi_27_64, -cospi_5_64);
const __m128i stg1_9 = pair_set_epi16(cospi_5_64, cospi_27_64);
- const __m128i stg1_10 = pair_set_epi16(cospi_11_64, -cospi_21_64);
- const __m128i stg1_11 = pair_set_epi16(cospi_21_64, cospi_11_64);
- const __m128i stg1_12 = pair_set_epi16(cospi_19_64, -cospi_13_64);
- const __m128i stg1_13 = pair_set_epi16(cospi_13_64, cospi_19_64);
const __m128i stg1_14 = pair_set_epi16(cospi_3_64, -cospi_29_64);
const __m128i stg1_15 = pair_set_epi16(cospi_29_64, cospi_3_64);
const __m128i stg2_0 = pair_set_epi16(cospi_30_64, -cospi_2_64);
const __m128i stg2_1 = pair_set_epi16(cospi_2_64, cospi_30_64);
- const __m128i stg2_2 = pair_set_epi16(cospi_14_64, -cospi_18_64);
- const __m128i stg2_3 = pair_set_epi16(cospi_18_64, cospi_14_64);
- const __m128i stg2_4 = pair_set_epi16(cospi_22_64, -cospi_10_64);
- const __m128i stg2_5 = pair_set_epi16(cospi_10_64, cospi_22_64);
const __m128i stg2_6 = pair_set_epi16(cospi_6_64, -cospi_26_64);
const __m128i stg2_7 = pair_set_epi16(cospi_26_64, cospi_6_64);
const __m128i stg3_0 = pair_set_epi16(cospi_28_64, -cospi_4_64);
const __m128i stg3_1 = pair_set_epi16(cospi_4_64, cospi_28_64);
- const __m128i stg3_2 = pair_set_epi16(cospi_12_64, -cospi_20_64);
- const __m128i stg3_3 = pair_set_epi16(cospi_20_64, cospi_12_64);
const __m128i stg3_4 = pair_set_epi16(-cospi_4_64, cospi_28_64);
const __m128i stg3_5 = pair_set_epi16(cospi_28_64, cospi_4_64);
const __m128i stg3_6 = pair_set_epi16(-cospi_28_64, -cospi_4_64);
@@ -3240,8 +3228,6 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
const __m128i stg4_0 = pair_set_epi16(cospi_16_64, cospi_16_64);
const __m128i stg4_1 = pair_set_epi16(cospi_16_64, -cospi_16_64);
- const __m128i stg4_2 = pair_set_epi16(cospi_24_64, -cospi_8_64);
- const __m128i stg4_3 = pair_set_epi16(cospi_8_64, cospi_24_64);
const __m128i stg4_4 = pair_set_epi16(-cospi_8_64, cospi_24_64);
const __m128i stg4_5 = pair_set_epi16(cospi_24_64, cospi_8_64);
const __m128i stg4_6 = pair_set_epi16(-cospi_24_64, -cospi_8_64);
@@ -3261,47 +3247,29 @@ void vp9_idct32x32_34_add_sse2(const int16_t *input, uint8_t *dest,
stp2_30, stp2_31;
__m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int i;
- // Load input data.
- LOAD_DQCOEFF(in[0], input);
- LOAD_DQCOEFF(in[8], input);
- LOAD_DQCOEFF(in[16], input);
- LOAD_DQCOEFF(in[24], input);
- LOAD_DQCOEFF(in[1], input);
- LOAD_DQCOEFF(in[9], input);
- LOAD_DQCOEFF(in[17], input);
- LOAD_DQCOEFF(in[25], input);
- LOAD_DQCOEFF(in[2], input);
- LOAD_DQCOEFF(in[10], input);
- LOAD_DQCOEFF(in[18], input);
- LOAD_DQCOEFF(in[26], input);
- LOAD_DQCOEFF(in[3], input);
- LOAD_DQCOEFF(in[11], input);
- LOAD_DQCOEFF(in[19], input);
- LOAD_DQCOEFF(in[27], input);
-
- LOAD_DQCOEFF(in[4], input);
- LOAD_DQCOEFF(in[12], input);
- LOAD_DQCOEFF(in[20], input);
- LOAD_DQCOEFF(in[28], input);
- LOAD_DQCOEFF(in[5], input);
- LOAD_DQCOEFF(in[13], input);
- LOAD_DQCOEFF(in[21], input);
- LOAD_DQCOEFF(in[29], input);
- LOAD_DQCOEFF(in[6], input);
- LOAD_DQCOEFF(in[14], input);
- LOAD_DQCOEFF(in[22], input);
- LOAD_DQCOEFF(in[30], input);
- LOAD_DQCOEFF(in[7], input);
- LOAD_DQCOEFF(in[15], input);
- LOAD_DQCOEFF(in[23], input);
- LOAD_DQCOEFF(in[31], input);
+
+ // Load input data. Only need to load the top left 8x8 block.
+ in[0] = _mm_load_si128((const __m128i *)input);
+ in[1] = _mm_load_si128((const __m128i *)(input + 32));
+ in[2] = _mm_load_si128((const __m128i *)(input + 64));
+ in[3] = _mm_load_si128((const __m128i *)(input + 96));
+ in[4] = _mm_load_si128((const __m128i *)(input + 128));
+ in[5] = _mm_load_si128((const __m128i *)(input + 160));
+ in[6] = _mm_load_si128((const __m128i *)(input + 192));
+ in[7] = _mm_load_si128((const __m128i *)(input + 224));
+
+ for (i = 8; i < 32; ++i) {
+ in[i] = _mm_setzero_si128();
+ }
array_transpose_8x8(in, in);
+  // TODO(hkuang): The following transposes are unnecessary, but removing
+  // them leads to a performance drop on some devices.
array_transpose_8x8(in + 8, in + 8);
array_transpose_8x8(in + 16, in + 16);
array_transpose_8x8(in + 24, in + 24);
- IDCT32
+ IDCT32_34
// 1_D: Store 32 intermediate results for each 8x32 block.
col[0] = _mm_add_epi16(stp1_0, stp1_31);
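The _34 variant of the inverse 32x32 transform only has to cover blocks whose nonzero coefficients all lie in the top-left 8x8 of the dequantized block, so the rewritten load reads eight rows at a stride of 32 coefficients and zeroes the remaining vectors instead of loading all 1024 values. A plain-C sketch of that load/zero pattern, with hypothetical toy_ names and no SSE2:

/* Sketch of the "load 8x8, zero the rest" pattern used above. */
#include <stdint.h>
#include <string.h>

static void toy_load_topleft_8x8(const int16_t *input /* 32x32, row stride 32 */,
                                 int16_t out[32][8]) {
  int r;
  for (r = 0; r < 8; ++r)
    memcpy(out[r], input + r * 32, 8 * sizeof(int16_t));  /* one 128-bit row */
  memset(out + 8, 0, 24 * sizeof(out[0]));                /* rows 8..31 zero */
}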
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
index 0cb0912ad62..770a65f4ca1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_avx2.c
@@ -9,6 +9,8 @@
*/
#include <immintrin.h> /* AVX2 */
+
+#include "./vp9_rtcd.h"
#include "vpx_ports/mem.h"
static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
index 8723d32836d..e321dbebe39 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_loopfilter_intrin_sse2.c
@@ -9,6 +9,8 @@
*/
#include <emmintrin.h> // SSE2
+
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vpx_ports/emmintrin_compat.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
index 3bc7d3918b7..cee8d1e76ac 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
@@ -8,7 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+// Due to a header conflict between math.h and intrinsics includes with ceil()
+// in certain configurations under vs9 this include needs to precede
+// immintrin.h.
+#include "./vp9_rtcd.h"
+
#include <immintrin.h>
+
+#include "vp9/common/x86/convolve.h"
#include "vpx_ports/mem.h"
// filters for 16_h8 and 16_v8
@@ -53,23 +60,23 @@ DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
# define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
#endif // __clang__
-void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- int16_t *filter) {
+static void vp9_filter_block1d16_h8_avx2(const uint8_t *src_ptr,
+ ptrdiff_t src_pixels_per_line,
+ uint8_t *output_ptr,
+ ptrdiff_t output_pitch,
+ uint32_t output_height,
+ const int16_t *filter) {
__m128i filtersReg;
__m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
__m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3;
__m256i srcReg32b1, srcReg32b2, filtersReg32;
unsigned int i;
- unsigned int src_stride, dst_stride;
+ ptrdiff_t src_stride, dst_stride;
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
+ filtersReg = _mm_loadu_si128((const __m128i *)filter);
// converting the 16 bit (short) to 8 bit (byte) and have the same data
// in both lanes of 128 bit register.
filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
@@ -104,9 +111,9 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
for (i = output_height; i > 1; i-=2) {
// load the 2 strides of source
srcReg32b1 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr-3)));
+ _mm_loadu_si128((const __m128i *)(src_ptr - 3)));
srcReg32b1 = _mm256_inserti128_si256(srcReg32b1,
- _mm_loadu_si128((__m128i *)
+ _mm_loadu_si128((const __m128i *)
(src_ptr+src_pixels_per_line-3)), 1);
// filter the source buffer
@@ -135,9 +142,9 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
// reading 2 strides of the next 16 bytes
// (part of it was being read by earlier read)
srcReg32b2 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr+5)));
+ _mm_loadu_si128((const __m128i *)(src_ptr + 5)));
srcReg32b2 = _mm256_inserti128_si256(srcReg32b2,
- _mm_loadu_si128((__m128i *)
+ _mm_loadu_si128((const __m128i *)
(src_ptr+src_pixels_per_line+5)), 1);
// add and saturate the results together
@@ -202,7 +209,7 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
__m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1;
__m128i srcRegFilt2, srcRegFilt3;
- srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3));
+ srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3));
// filter the source buffer
srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1,
@@ -237,7 +244,7 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
// reading the next 16 bytes
// (part of it was being read by earlier read)
- srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5));
+ srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5));
// add and saturate the results together
srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
@@ -297,12 +304,12 @@ void vp9_filter_block1d16_h8_avx2(unsigned char *src_ptr,
}
}
-void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
- unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- int16_t *filter) {
+static void vp9_filter_block1d16_v8_avx2(const uint8_t *src_ptr,
+ ptrdiff_t src_pitch,
+ uint8_t *output_ptr,
+ ptrdiff_t out_pitch,
+ uint32_t output_height,
+ const int16_t *filter) {
__m128i filtersReg;
__m256i addFilterReg64;
__m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
@@ -310,11 +317,11 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
__m256i srcReg32b11, srcReg32b12, filtersReg32;
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
unsigned int i;
- unsigned int src_stride, dst_stride;
+ ptrdiff_t src_stride, dst_stride;
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
+ filtersReg = _mm_loadu_si128((const __m128i *)filter);
// converting the 16 bit (short) to 8 bit (byte) and have the
// same data in both lanes of 128 bit register.
filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
@@ -344,19 +351,19 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
// load 16 bytes 7 times in stride of src_pitch
srcReg32b1 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr)));
+ _mm_loadu_si128((const __m128i *)(src_ptr)));
srcReg32b2 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr+src_pitch)));
+ _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)));
srcReg32b3 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*2)));
+ _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)));
srcReg32b4 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*3)));
+ _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)));
srcReg32b5 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*4)));
+ _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)));
srcReg32b6 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*5)));
+ _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)));
srcReg32b7 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*6)));
+ _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)));
// have each consecutive loads on the same 256 register
srcReg32b1 = _mm256_inserti128_si256(srcReg32b1,
@@ -393,11 +400,11 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
// load the last 2 loads of 16 bytes and have every two
// consecutive loads in the same 256 bit register
srcReg32b8 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7)));
+ _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)));
srcReg32b7 = _mm256_inserti128_si256(srcReg32b7,
_mm256_castsi256_si128(srcReg32b8), 1);
srcReg32b9 = _mm256_castsi128_si256(
- _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*8)));
+ _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 8)));
srcReg32b8 = _mm256_inserti128_si256(srcReg32b8,
_mm256_castsi256_si128(srcReg32b9), 1);
@@ -476,7 +483,7 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
__m128i srcRegFilt1, srcRegFilt3, srcRegFilt4, srcRegFilt5;
__m128i srcRegFilt6, srcRegFilt7, srcRegFilt8;
// load the last 16 bytes
- srcRegFilt8 = _mm_loadu_si128((__m128i *)(src_ptr+src_pitch*7));
+ srcRegFilt8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7));
// merge the last 2 results together
srcRegFilt4 = _mm_unpacklo_epi8(
@@ -542,3 +549,54 @@ void vp9_filter_block1d16_v8_avx2(unsigned char *src_ptr,
_mm_store_si128((__m128i*)output_ptr, srcRegFilt1);
}
}
+
+#if HAVE_AVX2 && HAVE_SSSE3
+filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
+#if ARCH_X86_64
+filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
+#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_intrin_ssse3
+#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_intrin_ssse3
+#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_intrin_ssse3
+#else // ARCH_X86
+filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
+#define vp9_filter_block1d8_v8_avx2 vp9_filter_block1d8_v8_ssse3
+#define vp9_filter_block1d8_h8_avx2 vp9_filter_block1d8_h8_ssse3
+#define vp9_filter_block1d4_h8_avx2 vp9_filter_block1d4_h8_ssse3
+#endif // ARCH_X86_64
+filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
+#define vp9_filter_block1d4_v8_avx2 vp9_filter_block1d4_v8_ssse3
+#define vp9_filter_block1d16_v2_avx2 vp9_filter_block1d16_v2_ssse3
+#define vp9_filter_block1d16_h2_avx2 vp9_filter_block1d16_h2_ssse3
+#define vp9_filter_block1d8_v2_avx2 vp9_filter_block1d8_v2_ssse3
+#define vp9_filter_block1d8_h2_avx2 vp9_filter_block1d8_h2_ssse3
+#define vp9_filter_block1d4_v2_avx2 vp9_filter_block1d4_v2_ssse3
+#define vp9_filter_block1d4_h2_avx2 vp9_filter_block1d4_h2_ssse3
+// void vp9_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
+
+// void vp9_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_2D(, avx2);
+#endif  // HAVE_AVX2 && HAVE_SSSE3
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
index 71dbb402dd4..5fd2857e140 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_ssse3.c
@@ -8,7 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+// Due to a header conflict between math.h and intrinsics includes with ceil()
+// in certain configurations under vs9 this include needs to precede
+// tmmintrin.h.
+#include "./vp9_rtcd.h"
+
#include <tmmintrin.h>
+
+#include "vp9/common/x86/convolve.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/emmintrin_compat.h"
@@ -38,12 +45,17 @@ DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = {
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
};
-void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- int16_t *filter) {
+// These are reused by the avx2 intrinsics.
+filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
+
+void vp9_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr,
+ ptrdiff_t src_pixels_per_line,
+ uint8_t *output_ptr,
+ ptrdiff_t output_pitch,
+ uint32_t output_height,
+ const int16_t *filter) {
__m128i firstFilters, secondFilters, shuffle1, shuffle2;
__m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
__m128i addFilterReg64, filtersReg, srcReg, minReg;
@@ -51,7 +63,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
addFilterReg64 =_mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
+ filtersReg = _mm_loadu_si128((const __m128i *)filter);
// converting the 16 bit (short) to 8 bit (byte) and have the same data
// in both lanes of 128 bit register.
filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
@@ -72,7 +84,7 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
shuffle2 = _mm_load_si128((__m128i const *)filt2_4_h8);
for (i = 0; i < output_height; i++) {
- srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3));
+ srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3));
// filter the source buffer
srcRegFilt1= _mm_shuffle_epi8(srcReg, shuffle1);
@@ -109,12 +121,12 @@ void vp9_filter_block1d4_h8_intrin_ssse3(unsigned char *src_ptr,
}
}
-void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- int16_t *filter) {
+void vp9_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr,
+ ptrdiff_t src_pixels_per_line,
+ uint8_t *output_ptr,
+ ptrdiff_t output_pitch,
+ uint32_t output_height,
+ const int16_t *filter) {
__m128i firstFilters, secondFilters, thirdFilters, forthFilters, srcReg;
__m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg;
__m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
@@ -123,7 +135,7 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr,
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
+ filtersReg = _mm_loadu_si128((const __m128i *)filter);
// converting the 16 bit (short) to 8 bit (byte) and have the same data
// in both lanes of 128 bit register.
filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
@@ -147,7 +159,7 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr,
filt4Reg = _mm_load_si128((__m128i const *)filt4_global);
for (i = 0; i < output_height; i++) {
- srcReg = _mm_loadu_si128((__m128i *)(src_ptr-3));
+ srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3));
// filter the source buffer
srcRegFilt1= _mm_shuffle_epi8(srcReg, filt1Reg);
@@ -189,12 +201,12 @@ void vp9_filter_block1d8_h8_intrin_ssse3(unsigned char *src_ptr,
}
}
-void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pixels_per_line,
- unsigned char *output_ptr,
- unsigned int output_pitch,
- unsigned int output_height,
- int16_t *filter) {
+static void vp9_filter_block1d16_h8_intrin_ssse3(const uint8_t *src_ptr,
+ ptrdiff_t src_pixels_per_line,
+ uint8_t *output_ptr,
+ ptrdiff_t output_pitch,
+ uint32_t output_height,
+ const int16_t *filter) {
__m128i addFilterReg64, filtersReg, srcReg1, srcReg2;
__m128i filt1Reg, filt2Reg, filt3Reg, filt4Reg;
__m128i firstFilters, secondFilters, thirdFilters, forthFilters;
@@ -203,7 +215,7 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
+ filtersReg = _mm_loadu_si128((const __m128i *)filter);
// converting the 16 bit (short) to 8 bit (byte) and have the same data
// in both lanes of 128 bit register.
filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
@@ -227,7 +239,7 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
filt4Reg = _mm_load_si128((__m128i const *)filt4_global);
for (i = 0; i < output_height; i++) {
- srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr-3));
+ srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3));
// filter the source buffer
srcRegFilt1_1= _mm_shuffle_epi8(srcReg1, filt1Reg);
@@ -254,7 +266,7 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
// reading the next 16 bytes.
// (part of it was being read by earlier read)
- srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr+5));
+ srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5));
// add and saturate the results together
srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1,
@@ -306,12 +318,12 @@ void vp9_filter_block1d16_h8_intrin_ssse3(unsigned char *src_ptr,
}
}
-void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- int16_t *filter) {
+void vp9_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr,
+ ptrdiff_t src_pitch,
+ uint8_t *output_ptr,
+ ptrdiff_t out_pitch,
+ uint32_t output_height,
+ const int16_t *filter) {
__m128i addFilterReg64, filtersReg, minReg;
__m128i firstFilters, secondFilters, thirdFilters, forthFilters;
__m128i srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt5;
@@ -321,7 +333,7 @@ void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr,
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
+ filtersReg = _mm_loadu_si128((const __m128i *)filter);
// converting the 16 bit (short) to 8 bit (byte) and have the same data
// in both lanes of 128 bit register.
filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
@@ -336,17 +348,17 @@ void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr,
forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
// load the first 7 rows of 8 bytes
- srcReg1 = _mm_loadl_epi64((__m128i *)&src_ptr[0]);
- srcReg2 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch)[0]);
- srcReg3 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 2)[0]);
- srcReg4 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 3)[0]);
- srcReg5 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 4)[0]);
- srcReg6 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 5)[0]);
- srcReg7 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 6)[0]);
+ srcReg1 = _mm_loadl_epi64((const __m128i *)src_ptr);
+ srcReg2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch));
+ srcReg3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2));
+ srcReg4 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3));
+ srcReg5 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4));
+ srcReg6 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5));
+ srcReg7 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6));
for (i = 0; i < output_height; i++) {
// load the last 8 bytes
- srcReg8 = _mm_loadl_epi64((__m128i *)&(src_ptr + src_pitch * 7)[0]);
+ srcReg8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7));
// merge the result together
srcRegFilt1 = _mm_unpacklo_epi8(srcReg1, srcReg2);
@@ -394,12 +406,12 @@ void vp9_filter_block1d8_v8_intrin_ssse3(unsigned char *src_ptr,
}
}
-void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr,
- unsigned int src_pitch,
- unsigned char *output_ptr,
- unsigned int out_pitch,
- unsigned int output_height,
- int16_t *filter) {
+static void vp9_filter_block1d16_v8_intrin_ssse3(const uint8_t *src_ptr,
+ ptrdiff_t src_pitch,
+ uint8_t *output_ptr,
+ ptrdiff_t out_pitch,
+ uint32_t output_height,
+ const int16_t *filter) {
__m128i addFilterReg64, filtersReg, srcRegFilt1, srcRegFilt3;
__m128i firstFilters, secondFilters, thirdFilters, forthFilters;
__m128i srcRegFilt5, srcRegFilt6, srcRegFilt7, srcRegFilt8;
@@ -409,7 +421,7 @@ void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr,
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
- filtersReg = _mm_loadu_si128((__m128i *)filter);
+ filtersReg = _mm_loadu_si128((const __m128i *)filter);
// converting the 16 bit (short) to 8 bit (byte) and have the same data
// in both lanes of 128 bit register.
filtersReg =_mm_packs_epi16(filtersReg, filtersReg);
@@ -424,17 +436,17 @@ void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr,
forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
// load the first 7 rows of 16 bytes
- srcReg1 = _mm_loadu_si128((__m128i *)(src_ptr));
- srcReg2 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch));
- srcReg3 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 2));
- srcReg4 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 3));
- srcReg5 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 4));
- srcReg6 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 5));
- srcReg7 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 6));
+ srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
+ srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch));
+ srcReg3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2));
+ srcReg4 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3));
+ srcReg5 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4));
+ srcReg6 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5));
+ srcReg7 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6));
for (i = 0; i < output_height; i++) {
// load the last 16 bytes
- srcReg8 = _mm_loadu_si128((__m128i *)(src_ptr + src_pitch * 7));
+ srcReg8 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7));
// merge the result together
srcRegFilt5 = _mm_unpacklo_epi8(srcReg1, srcReg2);
@@ -508,3 +520,82 @@ void vp9_filter_block1d16_v8_intrin_ssse3(unsigned char *src_ptr,
output_ptr+=out_pitch;
}
}
+
+#if ARCH_X86_64
+filter8_1dfunction vp9_filter_block1d16_v8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_intrin_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_intrin_ssse3;
+#define vp9_filter_block1d16_v8_ssse3 vp9_filter_block1d16_v8_intrin_ssse3
+#define vp9_filter_block1d16_h8_ssse3 vp9_filter_block1d16_h8_intrin_ssse3
+#define vp9_filter_block1d8_v8_ssse3 vp9_filter_block1d8_v8_intrin_ssse3
+#define vp9_filter_block1d8_h8_ssse3 vp9_filter_block1d8_h8_intrin_ssse3
+#define vp9_filter_block1d4_h8_ssse3 vp9_filter_block1d4_h8_intrin_ssse3
+#else // ARCH_X86
+filter8_1dfunction vp9_filter_block1d16_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h8_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v8_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_ssse3;
+#endif // ARCH_X86_64
+filter8_1dfunction vp9_filter_block1d16_v8_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h8_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v8_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h8_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v8_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h8_avg_ssse3;
+
+filter8_1dfunction vp9_filter_block1d16_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_ssse3;
+filter8_1dfunction vp9_filter_block1d16_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d16_h2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d8_h2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_v2_avg_ssse3;
+filter8_1dfunction vp9_filter_block1d4_h2_avg_ssse3;
+
+// void vp9_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
+FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
+FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
+FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
+ ssse3);
+
+// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+// void vp9_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
+// uint8_t *dst, ptrdiff_t dst_stride,
+// const int16_t *filter_x, int x_step_q4,
+// const int16_t *filter_y, int y_step_q4,
+// int w, int h);
+FUN_CONV_2D(, ssse3);
+FUN_CONV_2D(avg_, ssse3);
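
For reference, the per-pixel operation the horizontal 8-tap kernels above implement can be modeled in plain C as below; the +64 / >>7 pair mirrors addFilterReg64 and the _mm_srli_epi16(..., 7) step, while the intermediate 16-bit saturation the SIMD code relies on is ignored. This is an editorial sketch with a hypothetical name, not code from the patch:

    #include <stddef.h>
    #include <stdint.h>

    static uint8_t clamp_u8(int v) {
      return (uint8_t)(v < 0 ? 0 : v > 255 ? 255 : v);
    }

    // One 8-tap FIR over src[c - 3 .. c + 4], rounded by 64 (1 << (FILTER_BITS - 1)),
    // shifted right by FILTER_BITS (7) and clamped to 8 bits, as in the
    // vp9_filter_block1dN_h8_* kernels.
    static void filter_block1d_h8_ref(const uint8_t *src, ptrdiff_t src_stride,
                                      uint8_t *dst, ptrdiff_t dst_stride,
                                      uint32_t height, int width,
                                      const int16_t *filter) {
      for (uint32_t r = 0; r < height; ++r) {
        for (int c = 0; c < width; ++c) {
          int sum = 64;
          for (int k = 0; k < 8; ++k)
            sum += src[c - 3 + k] * filter[k];
          dst[c] = clamp_u8(sum >> 7);
        }
        src += src_stride;
        dst += dst_stride;
      }
    }
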
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c
index eb9b7971074..9311d8dad7d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -15,6 +15,7 @@
#include "./vpx_scale_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_scale/vpx_scale.h"
@@ -289,9 +290,7 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block,
}
struct intra_args {
- VP9_COMMON *cm;
MACROBLOCKD *xd;
- FRAME_COUNTS *counts;
vp9_reader *r;
int seg_id;
};
@@ -300,7 +299,6 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct intra_args *const args = (struct intra_args *)arg;
- VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
struct macroblockd_plane *const pd = &xd->plane[plane];
MODE_INFO *const mi = xd->mi[0];
@@ -317,7 +315,7 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
x, y, plane);
if (!mi->mbmi.skip) {
- const int eob = vp9_decode_block_tokens(cm, xd, args->counts, plane, block,
+ const int eob = vp9_decode_block_tokens(xd, plane, block,
plane_bsize, x, y, tx_size,
args->r, args->seg_id);
inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride,
@@ -326,10 +324,8 @@ static void predict_and_reconstruct_intra_block(int plane, int block,
}
struct inter_args {
- VP9_COMMON *cm;
MACROBLOCKD *xd;
vp9_reader *r;
- FRAME_COUNTS *counts;
int *eobtotal;
int seg_id;
};
@@ -338,12 +334,11 @@ static void reconstruct_inter_block(int plane, int block,
BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct inter_args *args = (struct inter_args *)arg;
- VP9_COMMON *const cm = args->cm;
MACROBLOCKD *const xd = args->xd;
struct macroblockd_plane *const pd = &xd->plane[plane];
int x, y, eob;
txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
- eob = vp9_decode_block_tokens(cm, xd, args->counts, plane, block, plane_bsize,
+ eob = vp9_decode_block_tokens(xd, plane, block, plane_bsize,
x, y, tx_size, args->r, args->seg_id);
inverse_transform_block(xd, plane, block, tx_size,
&pd->dst.buf[4 * y * pd->dst.stride + 4 * x],
@@ -351,6 +346,357 @@ static void reconstruct_inter_block(int plane, int block,
*args->eobtotal += eob;
}
+static void build_mc_border(const uint8_t *src, int src_stride,
+ uint8_t *dst, int dst_stride,
+ int x, int y, int b_w, int b_h, int w, int h) {
+ // Get a pointer to the start of the real data for this row.
+ const uint8_t *ref_row = src - x - y * src_stride;
+
+ if (y >= h)
+ ref_row += (h - 1) * src_stride;
+ else if (y > 0)
+ ref_row += y * src_stride;
+
+ do {
+ int right = 0, copy;
+ int left = x < 0 ? -x : 0;
+
+ if (left > b_w)
+ left = b_w;
+
+ if (x + b_w > w)
+ right = x + b_w - w;
+
+ if (right > b_w)
+ right = b_w;
+
+ copy = b_w - left - right;
+
+ if (left)
+ memset(dst, ref_row[0], left);
+
+ if (copy)
+ memcpy(dst + left, ref_row + x + left, copy);
+
+ if (right)
+ memset(dst + left + copy, ref_row[w - 1], right);
+
+ dst += dst_stride;
+ ++y;
+
+ if (y > 0 && y < h)
+ ref_row += src_stride;
+ } while (--b_h);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void high_build_mc_border(const uint8_t *src8, int src_stride,
+ uint16_t *dst, int dst_stride,
+ int x, int y, int b_w, int b_h,
+ int w, int h) {
+ // Get a pointer to the start of the real data for this row.
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *ref_row = src - x - y * src_stride;
+
+ if (y >= h)
+ ref_row += (h - 1) * src_stride;
+ else if (y > 0)
+ ref_row += y * src_stride;
+
+ do {
+ int right = 0, copy;
+ int left = x < 0 ? -x : 0;
+
+ if (left > b_w)
+ left = b_w;
+
+ if (x + b_w > w)
+ right = x + b_w - w;
+
+ if (right > b_w)
+ right = b_w;
+
+ copy = b_w - left - right;
+
+ if (left)
+ vpx_memset16(dst, ref_row[0], left);
+
+ if (copy)
+ memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t));
+
+ if (right)
+ vpx_memset16(dst + left + copy, ref_row[w - 1], right);
+
+ dst += dst_stride;
+ ++y;
+
+ if (y > 0 && y < h)
+ ref_row += src_stride;
+ } while (--b_h);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
+ int x0, int y0, int b_w, int b_h,
+ int frame_width, int frame_height,
+ int border_offset,
+ uint8_t *const dst, int dst_buf_stride,
+ int subpel_x, int subpel_y,
+ const InterpKernel *kernel,
+ const struct scale_factors *sf,
+ MACROBLOCKD *xd,
+ int w, int h, int ref, int xs, int ys) {
+ DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]);
+ const uint8_t *buf_ptr;
+
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w,
+ x0, y0, b_w, b_h, frame_width, frame_height);
+ buf_ptr = CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset;
+ } else {
+ build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w,
+ x0, y0, b_w, b_h, frame_width, frame_height);
+ buf_ptr = ((uint8_t *)mc_buf_high) + border_offset;
+ }
+
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ high_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
+ subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
+ } else {
+ inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
+ subpel_y, sf, w, h, ref, kernel, xs, ys);
+ }
+}
+#else
+static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
+ int x0, int y0, int b_w, int b_h,
+ int frame_width, int frame_height,
+ int border_offset,
+ uint8_t *const dst, int dst_buf_stride,
+ int subpel_x, int subpel_y,
+ const InterpKernel *kernel,
+ const struct scale_factors *sf,
+ int w, int h, int ref, int xs, int ys) {
+ DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]);
+ const uint8_t *buf_ptr;
+
+ build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w,
+ x0, y0, b_w, b_h, frame_width, frame_height);
+ buf_ptr = mc_buf + border_offset;
+
+ inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
+ subpel_y, sf, w, h, ref, kernel, xs, ys);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
+ int plane, int bw, int bh, int x,
+ int y, int w, int h, int mi_x, int mi_y,
+ const InterpKernel *kernel,
+ const struct scale_factors *sf,
+ struct buf_2d *pre_buf,
+ struct buf_2d *dst_buf, const MV* mv,
+ RefCntBuffer *ref_frame_buf,
+ int is_scaled, int ref) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
+ MV32 scaled_mv;
+ int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height,
+ buf_stride, subpel_x, subpel_y;
+ uint8_t *ref_frame, *buf_ptr;
+
+ // Get reference frame pointer, width and height.
+ if (plane == 0) {
+ frame_width = ref_frame_buf->buf.y_crop_width;
+ frame_height = ref_frame_buf->buf.y_crop_height;
+ ref_frame = ref_frame_buf->buf.y_buffer;
+ } else {
+ frame_width = ref_frame_buf->buf.uv_crop_width;
+ frame_height = ref_frame_buf->buf.uv_crop_height;
+ ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer
+ : ref_frame_buf->buf.v_buffer;
+ }
+
+ if (is_scaled) {
+ const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh,
+ pd->subsampling_x,
+ pd->subsampling_y);
+ // Co-ordinate of containing block to pixel precision.
+ int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
+ int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
+
+ // Co-ordinate of the block to 1/16th pixel precision.
+ x0_16 = (x_start + x) << SUBPEL_BITS;
+ y0_16 = (y_start + y) << SUBPEL_BITS;
+
+ // Co-ordinate of current block in reference frame
+ // to 1/16th pixel precision.
+ x0_16 = sf->scale_value_x(x0_16, sf);
+ y0_16 = sf->scale_value_y(y0_16, sf);
+
+ // Map the top left corner of the block into the reference frame.
+ x0 = sf->scale_value_x(x_start + x, sf);
+ y0 = sf->scale_value_y(y_start + y, sf);
+
+ // Scale the MV and incorporate the sub-pixel offset of the block
+ // in the reference frame.
+ scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
+ xs = sf->x_step_q4;
+ ys = sf->y_step_q4;
+ } else {
+ // Co-ordinate of containing block to pixel precision.
+ x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
+ y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
+
+ // Co-ordinate of the block to 1/16th pixel precision.
+ x0_16 = x0 << SUBPEL_BITS;
+ y0_16 = y0 << SUBPEL_BITS;
+
+ scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y));
+ scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x));
+ xs = ys = 16;
+ }
+ subpel_x = scaled_mv.col & SUBPEL_MASK;
+ subpel_y = scaled_mv.row & SUBPEL_MASK;
+
+ // Calculate the top left corner of the best matching block in the
+ // reference frame.
+ x0 += scaled_mv.col >> SUBPEL_BITS;
+ y0 += scaled_mv.row >> SUBPEL_BITS;
+ x0_16 += scaled_mv.col;
+ y0_16 += scaled_mv.row;
+
+ // Get reference block pointer.
+ buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
+ buf_stride = pre_buf->stride;
+
+ // Do border extension if there is motion or the
+ // width/height is not a multiple of 8 pixels.
+ if (is_scaled || scaled_mv.col || scaled_mv.row ||
+ (frame_width & 0x7) || (frame_height & 0x7)) {
+ int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
+
+ // Get reference block bottom right horizontal coordinate.
+ int x1 = (x0_16 + (w - 1) * xs) >> SUBPEL_BITS;
+ int x_pad = 0, y_pad = 0;
+
+ if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) {
+ x0 -= VP9_INTERP_EXTEND - 1;
+ x1 += VP9_INTERP_EXTEND;
+ x_pad = 1;
+ }
+
+ if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) {
+ y0 -= VP9_INTERP_EXTEND - 1;
+ y1 += VP9_INTERP_EXTEND;
+ y_pad = 1;
+ }
+
+ // Wait until the reference block is ready. Pad 7 more pixels, as the last 7
+ // pixels of each superblock row can be changed by the next superblock row.
+ if (pbi->frame_parallel_decode)
+ vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
+ MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
+
+ // Extend the border only if the block reaches outside the frame.
+ if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
+ y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
+ // Extend the border.
+ const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0;
+ const int b_w = x1 - x0 + 1;
+ const int b_h = y1 - y0 + 1;
+ const int border_offset = y_pad * 3 * b_w + x_pad * 3;
+
+ extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h,
+ frame_width, frame_height, border_offset,
+ dst, dst_buf->stride,
+ subpel_x, subpel_y,
+ kernel, sf,
+#if CONFIG_VP9_HIGHBITDEPTH
+ xd,
+#endif
+ w, h, ref, xs, ys);
+ return;
+ }
+ } else {
+ // Wait until the reference block is ready. Pad 7 more pixels, as the last 7
+ // pixels of each superblock row can be changed by the next superblock row.
+ if (pbi->frame_parallel_decode) {
+ const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
+ vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
+ MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
+ }
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
+ subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
+ } else {
+ inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
+ subpel_y, sf, w, h, ref, kernel, xs, ys);
+ }
+#else
+ inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
+ subpel_y, sf, w, h, ref, kernel, xs, ys);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+
+static void dec_build_inter_predictors_sb(VP9Decoder *const pbi,
+ MACROBLOCKD *xd,
+ int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
+ int plane;
+ const int mi_x = mi_col * MI_SIZE;
+ const int mi_y = mi_row * MI_SIZE;
+ const MODE_INFO *mi = xd->mi[0];
+ const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
+ const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
+ const int is_compound = has_second_ref(&mi->mbmi);
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
+ &xd->plane[plane]);
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ struct buf_2d *const dst_buf = &pd->dst;
+ const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
+ const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
+
+ const int bw = 4 * num_4x4_w;
+ const int bh = 4 * num_4x4_h;
+ int ref;
+
+ for (ref = 0; ref < 1 + is_compound; ++ref) {
+ const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
+ struct buf_2d *const pre_buf = &pd->pre[ref];
+ const int idx = xd->block_refs[ref]->idx;
+ BufferPool *const pool = pbi->common.buffer_pool;
+ RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
+ const int is_scaled = vp9_is_scaled(sf);
+
+ if (sb_type < BLOCK_8X8) {
+ int i = 0, x, y;
+ assert(bsize == BLOCK_8X8);
+ for (y = 0; y < num_4x4_h; ++y) {
+ for (x = 0; x < num_4x4_w; ++x) {
+ const MV mv = average_split_mvs(pd, mi, ref, i++);
+ dec_build_inter_predictors(pbi, xd, plane, bw, bh,
+ 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel,
+ sf, pre_buf, dst_buf, &mv,
+ ref_frame_buf, is_scaled, ref);
+ }
+ }
+ } else {
+ const MV mv = mi->mbmi.mv[ref].as_mv;
+ dec_build_inter_predictors(pbi, xd, plane, bw, bh,
+ 0, 0, bw, bh, mi_x, mi_y, kernel,
+ sf, pre_buf, dst_buf, &mv, ref_frame_buf,
+ is_scaled, ref);
+ }
+ }
+ }
+}
+
static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
const TileInfo *const tile,
BLOCK_SIZE bsize, int mi_row, int mi_col) {
@@ -380,14 +726,22 @@ static MB_MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
}
static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
- FRAME_COUNTS *counts,
const TileInfo *const tile,
int mi_row, int mi_col,
vp9_reader *r, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &pbi->common;
const int less8x8 = bsize < BLOCK_8X8;
MB_MODE_INFO *mbmi = set_offsets(cm, xd, tile, bsize, mi_row, mi_col);
- vp9_read_mode_info(pbi, xd, counts, tile, mi_row, mi_col, r);
+
+ if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
+ const BLOCK_SIZE uv_subsize =
+ ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y];
+ if (uv_subsize == BLOCK_INVALID)
+ vpx_internal_error(xd->error_info,
+ VPX_CODEC_CORRUPT_FRAME, "Invalid block size.");
+ }
+
+ vp9_read_mode_info(pbi, xd, tile, mi_row, mi_col, r);
if (less8x8)
bsize = BLOCK_8X8;
@@ -397,17 +751,17 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
}
if (!is_inter_block(mbmi)) {
- struct intra_args arg = {cm, xd, counts, r, mbmi->segment_id};
+ struct intra_args arg = {xd, r, mbmi->segment_id};
vp9_foreach_transformed_block(xd, bsize,
predict_and_reconstruct_intra_block, &arg);
} else {
// Prediction
- vp9_dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col, bsize);
+ dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col, bsize);
// Reconstruction
if (!mbmi->skip) {
int eobtotal = 0;
- struct inter_args arg = {cm, xd, r, counts, &eobtotal, mbmi->segment_id};
+ struct inter_args arg = {xd, r, &eobtotal, mbmi->segment_id};
vp9_foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg);
if (!less8x8 && eobtotal == 0)
mbmi->skip = 1; // skip loopfilter
@@ -417,14 +771,12 @@ static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd,
xd->corrupted |= vp9_reader_has_error(r);
}
-static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
- FRAME_COUNTS *counts, int hbs,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- vp9_reader *r) {
+static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, vp9_reader *r,
+ int has_rows, int has_cols) {
const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
- const vp9_prob *const probs = get_partition_probs(cm, ctx);
- const int has_rows = (mi_row + hbs) < cm->mi_rows;
- const int has_cols = (mi_col + hbs) < cm->mi_cols;
+ const vp9_prob *const probs = get_partition_probs(xd, ctx);
+ FRAME_COUNTS *counts = xd->counts;
PARTITION_TYPE p;
if (has_rows && has_cols)
@@ -436,56 +788,50 @@ static PARTITION_TYPE read_partition(VP9_COMMON *cm, MACROBLOCKD *xd,
else
p = PARTITION_SPLIT;
- if (!cm->frame_parallel_decoding_mode)
+ if (counts)
++counts->partition[ctx][p];
return p;
}
static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd,
- FRAME_COUNTS *counts,
const TileInfo *const tile,
int mi_row, int mi_col,
vp9_reader* r, BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &pbi->common;
const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2;
PARTITION_TYPE partition;
- BLOCK_SIZE subsize, uv_subsize;
+ BLOCK_SIZE subsize;
+ const int has_rows = (mi_row + hbs) < cm->mi_rows;
+ const int has_cols = (mi_col + hbs) < cm->mi_cols;
if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
return;
- partition = read_partition(cm, xd, counts, hbs, mi_row, mi_col, bsize, r);
+ partition = read_partition(xd, mi_row, mi_col, bsize, r, has_rows, has_cols);
subsize = get_subsize(bsize, partition);
- uv_subsize = ss_size_lookup[subsize][cm->subsampling_x][cm->subsampling_y];
- if (subsize >= BLOCK_8X8 && uv_subsize == BLOCK_INVALID)
- vpx_internal_error(xd->error_info,
- VPX_CODEC_CORRUPT_FRAME, "Invalid block size.");
- if (subsize < BLOCK_8X8) {
- decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize);
+ if (bsize == BLOCK_8X8) {
+ decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
} else {
switch (partition) {
case PARTITION_NONE:
- decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize);
+ decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
break;
case PARTITION_HORZ:
- decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize);
- if (mi_row + hbs < cm->mi_rows)
- decode_block(pbi, xd, counts, tile, mi_row + hbs, mi_col, r, subsize);
+ decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
+ if (has_rows)
+ decode_block(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize);
break;
case PARTITION_VERT:
- decode_block(pbi, xd, counts, tile, mi_row, mi_col, r, subsize);
- if (mi_col + hbs < cm->mi_cols)
- decode_block(pbi, xd, counts, tile, mi_row, mi_col + hbs, r, subsize);
+ decode_block(pbi, xd, tile, mi_row, mi_col, r, subsize);
+ if (has_cols)
+ decode_block(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize);
break;
case PARTITION_SPLIT:
- decode_partition(pbi, xd, counts, tile, mi_row, mi_col, r, subsize);
- decode_partition(pbi, xd, counts, tile, mi_row, mi_col + hbs, r,
- subsize);
- decode_partition(pbi, xd, counts, tile, mi_row + hbs, mi_col, r,
- subsize);
- decode_partition(pbi, xd, counts, tile, mi_row + hbs, mi_col + hbs, r,
- subsize);
+ decode_partition(pbi, xd, tile, mi_row, mi_col, r, subsize);
+ decode_partition(pbi, xd, tile, mi_row, mi_col + hbs, r, subsize);
+ decode_partition(pbi, xd, tile, mi_row + hbs, mi_col, r, subsize);
+ decode_partition(pbi, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize);
break;
default:
assert(0 && "Invalid partition type");
@@ -673,12 +1019,6 @@ static INTERP_FILTER read_interp_filter(struct vp9_read_bit_buffer *rb) {
: literal_to_filter[vp9_rb_read_literal(rb, 2)];
}
-void vp9_read_frame_size(struct vp9_read_bit_buffer *rb,
- int *width, int *height) {
- *width = vp9_rb_read_literal(rb, 16) + 1;
- *height = vp9_rb_read_literal(rb, 16) + 1;
-}
-
static void setup_display_size(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
cm->display_width = cm->width;
cm->display_height = cm->height;
@@ -698,7 +1038,8 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) {
#if CONFIG_SIZE_LIMIT
if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Width and height beyond allowed size.");
+ "Dimensions of %dx%d beyond allowed size of %dx%d.",
+ width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
#endif
if (cm->width != width || cm->height != height) {
const int new_mi_rows =
@@ -929,7 +1270,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
int mi_row, mi_col;
TileData *tile_data = NULL;
- if (cm->lf.filter_level && pbi->lf_worker.data1 == NULL) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter &&
+ pbi->lf_worker.data1 == NULL) {
CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
vpx_memalign(32, sizeof(LFWorkerData)));
pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker;
@@ -939,7 +1281,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
}
}
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
// Be sure to sync as we might be resuming after a failed frame decode.
winterface->sync(&pbi->lf_worker);
@@ -979,6 +1321,8 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
tile_data->cm = cm;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
+ tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
+ NULL : &cm->counts;
vp9_tile_init(&tile, tile_data->cm, tile_row, tile_col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&tile_data->bit_reader, pbi->decrypt_cb,
@@ -1001,7 +1345,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
vp9_zero(tile_data->xd.left_seg_context);
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- decode_partition(pbi, &tile_data->xd, &cm->counts, &tile, mi_row,
+ decode_partition(pbi, &tile_data->xd, &tile, mi_row,
mi_col, &tile_data->bit_reader, BLOCK_64X64);
}
pbi->mb.corrupted |= tile_data->xd.corrupted;
@@ -1010,7 +1354,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
"Failed to decode tile data");
}
// Loopfilter one row.
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
const int lf_start = mi_row - MI_BLOCK_SIZE;
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
@@ -1039,7 +1383,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi,
}
// Loopfilter remaining rows in the frame.
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1;
winterface->sync(&pbi->lf_worker);
lf_data->start = lf_data->stop;
@@ -1074,7 +1418,7 @@ static int tile_worker_hook(TileWorkerData *const tile_data,
vp9_zero(tile_data->xd.left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- decode_partition(tile_data->pbi, &tile_data->xd, &tile_data->counts,
+ decode_partition(tile_data->pbi, &tile_data->xd,
tile, mi_row, mi_col, &tile_data->bit_reader,
BLOCK_64X64);
}
@@ -1112,8 +1456,6 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
if (pbi->num_tile_workers == 0) {
const int num_threads = pbi->max_threads & ~1;
int i;
- // TODO(jzern): Allocate one less worker, as in the current code we only
- // use num_threads - 1 workers.
CHECK_MEM_ERROR(cm, pbi->tile_workers,
vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
// Ensure tile data offsets will be properly aligned. This may fail on
@@ -1198,6 +1540,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi,
tile_data->pbi = pbi;
tile_data->xd = pbi->mb;
tile_data->xd.corrupted = 0;
+ tile_data->xd.counts = cm->frame_parallel_decoding_mode ?
+ 0 : &tile_data->counts;
vp9_tile_init(tile, cm, 0, buf->col);
setup_token_decoder(buf->data, data_end, buf->size, &cm->error,
&tile_data->bit_reader, pbi->decrypt_cb,
@@ -1251,20 +1595,6 @@ static void error_handler(void *data) {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet");
}
-int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb) {
- return vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 &&
- vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 &&
- vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2;
-}
-
-BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) {
- int profile = vp9_rb_read_bit(rb);
- profile |= vp9_rb_read_bit(rb) << 1;
- if (profile > 2)
- profile += vp9_rb_read_bit(rb);
- return (BITSTREAM_PROFILE) profile;
-}
-
static void read_bitdepth_colorspace_sampling(
VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) {
if (cm->profile >= PROFILE_2) {
@@ -1311,12 +1641,13 @@ static void read_bitdepth_colorspace_sampling(
static size_t read_uncompressed_header(VP9Decoder *pbi,
struct vp9_read_bit_buffer *rb) {
VP9_COMMON *const cm = &pbi->common;
- RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
- BufferPool *const pool = pbi->common.buffer_pool;
+ BufferPool *const pool = cm->buffer_pool;
+ RefCntBuffer *const frame_bufs = pool->frame_bufs;
int i, mask, ref_index = 0;
size_t sz;
cm->last_frame_type = cm->frame_type;
+ cm->last_intra_only = cm->intra_only;
if (vp9_rb_read_literal(rb, 2) != VP9_FRAME_MARKER)
vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM,
@@ -1593,12 +1924,12 @@ static void debug_check_frame_counts(const VP9_COMMON *const cm) {
}
#endif // NDEBUG
-static struct vp9_read_bit_buffer* init_read_bit_buffer(
+static struct vp9_read_bit_buffer *init_read_bit_buffer(
VP9Decoder *pbi,
struct vp9_read_bit_buffer *rb,
const uint8_t *data,
const uint8_t *data_end,
- uint8_t *clear_data /* buffer size MAX_VP9_HEADER_SIZE */) {
+ uint8_t clear_data[MAX_VP9_HEADER_SIZE]) {
rb->bit_offset = 0;
rb->error_handler = error_handler;
rb->error_handler_data = &pbi->common;
@@ -1614,12 +1945,34 @@ static struct vp9_read_bit_buffer* init_read_bit_buffer(
return rb;
}
+//------------------------------------------------------------------------------
+
+int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb) {
+ return vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 &&
+ vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 &&
+ vp9_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2;
+}
+
+void vp9_read_frame_size(struct vp9_read_bit_buffer *rb,
+ int *width, int *height) {
+ *width = vp9_rb_read_literal(rb, 16) + 1;
+ *height = vp9_rb_read_literal(rb, 16) + 1;
+}
+
+BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb) {
+ int profile = vp9_rb_read_bit(rb);
+ profile |= vp9_rb_read_bit(rb) << 1;
+ if (profile > 2)
+ profile += vp9_rb_read_bit(rb);
+ return (BITSTREAM_PROFILE) profile;
+}
+
void vp9_decode_frame(VP9Decoder *pbi,
const uint8_t *data, const uint8_t *data_end,
const uint8_t **p_data_end) {
VP9_COMMON *const cm = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
- struct vp9_read_bit_buffer rb = { NULL, NULL, 0, NULL, 0};
+ struct vp9_read_bit_buffer rb;
int context_updated = 0;
uint8_t clear_data[MAX_VP9_HEADER_SIZE];
const size_t first_partition_size = read_uncompressed_header(pbi,
@@ -1643,8 +1996,9 @@ void vp9_decode_frame(VP9Decoder *pbi,
cm->use_prev_frame_mvs = !cm->error_resilient_mode &&
cm->width == cm->last_width &&
cm->height == cm->last_height &&
- !cm->intra_only &&
- cm->last_show_frame;
+ !cm->last_intra_only &&
+ cm->last_show_frame &&
+ (cm->last_frame_type != KEY_FRAME);
vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);
@@ -1661,7 +2015,7 @@ void vp9_decode_frame(VP9Decoder *pbi,
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Decode failed. Frame data header is corrupted.");
- if (cm->lf.filter_level) {
+ if (cm->lf.filter_level && !cm->skip_loop_filter) {
vp9_loop_filter_frame_init(cm, cm->lf.filter_level);
}
@@ -1687,11 +2041,13 @@ void vp9_decode_frame(VP9Decoder *pbi,
// Multi-threaded tile decoder
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
if (!xd->corrupted) {
- // If multiple threads are used to decode tiles, then we use those threads
- // to do parallel loopfiltering.
- vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
- 0, 0, pbi->tile_workers, pbi->num_tile_workers,
- &pbi->lf_row_sync);
+ if (!cm->skip_loop_filter) {
+ // If multiple threads are used to decode tiles, then we use those
+ // threads to do parallel loopfiltering.
+ vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane,
+ cm->lf.filter_level, 0, 0, pbi->tile_workers,
+ pbi->num_tile_workers, &pbi->lf_row_sync);
+ }
} else {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Decode failed. Frame data is corrupted.");
@@ -1721,327 +2077,3 @@ void vp9_decode_frame(VP9Decoder *pbi,
if (cm->refresh_frame_context && !context_updated)
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
}
-
-static void build_mc_border(const uint8_t *src, int src_stride,
- uint8_t *dst, int dst_stride,
- int x, int y, int b_w, int b_h, int w, int h) {
- // Get a pointer to the start of the real data for this row.
- const uint8_t *ref_row = src - x - y * src_stride;
-
- if (y >= h)
- ref_row += (h - 1) * src_stride;
- else if (y > 0)
- ref_row += y * src_stride;
-
- do {
- int right = 0, copy;
- int left = x < 0 ? -x : 0;
-
- if (left > b_w)
- left = b_w;
-
- if (x + b_w > w)
- right = x + b_w - w;
-
- if (right > b_w)
- right = b_w;
-
- copy = b_w - left - right;
-
- if (left)
- memset(dst, ref_row[0], left);
-
- if (copy)
- memcpy(dst + left, ref_row + x + left, copy);
-
- if (right)
- memset(dst + left + copy, ref_row[w - 1], right);
-
- dst += dst_stride;
- ++y;
-
- if (y > 0 && y < h)
- ref_row += src_stride;
- } while (--b_h);
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static void high_build_mc_border(const uint8_t *src8, int src_stride,
- uint16_t *dst, int dst_stride,
- int x, int y, int b_w, int b_h,
- int w, int h) {
- // Get a pointer to the start of the real data for this row.
- const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- const uint16_t *ref_row = src - x - y * src_stride;
-
- if (y >= h)
- ref_row += (h - 1) * src_stride;
- else if (y > 0)
- ref_row += y * src_stride;
-
- do {
- int right = 0, copy;
- int left = x < 0 ? -x : 0;
-
- if (left > b_w)
- left = b_w;
-
- if (x + b_w > w)
- right = x + b_w - w;
-
- if (right > b_w)
- right = b_w;
-
- copy = b_w - left - right;
-
- if (left)
- vpx_memset16(dst, ref_row[0], left);
-
- if (copy)
- memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t));
-
- if (right)
- vpx_memset16(dst + left + copy, ref_row[w - 1], right);
-
- dst += dst_stride;
- ++y;
-
- if (y > 0 && y < h)
- ref_row += src_stride;
- } while (--b_h);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-
-void dec_build_inter_predictors(VP9Decoder *const pbi, MACROBLOCKD *xd,
- int plane, int bw, int bh, int x,
- int y, int w, int h, int mi_x, int mi_y,
- const InterpKernel *kernel,
- const struct scale_factors *sf,
- struct buf_2d *pre_buf, struct buf_2d *dst_buf,
- const MV* mv, RefCntBuffer *ref_frame_buf,
- int is_scaled, int ref) {
- struct macroblockd_plane *const pd = &xd->plane[plane];
- uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x;
- MV32 scaled_mv;
- int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height,
- buf_stride, subpel_x, subpel_y;
- uint8_t *ref_frame, *buf_ptr;
-
- // Get reference frame pointer, width and height.
- if (plane == 0) {
- frame_width = ref_frame_buf->buf.y_crop_width;
- frame_height = ref_frame_buf->buf.y_crop_height;
- ref_frame = ref_frame_buf->buf.y_buffer;
- } else {
- frame_width = ref_frame_buf->buf.uv_crop_width;
- frame_height = ref_frame_buf->buf.uv_crop_height;
- ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer
- : ref_frame_buf->buf.v_buffer;
- }
-
- if (is_scaled) {
- const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh,
- pd->subsampling_x,
- pd->subsampling_y);
- // Co-ordinate of containing block to pixel precision.
- int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
- int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));
-
- // Co-ordinate of the block to 1/16th pixel precision.
- x0_16 = (x_start + x) << SUBPEL_BITS;
- y0_16 = (y_start + y) << SUBPEL_BITS;
-
- // Co-ordinate of current block in reference frame
- // to 1/16th pixel precision.
- x0_16 = sf->scale_value_x(x0_16, sf);
- y0_16 = sf->scale_value_y(y0_16, sf);
-
- // Map the top left corner of the block into the reference frame.
- x0 = sf->scale_value_x(x_start + x, sf);
- y0 = sf->scale_value_y(y_start + y, sf);
-
- // Scale the MV and incorporate the sub-pixel offset of the block
- // in the reference frame.
- scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf);
- xs = sf->x_step_q4;
- ys = sf->y_step_q4;
- } else {
- // Co-ordinate of containing block to pixel precision.
- x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x;
- y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y;
-
- // Co-ordinate of the block to 1/16th pixel precision.
- x0_16 = x0 << SUBPEL_BITS;
- y0_16 = y0 << SUBPEL_BITS;
-
- scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y));
- scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x));
- xs = ys = 16;
- }
- subpel_x = scaled_mv.col & SUBPEL_MASK;
- subpel_y = scaled_mv.row & SUBPEL_MASK;
-
- // Calculate the top left corner of the best matching block in the
- // reference frame.
- x0 += scaled_mv.col >> SUBPEL_BITS;
- y0 += scaled_mv.row >> SUBPEL_BITS;
- x0_16 += scaled_mv.col;
- y0_16 += scaled_mv.row;
-
- // Get reference block pointer.
- buf_ptr = ref_frame + y0 * pre_buf->stride + x0;
- buf_stride = pre_buf->stride;
-
- // Do border extension if there is motion or the
- // width/height is not a multiple of 8 pixels.
- if (is_scaled || scaled_mv.col || scaled_mv.row ||
- (frame_width & 0x7) || (frame_height & 0x7)) {
- int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS;
-
- // Get reference block bottom right horizontal coordinate.
- int x1 = (x0_16 + (w - 1) * xs) >> SUBPEL_BITS;
- int x_pad = 0, y_pad = 0;
-
- if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) {
- x0 -= VP9_INTERP_EXTEND - 1;
- x1 += VP9_INTERP_EXTEND;
- x_pad = 1;
- }
-
- if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) {
- y0 -= VP9_INTERP_EXTEND - 1;
- y1 += VP9_INTERP_EXTEND;
- y_pad = 1;
- }
-
- // Wait until reference block is ready. Pad 7 more pixels as last 7
- // pixels of each superblock row can be changed by next superblock row.
- if (pbi->frame_parallel_decode)
- vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
- MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
-
- // Skip border extension if block is inside the frame.
- if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 ||
- y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) {
- uint8_t *buf_ptr1 = ref_frame + y0 * pre_buf->stride + x0;
- // Extend the border.
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- high_build_mc_border(buf_ptr1,
- pre_buf->stride,
- xd->mc_buf_high,
- x1 - x0 + 1,
- x0,
- y0,
- x1 - x0 + 1,
- y1 - y0 + 1,
- frame_width,
- frame_height);
- buf_stride = x1 - x0 + 1;
- buf_ptr = CONVERT_TO_BYTEPTR(xd->mc_buf_high) +
- y_pad * 3 * buf_stride + x_pad * 3;
- } else {
- build_mc_border(buf_ptr1,
- pre_buf->stride,
- xd->mc_buf,
- x1 - x0 + 1,
- x0,
- y0,
- x1 - x0 + 1,
- y1 - y0 + 1,
- frame_width,
- frame_height);
- buf_stride = x1 - x0 + 1;
- buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3;
- }
-#else
- build_mc_border(buf_ptr1,
- pre_buf->stride,
- xd->mc_buf,
- x1 - x0 + 1,
- x0,
- y0,
- x1 - x0 + 1,
- y1 - y0 + 1,
- frame_width,
- frame_height);
- buf_stride = x1 - x0 + 1;
- buf_ptr = xd->mc_buf + y_pad * 3 * buf_stride + x_pad * 3;
-#endif // CONFIG_VP9_HIGHBITDEPTH
- }
- } else {
- // Wait until reference block is ready. Pad 7 more pixels as last 7
- // pixels of each superblock row can be changed by next superblock row.
- if (pbi->frame_parallel_decode) {
- const int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1;
- vp9_frameworker_wait(pbi->frame_worker_owner, ref_frame_buf,
- MAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1));
- }
- }
-#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
- } else {
- inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys);
- }
-#else
- inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
- subpel_y, sf, w, h, ref, kernel, xs, ys);
-#endif // CONFIG_VP9_HIGHBITDEPTH
-}
-
-void vp9_dec_build_inter_predictors_sb(VP9Decoder *const pbi, MACROBLOCKD *xd,
- int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- int plane;
- const int mi_x = mi_col * MI_SIZE;
- const int mi_y = mi_row * MI_SIZE;
- const MODE_INFO *mi = xd->mi[0];
- const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
- const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
- const int is_compound = has_second_ref(&mi->mbmi);
-
- for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
- const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize,
- &xd->plane[plane]);
- struct macroblockd_plane *const pd = &xd->plane[plane];
- struct buf_2d *const dst_buf = &pd->dst;
- const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
- const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
-
- const int bw = 4 * num_4x4_w;
- const int bh = 4 * num_4x4_h;
- int ref;
-
- for (ref = 0; ref < 1 + is_compound; ++ref) {
- const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
- struct buf_2d *const pre_buf = &pd->pre[ref];
- const int idx = xd->block_refs[ref]->idx;
- BufferPool *const pool = pbi->common.buffer_pool;
- RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx];
- const int is_scaled = vp9_is_scaled(sf);
-
- if (sb_type < BLOCK_8X8) {
- int i = 0, x, y;
- assert(bsize == BLOCK_8X8);
- for (y = 0; y < num_4x4_h; ++y) {
- for (x = 0; x < num_4x4_w; ++x) {
- const MV mv = average_split_mvs(pd, mi, ref, i++);
- dec_build_inter_predictors(pbi, xd, plane, bw, bh,
- 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel,
- sf, pre_buf, dst_buf, &mv,
- ref_frame_buf, is_scaled, ref);
- }
- }
- } else {
- const MV mv = mi->mbmi.mv[ref].as_mv;
- dec_build_inter_predictors(pbi, xd, plane, bw, bh,
- 0, 0, bw, bh, mi_x, mi_y, kernel,
- sf, pre_buf, dst_buf, &mv, ref_frame_buf,
- is_scaled, ref);
- }
- }
- }
-}
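
Throughout this file (and in vp9_decodemv.c below) the explicit FRAME_COUNTS plumbing is replaced by a single xd->counts pointer, which decode_tiles() and decode_tiles_mt() set to NULL when frame_parallel_decoding_mode disables stat accumulation. A minimal sketch of the resulting pattern, modeled on read_skip() in the vp9_decodemv.c hunks further below (the helper name here is hypothetical):

    static int read_bit_and_count_sketch(VP9_COMMON *cm, MACROBLOCKD *xd,
                                         vp9_reader *r, int ctx) {
      const int bit = vp9_read(r, cm->fc->skip_probs[ctx]);
      FRAME_COUNTS *counts = xd->counts;  // NULL when stats are not accumulated
      if (counts)
        ++counts->skip[ctx][bit];         // bump stats only when a sink exists
      return bit;
    }
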
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.h
index 8410c541e45..a876e7c60f1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.h
@@ -16,24 +16,18 @@
extern "C" {
#endif
-struct VP9Common;
struct VP9Decoder;
struct vp9_read_bit_buffer;
-void vp9_init_dequantizer(struct VP9Common *cm);
-
-void vp9_decode_frame(struct VP9Decoder *pbi,
- const uint8_t *data, const uint8_t *data_end,
- const uint8_t **p_data_end);
-
int vp9_read_sync_code(struct vp9_read_bit_buffer *const rb);
void vp9_read_frame_size(struct vp9_read_bit_buffer *rb,
int *width, int *height);
BITSTREAM_PROFILE vp9_read_profile(struct vp9_read_bit_buffer *rb);
-void vp9_dec_build_inter_predictors_sb(struct VP9Decoder *const pbi,
- MACROBLOCKD *xd, int mi_row, int mi_col,
- BLOCK_SIZE bsize);
+void vp9_decode_frame(struct VP9Decoder *pbi,
+ const uint8_t *data, const uint8_t *data_end,
+ const uint8_t **p_data_end);
+
#ifdef __cplusplus
} // extern "C"
#endif
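
With this change the header exposes only the bitstream probing helpers plus vp9_decode_frame(). As a hedged illustration (the wrapper name is hypothetical; intermediate header bits and error handling are omitted), a caller peeking at a key-frame header would use the helpers roughly in this order:

    // Reads the profile, expects the VP9 sync code, then the coded frame size
    // (stored in the bitstream as value - 1, so the helper adds 1 back).
    static int peek_key_frame_header_sketch(struct vp9_read_bit_buffer *rb,
                                            int *width, int *height) {
      const BITSTREAM_PROFILE profile = vp9_read_profile(rb);
      (void)profile;                 // not used further in this sketch
      if (!vp9_read_sync_code(rb))
        return 0;                    // sync code mismatch
      vp9_read_frame_size(rb, width, height);
      return 1;
    }
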
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c
index ce6ff997778..8a8d8ddd8e6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c
@@ -27,30 +27,33 @@ static PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) {
return (PREDICTION_MODE)vp9_read_tree(r, vp9_intra_mode_tree, p);
}
-static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, FRAME_COUNTS *counts,
+static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, MACROBLOCKD *xd,
vp9_reader *r, int size_group) {
const PREDICTION_MODE y_mode =
read_intra_mode(r, cm->fc->y_mode_prob[size_group]);
- if (!cm->frame_parallel_decoding_mode)
+ FRAME_COUNTS *counts = xd->counts;
+ if (counts)
++counts->y_mode[size_group][y_mode];
return y_mode;
}
-static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, FRAME_COUNTS *counts,
+static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, MACROBLOCKD *xd,
vp9_reader *r,
PREDICTION_MODE y_mode) {
const PREDICTION_MODE uv_mode = read_intra_mode(r,
cm->fc->uv_mode_prob[y_mode]);
- if (!cm->frame_parallel_decoding_mode)
+ FRAME_COUNTS *counts = xd->counts;
+ if (counts)
++counts->uv_mode[y_mode][uv_mode];
return uv_mode;
}
-static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, FRAME_COUNTS *counts,
+static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, MACROBLOCKD *xd,
vp9_reader *r, int ctx) {
const int mode = vp9_read_tree(r, vp9_inter_mode_tree,
cm->fc->inter_mode_probs[ctx]);
- if (!cm->frame_parallel_decoding_mode)
+ FRAME_COUNTS *counts = xd->counts;
+ if (counts)
++counts->inter_mode[ctx][mode];
return NEARESTMV + mode;
@@ -61,8 +64,8 @@ static int read_segment_id(vp9_reader *r, const struct segmentation *seg) {
}
static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
- FRAME_COUNTS *counts,
TX_SIZE max_tx_size, vp9_reader *r) {
+ FRAME_COUNTS *counts = xd->counts;
const int ctx = vp9_get_tx_size_context(xd);
const vp9_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs);
int tx_size = vp9_read(r, tx_probs[0]);
@@ -72,19 +75,18 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
tx_size += vp9_read(r, tx_probs[2]);
}
- if (!cm->frame_parallel_decoding_mode)
+ if (counts)
++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size];
return (TX_SIZE)tx_size;
}
static TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd,
- FRAME_COUNTS *counts,
int allow_select, vp9_reader *r) {
TX_MODE tx_mode = cm->tx_mode;
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
- return read_selected_tx_size(cm, xd, counts, max_tx_size, r);
+ return read_selected_tx_size(cm, xd, max_tx_size, r);
else
return MIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
}
@@ -174,14 +176,14 @@ static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd,
}
static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
- FRAME_COUNTS *counts,
int segment_id, vp9_reader *r) {
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
const int ctx = vp9_get_skip_context(xd);
const int skip = vp9_read(r, cm->fc->skip_probs[ctx]);
- if (!cm->frame_parallel_decoding_mode)
+ FRAME_COUNTS *counts = xd->counts;
+ if (counts)
++counts->skip[ctx][skip];
return skip;
}
@@ -189,7 +191,6 @@ static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd,
static void read_intra_frame_mode_info(VP9_COMMON *const cm,
MACROBLOCKD *const xd,
- FRAME_COUNTS *counts,
int mi_row, int mi_col, vp9_reader *r) {
MODE_INFO *const mi = xd->mi[0];
MB_MODE_INFO *const mbmi = &mi->mbmi;
@@ -199,8 +200,8 @@ static void read_intra_frame_mode_info(VP9_COMMON *const cm,
int i;
mbmi->segment_id = read_intra_segment_id(cm, bsize, mi_row, mi_col, r);
- mbmi->skip = read_skip(cm, xd, counts, mbmi->segment_id, r);
- mbmi->tx_size = read_tx_size(cm, xd, counts, 1, r);
+ mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
+ mbmi->tx_size = read_tx_size(cm, xd, 1, r);
mbmi->ref_frame[0] = INTRA_FRAME;
mbmi->ref_frame[1] = NONE;
@@ -285,13 +286,13 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref,
static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm,
const MACROBLOCKD *xd,
- FRAME_COUNTS *counts,
vp9_reader *r) {
if (cm->reference_mode == REFERENCE_MODE_SELECT) {
const int ctx = vp9_get_reference_mode_context(cm, xd);
const REFERENCE_MODE mode =
(REFERENCE_MODE)vp9_read(r, cm->fc->comp_inter_prob[ctx]);
- if (!cm->frame_parallel_decoding_mode)
+ FRAME_COUNTS *counts = xd->counts;
+ if (counts)
++counts->comp_inter[ctx][mode];
return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE
} else {
@@ -301,34 +302,35 @@ static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm,
 // Read the reference frame
static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- FRAME_COUNTS *counts, vp9_reader *r,
+ vp9_reader *r,
int segment_id, MV_REFERENCE_FRAME ref_frame[2]) {
FRAME_CONTEXT *const fc = cm->fc;
+ FRAME_COUNTS *counts = xd->counts;
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- ref_frame[0] = (MV_REFERENCE_FRAME)vp9_get_segdata(&cm->seg, segment_id,
- SEG_LVL_REF_FRAME);
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id,
+ SEG_LVL_REF_FRAME);
ref_frame[1] = NONE;
} else {
- const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, counts, r);
+ const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r);
// FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding
if (mode == COMPOUND_REFERENCE) {
const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
const int bit = vp9_read(r, fc->comp_ref_prob[ctx]);
- if (!cm->frame_parallel_decoding_mode)
+ if (counts)
++counts->comp_ref[ctx][bit];
ref_frame[idx] = cm->comp_fixed_ref;
ref_frame[!idx] = cm->comp_var_ref[bit];
} else if (mode == SINGLE_REFERENCE) {
const int ctx0 = vp9_get_pred_context_single_ref_p1(xd);
const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]);
- if (!cm->frame_parallel_decoding_mode)
+ if (counts)
++counts->single_ref[ctx0][0][bit0];
if (bit0) {
const int ctx1 = vp9_get_pred_context_single_ref_p2(xd);
const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]);
- if (!cm->frame_parallel_decoding_mode)
+ if (counts)
++counts->single_ref[ctx1][1][bit1];
ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME;
} else {
@@ -345,18 +347,19 @@ static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
static INLINE INTERP_FILTER read_switchable_interp_filter(
VP9_COMMON *const cm, MACROBLOCKD *const xd,
- FRAME_COUNTS *counts, vp9_reader *r) {
+ vp9_reader *r) {
const int ctx = vp9_get_pred_context_switchable_interp(xd);
const INTERP_FILTER type =
(INTERP_FILTER)vp9_read_tree(r, vp9_switchable_interp_tree,
cm->fc->switchable_interp_prob[ctx]);
- if (!cm->frame_parallel_decoding_mode)
+ FRAME_COUNTS *counts = xd->counts;
+ if (counts)
++counts->switchable_interp[ctx][type];
return type;
}
static void read_intra_block_mode_info(VP9_COMMON *const cm,
- FRAME_COUNTS *counts, MODE_INFO *mi,
+ MACROBLOCKD *const xd, MODE_INFO *mi,
vp9_reader *r) {
MB_MODE_INFO *const mbmi = &mi->mbmi;
const BLOCK_SIZE bsize = mi->mbmi.sb_type;
@@ -368,26 +371,26 @@ static void read_intra_block_mode_info(VP9_COMMON *const cm,
switch (bsize) {
case BLOCK_4X4:
for (i = 0; i < 4; ++i)
- mi->bmi[i].as_mode = read_intra_mode_y(cm, counts, r, 0);
+ mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0);
mbmi->mode = mi->bmi[3].as_mode;
break;
case BLOCK_4X8:
- mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, counts,
+ mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd,
r, 0);
mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
- read_intra_mode_y(cm, counts, r, 0);
+ read_intra_mode_y(cm, xd, r, 0);
break;
case BLOCK_8X4:
- mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, counts,
+ mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd,
r, 0);
mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
- read_intra_mode_y(cm, counts, r, 0);
+ read_intra_mode_y(cm, xd, r, 0);
break;
default:
- mbmi->mode = read_intra_mode_y(cm, counts, r, size_group_lookup[bsize]);
+ mbmi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]);
}
- mbmi->uv_mode = read_intra_mode_uv(cm, counts, r, mbmi->mode);
+ mbmi->uv_mode = read_intra_mode_uv(cm, xd, r, mbmi->mode);
}
static INLINE int is_mv_valid(const MV *mv) {
@@ -395,7 +398,7 @@ static INLINE int is_mv_valid(const MV *mv) {
mv->col > MV_LOW && mv->col < MV_UPP;
}
-static INLINE int assign_mv(VP9_COMMON *cm, FRAME_COUNTS *counts,
+static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd,
PREDICTION_MODE mode,
int_mv mv[2], int_mv ref_mv[2],
int_mv nearest_mv[2], int_mv near_mv[2],
@@ -405,8 +408,8 @@ static INLINE int assign_mv(VP9_COMMON *cm, FRAME_COUNTS *counts,
switch (mode) {
case NEWMV: {
- nmv_context_counts *const mv_counts = cm->frame_parallel_decoding_mode ?
- NULL : &counts->mv;
+ FRAME_COUNTS *counts = xd->counts;
+ nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL;
for (i = 0; i < 1 + is_compound; ++i) {
read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
allow_hp);
@@ -440,15 +443,14 @@ static INLINE int assign_mv(VP9_COMMON *cm, FRAME_COUNTS *counts,
}
static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- FRAME_COUNTS *counts,
int segment_id, vp9_reader *r) {
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
- return vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) !=
- INTRA_FRAME;
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME;
} else {
const int ctx = vp9_get_intra_inter_context(xd);
const int is_inter = vp9_read(r, cm->fc->intra_inter_prob[ctx]);
- if (!cm->frame_parallel_decoding_mode)
+ FRAME_COUNTS *counts = xd->counts;
+ if (counts)
++counts->intra_inter[ctx][is_inter];
return is_inter;
}
@@ -462,7 +464,6 @@ static void fpm_sync(void *const data, int mi_row) {
static void read_inter_block_mode_info(VP9Decoder *const pbi,
MACROBLOCKD *const xd,
- FRAME_COUNTS *counts,
const TileInfo *const tile,
MODE_INFO *const mi,
int mi_row, int mi_col, vp9_reader *r) {
@@ -471,9 +472,11 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
const BLOCK_SIZE bsize = mbmi->sb_type;
const int allow_hp = cm->allow_high_precision_mv;
int_mv nearestmv[2], nearmv[2];
- int inter_mode_ctx, ref, is_compound;
+ int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES];
+ int ref, is_compound;
+ uint8_t inter_mode_ctx[MAX_REF_FRAMES];
- read_ref_frames(cm, xd, counts, r, mbmi->segment_id, mbmi->ref_frame);
+ read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame);
is_compound = has_second_ref(mbmi);
for (ref = 0; ref < 1 + is_compound; ++ref) {
@@ -485,13 +488,11 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
"Reference frame has invalid dimensions");
vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col,
&ref_buf->sf);
- vp9_find_mv_refs(cm, xd, tile, mi, frame, mbmi->ref_mvs[frame],
- mi_row, mi_col, fpm_sync, (void *)pbi);
+ vp9_find_mv_refs(cm, xd, tile, mi, frame, ref_mvs[frame],
+ mi_row, mi_col, fpm_sync, (void *)pbi, inter_mode_ctx);
}
- inter_mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
-
- if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
mbmi->mode = ZEROMV;
if (bsize < BLOCK_8X8) {
vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
@@ -500,18 +501,19 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
}
} else {
if (bsize >= BLOCK_8X8)
- mbmi->mode = read_inter_mode(cm, counts, r, inter_mode_ctx);
+ mbmi->mode = read_inter_mode(cm, xd, r,
+ inter_mode_ctx[mbmi->ref_frame[0]]);
}
if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) {
for (ref = 0; ref < 1 + is_compound; ++ref) {
- vp9_find_best_ref_mvs(xd, allow_hp, mbmi->ref_mvs[mbmi->ref_frame[ref]],
+ vp9_find_best_ref_mvs(xd, allow_hp, ref_mvs[mbmi->ref_frame[ref]],
&nearestmv[ref], &nearmv[ref]);
}
}
mbmi->interp_filter = (cm->interp_filter == SWITCHABLE)
- ? read_switchable_interp_filter(cm, xd, counts, r)
+ ? read_switchable_interp_filter(cm, xd, r)
: cm->interp_filter;
if (bsize < BLOCK_8X8) {
@@ -524,15 +526,18 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
for (idx = 0; idx < 2; idx += num_4x4_w) {
int_mv block[2];
const int j = idy * 2 + idx;
- b_mode = read_inter_mode(cm, counts, r, inter_mode_ctx);
+ b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx[mbmi->ref_frame[0]]);
- if (b_mode == NEARESTMV || b_mode == NEARMV)
+ if (b_mode == NEARESTMV || b_mode == NEARMV) {
+ uint8_t dummy_mode_ctx[MAX_REF_FRAMES];
for (ref = 0; ref < 1 + is_compound; ++ref)
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, j, ref, mi_row, mi_col,
&nearest_sub8x8[ref],
- &near_sub8x8[ref]);
+ &near_sub8x8[ref],
+ dummy_mode_ctx);
+ }
- if (!assign_mv(cm, counts, b_mode, block, nearestmv,
+ if (!assign_mv(cm, xd, b_mode, block, nearestmv,
nearest_sub8x8, near_sub8x8,
is_compound, allow_hp, r)) {
xd->corrupted |= 1;
@@ -555,14 +560,13 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
} else {
- xd->corrupted |= !assign_mv(cm, counts, mbmi->mode, mbmi->mv, nearestmv,
+ xd->corrupted |= !assign_mv(cm, xd, mbmi->mode, mbmi->mv, nearestmv,
nearestmv, nearmv, is_compound, allow_hp, r);
}
}
static void read_inter_frame_mode_info(VP9Decoder *const pbi,
MACROBLOCKD *const xd,
- FRAME_COUNTS *counts,
const TileInfo *const tile,
int mi_row, int mi_col, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
@@ -573,18 +577,17 @@ static void read_inter_frame_mode_info(VP9Decoder *const pbi,
mbmi->mv[0].as_int = 0;
mbmi->mv[1].as_int = 0;
mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r);
- mbmi->skip = read_skip(cm, xd, counts, mbmi->segment_id, r);
- inter_block = read_is_inter_block(cm, xd, counts, mbmi->segment_id, r);
- mbmi->tx_size = read_tx_size(cm, xd, counts, !mbmi->skip || !inter_block, r);
+ mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
+ inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r);
+ mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
if (inter_block)
- read_inter_block_mode_info(pbi, xd, counts, tile, mi, mi_row, mi_col, r);
+ read_inter_block_mode_info(pbi, xd, tile, mi, mi_row, mi_col, r);
else
- read_intra_block_mode_info(cm, counts, mi, r);
+ read_intra_block_mode_info(cm, xd, mi, r);
}
void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
- FRAME_COUNTS *counts,
const TileInfo *const tile,
int mi_row, int mi_col, vp9_reader *r) {
VP9_COMMON *const cm = &pbi->common;
@@ -596,19 +599,20 @@ void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
int w, h;
- if (frame_is_intra_only(cm))
- read_intra_frame_mode_info(cm, xd, counts, mi_row, mi_col, r);
- else
- read_inter_frame_mode_info(pbi, xd, counts, tile, mi_row, mi_col, r);
-
- for (h = 0; h < y_mis; ++h) {
- MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
- for (w = 0; w < x_mis; ++w) {
- MV_REF *const mv = frame_mv + w;
- mv->ref_frame[0] = mi->mbmi.ref_frame[0];
- mv->ref_frame[1] = mi->mbmi.ref_frame[1];
- mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
- mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
+ if (frame_is_intra_only(cm)) {
+ read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r);
+ } else {
+ read_inter_frame_mode_info(pbi, xd, tile, mi_row, mi_col, r);
+
+ for (h = 0; h < y_mis; ++h) {
+ MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
+ for (w = 0; w < x_mis; ++w) {
+ MV_REF *const mv = frame_mv + w;
+ mv->ref_frame[0] = mi->mbmi.ref_frame[0];
+ mv->ref_frame[1] = mi->mbmi.ref_frame[1];
+ mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
+ mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
+ }
}
}
}
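
The refactor above drops the explicit FRAME_COUNTS argument and reads the counter block through xd->counts, so the old !cm->frame_parallel_decoding_mode test becomes a plain NULL check. A minimal standalone sketch of that idiom (the struct layouts below are simplified stand-ins, not the real libvpx definitions):

    typedef struct { unsigned int y_mode[4][10]; } Counts;  /* stand-in for FRAME_COUNTS */
    typedef struct { Counts *counts; } BlockD;               /* stand-in for MACROBLOCKD */

    /* Callers leave xd->counts NULL in frame-parallel decoding, so every
       site that used to test cm->frame_parallel_decoding_mode now just
       checks the pointer before bumping a counter. */
    static int read_mode_sketch(BlockD *xd, int size_group, int decoded_mode) {
      Counts *counts = xd->counts;
      if (counts)
        ++counts->y_mode[size_group][decoded_mode];
      return decoded_mode;
    }
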
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.h
index c79dff71888..dd97d8da030 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.h
@@ -21,7 +21,6 @@ extern "C" {
struct TileInfo;
void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
- FRAME_COUNTS *counts,
const struct TileInfo *const tile,
int mi_row, int mi_col, vp9_reader *r);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c
index cf1f23fb98f..7991a39e610 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decoder.c
@@ -211,6 +211,9 @@ vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
// Find an empty frame buffer.
const int free_fb = get_free_fb(cm);
+ if (cm->new_fb_idx == INVALID_IDX)
+ return VPX_CODEC_MEM_ERROR;
+
// Decrease ref_count since it will be increased again in
// ref_cnt_fb() below.
--frame_bufs[free_fb].ref_count;
@@ -298,7 +301,10 @@ int vp9_receive_compressed_data(VP9Decoder *pbi,
&& frame_bufs[cm->new_fb_idx].ref_count == 0)
pool->release_fb_cb(pool->cb_priv,
&frame_bufs[cm->new_fb_idx].raw_frame_buffer);
+  // Find a free frame buffer. Return an error if none can be found.
cm->new_fb_idx = get_free_fb(cm);
+ if (cm->new_fb_idx == INVALID_IDX)
+ return VPX_CODEC_MEM_ERROR;
// Assign a MV array to the frame buffer.
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
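
The two hunks above make the decoder fail cleanly when no frame buffer is free instead of continuing with an invalid index. A small sketch of that guard, assuming an INVALID_IDX-style sentinel and a generic error code in place of VPX_CODEC_MEM_ERROR:

    #define INVALID_IDX (-1)   /* assumed to mirror libvpx's INVALID_IDX sentinel */

    /* The buffer allocator returns INVALID_IDX when every frame buffer is
       still referenced; the caller must bail out with a memory error rather
       than index the pool with -1. */
    static int acquire_fb_sketch(int (*get_free_fb_fn)(void), int *out_idx) {
      const int idx = get_free_fb_fn();
      if (idx == INVALID_IDX)
        return 1;              /* stands in for VPX_CODEC_MEM_ERROR */
      *out_idx = idx;
      return 0;
    }
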
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.c
index bb8c66fc09f..3304e64b2d5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.c
@@ -17,6 +17,7 @@
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#include "vp9/common/vp9_idct.h"
#endif
+#include "vp9/common/vp9_scan.h"
#include "vp9/decoder/vp9_detokenize.h"
@@ -34,7 +35,7 @@
#define INCREMENT_COUNT(token) \
do { \
- if (!cm->frame_parallel_decoding_mode) \
+ if (counts) \
++coef_counts[band][ctx][token]; \
} while (0)
@@ -45,22 +46,21 @@ static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) {
return val;
}
-static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
- FRAME_COUNTS *counts, PLANE_TYPE type,
+static int decode_coefs(const MACROBLOCKD *xd,
+ PLANE_TYPE type,
tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq,
int ctx, const int16_t *scan, const int16_t *nb,
vp9_reader *r) {
+ FRAME_COUNTS *counts = xd->counts;
const int max_eob = 16 << (tx_size << 1);
- const FRAME_CONTEXT *const fc = cm->fc;
+ const FRAME_CONTEXT *const fc = xd->fc;
const int ref = is_inter_block(&xd->mi[0]->mbmi);
int band, c = 0;
const vp9_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] =
fc->coef_probs[tx_size][type][ref];
const vp9_prob *prob;
- unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1] =
- counts->coef[tx_size][type][ref];
- unsigned int (*eob_branch_count)[COEFF_CONTEXTS] =
- counts->eob_branch[tx_size][type][ref];
+ unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1];
+ unsigned int (*eob_branch_count)[COEFF_CONTEXTS];
uint8_t token_cache[32 * 32];
const uint8_t *band_translate = get_band_translate(tx_size);
const int dq_shift = (tx_size == TX_32X32);
@@ -73,9 +73,14 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
const uint8_t *cat5_prob;
const uint8_t *cat6_prob;
+ if (counts) {
+ coef_counts = counts->coef[tx_size][type][ref];
+ eob_branch_count = counts->eob_branch[tx_size][type][ref];
+ }
+
#if CONFIG_VP9_HIGHBITDEPTH
- if (cm->use_highbitdepth) {
- if (cm->bit_depth == VPX_BITS_10) {
+ if (xd->bd > VPX_BITS_8) {
+ if (xd->bd == VPX_BITS_10) {
cat1_prob = vp9_cat1_prob_high10;
cat2_prob = vp9_cat2_prob_high10;
cat3_prob = vp9_cat3_prob_high10;
@@ -111,7 +116,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
int val = -1;
band = *band_translate++;
prob = coef_probs[band][ctx];
- if (!cm->frame_parallel_decoding_mode)
+ if (counts)
++eob_branch_count[band][ctx];
if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) {
INCREMENT_COUNT(EOB_MODEL_TOKEN);
@@ -161,7 +166,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
break;
case CATEGORY6_TOKEN:
#if CONFIG_VP9_HIGHBITDEPTH
- switch (cm->bit_depth) {
+ switch (xd->bd) {
case VPX_BITS_8:
val = CAT6_MIN_VAL + read_coeff(cat6_prob, 14, r);
break;
@@ -185,7 +190,7 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
#if CONFIG_COEFFICIENT_RANGE_CHECKING
#if CONFIG_VP9_HIGHBITDEPTH
dqcoeff[scan[c]] = highbd_check_range((vp9_read_bit(r) ? -v : v),
- cm->bit_depth);
+ xd->bd);
#else
dqcoeff[scan[c]] = check_range(vp9_read_bit(r) ? -v : v);
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -201,18 +206,17 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd,
return c;
}
-int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
- FRAME_COUNTS *counts, int plane, int block,
+int vp9_decode_block_tokens(MACROBLOCKD *xd,
+ int plane, int block,
BLOCK_SIZE plane_bsize, int x, int y,
TX_SIZE tx_size, vp9_reader *r,
int seg_id) {
struct macroblockd_plane *const pd = &xd->plane[plane];
- const int16_t *const dequant = (plane == 0) ? cm->y_dequant[seg_id]
- : cm->uv_dequant[seg_id];
+ const int16_t *const dequant = pd->seg_dequant[seg_id];
const int ctx = get_entropy_context(tx_size, pd->above_context + x,
pd->left_context + y);
const scan_order *so = get_scan(xd, tx_size, pd->plane_type, block);
- const int eob = decode_coefs(cm, xd, counts, pd->plane_type,
+ const int eob = decode_coefs(xd, pd->plane_type,
BLOCK_OFFSET(pd->dqcoeff, block), tx_size,
dequant, ctx, so->scan, so->neighbors, r);
vp9_set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, x, y);
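
In the detokenizer, decode_coefs() now pulls the frame context, bit depth and counts from MACROBLOCKD, and the count-table pointers are only bound once counts is known to be non-NULL. A simplified sketch of that deferred binding (type names and array shapes below are illustrative only):

    typedef struct { unsigned int coef[4][2][6]; } CoeffCounts;  /* deliberately simplified */
    typedef struct { CoeffCounts *counts; } Xd;                  /* stand-in for MACROBLOCKD */

    /* Bind the count table only after the NULL check: xd->counts is NULL in
       frame-parallel decoding and must not be dereferenced even to compute
       the table pointers. */
    static void count_token_sketch(Xd *xd, int tx, int type, int token) {
      CoeffCounts *counts = xd->counts;
      unsigned int (*coef_counts)[6] = 0;
      if (counts)
        coef_counts = counts->coef[tx];
      if (coef_counts)
        ++coef_counts[type][token];
    }
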
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h
index 86126b6a19e..df176066898 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_detokenize.h
@@ -19,8 +19,8 @@
extern "C" {
#endif
-int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd,
- FRAME_COUNTS *counts, int plane, int block,
+int vp9_decode_block_tokens(MACROBLOCKD *xd,
+ int plane, int block,
BLOCK_SIZE plane_bsize, int x, int y,
TX_SIZE tx_size, vp9_reader *r,
int seg_id);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c
index cf82dd75d92..0ac194e92b0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/arm/neon/vp9_variance_neon.c
@@ -10,91 +10,24 @@
#include <arm_neon.h>
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
-#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_filter.h"
-#include "vp9/encoder/vp9_variance.h"
-
-static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
- const int32x4_t a = vpaddlq_s16(v_16x8);
- const int64x2_t b = vpaddlq_s32(a);
- const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
- vreinterpret_s32_s64(vget_high_s64(b)));
- return vget_lane_s32(c, 0);
-}
-
-static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) {
- const int64x2_t b = vpaddlq_s32(v_32x4);
- const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
- vreinterpret_s32_s64(vget_high_s64(b)));
- return vget_lane_s32(c, 0);
-}
-
-// w * h must be less than 2048 or local variable v_sum may overflow.
-static void variance_neon_w8(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int w, int h, uint32_t *sse, int *sum) {
- int i, j;
- int16x8_t v_sum = vdupq_n_s16(0);
- int32x4_t v_sse_lo = vdupq_n_s32(0);
- int32x4_t v_sse_hi = vdupq_n_s32(0);
-
- for (i = 0; i < h; ++i) {
- for (j = 0; j < w; j += 8) {
- const uint8x8_t v_a = vld1_u8(&a[j]);
- const uint8x8_t v_b = vld1_u8(&b[j]);
- const uint16x8_t v_diff = vsubl_u8(v_a, v_b);
- const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff);
- v_sum = vaddq_s16(v_sum, sv_diff);
- v_sse_lo = vmlal_s16(v_sse_lo,
- vget_low_s16(sv_diff),
- vget_low_s16(sv_diff));
- v_sse_hi = vmlal_s16(v_sse_hi,
- vget_high_s16(sv_diff),
- vget_high_s16(sv_diff));
- }
- a += a_stride;
- b += b_stride;
- }
-
- *sum = horizontal_add_s16x8(v_sum);
- *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi));
-}
-
-void vp9_get8x8var_neon(const uint8_t *src_ptr, int source_stride,
- const uint8_t *ref_ptr, int ref_stride,
- unsigned int *sse, int *sum) {
- variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, 8,
- 8, sse, sum);
-}
-
-unsigned int vp9_variance8x8_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum;
- variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum);
- return *sse - (((int64_t)sum * sum) >> 6); // >> 6 = / 8 * 8
-}
-
-void vp9_get16x16var_neon(const uint8_t *src_ptr, int source_stride,
- const uint8_t *ref_ptr, int ref_stride,
- unsigned int *sse, int *sum) {
- variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, 16,
- 16, sse, sum);
-}
-
-unsigned int vp9_variance16x16_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum;
- variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &sum);
- return *sse - (((int64_t)sum * sum) >> 8); // >> 8 = / 16 * 16
-}
+static uint8_t bilinear_filters[8][2] = {
+ { 128, 0, },
+ { 112, 16, },
+ { 96, 32, },
+ { 80, 48, },
+ { 64, 64, },
+ { 48, 80, },
+ { 32, 96, },
+ { 16, 112, },
+};
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
uint8_t *output_ptr,
@@ -102,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
- const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
- const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+ const uint8_t *vp9_filter) {
+ const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+ const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i;
for (i = 0; i < output_height; ++i) {
const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
@@ -125,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
- const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
- const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+ const uint8_t *vp9_filter) {
+ const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+ const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 16) {
@@ -159,10 +92,10 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src,
var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
9, 8,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
- 8, BILINEAR_FILTERS_2TAP(yoffset));
- return vp9_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
+ 8, bilinear_filters[yoffset]);
+ return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
}
unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
@@ -177,80 +110,10 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
17, 16,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
- 16, BILINEAR_FILTERS_2TAP(yoffset));
- return vp9_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
-}
-
-void vp9_get32x32var_neon(const uint8_t *src_ptr, int source_stride,
- const uint8_t *ref_ptr, int ref_stride,
- unsigned int *sse, int *sum) {
- variance_neon_w8(src_ptr, source_stride, ref_ptr, ref_stride, 32,
- 32, sse, sum);
-}
-
-unsigned int vp9_variance32x32_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum;
- variance_neon_w8(a, a_stride, b, b_stride, 32, 32, sse, &sum);
- return *sse - (((int64_t)sum * sum) >> 10); // >> 10 = / 32 * 32
-}
-
-unsigned int vp9_variance32x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
- variance_neon_w8(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1);
- variance_neon_w8(a + (32 * a_stride), a_stride,
- b + (32 * b_stride), b_stride, 32, 32,
- &sse2, &sum2);
- *sse = sse1 + sse2;
- sum1 += sum2;
- return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64
-}
-
-unsigned int vp9_variance64x32_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
- variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w8(a + (16 * a_stride), a_stride,
- b + (16 * b_stride), b_stride, 64, 16,
- &sse2, &sum2);
- *sse = sse1 + sse2;
- sum1 += sum2;
- return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64
-}
-
-unsigned int vp9_variance64x64_neon(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- unsigned int *sse) {
- int sum1, sum2;
- uint32_t sse1, sse2;
-
- variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
- variance_neon_w8(a + (16 * a_stride), a_stride,
- b + (16 * b_stride), b_stride, 64, 16,
- &sse2, &sum2);
- sse1 += sse2;
- sum1 += sum2;
-
- variance_neon_w8(a + (16 * 2 * a_stride), a_stride,
- b + (16 * 2 * b_stride), b_stride,
- 64, 16, &sse2, &sum2);
- sse1 += sse2;
- sum1 += sum2;
-
- variance_neon_w8(a + (16 * 3 * a_stride), a_stride,
- b + (16 * 3 * b_stride), b_stride,
- 64, 16, &sse2, &sum2);
- *sse = sse1 + sse2;
- sum1 += sum2;
- return *sse - (((int64_t)sum1 * sum1) >> 12); // >> 12 = / 64 * 64
+ 16, bilinear_filters[yoffset]);
+ return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
}
unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
@@ -265,10 +128,10 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
33, 32,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
- 32, BILINEAR_FILTERS_2TAP(yoffset));
- return vp9_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
+ 32, bilinear_filters[yoffset]);
+ return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
}
unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
@@ -283,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
65, 64,
- BILINEAR_FILTERS_2TAP(xoffset));
+ bilinear_filters[xoffset]);
var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
- 64, BILINEAR_FILTERS_2TAP(yoffset));
- return vp9_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
+ 64, bilinear_filters[yoffset]);
+ return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
}
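
After this change the NEON file keeps only the sub-pixel wrappers: each one runs a horizontal and a vertical 2-tap bilinear pass using the local filter table, then hands the filtered block to the shared vpx_variance*_neon kernels. A scalar sketch of one filtering pass, assuming the usual libvpx rounding by FILTER_BITS = 7:

    #include <stdint.h>

    /* 2-tap bilinear kernel for sub-pixel offsets 0..7 (128 = pass-through),
       same weights as the table added above. */
    static const uint8_t kBilinear[8][2] = {
      {128, 0}, {112, 16}, {96, 32}, {80, 48},
      {64, 64}, {48, 80}, {32, 96}, {16, 112},
    };

    /* Each output pixel is a rounded weighted average of a pixel and its
       neighbor pixel_step away (1 for horizontal, the row stride for
       vertical). */
    static void bilinear_pass_sketch(const uint8_t *src, int src_stride,
                                     int pixel_step, uint8_t *dst,
                                     int w, int h, const uint8_t *f) {
      int r, c;
      for (r = 0; r < h; ++r) {
        for (c = 0; c < w; ++c)
          dst[c] = (uint8_t)((src[c] * f[0] + src[c + pixel_step] * f[1] + 64) >> 7);
        src += src_stride;
        dst += w;
      }
    }
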
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c
new file mode 100644
index 00000000000..f2e8b275a6a
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_avg_msa.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
+uint32_t vp9_avg_8x8_msa(const uint8_t *src, int32_t src_stride) {
+ uint32_t sum_out;
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7;
+ v4u32 sum = { 0 };
+
+ LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+ HADD_UB4_UH(src0, src1, src2, src3, sum0, sum1, sum2, sum3);
+ HADD_UB4_UH(src4, src5, src6, src7, sum4, sum5, sum6, sum7);
+ ADD4(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum0, sum2, sum4, sum6);
+ ADD2(sum0, sum2, sum4, sum6, sum0, sum4);
+ sum0 += sum4;
+
+ sum = __msa_hadd_u_w(sum0, sum0);
+ sum0 = (v8u16)__msa_pckev_h((v8i16)sum, (v8i16)sum);
+ sum = __msa_hadd_u_w(sum0, sum0);
+ sum = (v4u32)__msa_srari_w((v4i32)sum, 6);
+ sum_out = __msa_copy_u_w((v4i32)sum, 0);
+
+ return sum_out;
+}
+
+uint32_t vp9_avg_4x4_msa(const uint8_t *src, int32_t src_stride) {
+ uint32_t sum_out;
+ uint32_t src0, src1, src2, src3;
+ v16u8 vec = { 0 };
+ v8u16 sum0;
+ v4u32 sum1;
+ v2u64 sum2;
+
+ LW4(src, src_stride, src0, src1, src2, src3);
+ INSERT_W4_UB(src0, src1, src2, src3, vec);
+
+ sum0 = __msa_hadd_u_h(vec, vec);
+ sum1 = __msa_hadd_u_w(sum0, sum0);
+ sum0 = (v8u16)__msa_pckev_h((v8i16)sum1, (v8i16)sum1);
+ sum1 = __msa_hadd_u_w(sum0, sum0);
+ sum2 = __msa_hadd_u_d(sum1, sum1);
+ sum1 = (v4u32)__msa_srari_w((v4i32)sum2, 4);
+ sum_out = __msa_copy_u_w((v4i32)sum1, 0);
+
+ return sum_out;
+}
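
For reference, a scalar version of what the new 8x8 MSA average computes: the 64 pixels are summed and rounded to the nearest integer, matching the final srari_w(sum, 6) step (the 4x4 variant shifts by 4 instead):

    #include <stdint.h>

    /* Rounded mean of an 8x8 block: (sum of 64 pixels + 32) >> 6. */
    static uint32_t avg_8x8_sketch(const uint8_t *src, int stride) {
      uint32_t sum = 0;
      int r, c;
      for (r = 0; r < 8; ++r, src += stride)
        for (c = 0; c < 8; ++c)
          sum += src[c];
      return (sum + 32) >> 6;
    }
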
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c
new file mode 100644
index 00000000000..9709092fcce
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_error_msa.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
+#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \
+static int64_t block_error_##BSize##size_msa(const int16_t *coeff_ptr, \
+ const int16_t *dq_coeff_ptr, \
+ int64_t *ssz) { \
+ int64_t err = 0; \
+ uint32_t loop_cnt; \
+ v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \
+ v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \
+ v2i64 sq_coeff_r, sq_coeff_l; \
+ v2i64 err0, err_dup0, err1, err_dup1; \
+ \
+ coeff = LD_SH(coeff_ptr); \
+ dq_coeff = LD_SH(dq_coeff_ptr); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, \
+ sq_coeff_r, sq_coeff_l); \
+ DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \
+ \
+ coeff = LD_SH(coeff_ptr + 8); \
+ dq_coeff = LD_SH(dq_coeff_ptr + 8); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff_ptr += 16; \
+ dq_coeff_ptr += 16; \
+ \
+ for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \
+ coeff = LD_SH(coeff_ptr); \
+ dq_coeff = LD_SH(dq_coeff_ptr); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff = LD_SH(coeff_ptr + 8); \
+ dq_coeff = LD_SH(dq_coeff_ptr + 8); \
+ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \
+ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \
+ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \
+ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \
+ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \
+ \
+ coeff_ptr += 16; \
+ dq_coeff_ptr += 16; \
+ } \
+ \
+ err_dup0 = __msa_splati_d(sq_coeff_r, 1); \
+ err_dup1 = __msa_splati_d(sq_coeff_l, 1); \
+ sq_coeff_r += err_dup0; \
+ sq_coeff_l += err_dup1; \
+ *ssz = __msa_copy_s_d(sq_coeff_r, 0); \
+ *ssz += __msa_copy_s_d(sq_coeff_l, 0); \
+ \
+ err_dup0 = __msa_splati_d(err0, 1); \
+ err_dup1 = __msa_splati_d(err1, 1); \
+ err0 += err_dup0; \
+ err1 += err_dup1; \
+ err = __msa_copy_s_d(err0, 0); \
+ err += __msa_copy_s_d(err1, 0); \
+ \
+ return err; \
+}
+
+BLOCK_ERROR_BLOCKSIZE_MSA(16);
+BLOCK_ERROR_BLOCKSIZE_MSA(64);
+BLOCK_ERROR_BLOCKSIZE_MSA(256);
+BLOCK_ERROR_BLOCKSIZE_MSA(1024);
+
+int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr,
+ const tran_low_t *dq_coeff_ptr,
+ intptr_t blk_size, int64_t *ssz) {
+ int64_t err;
+ const int16_t *coeff = (const int16_t *)coeff_ptr;
+ const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr;
+
+ switch (blk_size) {
+ case 16:
+ err = block_error_16size_msa(coeff, dq_coeff, ssz);
+ break;
+ case 64:
+ err = block_error_64size_msa(coeff, dq_coeff, ssz);
+ break;
+ case 256:
+ err = block_error_256size_msa(coeff, dq_coeff, ssz);
+ break;
+ case 1024:
+ err = block_error_1024size_msa(coeff, dq_coeff, ssz);
+ break;
+ default:
+ err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz);
+ break;
+ }
+
+ return err;
+}
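
The MSA block-error kernels accumulate two sums per block: the squared coefficient energy (written to *ssz) and the squared difference against the dequantized coefficients (the return value). A scalar sketch of that computation, equivalent to what the vp9_block_error_c fallback in the default case produces:

    #include <stdint.h>

    /* err = sum((coeff - dqcoeff)^2), *ssz = sum(coeff^2); the MSA macros
       above vectorize exactly this loop 16 coefficients at a time. */
    static int64_t block_error_sketch(const int16_t *coeff, const int16_t *dqcoeff,
                                      intptr_t n, int64_t *ssz) {
      int64_t err = 0, sq = 0;
      intptr_t i;
      for (i = 0; i < n; ++i) {
        const int d = coeff[i] - dqcoeff[i];
        err += (int64_t)d * d;
        sq += (int64_t)coeff[i] * coeff[i];
      }
      *ssz = sq;
      return err;
    }
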
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c
new file mode 100644
index 00000000000..a3ebfab1f7d
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vp9_rtcd.h"
+#include "vp9/encoder/mips/msa/vp9_fdct_msa.h"
+
+static void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr,
+ int32_t src_stride) {
+ v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
+ v8i16 stp21, stp22, stp23, stp24, stp25, stp26, stp30;
+ v8i16 stp31, stp32, stp33, stp34, stp35, stp36, stp37;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, cnst0, cnst1, cnst4, cnst5;
+ v8i16 coeff = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64,
+ -cospi_8_64, -cospi_24_64, cospi_12_64, cospi_20_64 };
+ v8i16 coeff1 = { cospi_2_64, cospi_30_64, cospi_14_64, cospi_18_64,
+ cospi_10_64, cospi_22_64, cospi_6_64, cospi_26_64 };
+ v8i16 coeff2 = { -cospi_2_64, -cospi_10_64, -cospi_18_64, -cospi_26_64,
+ 0, 0, 0, 0 };
+
+ LD_SH16(input, src_stride,
+ in0, in1, in2, in3, in4, in5, in6, in7,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ SLLI_4V(in0, in1, in2, in3, 2);
+ SLLI_4V(in4, in5, in6, in7, 2);
+ SLLI_4V(in8, in9, in10, in11, 2);
+ SLLI_4V(in12, in13, in14, in15, 2);
+ ADD4(in0, in15, in1, in14, in2, in13, in3, in12, tmp0, tmp1, tmp2, tmp3);
+ ADD4(in4, in11, in5, in10, in6, in9, in7, in8, tmp4, tmp5, tmp6, tmp7);
+ VP9_FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
+ tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+ ST_SH8(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp_ptr, 32);
+ SUB4(in0, in15, in1, in14, in2, in13, in3, in12, in15, in14, in13, in12);
+ SUB4(in4, in11, in5, in10, in6, in9, in7, in8, in11, in10, in9, in8);
+
+ tmp_ptr += 16;
+
+ /* stp 1 */
+ ILVL_H2_SH(in10, in13, in11, in12, vec2, vec4);
+ ILVR_H2_SH(in10, in13, in11, in12, vec3, vec5);
+
+ cnst4 = __msa_splati_h(coeff, 0);
+ stp25 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst4);
+
+ cnst5 = __msa_splati_h(coeff, 1);
+ cnst5 = __msa_ilvev_h(cnst5, cnst4);
+ stp22 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst5);
+ stp24 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst4);
+ stp23 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst5);
+
+ /* stp2 */
+ BUTTERFLY_4(in8, in9, stp22, stp23, stp30, stp31, stp32, stp33);
+ BUTTERFLY_4(in15, in14, stp25, stp24, stp37, stp36, stp35, stp34);
+ ILVL_H2_SH(stp36, stp31, stp35, stp32, vec2, vec4);
+ ILVR_H2_SH(stp36, stp31, stp35, stp32, vec3, vec5);
+ SPLATI_H2_SH(coeff, 2, 3, cnst0, cnst1);
+ cnst0 = __msa_ilvev_h(cnst0, cnst1);
+ stp26 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst0);
+
+ cnst0 = __msa_splati_h(coeff, 4);
+ cnst1 = __msa_ilvev_h(cnst1, cnst0);
+ stp21 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst1);
+
+ BUTTERFLY_4(stp30, stp37, stp26, stp21, in8, in15, in14, in9);
+ ILVRL_H2_SH(in15, in8, vec1, vec0);
+ SPLATI_H2_SH(coeff1, 0, 1, cnst0, cnst1);
+ cnst0 = __msa_ilvev_h(cnst0, cnst1);
+
+ in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
+ ST_SH(in8, tmp_ptr);
+
+ cnst0 = __msa_splati_h(coeff2, 0);
+ cnst0 = __msa_ilvev_h(cnst1, cnst0);
+ in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
+ ST_SH(in8, tmp_ptr + 224);
+
+ ILVRL_H2_SH(in14, in9, vec1, vec0);
+ SPLATI_H2_SH(coeff1, 2, 3, cnst0, cnst1);
+ cnst1 = __msa_ilvev_h(cnst1, cnst0);
+
+ in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst1);
+ ST_SH(in8, tmp_ptr + 128);
+
+ cnst1 = __msa_splati_h(coeff2, 2);
+ cnst0 = __msa_ilvev_h(cnst0, cnst1);
+ in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
+ ST_SH(in8, tmp_ptr + 96);
+
+ SPLATI_H2_SH(coeff, 2, 5, cnst0, cnst1);
+ cnst1 = __msa_ilvev_h(cnst1, cnst0);
+
+ stp25 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst1);
+
+ cnst1 = __msa_splati_h(coeff, 3);
+ cnst1 = __msa_ilvev_h(cnst0, cnst1);
+ stp22 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst1);
+
+ /* stp4 */
+ ADD2(stp34, stp25, stp33, stp22, in13, in10);
+
+ ILVRL_H2_SH(in13, in10, vec1, vec0);
+ SPLATI_H2_SH(coeff1, 4, 5, cnst0, cnst1);
+ cnst0 = __msa_ilvev_h(cnst0, cnst1);
+ in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
+ ST_SH(in8, tmp_ptr + 64);
+
+ cnst0 = __msa_splati_h(coeff2, 1);
+ cnst0 = __msa_ilvev_h(cnst1, cnst0);
+ in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
+ ST_SH(in8, tmp_ptr + 160);
+
+ SUB2(stp34, stp25, stp33, stp22, in12, in11);
+ ILVRL_H2_SH(in12, in11, vec1, vec0);
+ SPLATI_H2_SH(coeff1, 6, 7, cnst0, cnst1);
+ cnst1 = __msa_ilvev_h(cnst1, cnst0);
+
+ in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst1);
+ ST_SH(in8, tmp_ptr + 192);
+
+ cnst1 = __msa_splati_h(coeff2, 3);
+ cnst0 = __msa_ilvev_h(cnst0, cnst1);
+ in8 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0);
+ ST_SH(in8, tmp_ptr + 32);
+}
+
+static void fdct16x8_1d_row(int16_t *input, int16_t *output) {
+ v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
+
+ LD_SH8(input, 16, in0, in1, in2, in3, in4, in5, in6, in7);
+ LD_SH8((input + 8), 16, in8, in9, in10, in11, in12, in13, in14, in15);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3);
+ ADD4(in4, 1, in5, 1, in6, 1, in7, 1, in4, in5, in6, in7);
+ ADD4(in8, 1, in9, 1, in10, 1, in11, 1, in8, in9, in10, in11);
+ ADD4(in12, 1, in13, 1, in14, 1, in15, 1, in12, in13, in14, in15);
+ SRA_4V(in0, in1, in2, in3, 2);
+ SRA_4V(in4, in5, in6, in7, 2);
+ SRA_4V(in8, in9, in10, in11, 2);
+ SRA_4V(in12, in13, in14, in15, 2);
+ BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11,
+ in12, in13, in14, in15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5,
+ tmp6, tmp7, in8, in9, in10, in11, in12, in13, in14, in15);
+ ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, input, 16);
+ VP9_FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
+ tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+ LD_SH8(input, 16, in8, in9, in10, in11, in12, in13, in14, in15);
+ VP9_FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3,
+ tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3);
+ ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, output, 16);
+ TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7,
+ tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7);
+ ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, output + 8, 16);
+}
+
+void vp9_fdct16x16_msa(const int16_t *input, int16_t *output,
+ int32_t src_stride) {
+ int32_t i;
+ DECLARE_ALIGNED(32, int16_t, tmp_buf[16 * 16]);
+
+ /* column transform */
+ for (i = 0; i < 2; ++i) {
+ fdct8x16_1d_column((input + 8 * i), (&tmp_buf[0] + 8 * i), src_stride);
+ }
+
+ /* row transform */
+ for (i = 0; i < 2; ++i) {
+ fdct16x8_1d_row((&tmp_buf[0] + (128 * i)), (output + (128 * i)));
+ }
+}
+
+void vp9_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
+ out[1] = 0;
+
+ out[0] = VP9_LD_HADD(input, stride);
+ out[0] += VP9_LD_HADD(input + 8, stride);
+ out[0] += VP9_LD_HADD(input + 16 * 8, stride);
+ out[0] += VP9_LD_HADD(input + 16 * 8 + 8, stride);
+ out[0] >>= 1;
+}
+
+static void fadst16_cols_step1_msa(const int16_t *input, int32_t stride,
+ const int32_t *const0, int16_t *int_buf) {
+ v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+ v8i16 tp0, tp1, tp2, tp3, g0, g1, g2, g3, g8, g9, g10, g11, h0, h1, h2, h3;
+ v4i32 k0, k1, k2, k3;
+
+ /* load input data */
+ r0 = LD_SH(input);
+ r15 = LD_SH(input + 15 * stride);
+ r7 = LD_SH(input + 7 * stride);
+ r8 = LD_SH(input + 8 * stride);
+ SLLI_4V(r0, r15, r7, r8, 2);
+
+ /* stage 1 */
+ LD_SW2(const0, 4, k0, k1);
+ LD_SW2(const0 + 8, 4, k2, k3);
+ VP9_MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3);
+
+ r3 = LD_SH(input + 3 * stride);
+ r4 = LD_SH(input + 4 * stride);
+ r11 = LD_SH(input + 11 * stride);
+ r12 = LD_SH(input + 12 * stride);
+ SLLI_4V(r3, r4, r11, r12, 2);
+
+ LD_SW2(const0 + 4 * 4, 4, k0, k1);
+ LD_SW2(const0 + 4 * 6, 4, k2, k3);
+ VP9_MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11);
+
+ /* stage 2 */
+ BUTTERFLY_4(g0, g2, g10, g8, tp0, tp2, tp3, tp1);
+ ST_SH2(tp0, tp2, int_buf, 8);
+ ST_SH2(tp1, tp3, int_buf + 4 * 8, 8);
+
+ LD_SW2(const0 + 4 * 8, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 10);
+ VP9_MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3);
+
+ ST_SH2(h0, h1, int_buf + 8 * 8, 8);
+ ST_SH2(h3, h2, int_buf + 12 * 8, 8);
+
+ r9 = LD_SH(input + 9 * stride);
+ r6 = LD_SH(input + 6 * stride);
+ r1 = LD_SH(input + stride);
+ r14 = LD_SH(input + 14 * stride);
+ SLLI_4V(r9, r6, r1, r14, 2);
+
+ LD_SW2(const0 + 4 * 11, 4, k0, k1);
+ LD_SW2(const0 + 4 * 13, 4, k2, k3);
+ VP9_MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g0, g1, g2, g3);
+
+ ST_SH2(g1, g3, int_buf + 3 * 8, 4 * 8);
+
+ r13 = LD_SH(input + 13 * stride);
+ r2 = LD_SH(input + 2 * stride);
+ r5 = LD_SH(input + 5 * stride);
+ r10 = LD_SH(input + 10 * stride);
+ SLLI_4V(r13, r2, r5, r10, 2);
+
+ LD_SW2(const0 + 4 * 15, 4, k0, k1);
+ LD_SW2(const0 + 4 * 17, 4, k2, k3);
+ VP9_MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, h0, h1, h2, h3);
+
+ ST_SH2(h1, h3, int_buf + 11 * 8, 4 * 8);
+
+ BUTTERFLY_4(h0, h2, g2, g0, tp0, tp1, tp2, tp3);
+ ST_SH4(tp0, tp1, tp2, tp3, int_buf + 2 * 8, 4 * 8);
+}
+
+static void fadst16_cols_step2_msa(int16_t *int_buf, const int32_t *const0,
+ int16_t *out) {
+ int16_t *out_ptr = out + 128;
+ v8i16 tp0, tp1, tp2, tp3, g5, g7, g13, g15;
+ v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h10, h11;
+ v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
+ v8i16 out8, out9, out10, out11, out12, out13, out14, out15;
+ v4i32 k0, k1, k2, k3;
+
+ LD_SH2(int_buf + 3 * 8, 4 * 8, g13, g15);
+ LD_SH2(int_buf + 11 * 8, 4 * 8, g5, g7);
+ LD_SW2(const0 + 4 * 19, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 21);
+ VP9_MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7);
+
+ tp0 = LD_SH(int_buf + 4 * 8);
+ tp1 = LD_SH(int_buf + 5 * 8);
+ tp3 = LD_SH(int_buf + 10 * 8);
+ tp2 = LD_SH(int_buf + 14 * 8);
+ LD_SW2(const0 + 4 * 22, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 24);
+ VP9_MADD_BF(tp0, tp1, tp2, tp3, k0, k1, k2, k0, out4, out6, out5, out7);
+ out4 = -out4;
+ ST_SH(out4, (out + 3 * 16));
+ ST_SH(out5, (out_ptr + 4 * 16));
+
+ h1 = LD_SH(int_buf + 9 * 8);
+ h3 = LD_SH(int_buf + 12 * 8);
+ VP9_MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15);
+ out13 = -out13;
+ ST_SH(out12, (out + 2 * 16));
+ ST_SH(out13, (out_ptr + 5 * 16));
+
+ tp0 = LD_SH(int_buf);
+ tp1 = LD_SH(int_buf + 8);
+ tp2 = LD_SH(int_buf + 2 * 8);
+ tp3 = LD_SH(int_buf + 6 * 8);
+
+ BUTTERFLY_4(tp0, tp1, tp3, tp2, out0, out1, h11, h10);
+ out1 = -out1;
+ ST_SH(out0, (out));
+ ST_SH(out1, (out_ptr + 7 * 16));
+
+ h0 = LD_SH(int_buf + 8 * 8);
+ h2 = LD_SH(int_buf + 13 * 8);
+
+ BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10);
+ out8 = -out8;
+ ST_SH(out8, (out + 16));
+ ST_SH(out9, (out_ptr + 6 * 16));
+
+ /* stage 4 */
+ LD_SW2(const0 + 4 * 25, 4, k0, k1);
+ LD_SW2(const0 + 4 * 27, 4, k2, k3);
+ VP9_MADD_SHORT(h10, h11, k1, k2, out2, out3);
+ ST_SH(out2, (out + 7 * 16));
+ ST_SH(out3, (out_ptr));
+
+ VP9_MADD_SHORT(out6, out7, k0, k3, out6, out7);
+ ST_SH(out6, (out + 4 * 16));
+ ST_SH(out7, (out_ptr + 3 * 16));
+
+ VP9_MADD_SHORT(out10, out11, k0, k3, out10, out11);
+ ST_SH(out10, (out + 6 * 16));
+ ST_SH(out11, (out_ptr + 16));
+
+ VP9_MADD_SHORT(out14, out15, k1, k2, out14, out15);
+ ST_SH(out14, (out + 5 * 16));
+ ST_SH(out15, (out_ptr + 2 * 16));
+}
+
+static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) {
+ v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+ v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
+
+ /* load input data */
+ LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
+ r0, r1, r2, r3, r4, r5, r6, r7);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r0, r1);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r2, r3);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r4, r5);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r6, r7);
+ ST_SH8(r0, r1, r2, r3, r4, r5, r6, r7, out, 8);
+ out += 64;
+
+ LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
+ r8, r9, r10, r11, r12, r13, r14, r15);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r8, r9);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r10, r11);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r12, r13);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r14, r15);
+ ST_SH8(r8, r9, r10, r11, r12, r13, r14, r15, out, 8);
+ out += 64;
+
+ /* load input data */
+ input += 128;
+ LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
+ r0, r1, r2, r3, r4, r5, r6, r7);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r0, r1);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r2, r3);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r4, r5);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r6, r7);
+ ST_SH8(r0, r1, r2, r3, r4, r5, r6, r7, out, 8);
+ out += 64;
+
+ LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
+ r8, r9, r10, r11, r12, r13, r14, r15);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r8, r9);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r10, r11);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r12, r13);
+ VP9_FDCT_POSTPROC_2V_NEG_H(r14, r15);
+ ST_SH8(r8, r9, r10, r11, r12, r13, r14, r15, out, 8);
+}
+
+static void fadst16_rows_step1_msa(int16_t *input, const int32_t *const0,
+ int16_t *int_buf) {
+ v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+ v8i16 tp0, tp1, tp2, tp3, g0, g1, g2, g3, g8, g9, g10, g11, h0, h1, h2, h3;
+ v4i32 k0, k1, k2, k3;
+
+ /* load input data */
+ r0 = LD_SH(input);
+ r7 = LD_SH(input + 7 * 8);
+ r8 = LD_SH(input + 8 * 8);
+ r15 = LD_SH(input + 15 * 8);
+
+ /* stage 1 */
+ LD_SW2(const0, 4, k0, k1);
+ LD_SW2(const0 + 4 * 2, 4, k2, k3);
+ VP9_MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3);
+
+ r3 = LD_SH(input + 3 * 8);
+ r4 = LD_SH(input + 4 * 8);
+ r11 = LD_SH(input + 11 * 8);
+ r12 = LD_SH(input + 12 * 8);
+
+ LD_SW2(const0 + 4 * 4, 4, k0, k1);
+ LD_SW2(const0 + 4 * 6, 4, k2, k3);
+ VP9_MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11);
+
+ /* stage 2 */
+ BUTTERFLY_4(g0, g2, g10, g8, tp0, tp2, tp3, tp1);
+ ST_SH2(tp0, tp1, int_buf, 4 * 8);
+ ST_SH2(tp2, tp3, int_buf + 8, 4 * 8);
+
+ LD_SW2(const0 + 4 * 8, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 10);
+ VP9_MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3);
+ ST_SH2(h0, h3, int_buf + 8 * 8, 4 * 8);
+ ST_SH2(h1, h2, int_buf + 9 * 8, 4 * 8);
+
+ r1 = LD_SH(input + 8);
+ r6 = LD_SH(input + 6 * 8);
+ r9 = LD_SH(input + 9 * 8);
+ r14 = LD_SH(input + 14 * 8);
+
+ LD_SW2(const0 + 4 * 11, 4, k0, k1);
+ LD_SW2(const0 + 4 * 13, 4, k2, k3);
+ VP9_MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g0, g1, g2, g3);
+ ST_SH2(g1, g3, int_buf + 3 * 8, 4 * 8);
+
+ r2 = LD_SH(input + 2 * 8);
+ r5 = LD_SH(input + 5 * 8);
+ r10 = LD_SH(input + 10 * 8);
+ r13 = LD_SH(input + 13 * 8);
+
+ LD_SW2(const0 + 4 * 15, 4, k0, k1);
+ LD_SW2(const0 + 4 * 17, 4, k2, k3);
+ VP9_MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, h0, h1, h2, h3);
+ ST_SH2(h1, h3, int_buf + 11 * 8, 4 * 8);
+ BUTTERFLY_4(h0, h2, g2, g0, tp0, tp1, tp2, tp3);
+ ST_SH4(tp0, tp1, tp2, tp3, int_buf + 2 * 8, 4 * 8);
+}
+
+static void fadst16_rows_step2_msa(int16_t *int_buf, const int32_t *const0,
+ int16_t *out) {
+ int16_t *out_ptr = out + 8;
+ v8i16 tp0, tp1, tp2, tp3, g5, g7, g13, g15;
+ v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h10, h11;
+ v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
+ v8i16 out8, out9, out10, out11, out12, out13, out14, out15;
+ v4i32 k0, k1, k2, k3;
+
+ g13 = LD_SH(int_buf + 3 * 8);
+ g15 = LD_SH(int_buf + 7 * 8);
+ g5 = LD_SH(int_buf + 11 * 8);
+ g7 = LD_SH(int_buf + 15 * 8);
+
+ LD_SW2(const0 + 4 * 19, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 21);
+ VP9_MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7);
+
+ tp0 = LD_SH(int_buf + 4 * 8);
+ tp1 = LD_SH(int_buf + 5 * 8);
+ tp3 = LD_SH(int_buf + 10 * 8);
+ tp2 = LD_SH(int_buf + 14 * 8);
+
+ LD_SW2(const0 + 4 * 22, 4, k0, k1);
+ k2 = LD_SW(const0 + 4 * 24);
+ VP9_MADD_BF(tp0, tp1, tp2, tp3, k0, k1, k2, k0, out4, out6, out5, out7);
+ out4 = -out4;
+ ST_SH(out4, (out + 3 * 16));
+ ST_SH(out5, (out_ptr + 4 * 16));
+
+ h1 = LD_SH(int_buf + 9 * 8);
+ h3 = LD_SH(int_buf + 12 * 8);
+ VP9_MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15);
+ out13 = -out13;
+ ST_SH(out12, (out + 2 * 16));
+ ST_SH(out13, (out_ptr + 5 * 16));
+
+ tp0 = LD_SH(int_buf);
+ tp1 = LD_SH(int_buf + 8);
+ tp2 = LD_SH(int_buf + 2 * 8);
+ tp3 = LD_SH(int_buf + 6 * 8);
+
+ BUTTERFLY_4(tp0, tp1, tp3, tp2, out0, out1, h11, h10);
+ out1 = -out1;
+ ST_SH(out0, (out));
+ ST_SH(out1, (out_ptr + 7 * 16));
+
+ h0 = LD_SH(int_buf + 8 * 8);
+ h2 = LD_SH(int_buf + 13 * 8);
+ BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10);
+ out8 = -out8;
+ ST_SH(out8, (out + 16));
+ ST_SH(out9, (out_ptr + 6 * 16));
+
+ /* stage 4 */
+ LD_SW2(const0 + 4 * 25, 4, k0, k1);
+ LD_SW2(const0 + 4 * 27, 4, k2, k3);
+ VP9_MADD_SHORT(h10, h11, k1, k2, out2, out3);
+ ST_SH(out2, (out + 7 * 16));
+ ST_SH(out3, (out_ptr));
+
+ VP9_MADD_SHORT(out6, out7, k0, k3, out6, out7);
+ ST_SH(out6, (out + 4 * 16));
+ ST_SH(out7, (out_ptr + 3 * 16));
+
+ VP9_MADD_SHORT(out10, out11, k0, k3, out10, out11);
+ ST_SH(out10, (out + 6 * 16));
+ ST_SH(out11, (out_ptr + 16));
+
+ VP9_MADD_SHORT(out14, out15, k1, k2, out14, out15);
+ ST_SH(out14, (out + 5 * 16));
+ ST_SH(out15, (out_ptr + 2 * 16));
+}
+
+static void fadst16_transpose_msa(int16_t *input, int16_t *out) {
+ v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15;
+ v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15;
+
+ /* load input data */
+ LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
+ l4, l12, l5, l13, l6, l14, l7, l15);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
+ r0, r1, r2, r3, r4, r5, r6, r7);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
+ r8, r9, r10, r11, r12, r13, r14, r15);
+ ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
+ ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
+ out += 16 * 8;
+
+ /* load input data */
+ input += 128;
+ LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11,
+ l4, l12, l5, l13, l6, l14, l7, l15);
+ TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7,
+ r0, r1, r2, r3, r4, r5, r6, r7);
+ TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15,
+ r8, r9, r10, r11, r12, r13, r14, r15);
+ ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8);
+ ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8);
+}
+
+static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) {
+ int16_t *temp = intermediate;
+ int16_t *out = output;
+ v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11;
+ v8i16 in12, in13, in14, in15;
+
+ LD_SH8(temp, 16, in0, in1, in2, in3, in4, in5, in6, in7);
+ temp = intermediate + 8;
+ LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in0, in1);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in2, in3);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in6, in7);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in8, in9);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in10, in11);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in12, in13);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in14, in15);
+ BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
+ in8, in9, in10, in11, in12, in13, in14, in15,
+ tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ temp = intermediate;
+ ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, temp, 16);
+ VP9_FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7,
+ tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+ temp = intermediate;
+ LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15);
+ VP9_FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3,
+ tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3);
+ ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, out, 16);
+ TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7,
+ tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7);
+ out = output + 8;
+ ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, out, 16);
+}
+
+void vp9_fht16x16_msa(const int16_t *input, int16_t *output,
+ int32_t stride, int32_t tx_type) {
+ DECLARE_ALIGNED(32, int16_t, tmp[256]);
+ DECLARE_ALIGNED(32, int16_t, trans_buf[256]);
+ DECLARE_ALIGNED(32, int16_t, tmp_buf[128]);
+ int32_t i;
+ int16_t *ptmpbuf = &tmp_buf[0];
+ int16_t *trans = &trans_buf[0];
+ const int32_t const_arr[29 * 4] = {
+ 52707308, 52707308, 52707308, 52707308,
+ -1072430300, -1072430300, -1072430300, -1072430300,
+ 795618043, 795618043, 795618043, 795618043,
+ -721080468, -721080468, -721080468, -721080468,
+ 459094491, 459094491, 459094491, 459094491,
+ -970646691, -970646691, -970646691, -970646691,
+ 1010963856, 1010963856, 1010963856, 1010963856,
+ -361743294, -361743294, -361743294, -361743294,
+ 209469125, 209469125, 209469125, 209469125,
+ -1053094788, -1053094788, -1053094788, -1053094788,
+ 1053160324, 1053160324, 1053160324, 1053160324,
+ 639644520, 639644520, 639644520, 639644520,
+ -862444000, -862444000, -862444000, -862444000,
+ 1062144356, 1062144356, 1062144356, 1062144356,
+ -157532337, -157532337, -157532337, -157532337,
+ 260914709, 260914709, 260914709, 260914709,
+ -1041559667, -1041559667, -1041559667, -1041559667,
+ 920985831, 920985831, 920985831, 920985831,
+ -551995675, -551995675, -551995675, -551995675,
+ 596522295, 596522295, 596522295, 596522295,
+ 892853362, 892853362, 892853362, 892853362,
+ -892787826, -892787826, -892787826, -892787826,
+ 410925857, 410925857, 410925857, 410925857,
+ -992012162, -992012162, -992012162, -992012162,
+ 992077698, 992077698, 992077698, 992077698,
+ 759246145, 759246145, 759246145, 759246145,
+ -759180609, -759180609, -759180609, -759180609,
+ -759222975, -759222975, -759222975, -759222975,
+ 759288511, 759288511, 759288511, 759288511 };
+
+ switch (tx_type) {
+ case DCT_DCT:
+ /* column transform */
+ for (i = 0; i < 2; ++i) {
+ fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride);
+ }
+
+ /* row transform */
+ for (i = 0; i < 2; ++i) {
+ fdct16x8_1d_row(tmp + (128 * i), output + (128 * i));
+ }
+ break;
+ case ADST_DCT:
+ /* column transform */
+ for (i = 0; i < 2; ++i) {
+ fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf);
+ fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3));
+ }
+
+ /* row transform */
+ for (i = 0; i < 2; ++i) {
+ postproc_fdct16x8_1d_row(tmp + (128 * i), output + (128 * i));
+ }
+ break;
+ case DCT_ADST:
+ /* column transform */
+ for (i = 0; i < 2; ++i) {
+ fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride);
+ }
+
+ fadst16_transpose_postproc_msa(tmp, trans);
+
+ /* row transform */
+ for (i = 0; i < 2; ++i) {
+ fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf);
+ fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7));
+ }
+
+ fadst16_transpose_msa(tmp, output);
+ break;
+ case ADST_ADST:
+ /* column transform */
+ for (i = 0; i < 2; ++i) {
+ fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf);
+ fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3));
+ }
+
+ fadst16_transpose_postproc_msa(tmp, trans);
+
+ /* row transform */
+ for (i = 0; i < 2; ++i) {
+ fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf);
+ fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7));
+ }
+
+ fadst16_transpose_msa(tmp, output);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct32x32_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct32x32_msa.c
new file mode 100644
index 00000000000..3a740232228
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct32x32_msa.c
@@ -0,0 +1,956 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/encoder/mips/msa/vp9_fdct_msa.h"
+
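+/* Load the 32 input rows of one 8-column slice, scale by 4 (<< 2) and apply
+ * the first butterfly stage (row k paired with row 31 - k). */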
+static void fdct8x32_1d_column_load_butterfly(const int16_t *input,
+ int32_t src_stride,
+ int16_t *temp_buff) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 step0, step1, step2, step3;
+ v8i16 in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1;
+ v8i16 step0_1, step1_1, step2_1, step3_1;
+
+ /* 1st and 2nd set */
+ LD_SH4(input, src_stride, in0, in1, in2, in3);
+ LD_SH4(input + (28 * src_stride), src_stride, in4, in5, in6, in7);
+ LD_SH4(input + (4 * src_stride), src_stride, in0_1, in1_1, in2_1, in3_1);
+ LD_SH4(input + (24 * src_stride), src_stride, in4_1, in5_1, in6_1, in7_1);
+ SLLI_4V(in0, in1, in2, in3, 2);
+ SLLI_4V(in4, in5, in6, in7, 2);
+ SLLI_4V(in0_1, in1_1, in2_1, in3_1, 2);
+ SLLI_4V(in4_1, in5_1, in6_1, in7_1, 2);
+ BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7,
+ step0, step1, step2, step3, in4, in5, in6, in7);
+ BUTTERFLY_8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1,
+ step0_1, step1_1, step2_1, step3_1, in4_1, in5_1, in6_1, in7_1);
+ ST_SH4(step0, step1, step2, step3, temp_buff, 8);
+ ST_SH4(in4, in5, in6, in7, temp_buff + (28 * 8), 8);
+ ST_SH4(step0_1, step1_1, step2_1, step3_1, temp_buff + (4 * 8), 8);
+ ST_SH4(in4_1, in5_1, in6_1, in7_1, temp_buff + (24 * 8), 8);
+
+ /* 3rd and 4th set */
+ LD_SH4(input + (8 * src_stride), src_stride, in0, in1, in2, in3);
+ LD_SH4(input + (20 * src_stride), src_stride, in4, in5, in6, in7);
+ LD_SH4(input + (12 * src_stride), src_stride, in0_1, in1_1, in2_1, in3_1);
+ LD_SH4(input + (16 * src_stride), src_stride, in4_1, in5_1, in6_1, in7_1);
+ SLLI_4V(in0, in1, in2, in3, 2);
+ SLLI_4V(in4, in5, in6, in7, 2);
+ SLLI_4V(in0_1, in1_1, in2_1, in3_1, 2);
+ SLLI_4V(in4_1, in5_1, in6_1, in7_1, 2);
+ BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7,
+ step0, step1, step2, step3, in4, in5, in6, in7);
+ BUTTERFLY_8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1,
+ step0_1, step1_1, step2_1, step3_1, in4_1, in5_1, in6_1, in7_1);
+ ST_SH4(step0, step1, step2, step3, temp_buff + (8 * 8), 8);
+ ST_SH4(in4, in5, in6, in7, temp_buff + (20 * 8), 8);
+ ST_SH4(step0_1, step1_1, step2_1, step3_1, temp_buff + (12 * 8), 8);
+ ST_SH4(in4_1, in5_1, in6_1, in7_1, temp_buff + (16 * 8), 8);
+}
+
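+/* Even half of the 32-point column DCT; each output row is rounded and
+ * stored at a multiple of 64 int16_t in the big temp buffer. */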
+static void fdct8x32_1d_column_even_store(int16_t *input, int16_t *temp) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v8i16 temp0, temp1;
+
+ /* fdct even */
+ LD_SH4(input, 8, in0, in1, in2, in3);
+ LD_SH4(input + 96, 8, in12, in13, in14, in15);
+ BUTTERFLY_8(in0, in1, in2, in3, in12, in13, in14, in15,
+ vec0, vec1, vec2, vec3, in12, in13, in14, in15);
+ LD_SH4(input + 32, 8, in4, in5, in6, in7);
+ LD_SH4(input + 64, 8, in8, in9, in10, in11);
+ BUTTERFLY_8(in4, in5, in6, in7, in8, in9, in10, in11,
+ vec4, vec5, vec6, vec7, in8, in9, in10, in11);
+
+ /* Stage 3 */
+ ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3);
+ BUTTERFLY_4(in0, in1, in2, in3, temp0, in4, in1, in0);
+ VP9_DOTP_CONST_PAIR(temp0, in4, cospi_16_64, cospi_16_64, temp1, temp0);
+ VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
+ ST_SH(temp0, temp);
+ ST_SH(temp1, temp + 512);
+
+ VP9_DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0);
+ VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
+ ST_SH(temp0, temp + 256);
+ ST_SH(temp1, temp + 768);
+
+ SUB4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, vec7, vec6, vec5, vec4);
+ VP9_DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6);
+ ADD2(vec4, vec5, vec7, vec6, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0);
+ VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
+ ST_SH(temp0, temp + 128);
+ ST_SH(temp1, temp + 896);
+
+ SUB2(vec4, vec5, vec7, vec6, vec4, vec7);
+ VP9_DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0);
+ VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
+ ST_SH(temp0, temp + 640);
+ ST_SH(temp1, temp + 384);
+
+ VP9_DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5);
+ VP9_DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4);
+ ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2);
+ VP9_DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3);
+ ADD2(in0, in1, in2, in3, vec0, vec7);
+ VP9_DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0);
+ VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
+ ST_SH(temp0, temp + 64);
+ ST_SH(temp1, temp + 960);
+
+ SUB2(in0, in1, in2, in3, in0, in2);
+ VP9_DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0);
+ VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
+ ST_SH(temp0, temp + 576);
+ ST_SH(temp1, temp + 448);
+
+ SUB2(in9, vec2, in14, vec5, vec2, vec5);
+ VP9_DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1);
+ SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5);
+ VP9_DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0);
+ VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
+ ST_SH(temp0, temp + 320);
+ ST_SH(temp1, temp + 704);
+
+ ADD2(in3, in2, in0, in1, vec3, vec4);
+ VP9_DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1);
+ VP9_FDCT32_POSTPROC_2V_POS_H(temp0, temp1);
+ ST_SH(temp0, temp + 192);
+ ST_SH(temp1, temp + 832);
+}
+
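+/* Odd half of the 32-point column DCT; intermediate differences are written
+ * back into the input buffer and reloaded for the later stages. */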
+static void fdct8x32_1d_column_odd_store(int16_t *input, int16_t *temp_ptr) {
+ v8i16 in16, in17, in18, in19, in20, in21, in22, in23;
+ v8i16 in24, in25, in26, in27, in28, in29, in30, in31, vec4, vec5;
+
+ in20 = LD_SH(input + 32);
+ in21 = LD_SH(input + 40);
+ in26 = LD_SH(input + 80);
+ in27 = LD_SH(input + 88);
+
+ VP9_DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27);
+ VP9_DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26);
+
+ in18 = LD_SH(input + 16);
+ in19 = LD_SH(input + 24);
+ in28 = LD_SH(input + 96);
+ in29 = LD_SH(input + 104);
+
+ vec4 = in19 - in20;
+ ST_SH(vec4, input + 32);
+ vec4 = in18 - in21;
+ ST_SH(vec4, input + 40);
+ vec4 = in29 - in26;
+ ST_SH(vec4, input + 80);
+ vec4 = in28 - in27;
+ ST_SH(vec4, input + 88);
+
+ in21 = in18 + in21;
+ in20 = in19 + in20;
+ in27 = in28 + in27;
+ in26 = in29 + in26;
+
+ LD_SH4(input + 48, 8, in22, in23, in24, in25);
+ VP9_DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25);
+ VP9_DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24);
+
+ in16 = LD_SH(input);
+ in17 = LD_SH(input + 8);
+ in30 = LD_SH(input + 112);
+ in31 = LD_SH(input + 120);
+
+ vec4 = in17 - in22;
+ ST_SH(vec4, input + 16);
+ vec4 = in16 - in23;
+ ST_SH(vec4, input + 24);
+ vec4 = in31 - in24;
+ ST_SH(vec4, input + 96);
+ vec4 = in30 - in25;
+ ST_SH(vec4, input + 104);
+
+ ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31);
+ VP9_DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29);
+ VP9_DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28);
+ ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25);
+ VP9_DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24);
+ ADD2(in27, in26, in25, in24, in23, in20);
+ VP9_DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5);
+ VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
+ ST_SH(vec5, temp_ptr);
+ ST_SH(vec4, temp_ptr + 960);
+
+ SUB2(in27, in26, in25, in24, in22, in21);
+ VP9_DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4);
+ VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
+ ST_SH(vec5, temp_ptr + 448);
+ ST_SH(vec4, temp_ptr + 512);
+
+ SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20);
+ VP9_DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25);
+ SUB2(in26, in27, in24, in25, in23, in20);
+ VP9_DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5);
+ VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
+ ST_SH(vec4, temp_ptr + 704);
+ ST_SH(vec5, temp_ptr + 256);
+
+ ADD2(in26, in27, in24, in25, in22, in21);
+ VP9_DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5);
+ VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
+ ST_SH(vec4, temp_ptr + 192);
+ ST_SH(vec5, temp_ptr + 768);
+
+ LD_SH4(input + 16, 8, in22, in23, in20, in21);
+ LD_SH4(input + 80, 8, in26, in27, in24, in25);
+ in16 = in20;
+ in17 = in21;
+ VP9_DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27);
+ VP9_DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26);
+ SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31);
+ VP9_DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30);
+ ADD2(in28, in29, in31, in30, in16, in19);
+ VP9_DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4);
+ VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
+ ST_SH(vec5, temp_ptr + 832);
+ ST_SH(vec4, temp_ptr + 128);
+
+ SUB2(in28, in29, in31, in30, in17, in18);
+ VP9_DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4);
+ VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
+ ST_SH(vec5, temp_ptr + 320);
+ ST_SH(vec4, temp_ptr + 640);
+ ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19);
+ VP9_DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31);
+ SUB2(in29, in28, in30, in31, in16, in19);
+ VP9_DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4);
+ VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
+ ST_SH(vec5, temp_ptr + 576);
+ ST_SH(vec4, temp_ptr + 384);
+
+ ADD2(in29, in28, in30, in31, in17, in18);
+ VP9_DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4);
+ VP9_FDCT32_POSTPROC_2V_POS_H(vec5, vec4);
+ ST_SH(vec5, temp_ptr + 64);
+ ST_SH(vec4, temp_ptr + 896);
+}
+
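+/* One 8-column slice of the column transform: load/scale/butterfly, then the
+ * even and odd halves. */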
+static void fdct8x32_1d_column(const int16_t *input, int32_t src_stride,
+ int16_t *tmp_buf, int16_t *tmp_buf_big) {
+ fdct8x32_1d_column_load_butterfly(input, src_stride, tmp_buf);
+ fdct8x32_1d_column_even_store(tmp_buf, tmp_buf_big);
+ fdct8x32_1d_column_odd_store(tmp_buf + 128, (tmp_buf_big + 32));
+}
+
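+/* Load an 8x32 block of column-pass results, transpose the 8x8 tiles and
+ * apply the first butterfly stage of the row transform. */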
+static void fdct8x32_1d_row_load_butterfly(int16_t *temp_buff,
+ int16_t *output) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
+ v8i16 step0, step1, step2, step3, step4, step5, step6, step7;
+
+ LD_SH8(temp_buff, 32, in0, in1, in2, in3, in4, in5, in6, in7);
+ LD_SH8(temp_buff + 24, 32, in8, in9, in10, in11, in12, in13, in14, in15);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
+ in8, in9, in10, in11, in12, in13, in14, in15,
+ step0, step1, step2, step3, step4, step5, step6, step7,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ ST_SH8(step0, step1, step2, step3, step4, step5, step6, step7, output, 8);
+ ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, (output + 24 * 8), 8);
+
+ /* 2nd set */
+ LD_SH8(temp_buff + 8, 32, in0, in1, in2, in3, in4, in5, in6, in7);
+ LD_SH8(temp_buff + 16, 32, in8, in9, in10, in11, in12, in13, in14, in15);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
+ in8, in9, in10, in11, in12, in13, in14, in15,
+ step0, step1, step2, step3, step4, step5, step6, step7,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ ST_SH8(step0, step1, step2, step3, step4, step5, step6, step7,
+ (output + 8 * 8), 8);
+ ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, (output + 16 * 8), 8);
+}
+
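+/* Even half of the 32-point row DCT; the first four output rows are computed
+ * with 32-bit intermediates (word lanes) before being packed back to 16 bits. */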
+static void fdct8x32_1d_row_even_4x(int16_t *input, int16_t *interm_ptr,
+ int16_t *out) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
+ v4i32 vec0_l, vec1_l, vec2_l, vec3_l, vec4_l, vec5_l, vec6_l, vec7_l;
+ v4i32 vec0_r, vec1_r, vec2_r, vec3_r, vec4_r, vec5_r, vec6_r, vec7_r;
+ v4i32 tmp0_w, tmp1_w, tmp2_w, tmp3_w;
+
+ /* fdct32 even */
+ /* stage 2 */
+ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+ LD_SH8(input + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15);
+
+ BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
+ in8, in9, in10, in11, in12, in13, in14, in15,
+ vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ ST_SH8(vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, interm_ptr, 8);
+ ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, interm_ptr + 64, 8);
+
+ /* Stage 3 */
+ UNPCK_SH_SW(vec0, vec0_l, vec0_r);
+ UNPCK_SH_SW(vec1, vec1_l, vec1_r);
+ UNPCK_SH_SW(vec2, vec2_l, vec2_r);
+ UNPCK_SH_SW(vec3, vec3_l, vec3_r);
+ UNPCK_SH_SW(vec4, vec4_l, vec4_r);
+ UNPCK_SH_SW(vec5, vec5_l, vec5_r);
+ UNPCK_SH_SW(vec6, vec6_l, vec6_r);
+ UNPCK_SH_SW(vec7, vec7_l, vec7_r);
+ ADD4(vec0_r, vec7_r, vec1_r, vec6_r, vec2_r, vec5_r, vec3_r, vec4_r,
+ tmp0_w, tmp1_w, tmp2_w, tmp3_w);
+ BUTTERFLY_4(tmp0_w, tmp1_w, tmp2_w, tmp3_w, vec4_r, vec6_r, vec7_r, vec5_r);
+ ADD4(vec0_l, vec7_l, vec1_l, vec6_l, vec2_l, vec5_l, vec3_l, vec4_l,
+ vec0_r, vec1_r, vec2_r, vec3_r);
+
+ tmp3_w = vec0_r + vec3_r;
+ vec0_r = vec0_r - vec3_r;
+ vec3_r = vec1_r + vec2_r;
+ vec1_r = vec1_r - vec2_r;
+
+ VP9_DOTP_CONST_PAIR_W(vec4_r, vec6_r, tmp3_w, vec3_r, cospi_16_64,
+ cospi_16_64, vec4_r, tmp3_w, vec6_r, vec3_r);
+ VP9_FDCT32_POSTPROC_NEG_W(vec4_r);
+ VP9_FDCT32_POSTPROC_NEG_W(tmp3_w);
+ VP9_FDCT32_POSTPROC_NEG_W(vec6_r);
+ VP9_FDCT32_POSTPROC_NEG_W(vec3_r);
+ PCKEV_H2_SH(vec4_r, tmp3_w, vec6_r, vec3_r, vec4, vec5);
+ ST_SH2(vec5, vec4, out, 8);
+
+ VP9_DOTP_CONST_PAIR_W(vec5_r, vec7_r, vec0_r, vec1_r, cospi_24_64,
+ cospi_8_64, vec4_r, tmp3_w, vec6_r, vec3_r);
+ VP9_FDCT32_POSTPROC_NEG_W(vec4_r);
+ VP9_FDCT32_POSTPROC_NEG_W(tmp3_w);
+ VP9_FDCT32_POSTPROC_NEG_W(vec6_r);
+ VP9_FDCT32_POSTPROC_NEG_W(vec3_r);
+ PCKEV_H2_SH(vec4_r, tmp3_w, vec6_r, vec3_r, vec4, vec5);
+ ST_SH2(vec5, vec4, out + 16, 8);
+
+ LD_SH8(interm_ptr, 8, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7);
+ SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7);
+ VP9_DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6);
+ ADD2(vec4, vec5, vec7, vec6, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, in5, in4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5);
+ ST_SH(in4, out + 32);
+ ST_SH(in5, out + 56);
+
+ SUB2(vec4, vec5, vec7, vec6, vec4, vec7);
+ VP9_DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, in5, in4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5);
+ ST_SH(in4, out + 40);
+ ST_SH(in5, out + 48);
+
+ LD_SH8(interm_ptr + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15);
+ VP9_DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5);
+ VP9_DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4);
+ ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2);
+ VP9_DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3);
+ ADD2(in0, in1, in2, in3, vec0, vec7);
+ VP9_DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, in5, in4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5);
+ ST_SH(in4, out + 64);
+ ST_SH(in5, out + 120);
+
+ SUB2(in0, in1, in2, in3, in0, in2);
+ VP9_DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, in5, in4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5);
+ ST_SH(in4, out + 72);
+ ST_SH(in5, out + 112);
+
+ SUB2(in9, vec2, in14, vec5, vec2, vec5);
+ VP9_DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1);
+ SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5);
+ VP9_DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, in5, in4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5);
+ ST_SH(in4, out + 80);
+ ST_SH(in5, out + 104);
+
+ ADD2(in3, in2, in0, in1, vec3, vec4);
+ VP9_DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, in4, in5);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in4, in5);
+ ST_SH(in4, out + 96);
+ ST_SH(in5, out + 88);
+}
+
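+/* Even half of the 32-point row DCT using 16-bit arithmetic throughout. */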
+static void fdct8x32_1d_row_even(int16_t *temp, int16_t *out) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, temp0, temp1;
+
+ /* fdct32 even */
+ /* stage 2 */
+ LD_SH8(temp, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+ LD_SH8(temp + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15);
+
+ BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
+ in8, in9, in10, in11, in12, in13, in14, in15,
+ vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+
+ /* Stage 3 */
+ ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3);
+ BUTTERFLY_4(in0, in1, in2, in3, temp0, in4, in1, in0);
+ VP9_DOTP_CONST_PAIR(temp0, in4, cospi_16_64, cospi_16_64, temp1, temp0);
+ VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1);
+ ST_SH(temp0, out);
+ ST_SH(temp1, out + 8);
+
+ VP9_DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0);
+ VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1);
+ ST_SH(temp0, out + 16);
+ ST_SH(temp1, out + 24);
+
+ SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7);
+ VP9_DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6);
+ ADD2(vec4, vec5, vec7, vec6, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0);
+ VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1);
+ ST_SH(temp0, out + 32);
+ ST_SH(temp1, out + 56);
+
+ SUB2(vec4, vec5, vec7, vec6, vec4, vec7);
+ VP9_DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0);
+ VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1);
+ ST_SH(temp0, out + 40);
+ ST_SH(temp1, out + 48);
+
+ VP9_DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5);
+ VP9_DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4);
+ ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2);
+ VP9_DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3);
+ ADD2(in0, in1, in2, in3, vec0, vec7);
+ VP9_DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0);
+ VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1);
+ ST_SH(temp0, out + 64);
+ ST_SH(temp1, out + 120);
+
+ SUB2(in0, in1, in2, in3, in0, in2);
+ VP9_DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0);
+ VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1);
+ ST_SH(temp0, out + 72);
+ ST_SH(temp1, out + 112);
+
+ SUB2(in9, vec2, in14, vec5, vec2, vec5);
+ VP9_DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1);
+ SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5);
+ VP9_DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0);
+ VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1);
+ ST_SH(temp0, out + 80);
+ ST_SH(temp1, out + 104);
+
+ ADD2(in3, in2, in0, in1, vec3, vec4);
+ VP9_DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1);
+ VP9_FDCT_POSTPROC_2V_NEG_H(temp0, temp1);
+ ST_SH(temp0, out + 96);
+ ST_SH(temp1, out + 88);
+}
+
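+/* Odd half of the 32-point row DCT; partial differences are parked in
+ * interm_ptr and reloaded for the later stages. */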
+static void fdct8x32_1d_row_odd(int16_t *temp, int16_t *interm_ptr,
+ int16_t *out) {
+ v8i16 in16, in17, in18, in19, in20, in21, in22, in23;
+ v8i16 in24, in25, in26, in27, in28, in29, in30, in31, vec4, vec5;
+
+ in20 = LD_SH(temp + 32);
+ in21 = LD_SH(temp + 40);
+ in26 = LD_SH(temp + 80);
+ in27 = LD_SH(temp + 88);
+
+ VP9_DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27);
+ VP9_DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26);
+
+ in18 = LD_SH(temp + 16);
+ in19 = LD_SH(temp + 24);
+ in28 = LD_SH(temp + 96);
+ in29 = LD_SH(temp + 104);
+
+ vec4 = in19 - in20;
+ ST_SH(vec4, interm_ptr + 32);
+ vec4 = in18 - in21;
+ ST_SH(vec4, interm_ptr + 88);
+ vec4 = in28 - in27;
+ ST_SH(vec4, interm_ptr + 56);
+ vec4 = in29 - in26;
+ ST_SH(vec4, interm_ptr + 64);
+
+ ADD4(in18, in21, in19, in20, in28, in27, in29, in26, in21, in20, in27, in26);
+
+ in22 = LD_SH(temp + 48);
+ in23 = LD_SH(temp + 56);
+ in24 = LD_SH(temp + 64);
+ in25 = LD_SH(temp + 72);
+
+ VP9_DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25);
+ VP9_DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24);
+
+ in16 = LD_SH(temp);
+ in17 = LD_SH(temp + 8);
+ in30 = LD_SH(temp + 112);
+ in31 = LD_SH(temp + 120);
+
+ vec4 = in17 - in22;
+ ST_SH(vec4, interm_ptr + 40);
+ vec4 = in30 - in25;
+ ST_SH(vec4, interm_ptr + 48);
+ vec4 = in31 - in24;
+ ST_SH(vec4, interm_ptr + 72);
+ vec4 = in16 - in23;
+ ST_SH(vec4, interm_ptr + 80);
+
+ ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31);
+ VP9_DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29);
+ VP9_DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28);
+
+ ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25);
+ VP9_DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24);
+ ADD2(in27, in26, in25, in24, in23, in20);
+
+ VP9_DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4);
+ ST_SH(vec5, out);
+ ST_SH(vec4, out + 120);
+
+ SUB2(in27, in26, in25, in24, in22, in21);
+
+ VP9_DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4);
+ ST_SH(vec5, out + 112);
+ ST_SH(vec4, out + 8);
+
+ SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20);
+ VP9_DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25);
+ SUB2(in26, in27, in24, in25, in23, in20);
+
+ VP9_DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4);
+ ST_SH(vec4, out + 16);
+ ST_SH(vec5, out + 104);
+
+ ADD2(in26, in27, in24, in25, in22, in21);
+ VP9_DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4);
+ ST_SH(vec4, out + 24);
+ ST_SH(vec5, out + 96);
+
+ in20 = LD_SH(interm_ptr + 32);
+ in21 = LD_SH(interm_ptr + 88);
+ in27 = LD_SH(interm_ptr + 56);
+ in26 = LD_SH(interm_ptr + 64);
+
+ in16 = in20;
+ in17 = in21;
+ VP9_DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27);
+ VP9_DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26);
+
+ in22 = LD_SH(interm_ptr + 40);
+ in25 = LD_SH(interm_ptr + 48);
+ in24 = LD_SH(interm_ptr + 72);
+ in23 = LD_SH(interm_ptr + 80);
+
+ SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31);
+ VP9_DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30);
+ ADD2(in28, in29, in31, in30, in16, in19);
+ VP9_DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4);
+ ST_SH(vec5, out + 32);
+ ST_SH(vec4, out + 88);
+
+ SUB2(in28, in29, in31, in30, in17, in18);
+ VP9_DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4);
+ ST_SH(vec5, out + 40);
+ ST_SH(vec4, out + 80);
+
+ ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19);
+ VP9_DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31);
+ SUB2(in29, in28, in30, in31, in16, in19);
+
+ VP9_DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4);
+ ST_SH(vec5, out + 72);
+ ST_SH(vec4, out + 48);
+
+ ADD2(in29, in28, in30, in31, in17, in18);
+
+ VP9_DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec5, vec4);
+ ST_SH(vec4, out + 56);
+ ST_SH(vec5, out + 64);
+}
+
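+/* Gather the interleaved row-transform results back into natural coefficient
+ * order, transposing each 8x8 tile before the final store. */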
+static void fdct8x32_1d_row_transpose_store(int16_t *temp, int16_t *output) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1;
+
+ /* 1st set */
+ in0 = LD_SH(temp);
+ in4 = LD_SH(temp + 32);
+ in2 = LD_SH(temp + 64);
+ in6 = LD_SH(temp + 96);
+ in1 = LD_SH(temp + 128);
+ in7 = LD_SH(temp + 152);
+ in3 = LD_SH(temp + 192);
+ in5 = LD_SH(temp + 216);
+
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+
+ /* 2nd set */
+ in0_1 = LD_SH(temp + 16);
+ in1_1 = LD_SH(temp + 232);
+ in2_1 = LD_SH(temp + 80);
+ in3_1 = LD_SH(temp + 168);
+ in4_1 = LD_SH(temp + 48);
+ in5_1 = LD_SH(temp + 176);
+ in6_1 = LD_SH(temp + 112);
+ in7_1 = LD_SH(temp + 240);
+
+ ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 32);
+ TRANSPOSE8x8_SH_SH(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1,
+ in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1);
+
+ /* 3rd set */
+ in0 = LD_SH(temp + 8);
+ in1 = LD_SH(temp + 136);
+ in2 = LD_SH(temp + 72);
+ in3 = LD_SH(temp + 200);
+ in4 = LD_SH(temp + 40);
+ in5 = LD_SH(temp + 208);
+ in6 = LD_SH(temp + 104);
+ in7 = LD_SH(temp + 144);
+
+ ST_SH8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1,
+ output + 8, 32);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output + 16, 32);
+
+ /* 4th set */
+ in0_1 = LD_SH(temp + 24);
+ in1_1 = LD_SH(temp + 224);
+ in2_1 = LD_SH(temp + 88);
+ in3_1 = LD_SH(temp + 160);
+ in4_1 = LD_SH(temp + 56);
+ in5_1 = LD_SH(temp + 184);
+ in6_1 = LD_SH(temp + 120);
+ in7_1 = LD_SH(temp + 248);
+
+ TRANSPOSE8x8_SH_SH(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1,
+ in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1);
+ ST_SH8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1,
+ output + 24, 32);
+}
+
+static void fdct32x8_1d_row(int16_t *temp, int16_t *temp_buf,
+ int16_t *output) {
+ fdct8x32_1d_row_load_butterfly(temp, temp_buf);
+ fdct8x32_1d_row_even(temp_buf, temp_buf);
+ fdct8x32_1d_row_odd(temp_buf + 128, temp, temp_buf + 128);
+ fdct8x32_1d_row_transpose_store(temp_buf, output);
+}
+
+static void fdct32x8_1d_row_4x(int16_t *tmp_buf_big, int16_t *tmp_buf,
+ int16_t *output) {
+ fdct8x32_1d_row_load_butterfly(tmp_buf_big, tmp_buf);
+ fdct8x32_1d_row_even_4x(tmp_buf, tmp_buf_big, tmp_buf);
+ fdct8x32_1d_row_odd(tmp_buf + 128, tmp_buf_big, tmp_buf + 128);
+ fdct8x32_1d_row_transpose_store(tmp_buf, output);
+}
+
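+/* 32x32 forward DCT: four 8-column column passes into tmp_buf_big, then four
+ * 8-row row passes (the first through the wider-precision _4x path). */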
+void vp9_fdct32x32_msa(const int16_t *input, int16_t *output,
+ int32_t src_stride) {
+ int32_t i;
+ DECLARE_ALIGNED(32, int16_t, tmp_buf_big[1024]);
+ DECLARE_ALIGNED(32, int16_t, tmp_buf[256]);
+
+ /* column transform */
+ for (i = 0; i < 4; ++i) {
+ fdct8x32_1d_column(input + (8 * i), src_stride, tmp_buf,
+ tmp_buf_big + (8 * i));
+ }
+
+ /* row transform */
+ fdct32x8_1d_row_4x(tmp_buf_big, tmp_buf, output);
+
+ /* remaining row transform blocks */
+ for (i = 1; i < 4; ++i) {
+ fdct32x8_1d_row(tmp_buf_big + (i * 256), tmp_buf, output + (i * 256));
+ }
+}
+
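+/* DC-only 32x32 transform: sum the 32x32 input as sixteen 8x8 tiles, then
+ * scale the total by 1/8. */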
+void vp9_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
+ out[1] = 0;
+
+ out[0] = VP9_LD_HADD(input, stride);
+ out[0] += VP9_LD_HADD(input + 8, stride);
+ out[0] += VP9_LD_HADD(input + 16, stride);
+ out[0] += VP9_LD_HADD(input + 24, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 8, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 8 + 8, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 8 + 16, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 8 + 24, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 16, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 16 + 8, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 16 + 16, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 16 + 24, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 24, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 24 + 8, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 24 + 16, stride);
+ out[0] += VP9_LD_HADD(input + 32 * 24 + 24, stride);
+ out[0] >>= 3;
+}
+
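+/* "rd" variant of the even row half: the rounding post-process is applied
+ * once, right after the first butterfly stage, rather than per output pair. */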
+static void fdct8x32_1d_row_even_rd(int16_t *temp, int16_t *out) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+ v8i16 in8, in9, in10, in11, in12, in13, in14, in15;
+ v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, temp0, temp1;
+
+ /* fdct32 even */
+ /* stage 2 */
+ LD_SH8(temp, 8, in0, in1, in2, in3, in4, in5, in6, in7);
+ LD_SH8(temp + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15);
+
+ BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7,
+ in8, in9, in10, in11, in12, in13, in14, in15,
+ vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7,
+ in8, in9, in10, in11, in12, in13, in14, in15);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec0, vec1);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec2, vec3);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec4, vec5);
+ VP9_FDCT_POSTPROC_2V_NEG_H(vec6, vec7);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in8, in9);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in10, in11);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in12, in13);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in14, in15);
+
+ /* Stage 3 */
+ ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3);
+
+ temp0 = in0 + in3;
+ in0 = in0 - in3;
+ in3 = in1 + in2;
+ in1 = in1 - in2;
+
+ VP9_DOTP_CONST_PAIR(temp0, in3, cospi_16_64, cospi_16_64, temp1, temp0);
+ ST_SH(temp0, out);
+ ST_SH(temp1, out + 8);
+
+ VP9_DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0);
+ ST_SH(temp0, out + 16);
+ ST_SH(temp1, out + 24);
+
+ SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7);
+ VP9_DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6);
+ ADD2(vec4, vec5, vec7, vec6, vec0, vec1);
+ VP9_DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0);
+ ST_SH(temp0, out + 32);
+ ST_SH(temp1, out + 56);
+
+ SUB2(vec4, vec5, vec7, vec6, vec4, vec7);
+ VP9_DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0);
+ ST_SH(temp0, out + 40);
+ ST_SH(temp1, out + 48);
+
+ VP9_DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5);
+ VP9_DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4);
+ ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2);
+ VP9_DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3);
+ ADD2(in0, in1, in2, in3, vec0, vec7);
+ VP9_DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0);
+ ST_SH(temp0, out + 64);
+ ST_SH(temp1, out + 120);
+
+ SUB2(in0, in1, in2, in3, in0, in2);
+ VP9_DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0);
+ ST_SH(temp0, out + 72);
+ ST_SH(temp1, out + 112);
+
+ SUB2(in9, vec2, in14, vec5, vec2, vec5);
+ VP9_DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1);
+ SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5);
+ VP9_DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0);
+ ST_SH(temp0, out + 80);
+ ST_SH(temp1, out + 104);
+
+ ADD2(in3, in2, in0, in1, vec3, vec4);
+ VP9_DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1);
+ ST_SH(temp0, out + 96);
+ ST_SH(temp1, out + 88);
+}
+
+static void fdct8x32_1d_row_odd_rd(int16_t *temp, int16_t *interm_ptr,
+ int16_t *out) {
+ v8i16 in16, in17, in18, in19, in20, in21, in22, in23;
+ v8i16 in24, in25, in26, in27, in28, in29, in30, in31;
+ v8i16 vec4, vec5;
+
+ in20 = LD_SH(temp + 32);
+ in21 = LD_SH(temp + 40);
+ in26 = LD_SH(temp + 80);
+ in27 = LD_SH(temp + 88);
+
+ VP9_DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27);
+ VP9_DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26);
+
+ VP9_FDCT_POSTPROC_2V_NEG_H(in20, in21);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in26, in27);
+
+ in18 = LD_SH(temp + 16);
+ in19 = LD_SH(temp + 24);
+ in28 = LD_SH(temp + 96);
+ in29 = LD_SH(temp + 104);
+
+ VP9_FDCT_POSTPROC_2V_NEG_H(in18, in19);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in28, in29);
+
+ vec4 = in19 - in20;
+ ST_SH(vec4, interm_ptr + 32);
+ vec4 = in18 - in21;
+ ST_SH(vec4, interm_ptr + 88);
+ vec4 = in29 - in26;
+ ST_SH(vec4, interm_ptr + 64);
+ vec4 = in28 - in27;
+ ST_SH(vec4, interm_ptr + 56);
+
+ ADD4(in18, in21, in19, in20, in28, in27, in29, in26, in21, in20, in27, in26);
+
+ in22 = LD_SH(temp + 48);
+ in23 = LD_SH(temp + 56);
+ in24 = LD_SH(temp + 64);
+ in25 = LD_SH(temp + 72);
+
+ VP9_DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25);
+ VP9_DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in22, in23);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in24, in25);
+
+ in16 = LD_SH(temp);
+ in17 = LD_SH(temp + 8);
+ in30 = LD_SH(temp + 112);
+ in31 = LD_SH(temp + 120);
+
+ VP9_FDCT_POSTPROC_2V_NEG_H(in16, in17);
+ VP9_FDCT_POSTPROC_2V_NEG_H(in30, in31);
+
+ vec4 = in17 - in22;
+ ST_SH(vec4, interm_ptr + 40);
+ vec4 = in30 - in25;
+ ST_SH(vec4, interm_ptr + 48);
+ vec4 = in31 - in24;
+ ST_SH(vec4, interm_ptr + 72);
+ vec4 = in16 - in23;
+ ST_SH(vec4, interm_ptr + 80);
+
+ ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31);
+ VP9_DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29);
+ VP9_DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28);
+ ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25);
+ VP9_DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24);
+ ADD2(in27, in26, in25, in24, in23, in20);
+ VP9_DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5);
+ ST_SH(vec5, out);
+ ST_SH(vec4, out + 120);
+
+ SUB2(in27, in26, in25, in24, in22, in21);
+ VP9_DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4);
+ ST_SH(vec5, out + 112);
+ ST_SH(vec4, out + 8);
+
+ SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20);
+ VP9_DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25);
+ SUB2(in26, in27, in24, in25, in23, in20);
+ VP9_DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5);
+ ST_SH(vec4, out + 16);
+ ST_SH(vec5, out + 104);
+
+ ADD2(in26, in27, in24, in25, in22, in21);
+ VP9_DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5);
+ ST_SH(vec4, out + 24);
+ ST_SH(vec5, out + 96);
+
+ in20 = LD_SH(interm_ptr + 32);
+ in21 = LD_SH(interm_ptr + 88);
+ in27 = LD_SH(interm_ptr + 56);
+ in26 = LD_SH(interm_ptr + 64);
+
+ in16 = in20;
+ in17 = in21;
+ VP9_DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27);
+ VP9_DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26);
+
+ in22 = LD_SH(interm_ptr + 40);
+ in25 = LD_SH(interm_ptr + 48);
+ in24 = LD_SH(interm_ptr + 72);
+ in23 = LD_SH(interm_ptr + 80);
+
+ SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31);
+ VP9_DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30);
+ in16 = in28 + in29;
+ in19 = in31 + in30;
+ VP9_DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4);
+ ST_SH(vec5, out + 32);
+ ST_SH(vec4, out + 88);
+
+ SUB2(in28, in29, in31, in30, in17, in18);
+ VP9_DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4);
+ ST_SH(vec5, out + 40);
+ ST_SH(vec4, out + 80);
+
+ ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19);
+ VP9_DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31);
+ SUB2(in29, in28, in30, in31, in16, in19);
+ VP9_DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4);
+ ST_SH(vec5, out + 72);
+ ST_SH(vec4, out + 48);
+
+ ADD2(in29, in28, in30, in31, in17, in18);
+ VP9_DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4);
+ ST_SH(vec4, out + 56);
+ ST_SH(vec5, out + 64);
+}
+
+static void fdct32x8_1d_row_rd(int16_t *tmp_buf_big, int16_t *tmp_buf,
+ int16_t *output) {
+ fdct8x32_1d_row_load_butterfly(tmp_buf_big, tmp_buf);
+ fdct8x32_1d_row_even_rd(tmp_buf, tmp_buf);
+ fdct8x32_1d_row_odd_rd((tmp_buf + 128), tmp_buf_big, (tmp_buf + 128));
+ fdct8x32_1d_row_transpose_store(tmp_buf, output);
+}
+
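+/* "rd" 32x32 forward DCT: same column pass as vp9_fdct32x32_msa, with the
+ * *_rd row helpers. */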
+void vp9_fdct32x32_rd_msa(const int16_t *input, int16_t *out,
+ int32_t src_stride) {
+ int32_t i;
+ DECLARE_ALIGNED(32, int16_t, tmp_buf_big[1024]);
+ DECLARE_ALIGNED(32, int16_t, tmp_buf[256]);
+
+ /* column transform */
+ for (i = 0; i < 4; ++i) {
+ fdct8x32_1d_column(input + (8 * i), src_stride, &tmp_buf[0],
+ &tmp_buf_big[0] + (8 * i));
+ }
+
+ /* row transform */
+ for (i = 0; i < 4; ++i) {
+ fdct32x8_1d_row_rd(&tmp_buf_big[0] + (8 * i * 32), &tmp_buf[0],
+ out + (8 * i * 32));
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c
new file mode 100644
index 00000000000..790b4fb8d79
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vp9_rtcd.h"
+#include "vp9/encoder/mips/msa/vp9_fdct_msa.h"
+
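+/* 4x4 forward Walsh-Hadamard transform (lossless coding path). */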
+void vp9_fwht4x4_msa(const int16_t *input, int16_t *output,
+ int32_t src_stride) {
+ v8i16 in0, in1, in2, in3, in4;
+
+ LD_SH4(input, src_stride, in0, in1, in2, in3);
+
+ in0 += in1;
+ in3 -= in2;
+ in4 = (in0 - in3) >> 1;
+ SUB2(in4, in1, in4, in2, in1, in2);
+ in0 -= in2;
+ in3 += in1;
+
+ TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1);
+
+ in0 += in2;
+ in1 -= in3;
+ in4 = (in0 - in1) >> 1;
+ SUB2(in4, in2, in4, in3, in2, in3);
+ in0 -= in3;
+ in1 += in2;
+
+ SLLI_4V(in0, in1, in2, in3, 2);
+
+ TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2);
+
+ ST4x2_UB(in0, output, 4);
+ ST4x2_UB(in3, output + 4, 4);
+ ST4x2_UB(in1, output + 8, 4);
+ ST4x2_UB(in2, output + 12, 4);
+}
+
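+/* 4x4 forward DCT; the pre-process block below adds 1 to the first scaled
+ * sample when it is non-zero, as in the C reference. */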
+void vp9_fdct4x4_msa(const int16_t *input, int16_t *output,
+ int32_t src_stride) {
+ v8i16 in0, in1, in2, in3;
+
+ LD_SH4(input, src_stride, in0, in1, in2, in3);
+
+ /* fdct4 pre-process */
+ {
+ v8i16 vec, mask;
+ v16i8 zero = { 0 };
+ v16i8 one = __msa_ldi_b(1);
+
+ mask = (v8i16)__msa_sldi_b(zero, one, 15);
+ SLLI_4V(in0, in1, in2, in3, 4);
+ vec = __msa_ceqi_h(in0, 0);
+ vec = vec ^ 255;
+ vec = mask & vec;
+ in0 += vec;
+ }
+
+ VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3);
+ SRA_4V(in0, in1, in2, in3, 2);
+ PCKEV_D2_SH(in1, in0, in3, in2, in0, in2);
+ ST_SH2(in0, in2, output, 8);
+}
+
+void vp9_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride,
+ int32_t tx_type) {
+ v8i16 in0, in1, in2, in3;
+
+ LD_SH4(input, stride, in0, in1, in2, in3);
+
+ /* fdct4 pre-process */
+ {
+ v8i16 temp, mask;
+ v16i8 zero = { 0 };
+ v16i8 one = __msa_ldi_b(1);
+
+ mask = (v8i16)__msa_sldi_b(zero, one, 15);
+ SLLI_4V(in0, in1, in2, in3, 4);
+ temp = __msa_ceqi_h(in0, 0);
+ temp = (v8i16)__msa_xori_b((v16u8)temp, 255);
+ temp = mask & temp;
+ in0 += temp;
+ }
+
+ switch (tx_type) {
+ case DCT_DCT:
+ VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case ADST_DCT:
+ VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case DCT_ADST:
+ VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ case ADST_ADST:
+ VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
+ ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3);
+ SRA_4V(in0, in1, in2, in3, 2);
+ PCKEV_D2_SH(in1, in0, in3, in2, in0, in2);
+ ST_SH2(in0, in2, output, 8);
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c
new file mode 100644
index 00000000000..68e65723abb
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "./vp9_rtcd.h"
+#include "vp9/encoder/mips/msa/vp9_fdct_msa.h"
+
+void vp9_fdct8x8_msa(const int16_t *input, int16_t *output,
+ int32_t src_stride) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+
+ LD_SH8(input, src_stride, in0, in1, in2, in3, in4, in5, in6, in7);
+ SLLI_4V(in0, in1, in2, in3, 2);
+ SLLI_4V(in4, in5, in6, in7, 2);
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7);
+ ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8);
+}
+
+void vp9_fdct8x8_1_msa(const int16_t *input, int16_t *out, int32_t stride) {
+ out[0] = VP9_LD_HADD(input, stride);
+ out[1] = 0;
+}
+
+void vp9_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride,
+ int32_t tx_type) {
+ v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
+
+ LD_SH8(input, stride, in0, in1, in2, in3, in4, in5, in6, in7);
+ SLLI_4V(in0, in1, in2, in3, 2);
+ SLLI_4V(in4, in5, in6, in7, 2);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ break;
+ case ADST_DCT:
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ break;
+ case DCT_ADST:
+ VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ break;
+ case ADST_ADST:
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7,
+ in0, in1, in2, in3, in4, in5, in6, in7);
+ VP9_SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7);
+ ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8);
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h
new file mode 100644
index 00000000000..ad66576b6e5
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h
@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_
+#define VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_
+
+#include "vpx_ports/mem.h"
+#include "vp9/common/vp9_idct.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
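+/* Butterfly rotation of (reg0, reg1) by the constant pair (cnst0, cnst1):
+ * out0 = (reg0 * cnst0 - reg1 * cnst1 + rounding) >> DCT_CONST_BITS,
+ * out1 = (reg0 * cnst1 + reg1 * cnst0 + rounding) >> DCT_CONST_BITS. */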
+#define VP9_DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) { \
+ v8i16 k0_m = __msa_fill_h(cnst0); \
+ v4i32 s0_m, s1_m, s2_m, s3_m; \
+ \
+ s0_m = (v4i32)__msa_fill_h(cnst1); \
+ k0_m = __msa_ilvev_h((v8i16)s0_m, k0_m); \
+ \
+ ILVRL_H2_SW((-reg1), reg0, s1_m, s0_m); \
+ ILVRL_H2_SW(reg0, reg1, s3_m, s2_m); \
+ DOTP_SH2_SW(s1_m, s0_m, k0_m, k0_m, s1_m, s0_m); \
+ SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \
+ out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \
+ \
+ DOTP_SH2_SW(s3_m, s2_m, k0_m, k0_m, s1_m, s0_m); \
+ SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \
+ out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \
+}
+
+#define VP9_DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, \
+ dst0, dst1, dst2, dst3) { \
+ v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m; \
+ v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m; \
+ \
+ DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, \
+ tp0_m, tp2_m, tp3_m, tp4_m); \
+ DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, \
+ tp5_m, tp6_m, tp7_m, tp8_m); \
+ BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m); \
+ BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m); \
+ SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS); \
+ SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS); \
+ PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, \
+ dst0, dst1, dst2, dst3); \
+}
+
+#define VP9_DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2) ({ \
+ v8i16 dst_m; \
+ v4i32 tp0_m, tp1_m; \
+ \
+ DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m); \
+ SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS); \
+ dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \
+ \
+ dst_m; \
+})
+
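+/* 8-point forward ADST built from the packed cospi constant tables in
+ * coeff0_m / coeff1_m. */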
+#define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \
+ v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \
+ v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \
+ cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \
+ v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \
+ cospi_24_64, -cospi_24_64, 0, 0 }; \
+ \
+ SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \
+ cnst2_m = -cnst0_m; \
+ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
+ SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \
+ cnst4_m = -cnst2_m; \
+ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
+ \
+ ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \
+ VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
+ cnst1_m, cnst2_m, cnst3_m, in7, in0, \
+ in4, in3); \
+ \
+ SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \
+ cnst2_m = -cnst0_m; \
+ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \
+ SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \
+ cnst4_m = -cnst2_m; \
+ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \
+ \
+ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
+ \
+ VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
+ cnst1_m, cnst2_m, cnst3_m, in5, in2, \
+ in6, in1); \
+ BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \
+ out7 = -s0_m; \
+ out0 = s1_m; \
+ \
+ SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \
+ \
+ ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \
+ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ cnst1_m = cnst0_m; \
+ \
+ ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \
+ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \
+ VP9_DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, \
+ cnst2_m, cnst3_m, cnst1_m, out1, out6, \
+ s0_m, s1_m); \
+ \
+ SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
+ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ \
+ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \
+ ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \
+ out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+ out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
+ out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
+ out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
+ \
+ out1 = -out1; \
+ out3 = -out3; \
+ out5 = -out5; \
+}
+
+#define VP9_MADD_SHORT(m0, m1, c0, c1, res0, res1) { \
+ v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \
+ v8i16 madd_s0_m, madd_s1_m; \
+ \
+ ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m); \
+ DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, \
+ c0, c0, c1, c1, madd0_m, madd1_m, madd2_m, madd3_m); \
+ SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1); \
+}
+
+#define VP9_MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, \
+ out0, out1, out2, out3) { \
+ v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \
+ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m; \
+ \
+ ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m); \
+ ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m); \
+ DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \
+ cst0, cst0, cst2, cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
+ BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \
+ m4_m, m5_m, tmp3_m, tmp2_m); \
+ SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \
+ DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, \
+ cst1, cst1, cst3, cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
+ BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, \
+ m4_m, m5_m, tmp3_m, tmp2_m); \
+ SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
+ PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \
+}
+
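+/* Horizontal sum of an 8x8 block of int16 samples at psrc with the given
+ * row stride. */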
+#define VP9_LD_HADD(psrc, stride) ({ \
+ v8i16 in0_m, in1_m, in2_m, in3_m, in4_m, in5_m, in6_m, in7_m; \
+ v4i32 vec_w_m; \
+ \
+ LD_SH4((psrc), stride, in0_m, in1_m, in2_m, in3_m); \
+ ADD2(in0_m, in1_m, in2_m, in3_m, in0_m, in2_m); \
+ LD_SH4(((psrc) + 4 * stride), stride, in4_m, in5_m, in6_m, in7_m); \
+ ADD4(in4_m, in5_m, in6_m, in7_m, in0_m, in2_m, in4_m, in6_m, \
+ in4_m, in6_m, in0_m, in4_m); \
+ in0_m += in4_m; \
+ \
+ vec_w_m = __msa_hadd_s_w(in0_m, in0_m); \
+ HADD_SW_S32(vec_w_m); \
+})
+
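+/* In-place rounding of two vectors: x = (x + 1 + (x < 0)) >> 2. */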
+#define VP9_FDCT_POSTPROC_2V_NEG_H(vec0, vec1) { \
+ v8i16 tp0_m, tp1_m; \
+ v8i16 one_m = __msa_ldi_h(1); \
+ \
+ tp0_m = __msa_clti_s_h(vec0, 0); \
+ tp1_m = __msa_clti_s_h(vec1, 0); \
+ vec0 += 1; \
+ vec1 += 1; \
+ tp0_m = one_m & tp0_m; \
+ tp1_m = one_m & tp1_m; \
+ vec0 += tp0_m; \
+ vec1 += tp1_m; \
+ vec0 >>= 2; \
+ vec1 >>= 2; \
+}
+
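+/* 4-point forward DCT applied to the low four halfwords of each input
+ * vector. */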
+#define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) { \
+ v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m; \
+ v8i16 vec0_m, vec1_m, vec2_m, vec3_m; \
+ v4i32 vec4_m, vec5_m, vec6_m, vec7_m; \
+ v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, \
+ cospi_24_64, -cospi_8_64, 0, 0, 0 }; \
+ \
+ BUTTERFLY_4(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m); \
+ ILVR_H2_SH(vec1_m, vec0_m, vec3_m, vec2_m, vec0_m, vec2_m); \
+ SPLATI_H2_SH(coeff_m, 0, 1, cnst0_m, cnst1_m); \
+ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ vec5_m = __msa_dotp_s_w(vec0_m, cnst1_m); \
+ \
+ SPLATI_H2_SH(coeff_m, 4, 3, cnst2_m, cnst3_m); \
+ cnst2_m = __msa_ilvev_h(cnst3_m, cnst2_m); \
+ vec7_m = __msa_dotp_s_w(vec2_m, cnst2_m); \
+ \
+ vec4_m = __msa_dotp_s_w(vec0_m, cnst0_m); \
+ cnst2_m = __msa_splati_h(coeff_m, 2); \
+ cnst2_m = __msa_ilvev_h(cnst2_m, cnst3_m); \
+ vec6_m = __msa_dotp_s_w(vec2_m, cnst2_m); \
+ \
+ SRARI_W4_SW(vec4_m, vec5_m, vec6_m, vec7_m, DCT_CONST_BITS); \
+ PCKEV_H4_SH(vec4_m, vec4_m, vec5_m, vec5_m, vec6_m, vec6_m, \
+ vec7_m, vec7_m, out0, out2, out1, out3); \
+}
+
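+/* 4-point forward ADST built from the sinpi_{1..4}_9 constants, using 32-bit
+ * intermediates. */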
+#define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) { \
+ v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \
+ v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \
+ \
+ UNPCK_R_SH_SW(in0, in0_r_m); \
+ UNPCK_R_SH_SW(in1, in1_r_m); \
+ UNPCK_R_SH_SW(in2, in2_r_m); \
+ UNPCK_R_SH_SW(in3, in3_r_m); \
+ \
+ constant_m = __msa_fill_w(sinpi_4_9); \
+ MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \
+ \
+ constant_m = __msa_fill_w(sinpi_1_9); \
+ s0_m += in0_r_m * constant_m; \
+ s1_m -= in1_r_m * constant_m; \
+ \
+ constant_m = __msa_fill_w(sinpi_2_9); \
+ s0_m += in1_r_m * constant_m; \
+ s1_m += in3_r_m * constant_m; \
+ \
+ s2_m = in0_r_m + in1_r_m - in3_r_m; \
+ \
+ constant_m = __msa_fill_w(sinpi_3_9); \
+ MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \
+ \
+ in0_r_m = s0_m + s3_m; \
+ s2_m = s1_m - s3_m; \
+ s3_m = s1_m - s0_m + s3_m; \
+ \
+ SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \
+ PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, \
+ s3_m, s3_m, out0, out1, out2, out3); \
+}
+
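+/* 8-point forward DCT: one 1-D pass over eight v8i16 rows. */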
+#define VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m; \
+ v8i16 s7_m, x0_m, x1_m, x2_m, x3_m; \
+ v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, \
+ cospi_24_64, cospi_4_64, cospi_28_64, \
+ cospi_12_64, cospi_20_64 }; \
+ \
+ /* FDCT stage1 */ \
+ BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \
+ s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m); \
+ BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \
+ ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \
+ ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \
+ SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \
+ x1_m = __msa_ilvev_h(x1_m, x0_m); \
+ out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \
+ \
+ SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \
+ x2_m = -x2_m; \
+ x2_m = __msa_ilvev_h(x3_m, x2_m); \
+ out6 = VP9_DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \
+ \
+ out0 = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \
+ x2_m = __msa_splati_h(coeff_m, 2); \
+ x2_m = __msa_ilvev_h(x2_m, x3_m); \
+ out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \
+ \
+ /* stage2 */ \
+ ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \
+ \
+ s6_m = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \
+ s5_m = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \
+ \
+ /* stage3 */ \
+ BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \
+ \
+ /* stage4 */ \
+ ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \
+ ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \
+ \
+ SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \
+ x1_m = __msa_ilvev_h(x0_m, x1_m); \
+ out1 = VP9_DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \
+ \
+ SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \
+ x2_m = __msa_ilvev_h(x3_m, x2_m); \
+ out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \
+ \
+ x1_m = __msa_splati_h(coeff_m, 5); \
+ x0_m = -x0_m; \
+ x0_m = __msa_ilvev_h(x1_m, x0_m); \
+ out7 = VP9_DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \
+ \
+ x2_m = __msa_splati_h(coeff_m, 6); \
+ x3_m = -x3_m; \
+ x2_m = __msa_ilvev_h(x2_m, x3_m); \
+ out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \
+}
+
+#define VP9_SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7) { \
+ v8i16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
+ \
+ SRLI_H4_SH(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m, 15); \
+ SRLI_H4_SH(in4, in5, in6, in7, vec4_m, vec5_m, vec6_m, vec7_m, 15); \
+ AVE_SH4_SH(vec0_m, in0, vec1_m, in1, vec2_m, in2, vec3_m, in3, \
+ in0, in1, in2, in3); \
+ AVE_SH4_SH(vec4_m, in4, vec5_m, in5, vec6_m, in6, vec7_m, in7, \
+ in4, in5, in6, in7); \
+}
+
+#define VP9_FDCT8x16_EVEN(in0, in1, in2, in3, in4, in5, in6, in7, \
+ out0, out1, out2, out3, out4, out5, out6, out7) { \
+ v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \
+ v8i16 x0_m, x1_m, x2_m, x3_m; \
+ v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \
+ cospi_4_64, cospi_28_64, cospi_12_64, cospi_20_64 }; \
+ \
+ /* FDCT stage1 */ \
+ BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, \
+ s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m); \
+ BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \
+ ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \
+ ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \
+ SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \
+ x1_m = __msa_ilvev_h(x1_m, x0_m); \
+ out4 = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \
+ \
+ SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \
+ x2_m = -x2_m; \
+ x2_m = __msa_ilvev_h(x3_m, x2_m); \
+ out6 = VP9_DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \
+ \
+ out0 = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \
+ x2_m = __msa_splati_h(coeff_m, 2); \
+ x2_m = __msa_ilvev_h(x2_m, x3_m); \
+ out2 = VP9_DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \
+ \
+ /* stage2 */ \
+ ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \
+ \
+ s6_m = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \
+ s5_m = VP9_DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \
+ \
+ /* stage3 */ \
+ BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \
+ \
+ /* stage4 */ \
+ ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \
+ ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \
+ \
+ SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \
+ x1_m = __msa_ilvev_h(x0_m, x1_m); \
+ out1 = VP9_DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \
+ \
+ SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \
+ x2_m = __msa_ilvev_h(x3_m, x2_m); \
+ out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \
+ \
+ x1_m = __msa_splati_h(coeff_m, 5); \
+ x0_m = -x0_m; \
+ x0_m = __msa_ilvev_h(x1_m, x0_m); \
+ out7 = VP9_DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \
+ \
+ x2_m = __msa_splati_h(coeff_m, 6); \
+ x3_m = -x3_m; \
+ x2_m = __msa_ilvev_h(x2_m, x3_m); \
+ out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \
+}
+
+#define VP9_FDCT8x16_ODD(input0, input1, input2, input3, \
+ input4, input5, input6, input7, \
+ out1, out3, out5, out7, \
+ out9, out11, out13, out15) { \
+ v8i16 stp21_m, stp22_m, stp23_m, stp24_m, stp25_m, stp26_m; \
+ v8i16 stp30_m, stp31_m, stp32_m, stp33_m, stp34_m, stp35_m; \
+ v8i16 stp36_m, stp37_m, vec0_m, vec1_m; \
+ v8i16 vec2_m, vec3_m, vec4_m, vec5_m, vec6_m; \
+ v8i16 cnst0_m, cnst1_m, cnst4_m, cnst5_m; \
+ v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, \
+ cospi_24_64, -cospi_8_64, -cospi_24_64, \
+ cospi_12_64, cospi_20_64 }; \
+ v8i16 coeff1_m = { cospi_2_64, cospi_30_64, cospi_14_64, \
+ cospi_18_64, cospi_10_64, cospi_22_64, \
+ cospi_6_64, cospi_26_64 }; \
+ v8i16 coeff2_m = { -cospi_2_64, -cospi_10_64, -cospi_18_64, \
+ -cospi_26_64, 0, 0, 0, 0 }; \
+ \
+ /* stp 1 */ \
+ ILVL_H2_SH(input2, input5, input3, input4, vec2_m, vec4_m); \
+ ILVR_H2_SH(input2, input5, input3, input4, vec3_m, vec5_m); \
+ \
+ cnst4_m = __msa_splati_h(coeff_m, 0); \
+ stp25_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst4_m); \
+ \
+ cnst5_m = __msa_splati_h(coeff_m, 1); \
+ cnst5_m = __msa_ilvev_h(cnst5_m, cnst4_m); \
+ stp22_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst5_m); \
+ stp24_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst4_m); \
+ stp23_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst5_m); \
+ \
+ /* stp2 */ \
+ BUTTERFLY_4(input0, input1, stp22_m, stp23_m, \
+ stp30_m, stp31_m, stp32_m, stp33_m); \
+ BUTTERFLY_4(input7, input6, stp25_m, stp24_m, \
+ stp37_m, stp36_m, stp35_m, stp34_m); \
+ \
+ ILVL_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec2_m, vec4_m); \
+ ILVR_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec3_m, vec5_m); \
+ \
+ SPLATI_H2_SH(coeff_m, 2, 3, cnst0_m, cnst1_m); \
+ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
+ stp26_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \
+ \
+ cnst0_m = __msa_splati_h(coeff_m, 4); \
+ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ stp21_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \
+ \
+ SPLATI_H2_SH(coeff_m, 5, 2, cnst0_m, cnst1_m); \
+ cnst1_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
+ stp25_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \
+ \
+ cnst0_m = __msa_splati_h(coeff_m, 3); \
+ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ stp22_m = VP9_DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \
+ \
+ /* stp4 */ \
+ BUTTERFLY_4(stp30_m, stp37_m, stp26_m, stp21_m, \
+ vec6_m, vec2_m, vec4_m, vec5_m); \
+ BUTTERFLY_4(stp33_m, stp34_m, stp25_m, stp22_m, \
+ stp21_m, stp23_m, stp24_m, stp31_m); \
+ \
+ ILVRL_H2_SH(vec2_m, vec6_m, vec1_m, vec0_m); \
+ SPLATI_H2_SH(coeff1_m, 0, 1, cnst0_m, cnst1_m); \
+ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
+ \
+ out1 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+ \
+ cnst0_m = __msa_splati_h(coeff2_m, 0); \
+ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ out15 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+ \
+ ILVRL_H2_SH(vec4_m, vec5_m, vec1_m, vec0_m); \
+ SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \
+ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ \
+ out9 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
+ \
+ cnst1_m = __msa_splati_h(coeff2_m, 2); \
+ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
+ out7 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+ \
+ ILVRL_H2_SH(stp23_m, stp21_m, vec1_m, vec0_m); \
+ SPLATI_H2_SH(coeff1_m, 4, 5, cnst0_m, cnst1_m); \
+ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
+ out5 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+ \
+ cnst0_m = __msa_splati_h(coeff2_m, 1); \
+ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ out11 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+ \
+ ILVRL_H2_SH(stp24_m, stp31_m, vec1_m, vec0_m); \
+ SPLATI_H2_SH(coeff1_m, 6, 7, cnst0_m, cnst1_m); \
+ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \
+ \
+ out13 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \
+ \
+ cnst1_m = __msa_splati_h(coeff2_m, 3); \
+ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \
+ out3 = VP9_DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \
+}
+
+#define VP9_FDCT32_POSTPROC_NEG_W(vec) { \
+ v4i32 temp_m; \
+ v4i32 one_m = __msa_ldi_w(1); \
+ \
+ temp_m = __msa_clti_s_w(vec, 0); \
+ vec += 1; \
+ temp_m = one_m & temp_m; \
+ vec += temp_m; \
+ vec >>= 2; \
+}
+
+#define VP9_FDCT32_POSTPROC_2V_POS_H(vec0, vec1) { \
+ v8i16 tp0_m, tp1_m; \
+ v8i16 one = __msa_ldi_h(1); \
+ \
+ tp0_m = __msa_clei_s_h(vec0, 0); \
+ tp1_m = __msa_clei_s_h(vec1, 0); \
+ tp0_m = (v8i16)__msa_xori_b((v16u8)tp0_m, 255); \
+ tp1_m = (v8i16)__msa_xori_b((v16u8)tp1_m, 255); \
+ vec0 += 1; \
+ vec1 += 1; \
+ tp0_m = one & tp0_m; \
+ tp1_m = one & tp1_m; \
+ vec0 += tp0_m; \
+ vec1 += tp1_m; \
+ vec0 >>= 2; \
+ vec1 >>= 2; \
+}
+
+#define VP9_DOTP_CONST_PAIR_W(reg0_left, reg1_left, reg0_right, \
+ reg1_right, const0, const1, \
+ out0, out1, out2, out3) { \
+ v4i32 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \
+ v2i64 tp0_m, tp1_m, tp2_m, tp3_m; \
+ v4i32 k0_m = __msa_fill_w((int32_t) const0); \
+ \
+ s0_m = __msa_fill_w((int32_t) const1); \
+ k0_m = __msa_ilvev_w(s0_m, k0_m); \
+ \
+ ILVRL_W2_SW(-reg1_left, reg0_left, s1_m, s0_m); \
+ ILVRL_W2_SW(reg0_left, reg1_left, s3_m, s2_m); \
+ ILVRL_W2_SW(-reg1_right, reg0_right, s5_m, s4_m); \
+ ILVRL_W2_SW(reg0_right, reg1_right, s7_m, s6_m); \
+ \
+ DOTP_SW2_SD(s0_m, s1_m, k0_m, k0_m, tp0_m, tp1_m); \
+ DOTP_SW2_SD(s4_m, s5_m, k0_m, k0_m, tp2_m, tp3_m); \
+ tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \
+ tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \
+ tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \
+ tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \
+ out0 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \
+ out1 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \
+ \
+ DOTP_SW2_SD(s2_m, s3_m, k0_m, k0_m, tp0_m, tp1_m); \
+ DOTP_SW2_SD(s6_m, s7_m, k0_m, k0_m, tp2_m, tp3_m); \
+ tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \
+ tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \
+ tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \
+ tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \
+ out2 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \
+ out3 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \
+}
+#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */
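
The macros above are MSA vector forms of the standard VP9 forward-transform butterflies: input pairs are interleaved, multiplied against packed cosine constants with a widening dot product, rounded down by DCT_CONST_BITS, and packed back to 16 bits. As a rough scalar model (a sketch, not library code; dot_round_pack is an illustrative name), each output lane of VP9_DOT_SHIFT_RIGHT_PCK_H behaves like:

    #include <stdint.h>
    #define DCT_CONST_BITS 14  /* as defined in vp9/common/vp9_idct.h */

    static int16_t dot_round_pack(int16_t a, int16_t b, int16_t c0, int16_t c1) {
      /* widening dot product of one interleaved pair against a constant pair */
      const int32_t sum = (int32_t)a * c0 + (int32_t)b * c1;
      /* rounding arithmetic shift (SRARI) and narrowing pack (PCKEV) */
      return (int16_t)((sum + (1 << (DCT_CONST_BITS - 1))) >> DCT_CONST_BITS);
    }
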
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_subtract_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_subtract_msa.c
new file mode 100644
index 00000000000..1b8b694ce38
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_subtract_msa.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
+static void sub_blk_4x4_msa(const uint8_t *src_ptr, int32_t src_stride,
+ const uint8_t *pred_ptr, int32_t pred_stride,
+ int16_t *diff_ptr, int32_t diff_stride) {
+ uint32_t src0, src1, src2, src3;
+ uint32_t pred0, pred1, pred2, pred3;
+ v16i8 src = { 0 };
+ v16i8 pred = { 0 };
+ v16u8 src_l0, src_l1;
+ v8i16 diff0, diff1;
+
+ LW4(src_ptr, src_stride, src0, src1, src2, src3);
+ LW4(pred_ptr, pred_stride, pred0, pred1, pred2, pred3);
+ INSERT_W4_SB(src0, src1, src2, src3, src);
+ INSERT_W4_SB(pred0, pred1, pred2, pred3, pred);
+ ILVRL_B2_UB(src, pred, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST8x4_UB(diff0, diff1, diff_ptr, (2 * diff_stride));
+}
+
+static void sub_blk_8x8_msa(const uint8_t *src_ptr, int32_t src_stride,
+ const uint8_t *pred_ptr, int32_t pred_stride,
+ int16_t *diff_ptr, int32_t diff_stride) {
+ uint32_t loop_cnt;
+ uint64_t src0, src1, pred0, pred1;
+ v16i8 src = { 0 };
+ v16i8 pred = { 0 };
+ v16u8 src_l0, src_l1;
+ v8i16 diff0, diff1;
+
+ for (loop_cnt = 4; loop_cnt--;) {
+ LD2(src_ptr, src_stride, src0, src1);
+ src_ptr += (2 * src_stride);
+ LD2(pred_ptr, pred_stride, pred0, pred1);
+ pred_ptr += (2 * pred_stride);
+
+ INSERT_D2_SB(src0, src1, src);
+ INSERT_D2_SB(pred0, pred1, pred);
+ ILVRL_B2_UB(src, pred, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff_ptr, diff_stride);
+ diff_ptr += (2 * diff_stride);
+ }
+}
+
+static void sub_blk_16x16_msa(const uint8_t *src, int32_t src_stride,
+ const uint8_t *pred, int32_t pred_stride,
+ int16_t *diff, int32_t diff_stride) {
+ int8_t count;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
+ v16u8 src_l0, src_l1;
+ v8i16 diff0, diff1;
+
+ for (count = 2; count--;) {
+ LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7);
+ src += (8 * src_stride);
+
+ LD_SB8(pred, pred_stride,
+ pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7);
+ pred += (8 * pred_stride);
+
+ ILVRL_B2_UB(src0, pred0, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src1, pred1, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src2, pred2, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src3, pred3, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src4, pred4, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src5, pred5, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src6, pred6, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src7, pred7, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ diff += diff_stride;
+ }
+}
+
+static void sub_blk_32x32_msa(const uint8_t *src, int32_t src_stride,
+ const uint8_t *pred, int32_t pred_stride,
+ int16_t *diff, int32_t diff_stride) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
+ v16u8 src_l0, src_l1;
+ v8i16 diff0, diff1;
+
+ for (loop_cnt = 8; loop_cnt--;) {
+ LD_SB2(src, 16, src0, src1);
+ src += src_stride;
+ LD_SB2(src, 16, src2, src3);
+ src += src_stride;
+ LD_SB2(src, 16, src4, src5);
+ src += src_stride;
+ LD_SB2(src, 16, src6, src7);
+ src += src_stride;
+
+ LD_SB2(pred, 16, pred0, pred1);
+ pred += pred_stride;
+ LD_SB2(pred, 16, pred2, pred3);
+ pred += pred_stride;
+ LD_SB2(pred, 16, pred4, pred5);
+ pred += pred_stride;
+ LD_SB2(pred, 16, pred6, pred7);
+ pred += pred_stride;
+
+ ILVRL_B2_UB(src0, pred0, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ ILVRL_B2_UB(src1, pred1, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 16, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src2, pred2, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ ILVRL_B2_UB(src3, pred3, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 16, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src4, pred4, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ ILVRL_B2_UB(src5, pred5, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 16, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src6, pred6, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ ILVRL_B2_UB(src7, pred7, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 16, 8);
+ diff += diff_stride;
+ }
+}
+
+static void sub_blk_64x64_msa(const uint8_t *src, int32_t src_stride,
+ const uint8_t *pred, int32_t pred_stride,
+ int16_t *diff, int32_t diff_stride) {
+ uint32_t loop_cnt;
+ v16i8 src0, src1, src2, src3, src4, src5, src6, src7;
+ v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
+ v16u8 src_l0, src_l1;
+ v8i16 diff0, diff1;
+
+ for (loop_cnt = 32; loop_cnt--;) {
+ LD_SB4(src, 16, src0, src1, src2, src3);
+ src += src_stride;
+ LD_SB4(src, 16, src4, src5, src6, src7);
+ src += src_stride;
+
+ LD_SB4(pred, 16, pred0, pred1, pred2, pred3);
+ pred += pred_stride;
+ LD_SB4(pred, 16, pred4, pred5, pred6, pred7);
+ pred += pred_stride;
+
+ ILVRL_B2_UB(src0, pred0, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ ILVRL_B2_UB(src1, pred1, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 16, 8);
+ ILVRL_B2_UB(src2, pred2, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 32, 8);
+ ILVRL_B2_UB(src3, pred3, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 48, 8);
+ diff += diff_stride;
+
+ ILVRL_B2_UB(src4, pred4, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff, 8);
+ ILVRL_B2_UB(src5, pred5, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 16, 8);
+ ILVRL_B2_UB(src6, pred6, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 32, 8);
+ ILVRL_B2_UB(src7, pred7, src_l0, src_l1);
+ HSUB_UB2_SH(src_l0, src_l1, diff0, diff1);
+ ST_SH2(diff0, diff1, diff + 48, 8);
+ diff += diff_stride;
+ }
+}
+
+void vp9_subtract_block_msa(int32_t rows, int32_t cols,
+ int16_t *diff_ptr, ptrdiff_t diff_stride,
+ const uint8_t *src_ptr, ptrdiff_t src_stride,
+ const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
+ if (rows == cols) {
+ switch (rows) {
+ case 4:
+ sub_blk_4x4_msa(src_ptr, src_stride, pred_ptr, pred_stride,
+ diff_ptr, diff_stride);
+ break;
+ case 8:
+ sub_blk_8x8_msa(src_ptr, src_stride, pred_ptr, pred_stride,
+ diff_ptr, diff_stride);
+ break;
+ case 16:
+ sub_blk_16x16_msa(src_ptr, src_stride, pred_ptr, pred_stride,
+ diff_ptr, diff_stride);
+ break;
+ case 32:
+ sub_blk_32x32_msa(src_ptr, src_stride, pred_ptr, pred_stride,
+ diff_ptr, diff_stride);
+ break;
+ case 64:
+ sub_blk_64x64_msa(src_ptr, src_stride, pred_ptr, pred_stride,
+ diff_ptr, diff_stride);
+ break;
+ default:
+ vp9_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr,
+ src_stride, pred_ptr, pred_stride);
+ break;
+ }
+ } else {
+ vp9_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr, src_stride,
+ pred_ptr, pred_stride);
+ }
+}
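
For reference, the kernels above compute the same per-pixel residual as the generic vp9_subtract_block_c fallback they dispatch to for other sizes; a minimal scalar sketch (illustrative name, assuming only that diff = src - pred widened to 16 bits):

    #include <stddef.h>
    #include <stdint.h>

    static void subtract_block_ref(int rows, int cols,
                                   int16_t *diff, ptrdiff_t diff_stride,
                                   const uint8_t *src, ptrdiff_t src_stride,
                                   const uint8_t *pred, ptrdiff_t pred_stride) {
      int r, c;
      for (r = 0; r < rows; ++r) {
        for (c = 0; c < cols; ++c)
          diff[c] = (int16_t)(src[c] - pred[c]);   /* widened residual */
        diff += diff_stride;
        src += src_stride;
        pred += pred_stride;
      }
    }
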
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c
new file mode 100644
index 00000000000..4053bffaef2
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/mips/msa/vp9_temporal_filter_msa.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/mips/msa/vp9_macros_msa.h"
+
+static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr,
+ uint32_t stride,
+ uint8_t *frm2_ptr,
+ int32_t filt_sth,
+ int32_t filt_wgt,
+ uint32_t *acc,
+ uint16_t *cnt) {
+ uint32_t row;
+ uint64_t f0, f1, f2, f3;
+ v16i8 frm2, frm1 = { 0 };
+ v16i8 frm4, frm3 = { 0 };
+ v16u8 frm_r, frm_l;
+ v8i16 frm2_r, frm2_l;
+ v8i16 diff0, diff1, mod0_h, mod1_h;
+ v4i32 cnst3, cnst16, filt_wt, strength;
+ v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
+ v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
+ v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
+ v4i32 acc0, acc1, acc2, acc3;
+ v8i16 cnt0, cnt1;
+
+ filt_wt = __msa_fill_w(filt_wgt);
+ strength = __msa_fill_w(filt_sth);
+ cnst3 = __msa_ldi_w(3);
+ cnst16 = __msa_ldi_w(16);
+
+ for (row = 2; row--;) {
+ LD4(frm1_ptr, stride, f0, f1, f2, f3);
+ frm1_ptr += (4 * stride);
+
+ LD_SB2(frm2_ptr, 16, frm2, frm4);
+ frm2_ptr += 32;
+
+ LD_SW2(acc, 4, acc0, acc1);
+ LD_SW2(acc + 8, 4, acc2, acc3);
+ LD_SH2(cnt, 8, cnt0, cnt1);
+
+ INSERT_D2_SB(f0, f1, frm1);
+ INSERT_D2_SB(f2, f3, frm3);
+ ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
+ HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
+ UNPCK_SH_SW(diff0, diff0_r, diff0_l);
+ UNPCK_SH_SW(diff1, diff1_r, diff1_l);
+ MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l,
+ diff1_l, mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
+
+ diff0_r = (mod0_w < cnst16);
+ diff0_l = (mod1_w < cnst16);
+ diff1_r = (mod2_w < cnst16);
+ diff1_l = (mod3_w < cnst16);
+
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+
+ mod0_w = diff0_r & mod0_w;
+ mod1_w = diff0_l & mod1_w;
+ mod2_w = diff1_r & mod2_w;
+ mod3_w = diff1_l & mod3_w;
+
+ MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
+ ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
+ ST_SH2(mod0_h, mod1_h, cnt, 8);
+ cnt += 16;
+
+ UNPCK_UB_SH(frm2, frm2_r, frm2_l);
+ UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
+ UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
+ MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+
+ ST_SW2(mod0_w, mod1_w, acc, 4);
+ acc += 8;
+ ST_SW2(mod2_w, mod3_w, acc, 4);
+ acc += 8;
+
+ LD_SW2(acc, 4, acc0, acc1);
+ LD_SW2(acc + 8, 4, acc2, acc3);
+ LD_SH2(cnt, 8, cnt0, cnt1);
+
+ ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
+ HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
+ UNPCK_SH_SW(diff0, diff0_r, diff0_l);
+ UNPCK_SH_SW(diff1, diff1_r, diff1_l);
+ MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l,
+ diff1_l, mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
+
+ diff0_r = (mod0_w < cnst16);
+ diff0_l = (mod1_w < cnst16);
+ diff1_r = (mod2_w < cnst16);
+ diff1_l = (mod3_w < cnst16);
+
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+
+ mod0_w = diff0_r & mod0_w;
+ mod1_w = diff0_l & mod1_w;
+ mod2_w = diff1_r & mod2_w;
+ mod3_w = diff1_l & mod3_w;
+
+ MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
+ ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
+ ST_SH2(mod0_h, mod1_h, cnt, 8);
+ cnt += 16;
+ UNPCK_UB_SH(frm4, frm2_r, frm2_l);
+ UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
+ UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
+ MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+
+ ST_SW2(mod0_w, mod1_w, acc, 4);
+ acc += 8;
+ ST_SW2(mod2_w, mod3_w, acc, 4);
+ acc += 8;
+ }
+}
+
+static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr,
+ uint32_t stride,
+ uint8_t *frm2_ptr,
+ int32_t filt_sth,
+ int32_t filt_wgt,
+ uint32_t *acc,
+ uint16_t *cnt) {
+ uint32_t row;
+ v16i8 frm1, frm2, frm3, frm4;
+ v16u8 frm_r, frm_l;
+ v16i8 zero = { 0 };
+ v8u16 frm2_r, frm2_l;
+ v8i16 diff0, diff1, mod0_h, mod1_h;
+ v4i32 cnst3, cnst16, filt_wt, strength;
+ v4i32 mod0_w, mod1_w, mod2_w, mod3_w;
+ v4i32 diff0_r, diff0_l, diff1_r, diff1_l;
+ v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll;
+ v4i32 acc0, acc1, acc2, acc3;
+ v8i16 cnt0, cnt1;
+
+ filt_wt = __msa_fill_w(filt_wgt);
+ strength = __msa_fill_w(filt_sth);
+ cnst3 = __msa_ldi_w(3);
+ cnst16 = __msa_ldi_w(16);
+
+ for (row = 8; row--;) {
+ LD_SB2(frm1_ptr, stride, frm1, frm3);
+ frm1_ptr += stride;
+
+ LD_SB2(frm2_ptr, 16, frm2, frm4);
+ frm2_ptr += 16;
+
+ LD_SW2(acc, 4, acc0, acc1);
+    LD_SW2(acc + 8, 4, acc2, acc3);
+ LD_SH2(cnt, 8, cnt0, cnt1);
+
+ ILVRL_B2_UB(frm1, frm2, frm_r, frm_l);
+ HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
+ UNPCK_SH_SW(diff0, diff0_r, diff0_l);
+ UNPCK_SH_SW(diff1, diff1_r, diff1_l);
+ MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
+
+ diff0_r = (mod0_w < cnst16);
+ diff0_l = (mod1_w < cnst16);
+ diff1_r = (mod2_w < cnst16);
+ diff1_l = (mod3_w < cnst16);
+
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+
+ mod0_w = diff0_r & mod0_w;
+ mod1_w = diff0_l & mod1_w;
+ mod2_w = diff1_r & mod2_w;
+ mod3_w = diff1_l & mod3_w;
+
+ MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
+ ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
+ ST_SH2(mod0_h, mod1_h, cnt, 8);
+ cnt += 16;
+
+ ILVRL_B2_UH(zero, frm2, frm2_r, frm2_l);
+ UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
+ UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
+ MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+
+ ST_SW2(mod0_w, mod1_w, acc, 4);
+ acc += 8;
+ ST_SW2(mod2_w, mod3_w, acc, 4);
+ acc += 8;
+
+ LD_SW2(acc, 4, acc0, acc1);
+ LD_SW2(acc + 8, 4, acc2, acc3);
+ LD_SH2(cnt, 8, cnt0, cnt1);
+
+ ILVRL_B2_UB(frm3, frm4, frm_r, frm_l);
+ HSUB_UB2_SH(frm_r, frm_l, diff0, diff1);
+ UNPCK_SH_SW(diff0, diff0_r, diff0_l);
+ UNPCK_SH_SW(diff1, diff1_r, diff1_l);
+ MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
+
+ diff0_r = (mod0_w < cnst16);
+ diff0_l = (mod1_w < cnst16);
+ diff1_r = (mod2_w < cnst16);
+ diff1_l = (mod3_w < cnst16);
+
+ SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+
+ mod0_w = diff0_r & mod0_w;
+ mod1_w = diff0_l & mod1_w;
+ mod2_w = diff1_r & mod2_w;
+ mod3_w = diff1_l & mod3_w;
+
+ MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
+ ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
+ ST_SH2(mod0_h, mod1_h, cnt, 8);
+ cnt += 16;
+
+ ILVRL_B2_UH(zero, frm4, frm2_r, frm2_l);
+ UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl);
+ UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll);
+ MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3,
+ mod0_w, mod1_w, mod2_w, mod3_w);
+ ST_SW2(mod0_w, mod1_w, acc, 4);
+ acc += 8;
+ ST_SW2(mod2_w, mod3_w, acc, 4);
+ acc += 8;
+
+ frm1_ptr += stride;
+ frm2_ptr += 16;
+ }
+}
+
+void vp9_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride,
+ uint8_t *frame2_ptr, uint32_t blk_w,
+ uint32_t blk_h, int32_t strength,
+ int32_t filt_wgt, uint32_t *accu,
+ uint16_t *cnt) {
+ if (8 == (blk_w * blk_h)) {
+ temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr,
+ strength, filt_wgt, accu, cnt);
+ } else if (16 == (blk_w * blk_h)) {
+ temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr,
+ strength, filt_wgt, accu, cnt);
+ } else {
+ vp9_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h,
+ strength, filt_wgt, accu, cnt);
+ }
+}
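
The vector loops above implement the usual VP9 temporal-filter modifier: square the frame difference, scale by 3, round-shift by the filter strength, clamp to 16, invert, and weight. A per-pixel scalar sketch under those assumptions (names are illustrative; the strength rounding mirrors the SRAR_W4_SW rounding shift):

    #include <stdint.h>

    static void temporal_filter_pixel(uint8_t p1, uint8_t p2, int strength,
                                      int filt_wgt, uint32_t *acc, uint16_t *cnt) {
      const int diff = (int)p1 - (int)p2;
      const int round = strength > 0 ? 1 << (strength - 1) : 0;
      int mod = (diff * diff * 3 + round) >> strength;
      if (mod > 16) mod = 16;        /* the (mod < cnst16) mask in the vector code */
      mod = (16 - mod) * filt_wgt;
      *cnt += (uint16_t)mod;         /* per-pixel filter-weight count */
      *acc += (uint32_t)mod * p2;    /* weighted accumulation of the frame2 pixel */
    }
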
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.c
index 9622ba1d67e..bea7653d2a7 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.c
@@ -11,6 +11,7 @@
#include <limits.h>
#include <math.h>
+#include "vp9/encoder/vp9_aq_complexity.h"
#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/common/vp9_seg_common.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.h
index c0dce6c5b7d..e9acb1ca504 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_complexity.h
@@ -16,6 +16,8 @@
extern "C" {
#endif
+#include "vp9/common/vp9_enums.h"
+
struct VP9_COMP;
struct macroblock;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
index 24b427df575..6270bf45293 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -39,6 +39,8 @@ struct CYCLIC_REFRESH {
int rdmult;
// Cyclic refresh map.
signed char *map;
+ // Map of the last q a block was coded at.
+ uint8_t *last_coded_q_map;
// Thresholds applied to the projected rate/distortion of the coding block,
// when deciding whether block should be refreshed.
int64_t thresh_rate_sb;
@@ -48,12 +50,14 @@ struct CYCLIC_REFRESH {
int16_t motion_thresh;
// Rate target ratio to set q delta.
double rate_ratio_qdelta;
+ // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
+ int rate_boost_fac;
double low_content_avg;
- int qindex_delta_seg1;
- int qindex_delta_seg2;
+ int qindex_delta[3];
};
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
+ size_t last_coded_q_map_size;
CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
if (cr == NULL)
return NULL;
@@ -63,12 +67,21 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
vpx_free(cr);
return NULL;
}
+ last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
+ cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
+ if (cr->last_coded_q_map == NULL) {
+ vpx_free(cr);
+ return NULL;
+ }
+ assert(MAXQ <= 255);
+ memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
return cr;
}
void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
vpx_free(cr->map);
+ vpx_free(cr->last_coded_q_map);
vpx_free(cr);
}
@@ -116,7 +129,8 @@ static int candidate_refresh_aq(const CYCLIC_REFRESH *cr,
else if (bsize >= BLOCK_16X16 &&
rate < cr->thresh_rate_sb &&
is_inter_block(mbmi) &&
- mbmi->mv[0].as_int == 0)
+ mbmi->mv[0].as_int == 0 &&
+ cr->rate_boost_fac > 10)
// More aggressive delta-q for bigger blocks with zero motion.
return CR_SEGMENT_ID_BOOST2;
else
@@ -157,11 +171,11 @@ int vp9_cyclic_refresh_estimate_bits_at_q(const VP9_COMP *cpi,
correction_factor, cm->bit_depth) +
weight_segment1 *
vp9_estimate_bits_at_q(cm->frame_type,
- cm->base_qindex + cr->qindex_delta_seg1, mbs,
+ cm->base_qindex + cr->qindex_delta[1], mbs,
correction_factor, cm->bit_depth) +
weight_segment2 *
vp9_estimate_bits_at_q(cm->frame_type,
- cm->base_qindex + cr->qindex_delta_seg2, mbs,
+ cm->base_qindex + cr->qindex_delta[2], mbs,
correction_factor, cm->bit_depth));
return estimated_bits;
}
@@ -246,9 +260,16 @@ void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
// copy mbmi->segment_id into global segmentation map.
for (y = 0; y < ymis; y++)
for (x = 0; x < xmis; x++) {
- cr->map[block_index + y * cm->mi_cols + x] = new_map_value;
- cpi->segmentation_map[block_index + y * cm->mi_cols + x] =
- mbmi->segment_id;
+ int map_offset = block_index + y * cm->mi_cols + x;
+ cr->map[map_offset] = new_map_value;
+ cpi->segmentation_map[map_offset] = mbmi->segment_id;
+ // Inter skip blocks were clearly not coded at the current qindex, so
+ // don't update the map for them. For cases where motion is non-zero or
+ // the reference frame isn't the previous frame, the previous value in
+ // the map for this spatial location is not entirely correct.
+ if (!is_inter_block(mbmi) || !skip)
+ cr->last_coded_q_map[map_offset] = clamp(
+ cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ);
}
}
@@ -357,7 +378,7 @@ void vp9_cyclic_refresh_check_golden_update(VP9_COMP *const cpi) {
// 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock.
// Blocks labeled as BOOST1 may later get set to BOOST2 (during the
// encoding of the superblock).
-void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) {
+static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
unsigned char *const seg_map = cpi->segmentation_map;
@@ -382,6 +403,10 @@ void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) {
int sb_col_index = i - sb_row_index * sb_cols;
int mi_row = sb_row_index * MI_BLOCK_SIZE;
int mi_col = sb_col_index * MI_BLOCK_SIZE;
+ int qindex_thresh =
+ cpi->oxcf.content == VP9E_CONTENT_SCREEN
+ ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
+ : 0;
assert(mi_row >= 0 && mi_row < cm->mi_rows);
assert(mi_col >= 0 && mi_col < cm->mi_cols);
bl_index = mi_row * cm->mi_cols + mi_col;
@@ -397,7 +422,8 @@ void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) {
// for possible boost/refresh (segment 1). The segment id may get
// reset to 0 later if block gets coded anything other than ZEROMV.
if (cr->map[bl_index2] == 0) {
- sum_map++;
+ if (cr->last_coded_q_map[bl_index2] > qindex_thresh)
+ sum_map++;
} else if (cr->map[bl_index2] < 0) {
cr->map[bl_index2]++;
}
@@ -420,18 +446,30 @@ void vp9_cyclic_refresh_update_map(VP9_COMP *const cpi) {
cr->sb_index = i;
}
-// Set/update global/frame level cyclic refresh parameters.
+// Set cyclic refresh parameters.
void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
+ const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
cr->percent_refresh = 10;
+ cr->max_qdelta_perc = 50;
+ cr->time_for_refresh = 0;
// Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
- // periods of the refresh cycle, after a key frame. This corresponds to ~40
- // frames with cr->percent_refresh = 10.
- if (rc->frames_since_key < 40)
+ // periods of the refresh cycle, after a key frame.
+ if (rc->frames_since_key < 4 * cr->percent_refresh)
cr->rate_ratio_qdelta = 3.0;
else
cr->rate_ratio_qdelta = 2.0;
+ // Adjust some parameters for low resolutions at low bitrates.
+ if (cm->width <= 352 &&
+ cm->height <= 288 &&
+ rc->avg_frame_bandwidth < 3400) {
+ cr->motion_thresh = 4;
+ cr->rate_boost_fac = 10;
+ } else {
+ cr->motion_thresh = 32;
+ cr->rate_boost_fac = 17;
+ }
}
// Setup cyclic background refresh: set delta q and segmentation map.
@@ -452,16 +490,17 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
unsigned char *const seg_map = cpi->segmentation_map;
memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
vp9_disable_segmentation(&cm->seg);
- if (cm->frame_type == KEY_FRAME)
+ if (cm->frame_type == KEY_FRAME) {
+ memset(cr->last_coded_q_map, MAXQ,
+ cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
cr->sb_index = 0;
+ }
return;
} else {
int qindex_delta = 0;
int qindex2;
const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
vp9_clear_system_state();
- cr->max_qdelta_perc = 50;
- cr->time_for_refresh = 0;
// Set rate threshold to some multiple (set to 2 for now) of the target
// rate (target is given by sb64_target_rate and scaled by 256).
cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2;
@@ -469,7 +508,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
// q will not exceed 457, so (q * q) is within 32bit; see:
// vp9_convert_qindex_to_q(), vp9_ac_quant(), ac_qlookup*[].
cr->thresh_dist_sb = ((int64_t)(q * q)) << 2;
- cr->motion_thresh = 32;
+
// Set up segmentation.
// Clear down the segment map.
vp9_enable_segmentation(&cm->seg);
@@ -493,7 +532,7 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
// Set the q delta for segment BOOST1.
qindex_delta = compute_deltaq(cpi, cm->base_qindex, cr->rate_ratio_qdelta);
- cr->qindex_delta_seg1 = qindex_delta;
+ cr->qindex_delta[1] = qindex_delta;
// Compute rd-mult for segment BOOST1.
qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ);
@@ -503,17 +542,24 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta);
// Set a more aggressive (higher) q delta for segment BOOST2.
- qindex_delta = compute_deltaq(cpi, cm->base_qindex,
- MIN(CR_MAX_RATE_TARGET_RATIO,
- CR_BOOST2_FAC * cr->rate_ratio_qdelta));
- cr->qindex_delta_seg2 = qindex_delta;
+ qindex_delta = compute_deltaq(
+ cpi, cm->base_qindex, MIN(CR_MAX_RATE_TARGET_RATIO,
+ 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
+ cr->qindex_delta[2] = qindex_delta;
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
// Update the segmentation and refresh map.
- vp9_cyclic_refresh_update_map(cpi);
+ cyclic_refresh_update_map(cpi);
}
}
int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) {
return cr->rdmult;
}
+
+void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
+ cr->sb_index = 0;
+}
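
With this change, the fixed CR_BOOST2_FAC (1.7) is folded into the integer rate_boost_fac (10 for small, low-bitrate frames, otherwise 17), so the BOOST2 segment's rate-target ratio becomes 0.1 * rate_boost_fac * rate_ratio_qdelta, still capped by CR_MAX_RATE_TARGET_RATIO (4.0). A small sketch of that selection (illustrative helper; compute_deltaq() itself is unchanged and not shown):

    static double boost2_rate_ratio(int rate_boost_fac, double rate_ratio_qdelta) {
      const double ratio = 0.1 * rate_boost_fac * rate_ratio_qdelta;
      return ratio < 4.0 ? ratio : 4.0;   /* CR_MAX_RATE_TARGET_RATIO */
    }

With rate_boost_fac = 17 and rate_ratio_qdelta = 2.0 this yields 3.4, matching the previous 1.7 * 2.0 behaviour; the lower factor of 10 reduces the boost at low resolutions and bitrates.
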
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
index 21f114b5e59..29d2a91bc69 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -27,9 +27,6 @@ extern "C" {
// Maximum rate target ratio for setting segment delta-qp.
#define CR_MAX_RATE_TARGET_RATIO 4.0
-// Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2.
-#define CR_BOOST2_FAC 1.7
-
struct VP9_COMP;
struct CYCLIC_REFRESH;
@@ -78,6 +75,8 @@ void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi);
int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr);
+void vp9_cyclic_refresh_reset_resize(struct VP9_COMP *const cpi);
+
static INLINE int cyclic_refresh_segment_id_boosted(int segment_id) {
return segment_id == CR_SEGMENT_ID_BOOST1 ||
segment_id == CR_SEGMENT_ID_BOOST2;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c
index be6f7e4ee53..f072717f1d9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_variance.c
@@ -10,6 +10,8 @@
#include <math.h>
+#include "vpx_ports/mem.h"
+
#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/common/vp9_seg_common.h"
@@ -80,6 +82,61 @@ void vp9_vaq_frame_setup(VP9_COMP *cpi) {
}
}
+/* TODO(agrange, paulwilkins): block_variance() calls the unoptimized versions
+ * of variance() and highbd_8_variance(). It should not.
+ */
+static void aq_variance(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int i, j;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void aq_highbd_variance64(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, uint64_t *sse, uint64_t *sum) {
+ int i, j;
+
+ uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+static void aq_highbd_8_variance(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ uint64_t sse_long = 0;
+ uint64_t sum_long = 0;
+ aq_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+ *sse = (unsigned int)sse_long;
+ *sum = (int)sum_long;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bs) {
@@ -96,18 +153,18 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
int avg;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- highbd_variance(x->plane[0].src.buf, x->plane[0].src.stride,
- CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh,
- &sse, &avg);
+ aq_highbd_8_variance(x->plane[0].src.buf, x->plane[0].src.stride,
+ CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh,
+ &sse, &avg);
sse >>= 2 * (xd->bd - 8);
avg >>= (xd->bd - 8);
} else {
- variance(x->plane[0].src.buf, x->plane[0].src.stride,
- vp9_64_zeros, 0, bw, bh, &sse, &avg);
+ aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp9_64_zeros, 0, bw, bh, &sse, &avg);
}
#else
- variance(x->plane[0].src.buf, x->plane[0].src.stride,
- vp9_64_zeros, 0, bw, bh, &sse, &avg);
+ aq_variance(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp9_64_zeros, 0, bw, bh, &sse, &avg);
#endif // CONFIG_VP9_HIGHBITDEPTH
var = sse - (((int64_t)avg * avg) / (bw * bh));
return (256 * var) / (bw * bh);
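
The replacement aq_variance helpers feed the same normalization as before: block_variance() forms the SSE and sum against an all-zero reference, then rescales the variance to a 256-sample basis. A scalar sketch of that final step (illustrative name only):

    #include <stdint.h>

    static unsigned int block_var_norm(unsigned int sse, int sum, int bw, int bh) {
      const int64_t var = (int64_t)sse - (((int64_t)sum * sum) / (bw * bh));
      return (unsigned int)((256 * var) / (bw * bh));
    }
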
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_avg.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_avg.c
index 95b13bb7718..3ef3882d280 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_avg.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_avg.c
@@ -7,6 +7,7 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vpx_ports/mem.h"
@@ -28,6 +29,8 @@ unsigned int vp9_avg_4x4_c(const uint8_t *s, int p) {
return (sum + 8) >> 4;
}
+// src_diff: first pass, 9 bit, dynamic range [-255, 255]
+// second pass, 12 bit, dynamic range [-2040, 2040]
static void hadamard_col8(const int16_t *src_diff, int src_stride,
int16_t *coeff) {
int16_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride];
@@ -64,15 +67,18 @@ void vp9_hadamard_8x8_c(int16_t const *src_diff, int src_stride,
int16_t buffer[64];
int16_t *tmp_buf = &buffer[0];
for (idx = 0; idx < 8; ++idx) {
- hadamard_col8(src_diff, src_stride, tmp_buf);
+ hadamard_col8(src_diff, src_stride, tmp_buf); // src_diff: 9 bit
+ // dynamic range [-255, 255]
tmp_buf += 8;
++src_diff;
}
tmp_buf = &buffer[0];
for (idx = 0; idx < 8; ++idx) {
- hadamard_col8(tmp_buf, 8, coeff);
- coeff += 8;
+ hadamard_col8(tmp_buf, 8, coeff); // tmp_buf: 12 bit
+ // dynamic range [-2040, 2040]
+ coeff += 8; // coeff: 15 bit
+ // dynamic range [-16320, 16320]
++tmp_buf;
}
}
@@ -82,58 +88,68 @@ void vp9_hadamard_16x16_c(int16_t const *src_diff, int src_stride,
int16_t *coeff) {
int idx;
for (idx = 0; idx < 4; ++idx) {
+ // src_diff: 9 bit, dynamic range [-255, 255]
int16_t const *src_ptr = src_diff + (idx >> 1) * 8 * src_stride
+ (idx & 0x01) * 8;
vp9_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64);
}
+ // coeff: 15 bit, dynamic range [-16320, 16320]
for (idx = 0; idx < 64; ++idx) {
int16_t a0 = coeff[0];
int16_t a1 = coeff[64];
int16_t a2 = coeff[128];
int16_t a3 = coeff[192];
- int16_t b0 = a0 + a1;
- int16_t b1 = a0 - a1;
- int16_t b2 = a2 + a3;
- int16_t b3 = a2 - a3;
+ int16_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640]
+ int16_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range
+ int16_t b2 = (a2 + a3) >> 1; // [-16320, 16320]
+ int16_t b3 = (a2 - a3) >> 1;
- coeff[0] = (b0 + b2) >> 1;
- coeff[64] = (b1 + b3) >> 1;
- coeff[128] = (b0 - b2) >> 1;
- coeff[192] = (b1 - b3) >> 1;
+ coeff[0] = b0 + b2; // 16 bit, [-32640, 32640]
+ coeff[64] = b1 + b3;
+ coeff[128] = b0 - b2;
+ coeff[192] = b1 - b3;
++coeff;
}
}
+// coeff: 16 bits, dynamic range [-32640, 32640].
+// length: value range {16, 64, 256, 1024}.
int16_t vp9_satd_c(const int16_t *coeff, int length) {
int i;
int satd = 0;
for (i = 0; i < length; ++i)
satd += abs(coeff[i]);
+ // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024]
return (int16_t)satd;
}
// Integer projection onto row vectors.
-void vp9_int_pro_row_c(int16_t *hbuf, uint8_t const *ref,
+// height: value range {16, 32, 64}.
+void vp9_int_pro_row_c(int16_t hbuf[16], uint8_t const *ref,
const int ref_stride, const int height) {
int idx;
- const int norm_factor = MAX(8, height >> 1);
+ const int norm_factor = height >> 1;
for (idx = 0; idx < 16; ++idx) {
int i;
hbuf[idx] = 0;
+ // hbuf[idx]: 14 bit, dynamic range [0, 16320].
for (i = 0; i < height; ++i)
hbuf[idx] += ref[i * ref_stride];
+ // hbuf[idx]: 9 bit, dynamic range [0, 510].
hbuf[idx] /= norm_factor;
++ref;
}
}
+// width: value range {16, 32, 64}.
int16_t vp9_int_pro_col_c(uint8_t const *ref, const int width) {
int idx;
int16_t sum = 0;
+ // sum: 14 bit, dynamic range [0, 16320]
for (idx = 0; idx < width; ++idx)
sum += ref[idx];
return sum;
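
A quick check of the dynamic-range annotations added above, using the largest supported height of 64 (a worked example, not library code):

    #include <assert.h>

    static void int_pro_row_range_check(void) {
      const int height = 64;
      const int max_sum = height * 255;      /* 16320, the stated 14-bit bound */
      const int norm_factor = height >> 1;   /* 32 */
      assert(max_sum == 16320);
      assert(max_sum / norm_factor == 510);  /* the stated 9-bit bound */
    }
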
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c
index d20e067669f..1ebdd066bfe 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c
@@ -93,7 +93,7 @@ static void write_selected_tx_size(const VP9_COMMON *cm,
static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd,
int segment_id, const MODE_INFO *mi, vp9_writer *w) {
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
} else {
const int skip = mi->mbmi.skip;
@@ -207,10 +207,10 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
// If segment level coding of this signal is disabled...
// or the segment allows multiple reference frame options
- if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+ if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
assert(!is_compound);
assert(mbmi->ref_frame[0] ==
- vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
+ get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME));
} else {
// does the feature use compound prediction or not
// (if not specified at the frame/segment level)
@@ -264,7 +264,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
skip = write_skip(cm, xd, segment_id, mi, w);
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
vp9_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd));
if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT &&
@@ -293,7 +293,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
write_ref_frames(cm, xd, w);
// If segment skip is not enabled code the mode.
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
+ if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
if (bsize >= BLOCK_8X8) {
write_inter_mode(w, mode, inter_probs);
}
@@ -403,7 +403,7 @@ static void write_partition(const VP9_COMMON *const cm,
int hbs, int mi_row, int mi_col,
PARTITION_TYPE p, BLOCK_SIZE bsize, vp9_writer *w) {
const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
- const vp9_prob *const probs = get_partition_probs(cm, ctx);
+ const vp9_prob *const probs = xd->partition_probs[ctx];
const int has_rows = (mi_row + hbs) < cm->mi_rows;
const int has_cols = (mi_col + hbs) < cm->mi_cols;
@@ -481,9 +481,12 @@ static void write_modes_sb(VP9_COMP *cpi,
static void write_modes(VP9_COMP *cpi,
const TileInfo *const tile, vp9_writer *w,
TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) {
+ const VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
int mi_row, mi_col;
+ set_partition_probs(cm, xd);
+
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
vp9_zero(xd->left_seg_context);
@@ -787,10 +790,10 @@ static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd,
for (i = 0; i < MAX_SEGMENTS; i++) {
for (j = 0; j < SEG_LVL_MAX; j++) {
- const int active = vp9_segfeature_active(seg, i, j);
+ const int active = segfeature_active(seg, i, j);
vp9_wb_write_bit(wb, active);
if (active) {
- const int data = vp9_get_segdata(seg, i, j);
+ const int data = get_segdata(seg, i, j);
const int data_max = vp9_seg_feature_data_max(j);
if (vp9_is_segfeature_signed(j)) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_dct.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_dct.c
index 9e6ca3d594c..414d2bb1502 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_dct.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_dct.c
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
index 0e74784e9b6..b4059752ee7 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -13,8 +13,10 @@
#include <stdio.h>
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
+#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
#include "vp9/common/vp9_common.h"
@@ -360,7 +362,7 @@ static void get_variance(var *v) {
((v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count);
}
-void sum_2_variances(const var *a, const var *b, var *r) {
+static void sum_2_variances(const var *a, const var *b, var *r) {
assert(a->log2_count == b->log2_count);
fill_variance(a->sum_square_error + b->sum_square_error,
a->sum_error + b->sum_error, a->log2_count + 1, r);
@@ -368,6 +370,7 @@ void sum_2_variances(const var *a, const var *b, var *r) {
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
variance_node node;
+ memset(&node, 0, sizeof(node));
tree_to_node(data, bsize, &node);
sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
@@ -462,46 +465,55 @@ static int set_vt_partitioning(VP9_COMP *cpi,
return 0;
}
-void vp9_set_vbp_thresholds(VP9_COMP *cpi, int q) {
+// Set the variance split thresholds for the following block sizes:
+// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
+// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to a 4x4 partition) is
+// currently only used on key frames.
+static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int is_key_frame = (cm->frame_type == KEY_FRAME);
+ const int threshold_multiplier = is_key_frame ? 20 : 1;
+ const int64_t threshold_base = (int64_t)(threshold_multiplier *
+ cpi->y_dequant[q][1]);
+ if (is_key_frame) {
+ thresholds[0] = threshold_base;
+ thresholds[1] = threshold_base >> 2;
+ thresholds[2] = threshold_base >> 2;
+ thresholds[3] = threshold_base << 2;
+ } else {
+ thresholds[1] = threshold_base;
+ if (cm->width <= 352 && cm->height <= 288) {
+ thresholds[0] = threshold_base >> 2;
+ thresholds[2] = threshold_base << 3;
+ } else {
+ thresholds[0] = threshold_base;
+ thresholds[1] = (5 * threshold_base) >> 2;
+ if (cm->width >= 1920 && cm->height >= 1080)
+ thresholds[1] = (7 * threshold_base) >> 2;
+ thresholds[2] = threshold_base << cpi->oxcf.speed;
+ }
+ }
+}
+
+void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) {
+ VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
+ const int is_key_frame = (cm->frame_type == KEY_FRAME);
if (sf->partition_search_type != VAR_BASED_PARTITION &&
sf->partition_search_type != REFERENCE_PARTITION) {
return;
} else {
- VP9_COMMON *const cm = &cpi->common;
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
- const int threshold_multiplier = is_key_frame ? 20 : 1;
- const int64_t threshold_base = (int64_t)(threshold_multiplier *
- cpi->y_dequant[q][1]);
-
- // TODO(marpan): Allow 4x4 partitions for inter-frames.
- // use_4x4_partition = (variance4x4downsample[i2 + j] == 1);
- // If 4x4 partition is not used, then 8x8 partition will be selected
- // if variance of 16x16 block is very high, so use larger threshold
- // for 16x16 (threshold_bsize_min) in that case.
-
- // Array index: 0 - threshold_64x64; 1 - threshold_32x32;
- // 2 - threshold_16x16; 3 - vbp_threshold_8x8;
+ set_vbp_thresholds(cpi, cpi->vbp_thresholds, q);
+ // The thresholds below are not changed locally.
if (is_key_frame) {
- cpi->vbp_thresholds[0] = threshold_base;
- cpi->vbp_thresholds[1] = threshold_base >> 2;
- cpi->vbp_thresholds[2] = threshold_base >> 2;
- cpi->vbp_thresholds[3] = threshold_base << 2;
cpi->vbp_threshold_sad = 0;
cpi->vbp_bsize_min = BLOCK_8X8;
} else {
- cpi->vbp_thresholds[1] = threshold_base;
- if (cm->width <= 352 && cm->height <= 288) {
- cpi->vbp_thresholds[0] = threshold_base >> 2;
- cpi->vbp_thresholds[2] = threshold_base << 3;
+ if (cm->width <= 352 && cm->height <= 288)
cpi->vbp_threshold_sad = 100;
- } else {
- cpi->vbp_thresholds[0] = threshold_base;
- cpi->vbp_thresholds[1] = (5 * threshold_base) >> 2;
- cpi->vbp_thresholds[2] = threshold_base << cpi->oxcf.speed;
+ else
cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 ?
(cpi->y_dequant[q][1] << 1) : 1000;
- }
cpi->vbp_bsize_min = BLOCK_16X16;
}
cpi->vbp_threshold_minmax = 15 + (q >> 3);
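
For a concrete reading of set_vbp_thresholds(): on a key frame the base is 20 * y_dequant[q][1], giving thresholds of {base, base >> 2, base >> 2, base << 2}; on inter frames at CIF-or-smaller resolutions the base is y_dequant[q][1] itself, giving {base >> 2, base, base << 3} for the 64x64/32x32/16x16 entries. A tiny sketch restating the key-frame branch (a model of the code above, not a replacement for it):

    #include <stdint.h>

    static void vbp_key_frame_thresholds(int64_t base, int64_t thresholds[4]) {
      thresholds[0] = base;        /* 64x64 */
      thresholds[1] = base >> 2;   /* 32x32 */
      thresholds[2] = base >> 2;   /* 16x16 */
      thresholds[3] = base << 2;   /* 8x8 -> 4x4 split, key frames only */
    }
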
@@ -550,23 +562,6 @@ static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
return (minmax_max - minmax_min);
}
-static void modify_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
- VP9_COMMON *const cm = &cpi->common;
- const int64_t threshold_base = (int64_t)(cpi->y_dequant[q][1]);
-
- // Array index: 0 - threshold_64x64; 1 - threshold_32x32;
- // 2 - threshold_16x16; 3 - vbp_threshold_8x8;
- thresholds[1] = threshold_base;
- if (cm->width <= 352 && cm->height <= 288) {
- thresholds[0] = threshold_base >> 2;
- thresholds[2] = threshold_base << 3;
- } else {
- thresholds[0] = threshold_base;
- thresholds[1] = (5 * threshold_base) >> 2;
- thresholds[2] = threshold_base << cpi->oxcf.speed;
- }
-}
-
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
@@ -679,7 +674,7 @@ static int choose_partitioning(VP9_COMP *cpi,
if (cyclic_refresh_segment_id_boosted(segment_id)) {
int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
- modify_vbp_thresholds(cpi, thresholds, q);
+ set_vbp_thresholds(cpi, thresholds, q);
}
}
@@ -693,17 +688,28 @@ static int choose_partitioning(VP9_COMP *cpi,
s = x->plane[0].src.buf;
sp = x->plane[0].src.stride;
- if (!is_key_frame) {
+ if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) &&
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
+ // In the case of spatial/temporal scalable coding, the assumption here is
+ // that the temporal reference frame will always be of type LAST_FRAME.
+ // TODO(marpan): If that assumption is broken, we need to revisit this code.
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
unsigned int uv_sad;
const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
- const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ const YV12_BUFFER_CONFIG *yv12_g = NULL;
unsigned int y_sad, y_sad_g;
const BLOCK_SIZE bsize = BLOCK_32X32
+ (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows);
assert(yv12 != NULL);
+
+ if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
+ // For now, GOLDEN will not be used for non-zero spatial layers, since
+ // it may not be a temporal reference.
+ yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ }
+
if (yv12_g && yv12_g != yv12) {
vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
&cm->frame_refs[GOLDEN_FRAME - 1].sf);
@@ -723,7 +729,7 @@ static int choose_partitioning(VP9_COMP *cpi,
mbmi->mv[0].as_int = 0;
mbmi->interp_filter = BILINEAR;
- y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
+ y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
if (y_sad_g < y_sad) {
vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
&cm->frame_refs[GOLDEN_FRAME - 1].sf);
@@ -838,7 +844,9 @@ static int choose_partitioning(VP9_COMP *cpi,
}
}
}
- if (is_key_frame || (low_res &&
+ // TODO(marpan): There is an issue with variance based on 4x4 average in
+          // svc mode, so don't allow it for now.
+ if (is_key_frame || (low_res && !cpi->use_svc &&
vt.split[i].split[j].part_variances.none.variance >
(thresholds[1] << 1))) {
force_split[split_index] = 0;
@@ -1043,7 +1051,7 @@ static void update_state(VP9_COMP *cpi, ThreadData *td,
if (!output_enabled)
return;
- if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ if (!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
for (i = 0; i < TX_MODES; i++)
rdc->tx_select_diff[i] += ctx->tx_rd_diff[i];
}
@@ -1240,7 +1248,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi,
vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
} else {
if (bsize >= BLOCK_8X8) {
- if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
ctx, best_rd);
else
@@ -1283,8 +1291,8 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) {
if (!frame_is_intra_only(cm)) {
FRAME_COUNTS *const counts = td->counts;
const int inter_block = is_inter_block(mbmi);
- const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_REF_FRAME);
+ const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_REF_FRAME);
if (!seg_ref_active) {
counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++;
// If the segment reference feature is enabled we have only a single
@@ -1309,7 +1317,7 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) {
}
}
if (inter_block &&
- !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
+ !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
const int mode_ctx = mbmi->mode_context[mbmi->ref_frame[0]];
if (bsize >= BLOCK_8X8) {
const PREDICTION_MODE mode = mbmi->mode;
@@ -1556,7 +1564,7 @@ static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
}
-const struct {
+static const struct {
int row;
int col;
} coord_lookup[16] = {
@@ -2212,66 +2220,6 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
*max_block_size = max_size;
}
-static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
- MACROBLOCKD *const xd,
- int mi_row, int mi_col,
- BLOCK_SIZE *min_block_size,
- BLOCK_SIZE *max_block_size) {
- VP9_COMMON *const cm = &cpi->common;
- MODE_INFO **mi_8x8 = xd->mi;
- const int left_in_image = xd->left_available && mi_8x8[-1];
- const int above_in_image = xd->up_available && mi_8x8[-xd->mi_stride];
- int row8x8_remaining = tile->mi_row_end - mi_row;
- int col8x8_remaining = tile->mi_col_end - mi_col;
- int bh, bw;
- BLOCK_SIZE min_size = BLOCK_32X32;
- BLOCK_SIZE max_size = BLOCK_8X8;
- int bsl = mi_width_log2_lookup[BLOCK_64X64];
- const int search_range_ctrl = (((mi_row + mi_col) >> bsl) +
- get_chessboard_index(cm->current_video_frame)) & 0x1;
- // Trap case where we do not have a prediction.
- if (search_range_ctrl &&
- (left_in_image || above_in_image || cm->frame_type != KEY_FRAME)) {
- int block;
- MODE_INFO **mi;
- BLOCK_SIZE sb_type;
-
- // Find the min and max partition sizes used in the left SB64.
- if (left_in_image) {
- MODE_INFO *cur_mi;
- mi = &mi_8x8[-1];
- for (block = 0; block < MI_BLOCK_SIZE; ++block) {
- cur_mi = mi[block * xd->mi_stride];
- sb_type = cur_mi ? cur_mi->mbmi.sb_type : 0;
- min_size = MIN(min_size, sb_type);
- max_size = MAX(max_size, sb_type);
- }
- }
- // Find the min and max partition sizes used in the above SB64.
- if (above_in_image) {
- mi = &mi_8x8[-xd->mi_stride * MI_BLOCK_SIZE];
- for (block = 0; block < MI_BLOCK_SIZE; ++block) {
- sb_type = mi[block] ? mi[block]->mbmi.sb_type : 0;
- min_size = MIN(min_size, sb_type);
- max_size = MAX(max_size, sb_type);
- }
- }
-
- min_size = min_partition_size[min_size];
- max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining,
- &bh, &bw);
- min_size = MIN(min_size, max_size);
- min_size = MAX(min_size, BLOCK_8X8);
- max_size = MIN(max_size, BLOCK_32X32);
- } else {
- min_size = BLOCK_8X8;
- max_size = BLOCK_32X32;
- }
-
- *min_block_size = min_size;
- *max_block_size = max_size;
-}
-
// TODO(jingning) refactor functions setting partition search range
static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -2841,7 +2789,7 @@ static void encode_rd_sb_row(VP9_COMP *cpi,
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
int segment_id = vp9_get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
- seg_skip = vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP);
+ seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
}
x->source_variance = UINT_MAX;
@@ -2901,7 +2849,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
static int check_dual_ref_flags(VP9_COMP *cpi) {
const int ref_flags = cpi->ref_frame_flags;
- if (vp9_segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
+ if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
return 0;
} else {
return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG)
@@ -2937,8 +2885,7 @@ static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
if (xd->lossless)
return ONLY_4X4;
if (cpi->common.frame_type == KEY_FRAME &&
- cpi->sf.use_nonrd_pick_mode &&
- cpi->sf.partition_search_type == VAR_BASED_PARTITION)
+ cpi->sf.use_nonrd_pick_mode)
return ALLOW_16X16;
if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
return ALLOW_32X32;
@@ -2976,7 +2923,7 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi,
if (cm->frame_type == KEY_FRAME)
hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
- else if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
+ else if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
else if (bsize >= BLOCK_8X8)
vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col,
@@ -3591,7 +3538,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
const uint8_t *const map = seg->update_map ? cpi->segmentation_map
: cm->last_frame_seg_map;
int segment_id = vp9_get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
- seg_skip = vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP);
+ seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
if (seg_skip) {
partition_search_type = FIXED_PARTITION;
}
@@ -3624,15 +3571,26 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
xd->mi[0]->mbmi.segment_id) {
- x->max_partition_size = BLOCK_64X64;
+        // Use lower max_partition_size for low resolutions.
+ if (cm->width <= 352 && cm->height <= 288)
+ x->max_partition_size = BLOCK_32X32;
+ else
+ x->max_partition_size = BLOCK_64X64;
x->min_partition_size = BLOCK_8X8;
nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
BLOCK_64X64, &dummy_rdc, 1,
INT64_MAX, td->pc_root);
} else {
choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
- nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
- BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
+ // TODO(marpan): Seems like nonrd_select_partition does not support
+      // 4x4 partitions. Since 4x4 is used on key frames, use this switch
+ // for now.
+ if (cm->frame_type == KEY_FRAME)
+ nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
+ BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
+ else
+ nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
+ BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
}
break;
@@ -3671,15 +3629,15 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
if (cm->use_highbitdepth) {
switch (cm->bit_depth) {
case VPX_BITS_8:
- vp9_highbd_get16x16var(src, src_stride, last_src, last_stride,
+ vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
break;
case VPX_BITS_10:
- vp9_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
+ vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
break;
case VPX_BITS_12:
- vp9_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
+ vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
break;
default:
@@ -3688,11 +3646,11 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
return -1;
}
} else {
- vp9_get16x16var(src, src_stride, last_src, last_stride,
+ vpx_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
}
#else
- vp9_get16x16var(src, src_stride, last_src, last_stride,
+ vpx_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
#endif // CONFIG_VP9_HIGHBITDEPTH
var16->var = var16->sse -
@@ -3778,9 +3736,13 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
int tile_tok = 0;
- if (cpi->tile_data == NULL) {
+ if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
+ if (cpi->tile_data != NULL)
+ vpx_free(cpi->tile_data);
CHECK_MEM_ERROR(cm, cpi->tile_data,
vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
+ cpi->allocated_tiles = tile_cols * tile_rows;
+
for (tile_row = 0; tile_row < tile_rows; ++tile_row)
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileDataEnc *tile_data =
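The vp9_init_tile_data change above replaces allocate-once with grow-on-demand: the tile array is rebuilt whenever the requested tile grid exceeds what the new allocated_tiles counter says is available (needed when SVC or dynamic resizing changes the tile count between frames). A self-contained sketch of the same pattern; the TileStore/ensure_tiles names are illustrative, not libvpx API.

#include <stdlib.h>

typedef struct { int dummy; } TileEntry;          /* stand-in for TileDataEnc */

typedef struct {
  TileEntry *tiles;
  int allocated;                                  /* counterpart of cpi->allocated_tiles */
} TileStore;

/* Reallocate only when the requested grid is larger than the current buffer. */
static int ensure_tiles(TileStore *ts, int cols, int rows) {
  const int needed = cols * rows;
  if (ts->tiles == NULL || ts->allocated < needed) {
    free(ts->tiles);
    ts->tiles = malloc(needed * sizeof(*ts->tiles));
    if (ts->tiles == NULL) return -1;
    ts->allocated = needed;
  }
  return 0;
}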
@@ -4149,8 +4111,8 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
MODE_INFO **mi_8x8 = xd->mi;
MODE_INFO *mi = mi_8x8[0];
MB_MODE_INFO *mbmi = &mi->mbmi;
- const int seg_skip = vp9_segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_SKIP);
+ const int seg_skip = segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_SKIP);
const int mis = cm->mi_stride;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.h
index 1acde0283e9..6aaa56463b0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.h
@@ -40,7 +40,7 @@ void vp9_init_tile_data(struct VP9_COMP *cpi);
void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td,
int tile_row, int tile_col);
-void vp9_set_vbp_thresholds(struct VP9_COMP *cpi, int q);
+void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q);
#ifdef __cplusplus
} // extern "C"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c
index 9a4e61ec882..2829365e533 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c
@@ -13,10 +13,12 @@
#include "./vpx_config.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_systemdependent.h"
#include "vp9/encoder/vp9_encodemb.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c
index af73fcbdcc3..22759983ffa 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c
@@ -22,7 +22,7 @@ static struct vp9_token mv_class_encodings[MV_CLASSES];
static struct vp9_token mv_fp_encodings[MV_FP_SIZE];
static struct vp9_token mv_class0_encodings[CLASS0_SIZE];
-void vp9_entropy_mv_init() {
+void vp9_entropy_mv_init(void) {
vp9_tokens_from_tree(mv_joint_encodings, vp9_mv_joint_tree);
vp9_tokens_from_tree(mv_class_encodings, vp9_mv_class_tree);
vp9_tokens_from_tree(mv_class0_encodings, vp9_mv_class0_tree);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h
index 0ae473749ab..e8ee5ab6641 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h
@@ -18,7 +18,7 @@
extern "C" {
#endif
-void vp9_entropy_mv_init();
+void vp9_entropy_mv_init(void);
void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w,
nmv_context_counts *const counts);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
index a1018adb88f..d708b83197b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
@@ -17,6 +17,7 @@
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx/internal/vpx_psnr.h"
+#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
#include "vp9/common/vp9_alloccommon.h"
@@ -111,7 +112,7 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
// Mark all inactive blocks as active. Other segmentation features may be set
// so memset cannot be used, instead only inactive blocks should be reset.
-void vp9_suppress_active_map(VP9_COMP *cpi) {
+static void suppress_active_map(VP9_COMP *cpi) {
unsigned char *const seg_map = cpi->segmentation_map;
int i;
if (cpi->active_map.enabled || cpi->active_map.update)
@@ -120,7 +121,7 @@ void vp9_suppress_active_map(VP9_COMP *cpi) {
seg_map[i] = AM_SEGMENT_ID_ACTIVE;
}
-void vp9_apply_active_map(VP9_COMP *cpi) {
+static void apply_active_map(VP9_COMP *cpi) {
struct segmentation *const seg = &cpi->common.seg;
unsigned char *const seg_map = cpi->segmentation_map;
const unsigned char *const active_map = cpi->active_map.map;
@@ -685,6 +686,29 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) {
vp9_setup_pc_tree(&cpi->common, &cpi->td);
}
+void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
+ cpi->framerate = framerate < 0.1 ? 30 : framerate;
+ vp9_rc_update_framerate(cpi);
+}
+
+static void set_tile_limits(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+
+ int min_log2_tile_cols, max_log2_tile_cols;
+ vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+
+ if (is_two_pass_svc(cpi) &&
+ (cpi->svc.encode_empty_frame_state == ENCODING ||
+ cpi->svc.number_spatial_layers > 1)) {
+ cm->log2_tile_cols = 0;
+ cm->log2_tile_rows = 0;
+ } else {
+ cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns,
+ min_log2_tile_cols, max_log2_tile_cols);
+ cm->log2_tile_rows = cpi->oxcf.tile_rows;
+ }
+}
+
static void update_frame_size(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
@@ -693,6 +717,8 @@ static void update_frame_size(VP9_COMP *cpi) {
vp9_init_context_buffers(cm);
init_macroblockd(cm, xd);
+ set_tile_limits(cpi);
+
if (is_two_pass_svc(cpi)) {
if (vp9_realloc_frame_buffer(&cpi->alt_ref_buffer,
cm->width, cm->height,
@@ -707,27 +733,6 @@ static void update_frame_size(VP9_COMP *cpi) {
}
}
-void vp9_new_framerate(VP9_COMP *cpi, double framerate) {
- cpi->framerate = framerate < 0.1 ? 30 : framerate;
- vp9_rc_update_framerate(cpi);
-}
-
-static void set_tile_limits(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
-
- int min_log2_tile_cols, max_log2_tile_cols;
- vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
-
- if (is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING) {
- cm->log2_tile_cols = 0;
- cm->log2_tile_rows = 0;
- } else {
- cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns,
- min_log2_tile_cols, max_log2_tile_cols);
- cm->log2_tile_rows = cpi->oxcf.tile_rows;
- }
-}
-
static void init_buffer_indices(VP9_COMP *cpi) {
cpi->lst_fb_idx = 0;
cpi->gld_fb_idx = 1;
@@ -751,6 +756,8 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) {
cm->height = oxcf->height;
vp9_alloc_compressor_data(cpi);
+ cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
+
// Single thread case: use counts in common.
cpi->td.counts = &cm->counts;
@@ -995,7 +1002,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_32X16,
vpx_highbd_sad32x16_bits8,
vpx_highbd_sad32x16_avg_bits8,
- vp9_highbd_variance32x16,
+ vpx_highbd_8_variance32x16,
vp9_highbd_sub_pixel_variance32x16,
vp9_highbd_sub_pixel_avg_variance32x16,
NULL,
@@ -1005,7 +1012,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_16X32,
vpx_highbd_sad16x32_bits8,
vpx_highbd_sad16x32_avg_bits8,
- vp9_highbd_variance16x32,
+ vpx_highbd_8_variance16x32,
vp9_highbd_sub_pixel_variance16x32,
vp9_highbd_sub_pixel_avg_variance16x32,
NULL,
@@ -1015,7 +1022,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_64X32,
vpx_highbd_sad64x32_bits8,
vpx_highbd_sad64x32_avg_bits8,
- vp9_highbd_variance64x32,
+ vpx_highbd_8_variance64x32,
vp9_highbd_sub_pixel_variance64x32,
vp9_highbd_sub_pixel_avg_variance64x32,
NULL,
@@ -1025,7 +1032,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_32X64,
vpx_highbd_sad32x64_bits8,
vpx_highbd_sad32x64_avg_bits8,
- vp9_highbd_variance32x64,
+ vpx_highbd_8_variance32x64,
vp9_highbd_sub_pixel_variance32x64,
vp9_highbd_sub_pixel_avg_variance32x64,
NULL,
@@ -1035,7 +1042,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_32X32,
vpx_highbd_sad32x32_bits8,
vpx_highbd_sad32x32_avg_bits8,
- vp9_highbd_variance32x32,
+ vpx_highbd_8_variance32x32,
vp9_highbd_sub_pixel_variance32x32,
vp9_highbd_sub_pixel_avg_variance32x32,
vpx_highbd_sad32x32x3_bits8,
@@ -1045,7 +1052,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_64X64,
vpx_highbd_sad64x64_bits8,
vpx_highbd_sad64x64_avg_bits8,
- vp9_highbd_variance64x64,
+ vpx_highbd_8_variance64x64,
vp9_highbd_sub_pixel_variance64x64,
vp9_highbd_sub_pixel_avg_variance64x64,
vpx_highbd_sad64x64x3_bits8,
@@ -1055,7 +1062,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_16X16,
vpx_highbd_sad16x16_bits8,
vpx_highbd_sad16x16_avg_bits8,
- vp9_highbd_variance16x16,
+ vpx_highbd_8_variance16x16,
vp9_highbd_sub_pixel_variance16x16,
vp9_highbd_sub_pixel_avg_variance16x16,
vpx_highbd_sad16x16x3_bits8,
@@ -1065,7 +1072,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_16X8,
vpx_highbd_sad16x8_bits8,
vpx_highbd_sad16x8_avg_bits8,
- vp9_highbd_variance16x8,
+ vpx_highbd_8_variance16x8,
vp9_highbd_sub_pixel_variance16x8,
vp9_highbd_sub_pixel_avg_variance16x8,
vpx_highbd_sad16x8x3_bits8,
@@ -1075,7 +1082,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_8X16,
vpx_highbd_sad8x16_bits8,
vpx_highbd_sad8x16_avg_bits8,
- vp9_highbd_variance8x16,
+ vpx_highbd_8_variance8x16,
vp9_highbd_sub_pixel_variance8x16,
vp9_highbd_sub_pixel_avg_variance8x16,
vpx_highbd_sad8x16x3_bits8,
@@ -1085,7 +1092,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_8X8,
vpx_highbd_sad8x8_bits8,
vpx_highbd_sad8x8_avg_bits8,
- vp9_highbd_variance8x8,
+ vpx_highbd_8_variance8x8,
vp9_highbd_sub_pixel_variance8x8,
vp9_highbd_sub_pixel_avg_variance8x8,
vpx_highbd_sad8x8x3_bits8,
@@ -1095,7 +1102,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_8X4,
vpx_highbd_sad8x4_bits8,
vpx_highbd_sad8x4_avg_bits8,
- vp9_highbd_variance8x4,
+ vpx_highbd_8_variance8x4,
vp9_highbd_sub_pixel_variance8x4,
vp9_highbd_sub_pixel_avg_variance8x4,
NULL,
@@ -1105,7 +1112,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_4X8,
vpx_highbd_sad4x8_bits8,
vpx_highbd_sad4x8_avg_bits8,
- vp9_highbd_variance4x8,
+ vpx_highbd_8_variance4x8,
vp9_highbd_sub_pixel_variance4x8,
vp9_highbd_sub_pixel_avg_variance4x8,
NULL,
@@ -1115,7 +1122,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_4X4,
vpx_highbd_sad4x4_bits8,
vpx_highbd_sad4x4_avg_bits8,
- vp9_highbd_variance4x4,
+ vpx_highbd_8_variance4x4,
vp9_highbd_sub_pixel_variance4x4,
vp9_highbd_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x3_bits8,
@@ -1127,7 +1134,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_32X16,
vpx_highbd_sad32x16_bits10,
vpx_highbd_sad32x16_avg_bits10,
- vp9_highbd_10_variance32x16,
+ vpx_highbd_10_variance32x16,
vp9_highbd_10_sub_pixel_variance32x16,
vp9_highbd_10_sub_pixel_avg_variance32x16,
NULL,
@@ -1137,7 +1144,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_16X32,
vpx_highbd_sad16x32_bits10,
vpx_highbd_sad16x32_avg_bits10,
- vp9_highbd_10_variance16x32,
+ vpx_highbd_10_variance16x32,
vp9_highbd_10_sub_pixel_variance16x32,
vp9_highbd_10_sub_pixel_avg_variance16x32,
NULL,
@@ -1147,7 +1154,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_64X32,
vpx_highbd_sad64x32_bits10,
vpx_highbd_sad64x32_avg_bits10,
- vp9_highbd_10_variance64x32,
+ vpx_highbd_10_variance64x32,
vp9_highbd_10_sub_pixel_variance64x32,
vp9_highbd_10_sub_pixel_avg_variance64x32,
NULL,
@@ -1157,7 +1164,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_32X64,
vpx_highbd_sad32x64_bits10,
vpx_highbd_sad32x64_avg_bits10,
- vp9_highbd_10_variance32x64,
+ vpx_highbd_10_variance32x64,
vp9_highbd_10_sub_pixel_variance32x64,
vp9_highbd_10_sub_pixel_avg_variance32x64,
NULL,
@@ -1167,7 +1174,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_32X32,
vpx_highbd_sad32x32_bits10,
vpx_highbd_sad32x32_avg_bits10,
- vp9_highbd_10_variance32x32,
+ vpx_highbd_10_variance32x32,
vp9_highbd_10_sub_pixel_variance32x32,
vp9_highbd_10_sub_pixel_avg_variance32x32,
vpx_highbd_sad32x32x3_bits10,
@@ -1177,7 +1184,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_64X64,
vpx_highbd_sad64x64_bits10,
vpx_highbd_sad64x64_avg_bits10,
- vp9_highbd_10_variance64x64,
+ vpx_highbd_10_variance64x64,
vp9_highbd_10_sub_pixel_variance64x64,
vp9_highbd_10_sub_pixel_avg_variance64x64,
vpx_highbd_sad64x64x3_bits10,
@@ -1187,7 +1194,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_16X16,
vpx_highbd_sad16x16_bits10,
vpx_highbd_sad16x16_avg_bits10,
- vp9_highbd_10_variance16x16,
+ vpx_highbd_10_variance16x16,
vp9_highbd_10_sub_pixel_variance16x16,
vp9_highbd_10_sub_pixel_avg_variance16x16,
vpx_highbd_sad16x16x3_bits10,
@@ -1197,7 +1204,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_16X8,
vpx_highbd_sad16x8_bits10,
vpx_highbd_sad16x8_avg_bits10,
- vp9_highbd_10_variance16x8,
+ vpx_highbd_10_variance16x8,
vp9_highbd_10_sub_pixel_variance16x8,
vp9_highbd_10_sub_pixel_avg_variance16x8,
vpx_highbd_sad16x8x3_bits10,
@@ -1207,7 +1214,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_8X16,
vpx_highbd_sad8x16_bits10,
vpx_highbd_sad8x16_avg_bits10,
- vp9_highbd_10_variance8x16,
+ vpx_highbd_10_variance8x16,
vp9_highbd_10_sub_pixel_variance8x16,
vp9_highbd_10_sub_pixel_avg_variance8x16,
vpx_highbd_sad8x16x3_bits10,
@@ -1217,7 +1224,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_8X8,
vpx_highbd_sad8x8_bits10,
vpx_highbd_sad8x8_avg_bits10,
- vp9_highbd_10_variance8x8,
+ vpx_highbd_10_variance8x8,
vp9_highbd_10_sub_pixel_variance8x8,
vp9_highbd_10_sub_pixel_avg_variance8x8,
vpx_highbd_sad8x8x3_bits10,
@@ -1227,7 +1234,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_8X4,
vpx_highbd_sad8x4_bits10,
vpx_highbd_sad8x4_avg_bits10,
- vp9_highbd_10_variance8x4,
+ vpx_highbd_10_variance8x4,
vp9_highbd_10_sub_pixel_variance8x4,
vp9_highbd_10_sub_pixel_avg_variance8x4,
NULL,
@@ -1237,7 +1244,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_4X8,
vpx_highbd_sad4x8_bits10,
vpx_highbd_sad4x8_avg_bits10,
- vp9_highbd_10_variance4x8,
+ vpx_highbd_10_variance4x8,
vp9_highbd_10_sub_pixel_variance4x8,
vp9_highbd_10_sub_pixel_avg_variance4x8,
NULL,
@@ -1247,7 +1254,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_4X4,
vpx_highbd_sad4x4_bits10,
vpx_highbd_sad4x4_avg_bits10,
- vp9_highbd_10_variance4x4,
+ vpx_highbd_10_variance4x4,
vp9_highbd_10_sub_pixel_variance4x4,
vp9_highbd_10_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x3_bits10,
@@ -1259,7 +1266,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_32X16,
vpx_highbd_sad32x16_bits12,
vpx_highbd_sad32x16_avg_bits12,
- vp9_highbd_12_variance32x16,
+ vpx_highbd_12_variance32x16,
vp9_highbd_12_sub_pixel_variance32x16,
vp9_highbd_12_sub_pixel_avg_variance32x16,
NULL,
@@ -1269,7 +1276,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_16X32,
vpx_highbd_sad16x32_bits12,
vpx_highbd_sad16x32_avg_bits12,
- vp9_highbd_12_variance16x32,
+ vpx_highbd_12_variance16x32,
vp9_highbd_12_sub_pixel_variance16x32,
vp9_highbd_12_sub_pixel_avg_variance16x32,
NULL,
@@ -1279,7 +1286,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_64X32,
vpx_highbd_sad64x32_bits12,
vpx_highbd_sad64x32_avg_bits12,
- vp9_highbd_12_variance64x32,
+ vpx_highbd_12_variance64x32,
vp9_highbd_12_sub_pixel_variance64x32,
vp9_highbd_12_sub_pixel_avg_variance64x32,
NULL,
@@ -1289,7 +1296,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_32X64,
vpx_highbd_sad32x64_bits12,
vpx_highbd_sad32x64_avg_bits12,
- vp9_highbd_12_variance32x64,
+ vpx_highbd_12_variance32x64,
vp9_highbd_12_sub_pixel_variance32x64,
vp9_highbd_12_sub_pixel_avg_variance32x64,
NULL,
@@ -1299,7 +1306,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_32X32,
vpx_highbd_sad32x32_bits12,
vpx_highbd_sad32x32_avg_bits12,
- vp9_highbd_12_variance32x32,
+ vpx_highbd_12_variance32x32,
vp9_highbd_12_sub_pixel_variance32x32,
vp9_highbd_12_sub_pixel_avg_variance32x32,
vpx_highbd_sad32x32x3_bits12,
@@ -1309,7 +1316,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_64X64,
vpx_highbd_sad64x64_bits12,
vpx_highbd_sad64x64_avg_bits12,
- vp9_highbd_12_variance64x64,
+ vpx_highbd_12_variance64x64,
vp9_highbd_12_sub_pixel_variance64x64,
vp9_highbd_12_sub_pixel_avg_variance64x64,
vpx_highbd_sad64x64x3_bits12,
@@ -1319,7 +1326,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_16X16,
vpx_highbd_sad16x16_bits12,
vpx_highbd_sad16x16_avg_bits12,
- vp9_highbd_12_variance16x16,
+ vpx_highbd_12_variance16x16,
vp9_highbd_12_sub_pixel_variance16x16,
vp9_highbd_12_sub_pixel_avg_variance16x16,
vpx_highbd_sad16x16x3_bits12,
@@ -1329,7 +1336,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_16X8,
vpx_highbd_sad16x8_bits12,
vpx_highbd_sad16x8_avg_bits12,
- vp9_highbd_12_variance16x8,
+ vpx_highbd_12_variance16x8,
vp9_highbd_12_sub_pixel_variance16x8,
vp9_highbd_12_sub_pixel_avg_variance16x8,
vpx_highbd_sad16x8x3_bits12,
@@ -1339,7 +1346,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_8X16,
vpx_highbd_sad8x16_bits12,
vpx_highbd_sad8x16_avg_bits12,
- vp9_highbd_12_variance8x16,
+ vpx_highbd_12_variance8x16,
vp9_highbd_12_sub_pixel_variance8x16,
vp9_highbd_12_sub_pixel_avg_variance8x16,
vpx_highbd_sad8x16x3_bits12,
@@ -1349,7 +1356,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_8X8,
vpx_highbd_sad8x8_bits12,
vpx_highbd_sad8x8_avg_bits12,
- vp9_highbd_12_variance8x8,
+ vpx_highbd_12_variance8x8,
vp9_highbd_12_sub_pixel_variance8x8,
vp9_highbd_12_sub_pixel_avg_variance8x8,
vpx_highbd_sad8x8x3_bits12,
@@ -1359,7 +1366,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_8X4,
vpx_highbd_sad8x4_bits12,
vpx_highbd_sad8x4_avg_bits12,
- vp9_highbd_12_variance8x4,
+ vpx_highbd_12_variance8x4,
vp9_highbd_12_sub_pixel_variance8x4,
vp9_highbd_12_sub_pixel_avg_variance8x4,
NULL,
@@ -1369,7 +1376,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_4X8,
vpx_highbd_sad4x8_bits12,
vpx_highbd_sad4x8_avg_bits12,
- vp9_highbd_12_variance4x8,
+ vpx_highbd_12_variance4x8,
vp9_highbd_12_sub_pixel_variance4x8,
vp9_highbd_12_sub_pixel_avg_variance4x8,
NULL,
@@ -1379,7 +1386,7 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
HIGHBD_BFP(BLOCK_4X4,
vpx_highbd_sad4x4_bits12,
vpx_highbd_sad4x4_avg_bits12,
- vp9_highbd_12_variance4x4,
+ vpx_highbd_12_variance4x4,
vp9_highbd_12_sub_pixel_variance4x4,
vp9_highbd_12_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x3_bits12,
@@ -1589,6 +1596,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
sizeof(*cm->frame_contexts)));
cpi->use_svc = 0;
+ cpi->resize_state = 0;
+ cpi->resize_avg_qp = 0;
+ cpi->resize_buffer_underflow = 0;
cpi->common.buffer_pool = pool;
init_config(cpi, oxcf);
@@ -1802,61 +1812,61 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
cpi->fn_ptr[BT].sdx4df = SDX4DF;
BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg,
- vp9_variance32x16, vp9_sub_pixel_variance32x16,
+ vpx_variance32x16, vp9_sub_pixel_variance32x16,
vp9_sub_pixel_avg_variance32x16, NULL, NULL, vpx_sad32x16x4d)
BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg,
- vp9_variance16x32, vp9_sub_pixel_variance16x32,
+ vpx_variance16x32, vp9_sub_pixel_variance16x32,
vp9_sub_pixel_avg_variance16x32, NULL, NULL, vpx_sad16x32x4d)
BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg,
- vp9_variance64x32, vp9_sub_pixel_variance64x32,
+ vpx_variance64x32, vp9_sub_pixel_variance64x32,
vp9_sub_pixel_avg_variance64x32, NULL, NULL, vpx_sad64x32x4d)
BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg,
- vp9_variance32x64, vp9_sub_pixel_variance32x64,
+ vpx_variance32x64, vp9_sub_pixel_variance32x64,
vp9_sub_pixel_avg_variance32x64, NULL, NULL, vpx_sad32x64x4d)
BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg,
- vp9_variance32x32, vp9_sub_pixel_variance32x32,
+ vpx_variance32x32, vp9_sub_pixel_variance32x32,
vp9_sub_pixel_avg_variance32x32, vpx_sad32x32x3, vpx_sad32x32x8,
vpx_sad32x32x4d)
BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg,
- vp9_variance64x64, vp9_sub_pixel_variance64x64,
+ vpx_variance64x64, vp9_sub_pixel_variance64x64,
vp9_sub_pixel_avg_variance64x64, vpx_sad64x64x3, vpx_sad64x64x8,
vpx_sad64x64x4d)
BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg,
- vp9_variance16x16, vp9_sub_pixel_variance16x16,
+ vpx_variance16x16, vp9_sub_pixel_variance16x16,
vp9_sub_pixel_avg_variance16x16, vpx_sad16x16x3, vpx_sad16x16x8,
vpx_sad16x16x4d)
BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg,
- vp9_variance16x8, vp9_sub_pixel_variance16x8,
+ vpx_variance16x8, vp9_sub_pixel_variance16x8,
vp9_sub_pixel_avg_variance16x8,
vpx_sad16x8x3, vpx_sad16x8x8, vpx_sad16x8x4d)
BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg,
- vp9_variance8x16, vp9_sub_pixel_variance8x16,
+ vpx_variance8x16, vp9_sub_pixel_variance8x16,
vp9_sub_pixel_avg_variance8x16,
vpx_sad8x16x3, vpx_sad8x16x8, vpx_sad8x16x4d)
BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg,
- vp9_variance8x8, vp9_sub_pixel_variance8x8,
+ vpx_variance8x8, vp9_sub_pixel_variance8x8,
vp9_sub_pixel_avg_variance8x8,
vpx_sad8x8x3, vpx_sad8x8x8, vpx_sad8x8x4d)
BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg,
- vp9_variance8x4, vp9_sub_pixel_variance8x4,
+ vpx_variance8x4, vp9_sub_pixel_variance8x4,
vp9_sub_pixel_avg_variance8x4, NULL, vpx_sad8x4x8, vpx_sad8x4x4d)
BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg,
- vp9_variance4x8, vp9_sub_pixel_variance4x8,
+ vpx_variance4x8, vp9_sub_pixel_variance4x8,
vp9_sub_pixel_avg_variance4x8, NULL, vpx_sad4x8x8, vpx_sad4x8x4d)
BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg,
- vp9_variance4x4, vp9_sub_pixel_variance4x4,
+ vpx_variance4x4, vp9_sub_pixel_variance4x4,
vp9_sub_pixel_avg_variance4x4,
vpx_sad4x4x3, vpx_sad4x4x8, vpx_sad4x4x4d)
@@ -2049,6 +2059,65 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
#endif
}
+/* TODO(yaowu): The block_variance code calls the unoptimized versions of
+ * variance() and highbd_8_variance(). It should not.
+ */
+static void encoder_variance(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int i, j;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void encoder_highbd_variance64(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, uint64_t *sse,
+ uint64_t *sum) {
+ int i, j;
+
+ uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h,
+ unsigned int *sse, int *sum) {
+ uint64_t sse_long = 0;
+ uint64_t sum_long = 0;
+ encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h,
+ &sse_long, &sum_long);
+ *sse = (unsigned int)sse_long;
+ *sum = (int)sum_long;
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
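The encoder_variance helpers added above return the raw sum and sum of squared differences over a block; get_sse()/highbd_get_sse() below only consume the sse term, but the pair is sufficient to form a variance as sse - sum^2/N, the same relationship used by the 16x16 variance code earlier in this diff. A small sketch of that derivation; the function name is illustrative.

#include <stdint.h>

/* Variance from the (sse, sum) pair produced by encoder_variance-style helpers. */
static unsigned int variance_from_sse_sum(unsigned int sse, int sum, int w, int h) {
  const int64_t n = (int64_t)w * h;
  const int64_t mean_sq = ((int64_t)sum * sum) / n;   /* sum^2 / N */
  return (unsigned int)((int64_t)sse - mean_sq);
}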
static int64_t get_sse(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
int width, int height) {
@@ -2060,15 +2129,15 @@ static int64_t get_sse(const uint8_t *a, int a_stride,
int x, y;
if (dw > 0) {
- variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
- dw, height, &sse, &sum);
+ encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
+ dw, height, &sse, &sum);
total_sse += sse;
}
if (dh > 0) {
- variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride,
- width - dw, dh, &sse, &sum);
+ encoder_variance(&a[(height - dh) * a_stride], a_stride,
+ &b[(height - dh) * b_stride], b_stride,
+ width - dw, dh, &sse, &sum);
total_sse += sse;
}
@@ -2076,7 +2145,7 @@ static int64_t get_sse(const uint8_t *a, int a_stride,
const uint8_t *pa = a;
const uint8_t *pb = b;
for (x = 0; x < width / 16; ++x) {
- vp9_mse16x16(pa, a_stride, pb, b_stride, &sse);
+ vpx_mse16x16(pa, a_stride, pb, b_stride, &sse);
total_sse += sse;
pa += 16;
@@ -2121,21 +2190,22 @@ static int64_t highbd_get_sse(const uint8_t *a, int a_stride,
unsigned int sse = 0;
int sum = 0;
if (dw > 0) {
- highbd_variance(&a[width - dw], a_stride, &b[width - dw], b_stride,
- dw, height, &sse, &sum);
+ encoder_highbd_8_variance(&a[width - dw], a_stride,
+ &b[width - dw], b_stride,
+ dw, height, &sse, &sum);
total_sse += sse;
}
if (dh > 0) {
- highbd_variance(&a[(height - dh) * a_stride], a_stride,
- &b[(height - dh) * b_stride], b_stride,
- width - dw, dh, &sse, &sum);
+ encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride,
+ &b[(height - dh) * b_stride], b_stride,
+ width - dw, dh, &sse, &sum);
total_sse += sse;
}
for (y = 0; y < height / 16; ++y) {
const uint8_t *pa = a;
const uint8_t *pb = b;
for (x = 0; x < width / 16; ++x) {
- vp9_highbd_mse16x16(pa, a_stride, pb, b_stride, &sse);
+ vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse);
total_sse += sse;
pa += 16;
pb += 16;
@@ -2260,8 +2330,9 @@ static void generate_psnr_packet(VP9_COMP *cpi) {
pkt.data.psnr.psnr[i] = psnr.psnr[i];
}
pkt.kind = VPX_CODEC_PSNR_PKT;
- if (is_two_pass_svc(cpi))
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].psnr_pkt = pkt.data.psnr;
+ if (cpi->use_svc)
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers].psnr_pkt = pkt.data.psnr;
else
vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
}
@@ -2711,7 +2782,10 @@ void vp9_scale_references(VP9_COMP *cpi) {
#if CONFIG_VP9_HIGHBITDEPTH
if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
const int new_fb = get_free_fb(cm);
- RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb];
+ RefCntBuffer *new_fb_ptr = NULL;
+ if (cm->new_fb_idx == INVALID_IDX)
+ return;
+ new_fb_ptr = &pool->frame_bufs[new_fb];
cm->cur_frame = &pool->frame_bufs[new_fb];
vp9_realloc_frame_buffer(&pool->frame_bufs[new_fb].buf,
cm->width, cm->height,
@@ -2723,7 +2797,10 @@ void vp9_scale_references(VP9_COMP *cpi) {
#else
if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) {
const int new_fb = get_free_fb(cm);
- RefCntBuffer *const new_fb_ptr = &pool->frame_bufs[new_fb];
+ RefCntBuffer *new_fb_ptr = NULL;
+ if (cm->new_fb_idx == INVALID_IDX)
+ return;
+ new_fb_ptr = &pool->frame_bufs[new_fb];
vp9_realloc_frame_buffer(&new_fb_ptr->buf,
cm->width, cm->height,
cm->subsampling_x, cm->subsampling_y,
@@ -2792,19 +2869,25 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
recon_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
if (cpi->twopass.total_left_stats.coded_error != 0.0)
- fprintf(f, "%10u %dx%d %10d %10d %10d %10d"
- "%10"PRId64" %10"PRId64" %10"PRId64" %10"PRId64" %10d "
- "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
+ fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d"
+ "%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
+ "%10"PRId64" %10"PRId64" %10d "
+ "%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
"%6d %6d %5d %5d %5d "
"%10"PRId64" %10.3lf"
- "%10lf %8u %10"PRId64" %10d %10d\n",
+ "%10lf %8u %10"PRId64" %10d %10d %10d\n",
cpi->common.current_video_frame,
cm->width, cm->height,
+ cpi->rc.source_alt_ref_pending,
+ cpi->rc.source_alt_ref_active,
cpi->rc.this_frame_target,
cpi->rc.projected_frame_size,
cpi->rc.projected_frame_size / cpi->common.MBs,
(cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
cpi->rc.vbr_bits_off_target,
+ cpi->rc.vbr_bits_off_target_fast,
+ cpi->twopass.extend_minq,
+ cpi->twopass.extend_minq_fast,
cpi->rc.total_target_vs_actual,
(cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
cpi->rc.total_actual_bits, cm->base_qindex,
@@ -2821,7 +2904,8 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
cpi->twopass.bits_left /
(1 + cpi->twopass.total_left_stats.coded_error),
cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
- cpi->twopass.kf_zeromotion_pct);
+ cpi->twopass.kf_zeromotion_pct,
+ cpi->twopass.fr_content_type);
fclose(f);
@@ -2933,7 +3017,7 @@ static void init_motion_estimation(VP9_COMP *cpi) {
}
}
-void set_frame_size(VP9_COMP *cpi) {
+static void set_frame_size(VP9_COMP *cpi) {
int ref_frame;
VP9_COMMON *const cm = &cpi->common;
VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -2951,6 +3035,31 @@ void set_frame_size(VP9_COMP *cpi) {
oxcf->scaled_frame_height);
}
+ if (oxcf->pass == 0 &&
+ oxcf->rc_mode == VPX_CBR &&
+ !cpi->use_svc &&
+ oxcf->resize_mode == RESIZE_DYNAMIC) {
+ if (cpi->resize_state == 1) {
+ oxcf->scaled_frame_width =
+ (cm->width * cpi->resize_scale_num) / cpi->resize_scale_den;
+ oxcf->scaled_frame_height =
+          (cm->height * cpi->resize_scale_num) / cpi->resize_scale_den;
+ } else if (cpi->resize_state == -1) {
+ // Go back up to original size.
+ oxcf->scaled_frame_width = oxcf->width;
+ oxcf->scaled_frame_height = oxcf->height;
+ }
+ if (cpi->resize_state != 0) {
+ // There has been a change in frame size.
+ vp9_set_size_literal(cpi,
+ oxcf->scaled_frame_width,
+ oxcf->scaled_frame_height);
+
+ // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed.
+ set_mv_search_params(cpi);
+ }
+ }
+
if ((oxcf->pass == 2) &&
(!cpi->use_svc ||
(is_two_pass_svc(cpi) &&
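The RESIZE_DYNAMIC branch above scales the coded size by the rational factor resize_scale_num/resize_scale_den when resize_state is 1, and snaps back to the configured size when it is -1. A tiny worked example with made-up values (the 3/4 factor and 1280x720 input are assumptions, not taken from this diff):

#include <stdio.h>

int main(void) {
  const int num = 3, den = 4;    /* hypothetical resize_scale_num / resize_scale_den */
  const int w = 1280, h = 720;   /* made-up source dimensions */
  printf("%dx%d -> %dx%d\n", w, h, (w * num) / den, (h * num) / den);  /* 960x540 */
  return 0;
}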
@@ -3026,11 +3135,11 @@ static void encode_without_recode_loop(VP9_COMP *cpi) {
set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
vp9_set_quantizer(cm, q);
- vp9_set_vbp_thresholds(cpi, q);
+ vp9_set_variance_partition_thresholds(cpi, q);
setup_frame(cpi);
- vp9_suppress_active_map(cpi);
+ suppress_active_map(cpi);
// Variance adaptive and in frame q adjustment experiments are mutually
// exclusive.
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
@@ -3040,7 +3149,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi) {
} else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
vp9_cyclic_refresh_setup(cpi);
}
- vp9_apply_active_map(cpi);
+ apply_active_map(cpi);
// transform / motion compensation build reconstruction frame
vp9_encode_frame(cpi);
@@ -3394,7 +3503,7 @@ static void set_arf_sign_bias(VP9_COMP *cpi) {
cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
}
-int setup_interp_filter_search_mask(VP9_COMP *cpi) {
+static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
INTERP_FILTER ifilter;
int ref_total[MAX_REF_FRAMES] = {0};
MV_REFERENCE_FRAME ref;
@@ -3471,34 +3580,41 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
}
if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) {
- // Use the last frame context for the empty frame.
+ // Use context 0 for intra only empty frame, but the last frame context
+ // for other empty frames.
+ if (cpi->svc.encode_empty_frame_state == ENCODING) {
+ if (cpi->svc.encode_intra_empty_frame != 0)
+ cm->frame_context_idx = 0;
+ else
+ cm->frame_context_idx = FRAME_CONTEXTS - 1;
+ } else {
cm->frame_context_idx =
- (cpi->svc.encode_empty_frame_state == ENCODING) ? FRAME_CONTEXTS - 1 :
cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id;
+ }
+
+ cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
// The probs will be updated based on the frame type of its previous
// frame if frame_parallel_decoding_mode is 0. The type may vary for
// the frame after a key frame in base layer since we may drop enhancement
// layers. So set frame_parallel_decoding_mode to 1 in this case.
- if (cpi->svc.number_temporal_layers == 1) {
- if (cpi->svc.spatial_layer_id == 0 &&
- cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)
- cm->frame_parallel_decoding_mode = 1;
- else
- cm->frame_parallel_decoding_mode = 0;
- } else if (cpi->svc.spatial_layer_id == 0) {
- // Find the 2nd frame in temporal base layer and 1st frame in temporal
- // enhancement layers from the key frame.
- int i;
- for (i = 0; i < cpi->svc.number_temporal_layers; ++i) {
- if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) {
+ if (cm->frame_parallel_decoding_mode == 0) {
+ if (cpi->svc.number_temporal_layers == 1) {
+ if (cpi->svc.spatial_layer_id == 0 &&
+ cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)
cm->frame_parallel_decoding_mode = 1;
- break;
+ } else if (cpi->svc.spatial_layer_id == 0) {
+ // Find the 2nd frame in temporal base layer and 1st frame in temporal
+ // enhancement layers from the key frame.
+ int i;
+ for (i = 0; i < cpi->svc.number_temporal_layers; ++i) {
+ if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) {
+ cm->frame_parallel_decoding_mode = 1;
+ break;
+ }
}
}
- if (i == cpi->svc.number_temporal_layers)
- cm->frame_parallel_decoding_mode = 0;
}
}
@@ -3643,9 +3759,11 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
}
cm->prev_frame = cm->cur_frame;
- if (is_two_pass_svc(cpi))
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type =
- cm->frame_type;
+ if (cpi->use_svc)
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id].last_frame_type =
+ cm->frame_type;
}
static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
@@ -3788,8 +3906,8 @@ static int frame_is_reference(const VP9_COMP *cpi) {
cm->seg.update_data;
}
-void adjust_frame_rate(VP9_COMP *cpi,
- const struct lookahead_entry *source) {
+static void adjust_frame_rate(VP9_COMP *cpi,
+ const struct lookahead_entry *source) {
int64_t this_duration;
int step = 0;
@@ -3872,15 +3990,16 @@ static void check_src_altref(VP9_COMP *cpi,
extern double vp9_get_blockiness(const unsigned char *img1, int img1_pitch,
const unsigned char *img2, int img2_pitch,
int width, int height);
-#endif
-void adjust_image_stat(double y, double u, double v, double all, ImageStat *s) {
+static void adjust_image_stat(double y, double u, double v, double all,
+ ImageStat *s) {
s->stat[Y] += y;
s->stat[U] += u;
s->stat[V] += v;
s->stat[ALL] += all;
s->worst = MIN(s->worst, all);
}
+#endif // CONFIG_INTERNAL_STATS
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest,
@@ -3905,6 +4024,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif
if (oxcf->pass == 2)
vp9_restore_layer_context(cpi);
+ } else if (is_one_pass_cbr_svc(cpi)) {
+ vp9_one_pass_cbr_svc_start_layer(cpi);
}
vpx_usec_timer_start(&cmptimer);
@@ -3923,9 +4044,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// Normal defaults
cm->reset_frame_context = 0;
cm->refresh_frame_context = 1;
- cpi->refresh_last_frame = 1;
- cpi->refresh_golden_frame = 0;
- cpi->refresh_alt_ref_frame = 0;
+ if (!is_one_pass_cbr_svc(cpi)) {
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ }
// Should we encode an arf frame.
arf_src_index = get_arf_src_index(cpi);
@@ -3962,6 +4085,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
cm->show_frame = 0;
+ cm->intra_only = 0;
cpi->refresh_alt_ref_frame = 1;
cpi->refresh_golden_frame = 0;
cpi->refresh_last_frame = 0;
@@ -3980,12 +4104,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
// Read in the source frame.
-#if CONFIG_SPATIAL_SVC
- if (is_two_pass_svc(cpi))
+ if (cpi->use_svc)
source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
else
-#endif
source = vp9_lookahead_pop(cpi->lookahead, flush);
+
if (source != NULL) {
cm->show_frame = 1;
cm->intra_only = 0;
@@ -4034,8 +4157,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
adjust_frame_rate(cpi, source);
}
- if (cpi->svc.number_temporal_layers > 1 &&
- oxcf->rc_mode == VPX_CBR) {
+ if (is_one_pass_cbr_svc(cpi)) {
vp9_update_temporal_layer_framerate(cpi);
vp9_restore_layer_context(cpi);
}
@@ -4071,7 +4193,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
(is_two_pass_svc(cpi) &&
cpi->svc.encode_empty_frame_state != ENCODING))) {
vp9_rc_get_second_pass_params(cpi);
- } else {
+ } else if (oxcf->pass == 1) {
set_frame_size(cpi);
}
@@ -4117,11 +4239,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
// Save layer specific state.
- if ((cpi->svc.number_temporal_layers > 1 &&
- oxcf->rc_mode == VPX_CBR) ||
- ((cpi->svc.number_temporal_layers > 1 ||
- cpi->svc.number_spatial_layers > 1) &&
- oxcf->pass == 2)) {
+ if (is_one_pass_cbr_svc(cpi) ||
+ ((cpi->svc.number_temporal_layers > 1 ||
+ cpi->svc.number_spatial_layers > 1) &&
+ oxcf->pass == 2)) {
vp9_save_layer_context(cpi);
}
@@ -4180,7 +4301,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
vp9_clear_system_state();
#if CONFIG_VP9_HIGHBITDEPTH
- calc_highbd_psnr(orig, pp, &psnr, cpi->td.mb.e_mbd.bd,
+ calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd,
cpi->oxcf.input_bit_depth);
#else
calc_psnr(orig, pp, &psnr2);
@@ -4231,31 +4352,38 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
}
if (cpi->b_calculate_blockiness) {
- double frame_blockiness = vp9_get_blockiness(
- cpi->Source->y_buffer, cpi->Source->y_stride,
- cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
- cpi->Source->y_width, cpi->Source->y_height);
- cpi->worst_blockiness = MAX(cpi->worst_blockiness, frame_blockiness);
- cpi->total_blockiness += frame_blockiness;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!cm->use_highbitdepth)
+#endif
+ {
+ double frame_blockiness = vp9_get_blockiness(
+ cpi->Source->y_buffer, cpi->Source->y_stride,
+ cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
+ cpi->Source->y_width, cpi->Source->y_height);
+ cpi->worst_blockiness = MAX(cpi->worst_blockiness, frame_blockiness);
+ cpi->total_blockiness += frame_blockiness;
+ }
}
if (cpi->b_calculate_consistency) {
- double this_inconsistency = vp9_get_ssim_metrics(
- cpi->Source->y_buffer, cpi->Source->y_stride,
- cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
- cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
- &cpi->metrics, 1);
-
- const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
-
-
- double consistency = vpx_sse_to_psnr(samples, peak,
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!cm->use_highbitdepth)
+#endif
+ {
+ double this_inconsistency = vp9_get_ssim_metrics(
+ cpi->Source->y_buffer, cpi->Source->y_stride,
+ cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
+ cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars,
+ &cpi->metrics, 1);
+
+ const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
+ double consistency = vpx_sse_to_psnr(samples, peak,
(double)cpi->total_inconsistency);
-
- if (consistency > 0.0)
- cpi->worst_consistency = MIN(cpi->worst_consistency,
- consistency);
- cpi->total_inconsistency += this_inconsistency;
+ if (consistency > 0.0)
+ cpi->worst_consistency = MIN(cpi->worst_consistency,
+ consistency);
+ cpi->total_inconsistency += this_inconsistency;
+ }
}
if (cpi->b_calculate_ssimg) {
@@ -4273,6 +4401,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif // CONFIG_VP9_HIGHBITDEPTH
adjust_image_stat(y, u, v, frame_all, &cpi->ssimg);
}
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!cm->use_highbitdepth)
+#endif
{
double y, u, v, frame_all;
frame_all = vp9_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u,
@@ -4280,6 +4411,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
/* TODO(JBB): add 10/12 bit support */
}
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (!cm->use_highbitdepth)
+#endif
{
double y, u, v, frame_all;
frame_all = vp9_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v);
@@ -4291,8 +4425,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif
if (is_two_pass_svc(cpi)) {
- if (cpi->svc.encode_empty_frame_state == ENCODING)
+ if (cpi->svc.encode_empty_frame_state == ENCODING) {
cpi->svc.encode_empty_frame_state = ENCODED;
+ cpi->svc.encode_intra_empty_frame = 0;
+ }
if (cm->show_frame) {
++cpi->svc.spatial_layer_to_encode;
@@ -4302,6 +4438,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
        // May need the empty frame after a visible frame.
cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE;
}
+ } else if (is_one_pass_cbr_svc(cpi)) {
+ if (cm->show_frame) {
+ ++cpi->svc.spatial_layer_to_encode;
+ if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
+ cpi->svc.spatial_layer_to_encode = 0;
+ }
}
return 0;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
index 41f1c13d493..2b0da103ffe 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
@@ -194,10 +194,10 @@ typedef struct VP9EncoderConfig {
int ss_number_layers; // Number of spatial layers.
int ts_number_layers; // Number of temporal layers.
// Bitrate allocation for spatial layers.
+ int layer_target_bitrate[VPX_MAX_LAYERS];
int ss_target_bitrate[VPX_SS_MAX_LAYERS];
int ss_enable_auto_arf[VPX_SS_MAX_LAYERS];
// Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
- int ts_target_bitrate[VPX_TS_MAX_LAYERS];
int ts_rate_decimator[VPX_TS_MAX_LAYERS];
int enable_auto_arf;
@@ -237,6 +237,7 @@ typedef struct VP9EncoderConfig {
int use_highbitdepth;
#endif
vpx_color_space_t color_space;
+ VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
} VP9EncoderConfig;
static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
@@ -305,6 +306,7 @@ typedef struct VP9_COMP {
YV12_BUFFER_CONFIG scaled_last_source;
TileDataEnc *tile_data;
+ int allocated_tiles; // Keep track of memory allocated for tiles.
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
@@ -477,6 +479,12 @@ typedef struct VP9_COMP {
#endif
int resize_pending;
+ int resize_state;
+ int resize_scale_num;
+ int resize_scale_den;
+ int resize_avg_qp;
+ int resize_buffer_underflow;
+ int resize_count;
// VAR_BASED_PARTITION thresholds
// 0 - threshold_64x64; 1 - threshold_32x32;
@@ -611,9 +619,11 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) {
- return cpi->use_svc &&
- ((cpi->svc.number_spatial_layers > 1) ||
- (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.pass != 0));
+ return cpi->use_svc && cpi->oxcf.pass != 0;
+}
+
+static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
+ return (cpi->use_svc && cpi->oxcf.pass == 0);
}
static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
@@ -642,6 +652,8 @@ static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) {
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
+#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
+
#ifdef __cplusplus
} // extern "C"
#endif
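The new LAYER_IDS_TO_IDX macro spells out the flat indexing that the vp9_encoder.c changes above already apply to svc.layer_context (spatial_layer_id * number_temporal_layers + temporal_layer_id). A short sketch, with made-up layer counts, of how the (spatial, temporal) pair maps onto a single array slot:

#include <stdio.h>

#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))

int main(void) {
  const int num_spatial = 2, num_temporal = 3;    /* illustrative layer counts */
  int sl, tl;
  for (sl = 0; sl < num_spatial; ++sl)
    for (tl = 0; tl < num_temporal; ++tl)
      printf("sl=%d tl=%d -> layer_context[%d]\n",
             sl, tl, LAYER_IDS_TO_IDX(sl, tl, num_temporal));
  return 0;                                       /* indices run 0..5 without gaps */
}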
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c
index 8700ccdaecd..4ae3fbc5409 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c
@@ -54,6 +54,18 @@ static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
return 0;
}
+static int get_max_tile_cols(VP9_COMP *cpi) {
+ const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2);
+ int mi_cols = aligned_width >> MI_SIZE_LOG2;
+ int min_log2_tile_cols, max_log2_tile_cols;
+ int log2_tile_cols;
+
+ vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
+ log2_tile_cols = clamp(cpi->oxcf.tile_columns,
+ min_log2_tile_cols, max_log2_tile_cols);
+ return (1 << log2_tile_cols);
+}
+
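get_max_tile_cols above sizes the worker pool from the configured width rather than the current (possibly downscaled) spatial layer, so SVC never ends up with fewer threads than the largest layer can use. A rough model of the clamp, under the assumption that VP9 tile columns must be between 4 and 64 superblocks wide (vp9_get_tile_n_bits is not shown in this diff, so those bounds are an assumption):

/* Rough model of get_max_tile_cols; the 4/64-superblock tile bounds are assumed. */
static int model_max_tile_cols(int width, int requested_log2_cols) {
  const int sb64_cols = (width + 63) / 64;
  int min_log2 = 0, max_log2 = 0;
  while ((64 << min_log2) < sb64_cols) ++min_log2;        /* tiles at most 64 SBs wide */
  while ((sb64_cols >> (max_log2 + 1)) >= 4) ++max_log2;  /* tiles at least 4 SBs wide */
  if (requested_log2_cols < min_log2) requested_log2_cols = min_log2;
  if (requested_log2_cols > max_log2) requested_log2_cols = max_log2;
  return 1 << requested_log2_cols;
}
/* e.g. a 1920-wide frame has 30 SB columns, so at most 1 << 2 = 4 tile columns;
 * model_max_tile_cols(1920, 6) == 4. */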
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -65,20 +77,30 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
// Only run once to create threads and allocate thread data.
if (cpi->num_workers == 0) {
+ int allocated_workers = num_workers;
+
+    // When using SVC, we need to allocate threads according to the highest
+    // resolution.
+ if (cpi->use_svc) {
+ int max_tile_cols = get_max_tile_cols(cpi);
+ allocated_workers = MIN(cpi->oxcf.max_threads, max_tile_cols);
+ }
+
CHECK_MEM_ERROR(cm, cpi->workers,
- vpx_malloc(num_workers * sizeof(*cpi->workers)));
+ vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
- vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
+ vpx_calloc(allocated_workers,
+ sizeof(*cpi->tile_thr_data)));
- for (i = 0; i < num_workers; i++) {
+ for (i = 0; i < allocated_workers; i++) {
VP9Worker *const worker = &cpi->workers[i];
EncWorkerData *thread_data = &cpi->tile_thr_data[i];
++cpi->num_workers;
winterface->init(worker);
- if (i < num_workers - 1) {
+ if (i < allocated_workers - 1) {
thread_data->cpi = cpi;
// Allocate thread data.
@@ -154,7 +176,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
// Set the starting tile for each thread.
thread_data->start = i;
- if (i == num_workers - 1)
+ if (i == cpi->num_workers - 1)
winterface->execute(worker);
else
winterface->launch(worker);
@@ -171,7 +193,7 @@ void vp9_encode_tiles_mt(VP9_COMP *cpi) {
EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;
// Accumulate counters.
- if (i < num_workers - 1) {
+ if (i < cpi->num_workers - 1) {
vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
accumulate_rd_opt(&cpi->td, thread_data->td);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_extend.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_extend.c
index 96f3598b1dc..6e1ed365dab 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_extend.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_extend.c
@@ -9,6 +9,7 @@
*/
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/encoder/vp9_extend.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
index 9752668b15d..3d7843ea731 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
@@ -12,9 +12,11 @@
#include <math.h>
#include <stdio.h>
+#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vpx_scale/vpx_scale.h"
#include "vpx_scale/yv12config.h"
@@ -111,8 +113,8 @@ static void output_stats(FIRSTPASS_STATS *stats,
fpfile = fopen("firstpass.stt", "a");
fprintf(fpfile, "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf"
- "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
- "%12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n",
+ "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
+ "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf\n",
stats->frame,
stats->weight,
stats->intra_error,
@@ -122,6 +124,8 @@ static void output_stats(FIRSTPASS_STATS *stats,
stats->pcnt_motion,
stats->pcnt_second_ref,
stats->pcnt_neutral,
+ stats->intra_skip_pct,
+ stats->inactive_zone_rows,
stats->MVr,
stats->mvr_abs,
stats->MVc,
@@ -158,7 +162,9 @@ static void zero_stats(FIRSTPASS_STATS *section) {
section->pcnt_motion = 0.0;
section->pcnt_second_ref = 0.0;
section->pcnt_neutral = 0.0;
- section->MVr = 0.0;
+ section->intra_skip_pct = 0.0;
+ section->inactive_zone_rows = 0.0;
+ section->MVr = 0.0;
section->mvr_abs = 0.0;
section->MVc = 0.0;
section->mvc_abs = 0.0;
@@ -183,7 +189,9 @@ static void accumulate_stats(FIRSTPASS_STATS *section,
section->pcnt_motion += frame->pcnt_motion;
section->pcnt_second_ref += frame->pcnt_second_ref;
section->pcnt_neutral += frame->pcnt_neutral;
- section->MVr += frame->MVr;
+ section->intra_skip_pct += frame->intra_skip_pct;
+ section->inactive_zone_rows += frame->inactive_zone_rows;
+ section->MVr += frame->MVr;
section->mvr_abs += frame->mvr_abs;
section->MVc += frame->MVc;
section->mvc_abs += frame->mvc_abs;
@@ -206,7 +214,9 @@ static void subtract_stats(FIRSTPASS_STATS *section,
section->pcnt_motion -= frame->pcnt_motion;
section->pcnt_second_ref -= frame->pcnt_second_ref;
section->pcnt_neutral -= frame->pcnt_neutral;
- section->MVr -= frame->MVr;
+ section->intra_skip_pct -= frame->intra_skip_pct;
+ section->inactive_zone_rows -= frame->inactive_zone_rows;
+ section->MVr -= frame->MVr;
section->mvr_abs -= frame->mvr_abs;
section->MVc -= frame->MVc;
section->mvc_abs -= frame->mvc_abs;
@@ -266,13 +276,13 @@ void vp9_end_first_pass(VP9_COMP *cpi) {
static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
switch (bsize) {
case BLOCK_8X8:
- return vp9_mse8x8;
+ return vpx_mse8x8;
case BLOCK_16X8:
- return vp9_mse16x8;
+ return vpx_mse16x8;
case BLOCK_8X16:
- return vp9_mse8x16;
+ return vpx_mse8x16;
default:
- return vp9_mse16x16;
+ return vpx_mse16x16;
}
}
@@ -292,37 +302,37 @@ static vp9_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize,
default:
switch (bsize) {
case BLOCK_8X8:
- return vp9_highbd_mse8x8;
+ return vpx_highbd_8_mse8x8;
case BLOCK_16X8:
- return vp9_highbd_mse16x8;
+ return vpx_highbd_8_mse16x8;
case BLOCK_8X16:
- return vp9_highbd_mse8x16;
+ return vpx_highbd_8_mse8x16;
default:
- return vp9_highbd_mse16x16;
+ return vpx_highbd_8_mse16x16;
}
break;
case 10:
switch (bsize) {
case BLOCK_8X8:
- return vp9_highbd_10_mse8x8;
+ return vpx_highbd_10_mse8x8;
case BLOCK_16X8:
- return vp9_highbd_10_mse16x8;
+ return vpx_highbd_10_mse16x8;
case BLOCK_8X16:
- return vp9_highbd_10_mse8x16;
+ return vpx_highbd_10_mse8x16;
default:
- return vp9_highbd_10_mse16x16;
+ return vpx_highbd_10_mse16x16;
}
break;
case 12:
switch (bsize) {
case BLOCK_8X8:
- return vp9_highbd_12_mse8x8;
+ return vpx_highbd_12_mse8x8;
case BLOCK_16X8:
- return vp9_highbd_12_mse16x8;
+ return vpx_highbd_12_mse16x8;
case BLOCK_8X16:
- return vp9_highbd_12_mse8x16;
+ return vpx_highbd_12_mse8x16;
default:
- return vp9_highbd_12_mse16x16;
+ return vpx_highbd_12_mse16x16;
}
break;
}
@@ -451,6 +461,8 @@ static void set_first_pass_params(VP9_COMP *cpi) {
cpi->rc.frames_to_key = INT_MAX;
}
+#define UL_INTRA_THRESH 50
+#define INVALID_ROW -1
void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
int mb_row, mb_col;
MACROBLOCK *const x = &cpi->td.mb;
@@ -475,6 +487,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
int second_ref_count = 0;
const int intrapenalty = INTRA_MODE_PENALTY;
double neutral_count;
+ int intra_skip_count = 0;
+ int image_data_start_row = INVALID_ROW;
int new_mv_count = 0;
int sum_in_vectors = 0;
MV lastmv = {0, 0};
@@ -633,7 +647,19 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
xd->mi[0]->mbmi.tx_size = use_dc_pred ?
(bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
vp9_encode_intra_block_plane(x, bsize, 0);
- this_error = vp9_get_mb_ss(x->plane[0].src_diff);
+ this_error = vpx_get_mb_ss(x->plane[0].src_diff);
+
+ // Keep a record of blocks that have almost no intra error residual
+ // (i.e. are in effect completely flat and untextured in the intra
+ // domain). In natural videos this is uncommon, but it is much more
+ // common in animations, graphics and screen content, so may be used
+ // as a signal to detect these types of content.
+ if (this_error < UL_INTRA_THRESH) {
+ ++intra_skip_count;
+ } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) {
+ image_data_start_row = mb_row;
+ }
+
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
switch (cm->bit_depth) {
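
The flat-block bookkeeping introduced above counts macroblocks whose intra residual falls below UL_INTRA_THRESH and records the first row where textured image data begins. A self-contained sketch of that per-macroblock logic, driven by a made-up error array rather than real first-pass output:

#include <stdio.h>

#define UL_INTRA_THRESH 50
#define INVALID_ROW -1

int main(void) {
  /* Hypothetical per-macroblock intra error for a 2 x 4 MB frame. */
  const int mb_rows = 2, mb_cols = 4;
  const int this_error[2][4] = { { 3, 7, 1, 2 }, { 4, 900, 850, 700 } };
  int intra_skip_count = 0;
  int image_data_start_row = INVALID_ROW;
  int mb_row, mb_col;

  for (mb_row = 0; mb_row < mb_rows; ++mb_row) {
    for (mb_col = 0; mb_col < mb_cols; ++mb_col) {
      if (this_error[mb_row][mb_col] < UL_INTRA_THRESH)
        ++intra_skip_count;                 /* flat, untextured block */
      else if (mb_col > 0 && image_data_start_row == INVALID_ROW)
        image_data_start_row = mb_row;      /* first textured content */
    }
  }
  printf("flat blocks: %d, image data starts at MB row %d\n",
         intra_skip_count, image_data_start_row);
  return 0;
}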
@@ -961,6 +987,18 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
vp9_clear_system_state();
}
+  // Clamp the image start to rows/2. This number of rows is discarded at the
+  // top and bottom as dead data, so rows/2 means the whole frame is blank.
+ if ((image_data_start_row > cm->mb_rows / 2) ||
+ (image_data_start_row == INVALID_ROW)) {
+ image_data_start_row = cm->mb_rows / 2;
+ }
+ // Exclude any image dead zone
+ if (image_data_start_row > 0) {
+ intra_skip_count =
+ MAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
+ }
+
{
FIRSTPASS_STATS fps;
    // The minimum error here ensures some bit allocation to frames even
@@ -985,6 +1023,8 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
fps.pcnt_inter = (double)intercount / num_mbs;
fps.pcnt_second_ref = (double)second_ref_count / num_mbs;
fps.pcnt_neutral = (double)neutral_count / num_mbs;
+ fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
+ fps.inactive_zone_rows = (double)image_data_start_row;
if (mvcount > 0) {
fps.MVr = (double)sum_mvr / mvcount;
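
The dead-zone adjustment caps image_data_start_row at mb_rows/2 and then removes the masked blocks (top and bottom) from the flat-block count. A worked numeric sketch with made-up frame dimensions:

#include <stdio.h>

#define INVALID_ROW -1
#define MAXV(a, b) ((a) > (b) ? (a) : (b))

int main(void) {
  const int mb_rows = 68, mb_cols = 120;  /* e.g. a 1080p frame in 16x16 MBs */
  int image_data_start_row = 8;           /* 8 blank MB rows detected on top */
  int intra_skip_count = 2200;            /* flat blocks counted in the pass */

  /* Clamp: more than rows/2 of dead rows (or none found) means "blank". */
  if (image_data_start_row > mb_rows / 2 ||
      image_data_start_row == INVALID_ROW)
    image_data_start_row = mb_rows / 2;

  /* Remove the dead-zone blocks (top and bottom) from the flat-block count. */
  if (image_data_start_row > 0)
    intra_skip_count =
        MAXV(0, intra_skip_count - image_data_start_row * mb_cols * 2);

  printf("start row %d, remaining flat blocks %d\n",
         image_data_start_row, intra_skip_count);
  return 0;
}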
@@ -1106,21 +1146,25 @@ static double calc_correction_factor(double err_per_mb,
static int get_twopass_worst_quality(const VP9_COMP *cpi,
const double section_err,
+ double inactive_zone,
int section_target_bandwidth,
double group_weight_factor) {
const RATE_CONTROL *const rc = &cpi->rc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ inactive_zone = fclamp(inactive_zone, 0.0, 1.0);
+
if (section_target_bandwidth <= 0) {
return rc->worst_quality; // Highest value allowed
} else {
const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
- const double err_per_mb = section_err / num_mbs;
+ const int active_mbs = MAX(1, num_mbs - (int)(num_mbs * inactive_zone));
+ const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
- const double ediv_size_correction = num_mbs / EDIV_SIZE_FACTOR;
+ const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
- BPER_MB_NORMBITS) / num_mbs;
+ BPER_MB_NORMBITS) / active_mbs;
int q;
int is_svc_upper_layer = 0;
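
get_twopass_worst_quality() now folds the inactive fraction into an active macroblock count, normalizing both the average error and the target bits per MB over active blocks only. A small sketch of that normalization; the values are illustrative and BPER_MB_NORMBITS is assumed here to be 9, its usual libvpx value:

#include <stdint.h>
#include <stdio.h>

#define MAXV(a, b) ((a) > (b) ? (a) : (b))
#define BPER_MB_NORMBITS 9  /* assumed value of the libvpx constant */

static double fclamp(double value, double low, double high) {
  return value < low ? low : (value > high ? high : value);
}

int main(void) {
  const int num_mbs = 3600;             /* stand-in for cpi->common.MBs */
  const double section_err = 900000.0;  /* accumulated coded error */
  const int section_target_bandwidth = 500000;
  const double inactive_zone = fclamp(0.25, 0.0, 1.0);  /* skip + dead zone */
  const int active_mbs = MAXV(1, num_mbs - (int)(num_mbs * inactive_zone));
  const double av_err_per_mb = section_err / active_mbs;
  const int target_norm_bits_per_mb =
      (int)(((uint64_t)section_target_bandwidth << BPER_MB_NORMBITS) /
            active_mbs);

  printf("active MBs %d, err/MB %.1f, norm bits/MB %d\n",
         active_mbs, av_err_per_mb, target_norm_bits_per_mb);
  return 0;
}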
@@ -1133,7 +1177,7 @@ static int get_twopass_worst_quality(const VP9_COMP *cpi,
// content at the given rate.
for (q = rc->best_quality; q < rc->worst_quality; ++q) {
const double factor =
- calc_correction_factor(err_per_mb,
+ calc_correction_factor(av_err_per_mb,
ERR_DIVISOR - ediv_size_correction,
is_svc_upper_layer ? SVC_FACTOR_PT_LOW :
FACTOR_PT_LOW, FACTOR_PT_HIGH, q,
@@ -1246,8 +1290,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
twopass->modified_error_left = modified_error_total;
}
- // Reset the vbr bits off target counter
+ // Reset the vbr bits off target counters
cpi->rc.vbr_bits_off_target = 0;
+ cpi->rc.vbr_bits_off_target_fast = 0;
cpi->rc.rate_error_estimate = 0;
@@ -1411,6 +1456,8 @@ static double calc_frame_boost(VP9_COMP *cpi,
const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
? cpi->initial_mbs : cpi->common.MBs;
+ // TODO(paulwilkins): correct for dead zone
+
// Underlying boost factor is based on inter error ratio.
frame_boost = (BASELINE_ERR_PER_MB * num_mbs) /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
@@ -1695,7 +1742,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
// Allocate bits to the other frames in the group.
- for (i = 0; i < rc->baseline_gf_interval - 1; ++i) {
+ for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) {
int arf_idx = 0;
if (EOF == input_stats(twopass, &frame_stats))
break;
@@ -1776,6 +1823,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#if GROUP_ADAPTIVE_MAXQ
double gf_group_raw_error = 0.0;
#endif
+ double gf_group_skip_pct = 0.0;
+ double gf_group_inactive_zone_rows = 0.0;
double gf_first_frame_err = 0.0;
double mod_frame_err = 0.0;
@@ -1825,6 +1874,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error -= this_frame->coded_error;
#endif
+ gf_group_skip_pct -= this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows;
}
// Motion breakout threshold for loop below depends on image size.
@@ -1869,6 +1920,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
#endif
+ gf_group_skip_pct += this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
if (EOF == input_stats(twopass, &next_frame))
break;
@@ -1933,8 +1986,26 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Was the group length constrained by the requirement for a new KF?
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
+ // Should we use the alternate reference frame.
+ if (allow_alt_ref &&
+ (i < cpi->oxcf.lag_in_frames) &&
+ (i >= rc->min_gf_interval)) {
+ // Calculate the boost for alt ref.
+ rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost,
+ &b_boost);
+ rc->source_alt_ref_pending = 1;
+
+ // Test to see if multi arf is appropriate.
+ cpi->multi_arf_enabled =
+ (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) &&
+ (zero_motion_accumulator < 0.995)) ? 1 : 0;
+ } else {
+ rc->gfu_boost = MAX((int)boost_score, MIN_ARF_GF_BOOST);
+ rc->source_alt_ref_pending = 0;
+ }
+
// Set the interval until the next gf.
- if (is_key_frame || rc->source_alt_ref_active)
+ if (is_key_frame || rc->source_alt_ref_pending)
rc->baseline_gf_interval = i - 1;
else
rc->baseline_gf_interval = i;
@@ -1953,30 +2024,14 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#if GROUP_ADAPTIVE_MAXQ
gf_group_raw_error += this_frame->coded_error;
#endif
+ gf_group_skip_pct += this_frame->intra_skip_pct;
+ gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
}
rc->baseline_gf_interval = new_gf_interval;
}
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- // Should we use the alternate reference frame.
- if (allow_alt_ref &&
- (i < cpi->oxcf.lag_in_frames) &&
- (i >= rc->min_gf_interval)) {
- // Calculate the boost for alt ref.
- rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost,
- &b_boost);
- rc->source_alt_ref_pending = 1;
-
- // Test to see if multi arf is appropriate.
- cpi->multi_arf_enabled =
- (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) &&
- (zero_motion_accumulator < 0.995)) ? 1 : 0;
- } else {
- rc->gfu_boost = MAX((int)boost_score, MIN_ARF_GF_BOOST);
- rc->source_alt_ref_pending = 0;
- }
-
// Reset the file position.
reset_fpf_position(twopass, start_pos);
@@ -1993,6 +2048,12 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const int vbr_group_bits_per_frame =
(int)(gf_group_bits / rc->baseline_gf_interval);
const double group_av_err = gf_group_raw_error / rc->baseline_gf_interval;
+ const double group_av_skip_pct =
+ gf_group_skip_pct / rc->baseline_gf_interval;
+ const double group_av_inactive_zone =
+ ((gf_group_inactive_zone_rows * 2) /
+ (rc->baseline_gf_interval * (double)cm->mb_rows));
+
int tmp_q;
// rc factor is a weight factor that corrects for local rate control drift.
double rc_factor = 1.0;
@@ -2004,7 +2065,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(double)(100 - rc->rate_error_estimate) / 100.0);
}
tmp_q =
- get_twopass_worst_quality(cpi, group_av_err, vbr_group_bits_per_frame,
+ get_twopass_worst_quality(cpi, group_av_err,
+ (group_av_skip_pct + group_av_inactive_zone),
+ vbr_group_bits_per_frame,
twopass->kfgroup_inter_fraction * rc_factor);
twopass->active_worst_quality =
MAX(tmp_q, twopass->active_worst_quality >> 1);
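
The two new group averages are simple normalizations: skip percentage per frame of the GF group, and inactive rows doubled (top plus bottom mask) as a fraction of the whole group's rows. A short sketch with illustrative numbers:

#include <stdio.h>

int main(void) {
  const int mb_rows = 68;                            /* illustrative height */
  const int baseline_gf_interval = 16;               /* GF group length */
  const double gf_group_skip_pct = 2.4;              /* sum of per-frame pct */
  const double gf_group_inactive_zone_rows = 64.0;   /* sum of dead rows */

  const double group_av_skip_pct =
      gf_group_skip_pct / baseline_gf_interval;
  /* Rows are masked both top and bottom, hence the factor of two. */
  const double group_av_inactive_zone =
      (gf_group_inactive_zone_rows * 2) /
      (baseline_gf_interval * (double)mb_rows);

  printf("avg skip pct %.3f, avg inactive zone %.3f\n",
         group_av_skip_pct, group_av_inactive_zone);
  return 0;
}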
@@ -2413,7 +2476,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Define the reference buffers that will be updated post encode.
-void configure_buffer_updates(VP9_COMP *cpi) {
+static void configure_buffer_updates(VP9_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
cpi->rc.is_src_frame_alt_ref = 0;
@@ -2460,7 +2523,7 @@ void configure_buffer_updates(VP9_COMP *cpi) {
}
}
-int is_skippable_frame(const VP9_COMP *cpi) {
+static int is_skippable_frame(const VP9_COMP *cpi) {
// If the current frame does not have non-zero motion vector detected in the
  // first pass, and neither do its previous and forward frames, then this frame
// can be skipped for partition check, and the partition size is assigned
@@ -2543,10 +2606,17 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
// Special case code for first frame.
const int section_target_bandwidth = (int)(twopass->bits_left /
frames_left);
+ const double section_length = twopass->total_left_stats.count;
const double section_error =
- twopass->total_left_stats.coded_error / twopass->total_left_stats.count;
+ twopass->total_left_stats.coded_error / section_length;
+ const double section_intra_skip =
+ twopass->total_left_stats.intra_skip_pct / section_length;
+ const double section_inactive_zone =
+ (twopass->total_left_stats.inactive_zone_rows * 2) /
+ ((double)cm->mb_rows * section_length);
const int tmp_q =
get_twopass_worst_quality(cpi, section_error,
+ section_intra_skip + section_inactive_zone,
section_target_bandwidth, DEFAULT_GRP_WEIGHT);
twopass->active_worst_quality = tmp_q;
@@ -2562,6 +2632,12 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
if (EOF == input_stats(twopass, &this_frame))
return;
+ // Set the frame content type flag.
+ if (this_frame.intra_skip_pct >= FC_ANIMATION_THRESH)
+ twopass->fr_content_type = FC_GRAPHICS_ANIMATION;
+ else
+ twopass->fr_content_type = FC_NORMAL;
+
// Keyframe and section processing.
if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) {
FIRSTPASS_STATS this_frame_copy;
@@ -2580,9 +2656,8 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
cpi->ref_frame_flags &=
(~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
lc->frames_from_key_frame = 0;
- // Reset the empty frame resolution since we have a key frame.
- cpi->svc.empty_frame_width = cm->width;
- cpi->svc.empty_frame_height = cm->height;
+ // Encode an intra only empty frame since we have a key frame.
+ cpi->svc.encode_intra_empty_frame = 1;
}
} else {
cm->frame_type = INTER_FRAME;
@@ -2649,6 +2724,7 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
#define MINQ_ADJ_LIMIT 48
#define MINQ_ADJ_LIMIT_CQ 20
+#define HIGH_UNDERSHOOT_RATIO 2
void vp9_twopass_postencode_update(VP9_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2715,5 +2791,32 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit);
twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit);
+
+  // If there is a big and unexpected undershoot then feed the extra
+  // bits back in quickly. One situation where this may happen is if a
+  // frame is unexpectedly almost perfectly predicted by the ARF or GF
+  // but not very well predicted by the previous frame.
+ if (!frame_is_kf_gf_arf(cpi) && !cpi->rc.is_src_frame_alt_ref) {
+ int fast_extra_thresh = rc->base_frame_target / HIGH_UNDERSHOOT_RATIO;
+ if (rc->projected_frame_size < fast_extra_thresh) {
+ rc->vbr_bits_off_target_fast +=
+ fast_extra_thresh - rc->projected_frame_size;
+ rc->vbr_bits_off_target_fast =
+ MIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth));
+
+ // Fast adaptation of minQ if necessary to use up the extra bits.
+ if (rc->avg_frame_bandwidth) {
+ twopass->extend_minq_fast =
+ (int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth);
+ }
+ twopass->extend_minq_fast = MIN(twopass->extend_minq_fast,
+ minq_adj_limit - twopass->extend_minq);
+ } else if (rc->vbr_bits_off_target_fast) {
+ twopass->extend_minq_fast = MIN(twopass->extend_minq_fast,
+ minq_adj_limit - twopass->extend_minq);
+ } else {
+ twopass->extend_minq_fast = 0;
+ }
+ }
}
}
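
The new fast feedback path lets a frame that comes in under half its target pay the surplus back quickly: the unspent bits accumulate in vbr_bits_off_target_fast (capped at four average frames) and are turned into a temporary minQ extension. A standalone sketch of that arithmetic with illustrative rate-control numbers:

#include <stdio.h>

#define HIGH_UNDERSHOOT_RATIO 2
#define MINV(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
  /* Illustrative rate-control state for one inter frame. */
  const int base_frame_target = 40000;      /* bits planned for the frame */
  const int projected_frame_size = 12000;   /* bits it actually took */
  const int avg_frame_bandwidth = 36000;
  const int minq_adj_limit = 48, extend_minq = 10;
  long long vbr_bits_off_target_fast = 0;
  int extend_minq_fast = 0;

  const int fast_extra_thresh = base_frame_target / HIGH_UNDERSHOOT_RATIO;
  if (projected_frame_size < fast_extra_thresh) {
    vbr_bits_off_target_fast += fast_extra_thresh - projected_frame_size;
    vbr_bits_off_target_fast =
        MINV(vbr_bits_off_target_fast, 4LL * avg_frame_bandwidth);
    /* Convert the surplus into a temporary minQ extension. */
    extend_minq_fast =
        (int)(vbr_bits_off_target_fast * 8 / avg_frame_bandwidth);
    extend_minq_fast = MINV(extend_minq_fast, minq_adj_limit - extend_minq);
  }
  printf("fast surplus %lld bits -> extend_minq_fast %d\n",
         vbr_bits_off_target_fast, extend_minq_fast);
  return 0;
}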
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h
index 08e7a8bf114..00479322d31 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h
@@ -51,6 +51,8 @@ typedef struct {
double pcnt_motion;
double pcnt_second_ref;
double pcnt_neutral;
+ double intra_skip_pct;
+ double inactive_zone_rows; // Image mask rows top and bottom.
double MVr;
double mvr_abs;
double MVc;
@@ -73,6 +75,13 @@ typedef enum {
FRAME_UPDATE_TYPES = 5
} FRAME_UPDATE_TYPE;
+#define FC_ANIMATION_THRESH 0.15
+typedef enum {
+ FC_NORMAL = 0,
+ FC_GRAPHICS_ANIMATION = 1,
+ FRAME_CONTENT_TYPES = 2
+} FRAME_CONTENT_TYPE;
+
typedef struct {
unsigned char index;
RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
@@ -103,6 +112,8 @@ typedef struct {
uint8_t *this_frame_mb_stats;
FIRSTPASS_MB_STATS firstpass_mb_stats;
#endif
+ // An indication of the content type of the current frame
+ FRAME_CONTENT_TYPE fr_content_type;
// Projected total bits available for a key frame group of frames
int64_t kf_group_bits;
@@ -122,6 +133,7 @@ typedef struct {
int baseline_active_worst_quality;
int extend_minq;
int extend_maxq;
+ int extend_minq_fast;
GF_GROUP gf_group;
} TWO_PASS;
@@ -135,6 +147,7 @@ void vp9_end_first_pass(struct VP9_COMP *cpi);
void vp9_init_second_pass(struct VP9_COMP *cpi);
void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
+void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
// Post encode update of the rate control parameters for 2-pass
void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
index 80c509a1b49..081b99f9f1f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
@@ -13,10 +13,13 @@
#include <stdio.h>
#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
@@ -159,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
error_per_bit + 4096) >> 13 : 0)
-// convert motion vector component to offset for svf calc
+// convert motion vector component to offset for sv[a]f calc
static INLINE int sp(int x) {
- return (x & 7) << 1;
+ return x & 7;
}
static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
@@ -283,32 +286,32 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
bestmv->row *= 8; \
bestmv->col *= 8;
-static INLINE unsigned int setup_center_error(const MACROBLOCKD *xd,
- const MV *bestmv,
- const MV *ref_mv,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- const uint8_t *const src,
- const int src_stride,
- const uint8_t *const y,
- int y_stride,
- const uint8_t *second_pred,
- int w, int h, int offset,
- int *mvjcost, int *mvcost[2],
- unsigned int *sse1,
- int *distortion) {
+static unsigned int setup_center_error(const MACROBLOCKD *xd,
+ const MV *bestmv,
+ const MV *ref_mv,
+ int error_per_bit,
+ const vp9_variance_fn_ptr_t *vfp,
+ const uint8_t *const src,
+ const int src_stride,
+ const uint8_t *const y,
+ int y_stride,
+ const uint8_t *second_pred,
+ int w, int h, int offset,
+ int *mvjcost, int *mvcost[2],
+ unsigned int *sse1,
+ int *distortion) {
unsigned int besterr;
#if CONFIG_VP9_HIGHBITDEPTH
if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
- vp9_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
+ vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride,
sse1);
} else {
DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
- vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
}
} else {
@@ -320,7 +323,7 @@ static INLINE unsigned int setup_center_error(const MACROBLOCKD *xd,
(void) xd;
if (second_pred != NULL) {
DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
- vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);
besterr = vfp->vf(comp_pred, w, src, src_stride, sse1);
} else {
besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1);
@@ -607,7 +610,7 @@ int vp9_find_best_sub_pixel_tree_pruned(const MACROBLOCK *x,
return besterr;
}
-const MV search_step_table[12] = {
+static const MV search_step_table[12] = {
// left, right, up, down
{0, -4}, {0, 4}, {-4, 0}, {4, 0},
{0, -2}, {0, 2}, {-2, 0}, {2, 0},
@@ -676,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
- int row_offset = (tr & 0x07) << 1;
- int col_offset = (tc & 0x07) << 1;
MV this_mv;
this_mv.row = tr;
this_mv.col = tc;
if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
- thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[idx] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -706,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
- int row_offset = (tr & 0x07) << 1;
- int col_offset = (tc & 0x07) << 1;
MV this_mv = {tr, tc};
if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse);
else
- thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, &sse, second_pred);
cost_array[4] = thismse +
mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -1788,8 +1787,11 @@ static const MV search_pos[4] = {
};
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
- BLOCK_SIZE bsize) {
+ BLOCK_SIZE bsize,
+ int mi_row, int mi_col) {
MACROBLOCKD *xd = &x->e_mbd;
+ MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+ struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
DECLARE_ALIGNED(16, int16_t, hbuf[128]);
DECLARE_ALIGNED(16, int16_t, vbuf[128]);
DECLARE_ALIGNED(16, int16_t, src_hbuf[64]);
@@ -1806,12 +1808,34 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
unsigned int best_sad, tmp_sad, this_sad[4];
MV this_mv;
const int norm_factor = 3 + (bw >> 5);
+ const YV12_BUFFER_CONFIG *scaled_ref_frame =
+ vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]);
+
+ if (scaled_ref_frame) {
+ int i;
+ // Swap out the reference frame for a version that's been scaled to
+ // match the resolution of the current frame, allowing the existing
+ // motion search code to be used without additional modifications.
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ backup_yv12[i] = xd->plane[i].pre[0];
+ vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
+ }
#if CONFIG_VP9_HIGHBITDEPTH
- tmp_mv->row = 0;
- tmp_mv->col = 0;
- return cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
- xd->plane[0].pre[0].buf, ref_stride);
+ {
+ unsigned int this_sad;
+ tmp_mv->row = 0;
+ tmp_mv->col = 0;
+ this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
+ xd->plane[0].pre[0].buf, ref_stride);
+
+ if (scaled_ref_frame) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[0] = backup_yv12[i];
+ }
+ return this_sad;
+ }
#endif
// Set up prediction 1-D reference set
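
The scaled-reference handling added to vp9_int_pro_motion_estimation() follows the usual backup/restore pattern: swap the pre[] planes for the scaled reference before searching and restore them on every exit path. A minimal sketch of that pattern using a stand-in plane type, not the libvpx structures; the search itself is a placeholder:

#include <stdio.h>

#define MAX_MB_PLANE 3

/* Stand-in for libvpx's struct buf_2d: just enough to show the swap. */
struct buf_2d { const unsigned char *buf; int stride; };

static unsigned int search_with_scaled_ref(struct buf_2d pre[MAX_MB_PLANE],
                                           const struct buf_2d *scaled,
                                           int have_scaled) {
  struct buf_2d backup[MAX_MB_PLANE];
  unsigned int best_sad;
  int i;

  if (have_scaled) {
    /* Swap in the reference scaled to the current frame's resolution. */
    for (i = 0; i < MAX_MB_PLANE; ++i) {
      backup[i] = pre[i];
      pre[i] = scaled[i];
    }
  }

  best_sad = 123;  /* placeholder for the actual integral-projection search */

  if (have_scaled) {
    /* Restore the original planes before returning. */
    for (i = 0; i < MAX_MB_PLANE; ++i)
      pre[i] = backup[i];
  }
  return best_sad;
}

int main(void) {
  struct buf_2d pre[MAX_MB_PLANE] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
  struct buf_2d scaled[MAX_MB_PLANE] = { { 0, 0 }, { 0, 0 }, { 0, 0 } };
  printf("sad = %u\n", search_with_scaled_ref(pre, scaled, 1));
  return 0;
}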
@@ -1889,6 +1913,12 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
tmp_mv->row *= 8;
tmp_mv->col *= 8;
+ if (scaled_ref_frame) {
+ int i;
+ for (i = 0; i < MAX_MB_PLANE; i++)
+ xd->plane[i].pre[0] = backup_yv12[i];
+ }
+
return best_sad;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h
index dd8a4607942..99c1afa28ff 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.h
@@ -83,7 +83,8 @@ int vp9_full_pixel_diamond(const struct VP9_COMP *cpi, MACROBLOCK *x,
// Perform integral projection based motion estimation.
unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi,
MACROBLOCK *x,
- BLOCK_SIZE bsize);
+ BLOCK_SIZE bsize,
+ int mi_row, int mi_col);
typedef int (integer_mv_pattern_search_fn) (
const MACROBLOCK *x,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c
index 5eb5d542b78..8e191038514 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_picklpf.c
@@ -14,6 +14,7 @@
#include "./vpx_scale_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
index 9fb7cfba7bf..3eaa99054ce 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
@@ -14,8 +14,10 @@
#include <stdio.h>
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
@@ -23,6 +25,7 @@
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_scan.h"
#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encoder.h"
@@ -214,7 +217,7 @@ static void block_variance(const uint8_t *src, int src_stride,
for (i = 0; i < h; i += block_size) {
for (j = 0; j < w; j += block_size) {
- vp9_get8x8var(src + src_stride * i + j, src_stride,
+ vpx_get8x8var(src + src_stride * i + j, src_stride,
ref + ref_stride * i + j, ref_stride,
&sse8x8[k], &sum8x8[k]);
*sse += sse8x8[k];
@@ -294,13 +297,11 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
else
tx_size = TX_8X8;
- if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
- cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
- tx_size = TX_8X8;
- else if (tx_size > TX_16X16)
- tx_size = TX_16X16;
- }
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
+ tx_size = TX_8X8;
+ else if (tx_size > TX_16X16)
+ tx_size = TX_16X16;
} else {
tx_size = MIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
@@ -478,13 +479,11 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
else
xd->mi[0]->mbmi.tx_size = TX_8X8;
- if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
- cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
- xd->mi[0]->mbmi.tx_size = TX_8X8;
- else if (xd->mi[0]->mbmi.tx_size > TX_16X16)
- xd->mi[0]->mbmi.tx_size = TX_16X16;
- }
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cyclic_refresh_segment_id_boosted(xd->mi[0]->mbmi.segment_id))
+ xd->mi[0]->mbmi.tx_size = TX_8X8;
+ else if (xd->mi[0]->mbmi.tx_size > TX_16X16)
+ xd->mi[0]->mbmi.tx_size = TX_16X16;
} else {
xd->mi[0]->mbmi.tx_size =
MIN(max_txsize_lookup[bsize],
@@ -659,7 +658,8 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist,
block = 0;
*rate = 0;
*dist = 0;
- *sse = (*sse << 6) >> shift;
+ if (*sse < INT64_MAX)
+ *sse = (*sse << 6) >> shift;
for (r = 0; r < max_blocks_high; r += block_step) {
for (c = 0; c < num_4x4_w; c += block_step) {
if (c < max_blocks_wide) {
@@ -1059,6 +1059,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
+ SPEED_FEATURES *const sf = &cpi->sf;
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
@@ -1078,9 +1079,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
unsigned int var_y = UINT_MAX;
unsigned int sse_y = UINT_MAX;
// Reduce the intra cost penalty for small blocks (<=16x16).
- const int reduction_fac =
- (cpi->sf.partition_search_type == VAR_BASED_PARTITION &&
- bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
+ const int reduction_fac = (bsize <= BLOCK_16X16) ?
+ ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
const int intra_cost_penalty = vp9_get_intra_cost_penalty(
cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth) >> reduction_fac;
const int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv,
@@ -1180,7 +1180,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cm->use_prev_frame_mvs)
vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame,
- candidates, mi_row, mi_col, NULL, NULL);
+ candidates, mi_row, mi_col, NULL, NULL,
+ xd->mi[0]->mbmi.mode_context);
else
const_motion[ref_frame] = mv_refs_rt(cm, xd, tile_info,
xd->mi[0],
@@ -1219,7 +1220,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
- if (cpi->ref_frame_flags & flag_list[i])
+ if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
ref_frame_skip_mask |= (1 << ref_frame);
if (ref_frame_skip_mask & (1 << ref_frame))
@@ -1247,7 +1248,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (bsize < BLOCK_16X16)
continue;
- tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
+ tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
continue;
@@ -1594,7 +1595,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (cpi->sf.adaptive_rd_thresh) {
THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mbmi->mode)];
- PREDICTION_MODE this_mode;
if (best_ref_frame == INTRA_FRAME) {
// Only consider the modes that are included in the intra_mode_list.
@@ -1604,12 +1604,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
// TODO(yunqingwang): Check intra mode mask and only update freq_fact
// for those valid modes.
for (i = 0; i < intra_modes; i++) {
- PREDICTION_MODE this_mode = intra_mode_list[i];
update_thresh_freq_fact(cpi, tile_data, bsize, INTRA_FRAME,
- best_mode_idx, this_mode);
+ best_mode_idx, intra_mode_list[i]);
}
} else {
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
+ PREDICTION_MODE this_mode;
if (best_ref_frame != ref_frame) continue;
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
update_thresh_freq_fact(cpi, tile_data, bsize, ref_frame,
@@ -1660,7 +1660,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
sf, sf);
vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0], ref_frame,
- candidates, mi_row, mi_col, NULL, NULL);
+ candidates, mi_row, mi_col, NULL, NULL,
+ xd->mi[0]->mbmi.mode_context);
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
&dummy_mv[0], &dummy_mv[1]);
@@ -1692,8 +1693,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
continue;
mbmi->ref_frame[0] = ref_frame;
@@ -1734,7 +1735,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
b_mv[NEWMV].as_int = INVALID_MV;
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile_info, i, 0, mi_row, mi_col,
&b_mv[NEARESTMV],
- &b_mv[NEARMV]);
+ &b_mv[NEARMV],
+ xd->mi[0]->mbmi.mode_context);
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
int b_rate = 0;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c
index 3c07e2c2437..e6e17c073e3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_quantize.c
@@ -11,6 +11,7 @@
#include <math.h>
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_seg_common.h"
@@ -677,7 +678,7 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1];
}
- x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
+ x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP);
x->q_index = qindex;
x->errorperbit = rdmult >> 6;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
index 4c33ffd977b..85003f65ef1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -16,6 +16,7 @@
#include <string.h>
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_alloccommon.h"
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
@@ -136,7 +137,7 @@ static void init_minq_luts(int *kf_low_m, int *kf_high_m,
}
}
-void vp9_rc_init_minq_luts() {
+void vp9_rc_init_minq_luts(void) {
init_minq_luts(kf_low_motion_minq_8, kf_high_motion_minq_8,
arfgf_low_motion_minq_8, arfgf_high_motion_minq_8,
inter_minq_8, rtc_minq_8, VPX_BITS_8);
@@ -233,13 +234,16 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) {
return target;
}
-// Update the buffer level for higher layers, given the encoded current layer.
+// Update the buffer level for higher temporal layers, given the encoded current
+// temporal layer.
static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
- int temporal_layer = 0;
+ int i = 0;
int current_temporal_layer = svc->temporal_layer_id;
- for (temporal_layer = current_temporal_layer + 1;
- temporal_layer < svc->number_temporal_layers; ++temporal_layer) {
- LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer];
+ for (i = current_temporal_layer + 1;
+ i < svc->number_temporal_layers; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
encoded_frame_size);
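
layer_context[] is now indexed by both spatial and temporal layer id through LAYER_IDS_TO_IDX. The macro's definition is not part of this diff; the sketch below assumes the spatial-major flat layout that the loop above implies:

#include <stdio.h>

/* Assumed layout: spatial-major, matching how the loop above walks the
 * temporal layers of the current spatial layer. */
#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))

int main(void) {
  const int number_spatial_layers = 2, number_temporal_layers = 3;
  int sl, tl;
  for (sl = 0; sl < number_spatial_layers; ++sl)
    for (tl = 0; tl < number_temporal_layers; ++tl)
      printf("spatial %d / temporal %d -> layer_context[%d]\n",
             sl, tl, LAYER_IDS_TO_IDX(sl, tl, number_temporal_layers));
  return 0;
}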
@@ -267,7 +271,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size);
rc->buffer_level = rc->bits_off_target;
- if (cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR) {
+ if (is_one_pass_cbr_svc(cpi)) {
update_layer_buffer_level(&cpi->svc, encoded_frame_size);
}
}
@@ -491,7 +495,10 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
i = active_best_quality;
do {
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cm->seg.enabled &&
+ cpi->svc.temporal_layer_id == 0 &&
+ cpi->svc.spatial_layer_id == 0) {
bits_per_mb_at_this_q =
(int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor);
} else {
@@ -1057,10 +1064,12 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi,
if (frame_is_intra_only(cm) ||
(!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
- active_best_quality -= cpi->twopass.extend_minq;
+ active_best_quality -=
+ (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
active_worst_quality += (cpi->twopass.extend_maxq / 2);
} else {
- active_best_quality -= cpi->twopass.extend_minq / 2;
+ active_best_quality -=
+ (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
active_worst_quality += cpi->twopass.extend_maxq;
}
}
@@ -1203,11 +1212,9 @@ static void update_golden_frame_stats(VP9_COMP *cpi) {
// this frame refreshes means next frames don't unless specified by user
rc->frames_since_golden = 0;
- if (cpi->oxcf.pass == 2) {
- if (!rc->source_alt_ref_pending &&
- cpi->twopass.gf_group.rf_level[0] == GF_ARF_STD)
- rc->source_alt_ref_active = 0;
- } else if (!rc->source_alt_ref_pending) {
+  // If we are not using alt ref in the upcoming group, clear the arf
+ // active flag.
+ if (!rc->source_alt_ref_pending) {
rc->source_alt_ref_active = 0;
}
@@ -1414,13 +1421,14 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
} else {
target = rc->avg_frame_bandwidth;
}
- if (svc->number_temporal_layers > 1 &&
- oxcf->rc_mode == VPX_CBR) {
+ if (is_one_pass_cbr_svc(cpi)) {
// Note that for layers, avg_frame_bandwidth is the cumulative
// per-frame-bandwidth. For the target size of this frame, use the
// layer average frame size (i.e., non-cumulative per-frame-bw).
- int current_temporal_layer = svc->temporal_layer_id;
- const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer];
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id,
+ svc->temporal_layer_id, svc->number_temporal_layers);
+ const LAYER_CONTEXT *lc = &svc->layer_context[layer];
target = lc->avg_frame_size;
min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
}
@@ -1455,7 +1463,9 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
if (svc->number_temporal_layers > 1 &&
oxcf->rc_mode == VPX_CBR) {
// Use the layer framerate for temporal layers CBR mode.
- const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id];
+ const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id,
+ svc->temporal_layer_id, svc->number_temporal_layers);
+ const LAYER_CONTEXT *lc = &svc->layer_context[layer];
framerate = lc->framerate;
}
kf_boost = MAX(kf_boost, (int)(2 * framerate - 16));
@@ -1468,10 +1478,27 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
return vp9_rc_clamp_iframe_target_size(cpi, target);
}
+// Reset information needed to set proper reference frames and buffer updates
+// for temporal layering. This is called when a key frame is encoded.
+static void reset_temporal_layer_to_zero(VP9_COMP *cpi) {
+ int sl;
+ LAYER_CONTEXT *lc = NULL;
+ cpi->svc.temporal_layer_id = 0;
+
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
+ lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers];
+ lc->current_video_frame_in_layer = 0;
+ lc->frames_from_key_frame = 0;
+ }
+}
+
void vp9_rc_get_svc_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
int target = rc->avg_frame_bandwidth;
+ const int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
+ cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers);
+
if ((cm->current_video_frame == 0) ||
(cpi->frame_flags & FRAMEFLAGS_KEY) ||
(cpi->oxcf.auto_key && (rc->frames_since_key %
@@ -1480,30 +1507,39 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
rc->source_alt_ref_active = 0;
if (is_two_pass_svc(cpi)) {
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1;
+ cpi->svc.layer_context[layer].is_key_frame = 1;
cpi->ref_frame_flags &=
(~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
- }
-
- if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
+ } else if (is_one_pass_cbr_svc(cpi)) {
+ cpi->svc.layer_context[layer].is_key_frame = 1;
+ reset_temporal_layer_to_zero(cpi);
+ cpi->ref_frame_flags &=
+ (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
+ // Assumption here is that LAST_FRAME is being updated for a keyframe.
+ // Thus no change in update flags.
target = calc_iframe_target_size_one_pass_cbr(cpi);
}
} else {
cm->frame_type = INTER_FRAME;
-
if (is_two_pass_svc(cpi)) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
if (cpi->svc.spatial_layer_id == 0) {
lc->is_key_frame = 0;
} else {
- lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
+ lc->is_key_frame =
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
if (lc->is_key_frame)
cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
}
cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
- }
-
- if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) {
+ } else if (is_one_pass_cbr_svc(cpi)) {
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
+ if (cpi->svc.spatial_layer_id == 0) {
+ lc->is_key_frame = 0;
+ } else {
+ lc->is_key_frame =
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
+ }
target = calc_pframe_target_size_one_pass_cbr(cpi);
}
}
@@ -1560,6 +1596,10 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
target = calc_pframe_target_size_one_pass_cbr(cpi);
vp9_rc_set_frame_target(cpi, target);
+ if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC)
+ cpi->resize_state = vp9_resize_one_pass_cbr(cpi);
+ else
+ cpi->resize_state = 0;
}
int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget,
@@ -1669,9 +1709,9 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
#define VBR_PCT_ADJUSTMENT_LIMIT 50
// For VBR...adjustment to the frame target based on error from previous frames
-static void vbr_rate_correction(VP9_COMP *cpi,
- int *this_frame_target,
- int64_t vbr_bits_off_target) {
+static void vbr_rate_correction(VP9_COMP *cpi, int *this_frame_target) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ int64_t vbr_bits_off_target = rc->vbr_bits_off_target;
int max_delta;
double position_factor = 1.0;
@@ -1695,6 +1735,20 @@ static void vbr_rate_correction(VP9_COMP *cpi,
(vbr_bits_off_target < -max_delta) ? max_delta
: (int)-vbr_bits_off_target;
}
+
+ // Fast redistribution of bits arising from massive local undershoot.
+  // Don't do it for kf, arf, gf or overlay frames.
+ if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref &&
+ rc->vbr_bits_off_target_fast) {
+ int one_frame_bits = MAX(rc->avg_frame_bandwidth, *this_frame_target);
+ int fast_extra_bits;
+ fast_extra_bits =
+ (int)MIN(rc->vbr_bits_off_target_fast, one_frame_bits);
+ fast_extra_bits = (int)MIN(fast_extra_bits,
+ MAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8));
+ *this_frame_target += (int)fast_extra_bits;
+ rc->vbr_bits_off_target_fast -= fast_extra_bits;
+ }
}
void vp9_set_target_rate(VP9_COMP *cpi) {
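
The fast-redistribution branch in vbr_rate_correction() hands back at most one frame's worth of the stockpiled undershoot, further limited to an eighth of either the frame budget or the remaining surplus. A standalone sketch of that arithmetic:

#include <stdio.h>

#define MAXV(a, b) ((a) > (b) ? (a) : (b))
#define MINV(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
  long long vbr_bits_off_target_fast = 120000;  /* surplus from undershoots */
  const int avg_frame_bandwidth = 36000;
  int this_frame_target = 30000;

  const long long one_frame_bits = MAXV(avg_frame_bandwidth,
                                        this_frame_target);
  long long fast_extra_bits = MINV(vbr_bits_off_target_fast, one_frame_bits);
  fast_extra_bits = MINV(fast_extra_bits,
                         MAXV(one_frame_bits / 8,
                              vbr_bits_off_target_fast / 8));

  this_frame_target += (int)fast_extra_bits;
  vbr_bits_off_target_fast -= fast_extra_bits;

  printf("target %d (+%lld), surplus left %lld\n",
         this_frame_target, fast_extra_bits, vbr_bits_off_target_fast);
  return 0;
}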
@@ -1703,6 +1757,95 @@ void vp9_set_target_rate(VP9_COMP *cpi) {
// Correction to rate target based on prior over or under shoot.
if (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.rc_mode == VPX_CQ)
- vbr_rate_correction(cpi, &target_rate, rc->vbr_bits_off_target);
+ vbr_rate_correction(cpi, &target_rate);
vp9_rc_set_frame_target(cpi, target_rate);
}
+
+// Check if we should resize, based on average QP from past x frames.
+// Only allow for resize at most one scale down for now, scaling factor is 2.
+int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ int resize_now = 0;
+ cpi->resize_scale_num = 1;
+ cpi->resize_scale_den = 1;
+ // Don't resize on key frame; reset the counters on key frame.
+ if (cm->frame_type == KEY_FRAME) {
+ cpi->resize_avg_qp = 0;
+ cpi->resize_count = 0;
+ return 0;
+ }
+ // Resize based on average QP over some window.
+ // Ignore samples close to key frame, since QP is usually high after key.
+ if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
+ const int window = (int)(5 * cpi->framerate);
+ cpi->resize_avg_qp += cm->base_qindex;
+ if (cpi->rc.buffer_level < 0)
+ ++cpi->resize_buffer_underflow;
+ ++cpi->resize_count;
+ // Check for resize action every "window" frames.
+ if (cpi->resize_count >= window) {
+ int avg_qp = cpi->resize_avg_qp / cpi->resize_count;
+      // Resize down if the buffer level has underflowed a sufficient amount
+      // in the past window, and we are at the original resolution.
+ // Resize back up if average QP is low, and we are currently in a resized
+ // down state.
+ if (cpi->resize_state == 0 &&
+ cpi->resize_buffer_underflow > (cpi->resize_count >> 3)) {
+ resize_now = 1;
+ } else if (cpi->resize_state == 1 &&
+ avg_qp < 40 * cpi->rc.worst_quality / 100) {
+ resize_now = -1;
+ }
+ // Reset for next window measurement.
+ cpi->resize_avg_qp = 0;
+ cpi->resize_count = 0;
+ cpi->resize_buffer_underflow = 0;
+ }
+ }
+  // If the decision is to resize, reset some quantities, and check if we
+  // should reduce the rate correction factor.
+ if (resize_now != 0) {
+ int target_bits_per_frame;
+ int active_worst_quality;
+ int qindex;
+ int tot_scale_change;
+ // For now, resize is by 1/2 x 1/2.
+ cpi->resize_scale_num = 1;
+ cpi->resize_scale_den = 2;
+ tot_scale_change = (cpi->resize_scale_den * cpi->resize_scale_den) /
+ (cpi->resize_scale_num * cpi->resize_scale_num);
+ // Reset buffer level to optimal, update target size.
+ rc->buffer_level = rc->optimal_buffer_level;
+ rc->bits_off_target = rc->optimal_buffer_level;
+ rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi);
+ // Reset cyclic refresh parameters.
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
+ vp9_cyclic_refresh_reset_resize(cpi);
+ // Get the projected qindex, based on the scaled target frame size (scaled
+ // so target_bits_per_mb in vp9_rc_regulate_q will be correct target).
+ target_bits_per_frame = (resize_now == 1) ?
+ rc->this_frame_target * tot_scale_change :
+ rc->this_frame_target / tot_scale_change;
+ active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi);
+ qindex = vp9_rc_regulate_q(cpi,
+ target_bits_per_frame,
+ rc->best_quality,
+ active_worst_quality);
+ // If resize is down, check if projected q index is close to worst_quality,
+ // and if so, reduce the rate correction factor (since likely can afford
+ // lower q for resized frame).
+ if (resize_now == 1 &&
+ qindex > 90 * cpi->rc.worst_quality / 100) {
+ rc->rate_correction_factors[INTER_NORMAL] *= 0.85;
+ }
+ // If resize is back up, check if projected q index is too much above the
+ // current base_qindex, and if so, reduce the rate correction factor
+ // (since prefer to keep q for resized frame at least close to previous q).
+ if (resize_now == -1 &&
+ qindex > 130 * cm->base_qindex / 100) {
+ rc->rate_correction_factors[INTER_NORMAL] *= 0.9;
+ }
+ }
+ return resize_now;
+}
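
vp9_resize_one_pass_cbr() averages QP over roughly a five-second window (ignoring the first frames after a key frame), scales down when the buffer underflowed in more than an eighth of the window's frames, and scales back up when the average QP drops below 40% of worst_quality. A compact sketch of just that windowed decision, detached from the encoder state:

#include <stdio.h>

/* Returns 1 to scale down, -1 to scale back up, 0 to stay.
 * resize_state: 0 = original resolution, 1 = currently scaled down. */
static int resize_decision(int resize_state, int window_frames,
                           int underflow_count, int avg_qp,
                           int worst_quality) {
  if (resize_state == 0 && underflow_count > (window_frames >> 3))
    return 1;    /* buffer keeps running dry at full size: go down */
  if (resize_state == 1 && avg_qp < 40 * worst_quality / 100)
    return -1;   /* QP comfortably low while scaled down: go back up */
  return 0;
}

int main(void) {
  const int window = 150;  /* 5 seconds at 30 fps */
  printf("full size, 25 underflows -> %d\n",
         resize_decision(0, window, 25, 120, 255));
  printf("scaled down, avg QP 80   -> %d\n",
         resize_decision(1, window, 0, 80, 255));
  return 0;
}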
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
index 869f6e59e97..a10836c7444 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -100,6 +100,7 @@ typedef struct {
int64_t buffer_level;
int64_t bits_off_target;
int64_t vbr_bits_off_target;
+ int64_t vbr_bits_off_target_fast;
int decimation_factor;
int decimation_count;
@@ -152,7 +153,7 @@ int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth);
-void vp9_rc_init_minq_luts();
+void vp9_rc_init_minq_luts(void);
// Generally at the high level, the following flow is expected
// to be enforced for rate control:
@@ -244,6 +245,8 @@ void vp9_rc_set_gf_interval_range(const struct VP9_COMP *const cpi,
void vp9_set_target_rate(struct VP9_COMP *cpi);
+int vp9_resize_one_pass_cbr(struct VP9_COMP *cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c
index 194001c51a2..90ee1e44a1b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.c
@@ -15,6 +15,7 @@
#include "./vp9_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
@@ -128,7 +129,7 @@ static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
}
}
-void vp9_init_me_luts() {
+void vp9_init_me_luts(void) {
init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
@@ -264,6 +265,7 @@ static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->td.mb;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
RD_OPT *const rd = &cpi->rd;
int i;
@@ -279,6 +281,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
cm->frame_type != KEY_FRAME) ? 0 : 1;
set_block_thresholds(cm, rd);
+ set_partition_probs(cm, xd);
if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
fill_token_costs(x->token_costs, cm->fc->coef_probs);
@@ -286,7 +289,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
cm->frame_type == KEY_FRAME) {
for (i = 0; i < PARTITION_CONTEXTS; ++i)
- vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(cm, i),
+ vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
vp9_partition_tree);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h
index 4d247342b0a..7ba2568fe68 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rd.h
@@ -150,7 +150,7 @@ int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
int ref_frame);
-void vp9_init_me_luts();
+void vp9_init_me_luts(void);
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
const struct macroblockd_plane *pd,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
index 73825623748..162d4de5f5d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
@@ -14,6 +14,7 @@
#include "./vp9_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
@@ -24,6 +25,7 @@
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
+#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_systemdependent.h"
@@ -1802,7 +1804,8 @@ static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
frame_mv[ZEROMV][frame].as_int = 0;
vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
&frame_mv[NEARESTMV][frame],
- &frame_mv[NEARMV][frame]);
+ &frame_mv[NEARMV][frame],
+ xd->mi[0]->mbmi.mode_context);
}
// search for the best motion vector on this segment
@@ -2117,8 +2120,8 @@ static void estimate_ref_frame_costs(const VP9_COMMON *cm,
unsigned int *ref_costs_single,
unsigned int *ref_costs_comp,
vp9_prob *comp_mode_p) {
- int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
- SEG_LVL_REF_FRAME);
+ int seg_ref_active = segfeature_active(&cm->seg, segment_id,
+ SEG_LVL_REF_FRAME);
if (seg_ref_active) {
memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
@@ -2218,7 +2221,7 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
// Gets an initial list of candidate vectors from neighbours and orders them
vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col,
- NULL, NULL);
+ NULL, NULL, xd->mi[0]->mbmi.mode_context);
// Candidate refinement carried out at encoder and decoder
vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
@@ -3004,8 +3007,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
}
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
ref_frame_skip_mask[0] |= (1 << ref_frame);
ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
}
@@ -3014,7 +3017,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
- if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative. We allow near/nearest as well
@@ -3193,7 +3196,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi,
// Do not allow compound prediction if the segment level reference frame
// feature is in use as in this case there can only be one reference.
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
continue;
if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
@@ -3635,7 +3638,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi,
rd_cost->rate = INT_MAX;
- assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
+ assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
mbmi->mode = ZEROMV;
mbmi->uv_mode = DC_PRED;
@@ -3847,7 +3850,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
continue;
// Do not allow compound prediction if the segment level reference frame
// feature is in use as in this case there can only be one reference.
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
continue;
if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
@@ -3872,13 +3875,13 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi,
// If the segment reference frame feature is enabled....
// then do nothing if the current ref frame is not allowed..
- if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
- vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
+ if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
continue;
// Disable this drop out case if the ref frame
// segment level feature is enabled for this segment. This is to
// prevent the possibility that we end up unable to pick any mode.
- } else if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
+ } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
// Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
// unless ARNR filtering is enabled in which case we want
// an unfiltered alternative. We allow near/nearest as well
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c
index 2ebdff291d6..f46cad80491 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_resize.c
@@ -15,6 +15,7 @@
#include <stdlib.h>
#include <string.h>
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#include "vp9/encoder/vp9_resize.h"
@@ -28,7 +29,7 @@
typedef int16_t interp_kernel[INTERP_TAPS];
// Filters for interpolation (0.5-band) - note this also filters integer pels.
-const interp_kernel vp9_filteredinterp_filters500[(1 << SUBPEL_BITS)] = {
+static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS)] = {
{-3, 0, 35, 64, 35, 0, -3, 0},
{-3, -1, 34, 64, 36, 1, -3, 0},
{-3, -1, 32, 64, 38, 1, -3, 0},
@@ -64,7 +65,7 @@ const interp_kernel vp9_filteredinterp_filters500[(1 << SUBPEL_BITS)] = {
};
// Filters for interpolation (0.625-band) - note this also filters integer pels.
-const interp_kernel vp9_filteredinterp_filters625[(1 << SUBPEL_BITS)] = {
+static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS)] = {
{-1, -8, 33, 80, 33, -8, -1, 0},
{-1, -8, 30, 80, 35, -8, -1, 1},
{-1, -8, 28, 80, 37, -7, -2, 1},
@@ -100,7 +101,7 @@ const interp_kernel vp9_filteredinterp_filters625[(1 << SUBPEL_BITS)] = {
};
// Filters for interpolation (0.75-band) - note this also filters integer pels.
-const interp_kernel vp9_filteredinterp_filters750[(1 << SUBPEL_BITS)] = {
+static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS)] = {
{2, -11, 25, 96, 25, -11, 2, 0},
{2, -11, 22, 96, 28, -11, 2, 0},
{2, -10, 19, 95, 31, -11, 2, 0},
@@ -136,7 +137,7 @@ const interp_kernel vp9_filteredinterp_filters750[(1 << SUBPEL_BITS)] = {
};
// Filters for interpolation (0.875-band) - note this also filters integer pels.
-const interp_kernel vp9_filteredinterp_filters875[(1 << SUBPEL_BITS)] = {
+static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS)] = {
{3, -8, 13, 112, 13, -8, 3, 0},
{3, -7, 10, 112, 17, -9, 3, -1},
{2, -6, 7, 111, 21, -9, 3, -1},
@@ -172,7 +173,7 @@ const interp_kernel vp9_filteredinterp_filters875[(1 << SUBPEL_BITS)] = {
};
// Filters for interpolation (full-band) - no filtering for integer pixels
-const interp_kernel vp9_filteredinterp_filters1000[(1 << SUBPEL_BITS)] = {
+static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS)] = {
{0, 0, 0, 128, 0, 0, 0, 0},
{0, 1, -3, 128, 3, -1, 0, 0},
{-1, 2, -6, 127, 7, -2, 1, 0},
@@ -214,15 +215,15 @@ static const int16_t vp9_down2_symodd_half_filter[] = {64, 35, 0, -3};
static const interp_kernel *choose_interp_filter(int inlength, int outlength) {
int outlength16 = outlength * 16;
if (outlength16 >= inlength * 16)
- return vp9_filteredinterp_filters1000;
+ return filteredinterp_filters1000;
else if (outlength16 >= inlength * 13)
- return vp9_filteredinterp_filters875;
+ return filteredinterp_filters875;
else if (outlength16 >= inlength * 11)
- return vp9_filteredinterp_filters750;
+ return filteredinterp_filters750;
else if (outlength16 >= inlength * 9)
- return vp9_filteredinterp_filters625;
+ return filteredinterp_filters625;
else
- return vp9_filteredinterp_filters500;
+ return filteredinterp_filters500;
}
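
The selection above amounts to comparing the output/input length ratio against fixed sixteenths. A minimal sketch of the same decision, with a hypothetical helper name used purely for illustration:

    static double chosen_band(int inlength, int outlength) {
      // Mirrors choose_interp_filter(): thresholds at 16/16, 13/16, 11/16, 9/16.
      const int outlength16 = outlength * 16;
      if (outlength16 >= inlength * 16) return 1.000;  // no down-scaling
      if (outlength16 >= inlength * 13) return 0.875;  // ratio >= 0.8125
      if (outlength16 >= inlength * 11) return 0.750;  // ratio >= 0.6875
      if (outlength16 >= inlength * 9)  return 0.625;  // ratio >= 0.5625
      return 0.500;
    }
    // e.g. chosen_band(1920, 1440) == 0.750, since 1440/1920 = 0.75 >= 11/16.
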
static void interpolate(const uint8_t *const input, int inlength,
@@ -427,7 +428,7 @@ static int get_down2_length(int length, int steps) {
return length;
}
-int get_down2_steps(int in_length, int out_length) {
+static int get_down2_steps(int in_length, int out_length) {
int steps = 0;
int proj_in_length;
while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
index 4f93578326b..6c6c4ed3022 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
@@ -19,9 +19,33 @@ static int frame_is_boosted(const VP9_COMP *cpi) {
return frame_is_kf_gf_arf(cpi) || vp9_is_upper_layer_key_frame(cpi);
}
-static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm,
+// Sets a partition size down to which the auto partition code will always
+// search (can go lower), based on the image dimensions. The logic here
+// is that the extent to which ringing artefacts are offensive depends
+// partly on the screen area over which they propagate. Propagation is
+// limited by transform block size, but the screen area taken up by a given
+// block size will be larger for a small image format stretched to full screen.
+static BLOCK_SIZE set_partition_min_limit(VP9_COMMON *const cm) {
+ unsigned int screen_area = (cm->width * cm->height);
+
+ // Select block size based on image format size.
+ if (screen_area < 1280 * 720) {
+ // Formats smaller in area than 720P
+ return BLOCK_4X4;
+ } else if (screen_area < 1920 * 1080) {
+ // Format >= 720P and < 1080P
+ return BLOCK_8X8;
+ } else {
+ // Formats 1080P and up
+ return BLOCK_16X16;
+ }
+}
+
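
For concreteness, the area cutoffs above map common formats as follows (illustrative arithmetic only):

     640 x  360 =  230400  < 921600 (1280*720)          -> BLOCK_4X4
    1280 x  720 =  921600  not < 921600, but < 2073600  -> BLOCK_8X8
    1920 x 1080 = 2073600  not < 2073600 (1920*1080)    -> BLOCK_16X16
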
+static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
SPEED_FEATURES *sf,
int speed) {
+ VP9_COMMON *const cm = &cpi->common;
+
if (speed >= 1) {
if (MIN(cm->width, cm->height) >= 720) {
sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT
@@ -45,6 +69,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm,
sf->partition_search_breakout_dist_thr = (1 << 22);
sf->partition_search_breakout_rate_thr = 100;
}
+ sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
}
if (speed >= 3) {
@@ -62,6 +87,13 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm,
}
}
+ // If this is a two pass clip that fits the criteria for animated or
+ // graphics content then reset disable_split_mask for speeds 1-4.
+ if ((speed >= 1) && (cpi->oxcf.pass == 2) &&
+ (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)) {
+ sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
+ }
+
if (speed >= 4) {
if (MIN(cm->width, cm->height) >= 720) {
sf->partition_search_breakout_dist_thr = (1 << 26);
@@ -72,29 +104,6 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMMON *cm,
}
}
-// Sets a partition size down to which the auto partition code will always
-// search (can go lower), based on the image dimensions. The logic here
-// is that the extent to which ringing artefacts are offensive, depends
-// partly on the screen area that over which they propogate. Propogation is
-// limited by transform block size but the screen area take up by a given block
-// size will be larger for a small image format stretched to full screen.
-static BLOCK_SIZE set_partition_min_limit(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
- unsigned int screen_area = (cm->width * cm->height);
-
- // Select block size based on image format size.
- if (screen_area < 1280 * 720) {
- // Formats smaller in area than 720P
- return BLOCK_4X4;
- } else if (screen_area < 1920 * 1080) {
- // Format >= 720P and < 1080P
- return BLOCK_8X8;
- } else {
- // Formats 1080P and up
- return BLOCK_16X16;
- }
-}
-
static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
SPEED_FEATURES *sf, int speed) {
const int boosted = frame_is_boosted(cpi);
@@ -139,7 +148,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
- sf->rd_auto_partition_min_limit = set_partition_min_limit(cpi);
sf->allow_partition_search_skip = 1;
}
@@ -260,8 +268,12 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
FLAG_SKIP_INTRA_LOWVAR;
sf->adaptive_pred_interp_filter = 2;
- // Reference masking is not supported in dynamic scaling mode.
- sf->reference_masking = cpi->oxcf.resize_mode != RESIZE_DYNAMIC ? 1 : 0;
+ // Disable reference masking if using spatial scaling since
+ // pred_mv_sad will not be set (since vp9_mv_pred will not
+ // be called).
+ // TODO(marpan/agrange): Fix this condition.
+ sf->reference_masking = (cpi->oxcf.resize_mode != RESIZE_DYNAMIC &&
+ cpi->svc.number_spatial_layers == 1) ? 1 : 0;
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -337,6 +349,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->coeff_prob_appx_step = 4;
sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED;
sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH;
+ sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
if (!is_keyframe) {
int i;
@@ -360,7 +373,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
// Turn on this to use non-RD key frame coding mode.
sf->use_nonrd_pick_mode = 1;
sf->mv.search_method = NSTEP;
- sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
}
@@ -379,7 +391,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
SPEED_FEATURES *const sf = &cpi->sf;
- VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
RD_OPT *const rd = &cpi->rd;
int i;
@@ -387,7 +398,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
if (oxcf->mode == REALTIME) {
set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
} else if (oxcf->mode == GOOD) {
- set_good_speed_feature_framesize_dependent(cm, sf, oxcf->speed);
+ set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
}
if (sf->disable_split_mask == DISABLE_ALL_SPLIT) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ssim.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ssim.c
index 88db5dda06d..172de5d1daa 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ssim.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ssim.c
@@ -10,6 +10,7 @@
#include <math.h>
#include "./vp9_rtcd.h"
+#include "vpx_ports/mem.h"
#include "vp9/encoder/vp9_ssim.h"
void vp9_ssim_parms_16x16_c(uint8_t *s, int sp, uint8_t *r,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.c
index cfdc90d15fb..b345b162cdb 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.c
@@ -12,6 +12,7 @@
#include "vp9/common/vp9_entropy.h"
#include "vp9/encoder/vp9_cost.h"
+#include "vp9/encoder/vp9_subexp.h"
#include "vp9/encoder/vp9_writer.h"
#define vp9_cost_upd256 ((int)(vp9_cost_one(upd) - vp9_cost_zero(upd)))
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.h
index ac54893cf45..6fbb747e7d3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_subexp.h
@@ -16,11 +16,15 @@
extern "C" {
#endif
-void vp9_write_prob_diff_update(vp9_writer *w,
+#include "vp9/common/vp9_prob.h"
+
+struct vp9_writer;
+
+void vp9_write_prob_diff_update(struct vp9_writer *w,
vp9_prob newp, vp9_prob oldp);
-void vp9_cond_prob_diff_update(vp9_writer *w, vp9_prob *oldp,
- unsigned int *ct);
+void vp9_cond_prob_diff_update(struct vp9_writer *w, vp9_prob *oldp,
+ const unsigned int ct[2]);
int vp9_prob_diff_update_savings_search(const unsigned int *ct,
vp9_prob oldp, vp9_prob *bestp,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
index b3491a27a4a..1b35ac9b61e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -15,89 +15,85 @@
#include "vp9/encoder/vp9_extend.h"
#define SMALL_FRAME_FB_IDX 7
+#define SMALL_FRAME_WIDTH 16
+#define SMALL_FRAME_HEIGHT 16
void vp9_init_layer_context(VP9_COMP *const cpi) {
SVC *const svc = &cpi->svc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- int layer;
- int layer_end;
+ int sl, tl;
int alt_ref_idx = svc->number_spatial_layers;
svc->spatial_layer_id = 0;
svc->temporal_layer_id = 0;
- if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- layer_end = svc->number_temporal_layers;
- } else {
- layer_end = svc->number_spatial_layers;
-
- if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
- if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img,
- cpi->common.width, cpi->common.height,
- cpi->common.subsampling_x,
- cpi->common.subsampling_y,
+ if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
+ if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img,
+ SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT,
+ cpi->common.subsampling_x,
+ cpi->common.subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
cpi->common.use_highbitdepth,
#endif
VP9_ENC_BORDER_IN_PIXELS,
cpi->common.byte_alignment,
NULL, NULL, NULL))
- vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
- "Failed to allocate empty frame for multiple frame "
- "contexts");
-
- memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
- cpi->svc.empty_frame.img.buffer_alloc_sz);
- cpi->svc.empty_frame_width = cpi->common.width;
- cpi->svc.empty_frame_height = cpi->common.height;
- }
+ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate empty frame for multiple frame "
+ "contexts");
+
+ memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
+ cpi->svc.empty_frame.img.buffer_alloc_sz);
}
- for (layer = 0; layer < layer_end; ++layer) {
- LAYER_CONTEXT *const lc = &svc->layer_context[layer];
- RATE_CONTROL *const lrc = &lc->rc;
- int i;
- lc->current_video_frame_in_layer = 0;
- lc->layer_size = 0;
- lc->frames_from_key_frame = 0;
- lc->last_frame_type = FRAME_TYPES;
- lrc->ni_av_qi = oxcf->worst_allowed_q;
- lrc->total_actual_bits = 0;
- lrc->total_target_vs_actual = 0;
- lrc->ni_tot_qi = 0;
- lrc->tot_q = 0.0;
- lrc->avg_q = 0.0;
- lrc->ni_frames = 0;
- lrc->decimation_count = 0;
- lrc->decimation_factor = 0;
-
- for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
- lrc->rate_correction_factors[i] = 1.0;
- }
+ for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+ for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
+ int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ int i;
+ lc->current_video_frame_in_layer = 0;
+ lc->layer_size = 0;
+ lc->frames_from_key_frame = 0;
+ lc->last_frame_type = FRAME_TYPES;
+ lrc->ni_av_qi = oxcf->worst_allowed_q;
+ lrc->total_actual_bits = 0;
+ lrc->total_target_vs_actual = 0;
+ lrc->ni_tot_qi = 0;
+ lrc->tot_q = 0.0;
+ lrc->avg_q = 0.0;
+ lrc->ni_frames = 0;
+ lrc->decimation_count = 0;
+ lrc->decimation_factor = 0;
+
+ for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
+ lrc->rate_correction_factors[i] = 1.0;
+ }
- if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
- lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
- lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
- lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
- } else {
- lc->target_bandwidth = oxcf->ss_target_bitrate[layer];
- lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
- lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
- lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q +
- oxcf->best_allowed_q) / 2;
- lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
+ if (cpi->oxcf.rc_mode == VPX_CBR) {
+ lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
+ lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
+ lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
+ lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
+ } else {
+ lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
+ lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
+ lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
+ lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q +
oxcf->best_allowed_q) / 2;
- if (oxcf->ss_enable_auto_arf[layer])
- lc->alt_ref_idx = alt_ref_idx++;
- else
- lc->alt_ref_idx = INVALID_IDX;
- lc->gold_ref_idx = INVALID_IDX;
- }
+ lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q +
+ oxcf->best_allowed_q) / 2;
+ if (oxcf->ss_enable_auto_arf[sl])
+ lc->alt_ref_idx = alt_ref_idx++;
+ else
+ lc->alt_ref_idx = INVALID_IDX;
+ lc->gold_ref_idx = INVALID_IDX;
+ }
- lrc->buffer_level = oxcf->starting_buffer_level_ms *
- lc->target_bandwidth / 1000;
- lrc->bits_off_target = lrc->buffer_level;
+ lrc->buffer_level = oxcf->starting_buffer_level_ms *
+ lc->target_bandwidth / 1000;
+ lrc->bits_off_target = lrc->buffer_level;
+ }
}
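
A layout sketch of the combined spatial/temporal indexing used in this loop, assuming LAYER_IDS_TO_IDX(sl, tl, n) expands to sl * n + tl (which matches the explicit sl * ts_number_layers + tl indexing used elsewhere in this patch):

    // For ss_number_layers == 2 and ts_number_layers == 3, layer_context[] is:
    //   idx 0: sl=0,tl=0   idx 1: sl=0,tl=1   idx 2: sl=0,tl=2
    //   idx 3: sl=1,tl=0   idx 4: sl=1,tl=1   idx 5: sl=1,tl=2
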
// Still have extra buffer for base layer golden frame
@@ -112,53 +108,98 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
SVC *const svc = &cpi->svc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
const RATE_CONTROL *const rc = &cpi->rc;
- int layer;
- int layer_end;
+ int sl, tl, layer = 0, spatial_layer_target;
float bitrate_alloc = 1.0;
- if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- layer_end = svc->number_temporal_layers;
- } else {
- layer_end = svc->number_spatial_layers;
- }
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
+ for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+ spatial_layer_target = 0;
- for (layer = 0; layer < layer_end; ++layer) {
- LAYER_CONTEXT *const lc = &svc->layer_context[layer];
- RATE_CONTROL *const lrc = &lc->rc;
+ for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
+ layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
+ svc->layer_context[layer].target_bandwidth =
+ oxcf->layer_target_bitrate[layer];
+ }
- if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- lc->target_bandwidth = oxcf->ts_target_bitrate[layer];
- } else {
- lc->target_bandwidth = oxcf->ss_target_bitrate[layer];
+ layer = LAYER_IDS_TO_IDX(sl, ((oxcf->ts_number_layers - 1) < 0 ?
+ 0 : (oxcf->ts_number_layers - 1)), oxcf->ts_number_layers);
+ spatial_layer_target =
+ svc->layer_context[layer].target_bandwidth =
+ oxcf->layer_target_bitrate[layer];
+
+ for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
+ LAYER_CONTEXT *const lc =
+ &svc->layer_context[sl * oxcf->ts_number_layers + tl];
+ RATE_CONTROL *const lrc = &lc->rc;
+
+ lc->spatial_layer_target_bandwidth = spatial_layer_target;
+ bitrate_alloc = (float)lc->target_bandwidth / spatial_layer_target;
+ lrc->starting_buffer_level =
+ (int64_t)(rc->starting_buffer_level * bitrate_alloc);
+ lrc->optimal_buffer_level =
+ (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
+ lrc->maximum_buffer_size =
+ (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
+ lrc->bits_off_target =
+ MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
+ lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
+ lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
+ lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+ lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
+ lrc->worst_quality = rc->worst_quality;
+ lrc->best_quality = rc->best_quality;
+ }
}
- bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
- // Update buffer-related quantities.
- lrc->starting_buffer_level =
- (int64_t)(rc->starting_buffer_level * bitrate_alloc);
- lrc->optimal_buffer_level =
- (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
- lrc->maximum_buffer_size =
- (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
- lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size);
- lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
- // Update framerate-related quantities.
+ } else {
+ int layer_end;
+
if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
- lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
+ layer_end = svc->number_temporal_layers;
} else {
- lc->framerate = cpi->framerate;
+ layer_end = svc->number_spatial_layers;
+ }
+
+ for (layer = 0; layer < layer_end; ++layer) {
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+
+ lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
+
+ bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
+ // Update buffer-related quantities.
+ lrc->starting_buffer_level =
+ (int64_t)(rc->starting_buffer_level * bitrate_alloc);
+ lrc->optimal_buffer_level =
+ (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
+ lrc->maximum_buffer_size =
+ (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
+ lrc->bits_off_target = MIN(lrc->bits_off_target,
+ lrc->maximum_buffer_size);
+ lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size);
+ // Update framerate-related quantities.
+ if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
+ lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
+ } else {
+ lc->framerate = cpi->framerate;
+ }
+ lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
+ lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
+ // Update qp-related quantities.
+ lrc->worst_quality = rc->worst_quality;
+ lrc->best_quality = rc->best_quality;
}
- lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
- lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
- // Update qp-related quantities.
- lrc->worst_quality = rc->worst_quality;
- lrc->best_quality = rc->best_quality;
}
}
static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) {
- return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ?
- &cpi->svc.layer_context[cpi->svc.temporal_layer_id] :
- &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ if (is_one_pass_cbr_svc(cpi))
+ return &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id];
+ else
+ return (cpi->svc.number_temporal_layers > 1 &&
+ cpi->oxcf.rc_mode == VPX_CBR) ?
+ &cpi->svc.layer_context[cpi->svc.temporal_layer_id] :
+ &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
}
void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) {
@@ -166,18 +207,22 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
LAYER_CONTEXT *const lc = get_layer_context(cpi);
RATE_CONTROL *const lrc = &lc->rc;
- const int layer = svc->temporal_layer_id;
+ // Index into spatial+temporal arrays.
+ const int st_idx = svc->spatial_layer_id * svc->number_temporal_layers +
+ svc->temporal_layer_id;
+ const int tl = svc->temporal_layer_id;
- lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
+ lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth;
// Update the average layer frame size (non-cumulative per-frame-bw).
- if (layer == 0) {
+ if (tl == 0) {
lc->avg_frame_size = lrc->avg_frame_bandwidth;
} else {
const double prev_layer_framerate =
- cpi->framerate / oxcf->ts_rate_decimator[layer - 1];
- const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1];
+ cpi->framerate / oxcf->ts_rate_decimator[tl - 1];
+ const int prev_layer_target_bandwidth =
+ oxcf->layer_target_bitrate[st_idx - 1];
lc->avg_frame_size =
(int)((lc->target_bandwidth - prev_layer_target_bandwidth) /
(lc->framerate - prev_layer_framerate));
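
As a worked example (illustrative numbers, assuming bitrates in bits per second): if the tl-1 layer targets 200000 bps at 15 fps and this layer's cumulative target is 400000 bps at 30 fps, the expression above attributes (400000 - 200000) / (30 - 15) = 13333 bits to each frame that exists only in this temporal layer.
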
@@ -243,9 +288,8 @@ void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) {
void vp9_inc_frame_in_layer(VP9_COMP *const cpi) {
LAYER_CONTEXT *const lc =
- (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ?
- &cpi->svc.layer_context[cpi->svc.temporal_layer_id] :
- &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
+ &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers];
++lc->current_video_frame_in_layer;
++lc->frames_from_key_frame;
}
@@ -253,10 +297,11 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) {
int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
return is_two_pass_svc(cpi) &&
cpi->svc.spatial_layer_id > 0 &&
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame;
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id].is_key_frame;
}
-#if CONFIG_SPATIAL_SVC
static void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den,
int *width_out, int *height_out) {
@@ -276,6 +321,201 @@ static void get_layer_resolution(const int width_org, const int height_org,
*height_out = h;
}
+// The function sets proper ref_frame_flags, buffer indices, and buffer update
+// variables for temporal layering mode 3 - the 0-2-1-2 temporal layering
+// scheme.
+static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
+ int frame_num_within_temporal_struct = 0;
+ int spatial_id, temporal_id;
+ spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
+ frame_num_within_temporal_struct =
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers].current_video_frame_in_layer % 4;
+ temporal_id = cpi->svc.temporal_layer_id =
+ (frame_num_within_temporal_struct & 1) ? 2 :
+ (frame_num_within_temporal_struct >> 1);
+ cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame =
+ cpi->ext_refresh_alt_ref_frame = 0;
+ if (!temporal_id) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_last_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+ // base layer is a key frame.
+ cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ } else if (temporal_id == 1) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ } else {
+ if (frame_num_within_temporal_struct == 1) {
+ // the first tl2 picture
+ if (!spatial_id) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ } else { // Top layer
+ cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ } else {
+ // The second tl2 picture
+ if (!spatial_id) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ cpi->ext_refresh_last_frame = 1;
+ } else if (spatial_id < cpi->svc.number_spatial_layers - 1) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ cpi->ext_refresh_last_frame = 1;
+ } else { // top layer
+ cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ }
+ }
+ if (temporal_id == 0) {
+ cpi->lst_fb_idx = spatial_id;
+ if (spatial_id)
+ cpi->gld_fb_idx = spatial_id - 1;
+ else
+ cpi->gld_fb_idx = 0;
+ cpi->alt_fb_idx = 0;
+ } else if (temporal_id == 1) {
+ cpi->lst_fb_idx = spatial_id;
+ cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
+ cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
+ } else if (frame_num_within_temporal_struct == 1) {
+ cpi->lst_fb_idx = spatial_id;
+ cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
+ cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
+ } else {
+ cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
+ cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
+ cpi->alt_fb_idx = 0;
+ }
+}
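
A quick check of the frame-to-temporal-layer mapping implemented above:

    // frame_num_within_temporal_struct (frame % 4):  0  1  2  3
    // temporal_id = (f & 1) ? 2 : (f >> 1)        :  0  2  1  2
    // i.e. the 0-2-1-2 pattern named in the comment, repeating every 4 frames.
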
+
+// The function sets proper ref_frame_flags, buffer indices, and buffer update
+// variables for temporal layering mode 2 - the 0-1-0-1 temporal layering
+// scheme.
+static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
+ int spatial_id, temporal_id;
+ spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
+ temporal_id = cpi->svc.temporal_layer_id =
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers].current_video_frame_in_layer & 1;
+ cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame =
+ cpi->ext_refresh_alt_ref_frame = 0;
+ if (!temporal_id) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_last_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else if (cpi->svc.layer_context[temporal_id].is_key_frame) {
+ // base layer is a key frame.
+ cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ } else if (temporal_id == 1) {
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ }
+
+ if (temporal_id == 0) {
+ cpi->lst_fb_idx = spatial_id;
+ if (spatial_id)
+ cpi->gld_fb_idx = spatial_id - 1;
+ else
+ cpi->gld_fb_idx = 0;
+ cpi->alt_fb_idx = 0;
+ } else if (temporal_id == 1) {
+ cpi->lst_fb_idx = spatial_id;
+ cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
+ cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
+ }
+}
+
+// The function sets proper ref_frame_flags, buffer indices, and buffer update
+// variables for temporal layering mode 0 - that has no temporal layering.
+static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
+ VP9_COMP *const cpi) {
+ int spatial_id;
+ spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
+ cpi->ext_refresh_last_frame =
+ cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0;
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_last_frame = 1;
+ if (!spatial_id) {
+ cpi->ref_frame_flags = VP9_LAST_FLAG;
+ } else if (cpi->svc.layer_context[0].is_key_frame) {
+ cpi->ref_frame_flags = VP9_GOLD_FLAG;
+ } else {
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ cpi->lst_fb_idx = spatial_id;
+ if (spatial_id)
+ cpi->gld_fb_idx = spatial_id - 1;
+ else
+ cpi->gld_fb_idx = 0;
+}
+
+int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
+ int width = 0, height = 0;
+ LAYER_CONTEXT *lc = NULL;
+
+ if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
+ set_flags_and_fb_idx_for_temporal_mode3(cpi);
+ } else if (cpi->svc.temporal_layering_mode ==
+ VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
+ set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi);
+ } else if (cpi->svc.temporal_layering_mode ==
+ VP9E_TEMPORAL_LAYERING_MODE_0101) {
+ set_flags_and_fb_idx_for_temporal_mode2(cpi);
+ } else if (cpi->svc.temporal_layering_mode ==
+ VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ // VP9E_TEMPORAL_LAYERING_MODE_BYPASS :
+ // if the code goes here, it means the encoder will be relying on the
+ // flags from outside for layering.
+ // However, since when spatial+temporal layering is used, the buffer indices
+ // cannot be derived automatically, the bypass mode will only work when the
+ // number of spatial layers equals 1.
+ assert(cpi->svc.number_spatial_layers == 1);
+ }
+
+ lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id];
+
+ get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
+ lc->scaling_factor_num, lc->scaling_factor_den,
+ &width, &height);
+
+ if (vp9_set_size_literal(cpi, width, height) != 0)
+ return VPX_CODEC_INVALID_PARAM;
+
+ return 0;
+}
+
+#if CONFIG_SPATIAL_SVC
int vp9_svc_start_frame(VP9_COMP *const cpi) {
int width = 0, height = 0;
LAYER_CONTEXT *lc;
@@ -362,20 +602,11 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
cpi->lst_fb_idx =
cpi->gld_fb_idx = cpi->alt_fb_idx = SMALL_FRAME_FB_IDX;
- // Gradually make the empty frame smaller to save bits. Make it half of
- // its previous size because of the scaling factor restriction.
- cpi->svc.empty_frame_width >>= 1;
- cpi->svc.empty_frame_width = (cpi->svc.empty_frame_width + 1) & ~1;
- if (cpi->svc.empty_frame_width < 16)
- cpi->svc.empty_frame_width = 16;
+ if (cpi->svc.encode_intra_empty_frame != 0)
+ cpi->common.intra_only = 1;
- cpi->svc.empty_frame_height >>= 1;
- cpi->svc.empty_frame_height = (cpi->svc.empty_frame_height + 1) & ~1;
- if (cpi->svc.empty_frame_height < 16)
- cpi->svc.empty_frame_height = 16;
-
- width = cpi->svc.empty_frame_width;
- height = cpi->svc.empty_frame_height;
+ width = SMALL_FRAME_WIDTH;
+ height = SMALL_FRAME_HEIGHT;
}
}
}
@@ -395,11 +626,12 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) {
return 0;
}
+#endif
+
struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi,
struct lookahead_ctx *ctx,
int drain) {
struct lookahead_entry *buf = NULL;
-
if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) {
buf = vp9_lookahead_peek(ctx, 0);
if (buf != NULL) {
@@ -409,7 +641,5 @@ struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi,
}
}
}
-
return buf;
}
-#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
index e9645ce9f24..b6a5ea54835 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -22,6 +22,7 @@ extern "C" {
typedef struct {
RATE_CONTROL rc;
int target_bandwidth;
+ int spatial_layer_target_bandwidth; // Target for the spatial layer.
double framerate;
int avg_frame_size;
int max_q;
@@ -57,17 +58,18 @@ typedef struct {
NEED_TO_ENCODE
}encode_empty_frame_state;
struct lookahead_entry empty_frame;
- int empty_frame_width;
- int empty_frame_height;
+ int encode_intra_empty_frame;
// Store scaled source frames to be used by the temporal filter to generate
// an alt ref frame.
YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS];
// Layer context used for rate control in one pass temporal CBR mode or
- // two pass spatial mode. Defined for temporal or spatial layers for now.
- // Does not support temporal combined with spatial RC.
- LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)];
+ // two pass spatial mode.
+ LAYER_CONTEXT layer_context[VPX_MAX_LAYERS];
+ // Indicates what sort of temporal layering is used.
+ // Currently, this only works for CBR mode.
+ VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode;
} SVC;
struct VP9_COMP;
@@ -111,6 +113,8 @@ struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi,
// Start a frame and initialize svc parameters
int vp9_svc_start_frame(struct VP9_COMP *const cpi);
+int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c
index d7979ab53a5..24b6203cb66 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.c
@@ -23,7 +23,9 @@
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
+#include "vp9/encoder/vp9_temporal_filter.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
#include "vpx_scale/vpx_scale.h"
@@ -109,7 +111,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
kernel, mv_precision_uv, x, y);
}
-void vp9_temporal_filter_init() {
+void vp9_temporal_filter_init(void) {
int i;
fixed_divide[0] = 0;
@@ -680,7 +682,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
if (frames_to_blur > 0) {
// Setup scaling factors. Scaling on each of the arnr frames is not
// supported.
- if (is_two_pass_svc(cpi)) {
+ if (cpi->use_svc) {
// In spatial svc the scaling factors might be less than 1/2.
// So we will use non-normative scaling.
int frame_used = 0;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h
index a971e0ae365..f537b8870a6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_temporal_filter.h
@@ -15,7 +15,7 @@
extern "C" {
#endif
-void vp9_temporal_filter_init();
+void vp9_temporal_filter_init(void);
void vp9_temporal_filter(VP9_COMP *cpi, int distance);
#ifdef __cplusplus
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c
index 862be4d384a..181a99ce880 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_tokenize.c
@@ -17,6 +17,7 @@
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_pred_common.h"
+#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/encoder/vp9_cost.h"
@@ -483,7 +484,7 @@ static INLINE void add_token_no_extra(TOKENEXTRA **t,
static INLINE int get_tx_eob(const struct segmentation *seg, int segment_id,
TX_SIZE tx_size) {
const int eob_max = 16 << (tx_size << 1);
- return vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
+ return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 0 : eob_max;
}
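
For reference, eob_max = 16 << (tx_size << 1) evaluates to 16, 64, 256 and 1024 coefficients for the 4x4, 8x8, 16x16 and 32x32 transform sizes respectively, assuming the usual TX_SIZE ordering (TX_4X4 == 0 through TX_32X32 == 3).
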
static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize,
@@ -614,8 +615,8 @@ void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const int ctx = vp9_get_skip_context(xd);
- const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id,
- SEG_LVL_SKIP);
+ const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+ SEG_LVL_SKIP);
struct tokenize_b_args arg = {cpi, td, t};
if (mbmi->skip) {
if (!dry_run)
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c
index f38f96d6c26..c571b7c9545 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.c
@@ -9,6 +9,7 @@
*/
#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_integer.h"
@@ -18,25 +19,16 @@
#include "vp9/encoder/vp9_variance.h"
-void variance(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int w, int h, unsigned int *sse, int *sum) {
- int i, j;
-
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = a[j] - b[j];
- *sum += diff;
- *sse += diff * diff;
- }
-
- a += a_stride;
- b += b_stride;
- }
-}
+static uint8_t bilinear_filters[8][2] = {
+ { 128, 0, },
+ { 112, 16, },
+ { 96, 32, },
+ { 80, 48, },
+ { 64, 64, },
+ { 48, 80, },
+ { 32, 96, },
+ { 16, 112, },
+};
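
Each pair above sums to 128 (1 << 7), i.e. a fixed-point 2-tap weight. A minimal sketch of how one pair blends two neighbouring pixels, assuming the usual rounding shift used by the filter passes (their bodies lie outside this hunk):

    static unsigned char bilinear_blend(unsigned char a, unsigned char b,
                                        const unsigned char filter[2]) {
      // filter[0] + filter[1] == 128, so add half (64) and shift by 7 to round.
      return (unsigned char)((a * filter[0] + b * filter[1] + 64) >> 7);
    }
    // e.g. bilinear_blend(10, 20, bilinear_filters[4]) == 15 (the {64, 64} pair).
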
// Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
// or vertical direction to produce the filtered output block. Used to implement
@@ -52,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
@@ -84,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
@@ -100,25 +92,6 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
}
}
-unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
- unsigned int i, sum = 0;
-
- for (i = 0; i < 256; ++i) {
- sum += src_ptr[i] * src_ptr[i];
- }
-
- return sum;
-}
-
-#define VAR(W, H) \
-unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- unsigned int *sse) { \
- int sum; \
- variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (((int64_t)sum * sum) / (W * H)); \
-}
-
#define SUBPIX_VAR(W, H) \
unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
const uint8_t *src, int src_stride, \
@@ -129,11 +102,11 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
uint8_t temp2[H * W]; \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
- BILINEAR_FILTERS_2TAP(xoffset)); \
+ bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
- return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
+ return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
}
#define SUBPIX_AVG_VAR(W, H) \
@@ -148,182 +121,55 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
\
var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
- BILINEAR_FILTERS_2TAP(xoffset)); \
+ bilinear_filters[xoffset]); \
var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
- vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
+ vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
\
- return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
-}
-
-void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride,
- const uint8_t *ref_ptr, int ref_stride,
- unsigned int *sse, int *sum) {
- variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum);
-}
-
-void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride,
- const uint8_t *ref_ptr, int ref_stride,
- unsigned int *sse, int *sum) {
- variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum);
-}
-
-unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum);
- return *sse;
-}
-
-unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum);
- return *sse;
-}
-
-unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum);
- return *sse;
-}
-
-unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum);
- return *sse;
+ return vpx_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \
}
-VAR(4, 4)
SUBPIX_VAR(4, 4)
SUBPIX_AVG_VAR(4, 4)
-VAR(4, 8)
SUBPIX_VAR(4, 8)
SUBPIX_AVG_VAR(4, 8)
-VAR(8, 4)
SUBPIX_VAR(8, 4)
SUBPIX_AVG_VAR(8, 4)
-VAR(8, 8)
SUBPIX_VAR(8, 8)
SUBPIX_AVG_VAR(8, 8)
-VAR(8, 16)
SUBPIX_VAR(8, 16)
SUBPIX_AVG_VAR(8, 16)
-VAR(16, 8)
SUBPIX_VAR(16, 8)
SUBPIX_AVG_VAR(16, 8)
-VAR(16, 16)
SUBPIX_VAR(16, 16)
SUBPIX_AVG_VAR(16, 16)
-VAR(16, 32)
SUBPIX_VAR(16, 32)
SUBPIX_AVG_VAR(16, 32)
-VAR(32, 16)
SUBPIX_VAR(32, 16)
SUBPIX_AVG_VAR(32, 16)
-VAR(32, 32)
SUBPIX_VAR(32, 32)
SUBPIX_AVG_VAR(32, 32)
-VAR(32, 64)
SUBPIX_VAR(32, 64)
SUBPIX_AVG_VAR(32, 64)
-VAR(64, 32)
SUBPIX_VAR(64, 32)
SUBPIX_AVG_VAR(64, 32)
-VAR(64, 64)
SUBPIX_VAR(64, 64)
SUBPIX_AVG_VAR(64, 64)
-void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, const uint8_t *ref, int ref_stride) {
- int i, j;
-
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- const int tmp = pred[j] + ref[j];
- comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
- }
- comp_pred += width;
- pred += width;
- ref += ref_stride;
- }
-}
-
#if CONFIG_VP9_HIGHBITDEPTH
-void highbd_variance64(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h, uint64_t *sse,
- uint64_t *sum) {
- int i, j;
-
- uint16_t *a = CONVERT_TO_SHORTPTR(a8);
- uint16_t *b = CONVERT_TO_SHORTPTR(b8);
- *sum = 0;
- *sse = 0;
-
- for (i = 0; i < h; i++) {
- for (j = 0; j < w; j++) {
- const int diff = a[j] - b[j];
- *sum += diff;
- *sse += diff * diff;
- }
- a += a_stride;
- b += b_stride;
- }
-}
-
-void highbd_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h, unsigned int *sse,
- int *sum) {
- uint64_t sse_long = 0;
- uint64_t sum_long = 0;
- highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
- *sse = (unsigned int)sse_long;
- *sum = (int)sum_long;
-}
-
-void highbd_10_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h, unsigned int *sse,
- int *sum) {
- uint64_t sse_long = 0;
- uint64_t sum_long = 0;
- highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
- *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
- *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4);
-}
-
-void highbd_12_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h, unsigned int *sse,
- int *sum) {
- uint64_t sse_long = 0;
- uint64_t sum_long = 0;
- highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
- *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
- *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8);
-}
-
static void highbd_var_filter_block2d_bil_first_pass(
const uint8_t *src_ptr8,
uint16_t *output_ptr,
@@ -331,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass(
int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
for (i = 0; i < output_height; i++) {
@@ -357,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass(
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
- const int16_t *vp9_filter) {
+ const uint8_t *vp9_filter) {
unsigned int i, j;
for (i = 0; i < output_height; i++) {
@@ -374,35 +220,6 @@ static void highbd_var_filter_block2d_bil_second_pass(
}
}
-#define HIGHBD_VAR(W, H) \
-unsigned int vp9_highbd_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
- const uint8_t *b, int b_stride, \
- unsigned int *sse) { \
- int sum; \
- highbd_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (((int64_t)sum * sum) / (W * H)); \
-} \
-\
-unsigned int vp9_highbd_10_variance##W##x##H##_c(const uint8_t *a, \
- int a_stride, \
- const uint8_t *b, \
- int b_stride, \
- unsigned int *sse) { \
- int sum; \
- highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (((int64_t)sum * sum) / (W * H)); \
-} \
-\
-unsigned int vp9_highbd_12_variance##W##x##H##_c(const uint8_t *a, \
- int a_stride, \
- const uint8_t *b, \
- int b_stride, \
- unsigned int *sse) { \
- int sum; \
- highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
- return *sse - (((int64_t)sum * sum) / (W * H)); \
-}
-
#define HIGHBD_SUBPIX_VAR(W, H) \
unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
const uint8_t *src, int src_stride, \
@@ -413,11 +230,11 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
- return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
+ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
} \
\
@@ -430,11 +247,11 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
- return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
+ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
} \
\
@@ -447,11 +264,11 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \
uint16_t temp2[H * W]; \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
- return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
+ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
W, dst, dst_stride, sse); \
}
@@ -467,14 +284,14 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
- vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \
+ vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
\
- return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
+ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
dst_stride, sse); \
} \
\
@@ -489,14 +306,14 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
- vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \
+ vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
\
- return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
+ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
W, dst, dst_stride, sse); \
} \
\
@@ -511,141 +328,53 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
\
highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
- W, BILINEAR_FILTERS_2TAP(xoffset)); \
+ W, bilinear_filters[xoffset]); \
highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
- BILINEAR_FILTERS_2TAP(yoffset)); \
+ bilinear_filters[yoffset]); \
\
- vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \
+ vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
CONVERT_TO_BYTEPTR(temp2), W); \
\
- return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
+ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \
W, dst, dst_stride, sse); \
}
-#define HIGHBD_GET_VAR(S) \
-void vp9_highbd_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- unsigned int *sse, int *sum) { \
- highbd_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
-} \
-\
-void vp9_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- unsigned int *sse, int *sum) { \
- highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
-} \
-\
-void vp9_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
- const uint8_t *ref, int ref_stride, \
- unsigned int *sse, int *sum) { \
- highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
-}
-
-#define HIGHBD_MSE(W, H) \
-unsigned int vp9_highbd_mse##W##x##H##_c(const uint8_t *src, \
- int src_stride, \
- const uint8_t *ref, \
- int ref_stride, \
- unsigned int *sse) { \
- int sum; \
- highbd_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
- return *sse; \
-} \
-\
-unsigned int vp9_highbd_10_mse##W##x##H##_c(const uint8_t *src, \
- int src_stride, \
- const uint8_t *ref, \
- int ref_stride, \
- unsigned int *sse) { \
- int sum; \
- highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
- return *sse; \
-} \
-\
-unsigned int vp9_highbd_12_mse##W##x##H##_c(const uint8_t *src, \
- int src_stride, \
- const uint8_t *ref, \
- int ref_stride, \
- unsigned int *sse) { \
- int sum; \
- highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
- return *sse; \
-}
-
-HIGHBD_GET_VAR(8)
-HIGHBD_GET_VAR(16)
-
-HIGHBD_MSE(16, 16)
-HIGHBD_MSE(16, 8)
-HIGHBD_MSE(8, 16)
-HIGHBD_MSE(8, 8)
-
-HIGHBD_VAR(4, 4)
HIGHBD_SUBPIX_VAR(4, 4)
HIGHBD_SUBPIX_AVG_VAR(4, 4)
-HIGHBD_VAR(4, 8)
HIGHBD_SUBPIX_VAR(4, 8)
HIGHBD_SUBPIX_AVG_VAR(4, 8)
-HIGHBD_VAR(8, 4)
HIGHBD_SUBPIX_VAR(8, 4)
HIGHBD_SUBPIX_AVG_VAR(8, 4)
-HIGHBD_VAR(8, 8)
HIGHBD_SUBPIX_VAR(8, 8)
HIGHBD_SUBPIX_AVG_VAR(8, 8)
-HIGHBD_VAR(8, 16)
HIGHBD_SUBPIX_VAR(8, 16)
HIGHBD_SUBPIX_AVG_VAR(8, 16)
-HIGHBD_VAR(16, 8)
HIGHBD_SUBPIX_VAR(16, 8)
HIGHBD_SUBPIX_AVG_VAR(16, 8)
-HIGHBD_VAR(16, 16)
HIGHBD_SUBPIX_VAR(16, 16)
HIGHBD_SUBPIX_AVG_VAR(16, 16)
-HIGHBD_VAR(16, 32)
HIGHBD_SUBPIX_VAR(16, 32)
HIGHBD_SUBPIX_AVG_VAR(16, 32)
-HIGHBD_VAR(32, 16)
HIGHBD_SUBPIX_VAR(32, 16)
HIGHBD_SUBPIX_AVG_VAR(32, 16)
-HIGHBD_VAR(32, 32)
HIGHBD_SUBPIX_VAR(32, 32)
HIGHBD_SUBPIX_AVG_VAR(32, 32)
-HIGHBD_VAR(32, 64)
HIGHBD_SUBPIX_VAR(32, 64)
HIGHBD_SUBPIX_AVG_VAR(32, 64)
-HIGHBD_VAR(64, 32)
HIGHBD_SUBPIX_VAR(64, 32)
HIGHBD_SUBPIX_AVG_VAR(64, 32)
-HIGHBD_VAR(64, 64)
HIGHBD_SUBPIX_VAR(64, 64)
HIGHBD_SUBPIX_AVG_VAR(64, 64)
-
-void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
- int width, int height, const uint8_t *ref8,
- int ref_stride) {
- int i, j;
- uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- const int tmp = pred[j] + ref[j];
- comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
- }
- comp_pred += width;
- pred += width;
- ref += ref_stride;
- }
-}
#endif // CONFIG_VP9_HIGHBITDEPTH
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.h
index 53148f23c56..0a8739510f4 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_variance.h
@@ -12,33 +12,12 @@
#define VP9_ENCODER_VP9_VARIANCE_H_
#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
#ifdef __cplusplus
extern "C" {
#endif
-void variance(const uint8_t *a, int a_stride,
- const uint8_t *b, int b_stride,
- int w, int h,
- unsigned int *sse, int *sum);
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void highbd_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h,
- unsigned int *sse, int *sum);
-
-void highbd_10_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h,
- unsigned int *sse, int *sum);
-
-void highbd_12_variance(const uint8_t *a8, int a_stride,
- const uint8_t *b8, int b_stride,
- int w, int h,
- unsigned int *sse, int *sum);
-#endif
-
typedef unsigned int(*vp9_sad_fn_t)(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
@@ -95,15 +74,6 @@ typedef struct vp9_variance_vtable {
vp9_sad_multi_d_fn_t sdx4df;
} vp9_variance_fn_ptr_t;
-void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
- int height, const uint8_t *ref, int ref_stride);
-
-#if CONFIG_VP9_HIGHBITDEPTH
-void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred,
- int width, int height,
- const uint8_t *ref, int ref_stride);
-#endif
-
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h
index 9d161f95cf6..e347ea41441 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_writer.h
@@ -19,7 +19,7 @@
extern "C" {
#endif
-typedef struct {
+typedef struct vp9_writer {
unsigned int lowvalue;
unsigned int range;
int count;
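
Giving the bool-coder struct the tag vp9_writer (it was previously anonymous) lets other headers refer to it by forward declaration instead of pulling in vp9_writer.h. A minimal sketch of such a consumer (hypothetical helper, not from the patch):

    /* hypothetical_consumer.h -- only needs the tag, not the definition. */
    struct vp9_writer;                                  /* defined in vp9/encoder/vp9_writer.h */
    void log_writer_state(const struct vp9_writer *w);  /* hypothetical helper */
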
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c
index 4672aa6b8cf..4531d794a9c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_avg_intrin_sse2.c
@@ -9,6 +9,8 @@
*/
#include <emmintrin.h>
+
+#include "./vp9_rtcd.h"
#include "vpx_ports/mem.h"
void vp9_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp,
@@ -262,17 +264,18 @@ void vp9_hadamard_16x16_sse2(int16_t const *src_diff, int src_stride,
__m128i b2 = _mm_add_epi16(coeff2, coeff3);
__m128i b3 = _mm_sub_epi16(coeff2, coeff3);
+ b0 = _mm_srai_epi16(b0, 1);
+ b1 = _mm_srai_epi16(b1, 1);
+ b2 = _mm_srai_epi16(b2, 1);
+ b3 = _mm_srai_epi16(b3, 1);
+
coeff0 = _mm_add_epi16(b0, b2);
coeff1 = _mm_add_epi16(b1, b3);
- coeff0 = _mm_srai_epi16(coeff0, 1);
- coeff1 = _mm_srai_epi16(coeff1, 1);
_mm_store_si128((__m128i *)coeff, coeff0);
_mm_store_si128((__m128i *)(coeff + 64), coeff1);
coeff2 = _mm_sub_epi16(b0, b2);
coeff3 = _mm_sub_epi16(b1, b3);
- coeff2 = _mm_srai_epi16(coeff2, 1);
- coeff3 = _mm_srai_epi16(coeff3, 1);
_mm_store_si128((__m128i *)(coeff + 128), coeff2);
_mm_store_si128((__m128i *)(coeff + 192), coeff3);
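
The reordering above applies the >>1 to b0..b3 before the final add/sub pairs rather than to each stored output, presumably so the combined values stay inside the 16-bit SIMD lanes. A one-lane scalar sketch of the patched combine step (int16_t stands in for one lane of the __m128i registers):

    #include <stdint.h>

    /* One lane of the patched second stage of the 16x16 Hadamard combine. */
    static void hadamard_combine_lane(int16_t c0, int16_t c1, int16_t c2,
                                      int16_t c3, int16_t *coeff) {
      int16_t b0 = (int16_t)(c0 + c1), b1 = (int16_t)(c0 - c1);
      int16_t b2 = (int16_t)(c2 + c3), b3 = (int16_t)(c2 - c3);
      b0 >>= 1; b1 >>= 1; b2 >>= 1; b3 >>= 1;   /* halve before the cross sums */
      coeff[0]   = (int16_t)(b0 + b2);
      coeff[64]  = (int16_t)(b1 + b3);
      coeff[128] = (int16_t)(b0 - b2);
      coeff[192] = (int16_t)(b1 - b3);
    }
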
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h
index 66827ad8037..ae6bfe5fa2d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_avx2_impl.h
@@ -9,6 +9,8 @@
*/
#include <immintrin.h> // AVX2
+
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vpx_ports/mem.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
index 099993aa6a0..003ebd13fe3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct32x32_sse2_impl.h
@@ -9,6 +9,8 @@
*/
#include <emmintrin.h> // SSE2
+
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vp9/encoder/x86/vp9_dct_sse2.h"
#include "vp9/encoder/vp9_dct.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c
index 3a19f52746c..8f3b61ad86d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_avx2.c
@@ -15,12 +15,12 @@
#define FDCT32x32_2D_AVX2 vp9_fdct32x32_rd_avx2
#define FDCT32x32_HIGH_PRECISION 0
-#include "vp9/encoder/x86/vp9_dct32x32_avx2.c"
+#include "vp9/encoder/x86/vp9_dct32x32_avx2_impl.h"
#undef FDCT32x32_2D_AVX2
#undef FDCT32x32_HIGH_PRECISION
#define FDCT32x32_2D_AVX2 vp9_fdct32x32_avx2
#define FDCT32x32_HIGH_PRECISION 1
-#include "vp9/encoder/x86/vp9_dct32x32_avx2.c" // NOLINT
+#include "vp9/encoder/x86/vp9_dct32x32_avx2_impl.h" // NOLINT
#undef FDCT32x32_2D_AVX2
#undef FDCT32x32_HIGH_PRECISION
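
Renaming the shared 32x32 DCT body from a .c file to an _impl.h header makes the textual-template pattern explicit: the including file sets the output function name (and precision flag) via macros, includes the body, undefines the macros, and repeats with different settings. A minimal sketch of the same idiom with hypothetical names (scale_kernel_impl.h, scale_by_2/scale_by_4 and SCALE are illustrative, not libvpx macros):

    /* --- scale_kernel_impl.h: one function is emitted per inclusion --- */
    void KERNEL_NAME(int *data, int n) {
      int i;
      for (i = 0; i < n; i++)
        data[i] *= SCALE;            /* SCALE is provided by the includer */
    }

    /* --- wrapper.c: two expansions of the same body --- */
    #define KERNEL_NAME scale_by_2
    #define SCALE 2
    #include "scale_kernel_impl.h"
    #undef KERNEL_NAME
    #undef SCALE

    #define KERNEL_NAME scale_by_4
    #define SCALE 4
    #include "scale_kernel_impl.h"   /* second expansion, different symbol */
    #undef KERNEL_NAME
    #undef SCALE
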
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
index 564b7955e5b..cff4fcbdce0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2.c
@@ -10,6 +10,8 @@
#include <assert.h>
#include <emmintrin.h> // SSE2
+
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/x86/vp9_dct_sse2.h"
@@ -96,7 +98,7 @@ static INLINE void transpose_4x4(__m128i *res) {
res[3] = _mm_unpackhi_epi64(res[2], res[2]);
}
-void fdct4_sse2(__m128i *in) {
+static void fdct4_sse2(__m128i *in) {
const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64);
@@ -129,7 +131,7 @@ void fdct4_sse2(__m128i *in) {
transpose_4x4(in);
}
-void fadst4_sse2(__m128i *in) {
+static void fadst4_sse2(__m128i *in) {
const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9);
const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9);
const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9);
@@ -831,7 +833,7 @@ static INLINE void array_transpose_8x8(__m128i *in, __m128i *res) {
// 07 17 27 37 47 57 67 77
}
-void fdct8_sse2(__m128i *in) {
+static void fdct8_sse2(__m128i *in) {
// constants
const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
@@ -971,7 +973,7 @@ void fdct8_sse2(__m128i *in) {
array_transpose_8x8(in, in);
}
-void fadst8_sse2(__m128i *in) {
+static void fadst8_sse2(__m128i *in) {
// Constants
const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64);
const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64);
@@ -1353,7 +1355,7 @@ static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) {
right_shift_8x8(res1 + 8, 2);
}
-void fdct16_8col(__m128i *in) {
+static void fdct16_8col(__m128i *in) {
// perform 16x16 1-D DCT for 8 columns
__m128i i[8], s[8], p[8], t[8], u[16], v[16];
const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64);
@@ -1675,7 +1677,7 @@ void fdct16_8col(__m128i *in) {
in[15] = _mm_packs_epi32(v[14], v[15]);
}
-void fadst16_8col(__m128i *in) {
+static void fadst16_8col(__m128i *in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -2145,13 +2147,13 @@ void fadst16_8col(__m128i *in) {
in[15] = _mm_sub_epi16(kZero, s[1]);
}
-void fdct16_sse2(__m128i *in0, __m128i *in1) {
+static void fdct16_sse2(__m128i *in0, __m128i *in1) {
fdct16_8col(in0);
fdct16_8col(in1);
array_transpose_16x16(in0, in1);
}
-void fadst16_sse2(__m128i *in0, __m128i *in1) {
+static void fadst16_sse2(__m128i *in0, __m128i *in1) {
fadst16_8col(in0);
fadst16_8col(in1);
array_transpose_16x16(in0, in1);
@@ -2334,7 +2336,7 @@ void vp9_highbd_fht8x8_sse2(const int16_t *input, tran_low_t *output,
}
}
-void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output,
+void vp9_highbd_fht16x16_sse2(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vp9_highbd_fdct16x16_sse2(input, output, stride);
@@ -2368,8 +2370,8 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output,
/*
* The DCTnxn functions are defined using the macros below. The main code for
- * them is in separate files (vp9/encoder/x86/vp9_dct_impl_sse2.c &
- * vp9/encoder/x86/vp9_dct32x32_sse2.c) which are used by both the 8 bit code
+ * them is in separate files (vp9/encoder/x86/vp9_dct_sse2_impl.h &
+ * vp9/encoder/x86/vp9_dct32x32_sse2_impl.h) which are used by both the 8 bit code
* and the high bit depth code.
*/
@@ -2378,20 +2380,20 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output,
#define FDCT4x4_2D vp9_fdct4x4_sse2
#define FDCT8x8_2D vp9_fdct8x8_sse2
#define FDCT16x16_2D vp9_fdct16x16_sse2
-#include "vp9/encoder/x86/vp9_dct_impl_sse2.c"
+#include "vp9/encoder/x86/vp9_dct_sse2_impl.h"
#undef FDCT4x4_2D
#undef FDCT8x8_2D
#undef FDCT16x16_2D
#define FDCT32x32_2D vp9_fdct32x32_rd_sse2
#define FDCT32x32_HIGH_PRECISION 0
-#include "vp9/encoder/x86/vp9_dct32x32_sse2.c"
+#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h"
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
#define FDCT32x32_2D vp9_fdct32x32_sse2
#define FDCT32x32_HIGH_PRECISION 1
-#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT
+#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
@@ -2405,20 +2407,20 @@ void vp9_highbd_fht16x16_sse2(int16_t *input, tran_low_t *output,
#define FDCT4x4_2D vp9_highbd_fdct4x4_sse2
#define FDCT8x8_2D vp9_highbd_fdct8x8_sse2
#define FDCT16x16_2D vp9_highbd_fdct16x16_sse2
-#include "vp9/encoder/x86/vp9_dct_impl_sse2.c" // NOLINT
+#include "vp9/encoder/x86/vp9_dct_sse2_impl.h" // NOLINT
#undef FDCT4x4_2D
#undef FDCT8x8_2D
#undef FDCT16x16_2D
#define FDCT32x32_2D vp9_highbd_fdct32x32_rd_sse2
#define FDCT32x32_HIGH_PRECISION 0
-#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT
+#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
#define FDCT32x32_2D vp9_highbd_fdct32x32_sse2
#define FDCT32x32_HIGH_PRECISION 1
-#include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT
+#include "vp9/encoder/x86/vp9_dct32x32_sse2_impl.h" // NOLINT
#undef FDCT32x32_2D
#undef FDCT32x32_HIGH_PRECISION
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h
index e03a76d2e89..11bf5a25e62 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_impl_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_sse2_impl.h
@@ -9,6 +9,8 @@
*/
#include <emmintrin.h> // SSE2
+
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_idct.h" // for cospi constants
#include "vp9/encoder/vp9_dct.h"
#include "vp9/encoder/x86/vp9_dct_sse2.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
index 1c1005aeeda..96038fee16b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -15,6 +15,8 @@
#include <math.h>
#endif
#include <tmmintrin.h> // SSSE3
+
+#include "./vp9_rtcd.h"
#include "vp9/common/x86/vp9_idct_intrin_sse2.h"
void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c
index c67490fad34..dfebaab0ac6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_intrin_avx2.c
@@ -9,8 +9,9 @@
*/
#include <immintrin.h> // AVX2
-#include "vpx/vpx_integer.h"
+#include "./vp9_rtcd.h"
+#include "vpx/vpx_integer.h"
int64_t vp9_block_error_avx2(const int16_t *coeff,
const int16_t *dqcoeff,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
index ffa43b65a59..cbdd1c93e1c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c
@@ -10,6 +10,7 @@
#include <emmintrin.h>
+#include "vpx_ports/mem.h"
#include "vp9/common/vp9_common.h"
#if CONFIG_VP9_HIGHBITDEPTH
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
index 987729f962c..4594bb1aabd 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_subpel_variance.asm
@@ -14,35 +14,19 @@ SECTION_RODATA
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
- times 8 dw 15
- times 8 dw 1
times 8 dw 14
times 8 dw 2
- times 8 dw 13
- times 8 dw 3
times 8 dw 12
times 8 dw 4
- times 8 dw 11
- times 8 dw 5
times 8 dw 10
times 8 dw 6
- times 8 dw 9
- times 8 dw 7
times 16 dw 8
- times 8 dw 7
- times 8 dw 9
times 8 dw 6
times 8 dw 10
- times 8 dw 5
- times 8 dw 11
times 8 dw 4
times 8 dw 12
- times 8 dw 3
- times 8 dw 13
times 8 dw 2
times 8 dw 14
- times 8 dw 1
- times 8 dw 15
SECTION .text
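
Each row pair of bilin_filter_m_sse2 holds the two bilinear taps (16 - f, f) for one subpel offset f; the patch drops the odd-offset rows, leaving only f = 0, 2, ..., 14. A scalar sketch of what one tap pair computes, assuming the 4-bit filter precision implied by the pw_8 rounding constant:

    #include <stdint.h>

    /* Bilinear interpolation between neighbouring samples a and b for a
     * subpel offset f (0..15); taps sum to 16, rounding is +8 then >>4. */
    static uint8_t bilin_tap(uint8_t a, uint8_t b, int f) {
      return (uint8_t)((a * (16 - f) + b * f + 8) >> 4);
    }
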
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c
index 4bc3e7e2d15..29b7b278217 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_sse2.c
@@ -13,237 +13,6 @@
#include "vp9/encoder/vp9_variance.h"
#include "vpx_ports/mem.h"
-typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- uint32_t *sse, int *sum);
-
-uint32_t vp9_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- uint32_t *sse, int *sum);
-
-uint32_t vp9_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- uint32_t *sse, int *sum);
-
-static void highbd_variance_sse2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- int w, int h, uint32_t *sse, int *sum,
- high_variance_fn_t var_fn, int block_size) {
- int i, j;
-
- *sse = 0;
- *sum = 0;
-
- for (i = 0; i < h; i += block_size) {
- for (j = 0; j < w; j += block_size) {
- unsigned int sse0;
- int sum0;
- var_fn(src + src_stride * i + j, src_stride,
- ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
- *sse += sse0;
- *sum += sum0;
- }
- }
-}
-
-static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- int w, int h, uint32_t *sse, int *sum,
- high_variance_fn_t var_fn, int block_size) {
- int i, j;
- uint64_t sse_long = 0;
- int64_t sum_long = 0;
-
- for (i = 0; i < h; i += block_size) {
- for (j = 0; j < w; j += block_size) {
- unsigned int sse0;
- int sum0;
- var_fn(src + src_stride * i + j, src_stride,
- ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
- sse_long += sse0;
- sum_long += sum0;
- }
- }
- *sum = ROUND_POWER_OF_TWO(sum_long, 2);
- *sse = ROUND_POWER_OF_TWO(sse_long, 4);
-}
-
-static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
- const uint16_t *ref, int ref_stride,
- int w, int h, uint32_t *sse, int *sum,
- high_variance_fn_t var_fn, int block_size) {
- int i, j;
- uint64_t sse_long = 0;
- int64_t sum_long = 0;
-
- for (i = 0; i < h; i += block_size) {
- for (j = 0; j < w; j += block_size) {
- unsigned int sse0;
- int sum0;
- var_fn(src + src_stride * i + j, src_stride,
- ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
- sse_long += sse0;
- sum_long += sum0;
- }
- }
- *sum = ROUND_POWER_OF_TWO(sum_long, 4);
- *sse = ROUND_POWER_OF_TWO(sse_long, 8);
-}
-
-
-#define HIGH_GET_VAR(S) \
-void vp9_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
- const uint8_t *ref8, int ref_stride, \
- uint32_t *sse, int *sum) { \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
- sse, sum); \
-} \
-\
-void vp9_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
- const uint8_t *ref8, int ref_stride, \
- uint32_t *sse, int *sum) { \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
- sse, sum); \
- *sum = ROUND_POWER_OF_TWO(*sum, 2); \
- *sse = ROUND_POWER_OF_TWO(*sse, 4); \
-} \
-\
-void vp9_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
- const uint8_t *ref8, int ref_stride, \
- uint32_t *sse, int *sum) { \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- vp9_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
- sse, sum); \
- *sum = ROUND_POWER_OF_TWO(*sum, 4); \
- *sse = ROUND_POWER_OF_TWO(*sse, 8); \
-}
-
-HIGH_GET_VAR(16);
-HIGH_GET_VAR(8);
-
-#undef HIGH_GET_VAR
-
-#define VAR_FN(w, h, block_size, shift) \
-uint32_t vp9_highbd_variance##w##x##h##_sse2( \
- const uint8_t *src8, int src_stride, \
- const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
- int sum; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- highbd_variance_sse2(src, src_stride, ref, ref_stride, w, h, sse, &sum, \
- vp9_highbd_calc##block_size##x##block_size##var_sse2, \
- block_size); \
- return *sse - (((int64_t)sum * sum) >> shift); \
-} \
-\
-uint32_t vp9_highbd_10_variance##w##x##h##_sse2( \
- const uint8_t *src8, int src_stride, \
- const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
- int sum; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- highbd_10_variance_sse2( \
- src, src_stride, ref, ref_stride, w, h, sse, &sum, \
- vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
- return *sse - (((int64_t)sum * sum) >> shift); \
-} \
-\
-uint32_t vp9_highbd_12_variance##w##x##h##_sse2( \
- const uint8_t *src8, int src_stride, \
- const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
- int sum; \
- uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
- highbd_12_variance_sse2( \
- src, src_stride, ref, ref_stride, w, h, sse, &sum, \
- vp9_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
- return *sse - (((int64_t)sum * sum) >> shift); \
-}
-
-VAR_FN(64, 64, 16, 12);
-VAR_FN(64, 32, 16, 11);
-VAR_FN(32, 64, 16, 11);
-VAR_FN(32, 32, 16, 10);
-VAR_FN(32, 16, 16, 9);
-VAR_FN(16, 32, 16, 9);
-VAR_FN(16, 16, 16, 8);
-VAR_FN(16, 8, 8, 7);
-VAR_FN(8, 16, 8, 7);
-VAR_FN(8, 8, 8, 6);
-
-#undef VAR_FN
-
-unsigned int vp9_highbd_mse16x16_sse2(const uint8_t *src8, int src_stride,
- const uint8_t *ref8, int ref_stride,
- unsigned int *sse) {
- int sum;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- highbd_variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
- sse, &sum, vp9_highbd_calc16x16var_sse2, 16);
- return *sse;
-}
-
-unsigned int vp9_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride,
- const uint8_t *ref8, int ref_stride,
- unsigned int *sse) {
- int sum;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
- sse, &sum, vp9_highbd_calc16x16var_sse2, 16);
- return *sse;
-}
-
-unsigned int vp9_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride,
- const uint8_t *ref8, int ref_stride,
- unsigned int *sse) {
- int sum;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
- sse, &sum, vp9_highbd_calc16x16var_sse2, 16);
- return *sse;
-}
-
-unsigned int vp9_highbd_mse8x8_sse2(const uint8_t *src8, int src_stride,
- const uint8_t *ref8, int ref_stride,
- unsigned int *sse) {
- int sum;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- highbd_variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
- sse, &sum, vp9_highbd_calc8x8var_sse2, 8);
- return *sse;
-}
-
-unsigned int vp9_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride,
- const uint8_t *ref8, int ref_stride,
- unsigned int *sse) {
- int sum;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
- sse, &sum, vp9_highbd_calc8x8var_sse2, 8);
- return *sse;
-}
-
-unsigned int vp9_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
- const uint8_t *ref8, int ref_stride,
- unsigned int *sse) {
- int sum;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
- uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
- highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
- sse, &sum, vp9_highbd_calc8x8var_sse2, 8);
- return *sse;
-}
-
#define DECL(w, opt) \
int vp9_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \
ptrdiff_t src_stride, \
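
The removed 10- and 12-bit wrappers scale the accumulated statistics back to an 8-bit-equivalent range before the variance is formed: the sum by 2 and 4 bits and the sse by 4 and 8 bits respectively. A compact sketch of that normalisation, following the removed code (ROUND_POWER_OF_TWO is the libvpx rounding macro from vpx_ports/mem.h):

    #include <stdint.h>

    #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

    /* Scale high-bit-depth sum/sse down so the usual variance formula applies. */
    static void scale_highbd_stats(int bd, int64_t sum_long, uint64_t sse_long,
                                   int *sum, unsigned int *sse) {
      if (bd == 10) {
        *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
        *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4);
      } else if (bd == 12) {
        *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
        *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8);
      } else {                       /* 8-bit input: no scaling needed */
        *sum = (int)sum_long;
        *sse = (unsigned int)sse_long;
      }
    }
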
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
index 00abd3c4962..71fdfd71624 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -11,6 +11,7 @@
#include <emmintrin.h>
#include <xmmintrin.h>
+#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm
index 06b8b034a5e..292cf34d1a2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance.asm
@@ -14,52 +14,28 @@ SECTION_RODATA
pw_8: times 8 dw 8
bilin_filter_m_sse2: times 8 dw 16
times 8 dw 0
- times 8 dw 15
- times 8 dw 1
times 8 dw 14
times 8 dw 2
- times 8 dw 13
- times 8 dw 3
times 8 dw 12
times 8 dw 4
- times 8 dw 11
- times 8 dw 5
times 8 dw 10
times 8 dw 6
- times 8 dw 9
- times 8 dw 7
times 16 dw 8
- times 8 dw 7
- times 8 dw 9
times 8 dw 6
times 8 dw 10
- times 8 dw 5
- times 8 dw 11
times 8 dw 4
times 8 dw 12
- times 8 dw 3
- times 8 dw 13
times 8 dw 2
times 8 dw 14
- times 8 dw 1
- times 8 dw 15
bilin_filter_m_ssse3: times 8 db 16, 0
- times 8 db 15, 1
times 8 db 14, 2
- times 8 db 13, 3
times 8 db 12, 4
- times 8 db 11, 5
times 8 db 10, 6
- times 8 db 9, 7
times 16 db 8
- times 8 db 7, 9
times 8 db 6, 10
- times 8 db 5, 11
times 8 db 4, 12
- times 8 db 3, 13
times 8 db 2, 14
- times 8 db 1, 15
SECTION .text
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
index a441cadaf70..b1c79752076 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
@@ -9,42 +9,28 @@
*/
#include <immintrin.h> // AVX2
+
+#include "./vp9_rtcd.h"
#include "vpx_ports/mem.h"
#include "vp9/encoder/vp9_variance.h"
DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
- 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
- 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
- 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
- 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
- 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
- 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
- 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
- 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
- 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
- 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
- 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
- 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
- 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
- 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
- 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
};
#define FILTER_SRC(filter) \
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c
index ea09b959e12..8cd071de5e2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c
@@ -7,23 +7,12 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
+#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/encoder/vp9_variance.h"
#include "vpx_ports/mem.h"
-typedef void (*get_var_avx2)(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse, int *sum);
-
-void vp9_get16x16var_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse, int *sum);
-
-void vp9_get32x32var_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse, int *sum);
-
unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
int x_offset, int y_offset,
const uint8_t *dst, int dst_stride,
@@ -41,81 +30,6 @@ unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
int height,
unsigned int *sseptr);
-static void variance_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- int w, int h, unsigned int *sse, int *sum,
- get_var_avx2 var_fn, int block_size) {
- int i, j;
-
- *sse = 0;
- *sum = 0;
-
- for (i = 0; i < h; i += 16) {
- for (j = 0; j < w; j += block_size) {
- unsigned int sse0;
- int sum0;
- var_fn(&src[src_stride * i + j], src_stride,
- &ref[ref_stride * i + j], ref_stride, &sse0, &sum0);
- *sse += sse0;
- *sum += sum0;
- }
- }
-}
-
-
-unsigned int vp9_variance16x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 16, 16,
- sse, &sum, vp9_get16x16var_avx2, 16);
- return *sse - (((unsigned int)sum * sum) >> 8);
-}
-
-unsigned int vp9_mse16x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- vp9_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse;
-}
-
-unsigned int vp9_variance32x16_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 32, 16,
- sse, &sum, vp9_get32x32var_avx2, 32);
- return *sse - (((int64_t)sum * sum) >> 9);
-}
-
-unsigned int vp9_variance32x32_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 32, 32,
- sse, &sum, vp9_get32x32var_avx2, 32);
- return *sse - (((int64_t)sum * sum) >> 10);
-}
-
-unsigned int vp9_variance64x64_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 64, 64,
- sse, &sum, vp9_get32x32var_avx2, 32);
- return *sse - (((int64_t)sum * sum) >> 12);
-}
-
-unsigned int vp9_variance64x32_avx2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_avx2(src, src_stride, ref, ref_stride, 64, 32,
- sse, &sum, vp9_get32x32var_avx2, 32);
- return *sse - (((int64_t)sum * sum) >> 11);
-}
-
unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src,
int src_stride,
int x_offset,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c
index 8490bbbdc2e..961efe34ee6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c
@@ -10,310 +10,12 @@
#include <emmintrin.h> // SSE2
+#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/encoder/vp9_variance.h"
#include "vpx_ports/mem.h"
-typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
- unsigned int *sse, int *sum);
-
-unsigned int vp9_get_mb_ss_sse2(const int16_t *src) {
- __m128i vsum = _mm_setzero_si128();
- int i;
-
- for (i = 0; i < 32; ++i) {
- const __m128i v = _mm_loadu_si128((const __m128i *)src);
- vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v));
- src += 8;
- }
-
- vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
- vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
- return _mm_cvtsi128_si32(vsum);
-}
-
-#define READ64(p, stride, i) \
- _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \
- _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride)))
-
-unsigned int vp9_get4x4var_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse, int *sum) {
- const __m128i zero = _mm_setzero_si128();
- const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero);
- const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero);
- const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero);
- const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero);
- const __m128i diff0 = _mm_sub_epi16(src0, ref0);
- const __m128i diff1 = _mm_sub_epi16(src1, ref1);
-
- // sum
- __m128i vsum = _mm_add_epi16(diff0, diff1);
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2));
- *sum = (int16_t)_mm_extract_epi16(vsum, 0);
-
- // sse
- vsum = _mm_add_epi32(_mm_madd_epi16(diff0, diff0),
- _mm_madd_epi16(diff1, diff1));
- vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
- vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
- *sse = _mm_cvtsi128_si32(vsum);
-
- return 0;
-}
-
-unsigned int vp9_get8x8var_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse, int *sum) {
- const __m128i zero = _mm_setzero_si128();
- __m128i vsum = _mm_setzero_si128();
- __m128i vsse = _mm_setzero_si128();
- int i;
-
- for (i = 0; i < 8; i += 2) {
- const __m128i src0 = _mm_unpacklo_epi8(_mm_loadl_epi64(
- (const __m128i *)(src + i * src_stride)), zero);
- const __m128i ref0 = _mm_unpacklo_epi8(_mm_loadl_epi64(
- (const __m128i *)(ref + i * ref_stride)), zero);
- const __m128i diff0 = _mm_sub_epi16(src0, ref0);
-
- const __m128i src1 = _mm_unpacklo_epi8(_mm_loadl_epi64(
- (const __m128i *)(src + (i + 1) * src_stride)), zero);
- const __m128i ref1 = _mm_unpacklo_epi8(_mm_loadl_epi64(
- (const __m128i *)(ref + (i + 1) * ref_stride)), zero);
- const __m128i diff1 = _mm_sub_epi16(src1, ref1);
-
- vsum = _mm_add_epi16(vsum, diff0);
- vsum = _mm_add_epi16(vsum, diff1);
- vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0));
- vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1));
- }
-
- // sum
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2));
- *sum = (int16_t)_mm_extract_epi16(vsum, 0);
-
- // sse
- vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
- vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
- *sse = _mm_cvtsi128_si32(vsse);
-
- return 0;
-}
-
-unsigned int vp9_get16x16var_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse, int *sum) {
- const __m128i zero = _mm_setzero_si128();
- __m128i vsum = _mm_setzero_si128();
- __m128i vsse = _mm_setzero_si128();
- int i;
-
- for (i = 0; i < 16; ++i) {
- const __m128i s = _mm_loadu_si128((const __m128i *)src);
- const __m128i r = _mm_loadu_si128((const __m128i *)ref);
-
- const __m128i src0 = _mm_unpacklo_epi8(s, zero);
- const __m128i ref0 = _mm_unpacklo_epi8(r, zero);
- const __m128i diff0 = _mm_sub_epi16(src0, ref0);
-
- const __m128i src1 = _mm_unpackhi_epi8(s, zero);
- const __m128i ref1 = _mm_unpackhi_epi8(r, zero);
- const __m128i diff1 = _mm_sub_epi16(src1, ref1);
-
- vsum = _mm_add_epi16(vsum, diff0);
- vsum = _mm_add_epi16(vsum, diff1);
- vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0));
- vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1));
-
- src += src_stride;
- ref += ref_stride;
- }
-
- // sum
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
- vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
- *sum = (int16_t)_mm_extract_epi16(vsum, 0) +
- (int16_t)_mm_extract_epi16(vsum, 1);
-
- // sse
- vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
- vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
- *sse = _mm_cvtsi128_si32(vsse);
-
- return 0;
-}
-
-
-static void variance_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
- int w, int h, unsigned int *sse, int *sum,
- variance_fn_t var_fn, int block_size) {
- int i, j;
-
- *sse = 0;
- *sum = 0;
-
- for (i = 0; i < h; i += block_size) {
- for (j = 0; j < w; j += block_size) {
- unsigned int sse0;
- int sum0;
- var_fn(src + src_stride * i + j, src_stride,
- ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
- *sse += sse0;
- *sum += sum0;
- }
- }
-}
-
-unsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- vp9_get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse - (((unsigned int)sum * sum) >> 4);
-}
-
-unsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 8, 4,
- sse, &sum, vp9_get4x4var_sse2, 4);
- return *sse - (((unsigned int)sum * sum) >> 5);
-}
-
-unsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 4, 8,
- sse, &sum, vp9_get4x4var_sse2, 4);
- return *sse - (((unsigned int)sum * sum) >> 5);
-}
-
-unsigned int vp9_variance8x8_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- vp9_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse - (((unsigned int)sum * sum) >> 6);
-}
-
-unsigned int vp9_variance16x8_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 16, 8,
- sse, &sum, vp9_get8x8var_sse2, 8);
- return *sse - (((unsigned int)sum * sum) >> 7);
-}
-
-unsigned int vp9_variance8x16_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 8, 16,
- sse, &sum, vp9_get8x8var_sse2, 8);
- return *sse - (((unsigned int)sum * sum) >> 7);
-}
-
-unsigned int vp9_variance16x16_sse2(const unsigned char *src, int src_stride,
- const unsigned char *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- vp9_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
- return *sse - (((unsigned int)sum * sum) >> 8);
-}
-
-unsigned int vp9_variance32x32_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 32, 32,
- sse, &sum, vp9_get16x16var_sse2, 16);
- return *sse - (((int64_t)sum * sum) >> 10);
-}
-
-unsigned int vp9_variance32x16_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 32, 16,
- sse, &sum, vp9_get16x16var_sse2, 16);
- return *sse - (((int64_t)sum * sum) >> 9);
-}
-
-unsigned int vp9_variance16x32_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 16, 32,
- sse, &sum, vp9_get16x16var_sse2, 16);
- return *sse - (((int64_t)sum * sum) >> 9);
-}
-
-unsigned int vp9_variance64x64_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 64, 64,
- sse, &sum, vp9_get16x16var_sse2, 16);
- return *sse - (((int64_t)sum * sum) >> 12);
-}
-
-unsigned int vp9_variance64x32_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 64, 32,
- sse, &sum, vp9_get16x16var_sse2, 16);
- return *sse - (((int64_t)sum * sum) >> 11);
-}
-
-unsigned int vp9_variance32x64_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- int sum;
- variance_sse2(src, src_stride, ref, ref_stride, 32, 64,
- sse, &sum, vp9_get16x16var_sse2, 16);
- return *sse - (((int64_t)sum * sum) >> 11);
-}
-
-unsigned int vp9_mse8x8_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- vp9_variance8x8_sse2(src, src_stride, ref, ref_stride, sse);
- return *sse;
-}
-
-unsigned int vp9_mse8x16_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- vp9_variance8x16_sse2(src, src_stride, ref, ref_stride, sse);
- return *sse;
-}
-
-unsigned int vp9_mse16x8_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- vp9_variance16x8_sse2(src, src_stride, ref, ref_stride, sse);
- return *sse;
-}
-
-unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride,
- const uint8_t *ref, int ref_stride,
- unsigned int *sse) {
- vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);
- return *sse;
-}
-
// The 2 unused parameters are place holders for PIC enabled build.
#define DECL(w, opt) \
int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \
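
All of the removed per-size wrappers share the same structure: an SSE2/AVX2 kernel produces sse and sum for one fixed tile (4x4, 8x8, 16x16 or 32x32), a helper walks the block in tile steps accumulating both, and the result is sse minus the squared mean, computed as a shift by log2(w*h). A C reference sketch of that structure, with a scalar stand-in for the tile kernel:

    #include <stdint.h>

    /* Scalar tile kernel: sum of differences and sum of squared differences. */
    static void get_var_nxn(const uint8_t *src, int src_stride,
                            const uint8_t *ref, int ref_stride,
                            int n, unsigned int *sse, int *sum) {
      int i, j;
      *sse = 0;
      *sum = 0;
      for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
          const int d = src[i * src_stride + j] - ref[i * ref_stride + j];
          *sum += d;
          *sse += d * d;
        }
      }
    }

    /* Tile-accumulating variance, mirroring the removed variance_sse2()/
     * variance_avx2() helpers; shift is log2(w * h). */
    static unsigned int variance_ref(const uint8_t *src, int src_stride,
                                     const uint8_t *ref, int ref_stride,
                                     int w, int h, int block, int shift,
                                     unsigned int *sse) {
      int i, j, sum = 0;
      *sse = 0;
      for (i = 0; i < h; i += block) {
        for (j = 0; j < w; j += block) {
          unsigned int sse0;
          int sum0;
          get_var_nxn(src + i * src_stride + j, src_stride,
                      ref + i * ref_stride + j, ref_stride, block, &sse0, &sum0);
          *sse += sse0;
          sum += sum0;
        }
      }
      return *sse - (unsigned int)(((int64_t)sum * sum) >> shift);
    }
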
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk
index c9326eeea69..6f091eefb63 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk
@@ -69,6 +69,7 @@ VP9_COMMON_SRCS-yes += common/vp9_common_data.h
VP9_COMMON_SRCS-yes += common/vp9_scan.c
VP9_COMMON_SRCS-yes += common/vp9_scan.h
+VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/convolve.h
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_asm_stubs.c
VP9_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp9_loopfilter_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_AVX2) += common/x86/vp9_loopfilter_intrin_avx2.c
@@ -131,14 +132,29 @@ VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_mblpf_vert_loopfilter_ds
# common (msa)
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_macros_msa.h
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_horiz_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_avg_vert_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_horiz_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve8_vert_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_avg_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_copy_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_convolve_msa.h
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct32x32_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct_msa.h
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_intra_predict_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_4_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_8_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_16_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_loopfilter_msa.h
+
+ifeq ($(CONFIG_VP9_POSTPROC),yes)
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
+endif
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.h
@@ -197,8 +213,9 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_4_neon.c
# TODO(johannkoenig): re-enable when chromium build is fixed
# # https://code.google.com/p/chromium/issues/detail?id=443839
#VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_8_neon.c
-VP9_COMMON_SRCS-yes += common/arm/neon/vp9_reconintra_neon.c
endif # HAVE_NEON
endif # HAVE_NEON_ASM
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_reconintra_neon.c
+
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
index cba15e693e9..9462be9faf1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
@@ -176,15 +176,23 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS);
RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
+ if (cfg->ss_number_layers * cfg->ts_number_layers > VPX_MAX_LAYERS)
+ ERROR("ss_number_layers * ts_number_layers is out of range");
if (cfg->ts_number_layers > 1) {
- unsigned int i;
- for (i = 1; i < cfg->ts_number_layers; ++i)
- if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i - 1])
+ unsigned int sl, tl;
+ for (sl = 1; sl < cfg->ss_number_layers; ++sl) {
+ for (tl = 1; tl < cfg->ts_number_layers; ++tl) {
+ const int layer =
+ LAYER_IDS_TO_IDX(sl, tl, cfg->ts_number_layers);
+ if (cfg->layer_target_bitrate[layer] <
+ cfg->layer_target_bitrate[layer - 1])
ERROR("ts_target_bitrate entries are not increasing");
+ }
+ }
RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1);
- for (i = cfg->ts_number_layers - 2; i > 0; --i)
- if (cfg->ts_rate_decimator[i - 1] != 2 * cfg->ts_rate_decimator[i])
+ for (tl = cfg->ts_number_layers - 2; tl > 0; --tl)
+ if (cfg->ts_rate_decimator[tl - 1] != 2 * cfg->ts_rate_decimator[tl])
ERROR("ts_rate_decimator factors are not powers of 2");
}
@@ -360,6 +368,7 @@ static vpx_codec_err_t set_encoder_config(
const vpx_codec_enc_cfg_t *cfg,
const struct vp9_extracfg *extra_cfg) {
const int is_vbr = cfg->rc_end_usage == VPX_VBR;
+ int sl, tl;
oxcf->profile = cfg->g_profile;
oxcf->max_threads = (int)cfg->g_threads;
oxcf->width = cfg->g_w;
@@ -460,35 +469,33 @@ static vpx_codec_err_t set_encoder_config(
oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost;
oxcf->ss_number_layers = cfg->ss_number_layers;
+ oxcf->ts_number_layers = cfg->ts_number_layers;
+ oxcf->temporal_layering_mode = (enum vp9e_temporal_layering_mode)
+ cfg->temporal_layering_mode;
- if (oxcf->ss_number_layers > 1) {
- int i;
- for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
- oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i];
+ for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
#if CONFIG_SPATIAL_SVC
- oxcf->ss_enable_auto_arf[i] = cfg->ss_enable_auto_alt_ref[i];
+ oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl];
#endif
+ for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
+ oxcf->layer_target_bitrate[sl * oxcf->ts_number_layers + tl] =
+ 1000 * cfg->layer_target_bitrate[sl * oxcf->ts_number_layers + tl];
}
- } else if (oxcf->ss_number_layers == 1) {
+ }
+ if (oxcf->ss_number_layers == 1 && oxcf->pass != 0) {
oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth;
#if CONFIG_SPATIAL_SVC
oxcf->ss_enable_auto_arf[0] = extra_cfg->enable_auto_alt_ref;
#endif
}
-
- oxcf->ts_number_layers = cfg->ts_number_layers;
-
if (oxcf->ts_number_layers > 1) {
- int i;
- for (i = 0; i < VPX_TS_MAX_LAYERS; ++i) {
- oxcf->ts_target_bitrate[i] = 1000 * cfg->ts_target_bitrate[i];
- oxcf->ts_rate_decimator[i] = cfg->ts_rate_decimator[i];
+ for (tl = 0; tl < VPX_TS_MAX_LAYERS; ++tl) {
+ oxcf->ts_rate_decimator[tl] = cfg->ts_rate_decimator[tl] ?
+ cfg->ts_rate_decimator[tl] : 1;
}
} else if (oxcf->ts_number_layers == 1) {
- oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth;
oxcf->ts_rate_decimator[0] = 1;
}
-
/*
printf("Current VP9 Settings: \n");
printf("target_bandwidth: %d\n", oxcf->target_bandwidth);
@@ -902,11 +909,12 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
unsigned int lib_flags) {
vpx_codec_frame_flags_t flags = lib_flags << 16;
- if (lib_flags & FRAMEFLAGS_KEY
-#if CONFIG_SPATIAL_SVC
- || (is_two_pass_svc(cpi) && cpi->svc.layer_context[0].is_key_frame)
-#endif
- )
+ if (lib_flags & FRAMEFLAGS_KEY ||
+ (cpi->use_svc &&
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id].is_key_frame)
+ )
flags |= VPX_FRAME_IS_KEY;
if (cpi->droppable)
@@ -1022,16 +1030,15 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
vpx_codec_cx_pkt_t pkt;
#if CONFIG_SPATIAL_SVC
- if (is_two_pass_svc(cpi))
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size;
+ if (cpi->use_svc)
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers].layer_size += size;
#endif
// Pack invisible frames with the next visible frame
- if (!cpi->common.show_frame
-#if CONFIG_SPATIAL_SVC
- || (is_two_pass_svc(cpi) &&
- cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
-#endif
+ if (!cpi->common.show_frame ||
+ (cpi->use_svc &&
+ cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
) {
if (ctx->pending_cx_data == 0)
ctx->pending_cx_data = cx_data;
@@ -1089,24 +1096,27 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
pkt.data.frame.partition_id = -1;
if(ctx->output_cx_pkt_cb.output_cx_pkt)
- ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, ctx->output_cx_pkt_cb.user_priv);
+ ctx->output_cx_pkt_cb.output_cx_pkt(&pkt,
+ ctx->output_cx_pkt_cb.user_priv);
else
vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt);
cx_data += size;
cx_data_sz -= size;
+#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION)
#if CONFIG_SPATIAL_SVC
- if (is_two_pass_svc(cpi) && !ctx->output_cx_pkt_cb.output_cx_pkt) {
+ if (cpi->use_svc && !ctx->output_cx_pkt_cb.output_cx_pkt) {
vpx_codec_cx_pkt_t pkt_sizes, pkt_psnr;
- int i;
+ int sl;
vp9_zero(pkt_sizes);
vp9_zero(pkt_psnr);
pkt_sizes.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES;
pkt_psnr.kind = VPX_CODEC_SPATIAL_SVC_LAYER_PSNR;
- for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[i];
- pkt_sizes.data.layer_sizes[i] = lc->layer_size;
- pkt_psnr.data.layer_psnr[i] = lc->psnr_pkt;
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
+ LAYER_CONTEXT *lc =
+ &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers];
+ pkt_sizes.data.layer_sizes[sl] = lc->layer_size;
+ pkt_psnr.data.layer_psnr[sl] = lc->psnr_pkt;
lc->layer_size = 0;
}
@@ -1115,6 +1125,12 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_psnr);
}
#endif
+#endif
+ if (is_one_pass_cbr_svc(cpi) &&
+ (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
+ // Encoded all spatial layers; exit loop.
+ break;
+ }
}
}
}
@@ -1292,16 +1308,20 @@ static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx,
static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) {
int data = va_arg(args, int);
const vpx_codec_enc_cfg_t *cfg = &ctx->cfg;
+ // Both one-pass and two-pass RC are supported now.
+ // User setting this has to make sure of the following.
+ // In two-pass setting: either (but not both)
+ // cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1
+ // In one-pass setting:
+ // either or both cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1
vp9_set_svc(ctx->cpi, data);
- // CBR or two pass mode for SVC with both temporal and spatial layers
- // not yet supported.
+
if (data == 1 &&
- (cfg->rc_end_usage == VPX_CBR ||
- cfg->g_pass == VPX_RC_FIRST_PASS ||
+ (cfg->g_pass == VPX_RC_FIRST_PASS ||
cfg->g_pass == VPX_RC_LAST_PASS) &&
- cfg->ss_number_layers > 1 &&
- cfg->ts_number_layers > 1) {
+ cfg->ss_number_layers > 1 &&
+ cfg->ts_number_layers > 1) {
return VPX_CODEC_INVALID_PARAM;
}
return VPX_CODEC_OK;
@@ -1313,9 +1333,7 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
SVC *const svc = &cpi->svc;
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
svc->spatial_layer_id = data->spatial_layer_id;
-#endif
svc->temporal_layer_id = data->temporal_layer_id;
// Checks on valid layer_id input.
if (svc->temporal_layer_id < 0 ||
@@ -1335,9 +1353,7 @@ static vpx_codec_err_t ctrl_get_svc_layer_id(vpx_codec_alg_priv_t *ctx,
VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
SVC *const svc = &cpi->svc;
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
data->spatial_layer_id = svc->spatial_layer_id;
-#endif
data->temporal_layer_id = svc->temporal_layer_id;
return VPX_CODEC_OK;
@@ -1347,15 +1363,21 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
va_list args) {
VP9_COMP *const cpi = ctx->cpi;
vpx_svc_extra_cfg_t *const params = va_arg(args, vpx_svc_extra_cfg_t *);
- int i;
-
- for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[i];
-
- lc->max_q = params->max_quantizers[i];
- lc->min_q = params->min_quantizers[i];
- lc->scaling_factor_num = params->scaling_factor_num[i];
- lc->scaling_factor_den = params->scaling_factor_den[i];
+ int sl, tl;
+
+ // Number of temporal layers and number of spatial layers have to be set
+ // properly before calling this control function.
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
+ for (tl = 0; tl < cpi->svc.number_temporal_layers; ++tl) {
+ const int layer =
+ LAYER_IDS_TO_IDX(sl, tl, cpi->svc.number_temporal_layers);
+ LAYER_CONTEXT *lc =
+ &cpi->svc.layer_context[layer];
+ lc->max_q = params->max_quantizers[sl];
+ lc->min_q = params->min_quantizers[sl];
+ lc->scaling_factor_num = params->scaling_factor_num[sl];
+ lc->scaling_factor_den = params->scaling_factor_den[sl];
+ }
}
return VPX_CODEC_OK;
@@ -1416,10 +1438,8 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{VP9E_SET_AQ_MODE, ctrl_set_aq_mode},
{VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost},
{VP9E_SET_SVC, ctrl_set_svc},
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
{VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters},
{VP9E_REGISTER_CX_CALLBACK, ctrl_register_cx_callback},
-#endif
{VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id},
{VP9E_SET_TUNE_CONTENT, ctrl_set_tune_content},
{VP9E_SET_COLOR_SPACE, ctrl_set_color_space},
@@ -1429,9 +1449,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer},
{VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64},
{VP9_GET_REFERENCE, ctrl_get_reference},
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
{VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id},
-#endif
{VP9E_GET_ACTIVEMAP, ctrl_get_active_map},
{ -1, NULL},
@@ -1495,6 +1513,8 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
{0}, // ts_rate_decimator
0, // ts_periodicity
{0}, // ts_layer_id
+      {0},                    // layer_target_bitrate
+ 0 // temporal_layering_mode
}
},
};
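
The SVC-related changes in vp9_cx_iface.c all use the same spatial-major layer index, sl * ts_number_layers + tl, both written out directly and via LAYER_IDS_TO_IDX. A small sketch of that mapping and of filling layer_target_bitrate with it (the bitrate values are placeholders):

    /* Spatial-major layer index: all temporal layers of spatial layer 0 first,
     * then spatial layer 1, and so on. */
    static int layer_ids_to_idx(int sl, int tl, int ts_number_layers) {
      return sl * ts_number_layers + tl;
    }

    /* Placeholder example of populating per-layer target bitrates (kbps). */
    static void fill_layer_bitrates(int *layer_target_bitrate,
                                    int ss_layers, int ts_layers) {
      int sl, tl;
      for (sl = 0; sl < ss_layers; ++sl)
        for (tl = 0; tl < ts_layers; ++tl)
          layer_target_bitrate[layer_ids_to_idx(sl, tl, ts_layers)] =
              100 * (sl + 1) + 50 * tl;   /* placeholder rates */
    }
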
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c
index ff76204d822..4080d64c170 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c
@@ -55,6 +55,7 @@ struct vpx_codec_alg_priv {
int invert_tile_order;
int last_show_frame; // Index of last output frame.
int byte_alignment;
+ int skip_loop_filter;
// Frame parallel related.
int frame_parallel_decode; // frame-based threading.
@@ -285,6 +286,7 @@ static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) {
cm->new_fb_idx = INVALID_IDX;
cm->byte_alignment = ctx->byte_alignment;
+ cm->skip_loop_filter = ctx->skip_loop_filter;
if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) {
pool->get_fb_cb = ctx->get_ext_fb_cb;
@@ -937,7 +939,8 @@ static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx,
frame_worker_data->pbi->common.buffer_pool->frame_bufs;
if (frame_worker_data->pbi->common.frame_to_show == NULL)
return VPX_CODEC_ERROR;
- *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted;
+ if (ctx->last_show_frame >= 0)
+ *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted;
return VPX_CODEC_OK;
} else {
return VPX_CODEC_ERROR;
@@ -1058,6 +1061,19 @@ static vpx_codec_err_t ctrl_set_byte_alignment(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->skip_loop_filter = va_arg(args, int);
+
+ if (ctx->frame_workers) {
+ VP9Worker *const worker = ctx->frame_workers;
+ FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+ frame_worker_data->pbi->common.skip_loop_filter = ctx->skip_loop_filter;
+ }
+
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{VP8_COPY_REFERENCE, ctrl_copy_reference},
@@ -1071,6 +1087,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order},
{VPXD_SET_DECRYPTOR, ctrl_set_decryptor},
{VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment},
+ {VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter},
// Getters
{VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates},
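
The new VP9_SET_SKIP_LOOP_FILTER control stores the flag on the decoder context and, when frame workers already exist, pushes it straight into VP9_COMMON. A hedged usage sketch from the application side (assumes an already initialised decoder; the control id and vpx_codec_control() come from the public vpx headers):

    #include "vpx/vpx_decoder.h"
    #include "vpx/vp8dx.h"     /* VP9 decoder control ids */

    /* Skip in-loop filtering on a decoder previously set up with
     * vpx_codec_dec_init(); trades some quality for decode speed. */
    static void disable_loop_filter(vpx_codec_ctx_t *decoder) {
      vpx_codec_control(decoder, VP9_SET_SKIP_LOOP_FILTER, 1);
    }
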
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h
index e585aa14725..58bb7d5d648 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_iface_common.h
@@ -10,6 +10,8 @@
#ifndef VP9_VP9_IFACE_COMMON_H_
#define VP9_VP9_IFACE_COMMON_H_
+#include "vpx_ports/mem.h"
+
static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
void *user_priv) {
/** vpx_img_wrap() doesn't allow specifying independent strides for
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk
index 7359b2de05d..e78c111f085 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk
@@ -102,13 +102,11 @@ VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c
VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
-VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_avg_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_temporal_filter_apply_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_variance_impl_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_quantize_intrin_sse2.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
endif
@@ -134,14 +132,14 @@ VP9_CX_SRCS-$(ARCH_X86_64) += encoder/x86/vp9_ssim_opt_x86_64.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.c
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_dct_ssse3.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.h
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2.c
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_impl_sse2.c
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct32x32_sse2_impl.h
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2_impl.h
ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c
endif
-VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct32x32_avx2.c
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct32x32_avx2_impl.h
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c
VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c
@@ -154,4 +152,14 @@ VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_subtract_neon.c
VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_variance_neon.c
+VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_avg_msa.c
+VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c
+VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c
+VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c
+VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c
+VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct32x32_msa.c
+VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h
+VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_subtract_msa.c
+VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_temporal_filter_msa.c
+
VP9_CX_SRCS-yes := $(filter-out $(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes))
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/exports_dec b/chromium/third_party/libvpx/source/libvpx/vpx/exports_dec
index 3ce1499b77d..c694ebae128 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/exports_dec
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/exports_dec
@@ -1,10 +1,8 @@
text vpx_codec_dec_init_ver
text vpx_codec_decode
text vpx_codec_get_frame
-text vpx_codec_get_mem_map
text vpx_codec_get_stream_info
text vpx_codec_peek_stream_info
text vpx_codec_register_put_frame_cb
text vpx_codec_register_put_slice_cb
text vpx_codec_set_frame_buffer_functions
-text vpx_codec_set_mem_map
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c
index e711cf909ba..9844ace54dc 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c
@@ -302,31 +302,79 @@ void assign_layer_bitrates(const SvcContext *svc_ctx,
vpx_codec_enc_cfg_t *const enc_cfg) {
int i;
const SvcInternal_t *const si = get_const_svc_internal(svc_ctx);
+ int sl, tl, spatial_layer_target;
+
+ if (svc_ctx->temporal_layering_mode != 0) {
+ if (si->bitrates[0] != 0) {
+ enc_cfg->rc_target_bitrate = 0;
+ for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
+ enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers] = 0;
+ for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) {
+ enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers]
+ += (unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl];
+ enc_cfg->layer_target_bitrate[sl*svc_ctx->temporal_layers + tl]
+ = si->bitrates[sl * svc_ctx->temporal_layers + tl];
+ }
+ }
+ } else {
+ float total = 0;
+ float alloc_ratio[VPX_MAX_LAYERS] = {0};
+
+ for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
+ if (si->svc_params.scaling_factor_den[sl] > 0) {
+ alloc_ratio[sl] = (float)(si->svc_params.scaling_factor_num[sl] *
+ 1.0 / si->svc_params.scaling_factor_den[sl]);
+ total += alloc_ratio[sl];
+ }
+ }
- if (si->bitrates[0] != 0) {
- enc_cfg->rc_target_bitrate = 0;
- for (i = 0; i < svc_ctx->spatial_layers; ++i) {
- enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i];
- enc_cfg->rc_target_bitrate += si->bitrates[i];
+ for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
+ enc_cfg->ss_target_bitrate[sl] = spatial_layer_target =
+ (unsigned int)(enc_cfg->rc_target_bitrate *
+ alloc_ratio[sl] / total);
+ if (svc_ctx->temporal_layering_mode == 3) {
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] =
+ spatial_layer_target >> 1;
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] =
+ (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] =
+ spatial_layer_target;
+ } else if (svc_ctx->temporal_layering_mode == 2) {
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] =
+ spatial_layer_target * 2 / 3;
+ enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] =
+ spatial_layer_target;
+ } else {
+ // User should explicitly assign bitrates in this case.
+ assert(0);
+ }
+ }
}
} else {
- float total = 0;
- float alloc_ratio[VPX_SS_MAX_LAYERS] = {0};
+ if (si->bitrates[0] != 0) {
+ enc_cfg->rc_target_bitrate = 0;
+ for (i = 0; i < svc_ctx->spatial_layers; ++i) {
+ enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i];
+ enc_cfg->rc_target_bitrate += si->bitrates[i];
+ }
+ } else {
+ float total = 0;
+ float alloc_ratio[VPX_MAX_LAYERS] = {0};
- for (i = 0; i < svc_ctx->spatial_layers; ++i) {
- if (si->svc_params.scaling_factor_den[i] > 0) {
- alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 /
- si->svc_params.scaling_factor_den[i]);
+ for (i = 0; i < svc_ctx->spatial_layers; ++i) {
+ if (si->svc_params.scaling_factor_den[i] > 0) {
+ alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 /
+ si->svc_params.scaling_factor_den[i]);
- alloc_ratio[i] *= alloc_ratio[i];
- total += alloc_ratio[i];
+ alloc_ratio[i] *= alloc_ratio[i];
+ total += alloc_ratio[i];
+ }
}
- }
-
- for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
- if (total > 0) {
- enc_cfg->ss_target_bitrate[i] = (unsigned int)
- (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total);
+ for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
+ if (total > 0) {
+ enc_cfg->layer_target_bitrate[i] = (unsigned int)
+ (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total);
+ }
}
}
}
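For reference, a minimal scalar sketch of the per-spatial-layer split performed above, assuming a spatial-layer target T and the cumulative convention used by layer_target_bitrate[] in this function (the helper name split_spatial_target is illustrative, not part of this patch):

/* Sketch: cumulative temporal-layer targets for one spatial layer.
 * Mirrors the mode 3 (0-2-1-2) and mode 2 (0-1-0-1) branches above. */
static void split_spatial_target(int mode, int T, int out[3]) {
  if (mode == 3) {                        /* three temporal layers */
    out[0] = T >> 1;                      /* TL0 alone: ~50% */
    out[1] = (T >> 1) + (T >> 2);         /* TL0 + TL1: ~75% */
    out[2] = T;                           /* all layers: 100% */
  } else if (mode == 2) {                 /* two temporal layers */
    out[0] = T * 2 / 3;                   /* TL0 alone: ~67% */
    out[1] = T;                           /* TL0 + TL1: 100% */
    out[2] = 0;
  } else {
    out[0] = out[1] = out[2] = 0;         /* caller must assign explicitly */
  }
}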
@@ -365,6 +413,14 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
return VPX_CODEC_INVALID_PARAM;
}
+ // Note: temporal_layering_mode only applies to one-pass CBR
+ // si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode;
+ if (svc_ctx->temporal_layering_mode == 3) {
+ svc_ctx->temporal_layers = 3;
+ } else if (svc_ctx->temporal_layering_mode == 2) {
+ svc_ctx->temporal_layers = 2;
+ }
+
for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) {
si->svc_params.max_quantizers[i] = MAX_QUANTIZER;
si->svc_params.min_quantizers[i] = 0;
@@ -387,6 +443,14 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
if (svc_ctx->temporal_layers > VPX_TS_MAX_LAYERS)
svc_ctx->temporal_layers = VPX_TS_MAX_LAYERS;
+ if (svc_ctx->temporal_layers * svc_ctx->spatial_layers > VPX_MAX_LAYERS) {
+ svc_log(svc_ctx, SVC_LOG_ERROR,
+            "spatial layers * temporal layers (%d) exceeds the maximum "
+            "number of allowed layers of %d\n",
+ svc_ctx->spatial_layers * svc_ctx->temporal_layers,
+ (int) VPX_MAX_LAYERS);
+ return VPX_CODEC_INVALID_PARAM;
+ }
assign_layer_bitrates(svc_ctx, enc_cfg);
#if CONFIG_SPATIAL_SVC
@@ -403,10 +467,24 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
}
}
- // modify encoder configuration
+ if (svc_ctx->threads)
+ enc_cfg->g_threads = svc_ctx->threads;
+
+ // Modify encoder configuration
enc_cfg->ss_number_layers = svc_ctx->spatial_layers;
enc_cfg->ts_number_layers = svc_ctx->temporal_layers;
+ if (enc_cfg->rc_end_usage == VPX_CBR) {
+ enc_cfg->rc_resize_allowed = 0;
+ enc_cfg->rc_min_quantizer = 2;
+ enc_cfg->rc_max_quantizer = 63;
+ enc_cfg->rc_undershoot_pct = 50;
+ enc_cfg->rc_overshoot_pct = 50;
+ enc_cfg->rc_buf_initial_sz = 20;
+ enc_cfg->rc_buf_optimal_sz = 600;
+ enc_cfg->rc_buf_sz = 1000;
+ }
+
if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0)
enc_cfg->g_error_resilient = 1;
@@ -451,6 +529,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx,
iter = NULL;
while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) {
switch (cx_pkt->kind) {
+#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION)
#if CONFIG_SPATIAL_SVC
case VPX_CODEC_SPATIAL_SVC_LAYER_PSNR: {
int i;
@@ -489,6 +568,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx,
break;
}
#endif
+#endif
default: {
break;
}
@@ -554,7 +634,7 @@ const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) {
mse[1], mse[2], mse[3]);
bytes_total += si->bytes_sum[i];
- // clear sums for next time
+ // Clear sums for next time.
si->bytes_sum[i] = 0;
for (j = 0; j < COMPONENTS; ++j) {
si->psnr_sum[i][j] = 0;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h b/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h
index cf791bdeb56..a09651cc991 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h
@@ -33,10 +33,13 @@ typedef struct {
// public interface to svc_command options
int spatial_layers; // number of spatial layers
int temporal_layers; // number of temporal layers
+ int temporal_layering_mode;
SVC_LOG_LEVEL log_level; // amount of information to display
int log_print; // when set, printf log messages instead of returning the
// message with svc_get_message
-
+ int output_rc_stat; // for outputting rc stats
+ int speed; // speed setting for codec
+ int threads;
// private storage for vpx_svc_encode
void *internal;
} SvcContext;
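A hedged usage sketch of the new SvcContext fields, assuming a zero-initialized context and the vpx_svc_init() signature shown elsewhere in this patch; the helper name init_svc_example and the layer counts are illustrative only:

#include <string.h>
#include "vpx/svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

/* Sketch: 2 spatial x 3 temporal layers with the 0-2-1-2 pattern. */
static int init_svc_example(SvcContext *svc, vpx_codec_ctx_t *codec,
                            vpx_codec_enc_cfg_t *cfg) {
  memset(svc, 0, sizeof(*svc));
  svc->spatial_layers = 2;
  svc->temporal_layers = 3;          /* forced to 3 by mode 3 in vpx_svc_init */
  svc->temporal_layering_mode = 3;   /* 0-2-1-2 */
  svc->threads = 4;                  /* copied into enc_cfg->g_threads */
  if (vpx_codec_enc_config_default(vpx_codec_vp9_cx(), cfg, 0) != VPX_CODEC_OK)
    return -1;
  return vpx_svc_init(svc, codec, vpx_codec_vp9_cx(), cfg) == VPX_CODEC_OK
             ? 0 : -1;
}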
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
index 0e8adc134c5..19bc4bdcce1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
@@ -327,6 +327,8 @@ enum vp8e_enc_control_id {
/*!\brief Codec control function to set encoder screen content mode.
*
+ * 0: off, 1: on, 2: on with more aggressive rate control.
+ *
* Supported in codecs: VP8
*/
VP8E_SET_SCREEN_CONTENT_MODE,
@@ -448,7 +450,6 @@ enum vp8e_enc_control_id {
*/
VP9E_SET_SVC,
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
/*!\brief Codec control function to set parameters for SVC.
* \note Parameters contain min_q, max_q, scaling factor for each of the
* SVC layers.
@@ -456,7 +457,6 @@ enum vp8e_enc_control_id {
* Supported in codecs: VP9
*/
VP9E_SET_SVC_PARAMETERS,
-#endif
/*!\brief Codec control function to set svc layer for spatial and temporal.
* \note Valid ranges: 0..#vpx_codec_enc_cfg::ss_number_layers for spatial
@@ -476,7 +476,6 @@ enum vp8e_enc_control_id {
*/
VP9E_SET_TUNE_CONTENT,
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
/*!\brief Codec control function to get svc layer ID.
* \note The layer ID returned is for the data packet from the registered
* callback function.
@@ -492,7 +491,6 @@ enum vp8e_enc_control_id {
* Supported in codecs: VP9
*/
VP9E_REGISTER_CX_CALLBACK,
-#endif
/*!\brief Codec control function to set color space info.
* \note Valid ranges: 0..7, default is "UNKNOWN".
@@ -509,6 +507,17 @@ enum vp8e_enc_control_id {
*/
VP9E_SET_COLOR_SPACE,
+ /*!\brief Codec control function to set temporal layering mode.
+ * \note Valid ranges: 0..3, default is "0" (VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING).
+ * 0 = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING
+ * 1 = VP9E_TEMPORAL_LAYERING_MODE_BYPASS
+ * 2 = VP9E_TEMPORAL_LAYERING_MODE_0101
+ * 3 = VP9E_TEMPORAL_LAYERING_MODE_0212
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_SET_TEMPORAL_LAYERING_MODE,
+
/*!\brief Codec control function to get an Active map back from the encoder.
*
* Supported in codecs: VP9
@@ -527,6 +536,32 @@ typedef enum vpx_scaling_mode_1d {
VP8E_ONETWO = 3
} VPX_SCALING_MODE;
+/*!\brief Temporal layering mode enum for VP9 SVC.
+ *
+ * This enum defines the different temporal layering modes.
+ * Supported codecs: VP9 (in SVC mode)
+ *
+ */
+typedef enum vp9e_temporal_layering_mode {
+ /*!\brief No temporal layering.
+ * Used when only spatial layering is used.
+ */
+ VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING = 0,
+
+ /*!\brief Bypass mode.
+ * Used when the application needs to control temporal layering.
+ * This will only work when the number of spatial layers equals 1.
+ */
+ VP9E_TEMPORAL_LAYERING_MODE_BYPASS = 1,
+
+ /*!\brief 0-1-0-1... temporal layering scheme with two temporal layers.
+ */
+ VP9E_TEMPORAL_LAYERING_MODE_0101 = 2,
+
+ /*!\brief 0-2-1-2... temporal layering scheme with three temporal layers.
+ */
+ VP9E_TEMPORAL_LAYERING_MODE_0212 = 3
+} VP9E_TEMPORAL_LAYERING_MODE;
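As a worked example of the two fixed patterns above: mode 0101 alternates TL0, TL1 on even and odd frames, while mode 0212 repeats TL0, TL2, TL1, TL2 over every four frames, so TL0 and TL1 each carry a quarter of the frames and TL2 the remaining half. A hedged sketch (the helper temporal_layer_of is illustrative; the encoder derives this internally):

#include "vpx/vp8cx.h"

/* Sketch: temporal layer id of frame number n under the fixed patterns. */
static int temporal_layer_of(int mode, int n) {
  if (mode == VP9E_TEMPORAL_LAYERING_MODE_0101)
    return n & 1;                          /* 0,1,0,1,... */
  if (mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
    static const int pattern[4] = { 0, 2, 1, 2 };
    return pattern[n & 3];                 /* 0,2,1,2,... */
  }
  return 0;                                /* NOLAYERING / BYPASS */
}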
/*!\brief vpx region of interest map
*
@@ -602,7 +637,6 @@ typedef enum {
VP8_TUNE_SSIM
} vp8e_tuning;
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
/*!\brief vp9 svc layer parameters
*
* This defines the spatial and temporal layer id numbers for svc encoding.
@@ -614,18 +648,6 @@ typedef struct vpx_svc_layer_id {
int spatial_layer_id; /**< Spatial layer id number. */
int temporal_layer_id; /**< Temporal layer id number. */
} vpx_svc_layer_id_t;
-#else
-/*!\brief vp9 svc layer parameters
- *
- * This defines the temporal layer id numbers for svc encoding.
- * This is used with the #VP9E_SET_SVC_LAYER_ID control to set the
- * temporal layer id for the current frame.
- *
- */
-typedef struct vpx_svc_layer_id {
- int temporal_layer_id; /**< Temporal layer id number. */
-} vpx_svc_layer_id_t;
-#endif
/*!\brief VP8 encoder control function parameter type
*
@@ -649,10 +671,8 @@ VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *)
VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *)
VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int)
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS, void *)
VPX_CTRL_USE_TYPE(VP9E_REGISTER_CX_CALLBACK, void *)
-#endif
VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID, vpx_svc_layer_id_t *)
VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int)
@@ -673,9 +693,7 @@ VPX_CTRL_USE_TYPE(VP9E_SET_TILE_ROWS, int)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *)
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *)
-#endif
VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int)
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h b/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h
index 83898bf8496..bc9cb1a62fc 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h
@@ -106,6 +106,13 @@ enum vp8_dec_control_id {
*/
VP9_INVERT_TILE_DECODE_ORDER,
+ /** control function to set the skip loop filter flag. Valid values are
+ * integers. The decoder will skip the loop filter when its value is set to
+ * nonzero. If the loop filter is skipped, the decoder may accumulate decode
+ * artifacts. The default value is 0.
+ */
+ VP9_SET_SKIP_LOOP_FILTER,
+
VP8_DECODER_CTRL_ID_MAX
};
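A hedged usage sketch of the new decoder control, using the untyped vpx_codec_control_() entry point so no typed wrapper is assumed; the helper name set_skip_loop_filter is illustrative and the decoder context is assumed to be already initialized:

#include "vpx/vpx_decoder.h"
#include "vpx/vp8dx.h"

/* Sketch: skip the in-loop filter on an initialized VP9 decoder.
 * Nonzero skips filtering; decode artifacts may accumulate over frames. */
static void set_skip_loop_filter(vpx_codec_ctx_t *decoder, int skip) {
  vpx_codec_control_(decoder, VP9_SET_SKIP_LOOP_FILTER, skip);
}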
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_codec.mk b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_codec.mk
index a1ad3c5312c..ccdef040c3a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_codec.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_codec.mk
@@ -31,17 +31,17 @@ API_DOC_SRCS-yes += vpx_encoder.h
API_DOC_SRCS-yes += vpx_frame_buffer.h
API_DOC_SRCS-yes += vpx_image.h
-API_SRCS-yes += src/vpx_decoder.c
-API_SRCS-yes += vpx_decoder.h
-API_SRCS-yes += src/vpx_encoder.c
-API_SRCS-yes += vpx_encoder.h
-API_SRCS-yes += internal/vpx_codec_internal.h
-API_SRCS-yes += internal/vpx_psnr.h
-API_SRCS-yes += src/vpx_codec.c
-API_SRCS-yes += src/vpx_image.c
-API_SRCS-yes += src/vpx_psnr.c
-API_SRCS-yes += vpx_codec.h
-API_SRCS-yes += vpx_codec.mk
-API_SRCS-yes += vpx_frame_buffer.h
-API_SRCS-yes += vpx_image.h
-API_SRCS-$(BUILD_LIBVPX) += vpx_integer.h
+API_SRCS-yes += src/vpx_decoder.c
+API_SRCS-yes += vpx_decoder.h
+API_SRCS-yes += src/vpx_encoder.c
+API_SRCS-yes += vpx_encoder.h
+API_SRCS-yes += internal/vpx_codec_internal.h
+API_SRCS-yes += internal/vpx_psnr.h
+API_SRCS-yes += src/vpx_codec.c
+API_SRCS-yes += src/vpx_image.c
+API_SRCS-yes += src/vpx_psnr.c
+API_SRCS-yes += vpx_codec.h
+API_SRCS-yes += vpx_codec.mk
+API_SRCS-yes += vpx_frame_buffer.h
+API_SRCS-yes += vpx_image.h
+API_SRCS-yes += vpx_integer.h
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h
index bf75584d589..2b17f98a231 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h
@@ -42,8 +42,11 @@ extern "C" {
/*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */
#define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY
- /*!\deprecated Use #VPX_TS_MAX_LAYERS instead. */
-#define MAX_LAYERS VPX_TS_MAX_LAYERS
+/*! Temporal+Spatial Scalability: Maximum number of coding layers */
+#define VPX_MAX_LAYERS 12 // 3 temporal + 4 spatial layers are allowed.
+
+/*!\deprecated Use #VPX_MAX_LAYERS instead. */
+#define MAX_LAYERS VPX_MAX_LAYERS // 3 temporal + 4 spatial layers allowed.
/*! Spatial Scalability: Maximum number of coding layers */
#define VPX_SS_MAX_LAYERS 5
@@ -59,7 +62,7 @@ extern "C" {
* types, removing or reassigning enums, adding/removing/rearranging
* fields to structures
*/
-#define VPX_ENCODER_ABI_VERSION (4 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
+#define VPX_ENCODER_ABI_VERSION (5 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
/*! \brief Encoder capabilities bitfield
@@ -163,7 +166,7 @@ extern "C" {
VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */
// Spatial SVC is still experimental and may be removed before the next ABI
// bump.
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
+#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION)
VPX_CODEC_SPATIAL_SVC_LAYER_SIZES, /**< Sizes for each layer in this frame*/
VPX_CODEC_SPATIAL_SVC_LAYER_PSNR, /**< PSNR for each layer in this frame*/
#endif
@@ -205,7 +208,7 @@ extern "C" {
vpx_fixed_buf_t raw; /**< data for arbitrary packets */
// Spatial SVC is still experimental and may be removed before the next
// ABI bump.
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
+#if VPX_ENCODER_ABI_VERSION > (5 + VPX_CODEC_ABI_VERSION)
size_t layer_sizes[VPX_SS_MAX_LAYERS];
struct vpx_psnr_pkt layer_psnr[VPX_SS_MAX_LAYERS];
#endif
@@ -729,6 +732,22 @@ extern "C" {
* ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1).
*/
unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY];
+
+ /*!\brief Target bitrate for each spatial/temporal layer.
+ *
+ * These values specify the target coding bitrate to be used for each
+ * spatial/temporal layer.
+ *
+ */
+ unsigned int layer_target_bitrate[VPX_MAX_LAYERS];
+
+ /*!\brief Temporal layering mode indicating which temporal layering scheme to use.
+ *
+ * The value (refer to VP9E_TEMPORAL_LAYERING_MODE) specifies the
+ * temporal layering mode to use.
+ *
+ */
+ int temporal_layering_mode;
} vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */
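A hedged sketch of filling the new layer_target_bitrate[] field for 2 spatial x 3 temporal layers, assuming the sl * ts_number_layers + tl indexing used by the SVC code in this patch; the helper name fill_layer_targets and the kbps values are illustrative:

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

/* Sketch: per-layer targets, indexed as sl * ts_number_layers + tl. */
static void fill_layer_targets(vpx_codec_enc_cfg_t *cfg) {
  int sl, tl;
  cfg->ss_number_layers = 2;
  cfg->ts_number_layers = 3;
  cfg->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_0212;
  for (sl = 0; sl < 2; ++sl)
    for (tl = 0; tl < 3; ++tl)
      cfg->layer_target_bitrate[sl * 3 + tl] = 100 * (sl + 1) * (tl + 1);
}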
/*!\brief vp9 svc extra configure parameters
@@ -737,10 +756,11 @@ extern "C" {
*
*/
typedef struct vpx_svc_parameters {
- int max_quantizers[VPX_SS_MAX_LAYERS]; /**< Max Q for each layer */
- int min_quantizers[VPX_SS_MAX_LAYERS]; /**< Min Q for each layer */
- int scaling_factor_num[VPX_SS_MAX_LAYERS]; /**< Scaling factor-numerator*/
- int scaling_factor_den[VPX_SS_MAX_LAYERS]; /**< Scaling factor-denominator*/
+ int max_quantizers[VPX_MAX_LAYERS]; /**< Max Q for each layer */
+ int min_quantizers[VPX_MAX_LAYERS]; /**< Min Q for each layer */
+ int scaling_factor_num[VPX_MAX_LAYERS]; /**< Scaling factor-numerator */
+ int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */
+ int temporal_layering_mode; /**< Temporal layering mode */
} vpx_svc_extra_cfg_t;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm
new file mode 100644
index 00000000000..f7f9e14b0a7
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_media.asm
@@ -0,0 +1,358 @@
+;
+; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+
+ EXPORT |vpx_variance16x16_media|
+ EXPORT |vpx_variance8x8_media|
+ EXPORT |vpx_mse16x16_media|
+
+ ARM
+ REQUIRE8
+ PRESERVE8
+
+ AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; r0 unsigned char *src_ptr
+; r1 int source_stride
+; r2 unsigned char *ref_ptr
+; r3 int recon_stride
+; stack unsigned int *sse
+|vpx_variance16x16_media| PROC
+
+ stmfd sp!, {r4-r12, lr}
+
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+
+ mov r8, #0 ; initialize sum = 0
+ mov r11, #0 ; initialize sse = 0
+ mov r12, #16 ; set loop counter to 16 (=block height)
+
+loop16x16
+ ; 1st 4 pixels
+ ldr r4, [r0, #0] ; load 4 src pixels
+ ldr r5, [r2, #0] ; load 4 ref pixels
+
+ mov lr, #0 ; constant zero
+
+ usub8 r6, r4, r5 ; calculate difference
+ pld [r0, r1, lsl #1]
+ sel r7, r6, lr ; select bytes with positive difference
+ usub8 r9, r5, r4 ; calculate difference with reversed operands
+ pld [r2, r3, lsl #1]
+ sel r6, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r4, r7, lr ; calculate sum of positive differences
+ usad8 r5, r6, lr ; calculate sum of negative differences
+ orr r6, r6, r7 ; differences of all 4 pixels
+ ; calculate total sum
+ adds r8, r8, r4 ; add positive differences to sum
+ subs r8, r8, r5 ; subtract negative differences from sum
+
+ ; calculate sse
+ uxtb16 r5, r6 ; byte (two pixels) to halfwords
+ uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
+ smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
+
+ ; 2nd 4 pixels
+ ldr r4, [r0, #4] ; load 4 src pixels
+ ldr r5, [r2, #4] ; load 4 ref pixels
+ smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r6, r4, r5 ; calculate difference
+ sel r7, r6, lr ; select bytes with positive difference
+ usub8 r9, r5, r4 ; calculate difference with reversed operands
+ sel r6, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r4, r7, lr ; calculate sum of positive differences
+ usad8 r5, r6, lr ; calculate sum of negative differences
+ orr r6, r6, r7 ; differences of all 4 pixels
+
+ ; calculate total sum
+ add r8, r8, r4 ; add positive differences to sum
+ sub r8, r8, r5 ; subtract negative differences from sum
+
+ ; calculate sse
+ uxtb16 r5, r6 ; byte (two pixels) to halfwords
+ uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
+ smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
+
+ ; 3rd 4 pixels
+ ldr r4, [r0, #8] ; load 4 src pixels
+ ldr r5, [r2, #8] ; load 4 ref pixels
+ smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r6, r4, r5 ; calculate difference
+ sel r7, r6, lr ; select bytes with positive difference
+ usub8 r9, r5, r4 ; calculate difference with reversed operands
+ sel r6, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r4, r7, lr ; calculate sum of positive differences
+ usad8 r5, r6, lr ; calculate sum of negative differences
+ orr r6, r6, r7 ; differences of all 4 pixels
+
+ ; calculate total sum
+ add r8, r8, r4 ; add positive differences to sum
+ sub r8, r8, r5 ; subtract negative differences from sum
+
+ ; calculate sse
+ uxtb16 r5, r6 ; byte (two pixels) to halfwords
+ uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
+ smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
+
+ ; 4th 4 pixels
+ ldr r4, [r0, #12] ; load 4 src pixels
+ ldr r5, [r2, #12] ; load 4 ref pixels
+ smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r6, r4, r5 ; calculate difference
+ add r0, r0, r1 ; set src_ptr to next row
+ sel r7, r6, lr ; select bytes with positive difference
+ usub8 r9, r5, r4 ; calculate difference with reversed operands
+ add r2, r2, r3 ; set dst_ptr to next row
+ sel r6, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r4, r7, lr ; calculate sum of positive differences
+ usad8 r5, r6, lr ; calculate sum of negative differences
+ orr r6, r6, r7 ; differences of all 4 pixels
+
+ ; calculate total sum
+ add r8, r8, r4 ; add positive differences to sum
+ sub r8, r8, r5 ; subtract negative differences from sum
+
+ ; calculate sse
+ uxtb16 r5, r6 ; byte (two pixels) to halfwords
+ uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
+ smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
+ smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
+
+
+ subs r12, r12, #1
+
+ bne loop16x16
+
+ ; return stuff
+ ldr r6, [sp, #40] ; get address of sse
+ mul r0, r8, r8 ; sum * sum
+ str r11, [r6] ; store sse
+ sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
+
+ ldmfd sp!, {r4-r12, pc}
+
+ ENDP
+
+; r0 unsigned char *src_ptr
+; r1 int source_stride
+; r2 unsigned char *ref_ptr
+; r3 int recon_stride
+; stack unsigned int *sse
+|vpx_variance8x8_media| PROC
+
+ push {r4-r10, lr}
+
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+
+ mov r12, #8 ; set loop counter to 8 (=block height)
+ mov r4, #0 ; initialize sum = 0
+ mov r5, #0 ; initialize sse = 0
+
+loop8x8
+ ; 1st 4 pixels
+ ldr r6, [r0, #0x0] ; load 4 src pixels
+ ldr r7, [r2, #0x0] ; load 4 ref pixels
+
+ mov lr, #0 ; constant zero
+
+ usub8 r8, r6, r7 ; calculate difference
+ pld [r0, r1, lsl #1]
+ sel r10, r8, lr ; select bytes with positive difference
+ usub8 r9, r7, r6 ; calculate difference with reversed operands
+ pld [r2, r3, lsl #1]
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r6, r10, lr ; calculate sum of positive differences
+ usad8 r7, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r10 ; differences of all 4 pixels
+ ; calculate total sum
+ add r4, r4, r6 ; add positive differences to sum
+ sub r4, r4, r7 ; subtract negative differences from sum
+
+ ; calculate sse
+ uxtb16 r7, r8 ; byte (two pixels) to halfwords
+ uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
+ smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
+
+ ; 2nd 4 pixels
+ ldr r6, [r0, #0x4] ; load 4 src pixels
+ ldr r7, [r2, #0x4] ; load 4 ref pixels
+ smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r8, r6, r7 ; calculate difference
+ add r0, r0, r1 ; set src_ptr to next row
+ sel r10, r8, lr ; select bytes with positive difference
+ usub8 r9, r7, r6 ; calculate difference with reversed operands
+ add r2, r2, r3 ; set dst_ptr to next row
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r6, r10, lr ; calculate sum of positive differences
+ usad8 r7, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r10 ; differences of all 4 pixels
+
+ ; calculate total sum
+ add r4, r4, r6 ; add positive differences to sum
+ sub r4, r4, r7 ; subtract negative differences from sum
+
+ ; calculate sse
+ uxtb16 r7, r8 ; byte (two pixels) to halfwords
+ uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
+ smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
+ subs r12, r12, #1 ; next row
+ smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
+
+ bne loop8x8
+
+ ; return stuff
+ ldr r8, [sp, #32] ; get address of sse
+ mul r1, r4, r4 ; sum * sum
+ str r5, [r8] ; store sse
+ sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6))
+
+ pop {r4-r10, pc}
+
+ ENDP
+
+; r0 unsigned char *src_ptr
+; r1 int source_stride
+; r2 unsigned char *ref_ptr
+; r3 int recon_stride
+; stack unsigned int *sse
+;
+;note: Based on vpx_variance16x16_media. In this function, sum is never used,
+; so that part of the calculation has been removed.
+
+|vpx_mse16x16_media| PROC
+
+ push {r4-r9, lr}
+
+ pld [r0, r1, lsl #0]
+ pld [r2, r3, lsl #0]
+
+ mov r12, #16 ; set loop counter to 16 (=block height)
+ mov r4, #0 ; initialize sse = 0
+
+loopmse
+ ; 1st 4 pixels
+ ldr r5, [r0, #0x0] ; load 4 src pixels
+ ldr r6, [r2, #0x0] ; load 4 ref pixels
+
+ mov lr, #0 ; constant zero
+
+ usub8 r8, r5, r6 ; calculate difference
+ pld [r0, r1, lsl #1]
+ sel r7, r8, lr ; select bytes with positive difference
+ usub8 r9, r6, r5 ; calculate difference with reversed operands
+ pld [r2, r3, lsl #1]
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r5, r7, lr ; calculate sum of positive differences
+ usad8 r6, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r7 ; differences of all 4 pixels
+
+ ldr r5, [r0, #0x4] ; load 4 src pixels
+
+ ; calculate sse
+ uxtb16 r6, r8 ; byte (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
+ smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
+
+ ; 2nd 4 pixels
+ ldr r6, [r2, #0x4] ; load 4 ref pixels
+ smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r8, r5, r6 ; calculate difference
+ sel r7, r8, lr ; select bytes with positive difference
+ usub8 r9, r6, r5 ; calculate difference with reversed operands
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r5, r7, lr ; calculate sum of positive differences
+ usad8 r6, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r7 ; differences of all 4 pixels
+ ldr r5, [r0, #0x8] ; load 4 src pixels
+ ; calculate sse
+ uxtb16 r6, r8 ; byte (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
+ smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
+
+ ; 3rd 4 pixels
+ ldr r6, [r2, #0x8] ; load 4 ref pixels
+ smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r8, r5, r6 ; calculate difference
+ sel r7, r8, lr ; select bytes with positive difference
+ usub8 r9, r6, r5 ; calculate difference with reversed operands
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r5, r7, lr ; calculate sum of positive differences
+ usad8 r6, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r7 ; differences of all 4 pixels
+
+ ldr r5, [r0, #0xc] ; load 4 src pixels
+
+ ; calculate sse
+ uxtb16 r6, r8 ; byte (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
+ smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
+
+ ; 4th 4 pixels
+ ldr r6, [r2, #0xc] ; load 4 ref pixels
+ smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
+
+ usub8 r8, r5, r6 ; calculate difference
+ add r0, r0, r1 ; set src_ptr to next row
+ sel r7, r8, lr ; select bytes with positive difference
+ usub8 r9, r6, r5 ; calculate difference with reversed operands
+ add r2, r2, r3 ; set dst_ptr to next row
+ sel r8, r9, lr ; select bytes with negative difference
+
+ ; calculate partial sums
+ usad8 r5, r7, lr ; calculate sum of positive differences
+ usad8 r6, r8, lr ; calculate sum of negative differences
+ orr r8, r8, r7 ; differences of all 4 pixels
+
+ subs r12, r12, #1 ; next row
+
+ ; calculate sse
+ uxtb16 r6, r8 ; byte (two pixels) to halfwords
+ uxtb16 r7, r8, ror #8 ; another two pixels to halfwords
+ smlad r4, r6, r6, r4 ; dual signed multiply, add and accumulate (1)
+ smlad r4, r7, r7, r4 ; dual signed multiply, add and accumulate (2)
+
+ bne loopmse
+
+ ; return stuff
+ ldr r1, [sp, #28] ; get address of sse
+ mov r0, r4 ; return sse
+ str r4, [r1] ; store sse
+
+ pop {r4-r9, pc}
+
+ ENDP
+
+ END
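A scalar C reference of what the three media routines above return for 8-bit input, mirroring the generic implementation added in vpx_dsp/variance.c later in this patch; the helper name variance_wxh is illustrative:

/* Sketch: scalar equivalent of vpx_variance16x16_media (w = h = 16). */
static unsigned int variance_wxh(const unsigned char *src, int src_stride,
                                 const unsigned char *ref, int ref_stride,
                                 int w, int h, unsigned int *sse) {
  int r, c, sum = 0;
  *sse = 0;
  for (r = 0; r < h; ++r) {
    for (c = 0; c < w; ++c) {
      const int d = src[c] - ref[c];
      sum += d;
      *sse += d * d;
    }
    src += src_stride;
    ref += ref_stride;
  }
  /* 16x16: variance = sse - ((sum * sum) >> 8); vpx_mse16x16 returns *sse. */
  return *sse - (unsigned int)(((long long)sum * sum) / (w * h));
}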
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c
new file mode 100644
index 00000000000..ede6e7bbb03
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/variance_neon.c
@@ -0,0 +1,418 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "./vpx_config.h"
+
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+static INLINE int horizontal_add_s16x8(const int16x8_t v_16x8) {
+ const int32x4_t a = vpaddlq_s16(v_16x8);
+ const int64x2_t b = vpaddlq_s32(a);
+ const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
+ vreinterpret_s32_s64(vget_high_s64(b)));
+ return vget_lane_s32(c, 0);
+}
+
+static INLINE int horizontal_add_s32x4(const int32x4_t v_32x4) {
+ const int64x2_t b = vpaddlq_s32(v_32x4);
+ const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
+ vreinterpret_s32_s64(vget_high_s64(b)));
+ return vget_lane_s32(c, 0);
+}
+
+// w * h must be less than 2048 or local variable v_sum may overflow.
+static void variance_neon_w8(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int w, int h, uint32_t *sse, int *sum) {
+ int i, j;
+ int16x8_t v_sum = vdupq_n_s16(0);
+ int32x4_t v_sse_lo = vdupq_n_s32(0);
+ int32x4_t v_sse_hi = vdupq_n_s32(0);
+
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 8) {
+ const uint8x8_t v_a = vld1_u8(&a[j]);
+ const uint8x8_t v_b = vld1_u8(&b[j]);
+ const uint16x8_t v_diff = vsubl_u8(v_a, v_b);
+ const int16x8_t sv_diff = vreinterpretq_s16_u16(v_diff);
+ v_sum = vaddq_s16(v_sum, sv_diff);
+ v_sse_lo = vmlal_s16(v_sse_lo,
+ vget_low_s16(sv_diff),
+ vget_low_s16(sv_diff));
+ v_sse_hi = vmlal_s16(v_sse_hi,
+ vget_high_s16(sv_diff),
+ vget_high_s16(sv_diff));
+ }
+ a += a_stride;
+ b += b_stride;
+ }
+
+ *sum = horizontal_add_s16x8(v_sum);
+ *sse = (unsigned int)horizontal_add_s32x4(vaddq_s32(v_sse_lo, v_sse_hi));
+}
+
+void vpx_get8x8var_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse, int *sum) {
+ variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, sum);
+}
+
+void vpx_get16x16var_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse, int *sum) {
+ variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, sum);
+}
+
+unsigned int vpx_variance8x8_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_neon_w8(a, a_stride, b, b_stride, 8, 8, sse, &sum);
+ return *sse - (((int64_t)sum * sum) >> 6); // >> 6 = / 8 * 8
+}
+
+unsigned int vpx_variance16x16_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_neon_w8(a, a_stride, b, b_stride, 16, 16, sse, &sum);
+ return *sse - (((int64_t)sum * sum) >> 8); // >> 8 = / 16 * 16
+}
+
+unsigned int vpx_variance32x32_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_neon_w8(a, a_stride, b, b_stride, 32, 32, sse, &sum);
+ return *sse - (((int64_t)sum * sum) >> 10); // >> 10 = / 32 * 32
+}
+
+unsigned int vpx_variance32x64_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum1, sum2;
+ uint32_t sse1, sse2;
+ variance_neon_w8(a, a_stride, b, b_stride, 32, 32, &sse1, &sum1);
+ variance_neon_w8(a + (32 * a_stride), a_stride,
+ b + (32 * b_stride), b_stride, 32, 32,
+ &sse2, &sum2);
+ *sse = sse1 + sse2;
+ sum1 += sum2;
+ return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64
+}
+
+unsigned int vpx_variance64x32_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum1, sum2;
+ uint32_t sse1, sse2;
+ variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
+ variance_neon_w8(a + (16 * a_stride), a_stride,
+ b + (16 * b_stride), b_stride, 64, 16,
+ &sse2, &sum2);
+ *sse = sse1 + sse2;
+ sum1 += sum2;
+ return *sse - (((int64_t)sum1 * sum1) >> 11); // >> 11 = / 32 * 64
+}
+
+unsigned int vpx_variance64x64_neon(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse) {
+ int sum1, sum2;
+ uint32_t sse1, sse2;
+
+ variance_neon_w8(a, a_stride, b, b_stride, 64, 16, &sse1, &sum1);
+ variance_neon_w8(a + (16 * a_stride), a_stride,
+ b + (16 * b_stride), b_stride, 64, 16,
+ &sse2, &sum2);
+ sse1 += sse2;
+ sum1 += sum2;
+
+ variance_neon_w8(a + (16 * 2 * a_stride), a_stride,
+ b + (16 * 2 * b_stride), b_stride,
+ 64, 16, &sse2, &sum2);
+ sse1 += sse2;
+ sum1 += sum2;
+
+ variance_neon_w8(a + (16 * 3 * a_stride), a_stride,
+ b + (16 * 3 * b_stride), b_stride,
+ 64, 16, &sse2, &sum2);
+ *sse = sse1 + sse2;
+ sum1 += sum2;
+ return *sse - (((int64_t)sum1 * sum1) >> 12); // >> 12 = / 64 * 64
+}
+
+unsigned int vpx_variance16x8_neon(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ int i;
+ int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
+ uint32x2_t d0u32, d10u32;
+ int64x1_t d0s64, d1s64;
+ uint8x16_t q0u8, q1u8, q2u8, q3u8;
+ uint16x8_t q11u16, q12u16, q13u16, q14u16;
+ int32x4_t q8s32, q9s32, q10s32;
+ int64x2_t q0s64, q1s64, q5s64;
+
+ q8s32 = vdupq_n_s32(0);
+ q9s32 = vdupq_n_s32(0);
+ q10s32 = vdupq_n_s32(0);
+
+ for (i = 0; i < 4; i++) {
+ q0u8 = vld1q_u8(src_ptr);
+ src_ptr += source_stride;
+ q1u8 = vld1q_u8(src_ptr);
+ src_ptr += source_stride;
+ __builtin_prefetch(src_ptr);
+
+ q2u8 = vld1q_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ q3u8 = vld1q_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ __builtin_prefetch(ref_ptr);
+
+ q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
+ q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
+ q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
+ q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
+
+ d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+ d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+ q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+ q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+ d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+ d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+ q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+ q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+
+ d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+ d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q13u16));
+ q9s32 = vmlal_s16(q9s32, d26s16, d26s16);
+ q10s32 = vmlal_s16(q10s32, d27s16, d27s16);
+
+ d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
+ d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q14u16));
+ q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
+ q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
+ }
+
+ q10s32 = vaddq_s32(q10s32, q9s32);
+ q0s64 = vpaddlq_s32(q8s32);
+ q1s64 = vpaddlq_s32(q10s32);
+
+ d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+ d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+
+ q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+ vreinterpret_s32_s64(d0s64));
+ vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+
+ d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
+ d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+ return vget_lane_u32(d0u32, 0);
+}
+
+unsigned int vpx_variance8x16_neon(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ int i;
+ uint8x8_t d0u8, d2u8, d4u8, d6u8;
+ int16x4_t d22s16, d23s16, d24s16, d25s16;
+ uint32x2_t d0u32, d10u32;
+ int64x1_t d0s64, d1s64;
+ uint16x8_t q11u16, q12u16;
+ int32x4_t q8s32, q9s32, q10s32;
+ int64x2_t q0s64, q1s64, q5s64;
+
+ q8s32 = vdupq_n_s32(0);
+ q9s32 = vdupq_n_s32(0);
+ q10s32 = vdupq_n_s32(0);
+
+ for (i = 0; i < 8; i++) {
+ d0u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ d2u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ __builtin_prefetch(src_ptr);
+
+ d4u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ d6u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ __builtin_prefetch(ref_ptr);
+
+ q11u16 = vsubl_u8(d0u8, d4u8);
+ q12u16 = vsubl_u8(d2u8, d6u8);
+
+ d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+ d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q11u16));
+ q9s32 = vmlal_s16(q9s32, d22s16, d22s16);
+ q10s32 = vmlal_s16(q10s32, d23s16, d23s16);
+
+ d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+ d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+ q8s32 = vpadalq_s16(q8s32, vreinterpretq_s16_u16(q12u16));
+ q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+ q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+ }
+
+ q10s32 = vaddq_s32(q10s32, q9s32);
+ q0s64 = vpaddlq_s32(q8s32);
+ q1s64 = vpaddlq_s32(q10s32);
+
+ d0s64 = vadd_s64(vget_low_s64(q0s64), vget_high_s64(q0s64));
+ d1s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+
+ q5s64 = vmull_s32(vreinterpret_s32_s64(d0s64),
+ vreinterpret_s32_s64(d0s64));
+ vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d1s64), 0);
+
+ d10u32 = vshr_n_u32(vreinterpret_u32_s64(vget_low_s64(q5s64)), 7);
+ d0u32 = vsub_u32(vreinterpret_u32_s64(d1s64), d10u32);
+
+ return vget_lane_u32(d0u32, 0);
+}
+
+unsigned int vpx_mse16x16_neon(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride,
+ unsigned int *sse) {
+ int i;
+ int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
+ int64x1_t d0s64;
+ uint8x16_t q0u8, q1u8, q2u8, q3u8;
+ int32x4_t q7s32, q8s32, q9s32, q10s32;
+ uint16x8_t q11u16, q12u16, q13u16, q14u16;
+ int64x2_t q1s64;
+
+ q7s32 = vdupq_n_s32(0);
+ q8s32 = vdupq_n_s32(0);
+ q9s32 = vdupq_n_s32(0);
+ q10s32 = vdupq_n_s32(0);
+
+ for (i = 0; i < 8; i++) { // mse16x16_neon_loop
+ q0u8 = vld1q_u8(src_ptr);
+ src_ptr += source_stride;
+ q1u8 = vld1q_u8(src_ptr);
+ src_ptr += source_stride;
+ q2u8 = vld1q_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ q3u8 = vld1q_u8(ref_ptr);
+ ref_ptr += recon_stride;
+
+ q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
+ q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
+ q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
+ q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
+
+ d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
+ d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
+ q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
+ q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
+
+ d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
+ d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
+ q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
+ q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+
+ d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
+ d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
+ q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
+ q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
+
+ d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
+ d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
+ q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
+ q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
+ }
+
+ q7s32 = vaddq_s32(q7s32, q8s32);
+ q9s32 = vaddq_s32(q9s32, q10s32);
+ q10s32 = vaddq_s32(q7s32, q9s32);
+
+ q1s64 = vpaddlq_s32(q10s32);
+ d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+
+ vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
+ return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
+}
+
+unsigned int vpx_get4x4sse_cs_neon(
+ const unsigned char *src_ptr,
+ int source_stride,
+ const unsigned char *ref_ptr,
+ int recon_stride) {
+ int16x4_t d22s16, d24s16, d26s16, d28s16;
+ int64x1_t d0s64;
+ uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
+ int32x4_t q7s32, q8s32, q9s32, q10s32;
+ uint16x8_t q11u16, q12u16, q13u16, q14u16;
+ int64x2_t q1s64;
+
+ d0u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ d4u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ d1u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ d5u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ d2u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ d6u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+ d3u8 = vld1_u8(src_ptr);
+ src_ptr += source_stride;
+ d7u8 = vld1_u8(ref_ptr);
+ ref_ptr += recon_stride;
+
+ q11u16 = vsubl_u8(d0u8, d4u8);
+ q12u16 = vsubl_u8(d1u8, d5u8);
+ q13u16 = vsubl_u8(d2u8, d6u8);
+ q14u16 = vsubl_u8(d3u8, d7u8);
+
+ d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
+ d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
+ d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
+ d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
+
+ q7s32 = vmull_s16(d22s16, d22s16);
+ q8s32 = vmull_s16(d24s16, d24s16);
+ q9s32 = vmull_s16(d26s16, d26s16);
+ q10s32 = vmull_s16(d28s16, d28s16);
+
+ q7s32 = vaddq_s32(q7s32, q8s32);
+ q9s32 = vaddq_s32(q9s32, q10s32);
+ q9s32 = vaddq_s32(q7s32, q9s32);
+
+ q1s64 = vpaddlq_s32(q9s32);
+ d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+
+ return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
+}
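On the "w * h must be less than 2048" comment on variance_neon_w8() above, a worked bound: each lane of the int16x8 accumulator sums w*h/8 differences of magnitude at most 255, so the worst-case lane value is (w*h/8) * 255. For the largest block handled in one call here (1024 pixels, i.e. 32x32 or 64x16) that is 128 * 255 = 32640, which still fits in an int16 (max 32767); this is why the 32x64, 64x32, and 64x64 variants above split the block into 32x32 or 64x16 pieces before accumulating.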
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sad.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sad.c
index 9db312fbe05..c0c3ff99645 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sad.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/sad.c
@@ -14,13 +14,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
-
-#if CONFIG_VP9_HIGHBITDEPTH
-#include "vp9/common/vp9_common.h"
-#endif // CONFIG_VP9_HIGHBITDEPTH
-// Temporary ...
-#define ROUND_POWER_OF_TWO(value, n) \
- (((value) + (1 << ((n) - 1))) >> (n))
+#include "vpx_ports/mem.h"
/* Sum the difference between every corresponding element of the buffers. */
static INLINE unsigned int sad(const uint8_t *a, int a_stride,
@@ -39,6 +33,7 @@ static INLINE unsigned int sad(const uint8_t *a, int a_stride,
return sad;
}
+// TODO(johannkoenig): this moved to vpx_dsp, should be able to clean this up.
/* Remove dependency on vp9 variance function by duplicating vp9_comp_avg_pred.
* The function averages every corresponding element of the buffers and stores
* the value in a third buffer, comp_pred.
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/variance.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/variance.c
new file mode 100644
index 00000000000..084dd7b7ead
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/variance.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "vpx_ports/mem.h"
+#include "vpx/vpx_integer.h"
+
+unsigned int vpx_get4x4sse_cs_c(const unsigned char *a, int a_stride,
+ const unsigned char *b, int b_stride) {
+ int distortion = 0;
+ int r, c;
+
+ for (r = 0; r < 4; r++) {
+ for (c = 0; c < 4; c++) {
+ int diff = a[c] - b[c];
+ distortion += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ }
+
+ return distortion;
+}
+
+unsigned int vpx_get_mb_ss_c(const int16_t *a) {
+ unsigned int i, sum = 0;
+
+ for (i = 0; i < 256; ++i) {
+ sum += a[i] * a[i];
+ }
+
+ return sum;
+}
+
+static void variance(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ int i, j;
+
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+#define VAR(W, H) \
+unsigned int vpx_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse - (((int64_t)sum * sum) / (W * H)); \
+}
+
+/* Identical to the variance call except it takes an additional parameter, sum,
+ * and returns that value using pass-by-reference instead of returning
+ * sse - sum^2 / w*h
+ */
+#define GET_VAR(W, H) \
+void vpx_get##W##x##H##var_c(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ unsigned int *sse, int *sum) { \
+ variance(a, a_stride, b, b_stride, W, H, sse, sum); \
+}
+
+/* Identical to the variance call except it does not calculate the
+ * sse - sum^2 / w*h and returns sse in addition to modifying the passed in
+ * variable.
+ */
+#define MSE(W, H) \
+unsigned int vpx_mse##W##x##H##_c(const uint8_t *a, int a_stride, \
+ const uint8_t *b, int b_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse; \
+}
+
+VAR(64, 64)
+VAR(64, 32)
+VAR(32, 64)
+VAR(32, 32)
+VAR(32, 16)
+VAR(16, 32)
+VAR(16, 16)
+VAR(16, 8)
+VAR(8, 16)
+VAR(8, 8)
+VAR(8, 4)
+VAR(4, 8)
+VAR(4, 4)
+
+GET_VAR(16, 16)
+GET_VAR(8, 8)
+
+MSE(16, 16)
+MSE(16, 8)
+MSE(8, 16)
+MSE(8, 8)
+
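A hedged usage sketch of one expansion of the VAR() macro above (the helper name example_var8x8 and the strides are illustrative, and the vpx_dsp_rtcd.h include assumes a generated build tree):

#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"

/* Sketch: VAR(8, 8) expands to vpx_variance8x8_c(); the return value is
 * sse - (sum * sum) / 64 for the 8x8 block, with sse also written out. */
static unsigned int example_var8x8(const uint8_t *src, const uint8_t *ref) {
  unsigned int sse;
  return vpx_variance8x8_c(src, 8, ref, 8, &sse);
}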
+void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
+ int height, const uint8_t *ref, int ref_stride) {
+ int i, j;
+
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ const int tmp = pred[j] + ref[j];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_variance64(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, uint64_t *sse, uint64_t *sum) {
+ int i, j;
+
+ uint16_t *a = CONVERT_TO_SHORTPTR(a8);
+ uint16_t *b = CONVERT_TO_SHORTPTR(b8);
+ *sum = 0;
+ *sse = 0;
+
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ const int diff = a[j] - b[j];
+ *sum += diff;
+ *sse += diff * diff;
+ }
+ a += a_stride;
+ b += b_stride;
+ }
+}
+
+static void highbd_8_variance(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ uint64_t sse_long = 0;
+ uint64_t sum_long = 0;
+ highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+ *sse = (unsigned int)sse_long;
+ *sum = (int)sum_long;
+}
+
+static void highbd_10_variance(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ uint64_t sse_long = 0;
+ uint64_t sum_long = 0;
+ highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+ *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4);
+ *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
+}
+
+static void highbd_12_variance(const uint8_t *a8, int a_stride,
+ const uint8_t *b8, int b_stride,
+ int w, int h, unsigned int *sse, int *sum) {
+ uint64_t sse_long = 0;
+ uint64_t sum_long = 0;
+ highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+ *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8);
+ *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
+}
+
+#define HIGHBD_VAR(W, H) \
+unsigned int vpx_highbd_8_variance##W##x##H##_c(const uint8_t *a, \
+ int a_stride, \
+ const uint8_t *b, \
+ int b_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ highbd_8_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse - (((int64_t)sum * sum) / (W * H)); \
+} \
+\
+unsigned int vpx_highbd_10_variance##W##x##H##_c(const uint8_t *a, \
+ int a_stride, \
+ const uint8_t *b, \
+ int b_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse - (((int64_t)sum * sum) / (W * H)); \
+} \
+\
+unsigned int vpx_highbd_12_variance##W##x##H##_c(const uint8_t *a, \
+ int a_stride, \
+ const uint8_t *b, \
+ int b_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+ return *sse - (((int64_t)sum * sum) / (W * H)); \
+}
+
+#define HIGHBD_GET_VAR(S) \
+void vpx_highbd_8_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride, \
+ unsigned int *sse, int *sum) { \
+ highbd_8_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
+} \
+\
+void vpx_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride, \
+ unsigned int *sse, int *sum) { \
+ highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
+} \
+\
+void vpx_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \
+ const uint8_t *ref, int ref_stride, \
+ unsigned int *sse, int *sum) { \
+ highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \
+}
+
+#define HIGHBD_MSE(W, H) \
+unsigned int vpx_highbd_8_mse##W##x##H##_c(const uint8_t *src, \
+ int src_stride, \
+ const uint8_t *ref, \
+ int ref_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ highbd_8_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
+ return *sse; \
+} \
+\
+unsigned int vpx_highbd_10_mse##W##x##H##_c(const uint8_t *src, \
+ int src_stride, \
+ const uint8_t *ref, \
+ int ref_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
+ return *sse; \
+} \
+\
+unsigned int vpx_highbd_12_mse##W##x##H##_c(const uint8_t *src, \
+ int src_stride, \
+ const uint8_t *ref, \
+ int ref_stride, \
+ unsigned int *sse) { \
+ int sum; \
+ highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
+ return *sse; \
+}
+
+HIGHBD_GET_VAR(8)
+HIGHBD_GET_VAR(16)
+
+HIGHBD_MSE(16, 16)
+HIGHBD_MSE(16, 8)
+HIGHBD_MSE(8, 16)
+HIGHBD_MSE(8, 8)
+
+HIGHBD_VAR(64, 64)
+HIGHBD_VAR(64, 32)
+HIGHBD_VAR(32, 64)
+HIGHBD_VAR(32, 32)
+HIGHBD_VAR(32, 16)
+HIGHBD_VAR(16, 32)
+HIGHBD_VAR(16, 16)
+HIGHBD_VAR(16, 8)
+HIGHBD_VAR(8, 16)
+HIGHBD_VAR(8, 8)
+HIGHBD_VAR(8, 4)
+HIGHBD_VAR(4, 8)
+HIGHBD_VAR(4, 4)
+
+void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
+ int width, int height, const uint8_t *ref8,
+ int ref_stride) {
+ int i, j;
+ uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ for (i = 0; i < height; i++) {
+ for (j = 0; j < width; j++) {
+ const int tmp = pred[j] + ref[j];
+ comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+ }
+ comp_pred += width;
+ pred += width;
+ ref += ref_stride;
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
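On the scaling in highbd_10_variance() and highbd_12_variance() above: a 10-bit sample is 2^2 times the 8-bit scale, so per-pixel differences scale by 2^2 and their squares by 2^4; rounding sum right by 2 and sse right by 4 brings both back to the 8-bit scale before narrowing to int and unsigned int. For 12-bit input the factors are 2^4 and 2^8, hence the shifts of 4 and 8.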
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
index 606515d2c19..f23534adc15 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
@@ -17,6 +17,7 @@ DSP_SRCS-$(HAVE_MEDIA) += arm/sad_media$(ASM)
DSP_SRCS-$(HAVE_NEON) += arm/sad4d_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/sad_neon.c
+
DSP_SRCS-$(HAVE_MMX) += x86/sad_mmx.asm
DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/sad_sse2.asm
@@ -29,9 +30,28 @@ DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm
+
endif # CONFIG_VP9_HIGHBITDEPTH
endif # CONFIG_ENCODERS
+ifneq ($(filter yes,$(CONFIG_ENCODERS) $(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),)
+DSP_SRCS-yes += variance.c
+
+DSP_SRCS-$(HAVE_MEDIA) += arm/variance_media$(ASM)
+DSP_SRCS-$(HAVE_NEON) += arm/variance_neon.c
+
+DSP_SRCS-$(HAVE_MMX) += x86/variance_mmx.c
+DSP_SRCS-$(HAVE_MMX) += x86/variance_impl_mmx.asm
+DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c
+DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c
+DSP_SRCS-$(HAVE_AVX2) += x86/variance_impl_avx2.c
+
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_sse2.c
+DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_impl_sse2.asm
+endif # CONFIG_VP9_HIGHBITDEPTH
+endif # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
+
DSP_SRCS-no += $(DSP_SRCS_REMOVE-yes)
DSP_SRCS-yes += vpx_dsp_rtcd.c
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
index ebec9ec0660..55271cf9c14 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -392,4 +392,212 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_ENCODERS
+if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "yes" || vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
+
+add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance64x64 sse2 avx2 neon/;
+
+add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance64x32 sse2 avx2 neon/;
+
+add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance32x64 sse2 neon/;
+
+add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance32x32 sse2 avx2 neon/;
+
+add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance32x16 sse2 avx2/;
+
+add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance16x32 sse2/;
+
+add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon/;
+
+add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance16x8 mmx sse2 neon/;
+
+add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance8x16 mmx sse2 neon/;
+
+add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance8x8 mmx sse2 media neon/;
+
+add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance8x4 sse2/;
+
+add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance4x8 sse2/;
+
+add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_variance4x4 mmx sse2/;
+
+
+add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ specialize qw/vpx_get16x16var sse2 avx2 neon/;
+
+add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ specialize qw/vpx_get8x8var mmx sse2 neon/;
+
+add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_mse16x16 mmx sse2 avx2 media neon/;
+
+add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_mse16x8 sse2/;
+
+add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_mse8x16 sse2/;
+
+add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_mse8x8 sse2/;
+
+add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
+ specialize qw/vpx_get_mb_ss mmx sse2/;
+
+add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride";
+ specialize qw/vpx_get4x4sse_cs neon/;
+
+add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
+
+if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
+ add_proto qw/unsigned int vpx_highbd_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance64x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance64x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance32x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance32x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance32x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance16x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance16x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance8x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_variance8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+
+ add_proto qw/unsigned int vpx_highbd_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance64x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance64x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance32x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance32x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance32x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance16x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance16x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance8x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_variance8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+
+ add_proto qw/unsigned int vpx_highbd_8_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance64x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance64x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance32x64 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance32x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance32x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance16x32 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance16x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance8x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_variance8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
+
+ add_proto qw/void vpx_highbd_8_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_8_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+
+ add_proto qw/void vpx_highbd_10_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_10_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+
+ add_proto qw/void vpx_highbd_12_get16x16var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+ add_proto qw/void vpx_highbd_12_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
+
+ add_proto qw/unsigned int vpx_highbd_8_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_mse16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_8_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_8_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_8_mse8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_mse16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_10_mse8x8 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_mse16x16 sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ add_proto qw/unsigned int vpx_highbd_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
+ specialize qw/vpx_highbd_12_mse8x8 sse2/;
+
+ add_proto qw/void vpx_highbd_comp_avg_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride";
+} # CONFIG_VP9_HIGHBITDEPTH
+} # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC
+
1;
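
Each add_proto/specialize pair above becomes a dispatch entry in the generated vpx_dsp_rtcd.h; callers use the plain vpx_* name and run-time CPU detection selects the best available specialization. A hedged caller sketch (the wrapper function is hypothetical):

    #include <stdint.h>
    #include "./vpx_dsp_rtcd.h"

    /* vpx_variance16x16 resolves at run time to the mmx/sse2/avx2/media/neon
     * specialization listed above, or to the C fallback. */
    static unsigned int block_variance_16x16(const uint8_t *src, int src_stride,
                                             const uint8_t *ref, int ref_stride) {
      unsigned int sse;
      return vpx_variance16x16(src, src_stride, ref, ref_stride, &sse);
    }
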
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm
index 821dd0660bc..923418a9921 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_highbd_variance_impl_sse2.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_impl_sse2.asm
@@ -11,7 +11,7 @@
%include "vpx_ports/x86_abi_support.asm"
-;unsigned int vp9_highbd_calc16x16var_sse2
+;unsigned int vpx_highbd_calc16x16var_sse2
;(
; unsigned char * src_ptr,
; int source_stride,
@@ -20,8 +20,8 @@
; unsigned int * SSE,
; int * Sum
;)
-global sym(vp9_highbd_calc16x16var_sse2) PRIVATE
-sym(vp9_highbd_calc16x16var_sse2):
+global sym(vpx_highbd_calc16x16var_sse2) PRIVATE
+sym(vpx_highbd_calc16x16var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@@ -164,7 +164,7 @@ sym(vp9_highbd_calc16x16var_sse2):
ret
-;unsigned int vp9_highbd_calc8x8var_sse2
+;unsigned int vpx_highbd_calc8x8var_sse2
;(
; unsigned char * src_ptr,
; int source_stride,
@@ -173,8 +173,8 @@ sym(vp9_highbd_calc16x16var_sse2):
; unsigned int * SSE,
; int * Sum
;)
-global sym(vp9_highbd_calc8x8var_sse2) PRIVATE
-sym(vp9_highbd_calc8x8var_sse2):
+global sym(vpx_highbd_calc8x8var_sse2) PRIVATE
+sym(vpx_highbd_calc8x8var_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_sse2.c
new file mode 100644
index 00000000000..343c0478b9a
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_variance_sse2.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#include "vp9/common/vp9_common.h"
+
+#include "vp9/encoder/vp9_variance.h"
+#include "vpx_ports/mem.h"
+
+typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride,
+ const uint16_t *ref, int ref_stride,
+ uint32_t *sse, int *sum);
+
+uint32_t vpx_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride,
+ const uint16_t *ref, int ref_stride,
+ uint32_t *sse, int *sum);
+
+uint32_t vpx_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride,
+ const uint16_t *ref, int ref_stride,
+ uint32_t *sse, int *sum);
+
+static void highbd_8_variance_sse2(const uint16_t *src, int src_stride,
+ const uint16_t *ref, int ref_stride,
+ int w, int h, uint32_t *sse, int *sum,
+ high_variance_fn_t var_fn, int block_size) {
+ int i, j;
+
+ *sse = 0;
+ *sum = 0;
+
+ for (i = 0; i < h; i += block_size) {
+ for (j = 0; j < w; j += block_size) {
+ unsigned int sse0;
+ int sum0;
+ var_fn(src + src_stride * i + j, src_stride,
+ ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
+ *sse += sse0;
+ *sum += sum0;
+ }
+ }
+}
+
+static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
+ const uint16_t *ref, int ref_stride,
+ int w, int h, uint32_t *sse, int *sum,
+ high_variance_fn_t var_fn, int block_size) {
+ int i, j;
+ uint64_t sse_long = 0;
+ int64_t sum_long = 0;
+
+ for (i = 0; i < h; i += block_size) {
+ for (j = 0; j < w; j += block_size) {
+ unsigned int sse0;
+ int sum0;
+ var_fn(src + src_stride * i + j, src_stride,
+ ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
+ sse_long += sse0;
+ sum_long += sum0;
+ }
+ }
+ *sum = ROUND_POWER_OF_TWO(sum_long, 2);
+ *sse = ROUND_POWER_OF_TWO(sse_long, 4);
+}
+
+static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
+ const uint16_t *ref, int ref_stride,
+ int w, int h, uint32_t *sse, int *sum,
+ high_variance_fn_t var_fn, int block_size) {
+ int i, j;
+ uint64_t sse_long = 0;
+ int64_t sum_long = 0;
+
+ for (i = 0; i < h; i += block_size) {
+ for (j = 0; j < w; j += block_size) {
+ unsigned int sse0;
+ int sum0;
+ var_fn(src + src_stride * i + j, src_stride,
+ ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
+ sse_long += sse0;
+ sum_long += sum0;
+ }
+ }
+ *sum = ROUND_POWER_OF_TWO(sum_long, 4);
+ *sse = ROUND_POWER_OF_TWO(sse_long, 8);
+}
+
+
+#define HIGH_GET_VAR(S) \
+void vpx_highbd_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
+ const uint8_t *ref8, int ref_stride, \
+ uint32_t *sse, int *sum) { \
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
+ vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
+ sse, sum); \
+} \
+\
+void vpx_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
+ const uint8_t *ref8, int ref_stride, \
+ uint32_t *sse, int *sum) { \
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
+ vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
+ sse, sum); \
+ *sum = ROUND_POWER_OF_TWO(*sum, 2); \
+ *sse = ROUND_POWER_OF_TWO(*sse, 4); \
+} \
+\
+void vpx_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
+ const uint8_t *ref8, int ref_stride, \
+ uint32_t *sse, int *sum) { \
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
+ vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
+ sse, sum); \
+ *sum = ROUND_POWER_OF_TWO(*sum, 4); \
+ *sse = ROUND_POWER_OF_TWO(*sse, 8); \
+}
+
+HIGH_GET_VAR(16);
+HIGH_GET_VAR(8);
+
+#undef HIGH_GET_VAR
+
+#define VAR_FN(w, h, block_size, shift) \
+uint32_t vpx_highbd_8_variance##w##x##h##_sse2( \
+ const uint8_t *src8, int src_stride, \
+ const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
+ int sum; \
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
+ highbd_8_variance_sse2(src, src_stride, ref, ref_stride, w, h, sse, &sum, \
+ vpx_highbd_calc##block_size##x##block_size##var_sse2, \
+ block_size); \
+ return *sse - (((int64_t)sum * sum) >> shift); \
+} \
+\
+uint32_t vpx_highbd_10_variance##w##x##h##_sse2( \
+ const uint8_t *src8, int src_stride, \
+ const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
+ int sum; \
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
+ highbd_10_variance_sse2( \
+ src, src_stride, ref, ref_stride, w, h, sse, &sum, \
+ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
+ return *sse - (((int64_t)sum * sum) >> shift); \
+} \
+\
+uint32_t vpx_highbd_12_variance##w##x##h##_sse2( \
+ const uint8_t *src8, int src_stride, \
+ const uint8_t *ref8, int ref_stride, uint32_t *sse) { \
+ int sum; \
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
+ highbd_12_variance_sse2( \
+ src, src_stride, ref, ref_stride, w, h, sse, &sum, \
+ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \
+ return *sse - (((int64_t)sum * sum) >> shift); \
+}
+
+VAR_FN(64, 64, 16, 12);
+VAR_FN(64, 32, 16, 11);
+VAR_FN(32, 64, 16, 11);
+VAR_FN(32, 32, 16, 10);
+VAR_FN(32, 16, 16, 9);
+VAR_FN(16, 32, 16, 9);
+VAR_FN(16, 16, 16, 8);
+VAR_FN(16, 8, 8, 7);
+VAR_FN(8, 16, 8, 7);
+VAR_FN(8, 8, 8, 6);
+
+#undef VAR_FN
+
+unsigned int vpx_highbd_8_mse16x16_sse2(const uint8_t *src8, int src_stride,
+ const uint8_t *ref8, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ highbd_8_variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
+ sse, &sum, vpx_highbd_calc16x16var_sse2, 16);
+ return *sse;
+}
+
+unsigned int vpx_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride,
+ const uint8_t *ref8, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
+ sse, &sum, vpx_highbd_calc16x16var_sse2, 16);
+ return *sse;
+}
+
+unsigned int vpx_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride,
+ const uint8_t *ref8, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
+ sse, &sum, vpx_highbd_calc16x16var_sse2, 16);
+ return *sse;
+}
+
+unsigned int vpx_highbd_8_mse8x8_sse2(const uint8_t *src8, int src_stride,
+ const uint8_t *ref8, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ highbd_8_variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
+ sse, &sum, vpx_highbd_calc8x8var_sse2, 8);
+ return *sse;
+}
+
+unsigned int vpx_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride,
+ const uint8_t *ref8, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
+ sse, &sum, vpx_highbd_calc8x8var_sse2, 8);
+ return *sse;
+}
+
+unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
+ const uint8_t *ref8, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+ highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
+ sse, &sum, vpx_highbd_calc8x8var_sse2, 8);
+ return *sse;
+}
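
The 10- and 12-bit paths above accumulate in 64 bits and then scale the results back: the sum is divided by 2^(d-8) and the squared sum by 2^(2(d-8)), keeping variance on the 8-bit scale regardless of bit depth. A small sketch of that normalization (assumed rationale; the helper name is illustrative):

    #include <stdint.h>
    #include "vpx_ports/mem.h"  /* ROUND_POWER_OF_TWO */

    /* d-bit samples carry (d - 8) extra bits, so sums scale by 2^(d-8) and
     * squared sums by 2^(2*(d-8)); rounding them back matches the >>2/>>4
     * (10-bit) and >>4/>>8 (12-bit) shifts in the wrappers above. */
    static void normalize_to_8bit_scale(int bit_depth, int64_t *sum,
                                        uint64_t *sse) {
      const int extra = bit_depth - 8;  /* 2 for 10-bit, 4 for 12-bit */
      if (extra <= 0) return;           /* 8-bit input needs no scaling */
      *sum = ROUND_POWER_OF_TWO(*sum, extra);
      *sse = ROUND_POWER_OF_TWO(*sse, 2 * extra);
    }
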
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c
index 4128f2ac37c..793658f9ea9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad4d_avx2.c
@@ -8,18 +8,19 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <immintrin.h> // AVX2
+#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
-void vpx_sad32x32x4d_avx2(uint8_t *src,
+void vpx_sad32x32x4d_avx2(const uint8_t *src,
int src_stride,
- uint8_t *ref[4],
+ const uint8_t *const ref[4],
int ref_stride,
uint32_t res[4]) {
__m256i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg;
__m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
__m256i sum_mlow, sum_mhigh;
int i;
- uint8_t *ref0, *ref1, *ref2, *ref3;
+ const uint8_t *ref0, *ref1, *ref2, *ref3;
ref0 = ref[0];
ref1 = ref[1];
@@ -31,11 +32,11 @@ void vpx_sad32x32x4d_avx2(uint8_t *src,
sum_ref3 = _mm256_set1_epi16(0);
for (i = 0; i < 32 ; i++) {
// load src and all refs
- src_reg = _mm256_loadu_si256((__m256i *)(src));
- ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
- ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
- ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
- ref3_reg = _mm256_loadu_si256((__m256i *) (ref3));
+ src_reg = _mm256_loadu_si256((const __m256i *)src);
+ ref0_reg = _mm256_loadu_si256((const __m256i *)ref0);
+ ref1_reg = _mm256_loadu_si256((const __m256i *)ref1);
+ ref2_reg = _mm256_loadu_si256((const __m256i *)ref2);
+ ref3_reg = _mm256_loadu_si256((const __m256i *)ref3);
// sum of the absolute differences between every ref-i to src
ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg);
ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg);
@@ -80,9 +81,9 @@ void vpx_sad32x32x4d_avx2(uint8_t *src,
}
}
-void vpx_sad64x64x4d_avx2(uint8_t *src,
+void vpx_sad64x64x4d_avx2(const uint8_t *src,
int src_stride,
- uint8_t *ref[4],
+ const uint8_t *const ref[4],
int ref_stride,
uint32_t res[4]) {
__m256i src_reg, srcnext_reg, ref0_reg, ref0next_reg;
@@ -91,7 +92,7 @@ void vpx_sad64x64x4d_avx2(uint8_t *src,
__m256i sum_ref0, sum_ref1, sum_ref2, sum_ref3;
__m256i sum_mlow, sum_mhigh;
int i;
- uint8_t *ref0, *ref1, *ref2, *ref3;
+ const uint8_t *ref0, *ref1, *ref2, *ref3;
ref0 = ref[0];
ref1 = ref[1];
@@ -103,16 +104,16 @@ void vpx_sad64x64x4d_avx2(uint8_t *src,
sum_ref3 = _mm256_set1_epi16(0);
for (i = 0; i < 64 ; i++) {
// load 64 bytes from src and all refs
- src_reg = _mm256_loadu_si256((__m256i *)(src));
- srcnext_reg = _mm256_loadu_si256((__m256i *)(src + 32));
- ref0_reg = _mm256_loadu_si256((__m256i *) (ref0));
- ref0next_reg = _mm256_loadu_si256((__m256i *) (ref0 + 32));
- ref1_reg = _mm256_loadu_si256((__m256i *) (ref1));
- ref1next_reg = _mm256_loadu_si256((__m256i *) (ref1 + 32));
- ref2_reg = _mm256_loadu_si256((__m256i *) (ref2));
- ref2next_reg = _mm256_loadu_si256((__m256i *) (ref2 + 32));
- ref3_reg = _mm256_loadu_si256((__m256i *) (ref3));
- ref3next_reg = _mm256_loadu_si256((__m256i *) (ref3 + 32));
+ src_reg = _mm256_loadu_si256((const __m256i *)src);
+ srcnext_reg = _mm256_loadu_si256((const __m256i *)(src + 32));
+ ref0_reg = _mm256_loadu_si256((const __m256i *)ref0);
+ ref0next_reg = _mm256_loadu_si256((const __m256i *)(ref0 + 32));
+ ref1_reg = _mm256_loadu_si256((const __m256i *)ref1);
+ ref1next_reg = _mm256_loadu_si256((const __m256i *)(ref1 + 32));
+ ref2_reg = _mm256_loadu_si256((const __m256i *)ref2);
+ ref2next_reg = _mm256_loadu_si256((const __m256i *)(ref2 + 32));
+ ref3_reg = _mm256_loadu_si256((const __m256i *)ref3);
+ ref3next_reg = _mm256_loadu_si256((const __m256i *)(ref3 + 32));
// sum of the absolute differences between every ref-i to src
ref0_reg = _mm256_sad_epu8(ref0_reg, src_reg);
ref1_reg = _mm256_sad_epu8(ref1_reg, src_reg);
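
The x4d kernels above score one source block against four motion-search candidates in a single pass. A plain-C reference model of the same contract (not the AVX2 implementation):

    #include <stdint.h>
    #include <stdlib.h>

    /* Reference model of vpx_sad32x32x4d: res[k] is the sum of absolute
     * differences between the source block and candidate ref[k]. */
    static void sad32x32x4d_ref(const uint8_t *src, int src_stride,
                                const uint8_t *const ref[4], int ref_stride,
                                uint32_t res[4]) {
      int k, i, j;
      for (k = 0; k < 4; ++k) {
        uint32_t sad = 0;
        for (i = 0; i < 32; ++i)
          for (j = 0; j < 32; ++j)
            sad += abs(src[i * src_stride + j] - ref[k][i * ref_stride + j]);
        res[k] = sad;
      }
    }
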
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c
index 78536a47218..ce9ad8f780c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/sad_avx2.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <immintrin.h>
+#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
#define FSAD64_H(h) \
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c
new file mode 100644
index 00000000000..82cef4af0af
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_avx2.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_dsp_rtcd.h"
+
+typedef void (*get_var_avx2)(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum);
+
+void vpx_get32x32var_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum);
+
+static void variance_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ int w, int h, unsigned int *sse, int *sum,
+ get_var_avx2 var_fn, int block_size) {
+ int i, j;
+
+ *sse = 0;
+ *sum = 0;
+
+ for (i = 0; i < h; i += 16) {
+ for (j = 0; j < w; j += block_size) {
+ unsigned int sse0;
+ int sum0;
+ var_fn(&src[src_stride * i + j], src_stride,
+ &ref[ref_stride * i + j], ref_stride, &sse0, &sum0);
+ *sse += sse0;
+ *sum += sum0;
+ }
+ }
+}
+
+
+unsigned int vpx_variance16x16_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_avx2(src, src_stride, ref, ref_stride, 16, 16,
+ sse, &sum, vpx_get16x16var_avx2, 16);
+ return *sse - (((unsigned int)sum * sum) >> 8);
+}
+
+unsigned int vpx_mse16x16_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ vpx_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum);
+ return *sse;
+}
+
+unsigned int vpx_variance32x16_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_avx2(src, src_stride, ref, ref_stride, 32, 16,
+ sse, &sum, vpx_get32x32var_avx2, 32);
+ return *sse - (((int64_t)sum * sum) >> 9);
+}
+
+unsigned int vpx_variance32x32_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_avx2(src, src_stride, ref, ref_stride, 32, 32,
+ sse, &sum, vpx_get32x32var_avx2, 32);
+ return *sse - (((int64_t)sum * sum) >> 10);
+}
+
+unsigned int vpx_variance64x64_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_avx2(src, src_stride, ref, ref_stride, 64, 64,
+ sse, &sum, vpx_get32x32var_avx2, 32);
+ return *sse - (((int64_t)sum * sum) >> 12);
+}
+
+unsigned int vpx_variance64x32_avx2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_avx2(src, src_stride, ref, ref_stride, 64, 32,
+ sse, &sum, vpx_get32x32var_avx2, 32);
+ return *sse - (((int64_t)sum * sum) >> 11);
+}
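
The shift applied to (sum * sum) in each AVX2 function above is log2(w * h): 8 for 16x16, 9 for 32x16, 10 for 32x32, 11 for 64x32, and 12 for 64x64. A quick self-check of those constants (the test helper is illustrative):

    #include <assert.h>

    static int log2_area(int w, int h) {
      int n = w * h, s = 0;
      while (n > 1) { n >>= 1; ++s; }
      return s;
    }

    static void check_variance_shifts(void) {
      assert(log2_area(16, 16) == 8);
      assert(log2_area(32, 16) == 9);
      assert(log2_area(32, 32) == 10);
      assert(log2_area(64, 32) == 11);
      assert(log2_area(64, 64) == 12);
    }
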
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_avx2.c
index f9923280a34..0e40959aa9d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_variance_impl_intrin_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_avx2.c
@@ -10,7 +10,9 @@
#include <immintrin.h> // AVX2
-void vp9_get16x16var_avx2(const unsigned char *src_ptr,
+#include "./vpx_dsp_rtcd.h"
+
+void vpx_get16x16var_avx2(const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
@@ -121,7 +123,7 @@ void vp9_get16x16var_avx2(const unsigned char *src_ptr,
}
}
-void vp9_get32x32var_avx2(const unsigned char *src_ptr,
+void vpx_get32x32var_avx2(const unsigned char *src_ptr,
int source_stride,
const unsigned char *ref_ptr,
int recon_stride,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_mmx.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm
index 7d5e6810bf0..a8d7d99dbc0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/variance_impl_mmx.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_impl_mmx.asm
@@ -11,9 +11,9 @@
%include "vpx_ports/x86_abi_support.asm"
-;unsigned int vp8_get_mb_ss_mmx( short *src_ptr )
-global sym(vp8_get_mb_ss_mmx) PRIVATE
-sym(vp8_get_mb_ss_mmx):
+;unsigned int vpx_get_mb_ss_mmx( short *src_ptr )
+global sym(vpx_get_mb_ss_mmx) PRIVATE
+sym(vpx_get_mb_ss_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
@@ -63,7 +63,7 @@ sym(vp8_get_mb_ss_mmx):
ret
-;unsigned int vp8_get8x8var_mmx
+;void vpx_get8x8var_mmx
;(
; unsigned char *src_ptr,
; int source_stride,
@@ -72,8 +72,8 @@ sym(vp8_get_mb_ss_mmx):
; unsigned int *SSE,
; int *Sum
;)
-global sym(vp8_get8x8var_mmx) PRIVATE
-sym(vp8_get8x8var_mmx):
+global sym(vpx_get8x8var_mmx) PRIVATE
+sym(vpx_get8x8var_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@@ -310,8 +310,8 @@ sym(vp8_get8x8var_mmx):
-;unsigned int
-;vp8_get4x4var_mmx
+;void
+;vpx_get4x4var_mmx
;(
; unsigned char *src_ptr,
; int source_stride,
@@ -320,8 +320,8 @@ sym(vp8_get8x8var_mmx):
; unsigned int *SSE,
; int *Sum
;)
-global sym(vp8_get4x4var_mmx) PRIVATE
-sym(vp8_get4x4var_mmx):
+global sym(vpx_get4x4var_mmx) PRIVATE
+sym(vpx_get4x4var_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
@@ -422,430 +422,3 @@ sym(vp8_get4x4var_mmx):
UNSHADOW_ARGS
pop rbp
ret
-
-
-
-;unsigned int
-;vp8_get4x4sse_cs_mmx
-;(
-; unsigned char *src_ptr,
-; int source_stride,
-; unsigned char *ref_ptr,
-; int recon_stride
-;)
-global sym(vp8_get4x4sse_cs_mmx) PRIVATE
-sym(vp8_get4x4sse_cs_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
- push rsi
- push rdi
- push rbx
- ; end prolog
-
-
- pxor mm6, mm6 ; Blank mmx7
- pxor mm7, mm7 ; Blank mmx7
-
- mov rax, arg(0) ;[src_ptr] ; Load base addresses
- mov rbx, arg(2) ;[ref_ptr]
- movsxd rcx, dword ptr arg(1) ;[source_stride]
- movsxd rdx, dword ptr arg(3) ;[recon_stride]
- ; Row 1
- movd mm0, [rax] ; Copy eight bytes to mm0
- movd mm1, [rbx] ; Copy eight bytes to mm1
-            punpcklbw mm0, mm6          ; unpack to higher precision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- pmaddwd mm0, mm0 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movd mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Row 2
- movd mm0, [rax] ; Copy eight bytes to mm0
-            punpcklbw mm0, mm6          ; unpack to higher precision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- pmaddwd mm0, mm0 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movd mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Row 3
- movd mm0, [rax] ; Copy eight bytes to mm0
- punpcklbw mm1, mm6
-            punpcklbw mm0, mm6          ; unpack to higher precision
- psubsw mm0, mm1 ; A-B (low order) to MM0
-
- pmaddwd mm0, mm0 ; square and accumulate
- add rbx,rdx ; Inc pointer into ref data
- add rax,rcx ; Inc pointer into the new data
- movd mm1, [rbx] ; Copy eight bytes to mm1
- paddd mm7, mm0 ; accumulate in mm7
-
- ; Row 4
- movd mm0, [rax] ; Copy eight bytes to mm0
-            punpcklbw mm0, mm6          ; unpack to higher precision
- punpcklbw mm1, mm6
- psubsw mm0, mm1 ; A-B (low order) to MM0
- pmaddwd mm0, mm0 ; square and accumulate
- paddd mm7, mm0 ; accumulate in mm7
-
- movq mm0, mm7 ;
- psrlq mm7, 32
-
- paddd mm0, mm7
- movq rax, mm0
-
-
- ; begin epilog
- pop rbx
- pop rdi
- pop rsi
- UNSHADOW_ARGS
- pop rbp
- ret
-
-%define mmx_filter_shift 7
-
-;void vp8_filter_block2d_bil4x4_var_mmx
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned short *HFilter,
-; unsigned short *VFilter,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp8_filter_block2d_bil4x4_var_mmx) PRIVATE
-sym(vp8_filter_block2d_bil4x4_var_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 8
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
-
- pxor mm6, mm6 ;
- pxor mm7, mm7 ;
-
- mov rax, arg(4) ;HFilter ;
- mov rdx, arg(5) ;VFilter ;
-
- mov rsi, arg(0) ;ref_ptr ;
- mov rdi, arg(2) ;src_ptr ;
-
- mov rcx, 4 ;
- pxor mm0, mm0 ;
-
- movd mm1, [rsi] ;
- movd mm3, [rsi+1] ;
-
- punpcklbw mm1, mm0 ;
- pmullw mm1, [rax] ;
-
- punpcklbw mm3, mm0 ;
- pmullw mm3, [rax+8] ;
-
- paddw mm1, mm3 ;
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- movq mm5, mm1
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
- add rsi, r8
-%endif
-
-.filter_block2d_bil4x4_var_mmx_loop:
-
- movd mm1, [rsi] ;
- movd mm3, [rsi+1] ;
-
- punpcklbw mm1, mm0 ;
- pmullw mm1, [rax] ;
-
- punpcklbw mm3, mm0 ;
- pmullw mm3, [rax+8] ;
-
- paddw mm1, mm3 ;
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- movq mm3, mm5 ;
-
- movq mm5, mm1 ;
- pmullw mm3, [rdx] ;
-
- pmullw mm1, [rdx+8] ;
- paddw mm1, mm3 ;
-
-
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
- psraw mm1, mmx_filter_shift ;
-
- movd mm3, [rdi] ;
- punpcklbw mm3, mm0 ;
-
- psubw mm1, mm3 ;
- paddw mm6, mm1 ;
-
- pmaddwd mm1, mm1 ;
- paddd mm7, mm1 ;
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
- add rdi, dword ptr arg(3) ;src_pixels_per_line ;
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .filter_block2d_bil4x4_var_mmx_loop ;
-
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rdi, arg(6) ;sum
- mov rsi, arg(7) ;sumsquared
-
- movd dword ptr [rdi], mm2 ;
- movd dword ptr [rsi], mm4 ;
-
-
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-
-
-;void vp8_filter_block2d_bil_var_mmx
-;(
-; unsigned char *ref_ptr,
-; int ref_pixels_per_line,
-; unsigned char *src_ptr,
-; int src_pixels_per_line,
-; unsigned int Height,
-; unsigned short *HFilter,
-; unsigned short *VFilter,
-; int *sum,
-; unsigned int *sumsquared
-;)
-global sym(vp8_filter_block2d_bil_var_mmx) PRIVATE
-sym(vp8_filter_block2d_bil_var_mmx):
- push rbp
- mov rbp, rsp
- SHADOW_ARGS_TO_STACK 9
- GET_GOT rbx
- push rsi
- push rdi
- sub rsp, 16
- ; end prolog
-
- pxor mm6, mm6 ;
- pxor mm7, mm7 ;
- mov rax, arg(5) ;HFilter ;
-
- mov rdx, arg(6) ;VFilter ;
- mov rsi, arg(0) ;ref_ptr ;
-
- mov rdi, arg(2) ;src_ptr ;
- movsxd rcx, dword ptr arg(4) ;Height ;
-
- pxor mm0, mm0 ;
- movq mm1, [rsi] ;
-
- movq mm3, [rsi+1] ;
- movq mm2, mm1 ;
-
- movq mm4, mm3 ;
- punpcklbw mm1, mm0 ;
-
- punpckhbw mm2, mm0 ;
- pmullw mm1, [rax] ;
-
- pmullw mm2, [rax] ;
- punpcklbw mm3, mm0 ;
-
- punpckhbw mm4, mm0 ;
- pmullw mm3, [rax+8] ;
-
- pmullw mm4, [rax+8] ;
- paddw mm1, mm3 ;
-
- paddw mm2, mm4 ;
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- paddw mm2, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm2, mmx_filter_shift ;
- movq mm5, mm1
-
- packuswb mm5, mm2 ;
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line
- add rsi, r8
-%endif
-
-.filter_block2d_bil_var_mmx_loop:
-
- movq mm1, [rsi] ;
- movq mm3, [rsi+1] ;
-
- movq mm2, mm1 ;
- movq mm4, mm3 ;
-
- punpcklbw mm1, mm0 ;
- punpckhbw mm2, mm0 ;
-
- pmullw mm1, [rax] ;
- pmullw mm2, [rax] ;
-
- punpcklbw mm3, mm0 ;
- punpckhbw mm4, mm0 ;
-
- pmullw mm3, [rax+8] ;
- pmullw mm4, [rax+8] ;
-
- paddw mm1, mm3 ;
- paddw mm2, mm4 ;
-
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
- psraw mm1, mmx_filter_shift ;
-
- paddw mm2, [GLOBAL(mmx_bi_rd)] ;
- psraw mm2, mmx_filter_shift ;
-
- movq mm3, mm5 ;
- movq mm4, mm5 ;
-
- punpcklbw mm3, mm0 ;
- punpckhbw mm4, mm0 ;
-
- movq mm5, mm1 ;
- packuswb mm5, mm2 ;
-
- pmullw mm3, [rdx] ;
- pmullw mm4, [rdx] ;
-
- pmullw mm1, [rdx+8] ;
- pmullw mm2, [rdx+8] ;
-
- paddw mm1, mm3 ;
- paddw mm2, mm4 ;
-
- paddw mm1, [GLOBAL(mmx_bi_rd)] ;
- paddw mm2, [GLOBAL(mmx_bi_rd)] ;
-
- psraw mm1, mmx_filter_shift ;
- psraw mm2, mmx_filter_shift ;
-
- movq mm3, [rdi] ;
- movq mm4, mm3 ;
-
- punpcklbw mm3, mm0 ;
- punpckhbw mm4, mm0 ;
-
- psubw mm1, mm3 ;
- psubw mm2, mm4 ;
-
- paddw mm6, mm1 ;
- pmaddwd mm1, mm1 ;
-
- paddw mm6, mm2 ;
- pmaddwd mm2, mm2 ;
-
- paddd mm7, mm1 ;
- paddd mm7, mm2 ;
-
-%if ABI_IS_32BIT
- add rsi, dword ptr arg(1) ;ref_pixels_per_line ;
- add rdi, dword ptr arg(3) ;src_pixels_per_line ;
-%else
- movsxd r8, dword ptr arg(1) ;ref_pixels_per_line ;
- movsxd r9, dword ptr arg(3) ;src_pixels_per_line ;
- add rsi, r8
- add rdi, r9
-%endif
- sub rcx, 1 ;
- jnz .filter_block2d_bil_var_mmx_loop ;
-
-
- pxor mm3, mm3 ;
- pxor mm2, mm2 ;
-
- punpcklwd mm2, mm6 ;
- punpckhwd mm3, mm6 ;
-
- paddd mm2, mm3 ;
- movq mm6, mm2 ;
-
- psrlq mm6, 32 ;
- paddd mm2, mm6 ;
-
- psrad mm2, 16 ;
- movq mm4, mm7 ;
-
- psrlq mm4, 32 ;
- paddd mm4, mm7 ;
-
- mov rdi, arg(7) ;sum
- mov rsi, arg(8) ;sumsquared
-
- movd dword ptr [rdi], mm2 ;
- movd dword ptr [rsi], mm4 ;
-
- ; begin epilog
- add rsp, 16
- pop rdi
- pop rsi
- RESTORE_GOT
- UNSHADOW_ARGS
- pop rbp
- ret
-
-
-SECTION_RODATA
-;short mmx_bi_rd[4] = { 64, 64, 64, 64};
-align 16
-mmx_bi_rd:
- times 4 dw 64
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c
new file mode 100644
index 00000000000..99dd741bca5
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_mmx.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_dsp_rtcd.h"
+
+extern void vpx_get4x4var_mmx(const uint8_t *a, int a_stride,
+ const uint8_t *b, int b_stride,
+ unsigned int *sse, int *sum);
+
+unsigned int vpx_variance4x4_mmx(const unsigned char *a, int a_stride,
+ const unsigned char *b, int b_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ vpx_get4x4var_mmx(a, a_stride, b, b_stride, &var, &avg);
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 4));
+}
+
+unsigned int vpx_variance8x8_mmx(const unsigned char *a, int a_stride,
+ const unsigned char *b, int b_stride,
+ unsigned int *sse) {
+ unsigned int var;
+ int avg;
+
+ vpx_get8x8var_mmx(a, a_stride, b, b_stride, &var, &avg);
+ *sse = var;
+
+ return (var - (((unsigned int)avg * avg) >> 6));
+}
+
+unsigned int vpx_mse16x16_mmx(const unsigned char *a, int a_stride,
+ const unsigned char *b, int b_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, sse2, sse3, var;
+ int sum0, sum1, sum2, sum3;
+
+ vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0);
+ vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1);
+ vpx_get8x8var_mmx(a + 8 * a_stride, a_stride,
+ b + 8 * b_stride, b_stride, &sse2, &sum2);
+ vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride,
+ b + 8 * b_stride + 8, b_stride, &sse3, &sum3);
+
+ var = sse0 + sse1 + sse2 + sse3;
+ *sse = var;
+ return var;
+}
+
+unsigned int vpx_variance16x16_mmx(const unsigned char *a, int a_stride,
+ const unsigned char *b, int b_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, sse2, sse3, var;
+ int sum0, sum1, sum2, sum3, avg;
+
+ vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0);
+ vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1);
+ vpx_get8x8var_mmx(a + 8 * a_stride, a_stride,
+ b + 8 * b_stride, b_stride, &sse2, &sum2);
+ vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride,
+ b + 8 * b_stride + 8, b_stride, &sse3, &sum3);
+
+ var = sse0 + sse1 + sse2 + sse3;
+ avg = sum0 + sum1 + sum2 + sum3;
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 8));
+}
+
+unsigned int vpx_variance16x8_mmx(const unsigned char *a, int a_stride,
+ const unsigned char *b, int b_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, var;
+ int sum0, sum1, avg;
+
+ vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0);
+ vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1);
+
+ var = sse0 + sse1;
+ avg = sum0 + sum1;
+ *sse = var;
+ return (var - (((unsigned int)avg * avg) >> 7));
+}
+
+unsigned int vpx_variance8x16_mmx(const unsigned char *a, int a_stride,
+ const unsigned char *b, int b_stride,
+ unsigned int *sse) {
+ unsigned int sse0, sse1, var;
+ int sum0, sum1, avg;
+
+ vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0);
+ vpx_get8x8var_mmx(a + 8 * a_stride, a_stride,
+ b + 8 * b_stride, b_stride, &sse1, &sum1);
+
+ var = sse0 + sse1;
+ avg = sum0 + sum1;
+ *sse = var;
+
+ return (var - (((unsigned int)avg * avg) >> 7));
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c
new file mode 100644
index 00000000000..6256bc53621
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/variance_sse2.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h> // SSE2
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+
+#include "vpx_ports/mem.h"
+
+typedef void (*getNxMvar_fn_t) (const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse, int *sum);
+
+unsigned int vpx_get_mb_ss_sse2(const int16_t *src) {
+ __m128i vsum = _mm_setzero_si128();
+ int i;
+
+ for (i = 0; i < 32; ++i) {
+ const __m128i v = _mm_loadu_si128((const __m128i *)src);
+ vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v));
+ src += 8;
+ }
+
+ vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
+ vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
+ return _mm_cvtsi128_si32(vsum);
+}
+
+#define READ64(p, stride, i) \
+ _mm_unpacklo_epi8(_mm_cvtsi32_si128(*(const uint32_t *)(p + i * stride)), \
+ _mm_cvtsi32_si128(*(const uint32_t *)(p + (i + 1) * stride)))
+
+static void get4x4var_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum) {
+ const __m128i zero = _mm_setzero_si128();
+ const __m128i src0 = _mm_unpacklo_epi8(READ64(src, src_stride, 0), zero);
+ const __m128i src1 = _mm_unpacklo_epi8(READ64(src, src_stride, 2), zero);
+ const __m128i ref0 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 0), zero);
+ const __m128i ref1 = _mm_unpacklo_epi8(READ64(ref, ref_stride, 2), zero);
+ const __m128i diff0 = _mm_sub_epi16(src0, ref0);
+ const __m128i diff1 = _mm_sub_epi16(src1, ref1);
+
+ // sum
+ __m128i vsum = _mm_add_epi16(diff0, diff1);
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2));
+ *sum = (int16_t)_mm_extract_epi16(vsum, 0);
+
+ // sse
+ vsum = _mm_add_epi32(_mm_madd_epi16(diff0, diff0),
+ _mm_madd_epi16(diff1, diff1));
+ vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 8));
+ vsum = _mm_add_epi32(vsum, _mm_srli_si128(vsum, 4));
+ *sse = _mm_cvtsi128_si32(vsum);
+}
+
+void vpx_get8x8var_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i vsum = _mm_setzero_si128();
+ __m128i vsse = _mm_setzero_si128();
+ int i;
+
+ for (i = 0; i < 8; i += 2) {
+ const __m128i src0 = _mm_unpacklo_epi8(_mm_loadl_epi64(
+ (const __m128i *)(src + i * src_stride)), zero);
+ const __m128i ref0 = _mm_unpacklo_epi8(_mm_loadl_epi64(
+ (const __m128i *)(ref + i * ref_stride)), zero);
+ const __m128i diff0 = _mm_sub_epi16(src0, ref0);
+
+ const __m128i src1 = _mm_unpacklo_epi8(_mm_loadl_epi64(
+ (const __m128i *)(src + (i + 1) * src_stride)), zero);
+ const __m128i ref1 = _mm_unpacklo_epi8(_mm_loadl_epi64(
+ (const __m128i *)(ref + (i + 1) * ref_stride)), zero);
+ const __m128i diff1 = _mm_sub_epi16(src1, ref1);
+
+ vsum = _mm_add_epi16(vsum, diff0);
+ vsum = _mm_add_epi16(vsum, diff1);
+ vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0));
+ vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1));
+ }
+
+ // sum
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2));
+ *sum = (int16_t)_mm_extract_epi16(vsum, 0);
+
+ // sse
+ vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
+ vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
+ *sse = _mm_cvtsi128_si32(vsse);
+}
+
+void vpx_get16x16var_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse, int *sum) {
+ const __m128i zero = _mm_setzero_si128();
+ __m128i vsum = _mm_setzero_si128();
+ __m128i vsse = _mm_setzero_si128();
+ int i;
+
+ for (i = 0; i < 16; ++i) {
+ const __m128i s = _mm_loadu_si128((const __m128i *)src);
+ const __m128i r = _mm_loadu_si128((const __m128i *)ref);
+
+ const __m128i src0 = _mm_unpacklo_epi8(s, zero);
+ const __m128i ref0 = _mm_unpacklo_epi8(r, zero);
+ const __m128i diff0 = _mm_sub_epi16(src0, ref0);
+
+ const __m128i src1 = _mm_unpackhi_epi8(s, zero);
+ const __m128i ref1 = _mm_unpackhi_epi8(r, zero);
+ const __m128i diff1 = _mm_sub_epi16(src1, ref1);
+
+ vsum = _mm_add_epi16(vsum, diff0);
+ vsum = _mm_add_epi16(vsum, diff1);
+ vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff0, diff0));
+ vsse = _mm_add_epi32(vsse, _mm_madd_epi16(diff1, diff1));
+
+ src += src_stride;
+ ref += ref_stride;
+ }
+
+ // sum
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8));
+ vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4));
+ *sum = (int16_t)_mm_extract_epi16(vsum, 0) +
+ (int16_t)_mm_extract_epi16(vsum, 1);
+
+ // sse
+ vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 8));
+ vsse = _mm_add_epi32(vsse, _mm_srli_si128(vsse, 4));
+ *sse = _mm_cvtsi128_si32(vsse);
+}
+
+
+static void variance_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ int w, int h, unsigned int *sse, int *sum,
+ getNxMvar_fn_t var_fn, int block_size) {
+ int i, j;
+
+ *sse = 0;
+ *sum = 0;
+
+ for (i = 0; i < h; i += block_size) {
+ for (j = 0; j < w; j += block_size) {
+ unsigned int sse0;
+ int sum0;
+ var_fn(src + src_stride * i + j, src_stride,
+ ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
+ *sse += sse0;
+ *sum += sum0;
+ }
+ }
+}
+
+unsigned int vpx_variance4x4_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ get4x4var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
+ return *sse - (((unsigned int)sum * sum) >> 4);
+}
+
+unsigned int vpx_variance8x4_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 8, 4,
+ sse, &sum, get4x4var_sse2, 4);
+ return *sse - (((unsigned int)sum * sum) >> 5);
+}
+
+unsigned int vpx_variance4x8_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 4, 8,
+ sse, &sum, get4x4var_sse2, 4);
+ return *sse - (((unsigned int)sum * sum) >> 5);
+}
+
+unsigned int vpx_variance8x8_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ vpx_get8x8var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
+ return *sse - (((unsigned int)sum * sum) >> 6);
+}
+
+unsigned int vpx_variance16x8_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 16, 8,
+ sse, &sum, vpx_get8x8var_sse2, 8);
+ return *sse - (((unsigned int)sum * sum) >> 7);
+}
+
+unsigned int vpx_variance8x16_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 8, 16,
+ sse, &sum, vpx_get8x8var_sse2, 8);
+ return *sse - (((unsigned int)sum * sum) >> 7);
+}
+
+unsigned int vpx_variance16x16_sse2(const unsigned char *src, int src_stride,
+ const unsigned char *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ vpx_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
+ return *sse - (((unsigned int)sum * sum) >> 8);
+}
+
+unsigned int vpx_variance32x32_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 32, 32,
+ sse, &sum, vpx_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 10);
+}
+
+unsigned int vpx_variance32x16_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 32, 16,
+ sse, &sum, vpx_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 9);
+}
+
+unsigned int vpx_variance16x32_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 16, 32,
+ sse, &sum, vpx_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 9);
+}
+
+unsigned int vpx_variance64x64_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 64, 64,
+ sse, &sum, vpx_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 12);
+}
+
+unsigned int vpx_variance64x32_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 64, 32,
+ sse, &sum, vpx_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 11);
+}
+
+unsigned int vpx_variance32x64_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ int sum;
+ variance_sse2(src, src_stride, ref, ref_stride, 32, 64,
+ sse, &sum, vpx_get16x16var_sse2, 16);
+ return *sse - (((int64_t)sum * sum) >> 11);
+}
+
+unsigned int vpx_mse8x8_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ vpx_variance8x8_sse2(src, src_stride, ref, ref_stride, sse);
+ return *sse;
+}
+
+unsigned int vpx_mse8x16_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ vpx_variance8x16_sse2(src, src_stride, ref, ref_stride, sse);
+ return *sse;
+}
+
+unsigned int vpx_mse16x8_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ vpx_variance16x8_sse2(src, src_stride, ref, ref_stride, sse);
+ return *sse;
+}
+
+unsigned int vpx_mse16x16_sse2(const uint8_t *src, int src_stride,
+ const uint8_t *ref, int ref_stride,
+ unsigned int *sse) {
+ vpx_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);
+ return *sse;
+}
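
For reference, each wrapper above computes variance = SSE - sum^2/(w*h): the right-shift count is log2(w*h) for the block size in the function name, and the (int64_t) cast used for blocks of 512 pixels or more (16x32, 32x16 and up) keeps sum*sum from overflowing 32 bits, since |sum| can reach 255*w*h. The vpx_mse* variants reuse the same helpers but return *sse directly, i.e. the sum of squared differences without the mean correction. A scalar sketch of the same computation, illustrative only and not part of the patch:

#include <stdint.h>

static unsigned int variance_ref(const uint8_t *src, int src_stride,
                                 const uint8_t *ref, int ref_stride,
                                 int w, int h, unsigned int *sse) {
  int64_t sum = 0;
  uint64_t sse64 = 0;
  int i, j;
  for (i = 0; i < h; ++i) {
    for (j = 0; j < w; ++j) {
      const int diff = src[j] - ref[j];   /* per-pixel difference */
      sum += diff;                        /* running sum */
      sse64 += (int64_t)diff * diff;      /* running sum of squares */
    }
    src += src_stride;
    ref += ref_stride;
  }
  *sse = (unsigned int)sse64;
  /* variance = SSE - sum^2 / N; the SSE2 wrappers use >> log2(N). */
  return *sse - (unsigned int)(((int64_t)sum * sum) / (w * h));
}
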
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem.h b/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem.h
index 0106a45d6e4..7502f906325 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem.h
@@ -38,4 +38,16 @@
#define __builtin_prefetch(x)
#endif
+/* Shift down with rounding */
+#define ROUND_POWER_OF_TWO(value, n) \
+ (((value) + (1 << ((n) - 1))) >> (n))
+
+#define ALIGN_POWER_OF_TWO(value, n) \
+ (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
+
+#if CONFIG_VP9_HIGHBITDEPTH
+#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)x) << 1))
+#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)x) >> 1))
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
#endif // VPX_PORTS_MEM_H_
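
ROUND_POWER_OF_TWO(value, n) divides by 2^n rounding to nearest (it adds half the divisor before shifting), and ALIGN_POWER_OF_TWO(value, n) rounds value up to the next multiple of 2^n. CONVERT_TO_SHORTPTR/CONVERT_TO_BYTEPTR follow libvpx's high-bit-depth convention of passing a uint16_t frame buffer through uint8_t * interfaces as a halved address, so shifting the pointer value left by one recovers the real 16-bit pointer. A quick check of the macro arithmetic, illustrative only:

#include <assert.h>
#include "vpx_ports/mem.h"

int main(void) {
  assert(ROUND_POWER_OF_TWO(13, 2) == 3);   /* (13 + 2) >> 2 */
  assert(ROUND_POWER_OF_TWO(14, 2) == 4);   /* (14 + 2) >> 2 = 16 >> 2 */
  assert(ALIGN_POWER_OF_TWO(33, 4) == 48);  /* next multiple of 16 */
  return 0;
}
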
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_ports/msvc.h b/chromium/third_party/libvpx/source/libvpx/vpx_ports/msvc.h
new file mode 100644
index 00000000000..43a36e76184
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_ports/msvc.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_PORTS_MSVC_H_
+#define VPX_PORTS_MSVC_H_
+#ifdef _MSC_VER
+
+#include "./vpx_config.h"
+
+# if _MSC_VER < 1900 // VS2015 provides snprintf
+# define snprintf _snprintf
+# endif // _MSC_VER < 1900
+
+#endif // _MSC_VER
+#endif // VPX_PORTS_MSVC_H_
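
The new header has an effect only when _MSC_VER is below 1900: those compilers do not ship a C99 snprintf, so it is mapped to _snprintf. A minimal illustrative use (keeping in mind that _snprintf, unlike snprintf, does not guarantee NUL termination on truncation):

#include <stdio.h>
#include "vpx_ports/msvc.h"   /* a no-op except on pre-2015 MSVC */

static void print_size(char *buf, size_t len, int w, int h) {
  snprintf(buf, len, "%dx%d", w, h);   /* becomes _snprintf on old MSVC */
}
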
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_ports/vpx_ports.mk b/chromium/third_party/libvpx/source/libvpx/vpx_ports/vpx_ports.mk
index a7275431fe9..ab7fc4ac7a3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_ports/vpx_ports.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_ports/vpx_ports.mk
@@ -11,13 +11,14 @@
PORTS_SRCS-yes += vpx_ports.mk
-PORTS_SRCS-$(BUILD_LIBVPX) += mem.h
-PORTS_SRCS-$(BUILD_LIBVPX) += vpx_timer.h
+PORTS_SRCS-yes += mem.h
+PORTS_SRCS-yes += msvc.h
+PORTS_SRCS-yes += vpx_timer.h
ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
-PORTS_SRCS-$(BUILD_LIBVPX) += emms.asm
-PORTS_SRCS-$(BUILD_LIBVPX) += x86.h
-PORTS_SRCS-$(BUILD_LIBVPX) += x86_abi_support.asm
+PORTS_SRCS-yes += emms.asm
+PORTS_SRCS-yes += x86.h
+PORTS_SRCS-yes += x86_abi_support.asm
endif
PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c
index 995c45b6ab6..dab324edfcc 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/gen_scalers.c
@@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-
+#include "./vpx_scale_rtcd.h"
#include "vpx_scale/vpx_scale.h"
#include "vpx_mem/vpx_mem.h"
/****************************************************************************
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/vpx_scale.c b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/vpx_scale.c
index 089e673757c..15e4ba87e72 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/vpx_scale.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/vpx_scale.c
@@ -22,6 +22,7 @@
****************************************************************************/
#include "./vpx_scale_rtcd.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_scale/vpx_scale.h"
#include "vpx_scale/yv12config.h"
typedef struct {
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12config.c b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12config.c
index 169c2ab2d73..7582792d939 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12config.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12config.c
@@ -12,9 +12,7 @@
#include "vpx_scale/yv12config.h"
#include "vpx_mem/vpx_mem.h"
-#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
-#include "vp9/common/vp9_common.h"
-#endif
+#include "vpx_ports/mem.h"
/****************************************************************************
* Exports
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12extend.c b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12extend.c
index 6214a12189c..086e2f398fb 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12extend.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_scale/generic/yv12extend.c
@@ -10,8 +10,10 @@
#include <assert.h>
#include "./vpx_config.h"
+#include "./vpx_scale_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
+#include "vpx_ports/mem.h"
#include "vpx_scale/yv12config.h"
#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
#include "vp9/common/vp9_common.h"
diff --git a/chromium/third_party/libvpx/source/libvpx/vpxdec.c b/chromium/third_party/libvpx/source/libvpx/vpxdec.c
index 8c938df8de6..baa61151ca8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpxdec.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpxdec.c
@@ -106,24 +106,25 @@ static const arg_def_t *all_args[] = {
};
#if CONFIG_VP8_DECODER
-static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1,
- "Enable VP8 postproc add noise");
-static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0,
- "Enable VP8 deblocking");
-static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1,
- "Enable VP8 demacroblocking, w/ level");
-static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
- "Enable VP8 visible debug info");
-static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1,
- "Display only selected reference frame per macro block");
-static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1,
- "Display only selected macro block modes");
-static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
- "Display only selected block modes");
-static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
- "Draw only selected motion vectors");
-static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0,
- "Enable multiframe quality enhancement");
+static const arg_def_t addnoise_level = ARG_DEF(
+ NULL, "noise-level", 1, "Enable VP8 postproc add noise");
+static const arg_def_t deblock = ARG_DEF(
+ NULL, "deblock", 0, "Enable VP8 deblocking");
+static const arg_def_t demacroblock_level = ARG_DEF(
+ NULL, "demacroblock-level", 1, "Enable VP8 demacroblocking, w/ level");
+static const arg_def_t pp_debug_info = ARG_DEF(
+ NULL, "pp-debug-info", 1, "Enable VP8 visible debug info");
+static const arg_def_t pp_disp_ref_frame = ARG_DEF(
+ NULL, "pp-dbg-ref-frame", 1,
+ "Display only selected reference frame per macro block");
+static const arg_def_t pp_disp_mb_modes = ARG_DEF(
+ NULL, "pp-dbg-mb-modes", 1, "Display only selected macro block modes");
+static const arg_def_t pp_disp_b_modes = ARG_DEF(
+ NULL, "pp-dbg-b-modes", 1, "Display only selected block modes");
+static const arg_def_t pp_disp_mvs = ARG_DEF(
+ NULL, "pp-dbg-mvs", 1, "Draw only selected motion vectors");
+static const arg_def_t mfqe = ARG_DEF(
+ NULL, "mfqe", 0, "Enable multiframe quality enhancement");
static const arg_def_t *vp8_pp_args[] = {
&addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
@@ -169,7 +170,7 @@ static INLINE int libyuv_scale(vpx_image_t *src, vpx_image_t *dst,
}
#endif
-void usage_exit() {
+void usage_exit(void) {
int i;
fprintf(stderr, "Usage: %s <options> filename\n\n"
@@ -312,7 +313,7 @@ static void write_image_file(const vpx_image_t *img, const int planes[3],
}
}
-int file_is_raw(struct VpxInputContext *input) {
+static int file_is_raw(struct VpxInputContext *input) {
uint8_t buf[32];
int is_raw = 0;
vpx_codec_stream_info_t si;
@@ -343,7 +344,7 @@ int file_is_raw(struct VpxInputContext *input) {
return is_raw;
}
-void show_progress(int frame_in, int frame_out, uint64_t dx_time) {
+static void show_progress(int frame_in, int frame_out, uint64_t dx_time) {
fprintf(stderr,
"%d decoded frames/%d showed frames in %"PRId64" us (%.2f fps)\r",
frame_in, frame_out, dx_time,
@@ -365,8 +366,8 @@ struct ExternalFrameBufferList {
// Application private data passed into the set function. |min_size| is the
// minimum size in bytes needed to decode the next frame. |fb| pointer to the
// frame buffer.
-int get_vp9_frame_buffer(void *cb_priv, size_t min_size,
- vpx_codec_frame_buffer_t *fb) {
+static int get_vp9_frame_buffer(void *cb_priv, size_t min_size,
+ vpx_codec_frame_buffer_t *fb) {
int i;
struct ExternalFrameBufferList *const ext_fb_list =
(struct ExternalFrameBufferList *)cb_priv;
@@ -403,8 +404,8 @@ int get_vp9_frame_buffer(void *cb_priv, size_t min_size,
// Callback used by libvpx when there are no references to the frame buffer.
// |cb_priv| user private data passed into the set function. |fb| pointer
// to the frame buffer.
-int release_vp9_frame_buffer(void *cb_priv,
- vpx_codec_frame_buffer_t *fb) {
+static int release_vp9_frame_buffer(void *cb_priv,
+ vpx_codec_frame_buffer_t *fb) {
struct ExternalFrameBuffer *const ext_fb =
(struct ExternalFrameBuffer *)fb->priv;
(void)cb_priv;
@@ -412,9 +413,9 @@ int release_vp9_frame_buffer(void *cb_priv,
return 0;
}
-void generate_filename(const char *pattern, char *out, size_t q_len,
- unsigned int d_w, unsigned int d_h,
- unsigned int frame_in) {
+static void generate_filename(const char *pattern, char *out, size_t q_len,
+ unsigned int d_w, unsigned int d_h,
+ unsigned int frame_in) {
const char *p = pattern;
char *q = out;
@@ -536,7 +537,7 @@ static int img_shifted_realloc_required(const vpx_image_t *img,
}
#endif
-int main_loop(int argc, const char **argv_) {
+static int main_loop(int argc, const char **argv_) {
vpx_codec_ctx_t decoder;
char *fn = NULL;
int i;
@@ -813,34 +814,42 @@ int main_loop(int argc, const char **argv_) {
fprintf(stderr, "%s\n", decoder.name);
#if CONFIG_VP8_DECODER
-
if (vp8_pp_cfg.post_proc_flag
&& vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) {
- fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder));
+ fprintf(stderr, "Failed to configure postproc: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_color_ref_frame
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame)) {
- fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME,
+ vp8_dbg_color_ref_frame)) {
+ fprintf(stderr, "Failed to configure reference block visualizer: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_color_mb_modes
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes)) {
- fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES,
+ vp8_dbg_color_mb_modes)) {
+ fprintf(stderr, "Failed to configure macro block visualizer: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_color_b_modes
- && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes)) {
- fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
+ && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES,
+ vp8_dbg_color_b_modes)) {
+ fprintf(stderr, "Failed to configure block visualizer: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
if (vp8_dbg_display_mv
- && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv)) {
- fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
+ && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV,
+ vp8_dbg_display_mv)) {
+ fprintf(stderr, "Failed to configure motion vector visualizer: %s\n",
+ vpx_codec_error(&decoder));
return EXIT_FAILURE;
}
#endif
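
The remaining vpxdec.c changes are declaration hygiene: helpers used only in this file gain static linkage, and usage_exit() becomes usage_exit(void), turning the definition into a proper prototype (an empty parameter list in C leaves the arguments unspecified rather than declaring none). A two-line illustration, not taken from this patch:

void usage_exit();      /* old style: parameters unspecified, so a stray
                         * usage_exit(42) call compiles without complaint */
void usage_exit(void);  /* prototype: takes no arguments; mismatched calls
                         * are rejected at compile time */
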
diff --git a/chromium/third_party/libvpx/source/libvpx/vpxenc.c b/chromium/third_party/libvpx/source/libvpx/vpxenc.c
index 851d43291cd..8bbb9fc6a20 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpxenc.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpxenc.c
@@ -99,7 +99,7 @@ static void warn_or_exit_on_error(vpx_codec_ctx_t *ctx, int fatal,
va_end(ap);
}
-int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) {
+static int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) {
FILE *f = input_ctx->file;
y4m_input *y4m = &input_ctx->y4m;
int shortread = 0;
@@ -114,14 +114,14 @@ int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) {
return !shortread;
}
-int file_is_y4m(const char detect[4]) {
+static int file_is_y4m(const char detect[4]) {
if (memcmp(detect, "YUV4", 4) == 0) {
return 1;
}
return 0;
}
-int fourcc_is_ivf(const char detect[4]) {
+static int fourcc_is_ivf(const char detect[4]) {
if (memcmp(detect, "DKIF", 4) == 0) {
return 1;
}
@@ -330,10 +330,6 @@ static const arg_def_t sharpness = ARG_DEF(
NULL, "sharpness", 1, "Loop filter sharpness (0..7)");
static const arg_def_t static_thresh = ARG_DEF(
NULL, "static-thresh", 1, "Motion detection threshold");
-static const arg_def_t cpu_used_vp8 = ARG_DEF(
- NULL, "cpu-used", 1, "CPU Used (-16..16)");
-static const arg_def_t cpu_used_vp9 = ARG_DEF(
- NULL, "cpu-used", 1, "CPU Used (-8..8)");
static const arg_def_t auto_altref = ARG_DEF(
NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames");
static const arg_def_t arnr_maxframes = ARG_DEF(
@@ -353,17 +349,14 @@ static const arg_def_t cq_level = ARG_DEF(
NULL, "cq-level", 1, "Constant/Constrained Quality level");
static const arg_def_t max_intra_rate_pct = ARG_DEF(
NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)");
-static const arg_def_t max_inter_rate_pct = ARG_DEF(
- NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)");
-static const arg_def_t gf_cbr_boost_pct = ARG_DEF(
- NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)");
-
-static const arg_def_t screen_content_mode = ARG_DEF(NULL, "screen-content-mode", 1,
- "Screen content mode");
#if CONFIG_VP8_ENCODER
+static const arg_def_t cpu_used_vp8 = ARG_DEF(
+ NULL, "cpu-used", 1, "CPU Used (-16..16)");
static const arg_def_t token_parts = ARG_DEF(
NULL, "token-parts", 1, "Number of token partitions to use, log2");
+static const arg_def_t screen_content_mode = ARG_DEF(
+ NULL, "screen-content-mode", 1, "Screen content mode");
static const arg_def_t *vp8_args[] = {
&cpu_used_vp8, &auto_altref, &noise_sens, &sharpness, &static_thresh,
&token_parts, &arnr_maxframes, &arnr_strength, &arnr_type,
@@ -382,6 +375,8 @@ static const int vp8_arg_ctrl_map[] = {
#endif
#if CONFIG_VP9_ENCODER
+static const arg_def_t cpu_used_vp9 = ARG_DEF(
+ NULL, "cpu-used", 1, "CPU Used (-8..8)");
static const arg_def_t tile_cols = ARG_DEF(
NULL, "tile-columns", 1, "Number of tile columns to use, log2");
static const arg_def_t tile_rows = ARG_DEF(
@@ -397,6 +392,10 @@ static const arg_def_t aq_mode = ARG_DEF(
static const arg_def_t frame_periodic_boost = ARG_DEF(
NULL, "frame-boost", 1,
"Enable frame periodic boost (0: off (default), 1: on)");
+static const arg_def_t gf_cbr_boost_pct = ARG_DEF(
+ NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)");
+static const arg_def_t max_inter_rate_pct = ARG_DEF(
+ NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)");
static const struct arg_enum_list color_space_enum[] = {
{ "unknown", VPX_CS_UNKNOWN },
@@ -467,8 +466,9 @@ static const int vp9_arg_ctrl_map[] = {
static const arg_def_t *no_args[] = { NULL };
-void usage_exit() {
+void usage_exit(void) {
int i;
+ const int num_encoder = get_vpx_encoder_count();
fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n",
exec_name);
@@ -496,11 +496,15 @@ void usage_exit() {
" in fractional seconds. Default is 1/1000.\n");
fprintf(stderr, "\nIncluded encoders:\n\n");
- for (i = 0; i < get_vpx_encoder_count(); ++i) {
+ for (i = 0; i < num_encoder; ++i) {
const VpxInterface *const encoder = get_vpx_encoder_by_index(i);
- fprintf(stderr, " %-6s - %s\n",
- encoder->name, vpx_codec_iface_name(encoder->codec_interface()));
+ const char* defstr = (i == (num_encoder - 1)) ? "(default)" : "";
+ fprintf(stderr, " %-6s - %s %s\n",
+ encoder->name, vpx_codec_iface_name(encoder->codec_interface()),
+ defstr);
}
+ fprintf(stderr, "\n ");
+ fprintf(stderr, "Use --codec to switch to a non-default encoder.\n\n");
exit(EXIT_FAILURE);
}
@@ -793,8 +797,8 @@ struct stream_state {
};
-void validate_positive_rational(const char *msg,
- struct vpx_rational *rat) {
+static void validate_positive_rational(const char *msg,
+ struct vpx_rational *rat) {
if (rat->den < 0) {
rat->num *= -1;
rat->den *= -1;
@@ -811,10 +815,14 @@ void validate_positive_rational(const char *msg,
static void parse_global_config(struct VpxEncoderConfig *global, char **argv) {
char **argi, **argj;
struct arg arg;
+ const int num_encoder = get_vpx_encoder_count();
+
+ if (num_encoder < 1)
+ die("Error: no valid encoder available\n");
/* Initialize default parameters */
memset(global, 0, sizeof(*global));
- global->codec = get_vpx_encoder_by_index(0);
+ global->codec = get_vpx_encoder_by_index(num_encoder - 1);
global->passes = 0;
global->color_type = I420;
/* Assign default deadline to good quality */
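
This hunk also changes the default codec: vpxenc now dies early if no encoder was compiled in, and otherwise picks the last registered encoder instead of the first. Assuming the usual registration order in tools_common.c (VP8 before VP9), that makes VP9 the default when both encoders are built; the usage text above tags the default entry and points at --codec, and passing --codec=vp8 on the command line still selects VP8 explicitly.
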
@@ -919,7 +927,7 @@ static void parse_global_config(struct VpxEncoderConfig *global, char **argv) {
}
-void open_input_file(struct VpxInputContext *input) {
+static void open_input_file(struct VpxInputContext *input) {
/* Parse certain options from the input file, if possible */
input->file = strcmp(input->filename, "-")
? fopen(input->filename, "rb") : set_binary_mode(stdin);
@@ -935,6 +943,10 @@ void open_input_file(struct VpxInputContext *input) {
rewind(input->file);
}
+ /* Default to 1:1 pixel aspect ratio. */
+ input->pixel_aspect_ratio.numerator = 1;
+ input->pixel_aspect_ratio.denominator = 1;
+
/* For RAW input sources, these bytes will be applied on the first frame

* in read_frame().
*/
@@ -948,6 +960,8 @@ void open_input_file(struct VpxInputContext *input) {
input->file_type = FILE_TYPE_Y4M;
input->width = input->y4m.pic_w;
input->height = input->y4m.pic_h;
+ input->pixel_aspect_ratio.numerator = input->y4m.par_n;
+ input->pixel_aspect_ratio.denominator = input->y4m.par_d;
input->framerate.numerator = input->y4m.fps_n;
input->framerate.denominator = input->y4m.fps_d;
input->fmt = input->y4m.vpx_fmt;
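
open_input_file() now records a pixel aspect ratio for every input: 1:1 by default, or the PAR carried in a Y4M header when one is present (the A<num>:<den> parameter, so a stream declared as "YUV4MPEG2 W720 H480 F30000:1001 A10:11" yields pixel_aspect_ratio = 10/11). The value is threaded through open_output_file() into the WebM writer, as shown in the webmenc.cc hunk below.
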
@@ -1381,7 +1395,8 @@ static void show_stream_config(struct stream_state *stream,
static void open_output_file(struct stream_state *stream,
- struct VpxEncoderConfig *global) {
+ struct VpxEncoderConfig *global,
+ const struct VpxRational *pixel_aspect_ratio) {
const char *fn = stream->config.out_fn;
const struct vpx_codec_enc_cfg *const cfg = &stream->config.cfg;
@@ -1402,7 +1417,8 @@ static void open_output_file(struct stream_state *stream,
write_webm_file_header(&stream->ebml, cfg,
&global->framerate,
stream->config.stereo_fmt,
- global->codec->fourcc);
+ global->codec->fourcc,
+ pixel_aspect_ratio);
}
#endif
@@ -2035,7 +2051,8 @@ int main(int argc, const char **argv_) {
}
FOREACH_STREAM(setup_pass(stream, &global, pass));
- FOREACH_STREAM(open_output_file(stream, &global));
+ FOREACH_STREAM(open_output_file(stream, &global,
+ &input.pixel_aspect_ratio));
FOREACH_STREAM(initialize_encoder(stream, &global));
#if CONFIG_VP9 && CONFIG_VP9_HIGHBITDEPTH
diff --git a/chromium/third_party/libvpx/source/libvpx/webmenc.cc b/chromium/third_party/libvpx/source/libvpx/webmenc.cc
index a0e542b17cf..8212ee36cf6 100644
--- a/chromium/third_party/libvpx/source/libvpx/webmenc.cc
+++ b/chromium/third_party/libvpx/source/libvpx/webmenc.cc
@@ -24,7 +24,8 @@ void write_webm_file_header(struct EbmlGlobal *glob,
const vpx_codec_enc_cfg_t *cfg,
const struct vpx_rational *fps,
stereo_format_t stereo_fmt,
- unsigned int fourcc) {
+ unsigned int fourcc,
+ const struct VpxRational *par) {
mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(glob->stream);
mkvmuxer::Segment *const segment = new mkvmuxer::Segment();
segment->Init(writer);
@@ -49,6 +50,15 @@ void write_webm_file_header(struct EbmlGlobal *glob,
segment->GetTrackByNumber(video_track_id));
video_track->SetStereoMode(stereo_fmt);
video_track->set_codec_id(fourcc == VP8_FOURCC ? "V_VP8" : "V_VP9");
+ if (par->numerator > 1 || par->denominator > 1) {
+ // TODO(fgalligan): Add support of DisplayUnit, Display Aspect Ratio type
+ // to WebM format.
+ const uint64_t display_width =
+ static_cast<uint64_t>(((cfg->g_w * par->numerator * 1.0) /
+ par->denominator) + .5);
+ video_track->set_display_width(display_width);
+ video_track->set_display_height(cfg->g_h);
+ }
if (glob->debug) {
video_track->set_uid(kDebugTrackUid);
}
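
When the PAR is not 1:1, the muxer now converts it into a display size: display_width = g_w * par_num / par_den, rounded half-up by the "+ .5" and the truncating cast, while display_height stays at the coded g_h. For example, a 720x480 coded frame with a 10:11 PAR gives 720 * 10 / 11 + 0.5 = 655.045..., stored as a display width of 655, so a compliant player shows the 720-pixel-wide frame scaled to 655 pixels.
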
diff --git a/chromium/third_party/libvpx/source/libvpx/webmenc.h b/chromium/third_party/libvpx/source/libvpx/webmenc.h
index 0ac606be483..c255d3de669 100644
--- a/chromium/third_party/libvpx/source/libvpx/webmenc.h
+++ b/chromium/third_party/libvpx/source/libvpx/webmenc.h
@@ -42,7 +42,8 @@ void write_webm_file_header(struct EbmlGlobal *glob,
const vpx_codec_enc_cfg_t *cfg,
const struct vpx_rational *fps,
stereo_format_t stereo_fmt,
- unsigned int fourcc);
+ unsigned int fourcc,
+ const struct VpxRational *par);
void write_webm_block(struct EbmlGlobal *glob,
const vpx_codec_enc_cfg_t *cfg,