author     Allan Sandfeld Jensen <allan.jensen@qt.io>    2018-05-03 13:42:47 +0200
committer  Allan Sandfeld Jensen <allan.jensen@qt.io>    2018-05-15 10:27:51 +0000
commit     8c5c43c7b138c9b4b0bf56d946e61d3bbc111bec (patch)
tree       d29d987c4d7b173cf853279b79a51598f104b403 /chromium/third_party/libvpx
parent     830c9e163d31a9180fadca926b3e1d7dfffb5021 (diff)
download   qtwebengine-chromium-8c5c43c7b138c9b4b0bf56d946e61d3bbc111bec.tar.gz
BASELINE: Update Chromium to 66.0.3359.156
Change-Id: I0c9831ad39911a086b6377b16f995ad75a51e441
Reviewed-by: Michal Klocek <michal.klocek@qt.io>
Diffstat (limited to 'chromium/third_party/libvpx')
-rw-r--r--  chromium/third_party/libvpx/README.chromium | 8
-rw-r--r--  chromium/third_party/libvpx/libvpx_srcs.gni | 9
-rw-r--r--  chromium/third_party/libvpx/source/config/ios/arm-neon/vp9_rtcd.h | 6
-rw-r--r--  chromium/third_party/libvpx/source/config/ios/arm64/vp9_rtcd.h | 6
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h | 12
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h | 6
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h | 6
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h | 41
-rw-r--r--  chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h | 41
-rw-r--r--  chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h | 41
-rw-r--r--  chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h | 41
-rw-r--r--  chromium/third_party/libvpx/source/config/vpx_version.h | 10
-rw-r--r--  chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h | 41
-rw-r--r--  chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h | 41
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/.clang-format | 18
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/.mailmap | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/AUTHORS | 16
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/CHANGELOG | 25
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/README | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c | 27
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c | 90
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/libs.doxy_template | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/libs.mk | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/blockd.h | 7
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/extend.c | 3
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/mips/mmi/idct_blk_mmi.c | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/modecont.c | 36
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_asm_stubs.c | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/decoder/ec_types.h | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_int.h | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/decoder/threading.c | 21
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c | 34
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c | 22
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/encoder/treewriter.h | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c | 24
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c | 160
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c | 229
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c | 668
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht_neon.h | 60
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c | 3
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.c | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl | 17
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c | 419
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c | 255
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c | 40
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.c | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c | 18
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c | 9
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c | 157
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h | 11
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c | 68
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.h | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c | 10
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c | 86
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c | 41
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c | 140
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk | 16
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c | 37
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_encoder.c | 52
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h | 28
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/vpx_frame_buffer.h | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct16x16_add_neon.c | 97
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c | 82
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct4x4_add_neon.c | 130
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct8x8_add_neon.c | 316
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct_neon.h | 99
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_135_add_neon.c | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c | 45
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c | 114
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.h | 668
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h | 7
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c | 63
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fwd_txfm_impl_sse2.h | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_convolve_avx2.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse2.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse4.c | 14
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h | 3
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm | 80
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c | 10
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.h | 3
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_x86.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm | 88
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_util/vpx_atomics.h | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpxenc.c | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/y4minput.c | 1
108 files changed, 3101 insertions, 2084 deletions
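
Note (illustrative aside, not part of the patch): most of the per-platform vp9_rtcd.h files listed above are regenerated run-time CPU detection (RTCD) headers. The pattern they follow, visible in the hunks below, is to declare each SIMD variant of a function, expose the entry point as a function pointer (RTCD_EXTERN), and let setup_rtcd_internal() promote that pointer to the best variant the CPU reports. The following is a minimal, self-contained C sketch of that dispatch pattern; the flag bits and toy kernels are placeholders, not libvpx's actual definitions.

#include <stdio.h>

/* Placeholder CPU-feature bits for the sketch (libvpx derives the real ones from cpuid). */
#define HAS_SSE2 0x01
#define HAS_AVX2 0x02

static int quantize_c(int x)    { return x >> 2; } /* portable fallback */
static int quantize_sse2(int x) { return x >> 2; } /* stand-in for an SSE2 kernel */
static int quantize_avx2(int x) { return x >> 2; } /* stand-in for an AVX2 kernel */

/* In the real headers this is e.g. `RTCD_EXTERN void (*vp9_quantize_fp)(...)`. */
static int (*quantize)(int);

static void setup_rtcd_internal(int flags) {
  quantize = quantize_c; /* always start from the C version */
  if (flags & HAS_SSE2) quantize = quantize_sse2;
  if (flags & HAS_AVX2) quantize = quantize_avx2; /* later checks win, as in the hunks below */
}

int main(void) {
  setup_rtcd_internal(HAS_SSE2 | HAS_AVX2);
  printf("%d\n", quantize(40)); /* dispatches to the AVX2 stand-in */
  return 0;
}
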
diff --git a/chromium/third_party/libvpx/README.chromium b/chromium/third_party/libvpx/README.chromium
index 854e6dbc239..72b4efbd3bc 100644
--- a/chromium/third_party/libvpx/README.chromium
+++ b/chromium/third_party/libvpx/README.chromium
@@ -1,13 +1,13 @@
Name: libvpx
URL: http://www.webmproject.org
-Version: v1.6.0
+Version: v1.7.0
License: BSD
License File: source/libvpx/LICENSE
Security Critical: yes
-Date: Tuesday January 16 2018
-Branch: master
-Commit: 373e08f921e5bfd5a96963fabbbbe16ec793d44e
+Date: Tuesday March 6 2018
+Branch: m66-3359
+Commit: e9fff8a9dbcd03fbf3e5b7caaa9dc2631a79882a
Description:
Contains the sources used to compile libvpx binaries used by Google Chrome and
diff --git a/chromium/third_party/libvpx/libvpx_srcs.gni b/chromium/third_party/libvpx/libvpx_srcs.gni
index bcd8439c2d8..6ff45bcb1d7 100644
--- a/chromium/third_party/libvpx/libvpx_srcs.gni
+++ b/chromium/third_party/libvpx/libvpx_srcs.gni
@@ -450,7 +450,9 @@ libvpx_srcs_x86_ssse3 = [
]
libvpx_srcs_x86_sse4_1 = [
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_sse4.c",
+ "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c",
+ "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct32x32_add_sse4.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse4.c",
@@ -462,6 +464,7 @@ libvpx_srcs_x86_avx = [
]
libvpx_srcs_x86_avx2 = [
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_avx2.c",
+ "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/avg_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/fwd_txfm_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_convolve_avx2.c",
@@ -924,7 +927,9 @@ libvpx_srcs_x86_64_ssse3 = [
]
libvpx_srcs_x86_64_sse4_1 = [
"//third_party/libvpx/source/libvpx/vp8/encoder/x86/quantize_sse4.c",
+ "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c",
"//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c",
+ "//third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct32x32_add_sse4.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct4x4_add_sse4.c",
@@ -936,6 +941,7 @@ libvpx_srcs_x86_64_avx = [
]
libvpx_srcs_x86_64_avx2 = [
"//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_error_avx2.c",
+ "//third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/avg_intrin_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/fwd_txfm_avx2.c",
"//third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_convolve_avx2.c",
@@ -1431,6 +1437,7 @@ libvpx_srcs_arm_neon = [
"//third_party/libvpx/source/libvpx/vp8/vp8_dx_iface.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c",
+ "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht_neon.h",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.h",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.c",
@@ -1821,6 +1828,7 @@ libvpx_srcs_arm_neon_cpu_detect = [
"//third_party/libvpx/source/libvpx/vp8/encoder/vp8_quantize.c",
"//third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c",
"//third_party/libvpx/source/libvpx/vp8/vp8_dx_iface.c",
+ "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht_neon.h",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.h",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.c",
@@ -2255,6 +2263,7 @@ libvpx_srcs_arm64 = [
"//third_party/libvpx/source/libvpx/vp8/vp8_dx_iface.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c",
"//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c",
+ "//third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht_neon.h",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.c",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_alloccommon.h",
"//third_party/libvpx/source/libvpx/vp9/common/vp9_blockd.c",
diff --git a/chromium/third_party/libvpx/source/config/ios/arm-neon/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/ios/arm-neon/vp9_rtcd.h
index bd397cc8086..4df63fcec1a 100644
--- a/chromium/third_party/libvpx/source/config/ios/arm-neon/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/ios/arm-neon/vp9_rtcd.h
@@ -159,11 +159,7 @@ void vp9_iht8x8_64_add_c(const tran_low_t* input,
uint8_t* dest,
int stride,
int tx_type);
-void vp9_iht8x8_64_add_neon(const tran_low_t* input,
- uint8_t* dest,
- int stride,
- int tx_type);
-#define vp9_iht8x8_64_add vp9_iht8x8_64_add_neon
+#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c
void vp9_quantize_fp_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
diff --git a/chromium/third_party/libvpx/source/config/ios/arm64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/ios/arm64/vp9_rtcd.h
index bd397cc8086..4df63fcec1a 100644
--- a/chromium/third_party/libvpx/source/config/ios/arm64/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/ios/arm64/vp9_rtcd.h
@@ -159,11 +159,7 @@ void vp9_iht8x8_64_add_c(const tran_low_t* input,
uint8_t* dest,
int stride,
int tx_type);
-void vp9_iht8x8_64_add_neon(const tran_low_t* input,
- uint8_t* dest,
- int stride,
- int tx_type);
-#define vp9_iht8x8_64_add vp9_iht8x8_64_add_neon
+#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c
void vp9_quantize_fp_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
index dc5b2d1c595..25b8930cd36 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon-cpu-detect/vp9_rtcd.h
@@ -184,14 +184,7 @@ void vp9_iht8x8_64_add_c(const tran_low_t* input,
uint8_t* dest,
int stride,
int tx_type);
-void vp9_iht8x8_64_add_neon(const tran_low_t* input,
- uint8_t* dest,
- int stride,
- int tx_type);
-RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t* input,
- uint8_t* dest,
- int stride,
- int tx_type);
+#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c
void vp9_quantize_fp_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
@@ -298,9 +291,6 @@ static void setup_rtcd_internal(void) {
vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
if (flags & HAS_NEON)
vp9_iht4x4_16_add = vp9_iht4x4_16_add_neon;
- vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
- if (flags & HAS_NEON)
- vp9_iht8x8_64_add = vp9_iht8x8_64_add_neon;
vp9_quantize_fp = vp9_quantize_fp_c;
if (flags & HAS_NEON)
vp9_quantize_fp = vp9_quantize_fp_neon;
diff --git a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h
index bd397cc8086..4df63fcec1a 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm-neon/vp9_rtcd.h
@@ -159,11 +159,7 @@ void vp9_iht8x8_64_add_c(const tran_low_t* input,
uint8_t* dest,
int stride,
int tx_type);
-void vp9_iht8x8_64_add_neon(const tran_low_t* input,
- uint8_t* dest,
- int stride,
- int tx_type);
-#define vp9_iht8x8_64_add vp9_iht8x8_64_add_neon
+#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c
void vp9_quantize_fp_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
diff --git a/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h
index bd397cc8086..4df63fcec1a 100644
--- a/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/arm64/vp9_rtcd.h
@@ -159,11 +159,7 @@ void vp9_iht8x8_64_add_c(const tran_low_t* input,
uint8_t* dest,
int stride,
int tx_type);
-void vp9_iht8x8_64_add_neon(const tran_low_t* input,
- uint8_t* dest,
- int stride,
- int tx_type);
-#define vp9_iht8x8_64_add vp9_iht8x8_64_add_neon
+#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c
void vp9_quantize_fp_c(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
diff --git a/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h
index c5f23c47e04..c8f69b95e1d 100644
--- a/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/ia32/vp9_rtcd.h
@@ -277,7 +277,16 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t* input,
int pitch,
int tx_type,
int bd);
-#define vp9_highbd_iht16x16_256_add vp9_highbd_iht16x16_256_add_c
+void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht16x16_256_add)(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
void vp9_highbd_iht4x4_16_add_c(const tran_low_t* input,
uint16_t* dest,
@@ -300,7 +309,16 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t* input,
int stride,
int tx_type,
int bd);
-#define vp9_highbd_iht8x8_64_add vp9_highbd_iht8x8_64_add_c
+void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht8x8_64_add)(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
void vp9_highbd_mbpost_proc_across_ip_c(uint16_t* src,
int pitch,
@@ -424,6 +442,17 @@ void vp9_quantize_fp_sse2(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void vp9_quantize_fp_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ int skip_block,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
int skip_block,
@@ -512,9 +541,15 @@ static void setup_rtcd_internal(void) {
vp9_highbd_block_error = vp9_highbd_block_error_c;
if (flags & HAS_SSE2)
vp9_highbd_block_error = vp9_highbd_block_error_sse2;
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_sse4_1;
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_c;
if (flags & HAS_SSE4_1)
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_sse4_1;
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_sse4_1;
vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
if (flags & HAS_SSE2)
vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
@@ -527,6 +562,8 @@ static void setup_rtcd_internal(void) {
vp9_quantize_fp = vp9_quantize_fp_c;
if (flags & HAS_SSE2)
vp9_quantize_fp = vp9_quantize_fp_sse2;
+ if (flags & HAS_AVX2)
+ vp9_quantize_fp = vp9_quantize_fp_avx2;
vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_c;
if (flags & HAS_SSSE3)
vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_ssse3;
diff --git a/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h
index 4773ba6af4a..6f00c78fb74 100644
--- a/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/linux/x64/vp9_rtcd.h
@@ -246,7 +246,16 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t* input,
int pitch,
int tx_type,
int bd);
-#define vp9_highbd_iht16x16_256_add vp9_highbd_iht16x16_256_add_c
+void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht16x16_256_add)(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
void vp9_highbd_iht4x4_16_add_c(const tran_low_t* input,
uint16_t* dest,
@@ -269,7 +278,16 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t* input,
int stride,
int tx_type,
int bd);
-#define vp9_highbd_iht8x8_64_add vp9_highbd_iht8x8_64_add_c
+void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht8x8_64_add)(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
void vp9_highbd_mbpost_proc_across_ip_c(uint16_t* src,
int pitch,
@@ -395,6 +413,17 @@ void vp9_quantize_fp_ssse3(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void vp9_quantize_fp_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ int skip_block,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
int skip_block,
@@ -476,12 +505,20 @@ static void setup_rtcd_internal(void) {
vp9_fdct8x8_quant = vp9_fdct8x8_quant_c;
if (flags & HAS_SSSE3)
vp9_fdct8x8_quant = vp9_fdct8x8_quant_ssse3;
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_sse4_1;
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_c;
if (flags & HAS_SSE4_1)
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_sse4_1;
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_sse4_1;
vp9_quantize_fp = vp9_quantize_fp_sse2;
if (flags & HAS_SSSE3)
vp9_quantize_fp = vp9_quantize_fp_ssse3;
+ if (flags & HAS_AVX2)
+ vp9_quantize_fp = vp9_quantize_fp_avx2;
vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c;
if (flags & HAS_SSSE3)
vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_ssse3;
diff --git a/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h
index c5f23c47e04..c8f69b95e1d 100644
--- a/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/ia32/vp9_rtcd.h
@@ -277,7 +277,16 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t* input,
int pitch,
int tx_type,
int bd);
-#define vp9_highbd_iht16x16_256_add vp9_highbd_iht16x16_256_add_c
+void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht16x16_256_add)(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
void vp9_highbd_iht4x4_16_add_c(const tran_low_t* input,
uint16_t* dest,
@@ -300,7 +309,16 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t* input,
int stride,
int tx_type,
int bd);
-#define vp9_highbd_iht8x8_64_add vp9_highbd_iht8x8_64_add_c
+void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht8x8_64_add)(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
void vp9_highbd_mbpost_proc_across_ip_c(uint16_t* src,
int pitch,
@@ -424,6 +442,17 @@ void vp9_quantize_fp_sse2(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void vp9_quantize_fp_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ int skip_block,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
int skip_block,
@@ -512,9 +541,15 @@ static void setup_rtcd_internal(void) {
vp9_highbd_block_error = vp9_highbd_block_error_c;
if (flags & HAS_SSE2)
vp9_highbd_block_error = vp9_highbd_block_error_sse2;
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_sse4_1;
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_c;
if (flags & HAS_SSE4_1)
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_sse4_1;
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_sse4_1;
vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
if (flags & HAS_SSE2)
vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
@@ -527,6 +562,8 @@ static void setup_rtcd_internal(void) {
vp9_quantize_fp = vp9_quantize_fp_c;
if (flags & HAS_SSE2)
vp9_quantize_fp = vp9_quantize_fp_sse2;
+ if (flags & HAS_AVX2)
+ vp9_quantize_fp = vp9_quantize_fp_avx2;
vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_c;
if (flags & HAS_SSSE3)
vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_ssse3;
diff --git a/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h
index 4773ba6af4a..6f00c78fb74 100644
--- a/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/mac/x64/vp9_rtcd.h
@@ -246,7 +246,16 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t* input,
int pitch,
int tx_type,
int bd);
-#define vp9_highbd_iht16x16_256_add vp9_highbd_iht16x16_256_add_c
+void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht16x16_256_add)(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
void vp9_highbd_iht4x4_16_add_c(const tran_low_t* input,
uint16_t* dest,
@@ -269,7 +278,16 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t* input,
int stride,
int tx_type,
int bd);
-#define vp9_highbd_iht8x8_64_add vp9_highbd_iht8x8_64_add_c
+void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht8x8_64_add)(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
void vp9_highbd_mbpost_proc_across_ip_c(uint16_t* src,
int pitch,
@@ -395,6 +413,17 @@ void vp9_quantize_fp_ssse3(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void vp9_quantize_fp_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ int skip_block,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
int skip_block,
@@ -476,12 +505,20 @@ static void setup_rtcd_internal(void) {
vp9_fdct8x8_quant = vp9_fdct8x8_quant_c;
if (flags & HAS_SSSE3)
vp9_fdct8x8_quant = vp9_fdct8x8_quant_ssse3;
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_sse4_1;
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_c;
if (flags & HAS_SSE4_1)
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_sse4_1;
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_sse4_1;
vp9_quantize_fp = vp9_quantize_fp_sse2;
if (flags & HAS_SSSE3)
vp9_quantize_fp = vp9_quantize_fp_ssse3;
+ if (flags & HAS_AVX2)
+ vp9_quantize_fp = vp9_quantize_fp_avx2;
vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c;
if (flags & HAS_SSSE3)
vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_ssse3;
diff --git a/chromium/third_party/libvpx/source/config/vpx_version.h b/chromium/third_party/libvpx/source/config/vpx_version.h
index 978e0519130..5b85ff05211 100644
--- a/chromium/third_party/libvpx/source/config/vpx_version.h
+++ b/chromium/third_party/libvpx/source/config/vpx_version.h
@@ -1,8 +1,8 @@
// This file is generated. Do not edit.
#define VERSION_MAJOR 1
-#define VERSION_MINOR 6
-#define VERSION_PATCH 1
-#define VERSION_EXTRA "1494-g373e08f92"
+#define VERSION_MINOR 7
+#define VERSION_PATCH 0
+#define VERSION_EXTRA "111-ge9fff8a9d"
#define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH))
-#define VERSION_STRING_NOSP "v1.6.1-1494-g373e08f92"
-#define VERSION_STRING " v1.6.1-1494-g373e08f92"
+#define VERSION_STRING_NOSP "v1.7.0-111-ge9fff8a9d"
+#define VERSION_STRING " v1.7.0-111-ge9fff8a9d"
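Note (illustrative aside, not part of the patch): with the new values, VERSION_PACKED evaluates to (1 << 16) | (7 << 8) | 0 = 0x10700 (67328 decimal), while the previous v1.6.1 headers packed to (1 << 16) | (6 << 8) | 1 = 0x10601.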
diff --git a/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h
index c5f23c47e04..c8f69b95e1d 100644
--- a/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/ia32/vp9_rtcd.h
@@ -277,7 +277,16 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t* input,
int pitch,
int tx_type,
int bd);
-#define vp9_highbd_iht16x16_256_add vp9_highbd_iht16x16_256_add_c
+void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht16x16_256_add)(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
void vp9_highbd_iht4x4_16_add_c(const tran_low_t* input,
uint16_t* dest,
@@ -300,7 +309,16 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t* input,
int stride,
int tx_type,
int bd);
-#define vp9_highbd_iht8x8_64_add vp9_highbd_iht8x8_64_add_c
+void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht8x8_64_add)(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
void vp9_highbd_mbpost_proc_across_ip_c(uint16_t* src,
int pitch,
@@ -424,6 +442,17 @@ void vp9_quantize_fp_sse2(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void vp9_quantize_fp_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ int skip_block,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
int skip_block,
@@ -512,9 +541,15 @@ static void setup_rtcd_internal(void) {
vp9_highbd_block_error = vp9_highbd_block_error_c;
if (flags & HAS_SSE2)
vp9_highbd_block_error = vp9_highbd_block_error_sse2;
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_sse4_1;
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_c;
if (flags & HAS_SSE4_1)
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_sse4_1;
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_sse4_1;
vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
if (flags & HAS_SSE2)
vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
@@ -527,6 +562,8 @@ static void setup_rtcd_internal(void) {
vp9_quantize_fp = vp9_quantize_fp_c;
if (flags & HAS_SSE2)
vp9_quantize_fp = vp9_quantize_fp_sse2;
+ if (flags & HAS_AVX2)
+ vp9_quantize_fp = vp9_quantize_fp_avx2;
vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_c;
if (flags & HAS_SSSE3)
vp9_scale_and_extend_frame = vp9_scale_and_extend_frame_ssse3;
diff --git a/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h b/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h
index 4773ba6af4a..6f00c78fb74 100644
--- a/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h
+++ b/chromium/third_party/libvpx/source/config/win/x64/vp9_rtcd.h
@@ -246,7 +246,16 @@ void vp9_highbd_iht16x16_256_add_c(const tran_low_t* input,
int pitch,
int tx_type,
int bd);
-#define vp9_highbd_iht16x16_256_add vp9_highbd_iht16x16_256_add_c
+void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht16x16_256_add)(const tran_low_t* input,
+ uint16_t* output,
+ int pitch,
+ int tx_type,
+ int bd);
void vp9_highbd_iht4x4_16_add_c(const tran_low_t* input,
uint16_t* dest,
@@ -269,7 +278,16 @@ void vp9_highbd_iht8x8_64_add_c(const tran_low_t* input,
int stride,
int tx_type,
int bd);
-#define vp9_highbd_iht8x8_64_add vp9_highbd_iht8x8_64_add_c
+void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
+RTCD_EXTERN void (*vp9_highbd_iht8x8_64_add)(const tran_low_t* input,
+ uint16_t* dest,
+ int stride,
+ int tx_type,
+ int bd);
void vp9_highbd_mbpost_proc_across_ip_c(uint16_t* src,
int pitch,
@@ -395,6 +413,17 @@ void vp9_quantize_fp_ssse3(const tran_low_t* coeff_ptr,
uint16_t* eob_ptr,
const int16_t* scan,
const int16_t* iscan);
+void vp9_quantize_fp_avx2(const tran_low_t* coeff_ptr,
+ intptr_t n_coeffs,
+ int skip_block,
+ const int16_t* round_ptr,
+ const int16_t* quant_ptr,
+ tran_low_t* qcoeff_ptr,
+ tran_low_t* dqcoeff_ptr,
+ const int16_t* dequant_ptr,
+ uint16_t* eob_ptr,
+ const int16_t* scan,
+ const int16_t* iscan);
RTCD_EXTERN void (*vp9_quantize_fp)(const tran_low_t* coeff_ptr,
intptr_t n_coeffs,
int skip_block,
@@ -476,12 +505,20 @@ static void setup_rtcd_internal(void) {
vp9_fdct8x8_quant = vp9_fdct8x8_quant_c;
if (flags & HAS_SSSE3)
vp9_fdct8x8_quant = vp9_fdct8x8_quant_ssse3;
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht16x16_256_add = vp9_highbd_iht16x16_256_add_sse4_1;
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_c;
if (flags & HAS_SSE4_1)
vp9_highbd_iht4x4_16_add = vp9_highbd_iht4x4_16_add_sse4_1;
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_c;
+ if (flags & HAS_SSE4_1)
+ vp9_highbd_iht8x8_64_add = vp9_highbd_iht8x8_64_add_sse4_1;
vp9_quantize_fp = vp9_quantize_fp_sse2;
if (flags & HAS_SSSE3)
vp9_quantize_fp = vp9_quantize_fp_ssse3;
+ if (flags & HAS_AVX2)
+ vp9_quantize_fp = vp9_quantize_fp_avx2;
vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_c;
if (flags & HAS_SSSE3)
vp9_quantize_fp_32x32 = vp9_quantize_fp_32x32_ssse3;
diff --git a/chromium/third_party/libvpx/source/libvpx/.clang-format b/chromium/third_party/libvpx/source/libvpx/.clang-format
index c1483199e04..e76a526e4b9 100644
--- a/chromium/third_party/libvpx/source/libvpx/.clang-format
+++ b/chromium/third_party/libvpx/source/libvpx/.clang-format
@@ -1,12 +1,12 @@
---
Language: Cpp
# BasedOnStyle: Google
-# Generated with clang-format 4.0.1
+# Generated with clang-format 5.0.0
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
-AlignEscapedNewlinesLeft: true
+AlignEscapedNewlines: Left
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
@@ -33,14 +33,20 @@ BraceWrapping:
BeforeCatch: false
BeforeElse: false
IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
+BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
@@ -48,7 +54,11 @@ Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
-ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
+FixNamespaceComments: true
+ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
IncludeCategories:
- Regex: '^<.*\.h>'
Priority: 1
@@ -70,6 +80,7 @@ NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
+PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
@@ -79,6 +90,7 @@ PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Right
ReflowComments: true
SortIncludes: false
+SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
diff --git a/chromium/third_party/libvpx/source/libvpx/.mailmap b/chromium/third_party/libvpx/source/libvpx/.mailmap
index 166c45ee8f4..29af5106504 100644
--- a/chromium/third_party/libvpx/source/libvpx/.mailmap
+++ b/chromium/third_party/libvpx/source/libvpx/.mailmap
@@ -3,6 +3,7 @@ Aℓex Converse <aconverse@google.com>
Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
Alpha Lam <hclam@google.com> <hclam@chromium.org>
+Chris Cunningham <chcunningham@chromium.org>
Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
Deb Mukherjee <debargha@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
@@ -21,18 +22,21 @@ Marco Paniconi <marpan@google.com>
Marco Paniconi <marpan@google.com> <marpan@chromium.org>
Pascal Massimino <pascal.massimino@gmail.com>
Paul Wilkins <paulwilkins@google.com>
+Peter Boström <pbos@chromium.org> <pbos@google.com>
Peter de Rivaz <peter.derivaz@gmail.com>
Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
Sami Pietilä <samipietila@google.com>
+Shiyou Yin <yinshiyou-hf@loongson.cn>
Tamar Levy <tamar.levy@intel.com>
Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
Tom Finegan <tomfinegan@google.com>
Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
+Urvang Joshi <urvang@google.com> <urvang@chromium.org>
Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
Yaowu Xu <yaowu@google.com> <Yaowu Xu>
diff --git a/chromium/third_party/libvpx/source/libvpx/AUTHORS b/chromium/third_party/libvpx/source/libvpx/AUTHORS
index fc2ef8f43cb..04c28724329 100644
--- a/chromium/third_party/libvpx/source/libvpx/AUTHORS
+++ b/chromium/third_party/libvpx/source/libvpx/AUTHORS
@@ -3,13 +3,13 @@
Aaron Watry <awatry@gmail.com>
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
-Adam Xu <adam@xuyaowu.com>
Adrian Grange <agrange@google.com>
Aℓex Converse <aconverse@google.com>
Ahmad Sharif <asharif@google.com>
Aleksey Vasenev <margtu-fivt@ya.ru>
Alexander Potapenko <glider@google.com>
Alexander Voronov <avoronov@graphics.cs.msu.ru>
+Alexandra Hájková <alexandra.khirnova@gmail.com>
Alexis Ballier <aballier@gentoo.org>
Alok Ahuja <waveletcoeff@gmail.com>
Alpha Lam <hclam@google.com>
@@ -17,6 +17,7 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
Ami Fischman <fischman@chromium.org>
Andoni Morales Alastruey <ylatuya@gmail.com>
Andres Mejia <mcitadel@gmail.com>
+Andrew Lewis <andrewlewis@google.com>
Andrew Russell <anrussell@google.com>
Angie Chiang <angiebird@google.com>
Aron Rosenberg <arosenberg@logitech.com>
@@ -24,7 +25,9 @@ Attila Nagy <attilanagy@google.com>
Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com>
Charles 'Buck' Krasic <ckrasic@google.com>
+Cheng Chen <chengchen@google.com>
chm <chm@rock-chips.com>
+Chris Cunningham <chcunningham@chromium.org>
Christian Duvivier <cduvivier@google.com>
Daniele Castagna <dcastagna@chromium.org>
Daniel Kang <ddkang@google.com>
@@ -46,10 +49,12 @@ Geza Lore <gezalore@gmail.com>
Ghislain MARY <ghislainmary2@gmail.com>
Giuseppe Scrivano <gscrivano@gnu.org>
Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
+Gregor Jasny <gjasny@gmail.com>
Guillaume Martres <gmartres@google.com>
Guillermo Ballester Valor <gbvalor@gmail.com>
Hangyu Kuang <hkuang@google.com>
Hanno Böck <hanno@hboeck.de>
+Han Shen <shenhan@google.com>
Henrik Lundin <hlundin@google.com>
Hui Su <huisu@google.com>
Ivan Krasin <krasin@chromium.org>
@@ -83,6 +88,7 @@ Justin Clift <justin@salasaga.org>
Justin Lebar <justin.lebar@gmail.com>
Kaustubh Raste <kaustubh.raste@imgtec.com>
KO Myung-Hun <komh@chollian.net>
+Kyle Siefring <kylesiefring@gmail.com>
Lawrence Velázquez <larryv@macports.org>
Linfeng Zhang <linfengz@google.com>
Lou Quillio <louquillio@google.com>
@@ -101,6 +107,7 @@ Mikhal Shemer <mikhal@google.com>
Min Chen <chenm003@gmail.com>
Minghai Shang <minghai@google.com>
Min Ye <yeemmi@google.com>
+Moriyoshi Koizumi <mozo@mozo.jp>
Morton Jonuschat <yabawock@gmail.com>
Nathan E. Egge <negge@mozilla.com>
Nico Weber <thakis@chromium.org>
@@ -111,12 +118,15 @@ Paul Wilkins <paulwilkins@google.com>
Pavol Rusnak <stick@gk2.sk>
Paweł Hajdan <phajdan@google.com>
Pengchong Jin <pengchong@google.com>
-Peter Boström <pbos@google.com>
+Peter Boström <pbos@chromium.org>
+Peter Collingbourne <pcc@chromium.org>
Peter de Rivaz <peter.derivaz@gmail.com>
Philip Jägenstedt <philipj@opera.com>
Priit Laes <plaes@plaes.org>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
Rafaël Carré <funman@videolan.org>
+Rafael de Lucena Valle <rafaeldelucena@gmail.com>
+Rahul Chaudhry <rahulchaudhry@google.com>
Ralph Giles <giles@xiph.org>
Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
Rob Bradford <rob@linux.intel.com>
@@ -135,6 +145,7 @@ Shiyou Yin <yinshiyou-hf@loongson.cn>
Shunyao Li <shunyaoli@google.com>
Stefan Holmer <holmer@google.com>
Suman Sunkara <sunkaras@google.com>
+Sylvestre Ledru <sylvestre@mozilla.com>
Taekhyun Kim <takim@nvidia.com>
Takanori MATSUURA <t.matsuu@gmail.com>
Tamar Levy <tamar.levy@intel.com>
@@ -147,6 +158,7 @@ Tom Finegan <tomfinegan@google.com>
Tristan Matthews <le.businessman@gmail.com>
Urvang Joshi <urvang@google.com>
Vignesh Venkatasubramanian <vigneshv@google.com>
+Vlad Tsyrklevich <vtsyrklevich@chromium.org>
Yaowu Xu <yaowu@google.com>
Yi Luo <luoyi@google.com>
Yongzhe Wang <yongzhe@google.com>
diff --git a/chromium/third_party/libvpx/source/libvpx/CHANGELOG b/chromium/third_party/libvpx/source/libvpx/CHANGELOG
index 7e7aec67ac8..2281394c8ed 100644
--- a/chromium/third_party/libvpx/source/libvpx/CHANGELOG
+++ b/chromium/third_party/libvpx/source/libvpx/CHANGELOG
@@ -1,3 +1,28 @@
+2017-01-04 v1.7.0 "Mandarin Duck"
+ This release focused on high bit depth performance (10/12 bit) and vp9
+ encoding improvements.
+
+ - Upgrading:
+ This release is ABI incompatible due to new vp9 encoder features.
+
+ Frame parallel decoding for vp9 has been removed.
+
+ - Enhancements:
+ vp9 encoding supports additional threads with --row-mt. This can be greater
+ than the number of tiles.
+
+ Two new vp9 encoder options have been added:
+ --corpus-complexity
+ --tune-content=film
+
+ Additional tooling for respecting the vp9 "level" profiles has been added.
+
+ - Bug fixes:
+ A variety of fuzzing issues.
+ vp8 threading fix for ARM.
+ Codec control VP9_SET_SKIP_LOOP_FILTER fixed.
+ Reject invalid multi resolution configurations.
+
2017-01-09 v1.6.1 "Long Tailed Duck"
This release improves upon the VP9 encoder and speeds up the encoding and
decoding processes.
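
Note (illustrative aside, not part of the patch): the encoder features named in the new CHANGELOG entry above surface as vpxenc flags (--row-mt, --corpus-complexity, --tune-content=film) and, for row-mt and content tuning, as codec controls. The sketch below shows how they might be enabled through the C API; the control and enum names (VP9E_SET_ROW_MT, VP9E_SET_TUNE_CONTENT, VP9E_CONTENT_FILM) are assumed to match vpx/vp8cx.h as of v1.7.0, and error handling is omitted.

#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

/* Assumes `codec` was already initialized with vpx_codec_enc_init() and the VP9 encoder. */
static void enable_new_vp9_options(vpx_codec_ctx_t *codec) {
  /* Row-based multithreading; the worker count may now exceed the tile count. */
  vpx_codec_control(codec, VP9E_SET_ROW_MT, 1);
  /* Content tuning for film-grain material (the new "film" mode). */
  vpx_codec_control(codec, VP9E_SET_TUNE_CONTENT, VP9E_CONTENT_FILM);
}
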
diff --git a/chromium/third_party/libvpx/source/libvpx/README b/chromium/third_party/libvpx/source/libvpx/README
index f910ce76187..73304dd62f4 100644
--- a/chromium/third_party/libvpx/source/libvpx/README
+++ b/chromium/third_party/libvpx/source/libvpx/README
@@ -1,4 +1,4 @@
-README - 26 January 2017
+README - 24 January 2018
Welcome to the WebM VP8/VP9 Codec SDK!
@@ -63,6 +63,8 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv8-linux-gcc
mips32-linux-gcc
mips64-linux-gcc
+ ppc64-linux-gcc
+ ppc64le-linux-gcc
sparc-solaris-gcc
x86-android-gcc
x86-darwin8-gcc
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
index 0987cbfb858..d49a2307d24 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
@@ -429,8 +429,9 @@ static void set_rate_control_stats(struct RateControlStats *rc,
rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl];
if (tl > 0) {
rc->layer_pfb[layer] =
- 1000.0 * (cfg->layer_target_bitrate[layer] -
- cfg->layer_target_bitrate[layer - 1]) /
+ 1000.0 *
+ (cfg->layer_target_bitrate[layer] -
+ cfg->layer_target_bitrate[layer - 1]) /
(rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
} else {
rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
@@ -573,8 +574,8 @@ void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
} else {
if (is_key_frame) {
ref_frame_config->frame_flags[sl] =
- VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
- VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+ VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
@@ -588,14 +589,24 @@ void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
} else {
ref_frame_config->frame_flags[sl] =
VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
+ if (sl == num_spatial_layers - 1)
+ ref_frame_config->frame_flags[sl] =
+ VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_ARF |
+ VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
}
}
if (tl == 0) {
ref_frame_config->lst_fb_idx[sl] = sl;
- if (sl)
- ref_frame_config->gld_fb_idx[sl] = sl - 1;
- else
+ if (sl) {
+ if (is_key_frame) {
+ ref_frame_config->lst_fb_idx[sl] = sl - 1;
+ ref_frame_config->gld_fb_idx[sl] = sl;
+ } else {
+ ref_frame_config->gld_fb_idx[sl] = sl - 1;
+ }
+ } else {
ref_frame_config->gld_fb_idx[sl] = 0;
+ }
ref_frame_config->alt_fb_idx[sl] = 0;
} else if (tl == 1) {
ref_frame_config->lst_fb_idx[sl] = sl;
@@ -738,6 +749,8 @@ int main(int argc, const char **argv) {
// the encode for the whole superframe. The encoder will internally loop
// over all the spatial layers for the current superframe.
vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
+ // TODO(jianj): Fix the parameter passing for "is_key_frame" in
+ // set_frame_flags_bypass_model() for case of periodic key frames.
set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
svc_ctx.spatial_layers, frame_cnt == 0,
&ref_frame_config);
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c
index f5736ea45d2..4c985ba523a 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/vpx_temporal_svc_encoder.c
@@ -26,7 +26,9 @@
#include "../tools_common.h"
#include "../video_writer.h"
-#define VP8_ROI_MAP 0
+#define ROI_MAP 0
+
+#define zero(Dest) memset(&Dest, 0, sizeof(Dest));
static const char *exec_name;
@@ -99,9 +101,10 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
for (i = 0; i < cfg->ts_number_layers; ++i) {
if (i > 0) {
rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
- rc->layer_pfb[i] = 1000.0 * (rc->layer_target_bitrate[i] -
- rc->layer_target_bitrate[i - 1]) /
- (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
+ rc->layer_pfb[i] =
+ 1000.0 *
+ (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
+ (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
}
rc->layer_input_frames[i] = 0;
rc->layer_enc_frames[i] = 0;
@@ -164,38 +167,60 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
die("Error: Number of input frames not equal to output! \n");
}
-#if VP8_ROI_MAP
-static void vp8_set_roi_map(vpx_codec_enc_cfg_t *cfg, vpx_roi_map_t *roi) {
+#if ROI_MAP
+static void set_roi_map(const char *enc_name, vpx_codec_enc_cfg_t *cfg,
+ vpx_roi_map_t *roi) {
unsigned int i, j;
- memset(roi, 0, sizeof(*roi));
+ int block_size = 0;
+ uint8_t is_vp8 = strncmp(enc_name, "vp8", 3) == 0 ? 1 : 0;
+ uint8_t is_vp9 = strncmp(enc_name, "vp9", 3) == 0 ? 1 : 0;
+ if (!is_vp8 && !is_vp9) {
+ die("unsupported codec.");
+ }
+ zero(*roi);
+
+ block_size = is_vp9 && !is_vp8 ? 8 : 16;
// ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for
// segment is 16x16 for vp8, 8x8 for vp9.
- roi->rows = (cfg->g_h + 15) / 16;
- roi->cols = (cfg->g_w + 15) / 16;
+ roi->rows = (cfg->g_h + block_size - 1) / block_size;
+ roi->cols = (cfg->g_w + block_size - 1) / block_size;
// Applies delta QP on the segment blocks, varies from -63 to 63.
// Setting to negative means lower QP (better quality).
// Below we set delta_q to the extreme (-63) to show strong effect.
- roi->delta_q[0] = 0;
+ // VP8 uses the first 4 segments. VP9 uses all 8 segments.
+ zero(roi->delta_q);
roi->delta_q[1] = -63;
- roi->delta_q[2] = 0;
- roi->delta_q[3] = 0;
// Applies delta loopfilter strength on the segment blocks, varies from -63 to
- // 63. Setting to positive means stronger loopfilter.
- roi->delta_lf[0] = 0;
- roi->delta_lf[1] = 0;
- roi->delta_lf[2] = 0;
- roi->delta_lf[3] = 0;
-
- // Applies skip encoding threshold on the segment blocks, varies from 0 to
- // UINT_MAX. Larger value means more skipping of encoding is possible.
- // This skip threshold only applies on delta frames.
- roi->static_threshold[0] = 0;
- roi->static_threshold[1] = 0;
- roi->static_threshold[2] = 0;
- roi->static_threshold[3] = 0;
+ // 63. Setting to positive means stronger loopfilter. VP8 uses the first 4
+ // segments. VP9 uses all 8 segments.
+ zero(roi->delta_lf);
+
+ if (is_vp8) {
+ // Applies skip encoding threshold on the segment blocks, varies from 0 to
+ // UINT_MAX. Larger value means more skipping of encoding is possible.
+ // This skip threshold only applies on delta frames.
+ zero(roi->static_threshold);
+ }
+
+ if (is_vp9) {
+ // Apply skip segment. Setting to 1 means this block will be copied from
+ // previous frame.
+ zero(roi->skip);
+ }
+
+ if (is_vp9) {
+ // Apply ref frame segment.
+ // -1 : Do not apply this segment.
+ // 0 : Froce using intra.
+ // 1 : Force using last.
+ // 2 : Force using golden.
+ // 3 : Force using alfref but not used in non-rd pickmode for 0 lag.
+ memset(roi->ref_frame, -1, sizeof(roi->ref_frame));
+ roi->ref_frame[1] = 1;
+ }
// Use 2 states: 1 is center square, 0 is the rest.
roi->roi_map =
@@ -563,7 +588,7 @@ int main(int argc, char **argv) {
int layering_mode = 0;
int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 };
int flag_periodicity = 1;
-#if VP8_ROI_MAP
+#if ROI_MAP
vpx_roi_map_t roi;
#endif
vpx_svc_layer_id_t layer_id = { 0, 0 };
@@ -766,8 +791,8 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);
-#if VP8_ROI_MAP
- vp8_set_roi_map(&cfg, &roi);
+#if ROI_MAP
+ set_roi_map(encoder->name, &cfg, &roi);
if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi))
die_codec(&codec, "Failed to set ROI map");
#endif
@@ -784,6 +809,12 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
+#if ROI_MAP
+ set_roi_map(encoder->name, &cfg, &roi);
+ if (vpx_codec_control(&codec, VP9E_SET_ROI_MAP, &roi))
+ die_codec(&codec, "Failed to set ROI map");
+ vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 0);
+#endif
// TODO(marpan/jianj): There is an issue with row-mt for low resolutons at
// high speed settings, disable its use for those cases for now.
if (cfg.g_threads > 1 && ((cfg.g_w > 320 && cfg.g_h > 240) || speed < 7))
@@ -911,5 +942,8 @@ int main(int argc, char **argv) {
for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]);
vpx_img_free(&raw);
+#if ROI_MAP
+ free(roi.roi_map);
+#endif
return EXIT_SUCCESS;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/libs.doxy_template b/chromium/third_party/libvpx/source/libvpx/libs.doxy_template
index 5a8f847280e..1eacc8fe2db 100644
--- a/chromium/third_party/libvpx/source/libvpx/libs.doxy_template
+++ b/chromium/third_party/libvpx/source/libvpx/libs.doxy_template
@@ -943,18 +943,6 @@ GENERATE_XML = NO
XML_OUTPUT = xml
-# The XML_SCHEMA tag can be used to specify an XML schema,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_SCHEMA =
-
-# The XML_DTD tag can be used to specify an XML DTD,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_DTD =
-
# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
# dump the program listings (including syntax highlighting
# and cross-referencing information) to the XML output. Note that
diff --git a/chromium/third_party/libvpx/source/libvpx/libs.mk b/chromium/third_party/libvpx/source/libvpx/libs.mk
index ba75b4a4f8a..a3e2f9d0ebd 100644
--- a/chromium/third_party/libvpx/source/libvpx/libs.mk
+++ b/chromium/third_party/libvpx/source/libvpx/libs.mk
@@ -233,8 +233,8 @@ OBJS-yes += $(LIBVPX_OBJS)
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
-SO_VERSION_MAJOR := 4
-SO_VERSION_MINOR := 1
+SO_VERSION_MAJOR := 5
+SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/blockd.h b/chromium/third_party/libvpx/source/libvpx/vp8/common/blockd.h
index 1a3aad16afd..ddcc307eb8d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/blockd.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/blockd.h
@@ -37,7 +37,9 @@ extern "C" {
#define SEGMENT_DELTADATA 0
#define SEGMENT_ABSDATA 1
-typedef struct { int r, c; } POS;
+typedef struct {
+ int r, c;
+} POS;
#define PLANE_TYPE_Y_NO_DC 0
#define PLANE_TYPE_Y2 1
@@ -180,6 +182,9 @@ typedef struct {
unsigned int low_res_ref_frames[MAX_REF_FRAMES];
// The video frame counter value for the key frame, for lowest resolution.
unsigned int key_frame_counter_value;
+ // Flags to signal skipped encoding of previous and base layer stream.
+ unsigned int skip_encoding_prev_stream;
+ unsigned int skip_encoding_base_stream;
LOWER_RES_MB_INFO *mb_info;
} LOWER_RES_FRAME_INFO;
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/extend.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/extend.c
index 2d67b516bef..f4dbce2cd53 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/extend.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/extend.c
@@ -20,8 +20,7 @@ static void copy_and_extend_plane(unsigned char *s, /* source */
int et, /* extend top border */
int el, /* extend left border */
int eb, /* extend bottom border */
- int er /* extend right border */
- ) {
+ int er) { /* extend right border */
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/mmi/idct_blk_mmi.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/mmi/idct_blk_mmi.c
index f6020ab4688..3f69071748a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/mmi/idct_blk_mmi.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/mmi/idct_blk_mmi.c
@@ -12,7 +12,7 @@
#include "vpx_mem/vpx_mem.h"
void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst,
- int stride, int8_t *eobs) {
+ int stride, char *eobs) {
int i, j;
for (i = 0; i < 4; i++) {
@@ -33,8 +33,7 @@ void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst,
}
void vp8_dequant_idct_add_uv_block_mmi(int16_t *q, int16_t *dq, uint8_t *dstu,
- uint8_t *dstv, int stride,
- int8_t *eobs) {
+ uint8_t *dstv, int stride, char *eobs) {
int i, j;
for (i = 0; i < 2; i++) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/modecont.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/modecont.c
index d6ad9bb99ae..bab410374f6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/modecont.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/modecont.c
@@ -11,28 +11,16 @@
#include "entropy.h"
const int vp8_mode_contexts[6][4] = {
- {
- /* 0 */
- 7, 1, 1, 143,
- },
- {
- /* 1 */
- 14, 18, 14, 107,
- },
- {
- /* 2 */
- 135, 64, 57, 68,
- },
- {
- /* 3 */
- 60, 56, 128, 65,
- },
- {
- /* 4 */
- 159, 134, 128, 34,
- },
- {
- /* 5 */
- 234, 188, 128, 28,
- },
+ { /* 0 */
+ 7, 1, 1, 143 },
+ { /* 1 */
+ 14, 18, 14, 107 },
+ { /* 2 */
+ 135, 64, 57, 68 },
+ { /* 3 */
+ 60, 56, 128, 65 },
+ { /* 4 */
+ 159, 134, 128, 34 },
+ { /* 5 */
+ 234, 188, 128, 28 },
};
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_asm_stubs.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_asm_stubs.c
index de59b3894a4..de836f19df4 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_asm_stubs.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/x86/vp8_asm_stubs.c
@@ -95,9 +95,7 @@ void vp8_sixtap_predict4x4_mmx(unsigned char *src_ptr, int src_pixels_per_line,
void vp8_sixtap_predict16x16_sse2(unsigned char *src_ptr,
int src_pixels_per_line, int xoffset,
int yoffset, unsigned char *dst_ptr,
- int dst_pitch
-
- ) {
+ int dst_pitch) {
DECLARE_ALIGNED(16, unsigned short,
FData2[24 * 24]); /* Temp data buffer used in filtering */
@@ -236,9 +234,7 @@ extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
void vp8_sixtap_predict16x16_ssse3(unsigned char *src_ptr,
int src_pixels_per_line, int xoffset,
int yoffset, unsigned char *dst_ptr,
- int dst_pitch
-
- ) {
+ int dst_pitch) {
DECLARE_ALIGNED(16, unsigned char, FData2[24 * 24]);
if (xoffset) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c
index 077bd3da268..8bfd3cea3dc 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/decodeframe.c
@@ -674,7 +674,7 @@ static unsigned int read_partition_size(VP8D_COMP *pbi,
static int read_is_valid(const unsigned char *start, size_t len,
const unsigned char *end) {
- return (start + len > start && start + len <= end);
+ return len != 0 && end > start && len <= (size_t)(end - start);
}
static unsigned int read_available_partition_size(
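The read_is_valid() rewrite above replaces a check in which start + len can overflow (forming a pointer past the buffer is undefined behaviour, and a corrupt len can wrap the comparison) with one that only subtracts two pointers known to be in range. The same idiom in isolation, as a minimal sketch:

    #include <stddef.h>
    /* Overflow-safe bounds check: never form start + len, compare the length
     * against the space that actually remains in [start, end). */
    static int range_is_valid(const unsigned char *start, size_t len,
                              const unsigned char *end) {
      return len != 0 && end > start && len <= (size_t)(end - start);
    }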
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/ec_types.h b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/ec_types.h
index 0ab08b649ad..8da561f479b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/ec_types.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/ec_types.h
@@ -34,7 +34,9 @@ typedef struct {
/* Structure used to hold all the overlaps of a macroblock. The overlaps of a
* macroblock are further divided into block overlaps.
*/
-typedef struct { B_OVERLAP overlaps[16]; } MB_OVERLAP;
+typedef struct {
+ B_OVERLAP overlaps[16];
+} MB_OVERLAP;
/* Structure for keeping track of motion vectors and which reference frame they
* refer to. Used for motion vector interpolation.
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_int.h b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_int.h
index 5ecacdbb972..3689258f191 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_int.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/onyxd_int.h
@@ -31,7 +31,9 @@ typedef struct {
void *ptr2;
} DECODETHREAD_DATA;
-typedef struct { MACROBLOCKD mbd; } MB_ROW_DEC;
+typedef struct {
+ MACROBLOCKD mbd;
+} MB_ROW_DEC;
typedef struct {
int enabled;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/threading.c b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/threading.c
index d0213f75c12..aadc8dc712f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/decoder/threading.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/decoder/threading.c
@@ -739,24 +739,21 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
/* Allocate memory for above_row buffers. */
CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i)
- CHECK_MEM_ERROR(
- pbi->mt_yabove_row[i],
- vpx_memalign(
- 16, sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1))));
+ CHECK_MEM_ERROR(pbi->mt_yabove_row[i],
+ vpx_memalign(16, sizeof(unsigned char) *
+ (width + (VP8BORDERINPIXELS << 1))));
CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i)
- CHECK_MEM_ERROR(
- pbi->mt_uabove_row[i],
- vpx_memalign(16,
- sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
+ CHECK_MEM_ERROR(pbi->mt_uabove_row[i],
+ vpx_memalign(16, sizeof(unsigned char) *
+ (uv_width + VP8BORDERINPIXELS)));
CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i)
- CHECK_MEM_ERROR(
- pbi->mt_vabove_row[i],
- vpx_memalign(16,
- sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS)));
+ CHECK_MEM_ERROR(pbi->mt_vabove_row[i],
+ vpx_memalign(16, sizeof(unsigned char) *
+ (uv_width + VP8BORDERINPIXELS)));
/* Allocate memory for left_col buffers. */
CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c
index 70f92434103..4ea991e5240 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/firstpass.c
@@ -989,11 +989,11 @@ static int estimate_max_q(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats,
bits_per_mb_at_this_q =
vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb;
- bits_per_mb_at_this_q = (int)(.5 +
- err_correction_factor * speed_correction *
- cpi->twopass.est_max_qcorrection_factor *
- cpi->twopass.section_max_qfactor *
- (double)bits_per_mb_at_this_q);
+ bits_per_mb_at_this_q =
+ (int)(.5 + err_correction_factor * speed_correction *
+ cpi->twopass.est_max_qcorrection_factor *
+ cpi->twopass.section_max_qfactor *
+ (double)bits_per_mb_at_this_q);
/* Mode and motion overhead */
/* As Q rises in real encode loop rd code will force overhead down
@@ -1086,9 +1086,8 @@ static int estimate_cq(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats,
vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb;
bits_per_mb_at_this_q =
- (int)(.5 +
- err_correction_factor * speed_correction * clip_iifactor *
- (double)bits_per_mb_at_this_q);
+ (int)(.5 + err_correction_factor * speed_correction * clip_iifactor *
+ (double)bits_per_mb_at_this_q);
/* Mode and motion overhead */
/* As Q rises in real encode loop rd code will force overhead down
@@ -1273,9 +1272,8 @@ void vp8_init_second_pass(VP8_COMP *cpi) {
* sum duration is not. It is calculated based on the actual durations of
* all frames from the first pass.
*/
- vp8_new_framerate(cpi,
- 10000000.0 * cpi->twopass.total_stats.count /
- cpi->twopass.total_stats.duration);
+ vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count /
+ cpi->twopass.total_stats.duration);
cpi->output_framerate = cpi->framerate;
cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration *
@@ -1739,10 +1737,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
/* Don't break out very close to a key frame */
((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) &&
((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) &&
- (!flash_detected) && ((mv_ratio_accumulator > 100.0) ||
- (abs_mv_in_out_accumulator > 3.0) ||
- (mv_in_out_accumulator < -2.0) ||
- ((boost_score - old_boost_score) < 2.0)))) {
+ (!flash_detected) &&
+ ((mv_ratio_accumulator > 100.0) ||
+ (abs_mv_in_out_accumulator > 3.0) ||
+ (mv_in_out_accumulator < -2.0) ||
+ ((boost_score - old_boost_score) < 2.0)))) {
boost_score = old_boost_score;
break;
}
@@ -1815,8 +1814,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) {
(next_frame.pcnt_inter > 0.75) &&
((mv_in_out_accumulator / (double)i > -0.2) ||
(mv_in_out_accumulator > -2.0)) &&
- (cpi->gfu_boost > 100) && (cpi->twopass.gf_decay_rate <=
- (ARF_DECAY_THRESH + (cpi->gfu_boost / 200))))
+ (cpi->gfu_boost > 100) &&
+ (cpi->twopass.gf_decay_rate <=
+ (ARF_DECAY_THRESH + (cpi->gfu_boost / 200))))
#endif
{
int Boost;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c
index 3c92be32fee..2da940199cf 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c
@@ -2862,7 +2862,6 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
fclose(yframe);
}
#endif
-/* return of 0 means drop frame */
#if !CONFIG_REALTIME_ONLY
/* Function to test for conditions that indicate we should loop
@@ -3364,11 +3363,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
(LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info;
if (cpi->oxcf.mr_encoder_id) {
- // TODO(marpan): This constraint shouldn't be needed, as we would like
- // to allow for key frame setting (forced or periodic) defined per
- // spatial layer. For now, keep this in.
- cm->frame_type = low_res_frame_info->frame_type;
-
// Check if lower resolution is available for motion vector reuse.
if (cm->frame_type != KEY_FRAME) {
cpi->mr_low_res_mv_avail = 1;
@@ -3393,7 +3387,16 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
== low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]);
*/
}
+ // Disable motion vector reuse (i.e., disable any usage of the low_res)
+ // if the previous lower stream is skipped/disabled.
+ if (low_res_frame_info->skip_encoding_prev_stream) {
+ cpi->mr_low_res_mv_avail = 0;
+ }
}
+ // This stream is not skipped (i.e., it's being encoded), so set this skip
+ // flag to 0. This is needed for the next stream (i.e., which is the next
+ // frame to be encoded).
+ low_res_frame_info->skip_encoding_prev_stream = 0;
// On a key frame: For the lowest resolution, keep track of the key frame
// counter value. For the higher resolutions, reset the current video
@@ -4782,8 +4785,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
cpi->temporal_pattern_counter++;
}
-/* reset to normal state now that we are done. */
-
#if 0
{
char filename[512];
@@ -4999,10 +5000,13 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags,
// be received for that high layer, which will yield an incorrect
// frame rate (from time-stamp adjustment in above calculation).
if (cpi->oxcf.mr_encoder_id) {
- cpi->ref_framerate = low_res_frame_info->low_res_framerate;
+ if (!low_res_frame_info->skip_encoding_base_stream)
+ cpi->ref_framerate = low_res_frame_info->low_res_framerate;
} else {
// Keep track of frame rate for lowest resolution.
low_res_frame_info->low_res_framerate = cpi->ref_framerate;
+ // The base stream is being encoded so set skip flag to 0.
+ low_res_frame_info->skip_encoding_base_stream = 0;
}
}
#endif
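The new skip_encoding_prev_stream / skip_encoding_base_stream flags let a higher-resolution encoder detect that a lower stream was not encoded this frame: the skipped stream raises the flag (see the vp8_cx_iface.c hunk further down), the code above reacts to it and then clears it once this stream really encodes, and the base-layer framerate is only propagated when the base stream was not skipped. A minimal sketch of the consumer side, mirroring the logic above (the helper name is hypothetical):

    /* Sketch: before reusing lower-resolution data, honour the skip flag and
     * reset it for the next (higher) stream in the multi-res chain. */
    static void example_check_lower_stream(LOWER_RES_FRAME_INFO *info,
                                           int *mv_reuse_ok) {
      if (info->skip_encoding_prev_stream) *mv_reuse_ok = 0;
      info->skip_encoding_prev_stream = 0; /* this stream is being encoded */
    }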
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c
index ab9b8fdc8a2..fc833bccc94 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c
@@ -1052,9 +1052,8 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) {
* overflow when values are large
*/
projected_size_based_on_q =
- (int)(((.5 +
- rate_correction_factor *
- vp8_bits_per_mb[cpi->common.frame_type][Q]) *
+ (int)(((.5 + rate_correction_factor *
+ vp8_bits_per_mb[cpi->common.frame_type][Q]) *
cpi->common.MBs) /
(1 << BPER_MB_NORMBITS));
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c
index e210b441055..fa5dde15d88 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/rdopt.c
@@ -770,9 +770,9 @@ static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate,
vp8_quantize_mbuv(x);
rate_to = rd_cost_mbuv(x);
- this_rate = rate_to +
- x->intra_uv_mode_cost[xd->frame_type]
- [xd->mode_info_context->mbmi.uv_mode];
+ this_rate =
+ rate_to + x->intra_uv_mode_cost[xd->frame_type]
+ [xd->mode_info_context->mbmi.uv_mode];
this_distortion = vp8_mbuverror(x) / 4;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/treewriter.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/treewriter.h
index dadbbe3f80d..e76699092bf 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/treewriter.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/treewriter.h
@@ -56,8 +56,7 @@ static INLINE unsigned int vp8_cost_branch(const unsigned int ct[2],
static void vp8_treed_write(vp8_writer *const w, vp8_tree t,
const vp8_prob *const p, int v,
- int n /* number of bits in v, assumed nonzero */
- ) {
+ int n) { /* number of bits in v, assumed nonzero */
vp8_tree_index i = 0;
do {
@@ -73,8 +72,7 @@ static INLINE void vp8_write_token(vp8_writer *const w, vp8_tree t,
}
static int vp8_treed_cost(vp8_tree t, const vp8_prob *const p, int v,
- int n /* number of bits in v, assumed nonzero */
- ) {
+ int n) { /* number of bits in v, assumed nonzero */
int c = 0;
vp8_tree_index i = 0;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
index 8f0a782bca5..d3e20059410 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
@@ -802,7 +802,20 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
unsigned long deadline) {
vpx_codec_err_t res = VPX_CODEC_OK;
- if (!ctx->cfg.rc_target_bitrate) return res;
+ if (!ctx->cfg.rc_target_bitrate) {
+#if CONFIG_MULTI_RES_ENCODING
+ if (!ctx->cpi) return VPX_CODEC_ERROR;
+ if (ctx->cpi->oxcf.mr_total_resolutions > 1) {
+ LOWER_RES_FRAME_INFO *low_res_frame_info =
+ (LOWER_RES_FRAME_INFO *)ctx->cpi->oxcf.mr_low_res_mode_info;
+ if (!low_res_frame_info) return VPX_CODEC_ERROR;
+ low_res_frame_info->skip_encoding_prev_stream = 1;
+ if (ctx->cpi->oxcf.mr_encoder_id == 0)
+ low_res_frame_info->skip_encoding_base_stream = 1;
+ }
+#endif
+ return res;
+ }
if (img) res = validate_img(ctx, img);
@@ -902,8 +915,8 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
(unsigned long)((delta * ctx->cfg.g_timebase.den + round) /
ctx->cfg.g_timebase.num / 10000000);
pkt.data.frame.flags = lib_flags << 16;
- pkt.data.frame.width = cpi->common.Width;
- pkt.data.frame.height = cpi->common.Height;
+ pkt.data.frame.width[0] = cpi->common.Width;
+ pkt.data.frame.height[0] = cpi->common.Height;
if (lib_flags & FRAMEFLAGS_KEY) {
pkt.data.frame.flags |= VPX_FRAME_IS_KEY;
@@ -1261,6 +1274,9 @@ CODEC_INTERFACE(vpx_codec_vp8_cx) = {
vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t cfg_maps; */
vp8e_encode, /* vpx_codec_encode_fn_t encode; */
vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t get_cx_data; */
- vp8e_set_config, NULL, vp8e_get_preview, vp8e_mr_alloc_mem,
+ vp8e_set_config,
+ NULL,
+ vp8e_get_preview,
+ vp8e_mr_alloc_mem,
} /* encoder functions */
};
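Besides the skip-flag plumbing, note the packet fill-in change: pkt.data.frame.width/height are now written through index 0, matching the vpx_codec_cx_pkt_t layout in which the frame dimensions are per-spatial-layer arrays (VP8 only ever uses slot 0). A caller reading the dimensions back would do so along these lines (sketch, assuming the standard get-cx-data loop):

    /* Sketch: reading encoded-frame dimensions from output packets. */
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *pkt;
    while ((pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
      if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
        const unsigned int w = pkt->data.frame.width[0];  /* layer 0 for VP8 */
        const unsigned int h = pkt->data.frame.height[0];
        (void)w;
        (void)h;
      }
    }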
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c
new file mode 100644
index 00000000000..46284238d61
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/arm/neon/vp9_iht_neon.h"
+#include "vpx_dsp/arm/highbd_idct_neon.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/txfm_common.h"
+
+static INLINE void highbd_iadst4(int32x4_t *const io) {
+ const int32_t sinpis[4] = { sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9 };
+ const int32x4_t sinpi = vld1q_s32(sinpis);
+ int32x4_t s[8];
+
+ s[0] = vmulq_lane_s32(io[0], vget_low_s32(sinpi), 0);
+ s[1] = vmulq_lane_s32(io[0], vget_low_s32(sinpi), 1);
+ s[2] = vmulq_lane_s32(io[1], vget_high_s32(sinpi), 0);
+ s[3] = vmulq_lane_s32(io[2], vget_high_s32(sinpi), 1);
+ s[4] = vmulq_lane_s32(io[2], vget_low_s32(sinpi), 0);
+ s[5] = vmulq_lane_s32(io[3], vget_low_s32(sinpi), 1);
+ s[6] = vmulq_lane_s32(io[3], vget_high_s32(sinpi), 1);
+ s[7] = vsubq_s32(io[0], io[2]);
+ s[7] = vaddq_s32(s[7], io[3]);
+
+ s[0] = vaddq_s32(s[0], s[3]);
+ s[0] = vaddq_s32(s[0], s[5]);
+ s[1] = vsubq_s32(s[1], s[4]);
+ s[1] = vsubq_s32(s[1], s[6]);
+ s[3] = s[2];
+ s[2] = vmulq_lane_s32(s[7], vget_high_s32(sinpi), 0);
+
+ io[0] = vaddq_s32(s[0], s[3]);
+ io[1] = vaddq_s32(s[1], s[3]);
+ io[2] = s[2];
+ io[3] = vaddq_s32(s[0], s[1]);
+ io[3] = vsubq_s32(io[3], s[3]);
+ io[0] = vrshrq_n_s32(io[0], DCT_CONST_BITS);
+ io[1] = vrshrq_n_s32(io[1], DCT_CONST_BITS);
+ io[2] = vrshrq_n_s32(io[2], DCT_CONST_BITS);
+ io[3] = vrshrq_n_s32(io[3], DCT_CONST_BITS);
+}
+
+void vp9_highbd_iht4x4_16_add_neon(const tran_low_t *input, uint16_t *dest,
+ int stride, int tx_type, int bd) {
+ const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
+ int16x8_t a[2];
+ int32x4_t c[4];
+
+ c[0] = vld1q_s32(input);
+ c[1] = vld1q_s32(input + 4);
+ c[2] = vld1q_s32(input + 8);
+ c[3] = vld1q_s32(input + 12);
+
+ if (bd == 8) {
+ a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1]));
+ a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ break;
+
+ case ADST_DCT:
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+ iadst4(a);
+ break;
+
+ case DCT_ADST:
+ iadst4(a);
+ transpose_s16_4x4q(&a[0], &a[1]);
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ break;
+
+ default:
+ assert(tx_type == ADST_ADST);
+ iadst4(a);
+ transpose_s16_4x4q(&a[0], &a[1]);
+ iadst4(a);
+ break;
+ }
+ a[0] = vrshrq_n_s16(a[0], 4);
+ a[1] = vrshrq_n_s16(a[1], 4);
+ } else {
+ switch (tx_type) {
+ case DCT_DCT: {
+ const int32x4_t cospis = vld1q_s32(kCospi32);
+
+ if (bd == 10) {
+ idct4x4_16_kernel_bd10(cospis, c);
+ idct4x4_16_kernel_bd10(cospis, c);
+ } else {
+ idct4x4_16_kernel_bd12(cospis, c);
+ idct4x4_16_kernel_bd12(cospis, c);
+ }
+ break;
+ }
+
+ case ADST_DCT: {
+ const int32x4_t cospis = vld1q_s32(kCospi32);
+
+ if (bd == 10) {
+ idct4x4_16_kernel_bd10(cospis, c);
+ } else {
+ idct4x4_16_kernel_bd12(cospis, c);
+ }
+ transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
+ highbd_iadst4(c);
+ break;
+ }
+
+ case DCT_ADST: {
+ const int32x4_t cospis = vld1q_s32(kCospi32);
+
+ transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
+ highbd_iadst4(c);
+ if (bd == 10) {
+ idct4x4_16_kernel_bd10(cospis, c);
+ } else {
+ idct4x4_16_kernel_bd12(cospis, c);
+ }
+ break;
+ }
+
+ default: {
+ assert(tx_type == ADST_ADST);
+ transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
+ highbd_iadst4(c);
+ transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
+ highbd_iadst4(c);
+ break;
+ }
+ }
+ a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4));
+ a[1] = vcombine_s16(vqrshrn_n_s32(c[2], 4), vqrshrn_n_s32(c[3], 4));
+ }
+
+ highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max);
+ highbd_idct4x4_1_add_kernel1(&dest, stride, a[1], max);
+}
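highbd_iadst4() above is a 32-bit NEON formulation of VP9's 4-point inverse ADST; each vmulq_lane_s32 corresponds to one of the sinpi products below. The same arithmetic in scalar form, as a reference sketch (sinpi_*_9 and DCT_CONST_BITS come from vpx_dsp/txfm_common.h, ROUND_POWER_OF_TWO from vpx_dsp/vpx_dsp_common.h):

    /* Scalar sketch of the 4-point inverse ADST computed above. */
    static void iadst4_scalar(const int32_t in[4], int32_t out[4]) {
      const int64_t s0 = (int64_t)sinpi_1_9 * in[0] + (int64_t)sinpi_4_9 * in[2] +
                         (int64_t)sinpi_2_9 * in[3];
      const int64_t s1 = (int64_t)sinpi_2_9 * in[0] - (int64_t)sinpi_1_9 * in[2] -
                         (int64_t)sinpi_4_9 * in[3];
      const int64_t s2 = (int64_t)sinpi_3_9 * (in[0] - in[2] + in[3]);
      const int64_t s3 = (int64_t)sinpi_3_9 * in[1];
      out[0] = (int32_t)ROUND_POWER_OF_TWO(s0 + s3, DCT_CONST_BITS);
      out[1] = (int32_t)ROUND_POWER_OF_TWO(s1 + s3, DCT_CONST_BITS);
      out[2] = (int32_t)ROUND_POWER_OF_TWO(s2, DCT_CONST_BITS);
      out[3] = (int32_t)ROUND_POWER_OF_TWO(s0 + s1 - s3, DCT_CONST_BITS);
    }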
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c
index 025254c3f33..4f0a90f2156 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c
@@ -14,206 +14,63 @@
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
+#include "vp9/common/arm/neon/vp9_iht_neon.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
-static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
- int32x4_t q8s32, q9s32;
- int16x4x2_t d0x2s16, d1x2s16;
- int32x4x2_t q0x2s32;
-
- d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
- d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
-
- q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
- q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
- q0x2s32 = vtrnq_s32(q8s32, q9s32);
-
- *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
- *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
-}
-
-static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
- int16x4_t *d2s16) {
- *d0s16 = vdup_n_s16(cospi_8_64);
- *d1s16 = vdup_n_s16(cospi_16_64);
- *d2s16 = vdup_n_s16(cospi_24_64);
-}
-
-static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
- int16x4_t *d5s16, int16x8_t *q3s16) {
- *d3s16 = vdup_n_s16(sinpi_1_9);
- *d4s16 = vdup_n_s16(sinpi_2_9);
- *q3s16 = vdupq_n_s16(sinpi_3_9);
- *d5s16 = vdup_n_s16(sinpi_4_9);
-}
-
-static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16,
- int16x4_t *d2s16, int16x8_t *q8s16,
- int16x8_t *q9s16) {
- int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
- int16x4_t d26s16, d27s16, d28s16, d29s16;
- int32x4_t q10s32, q13s32, q14s32, q15s32;
- int16x8_t q13s16, q14s16;
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
-
- d23s16 = vadd_s16(d16s16, d18s16);
- d24s16 = vsub_s16(d16s16, d18s16);
-
- q15s32 = vmull_s16(d17s16, *d2s16);
- q10s32 = vmull_s16(d17s16, *d0s16);
- q13s32 = vmull_s16(d23s16, *d1s16);
- q14s32 = vmull_s16(d24s16, *d1s16);
- q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
- q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
-
- d26s16 = vrshrn_n_s32(q13s32, 14);
- d27s16 = vrshrn_n_s32(q14s32, 14);
- d29s16 = vrshrn_n_s32(q15s32, 14);
- d28s16 = vrshrn_n_s32(q10s32, 14);
-
- q13s16 = vcombine_s16(d26s16, d27s16);
- q14s16 = vcombine_s16(d28s16, d29s16);
- *q8s16 = vaddq_s16(q13s16, q14s16);
- *q9s16 = vsubq_s16(q13s16, q14s16);
- *q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp
-}
-
-static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
- int16x4_t *d5s16, int16x8_t *q3s16,
- int16x8_t *q8s16, int16x8_t *q9s16) {
- int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
- int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
-
- d6s16 = vget_low_s16(*q3s16);
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
-
- q10s32 = vmull_s16(*d3s16, d16s16);
- q11s32 = vmull_s16(*d4s16, d16s16);
- q12s32 = vmull_s16(d6s16, d17s16);
- q13s32 = vmull_s16(*d5s16, d18s16);
- q14s32 = vmull_s16(*d3s16, d18s16);
- q15s32 = vmovl_s16(d16s16);
- q15s32 = vaddw_s16(q15s32, d19s16);
- q8s32 = vmull_s16(*d4s16, d19s16);
- q15s32 = vsubw_s16(q15s32, d18s16);
- q9s32 = vmull_s16(*d5s16, d19s16);
-
- q10s32 = vaddq_s32(q10s32, q13s32);
- q10s32 = vaddq_s32(q10s32, q8s32);
- q11s32 = vsubq_s32(q11s32, q14s32);
- q8s32 = vdupq_n_s32(sinpi_3_9);
- q11s32 = vsubq_s32(q11s32, q9s32);
- q15s32 = vmulq_s32(q15s32, q8s32);
-
- q13s32 = vaddq_s32(q10s32, q12s32);
- q10s32 = vaddq_s32(q10s32, q11s32);
- q14s32 = vaddq_s32(q11s32, q12s32);
- q10s32 = vsubq_s32(q10s32, q12s32);
-
- d16s16 = vrshrn_n_s32(q13s32, 14);
- d17s16 = vrshrn_n_s32(q14s32, 14);
- d18s16 = vrshrn_n_s32(q15s32, 14);
- d19s16 = vrshrn_n_s32(q10s32, 14);
-
- *q8s16 = vcombine_s16(d16s16, d17s16);
- *q9s16 = vcombine_s16(d18s16, d19s16);
-}
-
void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
- uint8x8_t d26u8, d27u8;
- int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
- uint32x2_t d26u32, d27u32;
- int16x8_t q3s16, q8s16, q9s16;
- uint16x8_t q8u16, q9u16;
-
- d26u32 = d27u32 = vdup_n_u32(0);
+ int16x8_t a[2];
+ uint8x8_t s[2], d[2];
+ uint16x8_t sum[2];
- q8s16 = vld1q_s16(input);
- q9s16 = vld1q_s16(input + 8);
+ assert(!((intptr_t)dest % sizeof(uint32_t)));
+ assert(!(stride % sizeof(uint32_t)));
- TRANSPOSE4X4(&q8s16, &q9s16);
+ a[0] = load_tran_low_to_s16q(input);
+ a[1] = load_tran_low_to_s16q(input + 8);
+ transpose_s16_4x4q(&a[0], &a[1]);
switch (tx_type) {
- case 0: // idct_idct is not supported. Fall back to C
- vp9_iht4x4_16_add_c(input, dest, stride, tx_type);
- return;
- case 1: // iadst_idct
- // generate constants
- GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
- GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
-
- // first transform rows
- IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
-
- // transpose the matrix
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- // then transform columns
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+ case DCT_DCT:
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
- case 2: // idct_iadst
- // generate constantsyy
- GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
- GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
- // first transform rows
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
-
- // transpose the matrix
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- // then transform columns
- IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
+ case ADST_DCT:
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+ iadst4(a);
break;
- case 3: // iadst_iadst
- // generate constants
- GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
-
- // first transform rows
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
- // transpose the matrix
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- // then transform columns
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+ case DCT_ADST:
+ iadst4(a);
+ transpose_s16_4x4q(&a[0], &a[1]);
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
- default: // iadst_idct
- assert(0);
+
+ default:
+ assert(tx_type == ADST_ADST);
+ iadst4(a);
+ transpose_s16_4x4q(&a[0], &a[1]);
+ iadst4(a);
break;
}
- q8s16 = vrshrq_n_s16(q8s16, 4);
- q9s16 = vrshrq_n_s16(q9s16, 4);
-
- d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
- dest += stride;
- d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
- dest += stride;
- d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
- dest += stride;
- d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
-
- q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
- q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
-
- d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
- d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
-
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
- dest -= stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
- dest -= stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
- dest -= stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
+ a[0] = vrshrq_n_s16(a[0], 4);
+ a[1] = vrshrq_n_s16(a[1], 4);
+ s[0] = load_u8(dest, stride);
+ s[1] = load_u8(dest + 2 * stride, stride);
+ sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s[0]);
+ sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), s[1]);
+ d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0]));
+ d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1]));
+ store_u8(dest, stride, d[0]);
+ store_u8(dest + 2 * stride, stride, d[1]);
}
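The rewritten 8-bit path dispatches on the named TX_TYPE values instead of raw integers (and no longer falls back to C for DCT_DCT), then rounds the residual by four bits, adds it to the predictor rows loaded with load_u8() and saturates back to 8 bits. The reconstruction step in scalar form, as a sketch (ROUND_POWER_OF_TWO from vpx_dsp/vpx_dsp_common.h; the NEON code does this two rows per vector with vaddw_u8/vqmovun_s16):

    /* Scalar sketch of the final add-and-clip performed above. */
    static void add_residual_4x4(const int16_t *res, uint8_t *dest, int stride) {
      int r, c;
      for (r = 0; r < 4; ++r) {
        for (c = 0; c < 4; ++c) {
          const int v =
              dest[r * stride + c] + ROUND_POWER_OF_TWO(res[r * 4 + c], 4);
          dest[r * stride + c] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
        }
      }
    }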
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c
index 1c739861c38..19163bc8780 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c
@@ -14,527 +14,199 @@
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
-static int16_t cospi_2_64 = 16305;
-static int16_t cospi_4_64 = 16069;
-static int16_t cospi_6_64 = 15679;
-static int16_t cospi_8_64 = 15137;
-static int16_t cospi_10_64 = 14449;
-static int16_t cospi_12_64 = 13623;
-static int16_t cospi_14_64 = 12665;
-static int16_t cospi_16_64 = 11585;
-static int16_t cospi_18_64 = 10394;
-static int16_t cospi_20_64 = 9102;
-static int16_t cospi_22_64 = 7723;
-static int16_t cospi_24_64 = 6270;
-static int16_t cospi_26_64 = 4756;
-static int16_t cospi_28_64 = 3196;
-static int16_t cospi_30_64 = 1606;
-
-static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
- int16x8_t *q10s16, int16x8_t *q11s16,
- int16x8_t *q12s16, int16x8_t *q13s16,
- int16x8_t *q14s16, int16x8_t *q15s16) {
- int16x4_t d0s16, d1s16, d2s16, d3s16;
- int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
- int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
- int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
- int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
- int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
- int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
-
- d0s16 = vdup_n_s16(cospi_28_64);
- d1s16 = vdup_n_s16(cospi_4_64);
- d2s16 = vdup_n_s16(cospi_12_64);
- d3s16 = vdup_n_s16(cospi_20_64);
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
- d20s16 = vget_low_s16(*q10s16);
- d21s16 = vget_high_s16(*q10s16);
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
- d30s16 = vget_low_s16(*q15s16);
- d31s16 = vget_high_s16(*q15s16);
-
- q2s32 = vmull_s16(d18s16, d0s16);
- q3s32 = vmull_s16(d19s16, d0s16);
- q5s32 = vmull_s16(d26s16, d2s16);
- q6s32 = vmull_s16(d27s16, d2s16);
-
- q2s32 = vmlsl_s16(q2s32, d30s16, d1s16);
- q3s32 = vmlsl_s16(q3s32, d31s16, d1s16);
- q5s32 = vmlsl_s16(q5s32, d22s16, d3s16);
- q6s32 = vmlsl_s16(q6s32, d23s16, d3s16);
-
- d8s16 = vrshrn_n_s32(q2s32, 14);
- d9s16 = vrshrn_n_s32(q3s32, 14);
- d10s16 = vrshrn_n_s32(q5s32, 14);
- d11s16 = vrshrn_n_s32(q6s32, 14);
- q4s16 = vcombine_s16(d8s16, d9s16);
- q5s16 = vcombine_s16(d10s16, d11s16);
-
- q2s32 = vmull_s16(d18s16, d1s16);
- q3s32 = vmull_s16(d19s16, d1s16);
- q9s32 = vmull_s16(d26s16, d3s16);
- q13s32 = vmull_s16(d27s16, d3s16);
-
- q2s32 = vmlal_s16(q2s32, d30s16, d0s16);
- q3s32 = vmlal_s16(q3s32, d31s16, d0s16);
- q9s32 = vmlal_s16(q9s32, d22s16, d2s16);
- q13s32 = vmlal_s16(q13s32, d23s16, d2s16);
-
- d14s16 = vrshrn_n_s32(q2s32, 14);
- d15s16 = vrshrn_n_s32(q3s32, 14);
- d12s16 = vrshrn_n_s32(q9s32, 14);
- d13s16 = vrshrn_n_s32(q13s32, 14);
- q6s16 = vcombine_s16(d12s16, d13s16);
- q7s16 = vcombine_s16(d14s16, d15s16);
-
- d0s16 = vdup_n_s16(cospi_16_64);
-
- q2s32 = vmull_s16(d16s16, d0s16);
- q3s32 = vmull_s16(d17s16, d0s16);
- q13s32 = vmull_s16(d16s16, d0s16);
- q15s32 = vmull_s16(d17s16, d0s16);
-
- q2s32 = vmlal_s16(q2s32, d24s16, d0s16);
- q3s32 = vmlal_s16(q3s32, d25s16, d0s16);
- q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
- q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
-
- d0s16 = vdup_n_s16(cospi_24_64);
- d1s16 = vdup_n_s16(cospi_8_64);
-
- d18s16 = vrshrn_n_s32(q2s32, 14);
- d19s16 = vrshrn_n_s32(q3s32, 14);
- d22s16 = vrshrn_n_s32(q13s32, 14);
- d23s16 = vrshrn_n_s32(q15s32, 14);
- *q9s16 = vcombine_s16(d18s16, d19s16);
- *q11s16 = vcombine_s16(d22s16, d23s16);
-
- q2s32 = vmull_s16(d20s16, d0s16);
- q3s32 = vmull_s16(d21s16, d0s16);
- q8s32 = vmull_s16(d20s16, d1s16);
- q12s32 = vmull_s16(d21s16, d1s16);
-
- q2s32 = vmlsl_s16(q2s32, d28s16, d1s16);
- q3s32 = vmlsl_s16(q3s32, d29s16, d1s16);
- q8s32 = vmlal_s16(q8s32, d28s16, d0s16);
- q12s32 = vmlal_s16(q12s32, d29s16, d0s16);
-
- d26s16 = vrshrn_n_s32(q2s32, 14);
- d27s16 = vrshrn_n_s32(q3s32, 14);
- d30s16 = vrshrn_n_s32(q8s32, 14);
- d31s16 = vrshrn_n_s32(q12s32, 14);
- *q13s16 = vcombine_s16(d26s16, d27s16);
- *q15s16 = vcombine_s16(d30s16, d31s16);
-
- q0s16 = vaddq_s16(*q9s16, *q15s16);
- q1s16 = vaddq_s16(*q11s16, *q13s16);
- q2s16 = vsubq_s16(*q11s16, *q13s16);
- q3s16 = vsubq_s16(*q9s16, *q15s16);
-
- *q13s16 = vsubq_s16(q4s16, q5s16);
- q4s16 = vaddq_s16(q4s16, q5s16);
- *q14s16 = vsubq_s16(q7s16, q6s16);
- q7s16 = vaddq_s16(q7s16, q6s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
-
- d16s16 = vdup_n_s16(cospi_16_64);
-
- q9s32 = vmull_s16(d28s16, d16s16);
- q10s32 = vmull_s16(d29s16, d16s16);
- q11s32 = vmull_s16(d28s16, d16s16);
- q12s32 = vmull_s16(d29s16, d16s16);
-
- q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
- q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
- q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
- q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
-
- d10s16 = vrshrn_n_s32(q9s32, 14);
- d11s16 = vrshrn_n_s32(q10s32, 14);
- d12s16 = vrshrn_n_s32(q11s32, 14);
- d13s16 = vrshrn_n_s32(q12s32, 14);
- q5s16 = vcombine_s16(d10s16, d11s16);
- q6s16 = vcombine_s16(d12s16, d13s16);
-
- *q8s16 = vaddq_s16(q0s16, q7s16);
- *q9s16 = vaddq_s16(q1s16, q6s16);
- *q10s16 = vaddq_s16(q2s16, q5s16);
- *q11s16 = vaddq_s16(q3s16, q4s16);
- *q12s16 = vsubq_s16(q3s16, q4s16);
- *q13s16 = vsubq_s16(q2s16, q5s16);
- *q14s16 = vsubq_s16(q1s16, q6s16);
- *q15s16 = vsubq_s16(q0s16, q7s16);
+static INLINE void iadst_half_butterfly_neon(int16x8_t *const x,
+ const int16x4_t c) {
+ const int16x8_t sum = vaddq_s16(x[0], x[1]);
+ const int16x8_t sub = vsubq_s16(x[0], x[1]);
+ int32x4_t t0[2], t1[2];
+
+ t0[0] = vmull_lane_s16(vget_low_s16(sum), c, 0);
+ t0[1] = vmull_lane_s16(vget_high_s16(sum), c, 0);
+ t1[0] = vmull_lane_s16(vget_low_s16(sub), c, 0);
+ t1[1] = vmull_lane_s16(vget_high_s16(sub), c, 0);
+ x[0] = dct_const_round_shift_low_8(t0);
+ x[1] = dct_const_round_shift_low_8(t1);
}
-static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
- int16x8_t *q10s16, int16x8_t *q11s16,
- int16x8_t *q12s16, int16x8_t *q13s16,
- int16x8_t *q14s16, int16x8_t *q15s16) {
- int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
- int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
- int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
- int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
- int16x8_t q2s16, q4s16, q5s16, q6s16;
- int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32;
- int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
- d20s16 = vget_low_s16(*q10s16);
- d21s16 = vget_high_s16(*q10s16);
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
- d30s16 = vget_low_s16(*q15s16);
- d31s16 = vget_high_s16(*q15s16);
-
- d14s16 = vdup_n_s16(cospi_2_64);
- d15s16 = vdup_n_s16(cospi_30_64);
-
- q1s32 = vmull_s16(d30s16, d14s16);
- q2s32 = vmull_s16(d31s16, d14s16);
- q3s32 = vmull_s16(d30s16, d15s16);
- q4s32 = vmull_s16(d31s16, d15s16);
-
- d30s16 = vdup_n_s16(cospi_18_64);
- d31s16 = vdup_n_s16(cospi_14_64);
-
- q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
- q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
- q3s32 = vmlsl_s16(q3s32, d16s16, d14s16);
- q4s32 = vmlsl_s16(q4s32, d17s16, d14s16);
-
- q5s32 = vmull_s16(d22s16, d30s16);
- q6s32 = vmull_s16(d23s16, d30s16);
- q7s32 = vmull_s16(d22s16, d31s16);
- q8s32 = vmull_s16(d23s16, d31s16);
-
- q5s32 = vmlal_s16(q5s32, d24s16, d31s16);
- q6s32 = vmlal_s16(q6s32, d25s16, d31s16);
- q7s32 = vmlsl_s16(q7s32, d24s16, d30s16);
- q8s32 = vmlsl_s16(q8s32, d25s16, d30s16);
-
- q11s32 = vaddq_s32(q1s32, q5s32);
- q12s32 = vaddq_s32(q2s32, q6s32);
- q1s32 = vsubq_s32(q1s32, q5s32);
- q2s32 = vsubq_s32(q2s32, q6s32);
-
- d22s16 = vrshrn_n_s32(q11s32, 14);
- d23s16 = vrshrn_n_s32(q12s32, 14);
- *q11s16 = vcombine_s16(d22s16, d23s16);
-
- q12s32 = vaddq_s32(q3s32, q7s32);
- q15s32 = vaddq_s32(q4s32, q8s32);
- q3s32 = vsubq_s32(q3s32, q7s32);
- q4s32 = vsubq_s32(q4s32, q8s32);
-
- d2s16 = vrshrn_n_s32(q1s32, 14);
- d3s16 = vrshrn_n_s32(q2s32, 14);
- d24s16 = vrshrn_n_s32(q12s32, 14);
- d25s16 = vrshrn_n_s32(q15s32, 14);
- d6s16 = vrshrn_n_s32(q3s32, 14);
- d7s16 = vrshrn_n_s32(q4s32, 14);
- *q12s16 = vcombine_s16(d24s16, d25s16);
-
- d0s16 = vdup_n_s16(cospi_10_64);
- d1s16 = vdup_n_s16(cospi_22_64);
- q4s32 = vmull_s16(d26s16, d0s16);
- q5s32 = vmull_s16(d27s16, d0s16);
- q2s32 = vmull_s16(d26s16, d1s16);
- q6s32 = vmull_s16(d27s16, d1s16);
-
- d30s16 = vdup_n_s16(cospi_26_64);
- d31s16 = vdup_n_s16(cospi_6_64);
-
- q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
- q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
- q2s32 = vmlsl_s16(q2s32, d20s16, d0s16);
- q6s32 = vmlsl_s16(q6s32, d21s16, d0s16);
-
- q0s32 = vmull_s16(d18s16, d30s16);
- q13s32 = vmull_s16(d19s16, d30s16);
-
- q0s32 = vmlal_s16(q0s32, d28s16, d31s16);
- q13s32 = vmlal_s16(q13s32, d29s16, d31s16);
-
- q10s32 = vmull_s16(d18s16, d31s16);
- q9s32 = vmull_s16(d19s16, d31s16);
-
- q10s32 = vmlsl_s16(q10s32, d28s16, d30s16);
- q9s32 = vmlsl_s16(q9s32, d29s16, d30s16);
-
- q14s32 = vaddq_s32(q2s32, q10s32);
- q15s32 = vaddq_s32(q6s32, q9s32);
- q2s32 = vsubq_s32(q2s32, q10s32);
- q6s32 = vsubq_s32(q6s32, q9s32);
-
- d28s16 = vrshrn_n_s32(q14s32, 14);
- d29s16 = vrshrn_n_s32(q15s32, 14);
- d4s16 = vrshrn_n_s32(q2s32, 14);
- d5s16 = vrshrn_n_s32(q6s32, 14);
- *q14s16 = vcombine_s16(d28s16, d29s16);
-
- q9s32 = vaddq_s32(q4s32, q0s32);
- q10s32 = vaddq_s32(q5s32, q13s32);
- q4s32 = vsubq_s32(q4s32, q0s32);
- q5s32 = vsubq_s32(q5s32, q13s32);
-
- d30s16 = vdup_n_s16(cospi_8_64);
- d31s16 = vdup_n_s16(cospi_24_64);
-
- d18s16 = vrshrn_n_s32(q9s32, 14);
- d19s16 = vrshrn_n_s32(q10s32, 14);
- d8s16 = vrshrn_n_s32(q4s32, 14);
- d9s16 = vrshrn_n_s32(q5s32, 14);
- *q9s16 = vcombine_s16(d18s16, d19s16);
-
- q5s32 = vmull_s16(d2s16, d30s16);
- q6s32 = vmull_s16(d3s16, d30s16);
- q7s32 = vmull_s16(d2s16, d31s16);
- q0s32 = vmull_s16(d3s16, d31s16);
-
- q5s32 = vmlal_s16(q5s32, d6s16, d31s16);
- q6s32 = vmlal_s16(q6s32, d7s16, d31s16);
- q7s32 = vmlsl_s16(q7s32, d6s16, d30s16);
- q0s32 = vmlsl_s16(q0s32, d7s16, d30s16);
-
- q1s32 = vmull_s16(d4s16, d30s16);
- q3s32 = vmull_s16(d5s16, d30s16);
- q10s32 = vmull_s16(d4s16, d31s16);
- q2s32 = vmull_s16(d5s16, d31s16);
-
- q1s32 = vmlsl_s16(q1s32, d8s16, d31s16);
- q3s32 = vmlsl_s16(q3s32, d9s16, d31s16);
- q10s32 = vmlal_s16(q10s32, d8s16, d30s16);
- q2s32 = vmlal_s16(q2s32, d9s16, d30s16);
-
- *q8s16 = vaddq_s16(*q11s16, *q9s16);
- *q11s16 = vsubq_s16(*q11s16, *q9s16);
- q4s16 = vaddq_s16(*q12s16, *q14s16);
- *q12s16 = vsubq_s16(*q12s16, *q14s16);
-
- q14s32 = vaddq_s32(q5s32, q1s32);
- q15s32 = vaddq_s32(q6s32, q3s32);
- q5s32 = vsubq_s32(q5s32, q1s32);
- q6s32 = vsubq_s32(q6s32, q3s32);
-
- d18s16 = vrshrn_n_s32(q14s32, 14);
- d19s16 = vrshrn_n_s32(q15s32, 14);
- d10s16 = vrshrn_n_s32(q5s32, 14);
- d11s16 = vrshrn_n_s32(q6s32, 14);
- *q9s16 = vcombine_s16(d18s16, d19s16);
-
- q1s32 = vaddq_s32(q7s32, q10s32);
- q3s32 = vaddq_s32(q0s32, q2s32);
- q7s32 = vsubq_s32(q7s32, q10s32);
- q0s32 = vsubq_s32(q0s32, q2s32);
-
- d28s16 = vrshrn_n_s32(q1s32, 14);
- d29s16 = vrshrn_n_s32(q3s32, 14);
- d14s16 = vrshrn_n_s32(q7s32, 14);
- d15s16 = vrshrn_n_s32(q0s32, 14);
- *q14s16 = vcombine_s16(d28s16, d29s16);
-
- d30s16 = vdup_n_s16(cospi_16_64);
-
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- q2s32 = vmull_s16(d22s16, d30s16);
- q3s32 = vmull_s16(d23s16, d30s16);
- q13s32 = vmull_s16(d22s16, d30s16);
- q1s32 = vmull_s16(d23s16, d30s16);
+static INLINE void iadst_butterfly_lane_0_1_neon(const int16x8_t in0,
+ const int16x8_t in1,
+ const int16x4_t c,
+ int32x4_t *const s0,
+ int32x4_t *const s1) {
+ s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0);
+ s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 0);
+ s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1);
+ s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 1);
+
+ s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 1);
+ s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 1);
+ s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 0);
+ s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 0);
+}
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- q2s32 = vmlal_s16(q2s32, d24s16, d30s16);
- q3s32 = vmlal_s16(q3s32, d25s16, d30s16);
- q13s32 = vmlsl_s16(q13s32, d24s16, d30s16);
- q1s32 = vmlsl_s16(q1s32, d25s16, d30s16);
+static INLINE void iadst_butterfly_lane_2_3_neon(const int16x8_t in0,
+ const int16x8_t in1,
+ const int16x4_t c,
+ int32x4_t *const s0,
+ int32x4_t *const s1) {
+ s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
+ s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
+ s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
+ s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
+
+ s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 3);
+ s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 3);
+ s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 2);
+ s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 2);
+}
- d4s16 = vrshrn_n_s32(q2s32, 14);
- d5s16 = vrshrn_n_s32(q3s32, 14);
- d24s16 = vrshrn_n_s32(q13s32, 14);
- d25s16 = vrshrn_n_s32(q1s32, 14);
- q2s16 = vcombine_s16(d4s16, d5s16);
- *q12s16 = vcombine_s16(d24s16, d25s16);
+static INLINE void iadst_butterfly_lane_3_2_neon(const int16x8_t in0,
+ const int16x8_t in1,
+ const int16x4_t c,
+ int32x4_t *const s0,
+ int32x4_t *const s1) {
+ s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
+ s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
+ s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
+ s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
+
+ s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 2);
+ s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 2);
+ s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 3);
+ s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 3);
+}
- q13s32 = vmull_s16(d10s16, d30s16);
- q1s32 = vmull_s16(d11s16, d30s16);
- q11s32 = vmull_s16(d10s16, d30s16);
- q0s32 = vmull_s16(d11s16, d30s16);
+static INLINE int16x8_t add_dct_const_round_shift_low_8(
+ const int32x4_t *const in0, const int32x4_t *const in1) {
+ int32x4_t sum[2];
- q13s32 = vmlal_s16(q13s32, d14s16, d30s16);
- q1s32 = vmlal_s16(q1s32, d15s16, d30s16);
- q11s32 = vmlsl_s16(q11s32, d14s16, d30s16);
- q0s32 = vmlsl_s16(q0s32, d15s16, d30s16);
+ sum[0] = vaddq_s32(in0[0], in1[0]);
+ sum[1] = vaddq_s32(in0[1], in1[1]);
+ return dct_const_round_shift_low_8(sum);
+}
- d20s16 = vrshrn_n_s32(q13s32, 14);
- d21s16 = vrshrn_n_s32(q1s32, 14);
- d12s16 = vrshrn_n_s32(q11s32, 14);
- d13s16 = vrshrn_n_s32(q0s32, 14);
- *q10s16 = vcombine_s16(d20s16, d21s16);
- q6s16 = vcombine_s16(d12s16, d13s16);
+static INLINE int16x8_t sub_dct_const_round_shift_low_8(
+ const int32x4_t *const in0, const int32x4_t *const in1) {
+ int32x4_t sum[2];
- q5s16 = vdupq_n_s16(0);
+ sum[0] = vsubq_s32(in0[0], in1[0]);
+ sum[1] = vsubq_s32(in0[1], in1[1]);
+ return dct_const_round_shift_low_8(sum);
+}
- *q9s16 = vsubq_s16(q5s16, *q9s16);
- *q11s16 = vsubq_s16(q5s16, q2s16);
- *q13s16 = vsubq_s16(q5s16, q6s16);
- *q15s16 = vsubq_s16(q5s16, q4s16);
+static INLINE void iadst8(int16x8_t *const io) {
+ const int16x4_t c0 =
+ create_s16x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64);
+ const int16x4_t c1 =
+ create_s16x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64);
+ const int16x4_t c2 =
+ create_s16x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64);
+ int16x8_t x[8], t[4];
+ int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
+
+ x[0] = io[7];
+ x[1] = io[0];
+ x[2] = io[5];
+ x[3] = io[2];
+ x[4] = io[3];
+ x[5] = io[4];
+ x[6] = io[1];
+ x[7] = io[6];
+
+ // stage 1
+ iadst_butterfly_lane_0_1_neon(x[0], x[1], c0, s0, s1);
+ iadst_butterfly_lane_2_3_neon(x[2], x[3], c0, s2, s3);
+ iadst_butterfly_lane_0_1_neon(x[4], x[5], c1, s4, s5);
+ iadst_butterfly_lane_2_3_neon(x[6], x[7], c1, s6, s7);
+
+ x[0] = add_dct_const_round_shift_low_8(s0, s4);
+ x[1] = add_dct_const_round_shift_low_8(s1, s5);
+ x[2] = add_dct_const_round_shift_low_8(s2, s6);
+ x[3] = add_dct_const_round_shift_low_8(s3, s7);
+ x[4] = sub_dct_const_round_shift_low_8(s0, s4);
+ x[5] = sub_dct_const_round_shift_low_8(s1, s5);
+ x[6] = sub_dct_const_round_shift_low_8(s2, s6);
+ x[7] = sub_dct_const_round_shift_low_8(s3, s7);
+
+ // stage 2
+ t[0] = x[0];
+ t[1] = x[1];
+ t[2] = x[2];
+ t[3] = x[3];
+ iadst_butterfly_lane_2_3_neon(x[4], x[5], c2, s4, s5);
+ iadst_butterfly_lane_3_2_neon(x[7], x[6], c2, s7, s6);
+
+ x[0] = vaddq_s16(t[0], t[2]);
+ x[1] = vaddq_s16(t[1], t[3]);
+ x[2] = vsubq_s16(t[0], t[2]);
+ x[3] = vsubq_s16(t[1], t[3]);
+ x[4] = add_dct_const_round_shift_low_8(s4, s6);
+ x[5] = add_dct_const_round_shift_low_8(s5, s7);
+ x[6] = sub_dct_const_round_shift_low_8(s4, s6);
+ x[7] = sub_dct_const_round_shift_low_8(s5, s7);
+
+ // stage 3
+ iadst_half_butterfly_neon(x + 2, c2);
+ iadst_half_butterfly_neon(x + 6, c2);
+
+ io[0] = x[0];
+ io[1] = vnegq_s16(x[4]);
+ io[2] = x[6];
+ io[3] = vnegq_s16(x[2]);
+ io[4] = x[3];
+ io[5] = vnegq_s16(x[7]);
+ io[6] = x[5];
+ io[7] = vnegq_s16(x[1]);
}
void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
- int i;
- uint8_t *d1, *d2;
- uint8x8_t d0u8, d1u8, d2u8, d3u8;
- uint64x1_t d0u64, d1u64, d2u64, d3u64;
- int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
- uint16x8_t q8u16, q9u16, q10u16, q11u16;
-
- q8s16 = vld1q_s16(input);
- q9s16 = vld1q_s16(input + 8);
- q10s16 = vld1q_s16(input + 8 * 2);
- q11s16 = vld1q_s16(input + 8 * 3);
- q12s16 = vld1q_s16(input + 8 * 4);
- q13s16 = vld1q_s16(input + 8 * 5);
- q14s16 = vld1q_s16(input + 8 * 6);
- q15s16 = vld1q_s16(input + 8 * 7);
-
- transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
+ const int16x8_t cospis = vld1q_s16(kCospi);
+ const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
+ const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
+ int16x8_t a[8];
+
+ a[0] = load_tran_low_to_s16q(input + 0 * 8);
+ a[1] = load_tran_low_to_s16q(input + 1 * 8);
+ a[2] = load_tran_low_to_s16q(input + 2 * 8);
+ a[3] = load_tran_low_to_s16q(input + 3 * 8);
+ a[4] = load_tran_low_to_s16q(input + 4 * 8);
+ a[5] = load_tran_low_to_s16q(input + 5 * 8);
+ a[6] = load_tran_low_to_s16q(input + 6 * 8);
+ a[7] = load_tran_low_to_s16q(input + 7 * 8);
+
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
switch (tx_type) {
- case 0: // idct_idct is not supported. Fall back to C
- vp9_iht8x8_64_add_c(input, dest, stride, tx_type);
- return;
- case 1: // iadst_idct
- // generate IDCT constants
- // GENERATE_IDCT_CONSTANTS
-
- // first transform rows
- IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // transpose the matrix
- transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
- &q14s16, &q15s16);
-
- // generate IADST constants
- // GENERATE_IADST_CONSTANTS
-
- // then transform columns
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
+ case DCT_DCT:
+ idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
break;
- case 2: // idct_iadst
- // generate IADST constants
- // GENERATE_IADST_CONSTANTS
-
- // first transform rows
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // transpose the matrix
- transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
- &q14s16, &q15s16);
- // generate IDCT constants
- // GENERATE_IDCT_CONSTANTS
-
- // then transform columns
- IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
+ case ADST_DCT:
+ idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ iadst8(a);
break;
- case 3: // iadst_iadst
- // generate IADST constants
- // GENERATE_IADST_CONSTANTS
-
- // first transform rows
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // transpose the matrix
- transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
- &q14s16, &q15s16);
- // then transform columns
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
+ case DCT_ADST:
+ iadst8(a);
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
break;
- default: // iadst_idct
- assert(0);
+
+ default:
+ assert(tx_type == ADST_ADST);
+ iadst8(a);
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ iadst8(a);
break;
}
- q8s16 = vrshrq_n_s16(q8s16, 5);
- q9s16 = vrshrq_n_s16(q9s16, 5);
- q10s16 = vrshrq_n_s16(q10s16, 5);
- q11s16 = vrshrq_n_s16(q11s16, 5);
- q12s16 = vrshrq_n_s16(q12s16, 5);
- q13s16 = vrshrq_n_s16(q13s16, 5);
- q14s16 = vrshrq_n_s16(q14s16, 5);
- q15s16 = vrshrq_n_s16(q15s16, 5);
-
- for (d1 = d2 = dest, i = 0; i < 2; i++) {
- if (i != 0) {
- q8s16 = q12s16;
- q9s16 = q13s16;
- q10s16 = q14s16;
- q11s16 = q15s16;
- }
-
- d0u64 = vld1_u64((uint64_t *)d1);
- d1 += stride;
- d1u64 = vld1_u64((uint64_t *)d1);
- d1 += stride;
- d2u64 = vld1_u64((uint64_t *)d1);
- d1 += stride;
- d3u64 = vld1_u64((uint64_t *)d1);
- d1 += stride;
-
- q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
- q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
- q10u16 =
- vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
- q11u16 =
- vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
-
- d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
- d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
- d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
- d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
-
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
- d2 += stride;
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
- d2 += stride;
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
- d2 += stride;
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
- d2 += stride;
- }
+ idct8x8_add8x8_neon(a, dest, stride);
}
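iadst8() above expresses the 8-point inverse ADST as three stages of butterflies, with the cospi constants packed into int16x4 lanes so each butterfly becomes a vmull/vmlal/vmlsl lane-multiply pair followed by the shared 14-bit rounding. One butterfly in scalar form, as a sketch (ROUND_POWER_OF_TWO, DCT_CONST_BITS and the cospi constants come from vpx_dsp):

    /* Scalar sketch of one iadst butterfly: rotate (x0, x1) by the angle whose
     * cosine/sine pair is (c0, c1), then round by DCT_CONST_BITS; this is what
     * iadst_butterfly_lane_0_1_neon() computes per lane. */
    static void iadst_butterfly_scalar(int32_t x0, int32_t x1, int16_t c0,
                                       int16_t c1, int32_t *out0,
                                       int32_t *out1) {
      const int64_t s0 = (int64_t)c0 * x0 + (int64_t)c1 * x1;
      const int64_t s1 = (int64_t)c1 * x0 - (int64_t)c0 * x1;
      *out0 = (int32_t)ROUND_POWER_OF_TWO(s0, DCT_CONST_BITS);
      *out1 = (int32_t)ROUND_POWER_OF_TWO(s1, DCT_CONST_BITS);
    }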
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht_neon.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht_neon.h
new file mode 100644
index 00000000000..08daa1a4a65
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/arm/neon/vp9_iht_neon.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
+#define VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
+
+#include <arm_neon.h>
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vp9/common/vp9_common.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/txfm_common.h"
+
+static INLINE void iadst4(int16x8_t *const io) {
+ const int32x4_t c3 = vdupq_n_s32(sinpi_3_9);
+ int16x4_t x[4];
+ int32x4_t s[8], output[4];
+ const int16x4_t c =
+ create_s16x4_neon(sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9);
+
+ x[0] = vget_low_s16(io[0]);
+ x[1] = vget_low_s16(io[1]);
+ x[2] = vget_high_s16(io[0]);
+ x[3] = vget_high_s16(io[1]);
+
+ s[0] = vmull_lane_s16(x[0], c, 0);
+ s[1] = vmull_lane_s16(x[0], c, 1);
+ s[2] = vmull_lane_s16(x[1], c, 2);
+ s[3] = vmull_lane_s16(x[2], c, 3);
+ s[4] = vmull_lane_s16(x[2], c, 0);
+ s[5] = vmull_lane_s16(x[3], c, 1);
+ s[6] = vmull_lane_s16(x[3], c, 3);
+ s[7] = vaddl_s16(x[0], x[3]);
+ s[7] = vsubw_s16(s[7], x[2]);
+
+ s[0] = vaddq_s32(s[0], s[3]);
+ s[0] = vaddq_s32(s[0], s[5]);
+ s[1] = vsubq_s32(s[1], s[4]);
+ s[1] = vsubq_s32(s[1], s[6]);
+ s[3] = s[2];
+ s[2] = vmulq_s32(c3, s[7]);
+
+ output[0] = vaddq_s32(s[0], s[3]);
+ output[1] = vaddq_s32(s[1], s[3]);
+ output[2] = s[2];
+ output[3] = vaddq_s32(s[0], s[1]);
+ output[3] = vsubq_s32(output[3], s[3]);
+ dct_const_round_shift_low_8_dual(output, &io[0], &io[1]);
+}
+
+#endif // VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c
index a575bda729c..430b917b8fb 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.c
@@ -42,6 +42,7 @@ const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254,
177, 153, 140, 133, 130, 129 };
#endif
+/* clang-format off */
const uint8_t vp9_coefband_trans_8x8plus[1024] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
// beyond MAXBAND_INDEX+1 all values are filled as 5
@@ -85,6 +86,7 @@ const uint8_t vp9_coefband_trans_8x8plus[1024] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
+/* clang-format on */
const uint8_t vp9_coefband_trans_4x4[16] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h
index 1da49116687..0ab502592b3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropy.h
@@ -137,7 +137,6 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
// 128 lists of probabilities are stored for the following ONE node probs:
// 1, 3, 5, 7, ..., 253, 255
// In between probabilities are interpolated linearly
-
#define COEFF_PROB_MODELS 255
#define UNCONSTRAINED_NODES 3
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c
index 47cd63e94f9..48cad3318de 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymode.c
@@ -186,16 +186,19 @@ const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] =
{ 93, 24, 99 }, // a split, l not split
{ 85, 119, 44 }, // l split, a not split
{ 62, 59, 67 }, // a/l both split
+
// 16x16 -> 8x8
{ 149, 53, 53 }, // a/l both not split
{ 94, 20, 48 }, // a split, l not split
{ 83, 53, 24 }, // l split, a not split
{ 52, 18, 18 }, // a/l both split
+
// 32x32 -> 16x16
{ 150, 40, 39 }, // a/l both not split
{ 78, 12, 26 }, // a split, l not split
{ 67, 33, 11 }, // l split, a not split
{ 24, 7, 5 }, // a/l both split
+
// 64x64 -> 32x32
{ 174, 35, 49 }, // a/l both not split
{ 68, 11, 27 }, // a split, l not split
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.c
index a18a290cfd0..b6f052d088e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_entropymv.c
@@ -22,9 +22,7 @@ const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
18, -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10,
};
-const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
- -0, -1,
-};
+const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1 };
const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1,
4, -2, -3 };
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c
index c7c343aed5d..da9180b71a5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_loopfilter.c
@@ -1174,7 +1174,7 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
}
// Disable filtering on the leftmost column
- border_mask = ~(mi_col == 0);
+ border_mask = ~(mi_col == 0 ? 1 : 0);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert(
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.c
index a7ddc0b9515..15c7e6376c9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_pred_common.c
@@ -229,9 +229,8 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
else
pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
} else {
- pred_context = 1 +
- 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
- edge_mi->ref_frame[1] == GOLDEN_FRAME);
+ pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
+ edge_mi->ref_frame[1] == GOLDEN_FRAME);
}
} else { // inter/inter
const int above_has_second = has_second_ref(above_mi);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
index dd6120266c9..2470578e7da 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -67,13 +67,13 @@ add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *outp
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
# Note that there are more specializations appended when
# CONFIG_VP9_HIGHBITDEPTH is off.
- specialize qw/vp9_iht4x4_16_add sse2/;
+ specialize qw/vp9_iht4x4_16_add neon sse2/;
specialize qw/vp9_iht8x8_64_add sse2/;
specialize qw/vp9_iht16x16_256_add sse2/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones.
- specialize qw/vp9_iht4x4_16_add neon dspr2 msa/;
- specialize qw/vp9_iht8x8_64_add neon dspr2 msa/;
+ specialize qw/vp9_iht4x4_16_add dspr2 msa/;
+ specialize qw/vp9_iht8x8_64_add dspr2 msa/;
specialize qw/vp9_iht16x16_256_add dspr2 msa/;
}
}
@@ -97,13 +97,16 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
add_proto qw/void vp9_highbd_iht4x4_16_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd";
- if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
- specialize qw/vp9_highbd_iht4x4_16_add sse4_1/;
- }
add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd";
add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *output, int pitch, int tx_type, int bd";
+
+ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
+ specialize qw/vp9_highbd_iht4x4_16_add neon sse4_1/;
+ specialize qw/vp9_highbd_iht8x8_64_add sse4_1/;
+ specialize qw/vp9_highbd_iht16x16_256_add sse4_1/;
+ }
}
#
@@ -126,7 +129,7 @@ add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_
add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";
+specialize qw/vp9_quantize_fp neon sse2 avx2/, "$ssse3_x86_64";
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64";
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c
new file mode 100644
index 00000000000..57b79a732da
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c
@@ -0,0 +1,419 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
+#include "vpx_dsp/x86/inv_txfm_sse2.h"
+#include "vpx_dsp/x86/transpose_sse2.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
+
+static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
+ const int c,
+ __m128i *const s) {
+ const __m128i pair_c = pair_set_epi32(4 * c, 0);
+ __m128i x[2];
+
+ extend_64bit(in, x);
+ s[0] = _mm_mul_epi32(pair_c, x[0]);
+ s[1] = _mm_mul_epi32(pair_c, x[1]);
+}
+
+static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
+ const __m128i in1,
+ const int c0, const int c1,
+ __m128i *const s0,
+ __m128i *const s1) {
+ const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
+ const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
+ __m128i t00[2], t01[2], t10[2], t11[2];
+ __m128i x0[2], x1[2];
+
+ extend_64bit(in0, x0);
+ extend_64bit(in1, x1);
+ t00[0] = _mm_mul_epi32(pair_c0, x0[0]);
+ t00[1] = _mm_mul_epi32(pair_c0, x0[1]);
+ t01[0] = _mm_mul_epi32(pair_c0, x1[0]);
+ t01[1] = _mm_mul_epi32(pair_c0, x1[1]);
+ t10[0] = _mm_mul_epi32(pair_c1, x0[0]);
+ t10[1] = _mm_mul_epi32(pair_c1, x0[1]);
+ t11[0] = _mm_mul_epi32(pair_c1, x1[0]);
+ t11[1] = _mm_mul_epi32(pair_c1, x1[1]);
+
+ s0[0] = _mm_add_epi64(t00[0], t11[0]);
+ s0[1] = _mm_add_epi64(t00[1], t11[1]);
+ s1[0] = _mm_sub_epi64(t10[0], t01[0]);
+ s1[1] = _mm_sub_epi64(t10[1], t01[1]);
+}
+
+static void highbd_iadst16_4col_sse4_1(__m128i *const io /*io[16]*/) {
+ __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2], s8[2], s9[2],
+ s10[2], s11[2], s12[2], s13[2], s14[2], s15[2];
+ __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2], x8[2], x9[2],
+ x10[2], x11[2], x12[2], x13[2], x14[2], x15[2];
+
+ // stage 1
+ highbd_iadst_butterfly_sse4_1(io[15], io[0], cospi_1_64, cospi_31_64, s0, s1);
+ highbd_iadst_butterfly_sse4_1(io[13], io[2], cospi_5_64, cospi_27_64, s2, s3);
+ highbd_iadst_butterfly_sse4_1(io[11], io[4], cospi_9_64, cospi_23_64, s4, s5);
+ highbd_iadst_butterfly_sse4_1(io[9], io[6], cospi_13_64, cospi_19_64, s6, s7);
+ highbd_iadst_butterfly_sse4_1(io[7], io[8], cospi_17_64, cospi_15_64, s8, s9);
+ highbd_iadst_butterfly_sse4_1(io[5], io[10], cospi_21_64, cospi_11_64, s10,
+ s11);
+ highbd_iadst_butterfly_sse4_1(io[3], io[12], cospi_25_64, cospi_7_64, s12,
+ s13);
+ highbd_iadst_butterfly_sse4_1(io[1], io[14], cospi_29_64, cospi_3_64, s14,
+ s15);
+
+ x0[0] = _mm_add_epi64(s0[0], s8[0]);
+ x0[1] = _mm_add_epi64(s0[1], s8[1]);
+ x1[0] = _mm_add_epi64(s1[0], s9[0]);
+ x1[1] = _mm_add_epi64(s1[1], s9[1]);
+ x2[0] = _mm_add_epi64(s2[0], s10[0]);
+ x2[1] = _mm_add_epi64(s2[1], s10[1]);
+ x3[0] = _mm_add_epi64(s3[0], s11[0]);
+ x3[1] = _mm_add_epi64(s3[1], s11[1]);
+ x4[0] = _mm_add_epi64(s4[0], s12[0]);
+ x4[1] = _mm_add_epi64(s4[1], s12[1]);
+ x5[0] = _mm_add_epi64(s5[0], s13[0]);
+ x5[1] = _mm_add_epi64(s5[1], s13[1]);
+ x6[0] = _mm_add_epi64(s6[0], s14[0]);
+ x6[1] = _mm_add_epi64(s6[1], s14[1]);
+ x7[0] = _mm_add_epi64(s7[0], s15[0]);
+ x7[1] = _mm_add_epi64(s7[1], s15[1]);
+ x8[0] = _mm_sub_epi64(s0[0], s8[0]);
+ x8[1] = _mm_sub_epi64(s0[1], s8[1]);
+ x9[0] = _mm_sub_epi64(s1[0], s9[0]);
+ x9[1] = _mm_sub_epi64(s1[1], s9[1]);
+ x10[0] = _mm_sub_epi64(s2[0], s10[0]);
+ x10[1] = _mm_sub_epi64(s2[1], s10[1]);
+ x11[0] = _mm_sub_epi64(s3[0], s11[0]);
+ x11[1] = _mm_sub_epi64(s3[1], s11[1]);
+ x12[0] = _mm_sub_epi64(s4[0], s12[0]);
+ x12[1] = _mm_sub_epi64(s4[1], s12[1]);
+ x13[0] = _mm_sub_epi64(s5[0], s13[0]);
+ x13[1] = _mm_sub_epi64(s5[1], s13[1]);
+ x14[0] = _mm_sub_epi64(s6[0], s14[0]);
+ x14[1] = _mm_sub_epi64(s6[1], s14[1]);
+ x15[0] = _mm_sub_epi64(s7[0], s15[0]);
+ x15[1] = _mm_sub_epi64(s7[1], s15[1]);
+
+ x0[0] = dct_const_round_shift_64bit(x0[0]);
+ x0[1] = dct_const_round_shift_64bit(x0[1]);
+ x1[0] = dct_const_round_shift_64bit(x1[0]);
+ x1[1] = dct_const_round_shift_64bit(x1[1]);
+ x2[0] = dct_const_round_shift_64bit(x2[0]);
+ x2[1] = dct_const_round_shift_64bit(x2[1]);
+ x3[0] = dct_const_round_shift_64bit(x3[0]);
+ x3[1] = dct_const_round_shift_64bit(x3[1]);
+ x4[0] = dct_const_round_shift_64bit(x4[0]);
+ x4[1] = dct_const_round_shift_64bit(x4[1]);
+ x5[0] = dct_const_round_shift_64bit(x5[0]);
+ x5[1] = dct_const_round_shift_64bit(x5[1]);
+ x6[0] = dct_const_round_shift_64bit(x6[0]);
+ x6[1] = dct_const_round_shift_64bit(x6[1]);
+ x7[0] = dct_const_round_shift_64bit(x7[0]);
+ x7[1] = dct_const_round_shift_64bit(x7[1]);
+ x8[0] = dct_const_round_shift_64bit(x8[0]);
+ x8[1] = dct_const_round_shift_64bit(x8[1]);
+ x9[0] = dct_const_round_shift_64bit(x9[0]);
+ x9[1] = dct_const_round_shift_64bit(x9[1]);
+ x10[0] = dct_const_round_shift_64bit(x10[0]);
+ x10[1] = dct_const_round_shift_64bit(x10[1]);
+ x11[0] = dct_const_round_shift_64bit(x11[0]);
+ x11[1] = dct_const_round_shift_64bit(x11[1]);
+ x12[0] = dct_const_round_shift_64bit(x12[0]);
+ x12[1] = dct_const_round_shift_64bit(x12[1]);
+ x13[0] = dct_const_round_shift_64bit(x13[0]);
+ x13[1] = dct_const_round_shift_64bit(x13[1]);
+ x14[0] = dct_const_round_shift_64bit(x14[0]);
+ x14[1] = dct_const_round_shift_64bit(x14[1]);
+ x15[0] = dct_const_round_shift_64bit(x15[0]);
+ x15[1] = dct_const_round_shift_64bit(x15[1]);
+ x0[0] = pack_4(x0[0], x0[1]);
+ x1[0] = pack_4(x1[0], x1[1]);
+ x2[0] = pack_4(x2[0], x2[1]);
+ x3[0] = pack_4(x3[0], x3[1]);
+ x4[0] = pack_4(x4[0], x4[1]);
+ x5[0] = pack_4(x5[0], x5[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+ x8[0] = pack_4(x8[0], x8[1]);
+ x9[0] = pack_4(x9[0], x9[1]);
+ x10[0] = pack_4(x10[0], x10[1]);
+ x11[0] = pack_4(x11[0], x11[1]);
+ x12[0] = pack_4(x12[0], x12[1]);
+ x13[0] = pack_4(x13[0], x13[1]);
+ x14[0] = pack_4(x14[0], x14[1]);
+ x15[0] = pack_4(x15[0], x15[1]);
+
+ // stage 2
+ s0[0] = x0[0];
+ s1[0] = x1[0];
+ s2[0] = x2[0];
+ s3[0] = x3[0];
+ s4[0] = x4[0];
+ s5[0] = x5[0];
+ s6[0] = x6[0];
+ s7[0] = x7[0];
+ x0[0] = _mm_add_epi32(s0[0], s4[0]);
+ x1[0] = _mm_add_epi32(s1[0], s5[0]);
+ x2[0] = _mm_add_epi32(s2[0], s6[0]);
+ x3[0] = _mm_add_epi32(s3[0], s7[0]);
+ x4[0] = _mm_sub_epi32(s0[0], s4[0]);
+ x5[0] = _mm_sub_epi32(s1[0], s5[0]);
+ x6[0] = _mm_sub_epi32(s2[0], s6[0]);
+ x7[0] = _mm_sub_epi32(s3[0], s7[0]);
+
+ highbd_iadst_butterfly_sse4_1(x8[0], x9[0], cospi_4_64, cospi_28_64, s8, s9);
+ highbd_iadst_butterfly_sse4_1(x10[0], x11[0], cospi_20_64, cospi_12_64, s10,
+ s11);
+ highbd_iadst_butterfly_sse4_1(x13[0], x12[0], cospi_28_64, cospi_4_64, s13,
+ s12);
+ highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_12_64, cospi_20_64, s15,
+ s14);
+
+ x8[0] = _mm_add_epi64(s8[0], s12[0]);
+ x8[1] = _mm_add_epi64(s8[1], s12[1]);
+ x9[0] = _mm_add_epi64(s9[0], s13[0]);
+ x9[1] = _mm_add_epi64(s9[1], s13[1]);
+ x10[0] = _mm_add_epi64(s10[0], s14[0]);
+ x10[1] = _mm_add_epi64(s10[1], s14[1]);
+ x11[0] = _mm_add_epi64(s11[0], s15[0]);
+ x11[1] = _mm_add_epi64(s11[1], s15[1]);
+ x12[0] = _mm_sub_epi64(s8[0], s12[0]);
+ x12[1] = _mm_sub_epi64(s8[1], s12[1]);
+ x13[0] = _mm_sub_epi64(s9[0], s13[0]);
+ x13[1] = _mm_sub_epi64(s9[1], s13[1]);
+ x14[0] = _mm_sub_epi64(s10[0], s14[0]);
+ x14[1] = _mm_sub_epi64(s10[1], s14[1]);
+ x15[0] = _mm_sub_epi64(s11[0], s15[0]);
+ x15[1] = _mm_sub_epi64(s11[1], s15[1]);
+ x8[0] = dct_const_round_shift_64bit(x8[0]);
+ x8[1] = dct_const_round_shift_64bit(x8[1]);
+ x9[0] = dct_const_round_shift_64bit(x9[0]);
+ x9[1] = dct_const_round_shift_64bit(x9[1]);
+ x10[0] = dct_const_round_shift_64bit(x10[0]);
+ x10[1] = dct_const_round_shift_64bit(x10[1]);
+ x11[0] = dct_const_round_shift_64bit(x11[0]);
+ x11[1] = dct_const_round_shift_64bit(x11[1]);
+ x12[0] = dct_const_round_shift_64bit(x12[0]);
+ x12[1] = dct_const_round_shift_64bit(x12[1]);
+ x13[0] = dct_const_round_shift_64bit(x13[0]);
+ x13[1] = dct_const_round_shift_64bit(x13[1]);
+ x14[0] = dct_const_round_shift_64bit(x14[0]);
+ x14[1] = dct_const_round_shift_64bit(x14[1]);
+ x15[0] = dct_const_round_shift_64bit(x15[0]);
+ x15[1] = dct_const_round_shift_64bit(x15[1]);
+ x8[0] = pack_4(x8[0], x8[1]);
+ x9[0] = pack_4(x9[0], x9[1]);
+ x10[0] = pack_4(x10[0], x10[1]);
+ x11[0] = pack_4(x11[0], x11[1]);
+ x12[0] = pack_4(x12[0], x12[1]);
+ x13[0] = pack_4(x13[0], x13[1]);
+ x14[0] = pack_4(x14[0], x14[1]);
+ x15[0] = pack_4(x15[0], x15[1]);
+
+ // stage 3
+ s0[0] = x0[0];
+ s1[0] = x1[0];
+ s2[0] = x2[0];
+ s3[0] = x3[0];
+ highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
+ highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
+ s8[0] = x8[0];
+ s9[0] = x9[0];
+ s10[0] = x10[0];
+ s11[0] = x11[0];
+ highbd_iadst_butterfly_sse4_1(x12[0], x13[0], cospi_8_64, cospi_24_64, s12,
+ s13);
+ highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_24_64, cospi_8_64, s15,
+ s14);
+
+ x0[0] = _mm_add_epi32(s0[0], s2[0]);
+ x1[0] = _mm_add_epi32(s1[0], s3[0]);
+ x2[0] = _mm_sub_epi32(s0[0], s2[0]);
+ x3[0] = _mm_sub_epi32(s1[0], s3[0]);
+ x4[0] = _mm_add_epi64(s4[0], s6[0]);
+ x4[1] = _mm_add_epi64(s4[1], s6[1]);
+ x5[0] = _mm_add_epi64(s5[0], s7[0]);
+ x5[1] = _mm_add_epi64(s5[1], s7[1]);
+ x6[0] = _mm_sub_epi64(s4[0], s6[0]);
+ x6[1] = _mm_sub_epi64(s4[1], s6[1]);
+ x7[0] = _mm_sub_epi64(s5[0], s7[0]);
+ x7[1] = _mm_sub_epi64(s5[1], s7[1]);
+ x4[0] = dct_const_round_shift_64bit(x4[0]);
+ x4[1] = dct_const_round_shift_64bit(x4[1]);
+ x5[0] = dct_const_round_shift_64bit(x5[0]);
+ x5[1] = dct_const_round_shift_64bit(x5[1]);
+ x6[0] = dct_const_round_shift_64bit(x6[0]);
+ x6[1] = dct_const_round_shift_64bit(x6[1]);
+ x7[0] = dct_const_round_shift_64bit(x7[0]);
+ x7[1] = dct_const_round_shift_64bit(x7[1]);
+ x4[0] = pack_4(x4[0], x4[1]);
+ x5[0] = pack_4(x5[0], x5[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+ x8[0] = _mm_add_epi32(s8[0], s10[0]);
+ x9[0] = _mm_add_epi32(s9[0], s11[0]);
+ x10[0] = _mm_sub_epi32(s8[0], s10[0]);
+ x11[0] = _mm_sub_epi32(s9[0], s11[0]);
+ x12[0] = _mm_add_epi64(s12[0], s14[0]);
+ x12[1] = _mm_add_epi64(s12[1], s14[1]);
+ x13[0] = _mm_add_epi64(s13[0], s15[0]);
+ x13[1] = _mm_add_epi64(s13[1], s15[1]);
+ x14[0] = _mm_sub_epi64(s12[0], s14[0]);
+ x14[1] = _mm_sub_epi64(s12[1], s14[1]);
+ x15[0] = _mm_sub_epi64(s13[0], s15[0]);
+ x15[1] = _mm_sub_epi64(s13[1], s15[1]);
+ x12[0] = dct_const_round_shift_64bit(x12[0]);
+ x12[1] = dct_const_round_shift_64bit(x12[1]);
+ x13[0] = dct_const_round_shift_64bit(x13[0]);
+ x13[1] = dct_const_round_shift_64bit(x13[1]);
+ x14[0] = dct_const_round_shift_64bit(x14[0]);
+ x14[1] = dct_const_round_shift_64bit(x14[1]);
+ x15[0] = dct_const_round_shift_64bit(x15[0]);
+ x15[1] = dct_const_round_shift_64bit(x15[1]);
+ x12[0] = pack_4(x12[0], x12[1]);
+ x13[0] = pack_4(x13[0], x13[1]);
+ x14[0] = pack_4(x14[0], x14[1]);
+ x15[0] = pack_4(x15[0], x15[1]);
+
+ // stage 4
+ s2[0] = _mm_add_epi32(x2[0], x3[0]);
+ s3[0] = _mm_sub_epi32(x2[0], x3[0]);
+ s6[0] = _mm_add_epi32(x7[0], x6[0]);
+ s7[0] = _mm_sub_epi32(x7[0], x6[0]);
+ s10[0] = _mm_add_epi32(x11[0], x10[0]);
+ s11[0] = _mm_sub_epi32(x11[0], x10[0]);
+ s14[0] = _mm_add_epi32(x14[0], x15[0]);
+ s15[0] = _mm_sub_epi32(x14[0], x15[0]);
+ highbd_iadst_half_butterfly_sse4_1(s2[0], -cospi_16_64, s2);
+ highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
+ highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
+ highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
+ highbd_iadst_half_butterfly_sse4_1(s10[0], cospi_16_64, s10);
+ highbd_iadst_half_butterfly_sse4_1(s11[0], cospi_16_64, s11);
+ highbd_iadst_half_butterfly_sse4_1(s14[0], -cospi_16_64, s14);
+ highbd_iadst_half_butterfly_sse4_1(s15[0], cospi_16_64, s15);
+
+ x2[0] = dct_const_round_shift_64bit(s2[0]);
+ x2[1] = dct_const_round_shift_64bit(s2[1]);
+ x3[0] = dct_const_round_shift_64bit(s3[0]);
+ x3[1] = dct_const_round_shift_64bit(s3[1]);
+ x6[0] = dct_const_round_shift_64bit(s6[0]);
+ x6[1] = dct_const_round_shift_64bit(s6[1]);
+ x7[0] = dct_const_round_shift_64bit(s7[0]);
+ x7[1] = dct_const_round_shift_64bit(s7[1]);
+ x10[0] = dct_const_round_shift_64bit(s10[0]);
+ x10[1] = dct_const_round_shift_64bit(s10[1]);
+ x11[0] = dct_const_round_shift_64bit(s11[0]);
+ x11[1] = dct_const_round_shift_64bit(s11[1]);
+ x14[0] = dct_const_round_shift_64bit(s14[0]);
+ x14[1] = dct_const_round_shift_64bit(s14[1]);
+ x15[0] = dct_const_round_shift_64bit(s15[0]);
+ x15[1] = dct_const_round_shift_64bit(s15[1]);
+ x2[0] = pack_4(x2[0], x2[1]);
+ x3[0] = pack_4(x3[0], x3[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+ x10[0] = pack_4(x10[0], x10[1]);
+ x11[0] = pack_4(x11[0], x11[1]);
+ x14[0] = pack_4(x14[0], x14[1]);
+ x15[0] = pack_4(x15[0], x15[1]);
+
+ io[0] = x0[0];
+ io[1] = _mm_sub_epi32(_mm_setzero_si128(), x8[0]);
+ io[2] = x12[0];
+ io[3] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
+ io[4] = x6[0];
+ io[5] = x14[0];
+ io[6] = x10[0];
+ io[7] = x2[0];
+ io[8] = x3[0];
+ io[9] = x11[0];
+ io[10] = x15[0];
+ io[11] = x7[0];
+ io[12] = x5[0];
+ io[13] = _mm_sub_epi32(_mm_setzero_si128(), x13[0]);
+ io[14] = x9[0];
+ io[15] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
+}
+
+void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t *input, uint16_t *dest,
+ int stride, int tx_type, int bd) {
+ int i;
+ __m128i out[16], *in;
+
+ if (bd == 8) {
+ __m128i l[16], r[16];
+
+ in = l;
+ for (i = 0; i < 2; i++) {
+ highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]);
+ highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]);
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ idct16_8col(in, in);
+ } else {
+ vpx_iadst16_8col_sse2(in);
+ }
+ in = r;
+ input += 128;
+ }
+
+ for (i = 0; i < 16; i += 8) {
+ int j;
+ transpose_16bit_8x8(l + i, out);
+ transpose_16bit_8x8(r + i, out + 8);
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ idct16_8col(out, out);
+ } else {
+ vpx_iadst16_8col_sse2(out);
+ }
+
+ for (j = 0; j < 16; ++j) {
+ highbd_write_buffer_8(dest + j * stride, out[j], bd);
+ }
+ dest += 8;
+ }
+ } else {
+ __m128i all[4][16];
+
+ for (i = 0; i < 4; i++) {
+ in = all[i];
+ highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]);
+ highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]);
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ vpx_highbd_idct16_4col_sse4_1(in);
+ } else {
+ highbd_iadst16_4col_sse4_1(in);
+ }
+ input += 4 * 16;
+ }
+
+ for (i = 0; i < 16; i += 4) {
+ int j;
+ transpose_32bit_4x4(all[0] + i, out + 0);
+ transpose_32bit_4x4(all[1] + i, out + 4);
+ transpose_32bit_4x4(all[2] + i, out + 8);
+ transpose_32bit_4x4(all[3] + i, out + 12);
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ vpx_highbd_idct16_4col_sse4_1(out);
+ } else {
+ highbd_iadst16_4col_sse4_1(out);
+ }
+
+ for (j = 0; j < 16; ++j) {
+ highbd_write_buffer_4(dest + j * stride, out[j], bd);
+ }
+ dest += 4;
+ }
+ }
+}
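The highbd_iadst_half_butterfly_sse4_1() and highbd_iadst_butterfly_sse4_1() helpers above compute the usual ADST rotation in 64-bit intermediates, scaling the constants by 4 via pair_set_epi32(4 * c, 0) before the multiplies and packing back to 32 bits after dct_const_round_shift_64bit(). A hedged scalar equivalent of the rotation itself (names are illustrative, not from the source):

/* Scalar sketch of the rotation performed by highbd_iadst_butterfly_sse4_1():
 * s0 = c0 * x0 + c1 * x1, s1 = c1 * x0 - c0 * x1, accumulated in 64 bits to
 * avoid overflowing 32-bit intermediates at high bit depth. */
#include <stdint.h>

static void sketch_iadst_butterfly(int32_t x0, int32_t x1, int c0, int c1,
                                   int64_t *s0, int64_t *s1) {
  *s0 = (int64_t)c0 * x0 + (int64_t)c1 * x1;
  *s1 = (int64_t)c1 * x0 - (int64_t)c0 * x1;
}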
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c
new file mode 100644
index 00000000000..7d949b6dbce
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
+#include "vpx_dsp/x86/inv_txfm_sse2.h"
+#include "vpx_dsp/x86/transpose_sse2.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
+
+static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
+ const int c,
+ __m128i *const s) {
+ const __m128i pair_c = pair_set_epi32(4 * c, 0);
+ __m128i x[2];
+
+ extend_64bit(in, x);
+ s[0] = _mm_mul_epi32(pair_c, x[0]);
+ s[1] = _mm_mul_epi32(pair_c, x[1]);
+}
+
+static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
+ const __m128i in1,
+ const int c0, const int c1,
+ __m128i *const s0,
+ __m128i *const s1) {
+ const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
+ const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
+ __m128i t00[2], t01[2], t10[2], t11[2];
+ __m128i x0[2], x1[2];
+
+ extend_64bit(in0, x0);
+ extend_64bit(in1, x1);
+ t00[0] = _mm_mul_epi32(pair_c0, x0[0]);
+ t00[1] = _mm_mul_epi32(pair_c0, x0[1]);
+ t01[0] = _mm_mul_epi32(pair_c0, x1[0]);
+ t01[1] = _mm_mul_epi32(pair_c0, x1[1]);
+ t10[0] = _mm_mul_epi32(pair_c1, x0[0]);
+ t10[1] = _mm_mul_epi32(pair_c1, x0[1]);
+ t11[0] = _mm_mul_epi32(pair_c1, x1[0]);
+ t11[1] = _mm_mul_epi32(pair_c1, x1[1]);
+
+ s0[0] = _mm_add_epi64(t00[0], t11[0]);
+ s0[1] = _mm_add_epi64(t00[1], t11[1]);
+ s1[0] = _mm_sub_epi64(t10[0], t01[0]);
+ s1[1] = _mm_sub_epi64(t10[1], t01[1]);
+}
+
+static void highbd_iadst8_sse4_1(__m128i *const io) {
+ __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
+ __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2];
+
+ transpose_32bit_4x4x2(io, io);
+
+ // stage 1
+ highbd_iadst_butterfly_sse4_1(io[7], io[0], cospi_2_64, cospi_30_64, s0, s1);
+ highbd_iadst_butterfly_sse4_1(io[3], io[4], cospi_18_64, cospi_14_64, s4, s5);
+ x0[0] = _mm_add_epi64(s0[0], s4[0]);
+ x0[1] = _mm_add_epi64(s0[1], s4[1]);
+ x1[0] = _mm_add_epi64(s1[0], s5[0]);
+ x1[1] = _mm_add_epi64(s1[1], s5[1]);
+ x4[0] = _mm_sub_epi64(s0[0], s4[0]);
+ x4[1] = _mm_sub_epi64(s0[1], s4[1]);
+ x5[0] = _mm_sub_epi64(s1[0], s5[0]);
+ x5[1] = _mm_sub_epi64(s1[1], s5[1]);
+
+ highbd_iadst_butterfly_sse4_1(io[5], io[2], cospi_10_64, cospi_22_64, s2, s3);
+ highbd_iadst_butterfly_sse4_1(io[1], io[6], cospi_26_64, cospi_6_64, s6, s7);
+ x2[0] = _mm_add_epi64(s2[0], s6[0]);
+ x2[1] = _mm_add_epi64(s2[1], s6[1]);
+ x3[0] = _mm_add_epi64(s3[0], s7[0]);
+ x3[1] = _mm_add_epi64(s3[1], s7[1]);
+ x6[0] = _mm_sub_epi64(s2[0], s6[0]);
+ x6[1] = _mm_sub_epi64(s2[1], s6[1]);
+ x7[0] = _mm_sub_epi64(s3[0], s7[0]);
+ x7[1] = _mm_sub_epi64(s3[1], s7[1]);
+
+ x0[0] = dct_const_round_shift_64bit(x0[0]);
+ x0[1] = dct_const_round_shift_64bit(x0[1]);
+ x1[0] = dct_const_round_shift_64bit(x1[0]);
+ x1[1] = dct_const_round_shift_64bit(x1[1]);
+ x2[0] = dct_const_round_shift_64bit(x2[0]);
+ x2[1] = dct_const_round_shift_64bit(x2[1]);
+ x3[0] = dct_const_round_shift_64bit(x3[0]);
+ x3[1] = dct_const_round_shift_64bit(x3[1]);
+ x4[0] = dct_const_round_shift_64bit(x4[0]);
+ x4[1] = dct_const_round_shift_64bit(x4[1]);
+ x5[0] = dct_const_round_shift_64bit(x5[0]);
+ x5[1] = dct_const_round_shift_64bit(x5[1]);
+ x6[0] = dct_const_round_shift_64bit(x6[0]);
+ x6[1] = dct_const_round_shift_64bit(x6[1]);
+ x7[0] = dct_const_round_shift_64bit(x7[0]);
+ x7[1] = dct_const_round_shift_64bit(x7[1]);
+ s0[0] = pack_4(x0[0], x0[1]); // s0 = x0;
+ s1[0] = pack_4(x1[0], x1[1]); // s1 = x1;
+ s2[0] = pack_4(x2[0], x2[1]); // s2 = x2;
+ s3[0] = pack_4(x3[0], x3[1]); // s3 = x3;
+ x4[0] = pack_4(x4[0], x4[1]);
+ x5[0] = pack_4(x5[0], x5[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+
+ // stage 2
+ x0[0] = _mm_add_epi32(s0[0], s2[0]);
+ x1[0] = _mm_add_epi32(s1[0], s3[0]);
+ x2[0] = _mm_sub_epi32(s0[0], s2[0]);
+ x3[0] = _mm_sub_epi32(s1[0], s3[0]);
+
+ highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
+ highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
+
+ x4[0] = _mm_add_epi64(s4[0], s6[0]);
+ x4[1] = _mm_add_epi64(s4[1], s6[1]);
+ x5[0] = _mm_add_epi64(s5[0], s7[0]);
+ x5[1] = _mm_add_epi64(s5[1], s7[1]);
+ x6[0] = _mm_sub_epi64(s4[0], s6[0]);
+ x6[1] = _mm_sub_epi64(s4[1], s6[1]);
+ x7[0] = _mm_sub_epi64(s5[0], s7[0]);
+ x7[1] = _mm_sub_epi64(s5[1], s7[1]);
+ x4[0] = dct_const_round_shift_64bit(x4[0]);
+ x4[1] = dct_const_round_shift_64bit(x4[1]);
+ x5[0] = dct_const_round_shift_64bit(x5[0]);
+ x5[1] = dct_const_round_shift_64bit(x5[1]);
+ x6[0] = dct_const_round_shift_64bit(x6[0]);
+ x6[1] = dct_const_round_shift_64bit(x6[1]);
+ x7[0] = dct_const_round_shift_64bit(x7[0]);
+ x7[1] = dct_const_round_shift_64bit(x7[1]);
+ x4[0] = pack_4(x4[0], x4[1]);
+ x5[0] = pack_4(x5[0], x5[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+
+ // stage 3
+ s2[0] = _mm_add_epi32(x2[0], x3[0]);
+ s3[0] = _mm_sub_epi32(x2[0], x3[0]);
+ s6[0] = _mm_add_epi32(x6[0], x7[0]);
+ s7[0] = _mm_sub_epi32(x6[0], x7[0]);
+ highbd_iadst_half_butterfly_sse4_1(s2[0], cospi_16_64, s2);
+ highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
+ highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
+ highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
+
+ x2[0] = dct_const_round_shift_64bit(s2[0]);
+ x2[1] = dct_const_round_shift_64bit(s2[1]);
+ x3[0] = dct_const_round_shift_64bit(s3[0]);
+ x3[1] = dct_const_round_shift_64bit(s3[1]);
+ x6[0] = dct_const_round_shift_64bit(s6[0]);
+ x6[1] = dct_const_round_shift_64bit(s6[1]);
+ x7[0] = dct_const_round_shift_64bit(s7[0]);
+ x7[1] = dct_const_round_shift_64bit(s7[1]);
+ x2[0] = pack_4(x2[0], x2[1]);
+ x3[0] = pack_4(x3[0], x3[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+
+ io[0] = x0[0];
+ io[1] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
+ io[2] = x6[0];
+ io[3] = _mm_sub_epi32(_mm_setzero_si128(), x2[0]);
+ io[4] = x3[0];
+ io[5] = _mm_sub_epi32(_mm_setzero_si128(), x7[0]);
+ io[6] = x5[0];
+ io[7] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
+}
+
+void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest,
+ int stride, int tx_type, int bd) {
+ __m128i io[16];
+
+ io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0));
+ io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4));
+ io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0));
+ io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4));
+ io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0));
+ io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4));
+ io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0));
+ io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4));
+ io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0));
+ io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4));
+ io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0));
+ io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4));
+ io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0));
+ io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4));
+ io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0));
+ io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4));
+
+ if (bd == 8) {
+ __m128i io_short[8];
+
+ io_short[0] = _mm_packs_epi32(io[0], io[4]);
+ io_short[1] = _mm_packs_epi32(io[1], io[5]);
+ io_short[2] = _mm_packs_epi32(io[2], io[6]);
+ io_short[3] = _mm_packs_epi32(io[3], io[7]);
+ io_short[4] = _mm_packs_epi32(io[8], io[12]);
+ io_short[5] = _mm_packs_epi32(io[9], io[13]);
+ io_short[6] = _mm_packs_epi32(io[10], io[14]);
+ io_short[7] = _mm_packs_epi32(io[11], io[15]);
+
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ vpx_idct8_sse2(io_short);
+ } else {
+ iadst8_sse2(io_short);
+ }
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ vpx_idct8_sse2(io_short);
+ } else {
+ iadst8_sse2(io_short);
+ }
+ round_shift_8x8(io_short, io);
+ } else {
+ __m128i temp[4];
+
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
+ } else {
+ highbd_iadst8_sse4_1(io);
+ highbd_iadst8_sse4_1(&io[8]);
+ }
+
+ temp[0] = io[4];
+ temp[1] = io[5];
+ temp[2] = io[6];
+ temp[3] = io[7];
+ io[4] = io[8];
+ io[5] = io[9];
+ io[6] = io[10];
+ io[7] = io[11];
+
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
+ io[8] = temp[0];
+ io[9] = temp[1];
+ io[10] = temp[2];
+ io[11] = temp[3];
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
+ } else {
+ highbd_iadst8_sse4_1(io);
+ io[8] = temp[0];
+ io[9] = temp[1];
+ io[10] = temp[2];
+ io[11] = temp[3];
+ highbd_iadst8_sse4_1(&io[8]);
+ }
+ highbd_idct8x8_final_round(io);
+ }
+ recon_and_store_8x8(io, dest, stride, bd);
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
index 6996260e261..ad693718c0e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -10,8 +10,6 @@
#include "./vp9_rtcd.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
-#include "vpx_dsp/x86/txfm_common_sse2.h"
-#include "vpx_ports/mem.h"
void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
@@ -22,23 +20,23 @@ void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[1] = load_input_data8(input + 8);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct4_sse2(in);
idct4_sse2(in);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct4_sse2(in);
iadst4_sse2(in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst4_sse2(in);
idct4_sse2(in);
break;
- case 3: // ADST_ADST
+ default:
+ assert(tx_type == ADST_ADST);
iadst4_sse2(in);
iadst4_sse2(in);
break;
- default: assert(0); break;
}
// Final round and shift
@@ -67,23 +65,23 @@ void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[7] = load_input_data8(input + 8 * 7);
switch (tx_type) {
- case 0: // DCT_DCT
- idct8_sse2(in);
- idct8_sse2(in);
+ case DCT_DCT:
+ vpx_idct8_sse2(in);
+ vpx_idct8_sse2(in);
break;
- case 1: // ADST_DCT
- idct8_sse2(in);
+ case ADST_DCT:
+ vpx_idct8_sse2(in);
iadst8_sse2(in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst8_sse2(in);
- idct8_sse2(in);
+ vpx_idct8_sse2(in);
break;
- case 3: // ADST_ADST
+ default:
+ assert(tx_type == ADST_ADST);
iadst8_sse2(in);
iadst8_sse2(in);
break;
- default: assert(0); break;
}
// Final rounding and shift
@@ -201,23 +199,23 @@ void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
load_buffer_8x16(input, in1);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
- case 3: // ADST_ADST
+ default:
+ assert(tx_type == ADST_ADST);
iadst16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
- default: assert(0); break;
}
write_buffer_8x16(dest, in0, stride);
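The switch rewrites above replace the bare numeric cases with the TX_TYPE enum names and fold ADST_ADST into the default branch behind an assert. For reference, assuming the standard VP9 TX_TYPE enum, the mapping the cases rely on is:

/* TX_TYPE mapping assumed by the cases above (for reference only). */
typedef enum {
  DCT_DCT = 0,   /* DCT in both vertical and horizontal */
  ADST_DCT = 1,  /* ADST in vertical, DCT in horizontal */
  DCT_ADST = 2,  /* DCT in vertical, ADST in horizontal */
  ADST_ADST = 3  /* ADST in both directions */
} TX_TYPE;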
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
index 2f2f0055a7c..ed43de70197 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -464,10 +464,6 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->rate_ratio_qdelta = VPXMAX(cr->rate_ratio_qdelta, 2.5);
}
}
- if (cpi->svc.spatial_layer_id > 0) {
- cr->motion_thresh = 4;
- cr->rate_boost_fac = 12;
- }
if (cpi->oxcf.rc_mode == VPX_VBR) {
// To be adjusted for VBR mode, e.g., based on gf period and boost.
// For now use smaller qp-delta (than CBR), no second boosted seg, and
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.c
index 2f7e5443325..52a81afb58f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_context_tree.c
@@ -12,7 +12,10 @@
#include "vp9/encoder/vp9_encoder.h"
static const BLOCK_SIZE square[] = {
- BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
+ BLOCK_8X8,
+ BLOCK_16X16,
+ BLOCK_32X32,
+ BLOCK_64X64,
};
static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
index 682477df18b..6517fb35808 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -1513,9 +1513,9 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
}
}
- if (is_key_frame || (low_res &&
- vt.split[i].split[j].part_variances.none.variance >
- threshold_4x4avg)) {
+ if (is_key_frame ||
+ (low_res && vt.split[i].split[j].part_variances.none.variance >
+ threshold_4x4avg)) {
force_split[split_index] = 0;
// Go down to 4x4 down-sampling for variance.
variance4x4downsample[i2 + j] = 1;
@@ -3403,9 +3403,10 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
// Rate and distortion based partition search termination clause.
if (!cpi->sf.ml_partition_search_early_termination &&
- !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
- (best_rdc.dist < dist_breakout_thr &&
- best_rdc.rate < rate_breakout_thr))) {
+ !x->e_mbd.lossless &&
+ ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
+ (best_rdc.dist < dist_breakout_thr &&
+ best_rdc.rate < rate_breakout_thr))) {
do_rect = 0;
}
}
@@ -4620,8 +4621,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
- CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows *
- sizeof(*cpi->tile_data)));
+ CHECK_MEM_ERROR(
+ cm, cpi->tile_data,
+ vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
cpi->allocated_tiles = tile_cols * tile_rows;
for (tile_row = 0; tile_row < tile_rows; ++tile_row)
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c
index f3c17f2559f..970077d8943 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemb.c
@@ -50,7 +50,8 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
}
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
- { 10, 6 }, { 8, 5 },
+ { 10, 6 },
+ { 8, 5 },
};
// 'num' can be negative, but 'shift' must be non-negative.
@@ -200,9 +201,9 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
const int band_next = band_translate[i + 1];
const int token_next =
(i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
- unsigned int(
- *const token_costs_next)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- token_costs + band_next;
+ unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
+ [ENTROPY_TOKENS] =
+ token_costs + band_next;
token_cache[rc] = vp9_pt_energy_class[t0];
ctx_next = get_coef_context(nb, token_cache, i + 1);
token_tree_sel_next = (x == 0);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
index 1e91d015512..fd3889ddad2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
@@ -65,12 +65,12 @@
#define AM_SEGMENT_ID_INACTIVE 7
#define AM_SEGMENT_ID_ACTIVE 0
-#define ALTREF_HIGH_PRECISION_MV 1 // Whether to use high precision mv
- // for altref computation.
-#define HIGH_PRECISION_MV_QTHRESH 200 // Q threshold for high precision
- // mv. Choose a very high value for
- // now so that HIGH_PRECISION is always
- // chosen.
+// Whether to use high precision mv for altref computation.
+#define ALTREF_HIGH_PRECISION_MV 1
+
+// Q threshold for high precision mv. Choose a very high value for now so that
+// HIGH_PRECISION is always chosen.
+#define HIGH_PRECISION_MV_QTHRESH 200
#define FRAME_SIZE_FACTOR 128 // empirical params for context model threshold
#define FRAME_RATE_FACTOR 8
@@ -547,6 +547,74 @@ static void apply_active_map(VP9_COMP *cpi) {
}
}
+static void apply_roi_map(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ struct segmentation *const seg = &cm->seg;
+ vpx_roi_map_t *roi = &cpi->roi;
+ const int *delta_q = roi->delta_q;
+ const int *delta_lf = roi->delta_lf;
+ const int *skip = roi->skip;
+ int ref_frame[8];
+ int internal_delta_q[MAX_SEGMENTS];
+ int i;
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
+
+ // TODO(jianj): Investigate why ROI not working in speed < 5 or in non
+ // realtime mode.
+ if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
+ if (!roi->enabled) return;
+
+ memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
+
+ vp9_enable_segmentation(seg);
+ vp9_clearall_segfeatures(seg);
+  // Select delta coding method.
+ seg->abs_delta = SEGMENT_DELTADATA;
+
+ memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
+
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ // Translate the external delta q values to internal values.
+ internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
+ if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
+ vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
+ vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
+ if (internal_delta_q[i] != 0) {
+ vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
+ vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
+ }
+ if (delta_lf[i] != 0) {
+ vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
+ vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
+ }
+ if (skip[i] != 0) {
+ vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
+ vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
+ }
+ if (ref_frame[i] >= 0) {
+ int valid_ref = 1;
+ // ALTREF is not used as reference for nonrd_pickmode with 0 lag.
+ if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
+ valid_ref = 0;
+ // If GOLDEN is selected, make sure it's set as reference.
+ if (ref_frame[i] == GOLDEN_FRAME &&
+ !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
+ valid_ref = 0;
+ }
+      // GOLDEN was updated in the previous encoded frame, so GOLDEN and LAST are
+      // the same reference.
+ if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
+ ref_frame[i] = LAST_FRAME;
+ if (valid_ref) {
+ vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
+ vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
+ }
+ }
+ }
+ roi->enabled = 1;
+}
+
static void init_level_info(Vp9LevelInfo *level_info) {
Vp9LevelStats *const level_stats = &level_info->level_stats;
Vp9LevelSpec *const level_spec = &level_info->level_spec;
@@ -557,6 +625,13 @@ static void init_level_info(Vp9LevelInfo *level_info) {
level_spec->min_altref_distance = INT_MAX;
}
+static int check_seg_range(int seg_data[8], int range) {
+ return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
+ abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
+ abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
+ abs(seg_data[6]) > range || abs(seg_data[7]) > range);
+}
+
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
int i;
const Vp9LevelSpec *this_level;
@@ -583,6 +658,61 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
}
+int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
+ unsigned int cols, int delta_q[8], int delta_lf[8],
+ int skip[8], int ref_frame[8]) {
+ VP9_COMMON *cm = &cpi->common;
+ vpx_roi_map_t *roi = &cpi->roi;
+ const int range = 63;
+ const int ref_frame_range = 3; // Alt-ref
+ const int skip_range = 1;
+ const int frame_rows = cpi->common.mi_rows;
+ const int frame_cols = cpi->common.mi_cols;
+
+  // Check that the number of rows and columns match
+ if (frame_rows != (int)rows || frame_cols != (int)cols) {
+ return -1;
+ }
+
+ if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
+ !check_seg_range(ref_frame, ref_frame_range) ||
+ !check_seg_range(skip, skip_range))
+ return -1;
+
+ // Also disable segmentation if no deltas are specified.
+ if (!map ||
+ (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
+ delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
+ delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
+ delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
+ skip[5] | skip[6] | skip[7]) &&
+ (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
+ ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
+ ref_frame[6] == -1 && ref_frame[7] == -1))) {
+ vp9_disable_segmentation(&cm->seg);
+ cpi->roi.enabled = 0;
+ return 0;
+ }
+
+ if (roi->roi_map) {
+ vpx_free(roi->roi_map);
+ roi->roi_map = NULL;
+ }
+ CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
+
+  // Copy to ROI structure in the compressor.
+ memcpy(roi->roi_map, map, rows * cols);
+ memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
+ memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
+ memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
+ memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
+ roi->enabled = 1;
+ roi->rows = rows;
+ roi->cols = cols;
+
+ return 0;
+}
+
int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
int cols) {
if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
@@ -817,6 +947,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->active_map.map);
cpi->active_map.map = NULL;
+ vpx_free(cpi->roi.roi_map);
+ cpi->roi.roi_map = NULL;
+
vpx_free(cpi->consec_zero_mv);
cpi->consec_zero_mv = NULL;
@@ -1121,8 +1254,9 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
// For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
// buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
- // target of 1/4x1/4.
- if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc) {
+ // target of 1/4x1/4. number_spatial_layers must be greater than 2.
+ if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
+ cpi->svc.number_spatial_layers > 2) {
cpi->svc.scaled_temp_is_alloc = 1;
if (vpx_realloc_frame_buffer(
&cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
@@ -2017,8 +2151,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
realloc_segmentation_maps(cpi);
- CHECK_MEM_ERROR(cm, cpi->skin_map, vpx_calloc(cm->mi_rows * cm->mi_cols,
- sizeof(cpi->skin_map[0])));
+ CHECK_MEM_ERROR(
+ cm, cpi->skin_map,
+ vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
@@ -3630,6 +3765,8 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// it may be pretty bad for rate-control,
// and I should handle it somehow
vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
+ } else if (cpi->roi.enabled && cm->frame_type != KEY_FRAME) {
+ apply_roi_map(cpi);
}
apply_active_map(cpi);
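To make the new ROI plumbing concrete, here is a hypothetical caller of vp9_set_roi_map(); the helper name, the segment choice, and the delta values are assumptions, while the bounds follow check_seg_range() above (|delta_q| and |delta_lf| up to 63, skip in {0,1}, ref_frame in -1..3). Note the TODO above: apply_roi_map() currently only takes effect in realtime mode at speed >= 5, and only on non-key frames.

/* Hypothetical usage sketch of vp9_set_roi_map(); assumes an initialized
 * VP9_COMP *cpi and that vp9/encoder/vp9_encoder.h is on the include path. */
#include <stdlib.h> /* calloc/free */

static int sketch_mark_center_roi(VP9_COMP *cpi) {
  const unsigned int rows = cpi->common.mi_rows;
  const unsigned int cols = cpi->common.mi_cols;
  unsigned char *map = (unsigned char *)calloc(rows * cols, 1);
  int delta_q[8] = { 0 }, delta_lf[8] = { 0 }, skip[8] = { 0 };
  int ref_frame[8] = { -1, -1, -1, -1, -1, -1, -1, -1 };
  unsigned int r, c;
  int ret;

  if (!map) return -1;
  /* Segment 1: boost quality in the center quarter of the frame. */
  delta_q[1] = -20; /* within the +/-63 range enforced by check_seg_range() */
  for (r = rows / 4; r < 3 * rows / 4; ++r)
    for (c = cols / 4; c < 3 * cols / 4; ++c) map[r * cols + c] = 1;

  ret = vp9_set_roi_map(cpi, map, rows, cols, delta_q, delta_lf, skip,
                        ref_frame);
  free(map); /* vp9_set_roi_map() copies the map into cpi->roi */
  return ret;
}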
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
index d723d93cbc1..2989af35e93 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
@@ -723,6 +723,8 @@ typedef struct VP9_COMP {
uint8_t *count_arf_frame_usage;
uint8_t *count_lastgolden_frame_usage;
+
+ vpx_roi_map_t roi;
} VP9_COMP;
void vp9_initialize_enc(void);
@@ -868,9 +870,8 @@ static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
#if CONFIG_VP9_TEMPORAL_DENOISING
static INLINE int denoise_svc(const struct VP9_COMP *const cpi) {
- return (!cpi->use_svc ||
- (cpi->use_svc &&
- cpi->svc.spatial_layer_id >= cpi->svc.first_layer_denoise));
+ return (!cpi->use_svc || (cpi->use_svc && cpi->svc.spatial_layer_id >=
+ cpi->svc.first_layer_denoise));
}
#endif
@@ -938,6 +939,10 @@ static INLINE int log_tile_cols_from_picsize_level(uint32_t width,
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
+int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
+ unsigned int cols, int delta_q[8], int delta_lf[8],
+ int skip[8], int ref_frame[8]);
+
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
void vp9_set_row_mt(VP9_COMP *cpi);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
index 4093b5529fd..f4fda096584 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
@@ -731,9 +731,8 @@ static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps,
// Exclude any image dead zone
if (fp_acc_data->image_data_start_row > 0) {
fp_acc_data->intra_skip_count =
- VPXMAX(0,
- fp_acc_data->intra_skip_count -
- (fp_acc_data->image_data_start_row * cm->mb_cols * 2));
+ VPXMAX(0, fp_acc_data->intra_skip_count -
+ (fp_acc_data->image_data_start_row * cm->mb_cols * 2));
}
fp_acc_data->intra_factor = fp_acc_data->intra_factor / (double)num_mbs;
@@ -2238,9 +2237,6 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
}
gf_group->arf_update_idx[0] = arf_buffer_indices[0];
gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
-
- // Step over the golden frame / overlay frame
- if (EOF == input_stats(twopass, &frame_stats)) return;
}
// Deduct the boost bits for arf (or gf if it is not a key frame)
@@ -2285,7 +2281,8 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
// Define middle frame
mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
- normal_frames = (rc->baseline_gf_interval - rc->source_alt_ref_pending);
+ normal_frames =
+ rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
if (normal_frames > 1)
normal_frame_bits = (int)(total_group_bits / normal_frames);
else
@@ -2551,9 +2548,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Break out conditions.
- if (
- // Break at active_max_gf_interval unless almost totally static.
- ((i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
+ // Break at maximum of active_max_gf_interval unless almost totally static.
+ if (((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) &&
+ (i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
(
// Don't break out with a very short interval.
(i >= active_min_gf_interval) &&
@@ -2573,8 +2570,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
// Should we use the alternate reference frame.
- if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
- (i >= rc->min_gf_interval)) {
+ if ((twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) && allow_alt_ref &&
+ (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) {
const int forward_frames = (rc->frames_to_key - i >= i - 1)
? i - 1
: VPXMAX(0, rc->frames_to_key - i);
@@ -2602,7 +2599,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif
// Set the interval until the next gf.
- rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
+ rc->baseline_gf_interval =
+ (twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH)
+ ? (i - (is_key_frame || rc->source_alt_ref_pending))
+ : i;
// Only encode alt reference frame in temporal base layer. So
// baseline_gf_interval should be multiple of a temporal layer group
@@ -2700,13 +2700,24 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif
}
+// Intra / Inter threshold very low
+#define VERY_LOW_II 1.5
+// For clean slide transitions we expect a sharp single-frame spike in error.
+#define ERROR_SPIKE 5.0
+
// Slide show transition detection.
// Tests for case where there is very low error either side of the current frame
// but much higher just for this frame. This can help detect key frames in
// slide shows even where the slides are pictures of different sizes.
+// Also requires that intra and inter errors are very similar to help eliminate
+// harmful false positives.
// It will not help if the transition is a fade or other multi-frame effect.
-static int slide_transition(double this_err, double last_err, double next_err) {
- return (this_err > (last_err * 5.0)) && (this_err > (next_err * 5.0));
+static int slide_transition(const FIRSTPASS_STATS *this_frame,
+ const FIRSTPASS_STATS *last_frame,
+ const FIRSTPASS_STATS *next_frame) {
+ return (this_frame->intra_error < (this_frame->coded_error * VERY_LOW_II)) &&
+ (this_frame->coded_error > (last_frame->coded_error * ERROR_SPIKE)) &&
+ (this_frame->coded_error > (next_frame->coded_error * ERROR_SPIKE));
}
// Threshold for use of the lagging second reference frame. High second ref
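As a worked example of the tightened predicate (numbers are made up): with last_frame->coded_error = 100, next_frame->coded_error = 120, this_frame->coded_error = 900 and this_frame->intra_error = 1200, the frame is flagged because 1200 < 900 * 1.5 (VERY_LOW_II), 900 > 100 * 5.0 and 900 > 120 * 5.0 (ERROR_SPIKE). With intra_error raised to 2000 the same error spike is no longer flagged, which is the class of false positive the extra intra/inter check is meant to remove.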
@@ -2753,8 +2764,7 @@ static int test_candidate_kf(TWO_PASS *twopass,
if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
(next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
- (slide_transition(this_frame->coded_error, last_frame->coded_error,
- next_frame->coded_error)) ||
+ (slide_transition(this_frame, last_frame, next_frame)) ||
((pcnt_intra > MIN_INTRA_LEVEL) &&
(pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) &&
((this_frame->intra_error /
@@ -3019,8 +3029,14 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double zm_factor;
// Monitor for static sections.
- zero_motion_accumulator = VPXMIN(
- zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+    // For the first frame in the kf group the second ref indicator is invalid.
+ if (i > 0) {
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+ } else {
+ zero_motion_accumulator =
+ next_frame.pcnt_inter - next_frame.pcnt_motion;
+ }
// Factor 0.75-1.25 based on how much of frame is static.
zm_factor = (0.75 + (zero_motion_accumulator / 2.0));
@@ -3056,10 +3072,16 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->section_intra_rating = calculate_section_intra_ratio(
start_position, twopass->stats_in_end, rc->frames_to_key);
- // Apply various clamps for min and max boost
- rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
- rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
- rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
+  // Special case for static / slide show content but don't apply
+ // if the kf group is very short.
+ if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {
+ rc->kf_boost = VPXMAX((rc->frames_to_key * 100), MAX_KF_TOT_BOOST);
+ } else {
+ // Apply various clamps for min and max boost
+ rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
+ rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
+ rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
+ }
// Work out how many bits to allocate for the key frame itself.
kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h
index 000ecd77926..aa497e3daab 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.h
@@ -120,12 +120,12 @@ typedef enum {
typedef struct {
unsigned char index;
unsigned char first_inter_index;
- RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
- FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
- int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
+ RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 1];
+ FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 1];
+ unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 1];
+ unsigned char arf_update_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
+ unsigned char arf_ref_idx[MAX_STATIC_GF_GROUP_LENGTH + 1];
+ int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 1];
} GF_GROUP;
typedef struct {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.h
index df2fb98efa6..c3af972bc00 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mbgraph.h
@@ -25,7 +25,9 @@ typedef struct {
} ref[MAX_REF_FRAMES];
} MBGRAPH_MB_STATS;
-typedef struct { MBGRAPH_MB_STATS *mb_stats; } MBGRAPH_FRAME_STATS;
+typedef struct {
+ MBGRAPH_MB_STATS *mb_stats;
+} MBGRAPH_FRAME_STATS;
struct VP9_COMP;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
index 44f01be25a0..37406c23274 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_mcomp.c
@@ -1785,7 +1785,10 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
}
static const MV search_pos[4] = {
- { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
+ { -1, 0 },
+ { 0, -1 },
+ { 0, 1 },
+ { 1, 0 },
};
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
@@ -1876,7 +1879,10 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
{
const uint8_t *const pos[4] = {
- ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
+ ref_buf - ref_stride,
+ ref_buf - 1,
+ ref_buf + 1,
+ ref_buf + ref_stride,
};
cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
index f2f323a282d..212c260fa24 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
@@ -1488,7 +1488,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int skip_ref_find_pred[4] = { 0 };
unsigned int sse_zeromv_normalized = UINT_MAX;
unsigned int best_sse_sofar = UINT_MAX;
- unsigned int thresh_svc_skip_golden = 500;
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX;
@@ -1496,11 +1495,23 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#endif
INTERP_FILTER filter_gf_svc = EIGHTTAP;
MV_REFERENCE_FRAME best_second_ref_frame = NONE;
+ const struct segmentation *const seg = &cm->seg;
int comp_modes = 0;
int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES;
int flag_svc_subpel = 0;
int svc_mv_col = 0;
int svc_mv_row = 0;
+ unsigned int thresh_svc_skip_golden = 500;
+ // Lower the skip threshold if lower spatial layer is better quality relative
+ // to current layer.
+ if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex > 150 &&
+ cm->base_qindex > cpi->svc.lower_layer_qindex + 15)
+ thresh_svc_skip_golden = 100;
+ // Increase skip threshold if lower spatial layer is lower quality relative
+ // to current layer.
+ else if (cpi->svc.spatial_layer_id > 0 && cm->base_qindex < 140 &&
+ cm->base_qindex < cpi->svc.lower_layer_qindex - 20)
+ thresh_svc_skip_golden = 1000;
init_ref_frame_cost(cm, xd, ref_frame_cost);
@@ -1638,6 +1649,16 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
cpi->sf.use_compound_nonrd_pickmode && usable_ref_frame == ALTREF_FRAME)
comp_modes = 2;
+ // If the segment reference frame feature is enabled and it's set to GOLDEN
+  // reference, then make sure we don't skip checking GOLDEN; this is to
+  // prevent the possibility of not picking any mode.
+ if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) == GOLDEN_FRAME) {
+ usable_ref_frame = GOLDEN_FRAME;
+ skip_ref_find_pred[GOLDEN_FRAME] = 0;
+ thresh_svc_skip_golden = 0;
+ }
+
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
if (!skip_ref_find_pred[ref_frame]) {
find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
@@ -1699,6 +1720,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (ref_frame > usable_ref_frame) continue;
if (skip_ref_find_pred[ref_frame]) continue;
+    // If the segment reference frame feature is enabled, skip the current
+    // ref frame if it is not allowed for this segment.
+ if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
+ continue;
+
if (flag_svc_subpel && ref_frame == GOLDEN_FRAME) {
force_gf_mv = 1;
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
@@ -1713,7 +1740,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
if (comp_pred) {
- const struct segmentation *const seg = &cm->seg;
if (!cpi->allow_comp_inter_inter) continue;
// Skip compound inter modes if ARF is not available.
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
@@ -1785,29 +1811,34 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
continue;
}
- if (sf->reference_masking &&
- !(frame_mv[this_mode][ref_frame].as_int == 0 &&
- ref_frame == LAST_FRAME)) {
- if (usable_ref_frame < ALTREF_FRAME) {
- if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
- i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
- if ((cpi->ref_frame_flags & flag_list[i]))
- if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
- ref_frame_skip_mask |= (1 << ref_frame);
+ // Disable this drop out case if the ref frame segment level feature is
+ // enabled for this segment. This is to prevent the possibility that we end
+ // up unable to pick any mode.
+ if (!segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) {
+ if (sf->reference_masking &&
+ !(frame_mv[this_mode][ref_frame].as_int == 0 &&
+ ref_frame == LAST_FRAME)) {
+ if (usable_ref_frame < ALTREF_FRAME) {
+ if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
+ i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
+ if ((cpi->ref_frame_flags & flag_list[i]))
+ if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
+ ref_frame_skip_mask |= (1 << ref_frame);
+ }
+ } else if (!cpi->rc.is_src_frame_alt_ref &&
+ !(frame_mv[this_mode][ref_frame].as_int == 0 &&
+ ref_frame == ALTREF_FRAME)) {
+ int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
+ int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
+ if (((cpi->ref_frame_flags & flag_list[ref1]) &&
+ (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
+ ((cpi->ref_frame_flags & flag_list[ref2]) &&
+ (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
+ ref_frame_skip_mask |= (1 << ref_frame);
}
- } else if (!cpi->rc.is_src_frame_alt_ref &&
- !(frame_mv[this_mode][ref_frame].as_int == 0 &&
- ref_frame == ALTREF_FRAME)) {
- int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
- int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
- if (((cpi->ref_frame_flags & flag_list[ref1]) &&
- (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
- ((cpi->ref_frame_flags & flag_list[ref2]) &&
- (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
- ref_frame_skip_mask |= (1 << ref_frame);
}
+ if (ref_frame_skip_mask & (1 << ref_frame)) continue;
}
- if (ref_frame_skip_mask & (1 << ref_frame)) continue;
// Select prediction reference frames.
for (i = 0; i < MAX_MB_PLANE; i++) {
@@ -2202,9 +2233,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
  // For spatial enhancement layer: perform intra prediction only if base
// layer is chosen as the reference. Always perform intra prediction if
- // LAST is the only reference or is_key_frame is set.
+  // LAST is the only reference, or is_key_frame is set, or we are on the
+  // base temporal layer.
if (cpi->svc.spatial_layer_id) {
perform_intra_pred =
+ cpi->svc.temporal_layer_id == 0 ||
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
!(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
(!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
@@ -2214,6 +2247,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
cpi->rc.is_src_frame_alt_ref)
perform_intra_pred = 0;
+
+  // If the segment reference frame feature is enabled and set, then skip
+  // the intra prediction.
+ if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) > 0)
+ perform_intra_pred = 0;
+
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
if (best_rdc.rdcost == INT64_MAX ||
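Several of the hunks above gate existing pick-mode shortcuts on the SEG_LVL_REF_FRAME segment feature so that a segment pinned to one reference can always find a mode. A minimal sketch of how that feature is queried, for illustration only (the helper name is hypothetical; segfeature_active() and get_segdata() are the accessors used in the hunks):

#include "vp9/common/vp9_seg_common.h"

/* Sketch: returns the reference frame forced for this segment, or -1 if the
 * segment does not pin a reference frame. When a frame is pinned, the
 * pick-mode loop above must not drop it via reference masking, skip lists,
 * or the intra fallback, otherwise no mode could be selected. */
static int forced_ref_for_segment(const struct segmentation *seg,
                                  int segment_id) {
  if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
    return get_segdata(seg, segment_id, SEG_LVL_REF_FRAME);
  return -1;
}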
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
index 4a7ccc4e581..2e4c9d2bf43 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -31,10 +31,13 @@
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ratectrl.h"
-// Max rate target for 1080P and below encodes under normal circumstances
-// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
+// Max rate per frame for 1080P and below encodes if no level requirement is
+// given. For larger formats, limit to MAX_MB_RATE bits per MB.
+// 4Mbits is derived from the level requirement for level 4 (1080P 30), which
+// requires that HW can sustain a rate of 16Mbits over a 4-frame group.
+// If a lower level requirement is specified, it may override this value.
#define MAX_MB_RATE 250
-#define MAXRATE_1080P 2025000
+#define MAXRATE_1080P 4000000
#define DEFAULT_KF_BOOST 2000
#define DEFAULT_GF_BOOST 2000
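As a quick sanity check of the comment above, the old limit corresponds to MAX_MB_RATE bits per macroblock over a 1080P frame, while the new limit is the per-frame share of a 16 Mbit constraint spread over four frames (values below recomputed here for illustration only):

/* Illustration only. */
enum {
  MBS_1080P = (1920 * 1080) / (16 * 16),  /* 8100 macroblocks */
  OLD_MAXRATE_1080P = MBS_1080P * 250,    /* 2,025,000 bits/frame */
  NEW_MAXRATE_1080P = 16000000 / 4        /* 4,000,000 bits/frame */
};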
@@ -1100,6 +1103,9 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
// Baseline value derived from cpi->active_worst_quality and kf boost.
active_best_quality =
get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
+ if (cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) {
+ active_best_quality /= 4;
+ }
// Allow somewhat lower kf minq with small image formats.
if ((cm->width * cm->height) <= (352 * 288)) {
@@ -1490,6 +1496,9 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0;
rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth;
+ if (cpi->use_svc &&
+ cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
+ cpi->svc.lower_layer_qindex = cm->base_qindex;
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
@@ -1584,9 +1593,8 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
// Adjust boost and af_ratio based on avg_frame_low_motion, which varies
// between 0 and 100 (stationary, 100% zero/small motion).
rc->gfu_boost =
- VPXMAX(500,
- DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
- (rc->avg_frame_low_motion + 100));
+ VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
+ (rc->avg_frame_low_motion + 100));
rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400));
}
adjust_gfint_frame_constraint(cpi, rc->frames_to_key);
@@ -1861,13 +1869,8 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
cpi->framerate, rc->min_gf_interval);
- // Extended interval for genuinely static scenes
- rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
-
- if (is_altref_enabled(cpi)) {
- if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
- rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
- }
+ // Extended max interval for genuinely static scenes like slide shows.
+ rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
rc->max_gf_interval = rc->static_scene_max_gf_interval;
@@ -1911,12 +1914,12 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
// A maximum bitrate for a frame is defined.
- // The baseline for this aligns with HW implementations that
- // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
- // per 16x16 MB (averaged over a frame). However this limit is extended if
- // a very high rate is given on the command line or the the rate cannnot
- // be acheived because of a user specificed max q (e.g. when the user
- // specifies lossless encode.
+  // However, this limit is extended if a very high rate is given on the
+  // command line or the rate cannot be achieved because of a user-specified
+  // max q (e.g. when the user specifies a lossless encode).
+  //
+  // If a level is specified that requires a lower maximum rate, then the
+  // level value takes precedence.
vbr_max_bits =
(int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
100);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
index c1b210677e2..3a40e013810 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -34,6 +34,14 @@ extern "C" {
#define FRAME_OVERHEAD_BITS 200
+// Threshold used to define a KF group as static (e.g. a slide show).
+// Essentially this means that no frame in the group has more than 1% of MBs
+// that are not marked as coded with 0,0 motion in the first pass.
+#define STATIC_KF_GROUP_THRESH 99
+
+// The maximum duration of a GF group that is static (for example a slide show).
+#define MAX_STATIC_GF_GROUP_LENGTH 250
+
typedef enum {
INTER_NORMAL = 0,
INTER_HIGH = 1,
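These two constants connect the first-pass zero-motion statistics to the rate-control changes earlier in this patch. A hedged sketch of the key-frame quantizer adjustment they enable (hypothetical helper; the real check added above lives in rc_pick_q_and_bounds_two_pass()):

/* Sketch: if at least STATIC_KF_GROUP_THRESH (99) percent of MBs in the KF
 * group were coded with (0,0) motion in the first pass, treat the group as
 * static and allow a much lower key-frame quantizer. */
static int static_kf_active_best(int kf_zeromotion_pct,
                                 int active_best_quality) {
  if (kf_zeromotion_pct >= 99 /* STATIC_KF_GROUP_THRESH */)
    active_best_quality /= 4;
  return active_best_quality;
}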
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
index 2ba6378c5e4..90f06720baa 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c
@@ -59,7 +59,9 @@ typedef struct {
MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
-typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
+typedef struct {
+ MV_REFERENCE_FRAME ref_frame[2];
+} REF_DEFINITION;
struct rdcost_block_args {
const VP9_COMP *cpi;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
index eb39bab25f6..389d48f210c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -45,8 +45,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->ext_lst_fb_idx[sl] = 0;
svc->ext_gld_fb_idx[sl] = 1;
svc->ext_alt_fb_idx[sl] = 2;
- svc->downsample_filter_type[sl] = EIGHTTAP;
- svc->downsample_filter_phase[sl] = 0; // Set to 8 for averaging filter.
+ svc->downsample_filter_type[sl] = BILINEAR;
+ svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter.
}
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
@@ -155,6 +155,8 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
int sl, tl, layer = 0, spatial_layer_target;
float bitrate_alloc = 1.0;
+ cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
+
if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
@@ -547,6 +549,8 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else {
+ if (spatial_id == cpi->svc.number_spatial_layers - 1)
+ cpi->ext_refresh_alt_ref_frame = 0;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
index 8b708e271a0..87686fe59f9 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -103,6 +103,8 @@ typedef struct SVC {
int first_layer_denoise;
int skip_enhancement_layer;
+
+ int lower_layer_qindex;
} SVC;
struct VP9_COMP;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
index dbd243ac103..293cdcd675a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
@@ -170,13 +170,13 @@ void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
- case ADST_ADST:
+ default:
+ assert(tx_type == ADST_ADST);
load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
- default: assert(0); break;
}
}
@@ -1097,14 +1097,14 @@ void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
- case ADST_ADST:
+ default:
+ assert(tx_type == ADST_ADST);
load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
- default: assert(0); break;
}
}
@@ -1963,13 +1963,13 @@ void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
- case ADST_ADST:
+ default:
+ assert(tx_type == ADST_ADST);
load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
- default: assert(0); break;
}
}
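All three hunks above apply the same refactor: the final transform type is handled in the default branch with an assert rather than a dedicated case plus an unreachable default. A minimal sketch of the pattern on a hypothetical enum (not the transform code itself):

#include <assert.h>

typedef enum { DCT_DCT_S, ADST_DCT_S, DCT_ADST_S, ADST_ADST_S } TX_TYPE_SKETCH;

static int handle_tx_type(TX_TYPE_SKETCH t) {
  switch (t) {
    case DCT_DCT_S: return 0;
    case ADST_DCT_S: return 1;
    case DCT_ADST_S: return 2;
    default:
      /* Folding the last case into default removes a separate unreachable
       * branch while still documenting the expectation. */
      assert(t == ADST_ADST_S);
      return 3;
  }
}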
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c
new file mode 100644
index 00000000000..4bebc34d676
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <immintrin.h> // AVX2
+
+#include "./vp9_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
+#include "vpx_dsp/x86/quantize_x86.h"
+
+// Zero fill 16 coefficient positions in the output buffer.
+static INLINE void store_zero_tran_low(tran_low_t *a) {
+ const __m256i zero = _mm256_setzero_si256();
+#if CONFIG_VP9_HIGHBITDEPTH
+ _mm256_storeu_si256((__m256i *)(a), zero);
+ _mm256_storeu_si256((__m256i *)(a + 8), zero);
+#else
+ _mm256_storeu_si256((__m256i *)(a), zero);
+#endif
+}
+
+static INLINE __m256i scan_eob_256(const __m256i *iscan_ptr,
+ __m256i *coeff256) {
+ const __m256i iscan = _mm256_loadu_si256(iscan_ptr);
+ const __m256i zero256 = _mm256_setzero_si256();
+#if CONFIG_VP9_HIGHBITDEPTH
+ // The _mm256_packs_epi32() in load_tran_low() packs the 64 bit coeff as
+ // B1 A1 B0 A0. Shuffle to B1 B0 A1 A0 in order to scan eob correctly.
+ const __m256i _coeff256 = _mm256_permute4x64_epi64(*coeff256, 0xd8);
+ const __m256i zero_coeff0 = _mm256_cmpeq_epi16(_coeff256, zero256);
+#else
+ const __m256i zero_coeff0 = _mm256_cmpeq_epi16(*coeff256, zero256);
+#endif
+ const __m256i nzero_coeff0 = _mm256_cmpeq_epi16(zero_coeff0, zero256);
+ // Add one to convert from indices to counts
+ const __m256i iscan_plus_one = _mm256_sub_epi16(iscan, nzero_coeff0);
+ return _mm256_and_si256(iscan_plus_one, nzero_coeff0);
+}
+
+void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan_ptr,
+ const int16_t *iscan_ptr) {
+ __m128i eob;
+ __m256i round256, quant256, dequant256;
+ __m256i eob256, thr256;
+
+ (void)scan_ptr;
+ (void)skip_block;
+ assert(!skip_block);
+
+ coeff_ptr += n_coeffs;
+ iscan_ptr += n_coeffs;
+ qcoeff_ptr += n_coeffs;
+ dqcoeff_ptr += n_coeffs;
+ n_coeffs = -n_coeffs;
+
+ {
+ __m256i coeff256;
+
+ // Setup global values
+ {
+ const __m128i round = _mm_load_si128((const __m128i *)round_ptr);
+ const __m128i quant = _mm_load_si128((const __m128i *)quant_ptr);
+ const __m128i dequant = _mm_load_si128((const __m128i *)dequant_ptr);
+ round256 = _mm256_castsi128_si256(round);
+ round256 = _mm256_permute4x64_epi64(round256, 0x54);
+
+ quant256 = _mm256_castsi128_si256(quant);
+ quant256 = _mm256_permute4x64_epi64(quant256, 0x54);
+
+ dequant256 = _mm256_castsi128_si256(dequant);
+ dequant256 = _mm256_permute4x64_epi64(dequant256, 0x54);
+ }
+
+ {
+ __m256i qcoeff256;
+ __m256i qtmp256;
+ coeff256 = load_tran_low(coeff_ptr + n_coeffs);
+ qcoeff256 = _mm256_abs_epi16(coeff256);
+ qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
+ qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
+ qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
+ store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
+ coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
+ store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
+ }
+
+ eob256 = scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256);
+ n_coeffs += 8 * 2;
+ }
+
+  // Remove DC constants: broadcast the AC-only upper lane into both lanes.
+ dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31);
+ quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31);
+ round256 = _mm256_permute2x128_si256(round256, round256, 0x31);
+
+ thr256 = _mm256_srai_epi16(dequant256, 1);
+
+  // AC-only loop
+ while (n_coeffs < 0) {
+ __m256i coeff256 = load_tran_low(coeff_ptr + n_coeffs);
+ __m256i qcoeff256 = _mm256_abs_epi16(coeff256);
+ int32_t nzflag =
+ _mm256_movemask_epi8(_mm256_cmpgt_epi16(qcoeff256, thr256));
+
+ if (nzflag) {
+ __m256i qtmp256;
+ qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
+ qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
+ qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
+ store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
+ coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
+ store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
+ eob256 = _mm256_max_epi16(
+ eob256,
+ scan_eob_256((const __m256i *)(iscan_ptr + n_coeffs), &coeff256));
+ } else {
+ store_zero_tran_low(qcoeff_ptr + n_coeffs);
+ store_zero_tran_low(dqcoeff_ptr + n_coeffs);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ eob = _mm_max_epi16(_mm256_castsi256_si128(eob256),
+ _mm256_extracti128_si256(eob256, 1));
+
+ *eob_ptr = accumulate_eob(eob);
+}
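The new file vectorizes the VP9 fast-path quantizer with AVX2. A rough scalar equivalent is sketched below for orientation only; the names are hypothetical, the saturating 16-bit arithmetic and the per-16-coefficient skip test of the intrinsics are simplified, and libvpx's own scalar reference (vp9_quantize_fp_c) is the authoritative version:

#include <stdint.h>
#include <stdlib.h>

/* Scalar sketch of the same arithmetic: q = ((|c| + round) * quant) >> 16
 * with the sign of the input restored, dq = q * dequant, and eob tracked via
 * iscan (scan index + 1 == coefficient count). AC coefficients whose
 * magnitude is at most dequant/2 are zeroed, mirroring the nzflag test. */
static void quantize_fp_sketch(const int16_t *coeff, int n,
                               const int16_t *round, const int16_t *quant,
                               const int16_t *dequant, int16_t *qcoeff,
                               int16_t *dqcoeff, uint16_t *eob,
                               const int16_t *iscan) {
  int i;
  *eob = 0;
  for (i = 0; i < n; ++i) {
    const int is_ac = (i != 0);
    const int abs_c = abs(coeff[i]);
    int q = 0;
    if (!is_ac || abs_c > (dequant[1] >> 1))
      q = ((abs_c + round[is_ac]) * quant[is_ac]) >> 16;
    qcoeff[i] = (int16_t)(coeff[i] < 0 ? -q : q);
    dqcoeff[i] = (int16_t)(qcoeff[i] * dequant[is_ac]);
    if (q && iscan[i] + 1 > *eob) *eob = (uint16_t)(iscan[i] + 1);
  }
}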
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk
index 9819fb64102..721e170bfbf 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_common.mk
@@ -64,10 +64,13 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht_neon.h
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
@@ -78,10 +81,11 @@ ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c
VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
else
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht4x4_add_neon.c
VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht4x4_add_sse4.c
+VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht8x8_add_sse4.c
+VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht16x16_add_sse4.c
endif
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
index 5bfe9aa0579..7312786a3c3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_cx_iface.c
@@ -1067,12 +1067,11 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
vpx_codec_frame_flags_t flags = lib_flags << 16;
if (lib_flags & FRAMEFLAGS_KEY ||
- (cpi->use_svc &&
- cpi->svc
- .layer_context[cpi->svc.spatial_layer_id *
- cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id]
- .is_key_frame))
+ (cpi->use_svc && cpi->svc
+ .layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id]
+ .is_key_frame))
flags |= VPX_FRAME_IS_KEY;
if (cpi->droppable) flags |= VPX_FRAME_IS_DROPPABLE;
@@ -1234,6 +1233,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
ctx->pending_frame_magnitude |= size;
cx_data += size;
cx_data_sz -= size;
+ pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
+ pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
if (ctx->output_cx_pkt_cb.output_cx_pkt) {
pkt.kind = VPX_CODEC_CX_FRAME_PKT;
@@ -1260,8 +1261,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
timebase, dst_end_time_stamp - dst_time_stamp);
pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
- pkt.data.frame.width = cpi->common.width;
- pkt.data.frame.height = cpi->common.height;
+ pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
+ pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
if (ctx->pending_cx_data) {
if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
@@ -1414,11 +1415,21 @@ static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) {
static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
va_list args) {
- (void)ctx;
- (void)args;
+ vpx_roi_map_t *data = va_arg(args, vpx_roi_map_t *);
+
+ if (data) {
+ vpx_roi_map_t *roi = (vpx_roi_map_t *)data;
- // TODO(yaowu): Need to re-implement and test for VP9.
- return VPX_CODEC_INVALID_PARAM;
+ if (!vp9_set_roi_map(ctx->cpi, roi->roi_map, roi->rows, roi->cols,
+ roi->delta_q, roi->delta_lf, roi->skip,
+ roi->ref_frame)) {
+ return VPX_CODEC_OK;
+ } else {
+ return VPX_CODEC_INVALID_PARAM;
+ }
+ } else {
+ return VPX_CODEC_INVALID_PARAM;
+ }
}
static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx,
@@ -1608,7 +1619,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
// Setters
{ VP8_SET_REFERENCE, ctrl_set_reference },
{ VP8_SET_POSTPROC, ctrl_set_previewpp },
- { VP8E_SET_ROI_MAP, ctrl_set_roi_map },
+ { VP9E_SET_ROI_MAP, ctrl_set_roi_map },
{ VP8E_SET_ACTIVEMAP, ctrl_set_active_map },
{ VP8E_SET_SCALEMODE, ctrl_set_scale_mode },
{ VP8E_SET_CPUUSED, ctrl_set_cpuused },
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk
index d633ed1429d..6186d461438 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9cx.mk
@@ -103,6 +103,7 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_sse4.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_quantize_avx2.c
VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_encoder.c b/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_encoder.c
index 4390cf7c8f1..1cf2dca695a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/src/vpx_encoder.c
@@ -12,8 +12,11 @@
* \brief Provides the high level interface to wrap encoder algorithms.
*
*/
+#include <assert.h>
#include <limits.h>
+#include <stdlib.h>
#include <string.h>
+#include "vp8/common/blockd.h"
#include "vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h"
@@ -81,6 +84,8 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
int i;
void *mem_loc = NULL;
+ if (iface->enc.mr_get_mem_loc == NULL) return VPX_CODEC_INCAPABLE;
+
if (!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc))) {
for (i = 0; i < num_enc; i++) {
vpx_codec_priv_enc_mr_cfg_t mr_cfg;
@@ -89,28 +94,27 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
if (dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 ||
dsf->den > dsf->num) {
res = VPX_CODEC_INVALID_PARAM;
- break;
+ } else {
+ mr_cfg.mr_low_res_mode_info = mem_loc;
+ mr_cfg.mr_total_resolutions = num_enc;
+ mr_cfg.mr_encoder_id = num_enc - 1 - i;
+ mr_cfg.mr_down_sampling_factor.num = dsf->num;
+ mr_cfg.mr_down_sampling_factor.den = dsf->den;
+
+        /* Force key-frame synchronization. Namely, encoders at higher
+         * resolutions always use the same frame_type chosen by the
+         * lowest-resolution encoder.
+ */
+ if (mr_cfg.mr_encoder_id) cfg->kf_mode = VPX_KF_DISABLED;
+
+ ctx->iface = iface;
+ ctx->name = iface->name;
+ ctx->priv = NULL;
+ ctx->init_flags = flags;
+ ctx->config.enc = cfg;
+ res = ctx->iface->init(ctx, &mr_cfg);
}
- mr_cfg.mr_low_res_mode_info = mem_loc;
- mr_cfg.mr_total_resolutions = num_enc;
- mr_cfg.mr_encoder_id = num_enc - 1 - i;
- mr_cfg.mr_down_sampling_factor.num = dsf->num;
- mr_cfg.mr_down_sampling_factor.den = dsf->den;
-
- /* Force Key-frame synchronization. Namely, encoder at higher
- * resolution always use the same frame_type chosen by the
- * lowest-resolution encoder.
- */
- if (mr_cfg.mr_encoder_id) cfg->kf_mode = VPX_KF_DISABLED;
-
- ctx->iface = iface;
- ctx->name = iface->name;
- ctx->priv = NULL;
- ctx->init_flags = flags;
- ctx->config.enc = cfg;
- res = ctx->iface->init(ctx, &mr_cfg);
-
if (res) {
const char *error_detail = ctx->priv ? ctx->priv->err_detail : NULL;
/* Destroy current ctx */
@@ -124,10 +128,14 @@ vpx_codec_err_t vpx_codec_enc_init_multi_ver(
vpx_codec_destroy(ctx);
i--;
}
+#if CONFIG_MULTI_RES_ENCODING
+ assert(mem_loc);
+ free(((LOWER_RES_FRAME_INFO *)mem_loc)->mb_info);
+ free(mem_loc);
+#endif
+ return SAVE_STATUS(ctx, res);
}
- if (res) break;
-
ctx++;
cfg++;
dsf++;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
index 6c67ec24f2c..261ed861414 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
@@ -125,7 +125,7 @@ extern vpx_codec_iface_t *vpx_codec_vp9_cx(void);
enum vp8e_enc_control_id {
/*!\brief Codec control function to pass an ROI map to encoder.
*
- * Supported in codecs: VP8, VP9
+ * Supported in codecs: VP8
*/
VP8E_SET_ROI_MAP = 8,
@@ -423,6 +423,12 @@ enum vp8e_enc_control_id {
*/
VP9E_SET_SVC,
+ /*!\brief Codec control function to pass an ROI map to encoder.
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_SET_ROI_MAP,
+
/*!\brief Codec control function to set parameters for SVC.
* \note Parameters contain min_q, max_q, scaling factor for each of the
* SVC layers.
@@ -643,16 +649,20 @@ typedef enum vp9e_temporal_layering_mode {
*/
typedef struct vpx_roi_map {
- /*! An id between 0 and 3 for each 16x16 region within a frame. */
+  /*! Whether ROI is enabled. */
+ uint8_t enabled;
+  /*! An id between 0-3 (0-7 for VP9) for each 16x16 (8x8 for VP9)
+ * region within a frame. */
unsigned char *roi_map;
unsigned int rows; /**< Number of rows. */
unsigned int cols; /**< Number of columns. */
- // TODO(paulwilkins): broken for VP9 which has 8 segments
- // q and loop filter deltas for each segment
- // (see MAX_MB_SEGMENTS)
- int delta_q[4]; /**< Quantizer deltas. */
- int delta_lf[4]; /**< Loop filter deltas. */
- /*! Static breakout threshold for each segment. */
+ /*! VP8 only uses the first 4 segments. VP9 uses 8 segments. */
+ int delta_q[8]; /**< Quantizer deltas. */
+ int delta_lf[8]; /**< Loop filter deltas. */
+  /*! The skip and ref frame segment features are only used in VP9. */
+ int skip[8]; /**< Skip this block. */
+ int ref_frame[8]; /**< Reference frame for this block. */
+ /*! Static breakout threshold for each segment. Only used in VP8. */
unsigned int static_threshold[4];
} vpx_roi_map_t;
@@ -749,6 +759,8 @@ VPX_CTRL_USE_TYPE(VP8E_SET_TEMPORAL_LAYER_ID, int)
#define VPX_CTRL_VP8E_SET_TEMPORAL_LAYER_ID
VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP, vpx_roi_map_t *)
#define VPX_CTRL_VP8E_SET_ROI_MAP
+VPX_CTRL_USE_TYPE(VP9E_SET_ROI_MAP, vpx_roi_map_t *)
+#define VPX_CTRL_VP9E_SET_ROI_MAP
VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *)
#define VPX_CTRL_VP8E_SET_ACTIVEMAP
VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *)
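With the new control id and the extended vpx_roi_map fields, VP9 accepts its own ROI map. A minimal usage sketch, assuming an already-initialized VP9 encoder context and taking the 8x8 VP9 granularity from the header comment above (error handling omitted; whether the encoder consults the enabled field is not shown in this diff):

#include <stdlib.h>
#include <string.h>
#include "vpx/vp8cx.h"

/* Sketch: mark every block as segment 1 with a small quantizer bonus. */
static void set_simple_roi(vpx_codec_ctx_t *codec, int w, int h) {
  vpx_roi_map_t roi;
  memset(&roi, 0, sizeof(roi));
  roi.enabled = 1;                 /* new field added in this patch */
  roi.rows = (h + 7) / 8;          /* 8x8 granularity assumed for VP9 */
  roi.cols = (w + 7) / 8;
  roi.roi_map = (unsigned char *)malloc(roi.rows * roi.cols);
  memset(roi.roi_map, 1, roi.rows * roi.cols);
  roi.delta_q[1] = -4;             /* segment 1: slightly lower quantizer */
  roi.delta_lf[1] = 0;
  vpx_codec_control(codec, VP9E_SET_ROI_MAP, &roi);
  free(roi.roi_map);
}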
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h
index de964eb46fe..f9a2ed374d1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_encoder.h
@@ -63,7 +63,7 @@ extern "C" {
* fields to structures
*/
#define VPX_ENCODER_ABI_VERSION \
- (7 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
+ (8 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/
/*! \brief Encoder capabilities bitfield
*
@@ -182,8 +182,10 @@ typedef struct vpx_codec_cx_pkt {
* Only applicable when "output partition" mode is enabled. First
* partition has id 0.*/
int partition_id;
- unsigned int width; /**< frame width */
- unsigned int height; /**< frame height */
+    /*!\brief Width and height of frames in this packet. VP8 only uses the
+     * first element. */
+ unsigned int width[VPX_SS_MAX_LAYERS]; /**< frame width */
+ unsigned int height[VPX_SS_MAX_LAYERS]; /**< frame height */
} frame; /**< data for compressed frame packet */
vpx_fixed_buf_t twopass_stats; /**< data for two-pass packet */
vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */
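Because the frame packet now carries per-spatial-layer dimensions, SVC consumers index width/height with the layer id. A small sketch of reading them from a returned packet (assumes the usual vpx_codec_get_cx_data() loop; the function name is hypothetical, and a plain VP8/VP9 encode only populates index 0):

#include <stdio.h>
#include "vpx/vpx_encoder.h"

/* Sketch: print the dimensions stored for spatial layer `sl` of a packet. */
static void print_layer_dims(const vpx_codec_cx_pkt_t *pkt, int sl) {
  if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
    printf("layer %d: %ux%u\n", sl, pkt->data.frame.width[sl],
           pkt->data.frame.height[sl]);
  }
}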
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_frame_buffer.h b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_frame_buffer.h
index ad70cdd572b..ac17c529df0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vpx_frame_buffer.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vpx_frame_buffer.h
@@ -57,7 +57,7 @@ typedef struct vpx_codec_frame_buffer {
* return 0. Any failure the callback must return a value less than 0.
*
* \param[in] priv Callback's private data
- * \param[in] new_size Size in bytes needed by the buffer
+ * \param[in] min_size Size in bytes needed by the buffer
* \param[in,out] fb Pointer to vpx_codec_frame_buffer_t
*/
typedef int (*vpx_get_frame_buffer_cb_fn_t)(void *priv, size_t min_size,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct16x16_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct16x16_add_neon.c
index 5358839b538..3fa2f9e28f3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct16x16_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct16x16_add_neon.c
@@ -14,58 +14,33 @@
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/inv_txfm.h"
-static INLINE void highbd_idct16x16_add_wrap_low_8x2(const int64x2x2_t *const t,
- int32x4x2_t *const d0,
- int32x4x2_t *const d1) {
- int32x2x2_t t32[4];
-
- t32[0].val[0] = vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS);
- t32[0].val[1] = vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS);
- t32[1].val[0] = vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS);
- t32[1].val[1] = vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS);
- t32[2].val[0] = vrshrn_n_s64(t[2].val[0], DCT_CONST_BITS);
- t32[2].val[1] = vrshrn_n_s64(t[2].val[1], DCT_CONST_BITS);
- t32[3].val[0] = vrshrn_n_s64(t[3].val[0], DCT_CONST_BITS);
- t32[3].val[1] = vrshrn_n_s64(t[3].val[1], DCT_CONST_BITS);
- d0->val[0] = vcombine_s32(t32[0].val[0], t32[0].val[1]);
- d0->val[1] = vcombine_s32(t32[1].val[0], t32[1].val[1]);
- d1->val[0] = vcombine_s32(t32[2].val[0], t32[2].val[1]);
- d1->val[1] = vcombine_s32(t32[3].val[0], t32[3].val[1]);
+static INLINE int32x4_t dct_const_round_shift_high_4(const int64x2x2_t in) {
+ int32x2x2_t t32;
+
+ t32.val[0] = vrshrn_n_s64(in.val[0], DCT_CONST_BITS);
+ t32.val[1] = vrshrn_n_s64(in.val[1], DCT_CONST_BITS);
+ return vcombine_s32(t32.val[0], t32.val[1]);
}
-static INLINE void highbd_idct16x16_add_wrap_low_4x2(const int64x2x2_t *const t,
- int32x4_t *const d0,
- int32x4_t *const d1) {
- int32x2x2_t t32[2];
-
- t32[0].val[0] = vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS);
- t32[0].val[1] = vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS);
- t32[1].val[0] = vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS);
- t32[1].val[1] = vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS);
- *d0 = vcombine_s32(t32[0].val[0], t32[0].val[1]);
- *d1 = vcombine_s32(t32[1].val[0], t32[1].val[1]);
+static INLINE void dct_const_round_shift_high_4_dual(
+ const int64x2x2_t *const in, int32x4_t *const d0, int32x4_t *const d1) {
+ *d0 = dct_const_round_shift_high_4(in[0]);
+ *d1 = dct_const_round_shift_high_4(in[1]);
}
static INLINE int32x4x2_t
-highbd_idct16x16_add_wrap_low_8x1(const int64x2x2_t *const t) {
- int32x2x2_t t32[2];
- int32x4x2_t d;
-
- t32[0].val[0] = vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS);
- t32[0].val[1] = vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS);
- t32[1].val[0] = vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS);
- t32[1].val[1] = vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS);
- d.val[0] = vcombine_s32(t32[0].val[0], t32[0].val[1]);
- d.val[1] = vcombine_s32(t32[1].val[0], t32[1].val[1]);
- return d;
+dct_const_round_shift_high_4x2_int64x2x2(const int64x2x2_t *const in) {
+ int32x4x2_t out;
+ out.val[0] = dct_const_round_shift_high_4(in[0]);
+ out.val[1] = dct_const_round_shift_high_4(in[1]);
+ return out;
}
-static INLINE int32x4_t highbd_idct16x16_add_wrap_low_4x1(const int64x2x2_t t) {
- int32x2x2_t t32;
-
- t32.val[0] = vrshrn_n_s64(t.val[0], DCT_CONST_BITS);
- t32.val[1] = vrshrn_n_s64(t.val[1], DCT_CONST_BITS);
- return vcombine_s32(t32.val[0], t32.val[1]);
+static INLINE void dct_const_round_shift_high_4x2x2(const int64x2x2_t *const in,
+ int32x4x2_t *const d0,
+ int32x4x2_t *const d1) {
+ *d0 = dct_const_round_shift_high_4x2_int64x2x2(in + 0);
+ *d1 = dct_const_round_shift_high_4x2_int64x2x2(in + 2);
}
static INLINE void highbd_idct_cospi_2_30(const int32x4x2_t s0,
@@ -107,7 +82,7 @@ static INLINE void highbd_idct_cospi_2_30(const int32x4x2_t s0,
vget_low_s32(cospi_2_30_10_22), 0);
t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]),
vget_low_s32(cospi_2_30_10_22), 0);
- highbd_idct16x16_add_wrap_low_8x2(t, d0, d1);
+ dct_const_round_shift_high_4x2x2(t, d0, d1);
}
static INLINE void highbd_idct_cospi_4_28(const int32x4x2_t s0,
@@ -149,7 +124,7 @@ static INLINE void highbd_idct_cospi_4_28(const int32x4x2_t s0,
vget_low_s32(cospi_4_12_20N_28), 0);
t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]),
vget_low_s32(cospi_4_12_20N_28), 0);
- highbd_idct16x16_add_wrap_low_8x2(t, d0, d1);
+ dct_const_round_shift_high_4x2x2(t, d0, d1);
}
static INLINE void highbd_idct_cospi_6_26(const int32x4x2_t s0,
@@ -191,7 +166,7 @@ static INLINE void highbd_idct_cospi_6_26(const int32x4x2_t s0,
vget_low_s32(cospi_6_26N_14_18N), 1);
t[3].val[1] = vmlsl_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]),
vget_low_s32(cospi_6_26N_14_18N), 1);
- highbd_idct16x16_add_wrap_low_8x2(t, d0, d1);
+ dct_const_round_shift_high_4x2x2(t, d0, d1);
}
static INLINE void highbd_idct_cospi_10_22(const int32x4x2_t s0,
@@ -233,7 +208,7 @@ static INLINE void highbd_idct_cospi_10_22(const int32x4x2_t s0,
vget_high_s32(cospi_2_30_10_22), 0);
t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]),
vget_high_s32(cospi_2_30_10_22), 0);
- highbd_idct16x16_add_wrap_low_8x2(t, d0, d1);
+ dct_const_round_shift_high_4x2x2(t, d0, d1);
}
static INLINE void highbd_idct_cospi_12_20(const int32x4x2_t s0,
@@ -275,7 +250,7 @@ static INLINE void highbd_idct_cospi_12_20(const int32x4x2_t s0,
vget_high_s32(cospi_4_12_20N_28), 0);
t[3].val[1] = vmlsl_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]),
vget_high_s32(cospi_4_12_20N_28), 0);
- highbd_idct16x16_add_wrap_low_8x2(t, d0, d1);
+ dct_const_round_shift_high_4x2x2(t, d0, d1);
}
static INLINE void highbd_idct_cospi_14_18(const int32x4x2_t s0,
@@ -317,7 +292,7 @@ static INLINE void highbd_idct_cospi_14_18(const int32x4x2_t s0,
vget_high_s32(cospi_6_26N_14_18N), 1);
t[3].val[1] = vmlsl_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]),
vget_high_s32(cospi_6_26N_14_18N), 1);
- highbd_idct16x16_add_wrap_low_8x2(t, d0, d1);
+ dct_const_round_shift_high_4x2x2(t, d0, d1);
}
static INLINE void highbd_idct_cospi_8_24_q_kernel(
@@ -386,7 +361,7 @@ static INLINE void highbd_idct_cospi_8_24_q(const int32x4x2_t s0,
int64x2x2_t t[4];
highbd_idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t);
- highbd_idct16x16_add_wrap_low_8x2(t, d0, d1);
+ dct_const_round_shift_high_4x2x2(t, d0, d1);
}
static INLINE void highbd_idct_cospi_8_24_d(const int32x4_t s0,
@@ -397,7 +372,7 @@ static INLINE void highbd_idct_cospi_8_24_d(const int32x4_t s0,
int64x2x2_t t[2];
highbd_idct_cospi_8_24_d_kernel(s0, s1, cospi_0_8_16_24, t);
- highbd_idct16x16_add_wrap_low_4x2(t, d0, d1);
+ dct_const_round_shift_high_4_dual(t, d0, d1);
}
static INLINE void highbd_idct_cospi_8_24_neg_q(const int32x4x2_t s0,
@@ -412,7 +387,7 @@ static INLINE void highbd_idct_cospi_8_24_neg_q(const int32x4x2_t s0,
t[2].val[1] = vsubq_s64(vdupq_n_s64(0), t[2].val[1]);
t[3].val[0] = vsubq_s64(vdupq_n_s64(0), t[3].val[0]);
t[3].val[1] = vsubq_s64(vdupq_n_s64(0), t[3].val[1]);
- highbd_idct16x16_add_wrap_low_8x2(t, d0, d1);
+ dct_const_round_shift_high_4x2x2(t, d0, d1);
}
static INLINE void highbd_idct_cospi_8_24_neg_d(const int32x4_t s0,
@@ -425,7 +400,7 @@ static INLINE void highbd_idct_cospi_8_24_neg_d(const int32x4_t s0,
highbd_idct_cospi_8_24_d_kernel(s0, s1, cospi_0_8_16_24, t);
t[1].val[0] = vsubq_s64(vdupq_n_s64(0), t[1].val[0]);
t[1].val[1] = vsubq_s64(vdupq_n_s64(0), t[1].val[1]);
- highbd_idct16x16_add_wrap_low_4x2(t, d0, d1);
+ dct_const_round_shift_high_4_dual(t, d0, d1);
}
static INLINE void highbd_idct_cospi_16_16_q(const int32x4x2_t s0,
@@ -459,7 +434,7 @@ static INLINE void highbd_idct_cospi_16_16_q(const int32x4x2_t s0,
vget_high_s32(cospi_0_8_16_24), 0);
t[3].val[1] = vmlal_lane_s32(t[5].val[1], vget_high_s32(s0.val[1]),
vget_high_s32(cospi_0_8_16_24), 0);
- highbd_idct16x16_add_wrap_low_8x2(t, d0, d1);
+ dct_const_round_shift_high_4x2x2(t, d0, d1);
}
static INLINE void highbd_idct_cospi_16_16_d(const int32x4_t s0,
@@ -481,7 +456,7 @@ static INLINE void highbd_idct_cospi_16_16_d(const int32x4_t s0,
vget_high_s32(cospi_0_8_16_24), 0);
t[1].val[1] = vmlal_lane_s32(t[2].val[1], vget_high_s32(s0),
vget_high_s32(cospi_0_8_16_24), 0);
- highbd_idct16x16_add_wrap_low_4x2(t, d0, d1);
+ dct_const_round_shift_high_4_dual(t, d0, d1);
}
static INLINE void highbd_idct16x16_add_stage7_dual(
@@ -815,7 +790,7 @@ static INLINE int32x4x2_t highbd_idct_cospi_lane0_dual(const int32x4x2_t s,
t[0].val[1] = vmull_lane_s32(vget_high_s32(s.val[0]), coef, 0);
t[1].val[0] = vmull_lane_s32(vget_low_s32(s.val[1]), coef, 0);
t[1].val[1] = vmull_lane_s32(vget_high_s32(s.val[1]), coef, 0);
- return highbd_idct16x16_add_wrap_low_8x1(t);
+ return dct_const_round_shift_high_4x2_int64x2x2(t);
}
static INLINE int32x4_t highbd_idct_cospi_lane0(const int32x4_t s,
@@ -824,7 +799,7 @@ static INLINE int32x4_t highbd_idct_cospi_lane0(const int32x4_t s,
t.val[0] = vmull_lane_s32(vget_low_s32(s), coef, 0);
t.val[1] = vmull_lane_s32(vget_high_s32(s), coef, 0);
- return highbd_idct16x16_add_wrap_low_4x1(t);
+ return dct_const_round_shift_high_4(t);
}
static INLINE int32x4x2_t highbd_idct_cospi_lane1_dual(const int32x4x2_t s,
@@ -835,7 +810,7 @@ static INLINE int32x4x2_t highbd_idct_cospi_lane1_dual(const int32x4x2_t s,
t[0].val[1] = vmull_lane_s32(vget_high_s32(s.val[0]), coef, 1);
t[1].val[0] = vmull_lane_s32(vget_low_s32(s.val[1]), coef, 1);
t[1].val[1] = vmull_lane_s32(vget_high_s32(s.val[1]), coef, 1);
- return highbd_idct16x16_add_wrap_low_8x1(t);
+ return dct_const_round_shift_high_4x2_int64x2x2(t);
}
static INLINE int32x4_t highbd_idct_cospi_lane1(const int32x4_t s,
@@ -844,7 +819,7 @@ static INLINE int32x4_t highbd_idct_cospi_lane1(const int32x4_t s,
t.val[0] = vmull_lane_s32(vget_low_s32(s), coef, 1);
t.val[1] = vmull_lane_s32(vget_high_s32(s), coef, 1);
- return highbd_idct16x16_add_wrap_low_4x1(t);
+ return dct_const_round_shift_high_4(t);
}
static void vpx_highbd_idct16x16_38_add_half1d(const int32_t *input,
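The renamed helpers all reduce to the rounding shift that the scalar inverse transforms apply after each multiply. For orientation, a scalar sketch of what each vrshrn_n_s64(..., DCT_CONST_BITS) pair computes (assuming DCT_CONST_BITS is 14, as defined in vpx_dsp/txfm_common.h; the function name is hypothetical):

#include <stdint.h>

#define DCT_CONST_BITS_SKETCH 14 /* assumed value of DCT_CONST_BITS */

/* Rounding shift: add half of the divisor, then shift right. */
static int32_t dct_const_round_shift_sketch(int64_t x) {
  return (int32_t)((x + (1 << (DCT_CONST_BITS_SKETCH - 1))) >>
                   DCT_CONST_BITS_SKETCH);
}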
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c
index 96a55c472f6..5b36f733678 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c
@@ -124,83 +124,77 @@ static INLINE void do_butterfly(const int32x4x2_t qIn0, const int32x4x2_t qIn1,
vrshrn_n_s64(q[3].val[1], DCT_CONST_BITS));
}
-static INLINE void load_s32x4q_dual(
- const int32_t *in, int32x4x2_t *const s0, int32x4x2_t *const s1,
- int32x4x2_t *const s2, int32x4x2_t *const s3, int32x4x2_t *const s4,
- int32x4x2_t *const s5, int32x4x2_t *const s6, int32x4x2_t *const s7) {
- s0->val[0] = vld1q_s32(in);
- s0->val[1] = vld1q_s32(in + 4);
+static INLINE void load_s32x4q_dual(const int32_t *in, int32x4x2_t *const s) {
+ s[0].val[0] = vld1q_s32(in);
+ s[0].val[1] = vld1q_s32(in + 4);
in += 32;
- s1->val[0] = vld1q_s32(in);
- s1->val[1] = vld1q_s32(in + 4);
+ s[1].val[0] = vld1q_s32(in);
+ s[1].val[1] = vld1q_s32(in + 4);
in += 32;
- s2->val[0] = vld1q_s32(in);
- s2->val[1] = vld1q_s32(in + 4);
+ s[2].val[0] = vld1q_s32(in);
+ s[2].val[1] = vld1q_s32(in + 4);
in += 32;
- s3->val[0] = vld1q_s32(in);
- s3->val[1] = vld1q_s32(in + 4);
+ s[3].val[0] = vld1q_s32(in);
+ s[3].val[1] = vld1q_s32(in + 4);
in += 32;
- s4->val[0] = vld1q_s32(in);
- s4->val[1] = vld1q_s32(in + 4);
+ s[4].val[0] = vld1q_s32(in);
+ s[4].val[1] = vld1q_s32(in + 4);
in += 32;
- s5->val[0] = vld1q_s32(in);
- s5->val[1] = vld1q_s32(in + 4);
+ s[5].val[0] = vld1q_s32(in);
+ s[5].val[1] = vld1q_s32(in + 4);
in += 32;
- s6->val[0] = vld1q_s32(in);
- s6->val[1] = vld1q_s32(in + 4);
+ s[6].val[0] = vld1q_s32(in);
+ s[6].val[1] = vld1q_s32(in + 4);
in += 32;
- s7->val[0] = vld1q_s32(in);
- s7->val[1] = vld1q_s32(in + 4);
+ s[7].val[0] = vld1q_s32(in);
+ s[7].val[1] = vld1q_s32(in + 4);
}
-static INLINE void transpose_and_store_s32_8x8(int32x4x2_t a0, int32x4x2_t a1,
- int32x4x2_t a2, int32x4x2_t a3,
- int32x4x2_t a4, int32x4x2_t a5,
- int32x4x2_t a6, int32x4x2_t a7,
+static INLINE void transpose_and_store_s32_8x8(int32x4x2_t *const a,
int32_t **out) {
- transpose_s32_8x8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);
+ transpose_s32_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
- vst1q_s32(*out, a0.val[0]);
+ vst1q_s32(*out, a[0].val[0]);
*out += 4;
- vst1q_s32(*out, a0.val[1]);
+ vst1q_s32(*out, a[0].val[1]);
*out += 4;
- vst1q_s32(*out, a1.val[0]);
+ vst1q_s32(*out, a[1].val[0]);
*out += 4;
- vst1q_s32(*out, a1.val[1]);
+ vst1q_s32(*out, a[1].val[1]);
*out += 4;
- vst1q_s32(*out, a2.val[0]);
+ vst1q_s32(*out, a[2].val[0]);
*out += 4;
- vst1q_s32(*out, a2.val[1]);
+ vst1q_s32(*out, a[2].val[1]);
*out += 4;
- vst1q_s32(*out, a3.val[0]);
+ vst1q_s32(*out, a[3].val[0]);
*out += 4;
- vst1q_s32(*out, a3.val[1]);
+ vst1q_s32(*out, a[3].val[1]);
*out += 4;
- vst1q_s32(*out, a4.val[0]);
+ vst1q_s32(*out, a[4].val[0]);
*out += 4;
- vst1q_s32(*out, a4.val[1]);
+ vst1q_s32(*out, a[4].val[1]);
*out += 4;
- vst1q_s32(*out, a5.val[0]);
+ vst1q_s32(*out, a[5].val[0]);
*out += 4;
- vst1q_s32(*out, a5.val[1]);
+ vst1q_s32(*out, a[5].val[1]);
*out += 4;
- vst1q_s32(*out, a6.val[0]);
+ vst1q_s32(*out, a[6].val[0]);
*out += 4;
- vst1q_s32(*out, a6.val[1]);
+ vst1q_s32(*out, a[6].val[1]);
*out += 4;
- vst1q_s32(*out, a7.val[0]);
+ vst1q_s32(*out, a[7].val[0]);
*out += 4;
- vst1q_s32(*out, a7.val[1]);
+ vst1q_s32(*out, a[7].val[1]);
*out += 4;
}
static INLINE void idct32_transpose_pair(const int32_t *input, int32_t *t_buf) {
int i;
- int32x4x2_t s0, s1, s2, s3, s4, s5, s6, s7;
+ int32x4x2_t s[8];
for (i = 0; i < 4; i++, input += 8) {
- load_s32x4q_dual(input, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
- transpose_and_store_s32_8x8(s0, s1, s2, s3, s4, s5, s6, s7, &t_buf);
+ load_s32x4q_dual(input, s);
+ transpose_and_store_s32_8x8(s, &t_buf);
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct4x4_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct4x4_add_neon.c
index 1418a75a15b..7be1dad1d31 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct4x4_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct4x4_add_neon.c
@@ -11,27 +11,10 @@
#include <arm_neon.h>
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/arm/highbd_idct_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/inv_txfm.h"
-static INLINE void highbd_idct4x4_1_add_kernel1(uint16_t **dest,
- const int stride,
- const int16x8_t res,
- const int16x8_t max) {
- const uint16x4_t a0 = vld1_u16(*dest);
- const uint16x4_t a1 = vld1_u16(*dest + stride);
- const int16x8_t a = vreinterpretq_s16_u16(vcombine_u16(a0, a1));
- // Note: In some profile tests, res is quite close to +/-32767.
- // We use saturating addition.
- const int16x8_t b = vqaddq_s16(res, a);
- const int16x8_t c = vminq_s16(b, max);
- const uint16x8_t d = vqshluq_n_s16(c, 0);
- vst1_u16(*dest, vget_low_u16(d));
- *dest += stride;
- vst1_u16(*dest, vget_high_u16(d));
- *dest += stride;
-}
-
// res is in reverse row order
static INLINE void highbd_idct4x4_1_add_kernel2(uint16_t **dest,
const int stride,
@@ -65,109 +48,42 @@ void vpx_highbd_idct4x4_1_add_neon(const tran_low_t *input, uint16_t *dest,
highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max);
}
-static INLINE void idct4x4_16_kernel_bd10(const int32x4_t cospis,
- int32x4_t *const a0,
- int32x4_t *const a1,
- int32x4_t *const a2,
- int32x4_t *const a3) {
- int32x4_t b0, b1, b2, b3;
-
- transpose_s32_4x4(a0, a1, a2, a3);
- b0 = vaddq_s32(*a0, *a2);
- b1 = vsubq_s32(*a0, *a2);
- b0 = vmulq_lane_s32(b0, vget_high_s32(cospis), 0);
- b1 = vmulq_lane_s32(b1, vget_high_s32(cospis), 0);
- b2 = vmulq_lane_s32(*a1, vget_high_s32(cospis), 1);
- b3 = vmulq_lane_s32(*a1, vget_low_s32(cospis), 1);
- b2 = vmlsq_lane_s32(b2, *a3, vget_low_s32(cospis), 1);
- b3 = vmlaq_lane_s32(b3, *a3, vget_high_s32(cospis), 1);
- b0 = vrshrq_n_s32(b0, DCT_CONST_BITS);
- b1 = vrshrq_n_s32(b1, DCT_CONST_BITS);
- b2 = vrshrq_n_s32(b2, DCT_CONST_BITS);
- b3 = vrshrq_n_s32(b3, DCT_CONST_BITS);
- *a0 = vaddq_s32(b0, b3);
- *a1 = vaddq_s32(b1, b2);
- *a2 = vsubq_s32(b1, b2);
- *a3 = vsubq_s32(b0, b3);
-}
-
-static INLINE void idct4x4_16_kernel_bd12(const int32x4_t cospis,
- int32x4_t *const a0,
- int32x4_t *const a1,
- int32x4_t *const a2,
- int32x4_t *const a3) {
- int32x4_t b0, b1, b2, b3;
- int64x2_t c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11;
-
- transpose_s32_4x4(a0, a1, a2, a3);
- b0 = vaddq_s32(*a0, *a2);
- b1 = vsubq_s32(*a0, *a2);
- c0 = vmull_lane_s32(vget_low_s32(b0), vget_high_s32(cospis), 0);
- c1 = vmull_lane_s32(vget_high_s32(b0), vget_high_s32(cospis), 0);
- c2 = vmull_lane_s32(vget_low_s32(b1), vget_high_s32(cospis), 0);
- c3 = vmull_lane_s32(vget_high_s32(b1), vget_high_s32(cospis), 0);
- c4 = vmull_lane_s32(vget_low_s32(*a1), vget_high_s32(cospis), 1);
- c5 = vmull_lane_s32(vget_high_s32(*a1), vget_high_s32(cospis), 1);
- c6 = vmull_lane_s32(vget_low_s32(*a1), vget_low_s32(cospis), 1);
- c7 = vmull_lane_s32(vget_high_s32(*a1), vget_low_s32(cospis), 1);
- c8 = vmull_lane_s32(vget_low_s32(*a3), vget_low_s32(cospis), 1);
- c9 = vmull_lane_s32(vget_high_s32(*a3), vget_low_s32(cospis), 1);
- c10 = vmull_lane_s32(vget_low_s32(*a3), vget_high_s32(cospis), 1);
- c11 = vmull_lane_s32(vget_high_s32(*a3), vget_high_s32(cospis), 1);
- c4 = vsubq_s64(c4, c8);
- c5 = vsubq_s64(c5, c9);
- c6 = vaddq_s64(c6, c10);
- c7 = vaddq_s64(c7, c11);
- b0 = vcombine_s32(vrshrn_n_s64(c0, DCT_CONST_BITS),
- vrshrn_n_s64(c1, DCT_CONST_BITS));
- b1 = vcombine_s32(vrshrn_n_s64(c2, DCT_CONST_BITS),
- vrshrn_n_s64(c3, DCT_CONST_BITS));
- b2 = vcombine_s32(vrshrn_n_s64(c4, DCT_CONST_BITS),
- vrshrn_n_s64(c5, DCT_CONST_BITS));
- b3 = vcombine_s32(vrshrn_n_s64(c6, DCT_CONST_BITS),
- vrshrn_n_s64(c7, DCT_CONST_BITS));
- *a0 = vaddq_s32(b0, b3);
- *a1 = vaddq_s32(b1, b2);
- *a2 = vsubq_s32(b1, b2);
- *a3 = vsubq_s32(b0, b3);
-}
-
void vpx_highbd_idct4x4_16_add_neon(const tran_low_t *input, uint16_t *dest,
int stride, int bd) {
const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
- int32x4_t c0 = vld1q_s32(input);
- int32x4_t c1 = vld1q_s32(input + 4);
- int32x4_t c2 = vld1q_s32(input + 8);
- int32x4_t c3 = vld1q_s32(input + 12);
- int16x8_t a0, a1;
+ int16x8_t a[2];
+ int32x4_t c[4];
- if (bd == 8) {
- const int16x4_t cospis = vld1_s16(kCospi);
+ c[0] = vld1q_s32(input);
+ c[1] = vld1q_s32(input + 4);
+ c[2] = vld1q_s32(input + 8);
+ c[3] = vld1q_s32(input + 12);
+ if (bd == 8) {
// Rows
- a0 = vcombine_s16(vmovn_s32(c0), vmovn_s32(c1));
- a1 = vcombine_s16(vmovn_s32(c2), vmovn_s32(c3));
- idct4x4_16_kernel_bd8(cospis, &a0, &a1);
+ a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1]));
+ a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3]));
+ transpose_idct4x4_16_bd8(a);
// Columns
- a1 = vcombine_s16(vget_high_s16(a1), vget_low_s16(a1));
- idct4x4_16_kernel_bd8(cospis, &a0, &a1);
- a0 = vrshrq_n_s16(a0, 4);
- a1 = vrshrq_n_s16(a1, 4);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_idct4x4_16_bd8(a);
+ a[0] = vrshrq_n_s16(a[0], 4);
+ a[1] = vrshrq_n_s16(a[1], 4);
} else {
const int32x4_t cospis = vld1q_s32(kCospi32);
if (bd == 10) {
- idct4x4_16_kernel_bd10(cospis, &c0, &c1, &c2, &c3);
- idct4x4_16_kernel_bd10(cospis, &c0, &c1, &c2, &c3);
+ idct4x4_16_kernel_bd10(cospis, c);
+ idct4x4_16_kernel_bd10(cospis, c);
} else {
- idct4x4_16_kernel_bd12(cospis, &c0, &c1, &c2, &c3);
- idct4x4_16_kernel_bd12(cospis, &c0, &c1, &c2, &c3);
+ idct4x4_16_kernel_bd12(cospis, c);
+ idct4x4_16_kernel_bd12(cospis, c);
}
- a0 = vcombine_s16(vqrshrn_n_s32(c0, 4), vqrshrn_n_s32(c1, 4));
- a1 = vcombine_s16(vqrshrn_n_s32(c3, 4), vqrshrn_n_s32(c2, 4));
+ a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4));
+ a[1] = vcombine_s16(vqrshrn_n_s32(c[3], 4), vqrshrn_n_s32(c[2], 4));
}
- highbd_idct4x4_1_add_kernel1(&dest, stride, a0, max);
- highbd_idct4x4_1_add_kernel2(&dest, stride, a1, max);
+ highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max);
+ highbd_idct4x4_1_add_kernel2(&dest, stride, a[1], max);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct8x8_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct8x8_add_neon.c
index dd90134a6e0..e51e574cc9b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct8x8_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct8x8_add_neon.c
@@ -127,7 +127,7 @@ static INLINE void idct8x8_12_half1d_bd12(
int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3,
int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6,
int32x4_t *const io7) {
- int32x2_t input_1l, input_1h, input_3l, input_3h;
+ int32x2_t input1l, input1h, input3l, input3h;
int32x2_t step1l[2], step1h[2];
int32x4_t step1[8], step2[8];
int64x2_t t64[8];
@@ -136,23 +136,23 @@ static INLINE void idct8x8_12_half1d_bd12(
transpose_s32_4x4(io0, io1, io2, io3);
// stage 1
- input_1l = vget_low_s32(*io1);
- input_1h = vget_high_s32(*io1);
- input_3l = vget_low_s32(*io3);
- input_3h = vget_high_s32(*io3);
+ input1l = vget_low_s32(*io1);
+ input1h = vget_high_s32(*io1);
+ input3l = vget_low_s32(*io3);
+ input3h = vget_high_s32(*io3);
step1l[0] = vget_low_s32(*io0);
step1h[0] = vget_high_s32(*io0);
step1l[1] = vget_low_s32(*io2);
step1h[1] = vget_high_s32(*io2);
- t64[0] = vmull_lane_s32(input_1l, vget_high_s32(cospis1), 1);
- t64[1] = vmull_lane_s32(input_1h, vget_high_s32(cospis1), 1);
- t64[2] = vmull_lane_s32(input_3l, vget_high_s32(cospis1), 0);
- t64[3] = vmull_lane_s32(input_3h, vget_high_s32(cospis1), 0);
- t64[4] = vmull_lane_s32(input_3l, vget_low_s32(cospis1), 1);
- t64[5] = vmull_lane_s32(input_3h, vget_low_s32(cospis1), 1);
- t64[6] = vmull_lane_s32(input_1l, vget_low_s32(cospis1), 0);
- t64[7] = vmull_lane_s32(input_1h, vget_low_s32(cospis1), 0);
+ t64[0] = vmull_lane_s32(input1l, vget_high_s32(cospis1), 1);
+ t64[1] = vmull_lane_s32(input1h, vget_high_s32(cospis1), 1);
+ t64[2] = vmull_lane_s32(input3l, vget_high_s32(cospis1), 0);
+ t64[3] = vmull_lane_s32(input3h, vget_high_s32(cospis1), 0);
+ t64[4] = vmull_lane_s32(input3l, vget_low_s32(cospis1), 1);
+ t64[5] = vmull_lane_s32(input3h, vget_low_s32(cospis1), 1);
+ t64[6] = vmull_lane_s32(input1l, vget_low_s32(cospis1), 0);
+ t64[7] = vmull_lane_s32(input1h, vget_low_s32(cospis1), 0);
t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS);
t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS);
t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS);
@@ -222,9 +222,7 @@ static INLINE void idct8x8_12_half1d_bd12(
*io7 = vsubq_s32(step1[0], step2[7]);
}
-static INLINE void highbd_add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2,
- int16x8_t a3, int16x8_t a4, int16x8_t a5,
- int16x8_t a6, int16x8_t a7, uint16_t *dest,
+static INLINE void highbd_add8x8(int16x8_t *const a, uint16_t *dest,
const int stride, const int bd) {
const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
const uint16_t *dst = dest;
@@ -248,14 +246,14 @@ static INLINE void highbd_add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2,
dst += stride;
d7 = vld1q_u16(dst);
- d0_s16 = vqaddq_s16(a0, vreinterpretq_s16_u16(d0));
- d1_s16 = vqaddq_s16(a1, vreinterpretq_s16_u16(d1));
- d2_s16 = vqaddq_s16(a2, vreinterpretq_s16_u16(d2));
- d3_s16 = vqaddq_s16(a3, vreinterpretq_s16_u16(d3));
- d4_s16 = vqaddq_s16(a4, vreinterpretq_s16_u16(d4));
- d5_s16 = vqaddq_s16(a5, vreinterpretq_s16_u16(d5));
- d6_s16 = vqaddq_s16(a6, vreinterpretq_s16_u16(d6));
- d7_s16 = vqaddq_s16(a7, vreinterpretq_s16_u16(d7));
+ d0_s16 = vqaddq_s16(a[0], vreinterpretq_s16_u16(d0));
+ d1_s16 = vqaddq_s16(a[1], vreinterpretq_s16_u16(d1));
+ d2_s16 = vqaddq_s16(a[2], vreinterpretq_s16_u16(d2));
+ d3_s16 = vqaddq_s16(a[3], vreinterpretq_s16_u16(d3));
+ d4_s16 = vqaddq_s16(a[4], vreinterpretq_s16_u16(d4));
+ d5_s16 = vqaddq_s16(a[5], vreinterpretq_s16_u16(d5));
+ d6_s16 = vqaddq_s16(a[6], vreinterpretq_s16_u16(d6));
+ d7_s16 = vqaddq_s16(a[7], vreinterpretq_s16_u16(d7));
d0_s16 = vminq_s16(d0_s16, max);
d1_s16 = vminq_s16(d1_s16, max);
@@ -293,11 +291,13 @@ static INLINE void highbd_add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2,
void vpx_highbd_idct8x8_12_add_neon(const tran_low_t *input, uint16_t *dest,
int stride, int bd) {
- int32x4_t a0 = vld1q_s32(input);
- int32x4_t a1 = vld1q_s32(input + 8);
- int32x4_t a2 = vld1q_s32(input + 16);
- int32x4_t a3 = vld1q_s32(input + 24);
- int16x8_t c0, c1, c2, c3, c4, c5, c6, c7;
+ int32x4_t a[16];
+ int16x8_t c[8];
+
+ a[0] = vld1q_s32(input);
+ a[1] = vld1q_s32(input + 8);
+ a[2] = vld1q_s32(input + 16);
+ a[3] = vld1q_s32(input + 24);
if (bd == 8) {
const int16x8_t cospis = vld1q_s16(kCospi);
@@ -305,54 +305,52 @@ void vpx_highbd_idct8x8_12_add_neon(const tran_low_t *input, uint16_t *dest,
const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
const int16x4_t cospisd0 = vget_low_s16(cospisd); // doubled 0, 8, 16, 24
const int16x4_t cospisd1 = vget_high_s16(cospisd); // doubled 4, 12, 20, 28
- int16x4_t b0 = vmovn_s32(a0);
- int16x4_t b1 = vmovn_s32(a1);
- int16x4_t b2 = vmovn_s32(a2);
- int16x4_t b3 = vmovn_s32(a3);
- int16x4_t b4, b5, b6, b7;
-
- idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, &b0, &b1, &b2, &b3, &b4,
- &b5, &b6, &b7);
- idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, b0, b1, b2, b3, b4, b5,
- b6, b7, &c0, &c1, &c2, &c3, &c4, &c5, &c6, &c7);
- c0 = vrshrq_n_s16(c0, 5);
- c1 = vrshrq_n_s16(c1, 5);
- c2 = vrshrq_n_s16(c2, 5);
- c3 = vrshrq_n_s16(c3, 5);
- c4 = vrshrq_n_s16(c4, 5);
- c5 = vrshrq_n_s16(c5, 5);
- c6 = vrshrq_n_s16(c6, 5);
- c7 = vrshrq_n_s16(c7, 5);
+ int16x4_t b[8];
+
+ b[0] = vmovn_s32(a[0]);
+ b[1] = vmovn_s32(a[1]);
+ b[2] = vmovn_s32(a[2]);
+ b[3] = vmovn_s32(a[3]);
+
+ idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, b);
+ idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, b, c);
+ c[0] = vrshrq_n_s16(c[0], 5);
+ c[1] = vrshrq_n_s16(c[1], 5);
+ c[2] = vrshrq_n_s16(c[2], 5);
+ c[3] = vrshrq_n_s16(c[3], 5);
+ c[4] = vrshrq_n_s16(c[4], 5);
+ c[5] = vrshrq_n_s16(c[5], 5);
+ c[6] = vrshrq_n_s16(c[6], 5);
+ c[7] = vrshrq_n_s16(c[7], 5);
} else {
const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24
const int32x4_t cospis1 = vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28
- int32x4_t a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15;
if (bd == 10) {
- idct8x8_12_half1d_bd10(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5,
- &a6, &a7);
- idct8x8_12_half1d_bd10(cospis0, cospis1, &a0, &a1, &a2, &a3, &a8, &a9,
- &a10, &a11);
- idct8x8_12_half1d_bd10(cospis0, cospis1, &a4, &a5, &a6, &a7, &a12, &a13,
- &a14, &a15);
+ idct8x8_12_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_12_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
+ &a[8], &a[9], &a[10], &a[11]);
+ idct8x8_12_half1d_bd10(cospis0, cospis1, &a[4], &a[5], &a[6], &a[7],
+ &a[12], &a[13], &a[14], &a[15]);
} else {
- idct8x8_12_half1d_bd12(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5,
- &a6, &a7);
- idct8x8_12_half1d_bd12(cospis0, cospis1, &a0, &a1, &a2, &a3, &a8, &a9,
- &a10, &a11);
- idct8x8_12_half1d_bd12(cospis0, cospis1, &a4, &a5, &a6, &a7, &a12, &a13,
- &a14, &a15);
+ idct8x8_12_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_12_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
+ &a[8], &a[9], &a[10], &a[11]);
+ idct8x8_12_half1d_bd12(cospis0, cospis1, &a[4], &a[5], &a[6], &a[7],
+ &a[12], &a[13], &a[14], &a[15]);
}
- c0 = vcombine_s16(vrshrn_n_s32(a0, 5), vrshrn_n_s32(a4, 5));
- c1 = vcombine_s16(vrshrn_n_s32(a1, 5), vrshrn_n_s32(a5, 5));
- c2 = vcombine_s16(vrshrn_n_s32(a2, 5), vrshrn_n_s32(a6, 5));
- c3 = vcombine_s16(vrshrn_n_s32(a3, 5), vrshrn_n_s32(a7, 5));
- c4 = vcombine_s16(vrshrn_n_s32(a8, 5), vrshrn_n_s32(a12, 5));
- c5 = vcombine_s16(vrshrn_n_s32(a9, 5), vrshrn_n_s32(a13, 5));
- c6 = vcombine_s16(vrshrn_n_s32(a10, 5), vrshrn_n_s32(a14, 5));
- c7 = vcombine_s16(vrshrn_n_s32(a11, 5), vrshrn_n_s32(a15, 5));
+ c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5));
+ c[1] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5));
+ c[2] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5));
+ c[3] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5));
+ c[4] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5));
+ c[5] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5));
+ c[6] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5));
+ c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5));
}
- highbd_add8x8(c0, c1, c2, c3, c4, c5, c6, c7, dest, stride, bd);
+ highbd_add8x8(c, dest, stride, bd);
}
static INLINE void idct8x8_64_half1d_bd10(
@@ -428,8 +426,8 @@ static INLINE void idct8x8_64_half1d_bd12(
int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3,
int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6,
int32x4_t *const io7) {
- int32x2_t input_1l, input_1h, input_3l, input_3h, input_5l, input_5h,
- input_7l, input_7h;
+ int32x2_t input1l, input1h, input3l, input3h, input5l, input5h, input7l,
+ input7h;
int32x2_t step1l[4], step1h[4];
int32x4_t step1[8], step2[8];
int64x2_t t64[8];
@@ -438,14 +436,14 @@ static INLINE void idct8x8_64_half1d_bd12(
transpose_s32_8x4(io0, io1, io2, io3, io4, io5, io6, io7);
// stage 1
- input_1l = vget_low_s32(*io1);
- input_1h = vget_high_s32(*io1);
- input_3l = vget_low_s32(*io3);
- input_3h = vget_high_s32(*io3);
- input_5l = vget_low_s32(*io5);
- input_5h = vget_high_s32(*io5);
- input_7l = vget_low_s32(*io7);
- input_7h = vget_high_s32(*io7);
+ input1l = vget_low_s32(*io1);
+ input1h = vget_high_s32(*io1);
+ input3l = vget_low_s32(*io3);
+ input3h = vget_high_s32(*io3);
+ input5l = vget_low_s32(*io5);
+ input5h = vget_high_s32(*io5);
+ input7l = vget_low_s32(*io7);
+ input7h = vget_high_s32(*io7);
step1l[0] = vget_low_s32(*io0);
step1h[0] = vget_high_s32(*io0);
step1l[1] = vget_low_s32(*io2);
@@ -455,22 +453,22 @@ static INLINE void idct8x8_64_half1d_bd12(
step1l[3] = vget_low_s32(*io6);
step1h[3] = vget_high_s32(*io6);
- t64[0] = vmull_lane_s32(input_1l, vget_high_s32(cospis1), 1);
- t64[1] = vmull_lane_s32(input_1h, vget_high_s32(cospis1), 1);
- t64[2] = vmull_lane_s32(input_3l, vget_high_s32(cospis1), 0);
- t64[3] = vmull_lane_s32(input_3h, vget_high_s32(cospis1), 0);
- t64[4] = vmull_lane_s32(input_3l, vget_low_s32(cospis1), 1);
- t64[5] = vmull_lane_s32(input_3h, vget_low_s32(cospis1), 1);
- t64[6] = vmull_lane_s32(input_1l, vget_low_s32(cospis1), 0);
- t64[7] = vmull_lane_s32(input_1h, vget_low_s32(cospis1), 0);
- t64[0] = vmlsl_lane_s32(t64[0], input_7l, vget_low_s32(cospis1), 0);
- t64[1] = vmlsl_lane_s32(t64[1], input_7h, vget_low_s32(cospis1), 0);
- t64[2] = vmlal_lane_s32(t64[2], input_5l, vget_low_s32(cospis1), 1);
- t64[3] = vmlal_lane_s32(t64[3], input_5h, vget_low_s32(cospis1), 1);
- t64[4] = vmlsl_lane_s32(t64[4], input_5l, vget_high_s32(cospis1), 0);
- t64[5] = vmlsl_lane_s32(t64[5], input_5h, vget_high_s32(cospis1), 0);
- t64[6] = vmlal_lane_s32(t64[6], input_7l, vget_high_s32(cospis1), 1);
- t64[7] = vmlal_lane_s32(t64[7], input_7h, vget_high_s32(cospis1), 1);
+ t64[0] = vmull_lane_s32(input1l, vget_high_s32(cospis1), 1);
+ t64[1] = vmull_lane_s32(input1h, vget_high_s32(cospis1), 1);
+ t64[2] = vmull_lane_s32(input3l, vget_high_s32(cospis1), 0);
+ t64[3] = vmull_lane_s32(input3h, vget_high_s32(cospis1), 0);
+ t64[4] = vmull_lane_s32(input3l, vget_low_s32(cospis1), 1);
+ t64[5] = vmull_lane_s32(input3h, vget_low_s32(cospis1), 1);
+ t64[6] = vmull_lane_s32(input1l, vget_low_s32(cospis1), 0);
+ t64[7] = vmull_lane_s32(input1h, vget_low_s32(cospis1), 0);
+ t64[0] = vmlsl_lane_s32(t64[0], input7l, vget_low_s32(cospis1), 0);
+ t64[1] = vmlsl_lane_s32(t64[1], input7h, vget_low_s32(cospis1), 0);
+ t64[2] = vmlal_lane_s32(t64[2], input5l, vget_low_s32(cospis1), 1);
+ t64[3] = vmlal_lane_s32(t64[3], input5h, vget_low_s32(cospis1), 1);
+ t64[4] = vmlsl_lane_s32(t64[4], input5l, vget_high_s32(cospis1), 0);
+ t64[5] = vmlsl_lane_s32(t64[5], input5h, vget_high_s32(cospis1), 0);
+ t64[6] = vmlal_lane_s32(t64[6], input7l, vget_high_s32(cospis1), 1);
+ t64[7] = vmlal_lane_s32(t64[7], input7h, vget_high_s32(cospis1), 1);
t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS);
t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS);
t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS);
@@ -553,79 +551,83 @@ static INLINE void idct8x8_64_half1d_bd12(
void vpx_highbd_idct8x8_64_add_neon(const tran_low_t *input, uint16_t *dest,
int stride, int bd) {
- int32x4_t a0 = vld1q_s32(input);
- int32x4_t a1 = vld1q_s32(input + 4);
- int32x4_t a2 = vld1q_s32(input + 8);
- int32x4_t a3 = vld1q_s32(input + 12);
- int32x4_t a4 = vld1q_s32(input + 16);
- int32x4_t a5 = vld1q_s32(input + 20);
- int32x4_t a6 = vld1q_s32(input + 24);
- int32x4_t a7 = vld1q_s32(input + 28);
- int32x4_t a8 = vld1q_s32(input + 32);
- int32x4_t a9 = vld1q_s32(input + 36);
- int32x4_t a10 = vld1q_s32(input + 40);
- int32x4_t a11 = vld1q_s32(input + 44);
- int32x4_t a12 = vld1q_s32(input + 48);
- int32x4_t a13 = vld1q_s32(input + 52);
- int32x4_t a14 = vld1q_s32(input + 56);
- int32x4_t a15 = vld1q_s32(input + 60);
- int16x8_t c0, c1, c2, c3, c4, c5, c6, c7;
+ int32x4_t a[16];
+ int16x8_t c[8];
+
+ a[0] = vld1q_s32(input);
+ a[1] = vld1q_s32(input + 4);
+ a[2] = vld1q_s32(input + 8);
+ a[3] = vld1q_s32(input + 12);
+ a[4] = vld1q_s32(input + 16);
+ a[5] = vld1q_s32(input + 20);
+ a[6] = vld1q_s32(input + 24);
+ a[7] = vld1q_s32(input + 28);
+ a[8] = vld1q_s32(input + 32);
+ a[9] = vld1q_s32(input + 36);
+ a[10] = vld1q_s32(input + 40);
+ a[11] = vld1q_s32(input + 44);
+ a[12] = vld1q_s32(input + 48);
+ a[13] = vld1q_s32(input + 52);
+ a[14] = vld1q_s32(input + 56);
+ a[15] = vld1q_s32(input + 60);
if (bd == 8) {
const int16x8_t cospis = vld1q_s16(kCospi);
const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
- int16x8_t b0 = vcombine_s16(vmovn_s32(a0), vmovn_s32(a1));
- int16x8_t b1 = vcombine_s16(vmovn_s32(a2), vmovn_s32(a3));
- int16x8_t b2 = vcombine_s16(vmovn_s32(a4), vmovn_s32(a5));
- int16x8_t b3 = vcombine_s16(vmovn_s32(a6), vmovn_s32(a7));
- int16x8_t b4 = vcombine_s16(vmovn_s32(a8), vmovn_s32(a9));
- int16x8_t b5 = vcombine_s16(vmovn_s32(a10), vmovn_s32(a11));
- int16x8_t b6 = vcombine_s16(vmovn_s32(a12), vmovn_s32(a13));
- int16x8_t b7 = vcombine_s16(vmovn_s32(a14), vmovn_s32(a15));
-
- idct8x8_64_1d_bd8(cospis0, cospis1, &b0, &b1, &b2, &b3, &b4, &b5, &b6, &b7);
- idct8x8_64_1d_bd8(cospis0, cospis1, &b0, &b1, &b2, &b3, &b4, &b5, &b6, &b7);
-
- c0 = vrshrq_n_s16(b0, 5);
- c1 = vrshrq_n_s16(b1, 5);
- c2 = vrshrq_n_s16(b2, 5);
- c3 = vrshrq_n_s16(b3, 5);
- c4 = vrshrq_n_s16(b4, 5);
- c5 = vrshrq_n_s16(b5, 5);
- c6 = vrshrq_n_s16(b6, 5);
- c7 = vrshrq_n_s16(b7, 5);
+ int16x8_t b[8];
+
+ b[0] = vcombine_s16(vmovn_s32(a[0]), vmovn_s32(a[1]));
+ b[1] = vcombine_s16(vmovn_s32(a[2]), vmovn_s32(a[3]));
+ b[2] = vcombine_s16(vmovn_s32(a[4]), vmovn_s32(a[5]));
+ b[3] = vcombine_s16(vmovn_s32(a[6]), vmovn_s32(a[7]));
+ b[4] = vcombine_s16(vmovn_s32(a[8]), vmovn_s32(a[9]));
+ b[5] = vcombine_s16(vmovn_s32(a[10]), vmovn_s32(a[11]));
+ b[6] = vcombine_s16(vmovn_s32(a[12]), vmovn_s32(a[13]));
+ b[7] = vcombine_s16(vmovn_s32(a[14]), vmovn_s32(a[15]));
+
+ idct8x8_64_1d_bd8(cospis0, cospis1, b);
+ idct8x8_64_1d_bd8(cospis0, cospis1, b);
+
+ c[0] = vrshrq_n_s16(b[0], 5);
+ c[1] = vrshrq_n_s16(b[1], 5);
+ c[2] = vrshrq_n_s16(b[2], 5);
+ c[3] = vrshrq_n_s16(b[3], 5);
+ c[4] = vrshrq_n_s16(b[4], 5);
+ c[5] = vrshrq_n_s16(b[5], 5);
+ c[6] = vrshrq_n_s16(b[6], 5);
+ c[7] = vrshrq_n_s16(b[7], 5);
} else {
const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24
const int32x4_t cospis1 = vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28
if (bd == 10) {
- idct8x8_64_half1d_bd10(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5,
- &a6, &a7);
- idct8x8_64_half1d_bd10(cospis0, cospis1, &a8, &a9, &a10, &a11, &a12, &a13,
- &a14, &a15);
- idct8x8_64_half1d_bd10(cospis0, cospis1, &a0, &a8, &a1, &a9, &a2, &a10,
- &a3, &a11);
- idct8x8_64_half1d_bd10(cospis0, cospis1, &a4, &a12, &a5, &a13, &a6, &a14,
- &a7, &a15);
+ idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_64_half1d_bd10(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11],
+ &a[12], &a[13], &a[14], &a[15]);
+ idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9],
+ &a[2], &a[10], &a[3], &a[11]);
+ idct8x8_64_half1d_bd10(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13],
+ &a[6], &a[14], &a[7], &a[15]);
} else {
- idct8x8_64_half1d_bd12(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5,
- &a6, &a7);
- idct8x8_64_half1d_bd12(cospis0, cospis1, &a8, &a9, &a10, &a11, &a12, &a13,
- &a14, &a15);
- idct8x8_64_half1d_bd12(cospis0, cospis1, &a0, &a8, &a1, &a9, &a2, &a10,
- &a3, &a11);
- idct8x8_64_half1d_bd12(cospis0, cospis1, &a4, &a12, &a5, &a13, &a6, &a14,
- &a7, &a15);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11],
+ &a[12], &a[13], &a[14], &a[15]);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9],
+ &a[2], &a[10], &a[3], &a[11]);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13],
+ &a[6], &a[14], &a[7], &a[15]);
}
- c0 = vcombine_s16(vrshrn_n_s32(a0, 5), vrshrn_n_s32(a4, 5));
- c1 = vcombine_s16(vrshrn_n_s32(a8, 5), vrshrn_n_s32(a12, 5));
- c2 = vcombine_s16(vrshrn_n_s32(a1, 5), vrshrn_n_s32(a5, 5));
- c3 = vcombine_s16(vrshrn_n_s32(a9, 5), vrshrn_n_s32(a13, 5));
- c4 = vcombine_s16(vrshrn_n_s32(a2, 5), vrshrn_n_s32(a6, 5));
- c5 = vcombine_s16(vrshrn_n_s32(a10, 5), vrshrn_n_s32(a14, 5));
- c6 = vcombine_s16(vrshrn_n_s32(a3, 5), vrshrn_n_s32(a7, 5));
- c7 = vcombine_s16(vrshrn_n_s32(a11, 5), vrshrn_n_s32(a15, 5));
+ c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5));
+ c[1] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5));
+ c[2] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5));
+ c[3] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5));
+ c[4] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5));
+ c[5] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5));
+ c[6] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5));
+ c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5));
}
- highbd_add8x8(c0, c1, c2, c3, c4, c5, c6, c7, dest, stride, bd);
+ highbd_add8x8(c, dest, stride, bd);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct_neon.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct_neon.h
new file mode 100644
index 00000000000..92fcb7f3adc
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_idct_neon.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_ARM_HIGHBD_IDCT_NEON_H_
+#define VPX_DSP_ARM_HIGHBD_IDCT_NEON_H_
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/inv_txfm.h"
+
+static INLINE void highbd_idct4x4_1_add_kernel1(uint16_t **dest,
+ const int stride,
+ const int16x8_t res,
+ const int16x8_t max) {
+ const uint16x4_t a0 = vld1_u16(*dest);
+ const uint16x4_t a1 = vld1_u16(*dest + stride);
+ const int16x8_t a = vreinterpretq_s16_u16(vcombine_u16(a0, a1));
+ // Note: In some profile tests, res is quite close to +/-32767.
+ // We use saturating addition.
+ const int16x8_t b = vqaddq_s16(res, a);
+ const int16x8_t c = vminq_s16(b, max);
+ const uint16x8_t d = vqshluq_n_s16(c, 0);
+ vst1_u16(*dest, vget_low_u16(d));
+ *dest += stride;
+ vst1_u16(*dest, vget_high_u16(d));
+ *dest += stride;
+}
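
The note inside the kernel is why it uses vqaddq_s16 rather than a plain add: a residual near +/-32767 plus a destination sample would wrap in 16 bits. A small standalone illustration (the values are chosen for the example):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int16_t res = 32700;                 /* close to INT16_MAX */
  const int16_t pix = 200;                   /* destination sample */
  const int32_t wide = (int32_t)res + pix;   /* 32900 */
  const int16_t wrapped = (int16_t)wide;     /* typically wraps to -32636 */
  const int16_t saturated = (int16_t)(wide > 32767 ? 32767 : wide);
  printf("wrapped=%d saturated=%d\n", wrapped, saturated);
  return 0;
}
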
+
+static INLINE void idct4x4_16_kernel_bd10(const int32x4_t cospis,
+ int32x4_t *const a) {
+ int32x4_t b0, b1, b2, b3;
+
+ transpose_s32_4x4(&a[0], &a[1], &a[2], &a[3]);
+ b0 = vaddq_s32(a[0], a[2]);
+ b1 = vsubq_s32(a[0], a[2]);
+ b0 = vmulq_lane_s32(b0, vget_high_s32(cospis), 0);
+ b1 = vmulq_lane_s32(b1, vget_high_s32(cospis), 0);
+ b2 = vmulq_lane_s32(a[1], vget_high_s32(cospis), 1);
+ b3 = vmulq_lane_s32(a[1], vget_low_s32(cospis), 1);
+ b2 = vmlsq_lane_s32(b2, a[3], vget_low_s32(cospis), 1);
+ b3 = vmlaq_lane_s32(b3, a[3], vget_high_s32(cospis), 1);
+ b0 = vrshrq_n_s32(b0, DCT_CONST_BITS);
+ b1 = vrshrq_n_s32(b1, DCT_CONST_BITS);
+ b2 = vrshrq_n_s32(b2, DCT_CONST_BITS);
+ b3 = vrshrq_n_s32(b3, DCT_CONST_BITS);
+ a[0] = vaddq_s32(b0, b3);
+ a[1] = vaddq_s32(b1, b2);
+ a[2] = vsubq_s32(b1, b2);
+ a[3] = vsubq_s32(b0, b3);
+}
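
For reference, a scalar sketch of the same 4-point butterfly the bd10 kernel vectorizes; the cosine values are the standard VP9 constants, and the names are illustrative rather than the library's:

#include <stdint.h>

#define SK_DCT_CONST_BITS 14
#define SK_COSPI_8_64 15137
#define SK_COSPI_16_64 11585
#define SK_COSPI_24_64 6270

static int32_t sk_round_shift(int64_t x) {
  return (int32_t)((x + (1 << (SK_DCT_CONST_BITS - 1))) >> SK_DCT_CONST_BITS);
}

static void idct4_sketch(const int32_t in[4], int32_t out[4]) {
  const int32_t s0 = sk_round_shift((int64_t)(in[0] + in[2]) * SK_COSPI_16_64);
  const int32_t s1 = sk_round_shift((int64_t)(in[0] - in[2]) * SK_COSPI_16_64);
  const int32_t s2 = sk_round_shift((int64_t)in[1] * SK_COSPI_24_64 -
                                    (int64_t)in[3] * SK_COSPI_8_64);
  const int32_t s3 = sk_round_shift((int64_t)in[1] * SK_COSPI_8_64 +
                                    (int64_t)in[3] * SK_COSPI_24_64);
  out[0] = s0 + s3;
  out[1] = s1 + s2;
  out[2] = s1 - s2;
  out[3] = s0 - s3;
}
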
+
+static INLINE void idct4x4_16_kernel_bd12(const int32x4_t cospis,
+ int32x4_t *const a) {
+ int32x4_t b0, b1, b2, b3;
+ int64x2_t c[12];
+
+ transpose_s32_4x4(&a[0], &a[1], &a[2], &a[3]);
+ b0 = vaddq_s32(a[0], a[2]);
+ b1 = vsubq_s32(a[0], a[2]);
+ c[0] = vmull_lane_s32(vget_low_s32(b0), vget_high_s32(cospis), 0);
+ c[1] = vmull_lane_s32(vget_high_s32(b0), vget_high_s32(cospis), 0);
+ c[2] = vmull_lane_s32(vget_low_s32(b1), vget_high_s32(cospis), 0);
+ c[3] = vmull_lane_s32(vget_high_s32(b1), vget_high_s32(cospis), 0);
+ c[4] = vmull_lane_s32(vget_low_s32(a[1]), vget_high_s32(cospis), 1);
+ c[5] = vmull_lane_s32(vget_high_s32(a[1]), vget_high_s32(cospis), 1);
+ c[6] = vmull_lane_s32(vget_low_s32(a[1]), vget_low_s32(cospis), 1);
+ c[7] = vmull_lane_s32(vget_high_s32(a[1]), vget_low_s32(cospis), 1);
+ c[8] = vmull_lane_s32(vget_low_s32(a[3]), vget_low_s32(cospis), 1);
+ c[9] = vmull_lane_s32(vget_high_s32(a[3]), vget_low_s32(cospis), 1);
+ c[10] = vmull_lane_s32(vget_low_s32(a[3]), vget_high_s32(cospis), 1);
+ c[11] = vmull_lane_s32(vget_high_s32(a[3]), vget_high_s32(cospis), 1);
+ c[4] = vsubq_s64(c[4], c[8]);
+ c[5] = vsubq_s64(c[5], c[9]);
+ c[6] = vaddq_s64(c[6], c[10]);
+ c[7] = vaddq_s64(c[7], c[11]);
+ b0 = vcombine_s32(vrshrn_n_s64(c[0], DCT_CONST_BITS),
+ vrshrn_n_s64(c[1], DCT_CONST_BITS));
+ b1 = vcombine_s32(vrshrn_n_s64(c[2], DCT_CONST_BITS),
+ vrshrn_n_s64(c[3], DCT_CONST_BITS));
+ b2 = vcombine_s32(vrshrn_n_s64(c[4], DCT_CONST_BITS),
+ vrshrn_n_s64(c[5], DCT_CONST_BITS));
+ b3 = vcombine_s32(vrshrn_n_s64(c[6], DCT_CONST_BITS),
+ vrshrn_n_s64(c[7], DCT_CONST_BITS));
+ a[0] = vaddq_s32(b0, b3);
+ a[1] = vaddq_s32(b1, b2);
+ a[2] = vsubq_s32(b1, b2);
+ a[3] = vsubq_s32(b0, b3);
+}
+
+#endif // VPX_DSP_ARM_HIGHBD_IDCT_NEON_H_
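
The bd12 variant keeps every product in int64 (vmull_lane_s32, narrowed with vrshrn_n_s64) where the bd10 variant multiplies in int32, presumably because 12-bit-depth coefficients are large enough for a coefficient-times-cosine product to overflow 32 bits. An illustrative magnitude check; the coefficient value is made up for the example:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int32_t cospi_8_64 = 15137;  /* one of the kCospi32 constants */
  const int32_t coeff = 1 << 18;     /* illustrative 12-bit-path magnitude */
  const int64_t product = (int64_t)coeff * cospi_8_64;
  printf("product=%lld int32 max=%d\n", (long long)product, INT32_MAX);
  /* ~3.97e9 overflows int32, so the product stays in int64 and is only
     narrowed after the rounding shift by DCT_CONST_BITS. */
  return 0;
}
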
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_135_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_135_add_neon.c
index 021211bc990..057731ad924 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_135_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_135_add_neon.c
@@ -650,14 +650,10 @@ void vpx_idct32_16_neon(const int16_t *const input, void *const output,
highbd_add_and_store_bd8(out, output, stride);
} else {
uint8_t *const outputT = (uint8_t *)output;
- add_and_store_u8_s16(out[0], out[1], out[2], out[3], out[4], out[5], out[6],
- out[7], outputT, stride);
- add_and_store_u8_s16(out[8], out[9], out[10], out[11], out[12], out[13],
- out[14], out[15], outputT + (8 * stride), stride);
- add_and_store_u8_s16(out[16], out[17], out[18], out[19], out[20], out[21],
- out[22], out[23], outputT + (16 * stride), stride);
- add_and_store_u8_s16(out[24], out[25], out[26], out[27], out[28], out[29],
- out[30], out[31], outputT + (24 * stride), stride);
+ add_and_store_u8_s16(out + 0, outputT, stride);
+ add_and_store_u8_s16(out + 8, outputT + (8 * stride), stride);
+ add_and_store_u8_s16(out + 16, outputT + (16 * stride), stride);
+ add_and_store_u8_s16(out + 24, outputT + (24 * stride), stride);
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c
index f3c336fa31f..f570547e449 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c
@@ -490,14 +490,10 @@ void vpx_idct32_8_neon(const int16_t *input, void *const output, int stride,
highbd_add_and_store_bd8(out, output, stride);
} else {
uint8_t *const outputT = (uint8_t *)output;
- add_and_store_u8_s16(out[0], out[1], out[2], out[3], out[4], out[5], out[6],
- out[7], outputT, stride);
- add_and_store_u8_s16(out[8], out[9], out[10], out[11], out[12], out[13],
- out[14], out[15], outputT + (8 * stride), stride);
- add_and_store_u8_s16(out[16], out[17], out[18], out[19], out[20], out[21],
- out[22], out[23], outputT + (16 * stride), stride);
- add_and_store_u8_s16(out[24], out[25], out[26], out[27], out[28], out[29],
- out[30], out[31], outputT + (24 * stride), stride);
+ add_and_store_u8_s16(out + 0, outputT, stride);
+ add_and_store_u8_s16(out + 8, outputT + (8 * stride), stride);
+ add_and_store_u8_s16(out + 16, outputT + (16 * stride), stride);
+ add_and_store_u8_s16(out + 24, outputT + (24 * stride), stride);
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
index 673a36840e3..8192ee4cf87 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
@@ -19,44 +19,41 @@
void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
int stride) {
const uint8_t *dst = dest;
- const int16x4_t cospis = vld1_s16(kCospi);
- uint8x8_t dest01_u8;
- uint32x2_t dest32_u32 = vdup_n_u32(0);
- int16x8_t a0, a1;
- uint8x8_t d01, d32;
- uint16x8_t d01_u16, d32_u16;
+ uint32x2_t s32 = vdup_n_u32(0);
+ int16x8_t a[2];
+ uint8x8_t s, d[2];
+ uint16x8_t sum[2];
assert(!((intptr_t)dest % sizeof(uint32_t)));
assert(!(stride % sizeof(uint32_t)));
// Rows
- a0 = load_tran_low_to_s16q(input);
- a1 = load_tran_low_to_s16q(input + 8);
- idct4x4_16_kernel_bd8(cospis, &a0, &a1);
+ a[0] = load_tran_low_to_s16q(input);
+ a[1] = load_tran_low_to_s16q(input + 8);
+ transpose_idct4x4_16_bd8(a);
// Columns
- a1 = vcombine_s16(vget_high_s16(a1), vget_low_s16(a1));
- idct4x4_16_kernel_bd8(cospis, &a0, &a1);
- a0 = vrshrq_n_s16(a0, 4);
- a1 = vrshrq_n_s16(a1, 4);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_idct4x4_16_bd8(a);
+ a[0] = vrshrq_n_s16(a[0], 4);
+ a[1] = vrshrq_n_s16(a[1], 4);
- dest01_u8 = load_u8(dst, stride);
+ s = load_u8(dst, stride);
dst += 2 * stride;
// The elements are loaded in reverse order.
- dest32_u32 = vld1_lane_u32((const uint32_t *)dst, dest32_u32, 1);
+ s32 = vld1_lane_u32((const uint32_t *)dst, s32, 1);
dst += stride;
- dest32_u32 = vld1_lane_u32((const uint32_t *)dst, dest32_u32, 0);
+ s32 = vld1_lane_u32((const uint32_t *)dst, s32, 0);
- d01_u16 = vaddw_u8(vreinterpretq_u16_s16(a0), dest01_u8);
- d32_u16 =
- vaddw_u8(vreinterpretq_u16_s16(a1), vreinterpret_u8_u32(dest32_u32));
- d01 = vqmovun_s16(vreinterpretq_s16_u16(d01_u16));
- d32 = vqmovun_s16(vreinterpretq_s16_u16(d32_u16));
+ sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s);
+ sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), vreinterpret_u8_u32(s32));
+ d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0]));
+ d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1]));
- store_u8(dest, stride, d01);
+ store_u8(dest, stride, d[0]);
dest += 2 * stride;
// The elements are stored in reverse order.
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d32), 1);
+ vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d[1]), 1);
dest += stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d32), 0);
+ vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d[1]), 0);
}
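
The two "reverse order" comments describe the same layout on load and store: after the column pass, a[1] effectively holds row 3 in its low half and row 2 in its high half, so row 2 of the destination is loaded into lane 1 of the u32x2 and row 3 into lane 0. A plain-C picture of that packing, with illustrative names:

#include <stdint.h>
#include <string.h>

/* Lane 1 of the u32x2 receives row 2 and lane 0 receives row 3, so the
   8-byte vector is laid out as [row 3 | row 2]. */
static void pack_rows_3_2_sketch(const uint8_t *dest, int stride,
                                 uint8_t out[8]) {
  memcpy(out + 0, dest + 3 * stride, 4); /* lane 0 <- row 3 */
  memcpy(out + 4, dest + 2 * stride, 4); /* lane 1 <- row 2 */
}
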
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c
index 1121ade2796..7471387e476 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c
@@ -17,91 +17,25 @@
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/txfm_common.h"
-static INLINE void add8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2,
- int16x8_t a3, int16x8_t a4, int16x8_t a5,
- int16x8_t a6, int16x8_t a7, uint8_t *dest,
- const int stride) {
- const uint8_t *dst = dest;
- uint8x8_t d0, d1, d2, d3, d4, d5, d6, d7;
- uint16x8_t d0_u16, d1_u16, d2_u16, d3_u16, d4_u16, d5_u16, d6_u16, d7_u16;
-
- a0 = vrshrq_n_s16(a0, 5);
- a1 = vrshrq_n_s16(a1, 5);
- a2 = vrshrq_n_s16(a2, 5);
- a3 = vrshrq_n_s16(a3, 5);
- a4 = vrshrq_n_s16(a4, 5);
- a5 = vrshrq_n_s16(a5, 5);
- a6 = vrshrq_n_s16(a6, 5);
- a7 = vrshrq_n_s16(a7, 5);
-
- d0 = vld1_u8(dst);
- dst += stride;
- d1 = vld1_u8(dst);
- dst += stride;
- d2 = vld1_u8(dst);
- dst += stride;
- d3 = vld1_u8(dst);
- dst += stride;
- d4 = vld1_u8(dst);
- dst += stride;
- d5 = vld1_u8(dst);
- dst += stride;
- d6 = vld1_u8(dst);
- dst += stride;
- d7 = vld1_u8(dst);
-
- d0_u16 = vaddw_u8(vreinterpretq_u16_s16(a0), d0);
- d1_u16 = vaddw_u8(vreinterpretq_u16_s16(a1), d1);
- d2_u16 = vaddw_u8(vreinterpretq_u16_s16(a2), d2);
- d3_u16 = vaddw_u8(vreinterpretq_u16_s16(a3), d3);
- d4_u16 = vaddw_u8(vreinterpretq_u16_s16(a4), d4);
- d5_u16 = vaddw_u8(vreinterpretq_u16_s16(a5), d5);
- d6_u16 = vaddw_u8(vreinterpretq_u16_s16(a6), d6);
- d7_u16 = vaddw_u8(vreinterpretq_u16_s16(a7), d7);
-
- d0 = vqmovun_s16(vreinterpretq_s16_u16(d0_u16));
- d1 = vqmovun_s16(vreinterpretq_s16_u16(d1_u16));
- d2 = vqmovun_s16(vreinterpretq_s16_u16(d2_u16));
- d3 = vqmovun_s16(vreinterpretq_s16_u16(d3_u16));
- d4 = vqmovun_s16(vreinterpretq_s16_u16(d4_u16));
- d5 = vqmovun_s16(vreinterpretq_s16_u16(d5_u16));
- d6 = vqmovun_s16(vreinterpretq_s16_u16(d6_u16));
- d7 = vqmovun_s16(vreinterpretq_s16_u16(d7_u16));
-
- vst1_u8(dest, d0);
- dest += stride;
- vst1_u8(dest, d1);
- dest += stride;
- vst1_u8(dest, d2);
- dest += stride;
- vst1_u8(dest, d3);
- dest += stride;
- vst1_u8(dest, d4);
- dest += stride;
- vst1_u8(dest, d5);
- dest += stride;
- vst1_u8(dest, d6);
- dest += stride;
- vst1_u8(dest, d7);
-}
-
void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest,
int stride) {
const int16x8_t cospis = vld1q_s16(kCospi);
const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
- int16x8_t a0 = load_tran_low_to_s16q(input);
- int16x8_t a1 = load_tran_low_to_s16q(input + 8);
- int16x8_t a2 = load_tran_low_to_s16q(input + 16);
- int16x8_t a3 = load_tran_low_to_s16q(input + 24);
- int16x8_t a4 = load_tran_low_to_s16q(input + 32);
- int16x8_t a5 = load_tran_low_to_s16q(input + 40);
- int16x8_t a6 = load_tran_low_to_s16q(input + 48);
- int16x8_t a7 = load_tran_low_to_s16q(input + 56);
-
- idct8x8_64_1d_bd8(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);
- idct8x8_64_1d_bd8(cospis0, cospis1, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);
- add8x8(a0, a1, a2, a3, a4, a5, a6, a7, dest, stride);
+ int16x8_t a[8];
+
+ a[0] = load_tran_low_to_s16q(input);
+ a[1] = load_tran_low_to_s16q(input + 8);
+ a[2] = load_tran_low_to_s16q(input + 16);
+ a[3] = load_tran_low_to_s16q(input + 24);
+ a[4] = load_tran_low_to_s16q(input + 32);
+ a[5] = load_tran_low_to_s16q(input + 40);
+ a[6] = load_tran_low_to_s16q(input + 48);
+ a[7] = load_tran_low_to_s16q(input + 56);
+
+ idct8x8_64_1d_bd8(cospis0, cospis1, a);
+ idct8x8_64_1d_bd8(cospis0, cospis1, a);
+ idct8x8_add8x8_neon(a, dest, stride);
}
void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest,
@@ -111,17 +45,15 @@ void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest,
const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
const int16x4_t cospisd0 = vget_low_s16(cospisd); // doubled 0, 8, 16, 24
const int16x4_t cospisd1 = vget_high_s16(cospisd); // doubled 4, 12, 20, 28
- int16x4_t a0, a1, a2, a3, a4, a5, a6, a7;
- int16x8_t b0, b1, b2, b3, b4, b5, b6, b7;
+ int16x4_t a[8];
+ int16x8_t b[8];
- a0 = load_tran_low_to_s16d(input);
- a1 = load_tran_low_to_s16d(input + 8);
- a2 = load_tran_low_to_s16d(input + 16);
- a3 = load_tran_low_to_s16d(input + 24);
+ a[0] = load_tran_low_to_s16d(input);
+ a[1] = load_tran_low_to_s16d(input + 8);
+ a[2] = load_tran_low_to_s16d(input + 16);
+ a[3] = load_tran_low_to_s16d(input + 24);
- idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, &a0, &a1, &a2, &a3, &a4,
- &a5, &a6, &a7);
- idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, a0, a1, a2, a3, a4, a5, a6,
- a7, &b0, &b1, &b2, &b3, &b4, &b5, &b6, &b7);
- add8x8(b0, b1, b2, b3, b4, b5, b6, b7, dest, stride);
+ idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, a);
+ idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, a, b);
+ idct8x8_add8x8_neon(b, dest, stride);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.h
index 6ed02af5acc..c4d3b471131 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.h
@@ -78,6 +78,28 @@ static INLINE int32x4x2_t highbd_idct_sub_dual(const int32x4x2_t s0,
//------------------------------------------------------------------------------
+static INLINE int16x8_t dct_const_round_shift_low_8(const int32x4_t *const in) {
+ return vcombine_s16(vrshrn_n_s32(in[0], DCT_CONST_BITS),
+ vrshrn_n_s32(in[1], DCT_CONST_BITS));
+}
+
+static INLINE void dct_const_round_shift_low_8_dual(const int32x4_t *const t32,
+ int16x8_t *const d0,
+ int16x8_t *const d1) {
+ *d0 = dct_const_round_shift_low_8(t32 + 0);
+ *d1 = dct_const_round_shift_low_8(t32 + 2);
+}
+
+static INLINE int32x4x2_t
+dct_const_round_shift_high_4x2(const int64x2_t *const in) {
+ int32x4x2_t out;
+ out.val[0] = vcombine_s32(vrshrn_n_s64(in[0], DCT_CONST_BITS),
+ vrshrn_n_s64(in[1], DCT_CONST_BITS));
+ out.val[1] = vcombine_s32(vrshrn_n_s64(in[2], DCT_CONST_BITS),
+ vrshrn_n_s64(in[3], DCT_CONST_BITS));
+ return out;
+}
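
These helpers mirror libvpx's scalar dct_const_round_shift: DCT_CONST_BITS is 14, and vrshrn_n_s32 / vrshrn_n_s64 perform a rounding right shift before a non-saturating narrow. A scalar sketch of the 32-to-16-bit case, with an assumed name:

#include <stdint.h>

static int16_t round_shift_narrow_sketch(int32_t x) {
  return (int16_t)((x + (1 << 13)) >> 14); /* (x + 2^(14-1)) >> 14 */
}
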
+
// Multiply a by a_const. Saturate, shift and narrow by DCT_CONST_BITS.
static INLINE int16x8_t multiply_shift_and_narrow_s16(const int16x8_t a,
const int16_t a_const) {
@@ -102,24 +124,24 @@ static INLINE int16x8_t add_multiply_shift_and_narrow_s16(
// input) this function can not use vaddq_s16.
// In order to match existing behavior and intentionally out of range tests,
// expand the addition up to 32 bits to prevent truncation.
- int32x4_t temp_low = vaddl_s16(vget_low_s16(a), vget_low_s16(b));
- int32x4_t temp_high = vaddl_s16(vget_high_s16(a), vget_high_s16(b));
- temp_low = vmulq_n_s32(temp_low, ab_const);
- temp_high = vmulq_n_s32(temp_high, ab_const);
- return vcombine_s16(vrshrn_n_s32(temp_low, DCT_CONST_BITS),
- vrshrn_n_s32(temp_high, DCT_CONST_BITS));
+ int32x4_t t[2];
+ t[0] = vaddl_s16(vget_low_s16(a), vget_low_s16(b));
+ t[1] = vaddl_s16(vget_high_s16(a), vget_high_s16(b));
+ t[0] = vmulq_n_s32(t[0], ab_const);
+ t[1] = vmulq_n_s32(t[1], ab_const);
+ return dct_const_round_shift_low_8(t);
}
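
The comment above is the reason the sum is formed with vaddl_s16 (widening to 32 bits) rather than vaddq_s16: the intentionally out-of-range test inputs can push a 16-bit sum past INT16_MAX. A standalone illustration:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const int16_t a = 30000, b = 10000;          /* sum exceeds INT16_MAX */
  const int16_t narrow_sum = (int16_t)(a + b); /* typically wraps to -25536 */
  const int32_t wide_sum = (int32_t)a + b;     /* 40000, preserved */
  printf("narrow=%d wide=%d\n", narrow_sum, wide_sum);
  return 0;
}
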
// Subtract b from a, then multiply by ab_const. Shift and narrow by
// DCT_CONST_BITS.
static INLINE int16x8_t sub_multiply_shift_and_narrow_s16(
const int16x8_t a, const int16x8_t b, const int16_t ab_const) {
- int32x4_t temp_low = vsubl_s16(vget_low_s16(a), vget_low_s16(b));
- int32x4_t temp_high = vsubl_s16(vget_high_s16(a), vget_high_s16(b));
- temp_low = vmulq_n_s32(temp_low, ab_const);
- temp_high = vmulq_n_s32(temp_high, ab_const);
- return vcombine_s16(vrshrn_n_s32(temp_low, DCT_CONST_BITS),
- vrshrn_n_s32(temp_high, DCT_CONST_BITS));
+ int32x4_t t[2];
+ t[0] = vsubl_s16(vget_low_s16(a), vget_low_s16(b));
+ t[1] = vsubl_s16(vget_high_s16(a), vget_high_s16(b));
+ t[0] = vmulq_n_s32(t[0], ab_const);
+ t[1] = vmulq_n_s32(t[1], ab_const);
+ return dct_const_round_shift_low_8(t);
}
// Multiply a by a_const and b by b_const, then accumulate. Shift and narrow by
@@ -127,12 +149,12 @@ static INLINE int16x8_t sub_multiply_shift_and_narrow_s16(
static INLINE int16x8_t multiply_accumulate_shift_and_narrow_s16(
const int16x8_t a, const int16_t a_const, const int16x8_t b,
const int16_t b_const) {
- int32x4_t temp_low = vmull_n_s16(vget_low_s16(a), a_const);
- int32x4_t temp_high = vmull_n_s16(vget_high_s16(a), a_const);
- temp_low = vmlal_n_s16(temp_low, vget_low_s16(b), b_const);
- temp_high = vmlal_n_s16(temp_high, vget_high_s16(b), b_const);
- return vcombine_s16(vrshrn_n_s32(temp_low, DCT_CONST_BITS),
- vrshrn_n_s32(temp_high, DCT_CONST_BITS));
+ int32x4_t t[2];
+ t[0] = vmull_n_s16(vget_low_s16(a), a_const);
+ t[1] = vmull_n_s16(vget_high_s16(a), a_const);
+ t[0] = vmlal_n_s16(t[0], vget_low_s16(b), b_const);
+ t[1] = vmlal_n_s16(t[1], vget_high_s16(b), b_const);
+ return dct_const_round_shift_low_8(t);
}
//------------------------------------------------------------------------------
@@ -145,53 +167,43 @@ static INLINE int16x8_t multiply_accumulate_shift_and_narrow_s16(
static INLINE int32x4x2_t
multiply_shift_and_narrow_s32_dual(const int32x4x2_t a, const int32_t a_const) {
int64x2_t b[4];
- int32x4x2_t c;
+
b[0] = vmull_n_s32(vget_low_s32(a.val[0]), a_const);
b[1] = vmull_n_s32(vget_high_s32(a.val[0]), a_const);
b[2] = vmull_n_s32(vget_low_s32(a.val[1]), a_const);
b[3] = vmull_n_s32(vget_high_s32(a.val[1]), a_const);
- c.val[0] = vcombine_s32(vrshrn_n_s64(b[0], DCT_CONST_BITS),
- vrshrn_n_s64(b[1], DCT_CONST_BITS));
- c.val[1] = vcombine_s32(vrshrn_n_s64(b[2], DCT_CONST_BITS),
- vrshrn_n_s64(b[3], DCT_CONST_BITS));
- return c;
+ return dct_const_round_shift_high_4x2(b);
}
// Add a and b, then multiply by ab_const. Shift and narrow by DCT_CONST_BITS.
static INLINE int32x4x2_t add_multiply_shift_and_narrow_s32_dual(
const int32x4x2_t a, const int32x4x2_t b, const int32_t ab_const) {
- const int32x4_t temp_low = vaddq_s32(a.val[0], b.val[0]);
- const int32x4_t temp_high = vaddq_s32(a.val[1], b.val[1]);
+ int32x4_t t[2];
int64x2_t c[4];
- int32x4x2_t d;
- c[0] = vmull_n_s32(vget_low_s32(temp_low), ab_const);
- c[1] = vmull_n_s32(vget_high_s32(temp_low), ab_const);
- c[2] = vmull_n_s32(vget_low_s32(temp_high), ab_const);
- c[3] = vmull_n_s32(vget_high_s32(temp_high), ab_const);
- d.val[0] = vcombine_s32(vrshrn_n_s64(c[0], DCT_CONST_BITS),
- vrshrn_n_s64(c[1], DCT_CONST_BITS));
- d.val[1] = vcombine_s32(vrshrn_n_s64(c[2], DCT_CONST_BITS),
- vrshrn_n_s64(c[3], DCT_CONST_BITS));
- return d;
+
+ t[0] = vaddq_s32(a.val[0], b.val[0]);
+ t[1] = vaddq_s32(a.val[1], b.val[1]);
+ c[0] = vmull_n_s32(vget_low_s32(t[0]), ab_const);
+ c[1] = vmull_n_s32(vget_high_s32(t[0]), ab_const);
+ c[2] = vmull_n_s32(vget_low_s32(t[1]), ab_const);
+ c[3] = vmull_n_s32(vget_high_s32(t[1]), ab_const);
+ return dct_const_round_shift_high_4x2(c);
}
// Subtract b from a, then multiply by ab_const. Shift and narrow by
// DCT_CONST_BITS.
static INLINE int32x4x2_t sub_multiply_shift_and_narrow_s32_dual(
const int32x4x2_t a, const int32x4x2_t b, const int32_t ab_const) {
- const int32x4_t temp_low = vsubq_s32(a.val[0], b.val[0]);
- const int32x4_t temp_high = vsubq_s32(a.val[1], b.val[1]);
+ int32x4_t t[2];
int64x2_t c[4];
- int32x4x2_t d;
- c[0] = vmull_n_s32(vget_low_s32(temp_low), ab_const);
- c[1] = vmull_n_s32(vget_high_s32(temp_low), ab_const);
- c[2] = vmull_n_s32(vget_low_s32(temp_high), ab_const);
- c[3] = vmull_n_s32(vget_high_s32(temp_high), ab_const);
- d.val[0] = vcombine_s32(vrshrn_n_s64(c[0], DCT_CONST_BITS),
- vrshrn_n_s64(c[1], DCT_CONST_BITS));
- d.val[1] = vcombine_s32(vrshrn_n_s64(c[2], DCT_CONST_BITS),
- vrshrn_n_s64(c[3], DCT_CONST_BITS));
- return d;
+
+ t[0] = vsubq_s32(a.val[0], b.val[0]);
+ t[1] = vsubq_s32(a.val[1], b.val[1]);
+ c[0] = vmull_n_s32(vget_low_s32(t[0]), ab_const);
+ c[1] = vmull_n_s32(vget_high_s32(t[0]), ab_const);
+ c[2] = vmull_n_s32(vget_low_s32(t[1]), ab_const);
+ c[3] = vmull_n_s32(vget_high_s32(t[1]), ab_const);
+ return dct_const_round_shift_high_4x2(c);
}
// Multiply a by a_const and b by b_const, then accumulate. Shift and narrow by
@@ -200,7 +212,6 @@ static INLINE int32x4x2_t multiply_accumulate_shift_and_narrow_s32_dual(
const int32x4x2_t a, const int32_t a_const, const int32x4x2_t b,
const int32_t b_const) {
int64x2_t c[4];
- int32x4x2_t d;
c[0] = vmull_n_s32(vget_low_s32(a.val[0]), a_const);
c[1] = vmull_n_s32(vget_high_s32(a.val[0]), a_const);
c[2] = vmull_n_s32(vget_low_s32(a.val[1]), a_const);
@@ -209,72 +220,66 @@ static INLINE int32x4x2_t multiply_accumulate_shift_and_narrow_s32_dual(
c[1] = vmlal_n_s32(c[1], vget_high_s32(b.val[0]), b_const);
c[2] = vmlal_n_s32(c[2], vget_low_s32(b.val[1]), b_const);
c[3] = vmlal_n_s32(c[3], vget_high_s32(b.val[1]), b_const);
- d.val[0] = vcombine_s32(vrshrn_n_s64(c[0], DCT_CONST_BITS),
- vrshrn_n_s64(c[1], DCT_CONST_BITS));
- d.val[1] = vcombine_s32(vrshrn_n_s64(c[2], DCT_CONST_BITS),
- vrshrn_n_s64(c[3], DCT_CONST_BITS));
- return d;
+ return dct_const_round_shift_high_4x2(c);
}
// Shift the output down by 6 and add it to the destination buffer.
-static INLINE void add_and_store_u8_s16(const int16x8_t a0, const int16x8_t a1,
- const int16x8_t a2, const int16x8_t a3,
- const int16x8_t a4, const int16x8_t a5,
- const int16x8_t a6, const int16x8_t a7,
- uint8_t *b, const int b_stride) {
- uint8x8_t b0, b1, b2, b3, b4, b5, b6, b7;
- int16x8_t c0, c1, c2, c3, c4, c5, c6, c7;
- b0 = vld1_u8(b);
- b += b_stride;
- b1 = vld1_u8(b);
- b += b_stride;
- b2 = vld1_u8(b);
- b += b_stride;
- b3 = vld1_u8(b);
- b += b_stride;
- b4 = vld1_u8(b);
- b += b_stride;
- b5 = vld1_u8(b);
- b += b_stride;
- b6 = vld1_u8(b);
- b += b_stride;
- b7 = vld1_u8(b);
- b -= (7 * b_stride);
+static INLINE void add_and_store_u8_s16(const int16x8_t *const a, uint8_t *d,
+ const int stride) {
+ uint8x8_t b[8];
+ int16x8_t c[8];
+
+ b[0] = vld1_u8(d);
+ d += stride;
+ b[1] = vld1_u8(d);
+ d += stride;
+ b[2] = vld1_u8(d);
+ d += stride;
+ b[3] = vld1_u8(d);
+ d += stride;
+ b[4] = vld1_u8(d);
+ d += stride;
+ b[5] = vld1_u8(d);
+ d += stride;
+ b[6] = vld1_u8(d);
+ d += stride;
+ b[7] = vld1_u8(d);
+ d -= (7 * stride);
// c = b + (a >> 6)
- c0 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b0)), a0, 6);
- c1 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b1)), a1, 6);
- c2 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b2)), a2, 6);
- c3 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b3)), a3, 6);
- c4 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b4)), a4, 6);
- c5 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b5)), a5, 6);
- c6 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b6)), a6, 6);
- c7 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b7)), a7, 6);
-
- b0 = vqmovun_s16(c0);
- b1 = vqmovun_s16(c1);
- b2 = vqmovun_s16(c2);
- b3 = vqmovun_s16(c3);
- b4 = vqmovun_s16(c4);
- b5 = vqmovun_s16(c5);
- b6 = vqmovun_s16(c6);
- b7 = vqmovun_s16(c7);
-
- vst1_u8(b, b0);
- b += b_stride;
- vst1_u8(b, b1);
- b += b_stride;
- vst1_u8(b, b2);
- b += b_stride;
- vst1_u8(b, b3);
- b += b_stride;
- vst1_u8(b, b4);
- b += b_stride;
- vst1_u8(b, b5);
- b += b_stride;
- vst1_u8(b, b6);
- b += b_stride;
- vst1_u8(b, b7);
+ c[0] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[0])), a[0], 6);
+ c[1] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[1])), a[1], 6);
+ c[2] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[2])), a[2], 6);
+ c[3] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[3])), a[3], 6);
+ c[4] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[4])), a[4], 6);
+ c[5] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[5])), a[5], 6);
+ c[6] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[6])), a[6], 6);
+ c[7] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[7])), a[7], 6);
+
+ b[0] = vqmovun_s16(c[0]);
+ b[1] = vqmovun_s16(c[1]);
+ b[2] = vqmovun_s16(c[2]);
+ b[3] = vqmovun_s16(c[3]);
+ b[4] = vqmovun_s16(c[4]);
+ b[5] = vqmovun_s16(c[5]);
+ b[6] = vqmovun_s16(c[6]);
+ b[7] = vqmovun_s16(c[7]);
+
+ vst1_u8(d, b[0]);
+ d += stride;
+ vst1_u8(d, b[1]);
+ d += stride;
+ vst1_u8(d, b[2]);
+ d += stride;
+ vst1_u8(d, b[3]);
+ d += stride;
+ vst1_u8(d, b[4]);
+ d += stride;
+ vst1_u8(d, b[5]);
+ d += stride;
+ vst1_u8(d, b[6]);
+ d += stride;
+ vst1_u8(d, b[7]);
}
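
Per pixel, the function above realizes the "// c = b + (a >> 6)" line with a rounding shift-and-accumulate (vrsraq_n_s16) and then saturates to [0, 255] via vqmovun_s16. A scalar sketch with an assumed name:

#include <stdint.h>

static uint8_t add_and_clip_sketch(uint8_t dst, int16_t res) {
  const int32_t v = dst + ((res + 32) >> 6); /* rounding shift by 2^6 */
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}
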
static INLINE uint8x16_t create_dcq(const int16_t dc) {
@@ -283,56 +288,53 @@ static INLINE uint8x16_t create_dcq(const int16_t dc) {
return vdupq_n_u8((uint8_t)t);
}
-static INLINE void idct4x4_16_kernel_bd8(const int16x4_t cospis,
- int16x8_t *const a0,
- int16x8_t *const a1) {
- int16x4_t b0, b1, b2, b3;
- int32x4_t c0, c1, c2, c3;
- int16x8_t d0, d1;
-
- transpose_s16_4x4q(a0, a1);
- b0 = vget_low_s16(*a0);
- b1 = vget_high_s16(*a0);
- b2 = vget_low_s16(*a1);
- b3 = vget_high_s16(*a1);
- c0 = vmull_lane_s16(b0, cospis, 2);
- c2 = vmull_lane_s16(b1, cospis, 2);
- c1 = vsubq_s32(c0, c2);
- c0 = vaddq_s32(c0, c2);
- c2 = vmull_lane_s16(b2, cospis, 3);
- c3 = vmull_lane_s16(b2, cospis, 1);
- c2 = vmlsl_lane_s16(c2, b3, cospis, 1);
- c3 = vmlal_lane_s16(c3, b3, cospis, 3);
- b0 = vrshrn_n_s32(c0, DCT_CONST_BITS);
- b1 = vrshrn_n_s32(c1, DCT_CONST_BITS);
- b2 = vrshrn_n_s32(c2, DCT_CONST_BITS);
- b3 = vrshrn_n_s32(c3, DCT_CONST_BITS);
- d0 = vcombine_s16(b0, b1);
- d1 = vcombine_s16(b3, b2);
- *a0 = vaddq_s16(d0, d1);
- *a1 = vsubq_s16(d0, d1);
-}
-
-static INLINE void idct8x8_12_pass1_bd8(
- const int16x4_t cospis0, const int16x4_t cospisd0, const int16x4_t cospisd1,
- int16x4_t *const io0, int16x4_t *const io1, int16x4_t *const io2,
- int16x4_t *const io3, int16x4_t *const io4, int16x4_t *const io5,
- int16x4_t *const io6, int16x4_t *const io7) {
+static INLINE void idct4x4_16_kernel_bd8(int16x8_t *const a) {
+ const int16x4_t cospis = vld1_s16(kCospi);
+ int16x4_t b[4];
+ int32x4_t c[4];
+ int16x8_t d[2];
+
+ b[0] = vget_low_s16(a[0]);
+ b[1] = vget_high_s16(a[0]);
+ b[2] = vget_low_s16(a[1]);
+ b[3] = vget_high_s16(a[1]);
+ c[0] = vmull_lane_s16(b[0], cospis, 2);
+ c[2] = vmull_lane_s16(b[1], cospis, 2);
+ c[1] = vsubq_s32(c[0], c[2]);
+ c[0] = vaddq_s32(c[0], c[2]);
+ c[3] = vmull_lane_s16(b[2], cospis, 3);
+ c[2] = vmull_lane_s16(b[2], cospis, 1);
+ c[3] = vmlsl_lane_s16(c[3], b[3], cospis, 1);
+ c[2] = vmlal_lane_s16(c[2], b[3], cospis, 3);
+ dct_const_round_shift_low_8_dual(c, &d[0], &d[1]);
+ a[0] = vaddq_s16(d[0], d[1]);
+ a[1] = vsubq_s16(d[0], d[1]);
+}
+
+static INLINE void transpose_idct4x4_16_bd8(int16x8_t *const a) {
+ transpose_s16_4x4q(&a[0], &a[1]);
+ idct4x4_16_kernel_bd8(a);
+}
+
+static INLINE void idct8x8_12_pass1_bd8(const int16x4_t cospis0,
+ const int16x4_t cospisd0,
+ const int16x4_t cospisd1,
+ int16x4_t *const io) {
int16x4_t step1[8], step2[8];
int32x4_t t32[2];
- transpose_s16_4x4d(io0, io1, io2, io3);
+ transpose_s16_4x4d(&io[0], &io[1], &io[2], &io[3]);
// stage 1
- step1[4] = vqrdmulh_lane_s16(*io1, cospisd1, 3);
- step1[5] = vqrdmulh_lane_s16(*io3, cospisd1, 2);
- step1[6] = vqrdmulh_lane_s16(*io3, cospisd1, 1);
- step1[7] = vqrdmulh_lane_s16(*io1, cospisd1, 0);
+ step1[4] = vqrdmulh_lane_s16(io[1], cospisd1, 3);
+ step1[5] = vqrdmulh_lane_s16(io[3], cospisd1, 2);
+ step1[6] = vqrdmulh_lane_s16(io[3], cospisd1, 1);
+ step1[7] = vqrdmulh_lane_s16(io[1], cospisd1, 0);
// stage 2
- step2[1] = vqrdmulh_lane_s16(*io0, cospisd0, 2);
- step2[2] = vqrdmulh_lane_s16(*io2, cospisd0, 3);
- step2[3] = vqrdmulh_lane_s16(*io2, cospisd0, 1);
+ step2[1] = vqrdmulh_lane_s16(io[0], cospisd0, 2);
+ step2[2] = vqrdmulh_lane_s16(io[2], cospisd0, 3);
+ step2[3] = vqrdmulh_lane_s16(io[2], cospisd0, 1);
step2[4] = vadd_s16(step1[4], step1[5]);
step2[5] = vsub_s16(step1[4], step1[5]);
@@ -352,32 +354,27 @@ static INLINE void idct8x8_12_pass1_bd8(
step1[6] = vrshrn_n_s32(t32[1], DCT_CONST_BITS);
// stage 4
- *io0 = vadd_s16(step1[0], step2[7]);
- *io1 = vadd_s16(step1[1], step1[6]);
- *io2 = vadd_s16(step1[2], step1[5]);
- *io3 = vadd_s16(step1[3], step2[4]);
- *io4 = vsub_s16(step1[3], step2[4]);
- *io5 = vsub_s16(step1[2], step1[5]);
- *io6 = vsub_s16(step1[1], step1[6]);
- *io7 = vsub_s16(step1[0], step2[7]);
-}
-
-static INLINE void idct8x8_12_pass2_bd8(
- const int16x4_t cospis0, const int16x4_t cospisd0, const int16x4_t cospisd1,
- const int16x4_t input0, const int16x4_t input1, const int16x4_t input2,
- const int16x4_t input3, const int16x4_t input4, const int16x4_t input5,
- const int16x4_t input6, const int16x4_t input7, int16x8_t *const output0,
- int16x8_t *const output1, int16x8_t *const output2,
- int16x8_t *const output3, int16x8_t *const output4,
- int16x8_t *const output5, int16x8_t *const output6,
- int16x8_t *const output7) {
+ io[0] = vadd_s16(step1[0], step2[7]);
+ io[1] = vadd_s16(step1[1], step1[6]);
+ io[2] = vadd_s16(step1[2], step1[5]);
+ io[3] = vadd_s16(step1[3], step2[4]);
+ io[4] = vsub_s16(step1[3], step2[4]);
+ io[5] = vsub_s16(step1[2], step1[5]);
+ io[6] = vsub_s16(step1[1], step1[6]);
+ io[7] = vsub_s16(step1[0], step2[7]);
+}
+
+static INLINE void idct8x8_12_pass2_bd8(const int16x4_t cospis0,
+ const int16x4_t cospisd0,
+ const int16x4_t cospisd1,
+ const int16x4_t *const input,
+ int16x8_t *const output) {
int16x8_t in[4];
int16x8_t step1[8], step2[8];
int32x4_t t32[8];
- int16x4_t t16[8];
- transpose_s16_4x8(input0, input1, input2, input3, input4, input5, input6,
- input7, &in[0], &in[1], &in[2], &in[3]);
+ transpose_s16_4x8(input[0], input[1], input[2], input[3], input[4], input[5],
+ input[6], input[7], &in[0], &in[1], &in[2], &in[3]);
// stage 1
step1[4] = vqrdmulhq_lane_s16(in[1], cospisd1, 3);
@@ -407,86 +404,64 @@ static INLINE void idct8x8_12_pass2_bd8(
t32[1] = vmlsl_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2);
t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2);
t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2);
- t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS);
- t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS);
- t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS);
- t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS);
- step1[5] = vcombine_s16(t16[0], t16[1]);
- step1[6] = vcombine_s16(t16[2], t16[3]);
+ dct_const_round_shift_low_8_dual(t32, &step1[5], &step1[6]);
// stage 4
- *output0 = vaddq_s16(step1[0], step2[7]);
- *output1 = vaddq_s16(step1[1], step1[6]);
- *output2 = vaddq_s16(step1[2], step1[5]);
- *output3 = vaddq_s16(step1[3], step2[4]);
- *output4 = vsubq_s16(step1[3], step2[4]);
- *output5 = vsubq_s16(step1[2], step1[5]);
- *output6 = vsubq_s16(step1[1], step1[6]);
- *output7 = vsubq_s16(step1[0], step2[7]);
-}
-
-static INLINE void idct8x8_64_1d_bd8(const int16x4_t cospis0,
- const int16x4_t cospis1,
- int16x8_t *const io0, int16x8_t *const io1,
- int16x8_t *const io2, int16x8_t *const io3,
- int16x8_t *const io4, int16x8_t *const io5,
- int16x8_t *const io6,
- int16x8_t *const io7) {
- int16x4_t input_1l, input_1h, input_3l, input_3h, input_5l, input_5h,
- input_7l, input_7h;
+ output[0] = vaddq_s16(step1[0], step2[7]);
+ output[1] = vaddq_s16(step1[1], step1[6]);
+ output[2] = vaddq_s16(step1[2], step1[5]);
+ output[3] = vaddq_s16(step1[3], step2[4]);
+ output[4] = vsubq_s16(step1[3], step2[4]);
+ output[5] = vsubq_s16(step1[2], step1[5]);
+ output[6] = vsubq_s16(step1[1], step1[6]);
+ output[7] = vsubq_s16(step1[0], step2[7]);
+}
+
+static INLINE void idct8x8_64_1d_bd8_kernel(const int16x4_t cospis0,
+ const int16x4_t cospis1,
+ int16x8_t *const io) {
+ int16x4_t input1l, input1h, input3l, input3h, input5l, input5h, input7l,
+ input7h;
int16x4_t step1l[4], step1h[4];
int16x8_t step1[8], step2[8];
int32x4_t t32[8];
- int16x4_t t16[8];
-
- transpose_s16_8x8(io0, io1, io2, io3, io4, io5, io6, io7);
// stage 1
- input_1l = vget_low_s16(*io1);
- input_1h = vget_high_s16(*io1);
- input_3l = vget_low_s16(*io3);
- input_3h = vget_high_s16(*io3);
- input_5l = vget_low_s16(*io5);
- input_5h = vget_high_s16(*io5);
- input_7l = vget_low_s16(*io7);
- input_7h = vget_high_s16(*io7);
- step1l[0] = vget_low_s16(*io0);
- step1h[0] = vget_high_s16(*io0);
- step1l[1] = vget_low_s16(*io2);
- step1h[1] = vget_high_s16(*io2);
- step1l[2] = vget_low_s16(*io4);
- step1h[2] = vget_high_s16(*io4);
- step1l[3] = vget_low_s16(*io6);
- step1h[3] = vget_high_s16(*io6);
-
- t32[0] = vmull_lane_s16(input_1l, cospis1, 3);
- t32[1] = vmull_lane_s16(input_1h, cospis1, 3);
- t32[2] = vmull_lane_s16(input_3l, cospis1, 2);
- t32[3] = vmull_lane_s16(input_3h, cospis1, 2);
- t32[4] = vmull_lane_s16(input_3l, cospis1, 1);
- t32[5] = vmull_lane_s16(input_3h, cospis1, 1);
- t32[6] = vmull_lane_s16(input_1l, cospis1, 0);
- t32[7] = vmull_lane_s16(input_1h, cospis1, 0);
- t32[0] = vmlsl_lane_s16(t32[0], input_7l, cospis1, 0);
- t32[1] = vmlsl_lane_s16(t32[1], input_7h, cospis1, 0);
- t32[2] = vmlal_lane_s16(t32[2], input_5l, cospis1, 1);
- t32[3] = vmlal_lane_s16(t32[3], input_5h, cospis1, 1);
- t32[4] = vmlsl_lane_s16(t32[4], input_5l, cospis1, 2);
- t32[5] = vmlsl_lane_s16(t32[5], input_5h, cospis1, 2);
- t32[6] = vmlal_lane_s16(t32[6], input_7l, cospis1, 3);
- t32[7] = vmlal_lane_s16(t32[7], input_7h, cospis1, 3);
- t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS);
- t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS);
- t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS);
- t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS);
- t16[4] = vrshrn_n_s32(t32[4], DCT_CONST_BITS);
- t16[5] = vrshrn_n_s32(t32[5], DCT_CONST_BITS);
- t16[6] = vrshrn_n_s32(t32[6], DCT_CONST_BITS);
- t16[7] = vrshrn_n_s32(t32[7], DCT_CONST_BITS);
- step1[4] = vcombine_s16(t16[0], t16[1]);
- step1[5] = vcombine_s16(t16[2], t16[3]);
- step1[6] = vcombine_s16(t16[4], t16[5]);
- step1[7] = vcombine_s16(t16[6], t16[7]);
+ input1l = vget_low_s16(io[1]);
+ input1h = vget_high_s16(io[1]);
+ input3l = vget_low_s16(io[3]);
+ input3h = vget_high_s16(io[3]);
+ input5l = vget_low_s16(io[5]);
+ input5h = vget_high_s16(io[5]);
+ input7l = vget_low_s16(io[7]);
+ input7h = vget_high_s16(io[7]);
+ step1l[0] = vget_low_s16(io[0]);
+ step1h[0] = vget_high_s16(io[0]);
+ step1l[1] = vget_low_s16(io[2]);
+ step1h[1] = vget_high_s16(io[2]);
+ step1l[2] = vget_low_s16(io[4]);
+ step1h[2] = vget_high_s16(io[4]);
+ step1l[3] = vget_low_s16(io[6]);
+ step1h[3] = vget_high_s16(io[6]);
+
+ t32[0] = vmull_lane_s16(input1l, cospis1, 3);
+ t32[1] = vmull_lane_s16(input1h, cospis1, 3);
+ t32[2] = vmull_lane_s16(input3l, cospis1, 2);
+ t32[3] = vmull_lane_s16(input3h, cospis1, 2);
+ t32[4] = vmull_lane_s16(input3l, cospis1, 1);
+ t32[5] = vmull_lane_s16(input3h, cospis1, 1);
+ t32[6] = vmull_lane_s16(input1l, cospis1, 0);
+ t32[7] = vmull_lane_s16(input1h, cospis1, 0);
+ t32[0] = vmlsl_lane_s16(t32[0], input7l, cospis1, 0);
+ t32[1] = vmlsl_lane_s16(t32[1], input7h, cospis1, 0);
+ t32[2] = vmlal_lane_s16(t32[2], input5l, cospis1, 1);
+ t32[3] = vmlal_lane_s16(t32[3], input5h, cospis1, 1);
+ t32[4] = vmlsl_lane_s16(t32[4], input5l, cospis1, 2);
+ t32[5] = vmlsl_lane_s16(t32[5], input5h, cospis1, 2);
+ t32[6] = vmlal_lane_s16(t32[6], input7l, cospis1, 3);
+ t32[7] = vmlal_lane_s16(t32[7], input7h, cospis1, 3);
+ dct_const_round_shift_low_8_dual(&t32[0], &step1[4], &step1[5]);
+ dct_const_round_shift_low_8_dual(&t32[4], &step1[6], &step1[7]);
// stage 2
t32[2] = vmull_lane_s16(step1l[0], cospis0, 2);
@@ -503,18 +478,8 @@ static INLINE void idct8x8_64_1d_bd8(const int16x4_t cospis0,
t32[5] = vmlsl_lane_s16(t32[5], step1h[3], cospis0, 1);
t32[6] = vmlal_lane_s16(t32[6], step1l[3], cospis0, 3);
t32[7] = vmlal_lane_s16(t32[7], step1h[3], cospis0, 3);
- t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS);
- t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS);
- t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS);
- t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS);
- t16[4] = vrshrn_n_s32(t32[4], DCT_CONST_BITS);
- t16[5] = vrshrn_n_s32(t32[5], DCT_CONST_BITS);
- t16[6] = vrshrn_n_s32(t32[6], DCT_CONST_BITS);
- t16[7] = vrshrn_n_s32(t32[7], DCT_CONST_BITS);
- step2[0] = vcombine_s16(t16[0], t16[1]);
- step2[1] = vcombine_s16(t16[2], t16[3]);
- step2[2] = vcombine_s16(t16[4], t16[5]);
- step2[3] = vcombine_s16(t16[6], t16[7]);
+ dct_const_round_shift_low_8_dual(&t32[0], &step2[0], &step2[1]);
+ dct_const_round_shift_low_8_dual(&t32[4], &step2[2], &step2[3]);
step2[4] = vaddq_s16(step1[4], step1[5]);
step2[5] = vsubq_s16(step1[4], step1[5]);
@@ -533,35 +498,25 @@ static INLINE void idct8x8_64_1d_bd8(const int16x4_t cospis0,
t32[1] = vmlsl_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2);
t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2);
t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2);
- t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS);
- t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS);
- t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS);
- t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS);
- step1[5] = vcombine_s16(t16[0], t16[1]);
- step1[6] = vcombine_s16(t16[2], t16[3]);
+ dct_const_round_shift_low_8_dual(t32, &step1[5], &step1[6]);
// stage 4
- *io0 = vaddq_s16(step1[0], step2[7]);
- *io1 = vaddq_s16(step1[1], step1[6]);
- *io2 = vaddq_s16(step1[2], step1[5]);
- *io3 = vaddq_s16(step1[3], step2[4]);
- *io4 = vsubq_s16(step1[3], step2[4]);
- *io5 = vsubq_s16(step1[2], step1[5]);
- *io6 = vsubq_s16(step1[1], step1[6]);
- *io7 = vsubq_s16(step1[0], step2[7]);
+ io[0] = vaddq_s16(step1[0], step2[7]);
+ io[1] = vaddq_s16(step1[1], step1[6]);
+ io[2] = vaddq_s16(step1[2], step1[5]);
+ io[3] = vaddq_s16(step1[3], step2[4]);
+ io[4] = vsubq_s16(step1[3], step2[4]);
+ io[5] = vsubq_s16(step1[2], step1[5]);
+ io[6] = vsubq_s16(step1[1], step1[6]);
+ io[7] = vsubq_s16(step1[0], step2[7]);
}
-static INLINE void idct16x16_add_wrap_low_8x2(const int32x4_t *const t32,
- int16x8_t *const d0,
- int16x8_t *const d1) {
- int16x4_t t16[4];
-
- t16[0] = vrshrn_n_s32(t32[0], DCT_CONST_BITS);
- t16[1] = vrshrn_n_s32(t32[1], DCT_CONST_BITS);
- t16[2] = vrshrn_n_s32(t32[2], DCT_CONST_BITS);
- t16[3] = vrshrn_n_s32(t32[3], DCT_CONST_BITS);
- *d0 = vcombine_s16(t16[0], t16[1]);
- *d1 = vcombine_s16(t16[2], t16[3]);
+static INLINE void idct8x8_64_1d_bd8(const int16x4_t cospis0,
+ const int16x4_t cospis1,
+ int16x8_t *const io) {
+ transpose_s16_8x8(&io[0], &io[1], &io[2], &io[3], &io[4], &io[5], &io[6],
+ &io[7]);
+ idct8x8_64_1d_bd8_kernel(cospis0, cospis1, io);
}
static INLINE void idct_cospi_8_24_q_kernel(const int16x8_t s0,
@@ -584,7 +539,7 @@ static INLINE void idct_cospi_8_24_q(const int16x8_t s0, const int16x8_t s1,
int32x4_t t32[4];
idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t32);
- idct16x16_add_wrap_low_8x2(t32, d0, d1);
+ dct_const_round_shift_low_8_dual(t32, d0, d1);
}
static INLINE void idct_cospi_8_24_neg_q(const int16x8_t s0, const int16x8_t s1,
@@ -596,7 +551,7 @@ static INLINE void idct_cospi_8_24_neg_q(const int16x8_t s0, const int16x8_t s1,
idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t32);
t32[2] = vnegq_s32(t32[2]);
t32[3] = vnegq_s32(t32[3]);
- idct16x16_add_wrap_low_8x2(t32, d0, d1);
+ dct_const_round_shift_low_8_dual(t32, d0, d1);
}
static INLINE void idct_cospi_16_16_q(const int16x8_t s0, const int16x8_t s1,
@@ -611,7 +566,7 @@ static INLINE void idct_cospi_16_16_q(const int16x8_t s0, const int16x8_t s1,
t32[1] = vmlsl_lane_s16(t32[5], vget_high_s16(s0), cospi_0_8_16_24, 2);
t32[2] = vmlal_lane_s16(t32[4], vget_low_s16(s0), cospi_0_8_16_24, 2);
t32[3] = vmlal_lane_s16(t32[5], vget_high_s16(s0), cospi_0_8_16_24, 2);
- idct16x16_add_wrap_low_8x2(t32, d0, d1);
+ dct_const_round_shift_low_8_dual(t32, d0, d1);
}
static INLINE void idct_cospi_2_30(const int16x8_t s0, const int16x8_t s1,
@@ -627,7 +582,7 @@ static INLINE void idct_cospi_2_30(const int16x8_t s0, const int16x8_t s1,
t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_2_30_10_22, 0);
t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_2_30_10_22, 0);
t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_2_30_10_22, 0);
- idct16x16_add_wrap_low_8x2(t32, d0, d1);
+ dct_const_round_shift_low_8_dual(t32, d0, d1);
}
static INLINE void idct_cospi_4_28(const int16x8_t s0, const int16x8_t s1,
@@ -643,7 +598,7 @@ static INLINE void idct_cospi_4_28(const int16x8_t s0, const int16x8_t s1,
t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_4_12_20N_28, 0);
t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_4_12_20N_28, 0);
t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_4_12_20N_28, 0);
- idct16x16_add_wrap_low_8x2(t32, d0, d1);
+ dct_const_round_shift_low_8_dual(t32, d0, d1);
}
static INLINE void idct_cospi_6_26(const int16x8_t s0, const int16x8_t s1,
@@ -659,7 +614,7 @@ static INLINE void idct_cospi_6_26(const int16x8_t s0, const int16x8_t s1,
t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_6_26N_14_18N, 1);
t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_6_26N_14_18N, 1);
t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_6_26N_14_18N, 1);
- idct16x16_add_wrap_low_8x2(t32, d0, d1);
+ dct_const_round_shift_low_8_dual(t32, d0, d1);
}
static INLINE void idct_cospi_10_22(const int16x8_t s0, const int16x8_t s1,
@@ -675,7 +630,7 @@ static INLINE void idct_cospi_10_22(const int16x8_t s0, const int16x8_t s1,
t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_2_30_10_22, 2);
t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_2_30_10_22, 2);
t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_2_30_10_22, 2);
- idct16x16_add_wrap_low_8x2(t32, d0, d1);
+ dct_const_round_shift_low_8_dual(t32, d0, d1);
}
static INLINE void idct_cospi_12_20(const int16x8_t s0, const int16x8_t s1,
@@ -691,7 +646,7 @@ static INLINE void idct_cospi_12_20(const int16x8_t s0, const int16x8_t s1,
t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_4_12_20N_28, 2);
t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_4_12_20N_28, 2);
t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_4_12_20N_28, 2);
- idct16x16_add_wrap_low_8x2(t32, d0, d1);
+ dct_const_round_shift_low_8_dual(t32, d0, d1);
}
static INLINE void idct_cospi_14_18(const int16x8_t s0, const int16x8_t s1,
@@ -707,7 +662,7 @@ static INLINE void idct_cospi_14_18(const int16x8_t s0, const int16x8_t s1,
t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_6_26N_14_18N, 3);
t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_6_26N_14_18N, 3);
t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_6_26N_14_18N, 3);
- idct16x16_add_wrap_low_8x2(t32, d0, d1);
+ dct_const_round_shift_low_8_dual(t32, d0, d1);
}
static INLINE void idct16x16_add_stage7(const int16x8_t *const step2,
@@ -786,73 +741,94 @@ static INLINE void idct16x16_store_pass1(const int16x8_t *const out,
vst1q_s16(output, out[15]);
}
-static INLINE void idct16x16_add8x1(int16x8_t res, uint8_t **dest,
- const int stride) {
- uint8x8_t d = vld1_u8(*dest);
- uint16x8_t q;
-
- res = vrshrq_n_s16(res, 6);
- q = vaddw_u8(vreinterpretq_u16_s16(res), d);
- d = vqmovun_s16(vreinterpretq_s16_u16(q));
+static INLINE void idct8x8_add8x1(const int16x8_t a, uint8_t **const dest,
+ const int stride) {
+ const uint8x8_t s = vld1_u8(*dest);
+ const int16x8_t res = vrshrq_n_s16(a, 5);
+ const uint16x8_t q = vaddw_u8(vreinterpretq_u16_s16(res), s);
+ const uint8x8_t d = vqmovun_s16(vreinterpretq_s16_u16(q));
vst1_u8(*dest, d);
*dest += stride;
}
-static INLINE void highbd_idct16x16_add8x1(int16x8_t res, const int16x8_t max,
- uint16_t **dest, const int stride) {
- uint16x8_t d = vld1q_u16(*dest);
+static INLINE void idct8x8_add8x8_neon(int16x8_t *const out, uint8_t *dest,
+ const int stride) {
+ idct8x8_add8x1(out[0], &dest, stride);
+ idct8x8_add8x1(out[1], &dest, stride);
+ idct8x8_add8x1(out[2], &dest, stride);
+ idct8x8_add8x1(out[3], &dest, stride);
+ idct8x8_add8x1(out[4], &dest, stride);
+ idct8x8_add8x1(out[5], &dest, stride);
+ idct8x8_add8x1(out[6], &dest, stride);
+ idct8x8_add8x1(out[7], &dest, stride);
+}
- res = vqaddq_s16(res, vreinterpretq_s16_u16(d));
- res = vminq_s16(res, max);
- d = vqshluq_n_s16(res, 0);
+static INLINE void idct16x16_add8x1(const int16x8_t a, uint8_t **const dest,
+ const int stride) {
+ const uint8x8_t s = vld1_u8(*dest);
+ const int16x8_t res = vrshrq_n_s16(a, 6);
+ const uint16x8_t q = vaddw_u8(vreinterpretq_u16_s16(res), s);
+ const uint8x8_t d = vqmovun_s16(vreinterpretq_s16_u16(q));
+ vst1_u8(*dest, d);
+ *dest += stride;
+}
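
The new idct8x8_add8x1 and idct16x16_add8x1 helpers above perform the final reconstruction step: round the inverse-transform residual by 5 bits (8x8) or 6 bits (16x16), add it to the existing destination pixels, saturate to 8 bits, and advance the destination pointer by one stride. A scalar sketch of what one idct8x8_add8x1 call computes per pixel, assuming libvpx's usual ROUND_POWER_OF_TWO and clip_pixel helpers from vpx_dsp_common.h:

// Scalar equivalent of idct8x8_add8x1 for one row of eight pixels
// (illustrative only; the NEON version does this with vector ops).
static void idct8x8_add_row_c(const int16_t *res, uint8_t *dest) {
  int i;
  for (i = 0; i < 8; ++i)
    dest[i] = clip_pixel(dest[i] + ROUND_POWER_OF_TWO(res[i], 5));
}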
+
+static INLINE void highbd_idct16x16_add8x1(const int16x8_t a,
+ const int16x8_t max,
+ uint16_t **const dest,
+ const int stride) {
+ const uint16x8_t s = vld1q_u16(*dest);
+ const int16x8_t res0 = vqaddq_s16(a, vreinterpretq_s16_u16(s));
+ const int16x8_t res1 = vminq_s16(res0, max);
+ const uint16x8_t d = vqshluq_n_s16(res1, 0);
vst1q_u16(*dest, d);
*dest += stride;
}
-static INLINE void highbd_idct16x16_add8x1_bd8(int16x8_t res, uint16_t **dest,
+static INLINE void highbd_idct16x16_add8x1_bd8(const int16x8_t a,
+ uint16_t **const dest,
const int stride) {
- uint16x8_t d = vld1q_u16(*dest);
-
- res = vrsraq_n_s16(vreinterpretq_s16_u16(d), res, 6);
- d = vmovl_u8(vqmovun_s16(res));
+ const uint16x8_t s = vld1q_u16(*dest);
+ const int16x8_t res = vrsraq_n_s16(vreinterpretq_s16_u16(s), a, 6);
+ const uint16x8_t d = vmovl_u8(vqmovun_s16(res));
vst1q_u16(*dest, d);
*dest += stride;
}
static INLINE void highbd_add_and_store_bd8(const int16x8_t *const a,
- uint16_t *out, const int b_stride) {
- highbd_idct16x16_add8x1_bd8(a[0], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[1], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[2], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[3], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[4], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[5], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[6], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[7], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[8], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[9], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[10], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[11], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[12], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[13], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[14], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[15], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[16], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[17], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[18], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[19], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[20], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[21], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[22], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[23], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[24], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[25], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[26], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[27], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[28], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[29], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[30], &out, b_stride);
- highbd_idct16x16_add8x1_bd8(a[31], &out, b_stride);
+ uint16_t *out, const int stride) {
+ highbd_idct16x16_add8x1_bd8(a[0], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[1], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[2], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[3], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[4], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[5], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[6], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[7], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[8], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[9], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[10], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[11], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[12], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[13], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[14], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[15], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[16], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[17], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[18], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[19], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[20], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[21], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[22], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[23], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[24], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[25], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[26], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[27], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[28], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[29], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[30], &out, stride);
+ highbd_idct16x16_add8x1_bd8(a[31], &out, stride);
}
static INLINE void highbd_idct16x16_add_store(const int32x4x2_t *const out,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c
index fb1fa6b681d..38e275834b0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c
@@ -667,8 +667,6 @@ void vpx_d135_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
d135_store_32x2(&dst, stride, row_0, row_1, row_2);
}
-// -----------------------------------------------------------------------------
-
#if !HAVE_NEON_ASM
void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h
index 4efad5333e3..ea09629540b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/mem_neon.h
@@ -19,6 +19,13 @@
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
+static INLINE int16x4_t create_s16x4_neon(const int16_t c0, const int16_t c1,
+ const int16_t c2, const int16_t c3) {
+ return vcreate_s16((uint16_t)c0 | ((uint16_t)c1 << 16) |
+ ((int64_t)(uint16_t)c2 << 32) |
+ ((int64_t)(uint16_t)c3 << 48));
+}
+
// Helper functions used to load tran_low_t into int16, narrowing if necessary.
static INLINE int16x8x2_t load_tran_low_to_s16x2q(const tran_low_t *buf) {
#if CONFIG_VP9_HIGHBITDEPTH
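
The new create_s16x4_neon builds an int16x4_t from four scalar constants by packing them into one 64-bit value for vcreate_s16; each argument is cast to uint16_t before shifting so sign extension cannot spill into the neighbouring lane, with c0 landing in lane 0 and c3 in lane 3. A hypothetical use, e.g. assembling a vector of transform constants for the NEON idct kernels (the values below are purely illustrative, not taken from the patch):

// Illustrative only: lane 0 = 11585, lane 1 = 15137, lane 2 = 6270, lane 3 = 16069.
const int16x4_t cospis = create_s16x4_neon(11585, 15137, 6270, 16069);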
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h
index 13137659fae..e86fd3996a1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h
@@ -76,7 +76,6 @@ static INLINE tran_high_t highbd_check_range(tran_high_t input, int bd) {
// bd of 10 uses trans_low with 18bits, need to remove 14bits
// bd of 12 uses trans_low with 20bits, need to remove 12bits
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
-
#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16)
#if CONFIG_VP9_HIGHBITDEPTH
#define HIGHBD_WRAPLOW(x, bd) \
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c
index aafa272fbdf..9ef04836a27 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c
@@ -14,38 +14,37 @@
extern const int16_t vpx_rv[];
-#define VPX_TRANSPOSE8x16_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, out0, \
- out1, out2, out3, out4, out5, out6, out7, \
- out8, out9, out10, out11, out12, out13, out14, \
- out15) \
- { \
- v8i16 temp0, temp1, temp2, temp3, temp4; \
- v8i16 temp5, temp6, temp7, temp8, temp9; \
- \
- ILVR_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \
- temp3); \
- ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
- ILVRL_W2_SH(temp5, temp4, temp6, temp7); \
- ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
- ILVRL_W2_SH(temp5, temp4, temp8, temp9); \
- ILVL_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \
- temp3); \
- ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
- ILVRL_W2_UB(temp5, temp4, out8, out10); \
- ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
- ILVRL_W2_UB(temp5, temp4, out12, out14); \
- out0 = (v16u8)temp6; \
- out2 = (v16u8)temp7; \
- out4 = (v16u8)temp8; \
- out6 = (v16u8)temp9; \
- out9 = (v16u8)__msa_ilvl_d((v2i64)out8, (v2i64)out8); \
- out11 = (v16u8)__msa_ilvl_d((v2i64)out10, (v2i64)out10); \
- out13 = (v16u8)__msa_ilvl_d((v2i64)out12, (v2i64)out12); \
- out15 = (v16u8)__msa_ilvl_d((v2i64)out14, (v2i64)out14); \
- out1 = (v16u8)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
- out3 = (v16u8)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \
- out5 = (v16u8)__msa_ilvl_d((v2i64)out4, (v2i64)out4); \
- out7 = (v16u8)__msa_ilvl_d((v2i64)out6, (v2i64)out6); \
+#define VPX_TRANSPOSE8x16_UB_UB( \
+ in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, \
+ out5, out6, out7, out8, out9, out10, out11, out12, out13, out14, out15) \
+ { \
+ v8i16 temp0, temp1, temp2, temp3, temp4; \
+ v8i16 temp5, temp6, temp7, temp8, temp9; \
+ \
+ ILVR_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \
+ temp3); \
+ ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
+ ILVRL_W2_SH(temp5, temp4, temp6, temp7); \
+ ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
+ ILVRL_W2_SH(temp5, temp4, temp8, temp9); \
+ ILVL_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \
+ temp3); \
+ ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
+ ILVRL_W2_UB(temp5, temp4, out8, out10); \
+ ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \
+ ILVRL_W2_UB(temp5, temp4, out12, out14); \
+ out0 = (v16u8)temp6; \
+ out2 = (v16u8)temp7; \
+ out4 = (v16u8)temp8; \
+ out6 = (v16u8)temp9; \
+ out9 = (v16u8)__msa_ilvl_d((v2i64)out8, (v2i64)out8); \
+ out11 = (v16u8)__msa_ilvl_d((v2i64)out10, (v2i64)out10); \
+ out13 = (v16u8)__msa_ilvl_d((v2i64)out12, (v2i64)out12); \
+ out15 = (v16u8)__msa_ilvl_d((v2i64)out14, (v2i64)out14); \
+ out1 = (v16u8)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \
+ out3 = (v16u8)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \
+ out5 = (v16u8)__msa_ilvl_d((v2i64)out4, (v2i64)out4); \
+ out7 = (v16u8)__msa_ilvl_d((v2i64)out6, (v2i64)out6); \
}
#define VPX_AVER_IF_RETAIN(above2_in, above1_in, src_in, below1_in, below2_in, \
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c
index d43a9fd1841..f095cb0a481 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/ppc/inv_txfm_vsx.c
@@ -109,6 +109,7 @@ static int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 };
void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
int stride) {
+ int i, j;
int32x4_t temp1, temp2, temp3, temp4;
int16x8_t step0, step1, tmp16_0, tmp16_1, t_out0, t_out1;
uint8x16_t mask0 = { 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
@@ -152,8 +153,8 @@ void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest,
output_v = vec_packsu(tmp16_0, tmp16_1);
vec_vsx_st(output_v, 0, tmp_dest);
- for (int i = 0; i < 4; i++)
- for (int j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i];
+ for (i = 0; i < 4; i++)
+ for (j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i];
}
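
The vpx_idct4x4_16_add_vsx change above simply hoists the loop counters out of the for-initializers into a block-scope declaration, presumably so the file still builds with compilers that reject C99-style declarations inside for loops. In minimal form (hypothetical copy helper, not part of the patch):

// C90-friendly form: declare counters at the top of the block,
// not inside the for-initializer.
static void copy_4x4(const unsigned char *tmp, unsigned char *dest, int stride) {
  int i, j;
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++) dest[j * stride + i] = tmp[j * 4 + i];
}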
#define TRANSPOSE8x8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
index 3b1a873cd20..a4a6fa0842a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
@@ -242,6 +242,7 @@ DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_add_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_34_add_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_135_add_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_1024_add_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct_neon.h
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_inv_txfm_sse2.h
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_idct4x4_add_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_idct8x8_add_sse2.c
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fwd_txfm_impl_sse2.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fwd_txfm_impl_sse2.h
index f9abaecf28e..fd28d0d559d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fwd_txfm_impl_sse2.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fwd_txfm_impl_sse2.h
@@ -778,6 +778,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
return;
}
#endif // DCT_HIGH_BIT_DEPTH
+
// Interleave to do the multiply by constants which gets us
// into 32 bits.
{
@@ -834,6 +835,7 @@ void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int stride) {
return;
}
#endif // DCT_HIGH_BIT_DEPTH
+
// Interleave to do the multiply by constants which gets us
// into 32 bits.
{
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_convolve_avx2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_convolve_avx2.c
index 7e75d5d10c2..ef94522a3b5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_convolve_avx2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_convolve_avx2.c
@@ -192,8 +192,6 @@ void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride,
// -----------------------------------------------------------------------------
// Horizontal and vertical filtering
-#define CONV8_ROUNDING_BITS (7)
-
static const uint8_t signal_pattern_0[32] = { 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6,
7, 6, 7, 8, 9, 0, 1, 2, 3, 2, 3,
4, 5, 4, 5, 6, 7, 6, 7, 8, 9 };
@@ -210,6 +208,8 @@ static const uint8_t signal_pattern_2[32] = { 6, 7, 8, 9, 8, 9, 10, 11,
static const uint32_t signal_index[8] = { 2, 3, 4, 5, 2, 3, 4, 5 };
+#define CONV8_ROUNDING_BITS (7)
+
// -----------------------------------------------------------------------------
// Horizontal Filtering
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c
index de097c66a69..7898ee12c84 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct16x16_add_sse4.c
@@ -53,7 +53,7 @@ static INLINE void highbd_idct16_4col_stage6(const __m128i *const in,
out[15] = in[15];
}
-static INLINE void highbd_idct16_4col(__m128i *const io /*io[16]*/) {
+void vpx_highbd_idct16_4col_sse4_1(__m128i *const io /*io[16]*/) {
__m128i step1[16], step2[16];
// stage 2
@@ -233,7 +233,7 @@ void vpx_highbd_idct16x16_256_add_sse4_1(const tran_low_t *input,
in = all[i];
highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]);
highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]);
- highbd_idct16_4col(in);
+ vpx_highbd_idct16_4col_sse4_1(in);
input += 4 * 16;
}
@@ -243,7 +243,7 @@ void vpx_highbd_idct16x16_256_add_sse4_1(const tran_low_t *input,
transpose_32bit_4x4(all[1] + i, out + 4);
transpose_32bit_4x4(all[2] + i, out + 8);
transpose_32bit_4x4(all[3] + i, out + 12);
- highbd_idct16_4col(out);
+ vpx_highbd_idct16_4col_sse4_1(out);
for (j = 0; j < 16; ++j) {
highbd_write_buffer_4(dest + j * stride, out[j], bd);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse2.c
index 909a6b79485..bb7a510e15b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse2.c
@@ -124,8 +124,8 @@ void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint16_t *dest,
io_short[6] = _mm_packs_epi32(io[10], io[14]);
io_short[7] = _mm_packs_epi32(io[11], io[15]);
- idct8_sse2(io_short);
- idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
round_shift_8x8(io_short, io);
} else {
__m128i temp[4];
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse4.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse4.c
index ae391b2c02c..8b2e3d2415e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse4.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_idct8x8_add_sse4.c
@@ -17,7 +17,7 @@
#include "vpx_dsp/x86/inv_txfm_ssse3.h"
#include "vpx_dsp/x86/transpose_sse2.h"
-static void highbd_idct8x8_half1d(__m128i *const io) {
+void vpx_highbd_idct8x8_half1d_sse4_1(__m128i *const io) {
__m128i step1[8], step2[8];
transpose_32bit_4x4x2(io, io);
@@ -126,13 +126,13 @@ void vpx_highbd_idct8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest,
io_short[6] = _mm_packs_epi32(io[10], io[14]);
io_short[7] = _mm_packs_epi32(io[11], io[15]);
- idct8_sse2(io_short);
- idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
round_shift_8x8(io_short, io);
} else {
__m128i temp[4];
- highbd_idct8x8_half1d(io);
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0));
io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4));
@@ -142,7 +142,7 @@ void vpx_highbd_idct8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest,
io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4));
io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0));
io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4));
- highbd_idct8x8_half1d(&io[8]);
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
temp[0] = io[4];
temp[1] = io[5];
@@ -152,13 +152,13 @@ void vpx_highbd_idct8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest,
io[5] = io[9];
io[6] = io[10];
io[7] = io[11];
- highbd_idct8x8_half1d(io);
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
io[8] = temp[0];
io[9] = temp[1];
io[10] = temp[2];
io[11] = temp[3];
- highbd_idct8x8_half1d(&io[8]);
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
highbd_idct8x8_final_round(io);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h
index 435934f1b4a..5a7fd1d3997 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_inv_txfm_sse4.h
@@ -106,4 +106,7 @@ static INLINE void highbd_idct4_sse4_1(__m128i *const io) {
io[3] = _mm_sub_epi32(step[0], step[3]); // step[0] - step[3]
}
+void vpx_highbd_idct8x8_half1d_sse4_1(__m128i *const io);
+void vpx_highbd_idct16_4col_sse4_1(__m128i *const io /*io[16]*/);
+
#endif // VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
index d9a6932e0b8..e1f9657dfac 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -91,7 +91,7 @@ SECTION .text
%define filter_idx_shift 5
-%ifdef PIC ; 64bit PIC
+%if ARCH_X86_64
%if %2 == 1 ; avg
cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
x_offset, y_offset, \
@@ -99,19 +99,20 @@ SECTION .text
sec, sec_stride, height, sse
%define sec_str sec_strideq
%else
- cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \
- y_offset, dst, dst_stride, height, sse
+ cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, height, sse
%endif
%define block_height heightd
%define bilin_filter sseq
%else
- %if ARCH_X86=1 && CONFIG_PIC=1
+ %if CONFIG_PIC=1
%if %2 == 1 ; avg
cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse, g_bilin_filter, g_pw_8
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, height, sse, \
+ g_bilin_filter, g_pw_8
%define block_height dword heightm
%define sec_str sec_stridemp
@@ -130,8 +131,9 @@ SECTION .text
LOAD_IF_USED 0, 1 ; load eax, ecx back
%else
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, height, \
- sse, g_bilin_filter, g_pw_8
+ x_offset, y_offset, \
+ dst, dst_stride, height, sse, \
+ g_bilin_filter, g_pw_8
%define block_height heightd
; Store bilin_filter and pw_8 location in stack
@@ -150,22 +152,16 @@ SECTION .text
%endif
%else
%if %2 == 1 ; avg
- cglobal highbd_sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
- 7 + 2 * ARCH_X86_64, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse
- %if ARCH_X86_64
- %define block_height heightd
- %define sec_str sec_strideq
- %else
+ cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, \
+ sec, sec_stride, height, sse
%define block_height dword heightm
%define sec_str sec_stridemp
- %endif
%else
cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, dst, dst_stride, height, sse
+ x_offset, y_offset, \
+ dst, dst_stride, height, sse
%define block_height heightd
%endif
@@ -284,14 +280,14 @@ SECTION .text
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
%if ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+y_offsetq]
mova m9, [bilin_filter+y_offsetq+16]
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
@@ -308,7 +304,7 @@ SECTION .text
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -511,14 +507,14 @@ SECTION .text
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
%if ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+y_offsetq]
mova m9, [bilin_filter+y_offsetq+16]
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
@@ -535,7 +531,7 @@ SECTION .text
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -633,14 +629,14 @@ SECTION .text
jnz .x_nonhalf_y_nonzero
; x_offset == bilin interpolation && y_offset == 0
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
%if ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
@@ -657,7 +653,7 @@ SECTION .text
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -732,14 +728,14 @@ SECTION .text
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
%if ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
@@ -756,7 +752,7 @@ SECTION .text
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -859,8 +855,8 @@ SECTION .text
.x_nonhalf_y_nonhalf:
; loading filter - this is same as in 8-bit depth
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5
shl y_offsetd, filter_idx_shift
@@ -869,7 +865,7 @@ SECTION .text
mova m9, [bilin_filter+x_offsetq+16]
mova m10, [bilin_filter+y_offsetq]
mova m11, [bilin_filter+y_offsetq+16]
- mova m12, [pw_8]
+ mova m12, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_y_a m10
@@ -897,7 +893,7 @@ SECTION .text
%define filter_x_b [x_offsetq+16]
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
; end of load filter
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
index 6b1837df549..4b02da96664 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
@@ -165,7 +165,7 @@ void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest,
// 2-D
for (i = 0; i < 2; i++) {
- idct8_sse2(in);
+ vpx_idct8_sse2(in);
}
write_buffer_8x8(in, dest, stride);
@@ -221,7 +221,7 @@ void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest,
recon_and_store_8_dual(dest, dc_value, stride);
}
-void idct8_sse2(__m128i *const in) {
+void vpx_idct8_sse2(__m128i *const in) {
// 8x8 Transpose is copied from vpx_fdct8x8_sse2()
transpose_16bit_8x8(in, in);
@@ -514,7 +514,7 @@ void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest,
}
}
-static void iadst16_8col(__m128i *const in) {
+void vpx_iadst16_8col_sse2(__m128i *const in) {
// perform 16x16 1-D ADST for 8 columns
__m128i s[16], x[16], u[32], v[32];
const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64);
@@ -874,8 +874,8 @@ void idct16_sse2(__m128i *const in0, __m128i *const in1) {
void iadst16_sse2(__m128i *const in0, __m128i *const in1) {
transpose_16bit_16x16(in0, in1);
- iadst16_8col(in0);
- iadst16_8col(in1);
+ vpx_iadst16_8col_sse2(in0);
+ vpx_iadst16_8col_sse2(in1);
}
// Group the coefficient calculation into smaller functions to prevent stack
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.h
index 5cd5098f149..d573f66c9f8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.h
@@ -697,10 +697,11 @@ static INLINE void idct32_8x32_quarter_3_4_stage_4_to_7(
}
void idct4_sse2(__m128i *const in);
-void idct8_sse2(__m128i *const in);
+void vpx_idct8_sse2(__m128i *const in);
void idct16_sse2(__m128i *const in0, __m128i *const in1);
void iadst4_sse2(__m128i *const in);
void iadst8_sse2(__m128i *const in);
+void vpx_iadst16_8col_sse2(__m128i *const in);
void iadst16_sse2(__m128i *const in0, __m128i *const in1);
void idct32_1024_8x32(const __m128i *const in, __m128i *const out);
void idct32_34_8x32_sse2(const __m128i *const in, __m128i *const out);
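
The SSE2/SSE4.1 hunks above all follow one pattern: helpers that used to be file-static (highbd_idct8x8_half1d, highbd_idct16_4col, idct8_sse2, iadst16_8col) are renamed with a vpx_ prefix, given external linkage, and declared in the shared headers, presumably so other translation units can reuse the kernels. Sketched below with a made-up helper name (vpx_example_half1d_sse2 is not part of the patch):

#include <emmintrin.h>

// In the shared header: the prototype makes the kernel callable from other files.
void vpx_example_half1d_sse2(__m128i *const io);

// In the defining .c file: the 'static' qualifier is dropped and the name gains
// the vpx_ prefix; the body here is only a stand-in for the real kernel.
void vpx_example_half1d_sse2(__m128i *const io) {
  io[0] = _mm_add_epi16(io[0], io[1]);
}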
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_x86.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_x86.h
index 34928fbb56d..0e07a2ac5f8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_x86.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_x86.h
@@ -12,7 +12,6 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-#include "vpx_dsp/x86/bitdepth_conversion_sse2.h"
static INLINE void load_b_values(const int16_t *zbin_ptr, __m128i *zbin,
const int16_t *round_ptr, __m128i *round,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm
index cee4468c1f0..d938c1da4fa 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/subpel_variance_sse2.asm
@@ -114,27 +114,26 @@ SECTION .text
; 11, not 13, if the registers are ordered correctly. May make a minor speed
; difference on Win64
-%ifdef PIC ; 64bit PIC
+%if ARCH_X86_64
%if %2 == 1 ; avg
cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, height, sse
+ x_offset, y_offset, dst, dst_stride, \
+ sec, sec_stride, height, sse
%define sec_str sec_strideq
%else
- cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, x_offset, \
- y_offset, dst, dst_stride, height, sse
+ cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \
+ x_offset, y_offset, dst, dst_stride, \
+ height, sse
%endif
%define block_height heightd
%define bilin_filter sseq
%else
- %if ARCH_X86=1 && CONFIG_PIC=1
+ %if CONFIG_PIC=1
%if %2 == 1 ; avg
cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse, g_bilin_filter, g_pw_8
+ x_offset, y_offset, dst, dst_stride, \
+ sec, sec_stride, height, sse, \
+ g_bilin_filter, g_pw_8
%define block_height dword heightm
%define sec_str sec_stridemp
@@ -152,9 +151,9 @@ SECTION .text
LOAD_IF_USED 0, 1 ; load eax, ecx back
%else
- cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
- y_offset, dst, dst_stride, height, sse, \
- g_bilin_filter, g_pw_8
+ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, dst, dst_stride, \
+ height, sse, g_bilin_filter, g_pw_8
%define block_height heightd
;Store bilin_filter and pw_8 location in stack
@@ -173,25 +172,18 @@ SECTION .text
%endif
%else
%if %2 == 1 ; avg
- cglobal sub_pixel_avg_variance%1xh, 7 + 2 * ARCH_X86_64, \
- 7 + 2 * ARCH_X86_64, 13, src, src_stride, \
- x_offset, y_offset, \
- dst, dst_stride, \
- sec, sec_stride, \
- height, sse
- %if ARCH_X86_64
- %define block_height heightd
- %define sec_str sec_strideq
- %else
+ cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, sec, sec_stride, \
+ height, sse
%define block_height dword heightm
%define sec_str sec_stridemp
- %endif
%else
- cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, x_offset, \
- y_offset, dst, dst_stride, height, sse
+ cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \
+ x_offset, y_offset, dst, dst_stride, \
+ height, sse
%define block_height heightd
%endif
-
%define bilin_filter bilin_filter_m
%endif
%endif
@@ -371,8 +363,8 @@ SECTION .text
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
%if ARCH_X86_64 && %1 > 4
@@ -380,7 +372,7 @@ SECTION .text
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
%endif
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
@@ -397,7 +389,7 @@ SECTION .text
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -694,8 +686,8 @@ SECTION .text
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
%if ARCH_X86_64 && %1 > 4
@@ -703,7 +695,7 @@ SECTION .text
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
%endif
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_y_a m8
%define filter_y_b m9
%define filter_rnd m10
@@ -720,7 +712,7 @@ SECTION .text
add y_offsetq, bilin_filter
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -852,8 +844,8 @@ SECTION .text
jnz .x_nonhalf_y_nonzero
; x_offset == bilin interpolation && y_offset == 0
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
%if ARCH_X86_64 && %1 > 4
@@ -861,7 +853,7 @@ SECTION .text
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
%endif
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
@@ -878,7 +870,7 @@ SECTION .text
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -994,8 +986,8 @@ SECTION .text
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
%if ARCH_X86_64 && %1 > 4
@@ -1003,7 +995,7 @@ SECTION .text
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
%endif
- mova m10, [pw_8]
+ mova m10, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_rnd m10
@@ -1020,7 +1012,7 @@ SECTION .text
add x_offsetq, bilin_filter
%define filter_x_a [x_offsetq]
%define filter_x_b [x_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
@@ -1192,8 +1184,8 @@ SECTION .text
STORE_AND_RET %1
.x_nonhalf_y_nonhalf:
-%ifdef PIC
- lea bilin_filter, [bilin_filter_m]
+%if ARCH_X86_64
+ lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
shl y_offsetd, filter_idx_shift
@@ -1206,7 +1198,7 @@ SECTION .text
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m11, [bilin_filter+y_offsetq+16]
%endif
- mova m12, [pw_8]
+ mova m12, [GLOBAL(pw_8)]
%define filter_x_a m8
%define filter_x_b m9
%define filter_y_a m10
@@ -1234,7 +1226,7 @@ SECTION .text
%define filter_x_b [x_offsetq+16]
%define filter_y_a [y_offsetq]
%define filter_y_b [y_offsetq+16]
-%define filter_rnd [pw_8]
+%define filter_rnd [GLOBAL(pw_8)]
%endif
%endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h b/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h
index 343f27577c0..b0608591f89 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_ports/mem_ops.h
@@ -224,5 +224,4 @@ static VPX_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) {
mem[3] = (MAU_T)((val >> 24) & 0xff);
}
/* clang-format on */
-
#endif // VPX_PORTS_MEM_OPS_H_
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_util/vpx_atomics.h b/chromium/third_party/libvpx/source/libvpx/vpx_util/vpx_atomics.h
index b8cf80daeb5..50469106565 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_util/vpx_atomics.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_util/vpx_atomics.h
@@ -68,7 +68,9 @@ extern "C" {
// on any platform (to discourage programmer errors by setting values directly).
// This primitive MUST be initialized using vpx_atomic_init or VPX_ATOMIC_INIT
// (NOT memset) and accessed through vpx_atomic_ functions.
-typedef struct vpx_atomic_int { volatile int value; } vpx_atomic_int;
+typedef struct vpx_atomic_int {
+ volatile int value;
+} vpx_atomic_int;
#define VPX_ATOMIC_INIT(num) \
{ num }
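
The vpx_atomic_int struct itself is only reflowed here, but the surrounding comment spells out the contract: initialize with vpx_atomic_init or VPX_ATOMIC_INIT (never memset) and go through the vpx_atomic_ functions for every access. A minimal sketch, assuming the vpx_atomic_load_acquire / vpx_atomic_store_release accessors this header provides elsewhere:

#include "vpx_util/vpx_atomics.h"

// Static initialization uses the macro form.
static vpx_atomic_int frame_ready = VPX_ATOMIC_INIT(0);

static void mark_ready(vpx_atomic_int *flag) {
  vpx_atomic_init(flag, 0);           // runtime (re)initialization
  vpx_atomic_store_release(flag, 1);  // publish with release semantics
}

static int is_ready(const vpx_atomic_int *flag) {
  return vpx_atomic_load_acquire(flag);  // observe with acquire semantics
}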
diff --git a/chromium/third_party/libvpx/source/libvpx/vpxenc.c b/chromium/third_party/libvpx/source/libvpx/vpxenc.c
index 9bd4792eb4d..144e60ccf82 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpxenc.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpxenc.c
@@ -2215,9 +2215,9 @@ int main(int argc, const char **argv_) {
if (!global.quiet) {
FOREACH_STREAM(fprintf(
- stderr, "\rPass %d/%d frame %4d/%-4d %7" PRId64 "B %7" PRId64
- "b/f %7" PRId64 "b/s"
- " %7" PRId64 " %s (%.2f fps)\033[K\n",
+ stderr,
+ "\rPass %d/%d frame %4d/%-4d %7" PRId64 "B %7" PRId64 "b/f %7" PRId64
+ "b/s %7" PRId64 " %s (%.2f fps)\033[K\n",
pass + 1, global.passes, frames_in, stream->frames_out,
(int64_t)stream->nbytes,
seen_frames ? (int64_t)(stream->nbytes * 8 / seen_frames) : 0,
diff --git a/chromium/third_party/libvpx/source/libvpx/y4minput.c b/chromium/third_party/libvpx/source/libvpx/y4minput.c
index e2df7a2996c..bf349c0593a 100644
--- a/chromium/third_party/libvpx/source/libvpx/y4minput.c
+++ b/chromium/third_party/libvpx/source/libvpx/y4minput.c
@@ -139,7 +139,6 @@ static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
Conversions which require both horizontal and vertical filtering could
have these steps pipelined, for less memory consumption and better cache
performance, but we do them separately for simplicity.*/
-
#define OC_MINI(_a, _b) ((_a) > (_b) ? (_b) : (_a))
#define OC_MAXI(_a, _b) ((_a) < (_b) ? (_b) : (_a))
#define OC_CLAMPI(_a, _b, _c) (OC_MAXI(_a, OC_MINI(_b, _c)))