author     Allan Sandfeld Jensen <allan.jensen@qt.io>  2017-03-08 10:28:10 +0100
committer  Allan Sandfeld Jensen <allan.jensen@qt.io>  2017-03-20 13:40:30 +0000
commit     e733310db58160074f574c429d48f8308c0afe17 (patch)
tree       f8aef4b7e62a69928dbcf880620eece20f98c6df /chromium/third_party/libvpx/source/libvpx
parent     2f583e4aec1ae3a86fa047829c96b310dc12ecdf (diff)
download   qtwebengine-chromium-e733310db58160074f574c429d48f8308c0afe17.tar.gz
BASELINE: Update Chromium to 56.0.2924.122
Change-Id: I4e04de8f47e47e501c46ed934c76a431c6337ced
Reviewed-by: Michael Brüning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/third_party/libvpx/source/libvpx')
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/build/make/Android.mk | 28
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/build/make/Makefile | 10
-rwxr-xr-x  chromium/third_party/libvpx/source/libvpx/build/make/ads2gas.pl | 8
-rwxr-xr-x  chromium/third_party/libvpx/source/libvpx/build/make/ads2gas_apple.pl | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/build/make/configure.sh | 7
-rwxr-xr-x  chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh | 6
-rwxr-xr-x  chromium/third_party/libvpx/source/libvpx/configure | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c | 34
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/libs.mk | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc | 17
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc | 168
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h | 24
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/webmids.h | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc | 819
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h | 304
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc | 325
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h | 85
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc | 114
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.h | 33
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvreader.cc | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/tools.mk | 110
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/tools/tiny_ssim.c | 200
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/loopfilter_filters.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/idct_msa.c | 127
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/loopfilter_filters_msa.c | 333
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/vp8_macros_msa.h | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/common/onyx.h | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c | 46
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c | 211
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c | 11
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_frame_buffers.c | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c | 3
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c | 245
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h | 18
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c | 55
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c | 7
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h | 3
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c | 35
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c | 142
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c | 63
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c | 50
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c | 40
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c | 18
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c | 59
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h | 20
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h | 7
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/fwd_txfm_neon.c | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_loopfilter_neon.c | 761
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve8_neon.c | 923
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c | 185
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c | 103
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_neon.c | 65
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.asm | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.asm | 150
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.c | 60
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.asm | 144
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c | 519
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_add_neon.asm | 1299
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.asm | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.asm | 19
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c | 15
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.asm | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c | 2
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.asm | 88
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c | 65
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.asm | 30
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.h | 172
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c | 1051
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/loopfilter_neon.c | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/transpose_neon.h | 177
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c | 32
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c | 13
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/fwd_txfm.c | 136
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.c | 421
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/loopfilter.c | 8
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c | 5
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/inv_txfm_msa.h | 14
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_16_msa.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_4_msa.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_8_msa.c | 4
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_msa.h | 235
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/macros_msa.h | 68
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/sad_msa.c | 24
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/variance_msa.c | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve_msa.h | 27
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_convolve.c | 57
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk | 42
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl | 62
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_filter.h | 11
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fdct.h | 57
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_quantize_intrin_sse2.c | 6
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c | 12
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_sse2.c | 59
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c | 10
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c | 32
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.h | 1
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpxdec.c | 59
-rw-r--r--  chromium/third_party/libvpx/source/libvpx/vpxenc.c | 26
114 files changed, 7115 insertions, 4088 deletions
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk b/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk
index 36120170e81..09bdc5d2f70 100644
--- a/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/Android.mk
@@ -71,7 +71,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
include $(CONFIG_DIR)libs-armv7-android-gcc.mk
LOCAL_ARM_MODE := arm
else ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
- include $(CONFIG_DIR)libs-armv8-android-gcc.mk
+ include $(CONFIG_DIR)libs-arm64-android-gcc.mk
LOCAL_ARM_MODE := arm
else ifeq ($(TARGET_ARCH_ABI),x86)
include $(CONFIG_DIR)libs-x86-android-gcc.mk
@@ -101,8 +101,8 @@ LOCAL_CFLAGS := -O3
# like x86inc.asm and x86_abi_support.asm
LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)
-.PRECIOUS: %.asm.s
-$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
+.PRECIOUS: %.asm.S
+$(ASM_CNV_PATH)/libvpx/%.asm.S: $(LIBVPX_PATH)/%.asm
@mkdir -p $(dir $@)
@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@
@@ -132,7 +132,7 @@ endif
# Pull out assembly files, splitting NEON from the rest. This is
# done to specify that the NEON assembly files use NEON assembler flags.
-# x86 assembly matches %.asm, arm matches %.asm.s
+# x86 assembly matches %.asm, arm matches %.asm.S
# x86:
@@ -140,12 +140,12 @@ CODEC_SRCS_ASM_X86 = $(filter %.asm, $(CODEC_SRCS_UNIQUE))
LOCAL_SRC_FILES += $(foreach file, $(CODEC_SRCS_ASM_X86), libvpx/$(file))
# arm:
-CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.s, $(CODEC_SRCS_UNIQUE))
+CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.S, $(CODEC_SRCS_UNIQUE))
CODEC_SRCS_ASM_ARM = $(foreach v, \
$(CODEC_SRCS_ASM_ARM_ALL), \
$(if $(findstring neon,$(v)),,$(v)))
-CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.s, \
- $(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
+CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.S, \
+ $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \
$(CODEC_SRCS_ASM_ARM))
LOCAL_SRC_FILES += $(CODEC_SRCS_ASM_ADS2GAS)
@@ -153,18 +153,19 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
CODEC_SRCS_ASM_NEON = $(foreach v, \
$(CODEC_SRCS_ASM_ARM_ALL),\
$(if $(findstring neon,$(v)),$(v),))
- CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.s, \
- $(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
+ CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.S, \
+ $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \
$(CODEC_SRCS_ASM_NEON))
- LOCAL_SRC_FILES += $(patsubst %.s, \
- %.s.neon, \
+ LOCAL_SRC_FILES += $(patsubst %.S, \
+ %.S.neon, \
$(CODEC_SRCS_ASM_NEON_ADS2GAS))
endif
LOCAL_CFLAGS += \
-DHAVE_CONFIG_H=vpx_config.h \
-I$(LIBVPX_PATH) \
- -I$(ASM_CNV_PATH)
+ -I$(ASM_CNV_PATH) \
+ -I$(ASM_CNV_PATH)/libvpx
LOCAL_MODULE := libvpx
@@ -185,7 +186,8 @@ endif
$$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h
$$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h
-ifneq ($(findstring $(TARGET_ARCH_ABI),x86 x86_64),)
+rtcd_dep_template_CONFIG_ASM_ABIS := x86 x86_64 armeabi-v7a
+ifneq ($(findstring $(TARGET_ARCH_ABI),$(rtcd_dep_template_CONFIG_ASM_ABIS)),)
$$(rtcd_dep_template_SRCS): vpx_config.asm
endif
endef
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/Makefile b/chromium/third_party/libvpx/source/libvpx/build/make/Makefile
index 469eb74c3aa..cba605786cb 100644
--- a/chromium/third_party/libvpx/source/libvpx/build/make/Makefile
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/Makefile
@@ -90,7 +90,7 @@ all:
.PHONY: clean
clean::
- rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.s.o=.asm.s)
+ rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.S.o=.asm.S)
rm -f $(CLEAN-OBJS)
.PHONY: clean
@@ -180,13 +180,13 @@ $(BUILD_PFX)%.asm.o: %.asm
$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
$(qexec)$(AS) $(ASFLAGS) -o $@ $<
-$(BUILD_PFX)%.s.d: %.s
+$(BUILD_PFX)%.S.d: %.S
$(if $(quiet),@echo " [DEP] $@")
$(qexec)mkdir -p $(dir $@)
$(qexec)$(SRC_PATH_BARE)/build/make/gen_asm_deps.sh \
--build-pfx=$(BUILD_PFX) --depfile=$@ $(ASFLAGS) $< > $@
-$(BUILD_PFX)%.s.o: %.s
+$(BUILD_PFX)%.S.o: %.S
$(if $(quiet),@echo " [AS] $@")
$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
$(qexec)$(AS) $(ASFLAGS) -o $@ $<
@@ -198,8 +198,8 @@ $(BUILD_PFX)%.c.S: %.c
$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
$(qexec)$(CC) -S $(CFLAGS) -o $@ $<
-.PRECIOUS: %.asm.s
-$(BUILD_PFX)%.asm.s: %.asm
+.PRECIOUS: %.asm.S
+$(BUILD_PFX)%.asm.S: %.asm
$(if $(quiet),@echo " [ASM CONVERSION] $@")
$(qexec)mkdir -p $(dir $@)
$(qexec)$(ASM_CONVERSION) <$< >$@
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas.pl b/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas.pl
index 7272424af2e..029cc4a56f2 100755
--- a/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas.pl
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas.pl
@@ -138,14 +138,6 @@ while (<STDIN>)
s/DCD(.*)/.long $1/;
s/DCB(.*)/.byte $1/;
- # RN to .req
- if (s/RN\s+([Rr]\d+|lr)/.req $1/)
- {
- print;
- print "$comment_sub$comment\n" if defined $comment;
- next;
- }
-
# Make function visible to linker, and make additional symbol with
# prepended underscore
s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas_apple.pl b/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas_apple.pl
index 1a9e105ba8d..e1ae7b4f871 100755
--- a/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas_apple.pl
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/ads2gas_apple.pl
@@ -120,18 +120,6 @@ while (<STDIN>)
s/DCD(.*)/.long $1/;
s/DCB(.*)/.byte $1/;
- # Build a hash of all the register - alias pairs.
- if (s/(.*)RN(.*)/$1 .req $2/g)
- {
- $register_aliases{trim($1)} = trim($2);
- next;
- }
-
- while (($key, $value) = each(%register_aliases))
- {
- s/\b$key\b/$value/g;
- }
-
# Make function visible to linker, and make additional symbol with
# prepended underscore
s/EXPORT\s+\|([\$\w]*)\|/.globl _$1\n\t.globl $1/;
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh
index 35609e89af4..007e0200023 100644
--- a/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/configure.sh
@@ -635,7 +635,7 @@ setup_gnu_toolchain() {
AS=${AS:-${CROSS}as}
STRIP=${STRIP:-${CROSS}strip}
NM=${NM:-${CROSS}nm}
- AS_SFX=.s
+ AS_SFX=.S
EXE_SFX=
}
@@ -926,7 +926,7 @@ EOF
;;
vs*)
asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl"
- AS_SFX=.s
+ AS_SFX=.S
msvs_arch_dir=arm-msvs
disable_feature multithread
disable_feature unit_tests
@@ -936,6 +936,7 @@ EOF
# only "AppContainerApplication" which requires an AppxManifest.
# Therefore disable the examples, just build the library.
disable_feature examples
+ disable_feature tools
fi
;;
rvct)
@@ -1034,7 +1035,7 @@ EOF
STRIP="$(${XCRUN_FIND} strip)"
NM="$(${XCRUN_FIND} nm)"
RANLIB="$(${XCRUN_FIND} ranlib)"
- AS_SFX=.s
+ AS_SFX=.S
LD="${CXX:-$(${XCRUN_FIND} ld)}"
# ASFLAGS is written here instead of using check_add_asflags
diff --git a/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh b/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh
index e98611d1024..2cf62c117c2 100755
--- a/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh
+++ b/chromium/third_party/libvpx/source/libvpx/build/make/gen_msvs_vcxproj.sh
@@ -82,7 +82,7 @@ generate_filter() {
| sed -e "s,$src_path_bare,," \
-e 's/^[\./]\+//g' -e 's,[:/ ],_,g')
- if ([ "$pat" == "asm" ] || [ "$pat" == "s" ]) && $asm_use_custom_step; then
+ if ([ "$pat" == "asm" ] || [ "$pat" == "s" ] || [ "$pat" == "S" ]) && $asm_use_custom_step; then
# Avoid object file name collisions, i.e. vpx_config.c and
# vpx_config.asm produce the same object file without
# this additional suffix.
@@ -203,7 +203,7 @@ for opt in "$@"; do
# The paths in file_list are fixed outside of the loop.
file_list[${#file_list[@]}]="$opt"
case "$opt" in
- *.asm|*.s) uses_asm=true
+ *.asm|*.[Ss]) uses_asm=true
;;
esac
;;
@@ -452,7 +452,7 @@ generate_vcxproj() {
done
open_tag ItemGroup
- generate_filter "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s"
+ generate_filter "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s;S"
close_tag ItemGroup
open_tag ItemGroup
generate_filter "Header Files" "h;hm;inl;inc;xsd"
diff --git a/chromium/third_party/libvpx/source/libvpx/configure b/chromium/third_party/libvpx/source/libvpx/configure
index 7065dfef538..fb732acf3e5 100755
--- a/chromium/third_party/libvpx/source/libvpx/configure
+++ b/chromium/third_party/libvpx/source/libvpx/configure
@@ -22,6 +22,7 @@ show_help(){
Advanced options:
${toggle_libs} libraries
${toggle_examples} examples
+ ${toggle_tools} tools
${toggle_docs} documentation
${toggle_unit_tests} unit tests
${toggle_decode_perf_tests} build decoder perf tests with unit tests
@@ -155,7 +156,7 @@ all_platforms="${all_platforms} generic-gnu"
# all_targets is a list of all targets that can be configured
# note that these should be in dependency order for now.
-all_targets="libs examples docs"
+all_targets="libs examples tools docs"
# all targets available are enabled, by default.
for t in ${all_targets}; do
@@ -331,6 +332,7 @@ CMDLINE_SELECT="
libs
examples
+ tools
docs
libc
as
@@ -476,7 +478,7 @@ EOF
#
# Write makefiles for all enabled targets
#
- for tgt in libs examples docs solution; do
+ for tgt in libs examples tools docs solution; do
tgt_fn="$tgt-$toolchain.mk"
if enabled $tgt; then
diff --git a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
index cecdce0804c..fa2df7271b2 100644
--- a/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/examples/vp9_spatial_svc_encoder.c
@@ -84,6 +84,8 @@ static const arg_def_t speed_arg =
ARG_DEF("sp", "speed", 1, "speed configuration");
static const arg_def_t aqmode_arg =
ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
+static const arg_def_t bitrates_arg =
+ ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]");
#if CONFIG_VP9_HIGHBITDEPTH
static const struct arg_enum_list bitdepth_enum[] = {
@@ -124,6 +126,7 @@ static const arg_def_t *svc_args[] = { &frames_arg,
#endif
&speed_arg,
&rc_end_usage_arg,
+ &bitrates_arg,
NULL };
static const uint32_t default_frames_to_skip = 0;
@@ -250,6 +253,9 @@ static void parse_command_line(int argc, const char **argv_,
} else if (arg_match(&arg, &scale_factors_arg, argi)) {
snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
string_options, arg.val);
+ } else if (arg_match(&arg, &bitrates_arg, argi)) {
+ snprintf(string_options, sizeof(string_options), "%s bitrates=%s",
+ string_options, arg.val);
} else if (arg_match(&arg, &passes_arg, argi)) {
passes = arg_parse_uint(&arg);
if (passes < 1 || passes > 2) {
@@ -417,7 +423,6 @@ static void set_rate_control_stats(struct RateControlStats *rc,
for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
const int layer = sl * cfg->ts_number_layers + tl;
- const int tlayer0 = sl * cfg->ts_number_layers;
if (cfg->ts_number_layers == 1)
rc->layer_framerate[layer] = framerate;
else
@@ -428,8 +433,8 @@ static void set_rate_control_stats(struct RateControlStats *rc,
cfg->layer_target_bitrate[layer - 1]) /
(rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
} else {
- rc->layer_pfb[tlayer0] = 1000.0 * cfg->layer_target_bitrate[tlayer0] /
- rc->layer_framerate[tlayer0];
+ rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
+ rc->layer_framerate[layer];
}
rc->layer_input_frames[layer] = 0;
rc->layer_enc_frames[layer] = 0;
@@ -449,12 +454,13 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
vpx_codec_enc_cfg_t *cfg,
int frame_cnt) {
unsigned int sl, tl;
- int tot_num_frames = 0;
double perc_fluctuation = 0.0;
+ int tot_num_frames = 0;
printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
cfg->ss_number_layers, cfg->ts_number_layers);
for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+ tot_num_frames = 0;
for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
const int layer = sl * cfg->ts_number_layers + tl;
const int num_dropped =
@@ -462,7 +468,7 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
? (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer])
: (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] -
1);
- if (!sl) tot_num_frames += rc->layer_input_frames[layer];
+ tot_num_frames += rc->layer_input_frames[layer];
rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
rc->layer_encoding_bitrate[layer] /
tot_num_frames;
@@ -620,7 +626,7 @@ int main(int argc, const char **argv) {
struct RateControlStats rc;
vpx_svc_layer_id_t layer_id;
vpx_svc_ref_frame_config_t ref_frame_config;
- int sl, tl;
+ unsigned int sl, tl;
double sum_bitrate = 0.0;
double sum_bitrate2 = 0.0;
double framerate = 30.0;
@@ -695,6 +701,8 @@ int main(int argc, const char **argv) {
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+ if (svc_ctx.speed >= 5)
+ vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
// Encode frames
while (!end_of_stream) {
@@ -730,7 +738,7 @@ int main(int argc, const char **argv) {
&ref_frame_config);
// Keep track of input frames, to account for frame drops in rate control
// stats/metrics.
- for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+ for (sl = 0; sl < (unsigned int)enc_cfg.ss_number_layers; ++sl) {
++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
layer_id.temporal_layer_id];
}
@@ -793,7 +801,7 @@ int main(int argc, const char **argv) {
rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
// Keep count of rate control stats per layer, for non-key
// frames.
- if (tl == layer_id.temporal_layer_id &&
+ if (tl == (unsigned int)layer_id.temporal_layer_id &&
!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
rc.layer_avg_rate_mismatch[layer] +=
@@ -807,7 +815,7 @@ int main(int argc, const char **argv) {
// Update for short-time encoding bitrate states, for moving
// window of size rc->window, shifted by rc->window / 2.
// Ignore first window segment, due to key frame.
- if (frame_cnt > rc.window_size) {
+ if (frame_cnt > (unsigned int)rc.window_size) {
tl = layer_id.temporal_layer_id;
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
@@ -823,13 +831,14 @@ int main(int argc, const char **argv) {
}
// Second shifted window.
- if (frame_cnt > rc.window_size + rc.window_size / 2) {
+ if (frame_cnt >
+ (unsigned int)(rc.window_size + rc.window_size / 2)) {
tl = layer_id.temporal_layer_id;
for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
}
- if (frame_cnt > 2 * rc.window_size &&
+ if (frame_cnt > (unsigned int)(2 * rc.window_size) &&
frame_cnt % rc.window_size == 0) {
rc.window_count += 1;
rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
@@ -842,10 +851,11 @@ int main(int argc, const char **argv) {
}
#endif
}
-
+ /*
printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
!!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
(int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
+ */
if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
++frames_received;
diff --git a/chromium/third_party/libvpx/source/libvpx/libs.mk b/chromium/third_party/libvpx/source/libvpx/libs.mk
index 6e12b540454..f4f48cc1621 100644
--- a/chromium/third_party/libvpx/source/libvpx/libs.mk
+++ b/chromium/third_party/libvpx/source/libvpx/libs.mk
@@ -12,7 +12,7 @@
# ARM assembly files are written in RVCT-style. We use some make magic to
# filter those files to allow GCC compilation
ifeq ($(ARCH_ARM),yes)
- ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.s,.asm)
+ ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.S,.asm)
else
ASM:=.asm
endif
@@ -366,7 +366,7 @@ endif
#
# Add assembler dependencies for configuration.
#
-$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
+$(filter %.S.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx
index 73f83032225..1f8a13d78c1 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/README.libvpx
@@ -1,5 +1,5 @@
URL: https://chromium.googlesource.com/webm/libwebm
-Version: 32d5ac49414a8914ec1e1f285f3f927c6e8ec29d
+Version: 9732ae991efb71aced4267d4794918279e362d99
License: BSD
License File: LICENSE.txt
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc
index 4f91318f3e9..6dab146dd98 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/file_util.cc
@@ -14,6 +14,7 @@
#include <cstdio>
#include <cstdlib>
+#include <cstring>
#include <fstream>
#include <ios>
@@ -21,13 +22,23 @@ namespace libwebm {
std::string GetTempFileName() {
#if !defined _MSC_VER && !defined __MINGW32__
- char temp_file_name_template[] = "libwebm_temp.XXXXXX";
+ std::string temp_file_name_template_str =
+ std::string(std::getenv("TEST_TMPDIR") ? std::getenv("TEST_TMPDIR") :
+ ".") +
+ "/libwebm_temp.XXXXXX";
+ char* temp_file_name_template =
+ new char[temp_file_name_template_str.length() + 1];
+ memset(temp_file_name_template, 0, temp_file_name_template_str.length() + 1);
+ temp_file_name_template_str.copy(temp_file_name_template,
+ temp_file_name_template_str.length(), 0);
int fd = mkstemp(temp_file_name_template);
+ std::string temp_file_name =
+ (fd != -1) ? std::string(temp_file_name_template) : std::string();
+ delete[] temp_file_name_template;
if (fd != -1) {
close(fd);
- return std::string(temp_file_name_template);
}
- return std::string();
+ return temp_file_name;
#else
char tmp_file_name[_MAX_PATH];
errno_t err = tmpnam_s(tmp_file_name);
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc
index e1a9842fb6e..e1618ce75a7 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.cc
@@ -7,12 +7,15 @@
// be found in the AUTHORS file in the root of the source tree.
#include "hdr_util.h"
+#include <climits>
#include <cstddef>
#include <new>
#include "mkvparser/mkvparser.h"
namespace libwebm {
+const int Vp9CodecFeatures::kValueNotPresent = INT_MAX;
+
bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
PrimaryChromaticityPtr* muxer_pc) {
muxer_pc->reset(new (std::nothrow)
@@ -29,9 +32,9 @@ bool MasteringMetadataValuePresent(double value) {
bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm,
mkvmuxer::MasteringMetadata* muxer_mm) {
if (MasteringMetadataValuePresent(parser_mm.luminance_max))
- muxer_mm->luminance_max = parser_mm.luminance_max;
+ muxer_mm->set_luminance_max(parser_mm.luminance_max);
if (MasteringMetadataValuePresent(parser_mm.luminance_min))
- muxer_mm->luminance_min = parser_mm.luminance_min;
+ muxer_mm->set_luminance_min(parser_mm.luminance_min);
PrimaryChromaticityPtr r_ptr(NULL);
PrimaryChromaticityPtr g_ptr(NULL);
@@ -73,34 +76,37 @@ bool CopyColour(const mkvparser::Colour& parser_colour,
return false;
if (ColourValuePresent(parser_colour.matrix_coefficients))
- muxer_colour->matrix_coefficients = parser_colour.matrix_coefficients;
+ muxer_colour->set_matrix_coefficients(parser_colour.matrix_coefficients);
if (ColourValuePresent(parser_colour.bits_per_channel))
- muxer_colour->bits_per_channel = parser_colour.bits_per_channel;
- if (ColourValuePresent(parser_colour.chroma_subsampling_horz))
- muxer_colour->chroma_subsampling_horz =
- parser_colour.chroma_subsampling_horz;
- if (ColourValuePresent(parser_colour.chroma_subsampling_vert))
- muxer_colour->chroma_subsampling_vert =
- parser_colour.chroma_subsampling_vert;
+ muxer_colour->set_bits_per_channel(parser_colour.bits_per_channel);
+ if (ColourValuePresent(parser_colour.chroma_subsampling_horz)) {
+ muxer_colour->set_chroma_subsampling_horz(
+ parser_colour.chroma_subsampling_horz);
+ }
+ if (ColourValuePresent(parser_colour.chroma_subsampling_vert)) {
+ muxer_colour->set_chroma_subsampling_vert(
+ parser_colour.chroma_subsampling_vert);
+ }
if (ColourValuePresent(parser_colour.cb_subsampling_horz))
- muxer_colour->cb_subsampling_horz = parser_colour.cb_subsampling_horz;
+ muxer_colour->set_cb_subsampling_horz(parser_colour.cb_subsampling_horz);
if (ColourValuePresent(parser_colour.cb_subsampling_vert))
- muxer_colour->cb_subsampling_vert = parser_colour.cb_subsampling_vert;
+ muxer_colour->set_cb_subsampling_vert(parser_colour.cb_subsampling_vert);
if (ColourValuePresent(parser_colour.chroma_siting_horz))
- muxer_colour->chroma_siting_horz = parser_colour.chroma_siting_horz;
+ muxer_colour->set_chroma_siting_horz(parser_colour.chroma_siting_horz);
if (ColourValuePresent(parser_colour.chroma_siting_vert))
- muxer_colour->chroma_siting_vert = parser_colour.chroma_siting_vert;
+ muxer_colour->set_chroma_siting_vert(parser_colour.chroma_siting_vert);
if (ColourValuePresent(parser_colour.range))
- muxer_colour->range = parser_colour.range;
- if (ColourValuePresent(parser_colour.transfer_characteristics))
- muxer_colour->transfer_characteristics =
- parser_colour.transfer_characteristics;
+ muxer_colour->set_range(parser_colour.range);
+ if (ColourValuePresent(parser_colour.transfer_characteristics)) {
+ muxer_colour->set_transfer_characteristics(
+ parser_colour.transfer_characteristics);
+ }
if (ColourValuePresent(parser_colour.primaries))
- muxer_colour->primaries = parser_colour.primaries;
+ muxer_colour->set_primaries(parser_colour.primaries);
if (ColourValuePresent(parser_colour.max_cll))
- muxer_colour->max_cll = parser_colour.max_cll;
+ muxer_colour->set_max_cll(parser_colour.max_cll);
if (ColourValuePresent(parser_colour.max_fall))
- muxer_colour->max_fall = parser_colour.max_fall;
+ muxer_colour->set_max_fall(parser_colour.max_fall);
if (parser_colour.mastering_metadata) {
mkvmuxer::MasteringMetadata muxer_mm;
@@ -116,8 +122,8 @@ bool CopyColour(const mkvparser::Colour& parser_colour,
//
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-// | ID Byte | Length | |
-// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
+// | ID Byte | Length | |
+// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
// | |
// : Bytes 1..Length of Codec Feature :
// | |
@@ -132,51 +138,83 @@ bool CopyColour(const mkvparser::Colour& parser_colour,
//
// The X bit is reserved.
//
-// Currently only profile level is supported. ID byte must be set to 1, and
-// length must be 1. Supported values are:
-//
-// 10: Level 1
-// 11: Level 1.1
-// 20: Level 2
-// 21: Level 2.1
-// 30: Level 3
-// 31: Level 3.1
-// 40: Level 4
-// 41: Level 4.1
-// 50: Level 5
-// 51: Level 5.1
-// 52: Level 5.2
-// 60: Level 6
-// 61: Level 6.1
-// 62: Level 6.2
-//
// See the following link for more information:
// http://www.webmproject.org/vp9/profiles/
-int ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length) {
- const int kVpxCodecPrivateLength = 3;
- if (!private_data || length != kVpxCodecPrivateLength)
- return 0;
-
- const uint8_t id_byte = *private_data;
- if (id_byte != 1)
- return 0;
-
- const int kVpxProfileLength = 1;
- const uint8_t length_byte = private_data[1];
- if (length_byte != kVpxProfileLength)
- return 0;
-
- const int level = static_cast<int>(private_data[2]);
-
- const int kNumLevels = 14;
- const int levels[kNumLevels] = {10, 11, 20, 21, 30, 31, 40,
- 41, 50, 51, 52, 60, 61, 62};
+bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length,
+ Vp9CodecFeatures* features) {
+ const int kVpxCodecPrivateMinLength = 3;
+ if (!private_data || !features || length < kVpxCodecPrivateMinLength)
+ return false;
- for (int i = 0; i < kNumLevels; ++i) {
- if (level == levels[i])
- return level;
- }
+ const uint8_t kVp9ProfileId = 1;
+ const uint8_t kVp9LevelId = 2;
+ const uint8_t kVp9BitDepthId = 3;
+ const uint8_t kVp9ChromaSubsamplingId = 4;
+ const int kVpxFeatureLength = 1;
+ int offset = 0;
+
+ // Set features to not set.
+ features->profile = Vp9CodecFeatures::kValueNotPresent;
+ features->level = Vp9CodecFeatures::kValueNotPresent;
+ features->bit_depth = Vp9CodecFeatures::kValueNotPresent;
+ features->chroma_subsampling = Vp9CodecFeatures::kValueNotPresent;
+ do {
+ const uint8_t id_byte = private_data[offset++];
+ const uint8_t length_byte = private_data[offset++];
+ if (length_byte != kVpxFeatureLength)
+ return false;
+ if (id_byte == kVp9ProfileId) {
+ const int priv_profile = static_cast<int>(private_data[offset++]);
+ if (priv_profile < 0 || priv_profile > 3)
+ return false;
+ if (features->profile != Vp9CodecFeatures::kValueNotPresent &&
+ features->profile != priv_profile) {
+ return false;
+ }
+ features->profile = priv_profile;
+ } else if (id_byte == kVp9LevelId) {
+ const int priv_level = static_cast<int>(private_data[offset++]);
+
+ const int kNumLevels = 14;
+ const int levels[kNumLevels] = {10, 11, 20, 21, 30, 31, 40,
+ 41, 50, 51, 52, 60, 61, 62};
+
+ for (int i = 0; i < kNumLevels; ++i) {
+ if (priv_level == levels[i]) {
+ if (features->level != Vp9CodecFeatures::kValueNotPresent &&
+ features->level != priv_level) {
+ return false;
+ }
+ features->level = priv_level;
+ break;
+ }
+ }
+ if (features->level == Vp9CodecFeatures::kValueNotPresent)
+ return false;
+ } else if (id_byte == kVp9BitDepthId) {
+ const int priv_profile = static_cast<int>(private_data[offset++]);
+ if (priv_profile != 8 && priv_profile != 10 && priv_profile != 12)
+ return false;
+ if (features->bit_depth != Vp9CodecFeatures::kValueNotPresent &&
+ features->bit_depth != priv_profile) {
+ return false;
+ }
+ features->bit_depth = priv_profile;
+ } else if (id_byte == kVp9ChromaSubsamplingId) {
+ const int priv_profile = static_cast<int>(private_data[offset++]);
+ if (priv_profile != 0 && priv_profile != 2 && priv_profile != 3)
+ return false;
+ if (features->chroma_subsampling != Vp9CodecFeatures::kValueNotPresent &&
+ features->chroma_subsampling != priv_profile) {
+ return false;
+ }
+ features->chroma_subsampling = priv_profile;
+ } else {
+ // Invalid ID.
+ return false;
+ }
+ } while (offset + kVpxCodecPrivateMinLength <= length);
- return 0;
+ return true;
}
} // namespace libwebm
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h
index d30c2b9f2a0..689fb30a3fc 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/hdr_util.h
@@ -28,6 +28,25 @@ namespace libwebm {
// TODO(tomfinegan): These should be moved to libwebm_utils once c++11 is
// required by libwebm.
+// Features of the VP9 codec that may be set in the CodecPrivate of a VP9 video
+// stream. A value of kValueNotPresent represents that the value was not set in
+// the CodecPrivate.
+struct Vp9CodecFeatures {
+ static const int kValueNotPresent;
+
+ Vp9CodecFeatures()
+ : profile(kValueNotPresent),
+ level(kValueNotPresent),
+ bit_depth(kValueNotPresent),
+ chroma_subsampling(kValueNotPresent) {}
+ ~Vp9CodecFeatures() {}
+
+ int profile;
+ int level;
+ int bit_depth;
+ int chroma_subsampling;
+};
+
typedef std::auto_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr;
bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
@@ -43,8 +62,9 @@ bool ColourValuePresent(long long value);
bool CopyColour(const mkvparser::Colour& parser_colour,
mkvmuxer::Colour* muxer_colour);
-// Returns VP9 profile upon success or 0 upon failure.
-int ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length);
+// Returns true if |features| is set to one or more valid values.
+bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length,
+ Vp9CodecFeatures* features);
} // namespace libwebm
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/webmids.h b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/webmids.h
index 32a0c5fb911..89d722a71bc 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/webmids.h
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/common/webmids.h
@@ -124,6 +124,14 @@ enum MkvId {
kMkvLuminanceMin = 0x55DA,
// end mastering metadata
// end colour
+ // projection
+ kMkvProjection = 0x7670,
+ kMkvProjectionType = 0x7671,
+ kMkvProjectionPrivate = 0x7672,
+ kMkvProjectionPoseYaw = 0x7673,
+ kMkvProjectionPosePitch = 0x7674,
+ kMkvProjectionPoseRoll = 0x7675,
+ // end projection
// audio
kMkvAudio = 0xE1,
kMkvSamplingFrequency = 0xB5,
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc
index c79ce24ed35..299b45c989c 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.cc
@@ -16,6 +16,7 @@
#include <ctime>
#include <memory>
#include <new>
+#include <string>
#include <vector>
#include "common/webmids.h"
@@ -25,10 +26,19 @@
namespace mkvmuxer {
+const float PrimaryChromaticity::kChromaticityMin = 0.0f;
+const float PrimaryChromaticity::kChromaticityMax = 1.0f;
+const float MasteringMetadata::kMinLuminance = 0.0f;
+const float MasteringMetadata::kMinLuminanceMax = 999.99f;
+const float MasteringMetadata::kMaxLuminanceMax = 9999.99f;
const float MasteringMetadata::kValueNotPresent = FLT_MAX;
const uint64_t Colour::kValueNotPresent = UINT64_MAX;
namespace {
+
+const char kDocTypeWebm[] = "webm";
+const char kDocTypeMatroska[] = "matroska";
+
// Deallocate the string designated by |dst|, and then copy the |src|
// string to |dst|. The caller owns both the |src| string and the
// |dst| copy (hence the caller is responsible for eventually
@@ -63,7 +73,7 @@ bool CopyChromaticity(const PrimaryChromaticity* src,
if (!dst)
return false;
- dst->reset(new (std::nothrow) PrimaryChromaticity(src->x, src->y));
+ dst->reset(new (std::nothrow) PrimaryChromaticity(src->x(), src->y()));
if (!dst->get())
return false;
@@ -80,36 +90,57 @@ IMkvWriter::IMkvWriter() {}
IMkvWriter::~IMkvWriter() {}
-bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version) {
+bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version,
+ const char* const doc_type) {
// Level 0
- uint64_t size = EbmlElementSize(libwebm::kMkvEBMLVersion, UINT64_C(1));
- size += EbmlElementSize(libwebm::kMkvEBMLReadVersion, UINT64_C(1));
- size += EbmlElementSize(libwebm::kMkvEBMLMaxIDLength, UINT64_C(4));
- size += EbmlElementSize(libwebm::kMkvEBMLMaxSizeLength, UINT64_C(8));
- size += EbmlElementSize(libwebm::kMkvDocType, "webm");
- size += EbmlElementSize(libwebm::kMkvDocTypeVersion, doc_type_version);
- size += EbmlElementSize(libwebm::kMkvDocTypeReadVersion, UINT64_C(2));
+ uint64_t size =
+ EbmlElementSize(libwebm::kMkvEBMLVersion, static_cast<uint64>(1));
+ size += EbmlElementSize(libwebm::kMkvEBMLReadVersion, static_cast<uint64>(1));
+ size += EbmlElementSize(libwebm::kMkvEBMLMaxIDLength, static_cast<uint64>(4));
+ size +=
+ EbmlElementSize(libwebm::kMkvEBMLMaxSizeLength, static_cast<uint64>(8));
+ size += EbmlElementSize(libwebm::kMkvDocType, doc_type);
+ size += EbmlElementSize(libwebm::kMkvDocTypeVersion,
+ static_cast<uint64>(doc_type_version));
+ size +=
+ EbmlElementSize(libwebm::kMkvDocTypeReadVersion, static_cast<uint64>(2));
if (!WriteEbmlMasterElement(writer, libwebm::kMkvEBML, size))
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvEBMLVersion, UINT64_C(1)))
+ if (!WriteEbmlElement(writer, libwebm::kMkvEBMLVersion,
+ static_cast<uint64>(1))) {
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvEBMLReadVersion, UINT64_C(1)))
+ }
+ if (!WriteEbmlElement(writer, libwebm::kMkvEBMLReadVersion,
+ static_cast<uint64>(1))) {
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxIDLength, UINT64_C(4)))
+ }
+ if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxIDLength,
+ static_cast<uint64>(4))) {
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxSizeLength, UINT64_C(8)))
+ }
+ if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxSizeLength,
+ static_cast<uint64>(8))) {
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvDocType, "webm"))
+ }
+ if (!WriteEbmlElement(writer, libwebm::kMkvDocType, doc_type))
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeVersion, doc_type_version))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeVersion,
+ static_cast<uint64>(doc_type_version))) {
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeReadVersion, UINT64_C(2)))
+ }
+ if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeReadVersion,
+ static_cast<uint64>(2))) {
return false;
+ }
return true;
}
+bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version) {
+ return WriteEbmlHeader(writer, doc_type_version, kDocTypeWebm);
+}
+
bool WriteEbmlHeader(IMkvWriter* writer) {
return WriteEbmlHeader(writer, mkvmuxer::Segment::kDefaultDocTypeVersion);
}
@@ -262,15 +293,17 @@ bool CuePoint::Write(IMkvWriter* writer) const {
if (!writer || track_ < 1 || cluster_pos_ < 1)
return false;
- uint64_t size =
- EbmlElementSize(libwebm::kMkvCueClusterPosition, cluster_pos_);
- size += EbmlElementSize(libwebm::kMkvCueTrack, track_);
+ uint64_t size = EbmlElementSize(libwebm::kMkvCueClusterPosition,
+ static_cast<uint64>(cluster_pos_));
+ size += EbmlElementSize(libwebm::kMkvCueTrack, static_cast<uint64>(track_));
if (output_block_number_ && block_number_ > 1)
- size += EbmlElementSize(libwebm::kMkvCueBlockNumber, block_number_);
+ size += EbmlElementSize(libwebm::kMkvCueBlockNumber,
+ static_cast<uint64>(block_number_));
const uint64_t track_pos_size =
EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size;
const uint64_t payload_size =
- EbmlElementSize(libwebm::kMkvCueTime, time_) + track_pos_size;
+ EbmlElementSize(libwebm::kMkvCueTime, static_cast<uint64>(time_)) +
+ track_pos_size;
if (!WriteEbmlMasterElement(writer, libwebm::kMkvCuePoint, payload_size))
return false;
@@ -279,18 +312,27 @@ bool CuePoint::Write(IMkvWriter* writer) const {
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvCueTime, time_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvCueTime,
+ static_cast<uint64>(time_))) {
return false;
+ }
if (!WriteEbmlMasterElement(writer, libwebm::kMkvCueTrackPositions, size))
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvCueTrack, track_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvCueTrack,
+ static_cast<uint64>(track_))) {
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvCueClusterPosition, cluster_pos_))
+ }
+ if (!WriteEbmlElement(writer, libwebm::kMkvCueClusterPosition,
+ static_cast<uint64>(cluster_pos_))) {
return false;
- if (output_block_number_ && block_number_ > 1)
- if (!WriteEbmlElement(writer, libwebm::kMkvCueBlockNumber, block_number_))
+ }
+ if (output_block_number_ && block_number_ > 1) {
+ if (!WriteEbmlElement(writer, libwebm::kMkvCueBlockNumber,
+ static_cast<uint64>(block_number_))) {
return false;
+ }
+ }
const int64_t stop_position = writer->Position();
if (stop_position < 0)
@@ -303,15 +345,17 @@ bool CuePoint::Write(IMkvWriter* writer) const {
}
uint64_t CuePoint::PayloadSize() const {
- uint64_t size =
- EbmlElementSize(libwebm::kMkvCueClusterPosition, cluster_pos_);
- size += EbmlElementSize(libwebm::kMkvCueTrack, track_);
+ uint64_t size = EbmlElementSize(libwebm::kMkvCueClusterPosition,
+ static_cast<uint64>(cluster_pos_));
+ size += EbmlElementSize(libwebm::kMkvCueTrack, static_cast<uint64>(track_));
if (output_block_number_ && block_number_ > 1)
- size += EbmlElementSize(libwebm::kMkvCueBlockNumber, block_number_);
+ size += EbmlElementSize(libwebm::kMkvCueBlockNumber,
+ static_cast<uint64>(block_number_));
const uint64_t track_pos_size =
EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size;
const uint64_t payload_size =
- EbmlElementSize(libwebm::kMkvCueTime, time_) + track_pos_size;
+ EbmlElementSize(libwebm::kMkvCueTime, static_cast<uint64>(time_)) +
+ track_pos_size;
return payload_size;
}
@@ -456,8 +500,9 @@ bool ContentEncAESSettings::Write(IMkvWriter* writer) const {
return false;
if (!WriteEbmlElement(writer, libwebm::kMkvAESSettingsCipherMode,
- cipher_mode_))
+ static_cast<uint64>(cipher_mode_))) {
return false;
+ }
const int64_t stop_position = writer->Position();
if (stop_position < 0 ||
@@ -468,8 +513,8 @@ bool ContentEncAESSettings::Write(IMkvWriter* writer) const {
}
uint64_t ContentEncAESSettings::PayloadSize() const {
- uint64_t size =
- EbmlElementSize(libwebm::kMkvAESSettingsCipherMode, cipher_mode_);
+ uint64_t size = EbmlElementSize(libwebm::kMkvAESSettingsCipherMode,
+ static_cast<uint64>(cipher_mode_));
return size;
}
@@ -529,20 +574,22 @@ bool ContentEncoding::Write(IMkvWriter* writer) const {
encoding_size))
return false;
if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingOrder,
- encoding_order_))
+ static_cast<uint64>(encoding_order_)))
return false;
if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingScope,
- encoding_scope_))
+ static_cast<uint64>(encoding_scope_)))
return false;
if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingType,
- encoding_type_))
+ static_cast<uint64>(encoding_type_)))
return false;
if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncryption,
encryption_size))
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvContentEncAlgo, enc_algo_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvContentEncAlgo,
+ static_cast<uint64>(enc_algo_))) {
return false;
+ }
if (!WriteEbmlElement(writer, libwebm::kMkvContentEncKeyID, enc_key_id_,
enc_key_id_length_))
return false;
@@ -571,12 +618,12 @@ uint64_t ContentEncoding::EncodingSize(uint64_t compresion_size,
EbmlMasterElementSize(libwebm::kMkvContentEncryption, encryption_size) +
encryption_size;
}
- encoding_size +=
- EbmlElementSize(libwebm::kMkvContentEncodingType, encoding_type_);
- encoding_size +=
- EbmlElementSize(libwebm::kMkvContentEncodingScope, encoding_scope_);
- encoding_size +=
- EbmlElementSize(libwebm::kMkvContentEncodingOrder, encoding_order_);
+ encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingType,
+ static_cast<uint64>(encoding_type_));
+ encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingScope,
+ static_cast<uint64>(encoding_scope_));
+ encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingOrder,
+ static_cast<uint64>(encoding_order_));
return encoding_size;
}
@@ -586,7 +633,8 @@ uint64_t ContentEncoding::EncryptionSize() const {
uint64_t encryption_size = EbmlElementSize(libwebm::kMkvContentEncKeyID,
enc_key_id_, enc_key_id_length_);
- encryption_size += EbmlElementSize(libwebm::kMkvContentEncAlgo, enc_algo_);
+ encryption_size += EbmlElementSize(libwebm::kMkvContentEncAlgo,
+ static_cast<uint64>(enc_algo_));
return encryption_size + aes_size;
}
@@ -664,9 +712,10 @@ ContentEncoding* Track::GetContentEncodingByIndex(uint32_t index) const {
}
uint64_t Track::PayloadSize() const {
- uint64_t size = EbmlElementSize(libwebm::kMkvTrackNumber, number_);
- size += EbmlElementSize(libwebm::kMkvTrackUID, uid_);
- size += EbmlElementSize(libwebm::kMkvTrackType, type_);
+ uint64_t size =
+ EbmlElementSize(libwebm::kMkvTrackNumber, static_cast<uint64>(number_));
+ size += EbmlElementSize(libwebm::kMkvTrackUID, static_cast<uint64>(uid_));
+ size += EbmlElementSize(libwebm::kMkvTrackType, static_cast<uint64>(type_));
if (codec_id_)
size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_);
if (codec_private_)
@@ -676,15 +725,22 @@ uint64_t Track::PayloadSize() const {
size += EbmlElementSize(libwebm::kMkvLanguage, language_);
if (name_)
size += EbmlElementSize(libwebm::kMkvName, name_);
- if (max_block_additional_id_)
+ if (max_block_additional_id_) {
size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID,
- max_block_additional_id_);
- if (codec_delay_)
- size += EbmlElementSize(libwebm::kMkvCodecDelay, codec_delay_);
- if (seek_pre_roll_)
- size += EbmlElementSize(libwebm::kMkvSeekPreRoll, seek_pre_roll_);
- if (default_duration_)
- size += EbmlElementSize(libwebm::kMkvDefaultDuration, default_duration_);
+ static_cast<uint64>(max_block_additional_id_));
+ }
+ if (codec_delay_) {
+ size += EbmlElementSize(libwebm::kMkvCodecDelay,
+ static_cast<uint64>(codec_delay_));
+ }
+ if (seek_pre_roll_) {
+ size += EbmlElementSize(libwebm::kMkvSeekPreRoll,
+ static_cast<uint64>(seek_pre_roll_));
+ }
+ if (default_duration_) {
+ size += EbmlElementSize(libwebm::kMkvDefaultDuration,
+ static_cast<uint64>(default_duration_));
+ }
if (content_encoding_entries_size_ > 0) {
uint64_t content_encodings_size = 0;
@@ -722,55 +778,64 @@ bool Track::Write(IMkvWriter* writer) const {
if (!WriteEbmlMasterElement(writer, libwebm::kMkvTrackEntry, payload_size))
return false;
- uint64_t size = EbmlElementSize(libwebm::kMkvTrackNumber, number_);
- size += EbmlElementSize(libwebm::kMkvTrackUID, uid_);
- size += EbmlElementSize(libwebm::kMkvTrackType, type_);
+ uint64_t size =
+ EbmlElementSize(libwebm::kMkvTrackNumber, static_cast<uint64>(number_));
+ size += EbmlElementSize(libwebm::kMkvTrackUID, static_cast<uint64>(uid_));
+ size += EbmlElementSize(libwebm::kMkvTrackType, static_cast<uint64>(type_));
if (codec_id_)
size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_);
if (codec_private_)
size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_,
- codec_private_length_);
+ static_cast<uint64>(codec_private_length_));
if (language_)
size += EbmlElementSize(libwebm::kMkvLanguage, language_);
if (name_)
size += EbmlElementSize(libwebm::kMkvName, name_);
if (max_block_additional_id_)
size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID,
- max_block_additional_id_);
+ static_cast<uint64>(max_block_additional_id_));
if (codec_delay_)
- size += EbmlElementSize(libwebm::kMkvCodecDelay, codec_delay_);
+ size += EbmlElementSize(libwebm::kMkvCodecDelay,
+ static_cast<uint64>(codec_delay_));
if (seek_pre_roll_)
- size += EbmlElementSize(libwebm::kMkvSeekPreRoll, seek_pre_roll_);
+ size += EbmlElementSize(libwebm::kMkvSeekPreRoll,
+ static_cast<uint64>(seek_pre_roll_));
if (default_duration_)
- size += EbmlElementSize(libwebm::kMkvDefaultDuration, default_duration_);
+ size += EbmlElementSize(libwebm::kMkvDefaultDuration,
+ static_cast<uint64>(default_duration_));
const int64_t payload_position = writer->Position();
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvTrackNumber, number_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTrackNumber,
+ static_cast<uint64>(number_)))
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvTrackUID, uid_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTrackUID,
+ static_cast<uint64>(uid_)))
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvTrackType, type_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTrackType,
+ static_cast<uint64>(type_)))
return false;
if (max_block_additional_id_) {
if (!WriteEbmlElement(writer, libwebm::kMkvMaxBlockAdditionID,
- max_block_additional_id_)) {
+ static_cast<uint64>(max_block_additional_id_))) {
return false;
}
}
if (codec_delay_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvCodecDelay, codec_delay_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvCodecDelay,
+ static_cast<uint64>(codec_delay_)))
return false;
}
if (seek_pre_roll_) {
- if (!WriteEbmlElement(writer, libwebm::kMkvSeekPreRoll, seek_pre_roll_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvSeekPreRoll,
+ static_cast<uint64>(seek_pre_roll_)))
return false;
}
if (default_duration_) {
if (!WriteEbmlElement(writer, libwebm::kMkvDefaultDuration,
- default_duration_))
+ static_cast<uint64>(default_duration_)))
return false;
}
if (codec_id_) {
@@ -779,7 +844,7 @@ bool Track::Write(IMkvWriter* writer) const {
}
if (codec_private_) {
if (!WriteEbmlElement(writer, libwebm::kMkvCodecPrivate, codec_private_,
- codec_private_length_))
+ static_cast<uint64>(codec_private_length_)))
return false;
}
if (language_) {
@@ -890,14 +955,23 @@ void Track::set_name(const char* name) {
//
// Colour and its child elements
-uint64_t PrimaryChromaticity::PrimaryChromaticityPayloadSize(
+uint64_t PrimaryChromaticity::PrimaryChromaticitySize(
libwebm::MkvId x_id, libwebm::MkvId y_id) const {
- return EbmlElementSize(x_id, x) + EbmlElementSize(y_id, y);
+ return EbmlElementSize(x_id, x_) + EbmlElementSize(y_id, y_);
}
bool PrimaryChromaticity::Write(IMkvWriter* writer, libwebm::MkvId x_id,
libwebm::MkvId y_id) const {
- return WriteEbmlElement(writer, x_id, x) && WriteEbmlElement(writer, y_id, y);
+ if (!Valid()) {
+ return false;
+ }
+ return WriteEbmlElement(writer, x_id, x_) &&
+ WriteEbmlElement(writer, y_id, y_);
+}
+
+bool PrimaryChromaticity::Valid() const {
+ return (x_ >= kChromaticityMin && x_ <= kChromaticityMax &&
+ y_ >= kChromaticityMin && y_ <= kChromaticityMax);
}
uint64_t MasteringMetadata::MasteringMetadataSize() const {
@@ -909,6 +983,31 @@ uint64_t MasteringMetadata::MasteringMetadataSize() const {
return size;
}
+bool MasteringMetadata::Valid() const {
+ if (luminance_min_ != kValueNotPresent) {
+ if (luminance_min_ < kMinLuminance || luminance_min_ > kMinLuminanceMax ||
+ luminance_min_ > luminance_max_) {
+ return false;
+ }
+ }
+ if (luminance_max_ != kValueNotPresent) {
+ if (luminance_max_ < kMinLuminance || luminance_max_ > kMaxLuminanceMax ||
+ luminance_max_ < luminance_min_) {
+ return false;
+ }
+ }
+ if (r_ && !r_->Valid())
+ return false;
+ if (g_ && !g_->Valid())
+ return false;
+ if (b_ && !b_->Valid())
+ return false;
+ if (white_point_ && !white_point_->Valid())
+ return false;
+
+ return true;
+}
+
bool MasteringMetadata::Write(IMkvWriter* writer) const {
const uint64_t size = PayloadSize();
@@ -918,12 +1017,12 @@ bool MasteringMetadata::Write(IMkvWriter* writer) const {
if (!WriteEbmlMasterElement(writer, libwebm::kMkvMasteringMetadata, size))
return false;
- if (luminance_max != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvLuminanceMax, luminance_max)) {
+ if (luminance_max_ != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvLuminanceMax, luminance_max_)) {
return false;
}
- if (luminance_min != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvLuminanceMin, luminance_min)) {
+ if (luminance_min_ != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvLuminanceMin, luminance_min_)) {
return false;
}
if (r_ &&
@@ -984,25 +1083,25 @@ bool MasteringMetadata::SetChromaticity(
uint64_t MasteringMetadata::PayloadSize() const {
uint64_t size = 0;
- if (luminance_max != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvLuminanceMax, luminance_max);
- if (luminance_min != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvLuminanceMin, luminance_min);
+ if (luminance_max_ != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvLuminanceMax, luminance_max_);
+ if (luminance_min_ != kValueNotPresent)
+ size += EbmlElementSize(libwebm::kMkvLuminanceMin, luminance_min_);
if (r_) {
- size += r_->PrimaryChromaticityPayloadSize(
- libwebm::kMkvPrimaryRChromaticityX, libwebm::kMkvPrimaryRChromaticityY);
+ size += r_->PrimaryChromaticitySize(libwebm::kMkvPrimaryRChromaticityX,
+ libwebm::kMkvPrimaryRChromaticityY);
}
if (g_) {
- size += g_->PrimaryChromaticityPayloadSize(
- libwebm::kMkvPrimaryGChromaticityX, libwebm::kMkvPrimaryGChromaticityY);
+ size += g_->PrimaryChromaticitySize(libwebm::kMkvPrimaryGChromaticityX,
+ libwebm::kMkvPrimaryGChromaticityY);
}
if (b_) {
- size += b_->PrimaryChromaticityPayloadSize(
- libwebm::kMkvPrimaryBChromaticityX, libwebm::kMkvPrimaryBChromaticityY);
+ size += b_->PrimaryChromaticitySize(libwebm::kMkvPrimaryBChromaticityX,
+ libwebm::kMkvPrimaryBChromaticityY);
}
if (white_point_) {
- size += white_point_->PrimaryChromaticityPayloadSize(
+ size += white_point_->PrimaryChromaticitySize(
libwebm::kMkvWhitePointChromaticityX,
libwebm::kMkvWhitePointChromaticityY);
}
@@ -1019,6 +1118,33 @@ uint64_t Colour::ColourSize() const {
return size;
}
+bool Colour::Valid() const {
+ if (mastering_metadata_ && !mastering_metadata_->Valid())
+ return false;
+ if (matrix_coefficients_ != kValueNotPresent &&
+ !IsMatrixCoefficientsValueValid(matrix_coefficients_)) {
+ return false;
+ }
+ if (chroma_siting_horz_ != kValueNotPresent &&
+ !IsChromaSitingHorzValueValid(chroma_siting_horz_)) {
+ return false;
+ }
+ if (chroma_siting_vert_ != kValueNotPresent &&
+ !IsChromaSitingVertValueValid(chroma_siting_vert_)) {
+ return false;
+ }
+ if (range_ != kValueNotPresent && !IsColourRangeValueValid(range_))
+ return false;
+ if (transfer_characteristics_ != kValueNotPresent &&
+ !IsTransferCharacteristicsValueValid(transfer_characteristics_)) {
+ return false;
+ }
+ if (primaries_ != kValueNotPresent && !IsPrimariesValueValid(primaries_))
+ return false;
+
+ return true;
+}
+
bool Colour::Write(IMkvWriter* writer) const {
const uint64_t size = PayloadSize();
@@ -1026,69 +1152,77 @@ bool Colour::Write(IMkvWriter* writer) const {
if (size == 0)
return true;
+ // Don't write an invalid element.
+ if (!Valid())
+ return false;
+
if (!WriteEbmlMasterElement(writer, libwebm::kMkvColour, size))
return false;
- if (matrix_coefficients != kValueNotPresent &&
+ if (matrix_coefficients_ != kValueNotPresent &&
!WriteEbmlElement(writer, libwebm::kMkvMatrixCoefficients,
- matrix_coefficients)) {
+ static_cast<uint64>(matrix_coefficients_))) {
return false;
}
- if (bits_per_channel != kValueNotPresent &&
+ if (bits_per_channel_ != kValueNotPresent &&
!WriteEbmlElement(writer, libwebm::kMkvBitsPerChannel,
- bits_per_channel)) {
+ static_cast<uint64>(bits_per_channel_))) {
return false;
}
- if (chroma_subsampling_horz != kValueNotPresent &&
+ if (chroma_subsampling_horz_ != kValueNotPresent &&
!WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingHorz,
- chroma_subsampling_horz)) {
+ static_cast<uint64>(chroma_subsampling_horz_))) {
return false;
}
- if (chroma_subsampling_vert != kValueNotPresent &&
+ if (chroma_subsampling_vert_ != kValueNotPresent &&
!WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingVert,
- chroma_subsampling_vert)) {
+ static_cast<uint64>(chroma_subsampling_vert_))) {
return false;
}
- if (cb_subsampling_horz != kValueNotPresent &&
+ if (cb_subsampling_horz_ != kValueNotPresent &&
!WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingHorz,
- cb_subsampling_horz)) {
+ static_cast<uint64>(cb_subsampling_horz_))) {
return false;
}
- if (cb_subsampling_vert != kValueNotPresent &&
+ if (cb_subsampling_vert_ != kValueNotPresent &&
!WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingVert,
- cb_subsampling_vert)) {
+ static_cast<uint64>(cb_subsampling_vert_))) {
return false;
}
- if (chroma_siting_horz != kValueNotPresent &&
+ if (chroma_siting_horz_ != kValueNotPresent &&
!WriteEbmlElement(writer, libwebm::kMkvChromaSitingHorz,
- chroma_siting_horz)) {
+ static_cast<uint64>(chroma_siting_horz_))) {
return false;
}
- if (chroma_siting_vert != kValueNotPresent &&
+ if (chroma_siting_vert_ != kValueNotPresent &&
!WriteEbmlElement(writer, libwebm::kMkvChromaSitingVert,
- chroma_siting_vert)) {
+ static_cast<uint64>(chroma_siting_vert_))) {
return false;
}
- if (range != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvRange, range)) {
+ if (range_ != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvRange,
+ static_cast<uint64>(range_))) {
return false;
}
- if (transfer_characteristics != kValueNotPresent &&
+ if (transfer_characteristics_ != kValueNotPresent &&
!WriteEbmlElement(writer, libwebm::kMkvTransferCharacteristics,
- transfer_characteristics)) {
+ static_cast<uint64>(transfer_characteristics_))) {
return false;
}
- if (primaries != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvPrimaries, primaries)) {
+ if (primaries_ != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvPrimaries,
+ static_cast<uint64>(primaries_))) {
return false;
}
- if (max_cll != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvMaxCLL, max_cll)) {
+ if (max_cll_ != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvMaxCLL,
+ static_cast<uint64>(max_cll_))) {
return false;
}
- if (max_fall != kValueNotPresent &&
- !WriteEbmlElement(writer, libwebm::kMkvMaxFALL, max_fall)) {
+ if (max_fall_ != kValueNotPresent &&
+ !WriteEbmlElement(writer, libwebm::kMkvMaxFALL,
+ static_cast<uint64>(max_fall_))) {
return false;
}
@@ -1103,8 +1237,8 @@ bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) {
if (!mm_ptr.get())
return false;
- mm_ptr->luminance_max = mastering_metadata.luminance_max;
- mm_ptr->luminance_min = mastering_metadata.luminance_min;
+ mm_ptr->set_luminance_max(mastering_metadata.luminance_max());
+ mm_ptr->set_luminance_min(mastering_metadata.luminance_min());
if (!mm_ptr->SetChromaticity(mastering_metadata.r(), mastering_metadata.g(),
mastering_metadata.b(),
@@ -1120,38 +1254,56 @@ bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) {
uint64_t Colour::PayloadSize() const {
uint64_t size = 0;
- if (matrix_coefficients != kValueNotPresent)
- size +=
- EbmlElementSize(libwebm::kMkvMatrixCoefficients, matrix_coefficients);
- if (bits_per_channel != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvBitsPerChannel, bits_per_channel);
- if (chroma_subsampling_horz != kValueNotPresent)
+ if (matrix_coefficients_ != kValueNotPresent) {
+ size += EbmlElementSize(libwebm::kMkvMatrixCoefficients,
+ static_cast<uint64>(matrix_coefficients_));
+ }
+ if (bits_per_channel_ != kValueNotPresent) {
+ size += EbmlElementSize(libwebm::kMkvBitsPerChannel,
+ static_cast<uint64>(bits_per_channel_));
+ }
+ if (chroma_subsampling_horz_ != kValueNotPresent) {
size += EbmlElementSize(libwebm::kMkvChromaSubsamplingHorz,
- chroma_subsampling_horz);
- if (chroma_subsampling_vert != kValueNotPresent)
+ static_cast<uint64>(chroma_subsampling_horz_));
+ }
+ if (chroma_subsampling_vert_ != kValueNotPresent) {
size += EbmlElementSize(libwebm::kMkvChromaSubsamplingVert,
- chroma_subsampling_vert);
- if (cb_subsampling_horz != kValueNotPresent)
- size +=
- EbmlElementSize(libwebm::kMkvCbSubsamplingHorz, cb_subsampling_horz);
- if (cb_subsampling_vert != kValueNotPresent)
- size +=
- EbmlElementSize(libwebm::kMkvCbSubsamplingVert, cb_subsampling_vert);
- if (chroma_siting_horz != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvChromaSitingHorz, chroma_siting_horz);
- if (chroma_siting_vert != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvChromaSitingVert, chroma_siting_vert);
- if (range != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvRange, range);
- if (transfer_characteristics != kValueNotPresent)
+ static_cast<uint64>(chroma_subsampling_vert_));
+ }
+ if (cb_subsampling_horz_ != kValueNotPresent) {
+ size += EbmlElementSize(libwebm::kMkvCbSubsamplingHorz,
+ static_cast<uint64>(cb_subsampling_horz_));
+ }
+ if (cb_subsampling_vert_ != kValueNotPresent) {
+ size += EbmlElementSize(libwebm::kMkvCbSubsamplingVert,
+ static_cast<uint64>(cb_subsampling_vert_));
+ }
+ if (chroma_siting_horz_ != kValueNotPresent) {
+ size += EbmlElementSize(libwebm::kMkvChromaSitingHorz,
+ static_cast<uint64>(chroma_siting_horz_));
+ }
+ if (chroma_siting_vert_ != kValueNotPresent) {
+ size += EbmlElementSize(libwebm::kMkvChromaSitingVert,
+ static_cast<uint64>(chroma_siting_vert_));
+ }
+ if (range_ != kValueNotPresent) {
+ size += EbmlElementSize(libwebm::kMkvRange, static_cast<uint64>(range_));
+ }
+ if (transfer_characteristics_ != kValueNotPresent) {
size += EbmlElementSize(libwebm::kMkvTransferCharacteristics,
- transfer_characteristics);
- if (primaries != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvPrimaries, primaries);
- if (max_cll != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvMaxCLL, max_cll);
- if (max_fall != kValueNotPresent)
- size += EbmlElementSize(libwebm::kMkvMaxFALL, max_fall);
+ static_cast<uint64>(transfer_characteristics_));
+ }
+ if (primaries_ != kValueNotPresent) {
+ size += EbmlElementSize(libwebm::kMkvPrimaries,
+ static_cast<uint64>(primaries_));
+ }
+ if (max_cll_ != kValueNotPresent) {
+ size += EbmlElementSize(libwebm::kMkvMaxCLL, static_cast<uint64>(max_cll_));
+ }
+ if (max_fall_ != kValueNotPresent) {
+ size +=
+ EbmlElementSize(libwebm::kMkvMaxFALL, static_cast<uint64>(max_fall_));
+ }
if (mastering_metadata_)
size += mastering_metadata_->MasteringMetadataSize();
@@ -1161,12 +1313,103 @@ uint64_t Colour::PayloadSize() const {
///////////////////////////////////////////////////////////////
//
+// Projection element
+
+uint64_t Projection::ProjectionSize() const {
+ uint64_t size = PayloadSize();
+
+ if (size > 0)
+ size += EbmlMasterElementSize(libwebm::kMkvProjection, size);
+
+ return size;
+}
+
+bool Projection::Write(IMkvWriter* writer) const {
+ const uint64_t size = PayloadSize();
+
+ // Don't write an empty element.
+ if (size == 0)
+ return true;
+
+ if (!WriteEbmlMasterElement(writer, libwebm::kMkvProjection, size))
+ return false;
+
+ if (!WriteEbmlElement(writer, libwebm::kMkvProjectionType,
+ static_cast<uint64>(type_))) {
+ return false;
+ }
+
+ if (private_data_length_ > 0 && private_data_ != NULL &&
+ !WriteEbmlElement(writer, libwebm::kMkvProjectionPrivate, private_data_,
+ private_data_length_)) {
+ return false;
+ }
+
+ if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPoseYaw, pose_yaw_))
+ return false;
+
+ if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPosePitch,
+ pose_pitch_)) {
+ return false;
+ }
+
+ if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPoseRoll, pose_roll_)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool Projection::SetProjectionPrivate(const uint8_t* data,
+ uint64_t data_length) {
+ if (data == NULL || data_length == 0) {
+ return false;
+ }
+
+ if (data_length != static_cast<size_t>(data_length)) {
+ return false;
+ }
+
+ uint8_t* new_private_data =
+ new (std::nothrow) uint8_t[static_cast<size_t>(data_length)];
+ if (new_private_data == NULL) {
+ return false;
+ }
+
+ delete[] private_data_;
+ private_data_ = new_private_data;
+ private_data_length_ = data_length;
+ memcpy(private_data_, data, static_cast<size_t>(data_length));
+
+ return true;
+}
+
+uint64_t Projection::PayloadSize() const {
+ uint64_t size =
+ EbmlElementSize(libwebm::kMkvProjection, static_cast<uint64>(type_));
+
+ if (private_data_length_ > 0 && private_data_ != NULL) {
+ size += EbmlElementSize(libwebm::kMkvProjectionPrivate, private_data_,
+ private_data_length_);
+ }
+
+ size += EbmlElementSize(libwebm::kMkvProjectionPoseYaw, pose_yaw_);
+ size += EbmlElementSize(libwebm::kMkvProjectionPosePitch, pose_pitch_);
+ size += EbmlElementSize(libwebm::kMkvProjectionPoseRoll, pose_roll_);
+
+ return size;
+}
+
+///////////////////////////////////////////////////////////////
+//
// VideoTrack Class
VideoTrack::VideoTrack(unsigned int* seed)
: Track(seed),
display_height_(0),
display_width_(0),
+ pixel_height_(0),
+ pixel_width_(0),
crop_left_(0),
crop_right_(0),
crop_top_(0),
@@ -1176,9 +1419,13 @@ VideoTrack::VideoTrack(unsigned int* seed)
stereo_mode_(0),
alpha_mode_(0),
width_(0),
- colour_(NULL) {}
+ colour_(NULL),
+ projection_(NULL) {}
-VideoTrack::~VideoTrack() { delete colour_; }
+VideoTrack::~VideoTrack() {
+ delete colour_;
+ delete projection_;
+}
bool VideoTrack::SetStereoMode(uint64_t stereo_mode) {
if (stereo_mode != kMono && stereo_mode != kSideBySideLeftIsFirst &&
@@ -1221,40 +1468,52 @@ bool VideoTrack::Write(IMkvWriter* writer) const {
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelWidth, width_))
+ if (!WriteEbmlElement(
+ writer, libwebm::kMkvPixelWidth,
+ static_cast<uint64>((pixel_width_ > 0) ? pixel_width_ : width_)))
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelHeight, height_))
+ if (!WriteEbmlElement(
+ writer, libwebm::kMkvPixelHeight,
+ static_cast<uint64>((pixel_height_ > 0) ? pixel_height_ : height_)))
return false;
if (display_width_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvDisplayWidth, display_width_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDisplayWidth,
+ static_cast<uint64>(display_width_)))
return false;
}
if (display_height_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvDisplayHeight, display_height_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvDisplayHeight,
+ static_cast<uint64>(display_height_)))
return false;
}
if (crop_left_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropLeft, crop_left_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropLeft,
+ static_cast<uint64>(crop_left_)))
return false;
}
if (crop_right_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropRight, crop_right_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropRight,
+ static_cast<uint64>(crop_right_)))
return false;
}
if (crop_top_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropTop, crop_top_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropTop,
+ static_cast<uint64>(crop_top_)))
return false;
}
if (crop_bottom_ > 0) {
- if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropBottom, crop_bottom_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropBottom,
+ static_cast<uint64>(crop_bottom_)))
return false;
}
if (stereo_mode_ > kMono) {
- if (!WriteEbmlElement(writer, libwebm::kMkvStereoMode, stereo_mode_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvStereoMode,
+ static_cast<uint64>(stereo_mode_)))
return false;
}
if (alpha_mode_ > kNoAlpha) {
- if (!WriteEbmlElement(writer, libwebm::kMkvAlphaMode, alpha_mode_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvAlphaMode,
+ static_cast<uint64>(alpha_mode_)))
return false;
}
if (frame_rate_ > 0.0) {
@@ -1267,6 +1526,10 @@ bool VideoTrack::Write(IMkvWriter* writer) const {
if (!colour_->Write(writer))
return false;
}
+ if (projection_) {
+ if (!projection_->Write(writer))
+ return false;
+ }
const int64_t stop_position = writer->Position();
if (stop_position < 0 ||
@@ -1287,47 +1550,83 @@ bool VideoTrack::SetColour(const Colour& colour) {
return false;
}
- colour_ptr->matrix_coefficients = colour.matrix_coefficients;
- colour_ptr->bits_per_channel = colour.bits_per_channel;
- colour_ptr->chroma_subsampling_horz = colour.chroma_subsampling_horz;
- colour_ptr->chroma_subsampling_vert = colour.chroma_subsampling_vert;
- colour_ptr->cb_subsampling_horz = colour.cb_subsampling_horz;
- colour_ptr->cb_subsampling_vert = colour.cb_subsampling_vert;
- colour_ptr->chroma_siting_horz = colour.chroma_siting_horz;
- colour_ptr->chroma_siting_vert = colour.chroma_siting_vert;
- colour_ptr->range = colour.range;
- colour_ptr->transfer_characteristics = colour.transfer_characteristics;
- colour_ptr->primaries = colour.primaries;
- colour_ptr->max_cll = colour.max_cll;
- colour_ptr->max_fall = colour.max_fall;
+ colour_ptr->set_matrix_coefficients(colour.matrix_coefficients());
+ colour_ptr->set_bits_per_channel(colour.bits_per_channel());
+ colour_ptr->set_chroma_subsampling_horz(colour.chroma_subsampling_horz());
+ colour_ptr->set_chroma_subsampling_vert(colour.chroma_subsampling_vert());
+ colour_ptr->set_cb_subsampling_horz(colour.cb_subsampling_horz());
+ colour_ptr->set_cb_subsampling_vert(colour.cb_subsampling_vert());
+ colour_ptr->set_chroma_siting_horz(colour.chroma_siting_horz());
+ colour_ptr->set_chroma_siting_vert(colour.chroma_siting_vert());
+ colour_ptr->set_range(colour.range());
+ colour_ptr->set_transfer_characteristics(colour.transfer_characteristics());
+ colour_ptr->set_primaries(colour.primaries());
+ colour_ptr->set_max_cll(colour.max_cll());
+ colour_ptr->set_max_fall(colour.max_fall());
+ delete colour_;
colour_ = colour_ptr.release();
return true;
}
+bool VideoTrack::SetProjection(const Projection& projection) {
+ std::auto_ptr<Projection> projection_ptr(new Projection());
+ if (!projection_ptr.get())
+ return false;
+
+ if (projection.private_data()) {
+ if (!projection_ptr->SetProjectionPrivate(
+ projection.private_data(), projection.private_data_length())) {
+ return false;
+ }
+ }
+
+ projection_ptr->set_type(projection.type());
+ projection_ptr->set_pose_yaw(projection.pose_yaw());
+ projection_ptr->set_pose_pitch(projection.pose_pitch());
+ projection_ptr->set_pose_roll(projection.pose_roll());
+ delete projection_;
+ projection_ = projection_ptr.release();
+ return true;
+}
+
uint64_t VideoTrack::VideoPayloadSize() const {
- uint64_t size = EbmlElementSize(libwebm::kMkvPixelWidth, width_);
- size += EbmlElementSize(libwebm::kMkvPixelHeight, height_);
+ uint64_t size = EbmlElementSize(
+ libwebm::kMkvPixelWidth,
+ static_cast<uint64>((pixel_width_ > 0) ? pixel_width_ : width_));
+ size += EbmlElementSize(
+ libwebm::kMkvPixelHeight,
+ static_cast<uint64>((pixel_height_ > 0) ? pixel_height_ : height_));
if (display_width_ > 0)
- size += EbmlElementSize(libwebm::kMkvDisplayWidth, display_width_);
+ size += EbmlElementSize(libwebm::kMkvDisplayWidth,
+ static_cast<uint64>(display_width_));
if (display_height_ > 0)
- size += EbmlElementSize(libwebm::kMkvDisplayHeight, display_height_);
+ size += EbmlElementSize(libwebm::kMkvDisplayHeight,
+ static_cast<uint64>(display_height_));
if (crop_left_ > 0)
- size += EbmlElementSize(libwebm::kMkvPixelCropLeft, crop_left_);
+ size += EbmlElementSize(libwebm::kMkvPixelCropLeft,
+ static_cast<uint64>(crop_left_));
if (crop_right_ > 0)
- size += EbmlElementSize(libwebm::kMkvPixelCropRight, crop_right_);
+ size += EbmlElementSize(libwebm::kMkvPixelCropRight,
+ static_cast<uint64>(crop_right_));
if (crop_top_ > 0)
- size += EbmlElementSize(libwebm::kMkvPixelCropTop, crop_top_);
+ size += EbmlElementSize(libwebm::kMkvPixelCropTop,
+ static_cast<uint64>(crop_top_));
if (crop_bottom_ > 0)
- size += EbmlElementSize(libwebm::kMkvPixelCropBottom, crop_bottom_);
+ size += EbmlElementSize(libwebm::kMkvPixelCropBottom,
+ static_cast<uint64>(crop_bottom_));
if (stereo_mode_ > kMono)
- size += EbmlElementSize(libwebm::kMkvStereoMode, stereo_mode_);
+ size += EbmlElementSize(libwebm::kMkvStereoMode,
+ static_cast<uint64>(stereo_mode_));
if (alpha_mode_ > kNoAlpha)
- size += EbmlElementSize(libwebm::kMkvAlphaMode, alpha_mode_);
+ size += EbmlElementSize(libwebm::kMkvAlphaMode,
+ static_cast<uint64>(alpha_mode_));
if (frame_rate_ > 0.0)
size += EbmlElementSize(libwebm::kMkvFrameRate,
static_cast<float>(frame_rate_));
if (colour_)
size += colour_->ColourSize();
+ if (projection_)
+ size += projection_->ProjectionSize();
return size;
}
@@ -1346,9 +1645,11 @@ uint64_t AudioTrack::PayloadSize() const {
uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency,
static_cast<float>(sample_rate_));
- size += EbmlElementSize(libwebm::kMkvChannels, channels_);
+ size +=
+ EbmlElementSize(libwebm::kMkvChannels, static_cast<uint64>(channels_));
if (bit_depth_ > 0)
- size += EbmlElementSize(libwebm::kMkvBitDepth, bit_depth_);
+ size +=
+ EbmlElementSize(libwebm::kMkvBitDepth, static_cast<uint64>(bit_depth_));
size += EbmlMasterElementSize(libwebm::kMkvAudio, size);
return parent_size + size;
@@ -1361,9 +1662,11 @@ bool AudioTrack::Write(IMkvWriter* writer) const {
// Calculate AudioSettings size.
uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency,
static_cast<float>(sample_rate_));
- size += EbmlElementSize(libwebm::kMkvChannels, channels_);
+ size +=
+ EbmlElementSize(libwebm::kMkvChannels, static_cast<uint64>(channels_));
if (bit_depth_ > 0)
- size += EbmlElementSize(libwebm::kMkvBitDepth, bit_depth_);
+ size +=
+ EbmlElementSize(libwebm::kMkvBitDepth, static_cast<uint64>(bit_depth_));
if (!WriteEbmlMasterElement(writer, libwebm::kMkvAudio, size))
return false;
@@ -1375,10 +1678,12 @@ bool AudioTrack::Write(IMkvWriter* writer) const {
if (!WriteEbmlElement(writer, libwebm::kMkvSamplingFrequency,
static_cast<float>(sample_rate_)))
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvChannels, channels_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChannels,
+ static_cast<uint64>(channels_)))
return false;
if (bit_depth_ > 0)
- if (!WriteEbmlElement(writer, libwebm::kMkvBitDepth, bit_depth_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvBitDepth,
+ static_cast<uint64>(bit_depth_)))
return false;
const int64_t stop_position = writer->Position();
@@ -1398,6 +1703,10 @@ const char Tracks::kVorbisCodecId[] = "A_VORBIS";
const char Tracks::kVp8CodecId[] = "V_VP8";
const char Tracks::kVp9CodecId[] = "V_VP9";
const char Tracks::kVp10CodecId[] = "V_VP10";
+const char Tracks::kWebVttCaptionsId[] = "D_WEBVTT/CAPTIONS";
+const char Tracks::kWebVttDescriptionsId[] = "D_WEBVTT/DESCRIPTIONS";
+const char Tracks::kWebVttMetadataId[] = "D_WEBVTT/METADATA";
+const char Tracks::kWebVttSubtitlesId[] = "D_WEBVTT/SUBTITLES";
Tracks::Tracks()
: track_entries_(NULL), track_entries_size_(0), wrote_tracks_(false) {}
@@ -1650,9 +1959,11 @@ bool Chapter::ExpandDisplaysArray() {
uint64_t Chapter::WriteAtom(IMkvWriter* writer) const {
uint64_t payload_size =
EbmlElementSize(libwebm::kMkvChapterStringUID, id_) +
- EbmlElementSize(libwebm::kMkvChapterUID, uid_) +
- EbmlElementSize(libwebm::kMkvChapterTimeStart, start_timecode_) +
- EbmlElementSize(libwebm::kMkvChapterTimeEnd, end_timecode_);
+ EbmlElementSize(libwebm::kMkvChapterUID, static_cast<uint64>(uid_)) +
+ EbmlElementSize(libwebm::kMkvChapterTimeStart,
+ static_cast<uint64>(start_timecode_)) +
+ EbmlElementSize(libwebm::kMkvChapterTimeEnd,
+ static_cast<uint64>(end_timecode_));
for (int idx = 0; idx < displays_count_; ++idx) {
const Display& d = displays_[idx];
@@ -1674,13 +1985,16 @@ uint64_t Chapter::WriteAtom(IMkvWriter* writer) const {
if (!WriteEbmlElement(writer, libwebm::kMkvChapterStringUID, id_))
return 0;
- if (!WriteEbmlElement(writer, libwebm::kMkvChapterUID, uid_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapterUID,
+ static_cast<uint64>(uid_)))
return 0;
- if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeStart, start_timecode_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeStart,
+ static_cast<uint64>(start_timecode_)))
return 0;
- if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeEnd, end_timecode_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeEnd,
+ static_cast<uint64>(end_timecode_)))
return 0;
for (int idx = 0; idx < displays_count_; ++idx) {
@@ -2125,7 +2439,17 @@ Cluster::Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale,
write_last_frame_with_duration_(write_last_frame_with_duration),
writer_(NULL) {}
-Cluster::~Cluster() {}
+Cluster::~Cluster() {
+ // Delete any stored frames that are left behind. This will happen if the
+ // Cluster was not Finalized for whatever reason.
+ while (!stored_frames_.empty()) {
+ while (!stored_frames_.begin()->second.empty()) {
+ delete stored_frames_.begin()->second.front();
+ stored_frames_.begin()->second.pop_front();
+ }
+ stored_frames_.erase(stored_frames_.begin()->first);
+ }
+}
bool Cluster::Init(IMkvWriter* ptr_writer) {
if (!ptr_writer) {
@@ -2421,10 +2745,10 @@ bool SeekHead::Finalize(IMkvWriter* writer) const {
for (int32_t i = 0; i < kSeekEntryCount; ++i) {
if (seek_entry_id_[i] != 0) {
- entry_size[i] = EbmlElementSize(
- libwebm::kMkvSeekID, static_cast<uint64_t>(seek_entry_id_[i]));
- entry_size[i] +=
- EbmlElementSize(libwebm::kMkvSeekPosition, seek_entry_pos_[i]);
+ entry_size[i] = EbmlElementSize(libwebm::kMkvSeekID,
+ static_cast<uint64>(seek_entry_id_[i]));
+ entry_size[i] += EbmlElementSize(
+ libwebm::kMkvSeekPosition, static_cast<uint64>(seek_entry_pos_[i]));
payload_size +=
EbmlMasterElementSize(libwebm::kMkvSeek, entry_size[i]) +
@@ -2449,11 +2773,11 @@ bool SeekHead::Finalize(IMkvWriter* writer) const {
return false;
if (!WriteEbmlElement(writer, libwebm::kMkvSeekID,
- static_cast<uint64_t>(seek_entry_id_[i])))
+ static_cast<uint64>(seek_entry_id_[i])))
return false;
if (!WriteEbmlElement(writer, libwebm::kMkvSeekPosition,
- seek_entry_pos_[i]))
+ static_cast<uint64>(seek_entry_pos_[i])))
return false;
}
}
@@ -2522,8 +2846,10 @@ bool SeekHead::SetSeekEntry(int index, uint32_t id, uint64_t position) {
uint64_t SeekHead::MaxEntrySize() const {
const uint64_t max_entry_payload_size =
- EbmlElementSize(libwebm::kMkvSeekID, UINT64_C(0xffffffff)) +
- EbmlElementSize(libwebm::kMkvSeekPosition, UINT64_C(0xffffffffffffffff));
+ EbmlElementSize(libwebm::kMkvSeekID,
+ static_cast<uint64>(UINT64_C(0xffffffff))) +
+ EbmlElementSize(libwebm::kMkvSeekPosition,
+ static_cast<uint64>(UINT64_C(0xffffffffffffffff)));
const uint64_t max_entry_size =
EbmlMasterElementSize(libwebm::kMkvSeek, max_entry_payload_size) +
max_entry_payload_size;
@@ -2613,7 +2939,8 @@ bool SegmentInfo::Write(IMkvWriter* writer) {
if (!writer || !muxing_app_ || !writing_app_)
return false;
- uint64_t size = EbmlElementSize(libwebm::kMkvTimecodeScale, timecode_scale_);
+ uint64_t size = EbmlElementSize(libwebm::kMkvTimecodeScale,
+ static_cast<uint64>(timecode_scale_));
if (duration_ > 0.0)
size +=
EbmlElementSize(libwebm::kMkvDuration, static_cast<float>(duration_));
@@ -2629,7 +2956,8 @@ bool SegmentInfo::Write(IMkvWriter* writer) {
if (payload_position < 0)
return false;
- if (!WriteEbmlElement(writer, libwebm::kMkvTimecodeScale, timecode_scale_))
+ if (!WriteEbmlElement(writer, libwebm::kMkvTimecodeScale,
+ static_cast<uint64>(timecode_scale_)))
return false;
if (duration_ > 0.0) {
@@ -2725,10 +3053,12 @@ Segment::Segment()
output_cues_(true),
accurate_cluster_duration_(false),
fixed_size_cluster_timecode_(false),
+ estimate_file_duration_(true),
payload_pos_(0),
size_position_(0),
doc_type_version_(kDefaultDocTypeVersion),
doc_type_version_written_(0),
+ duration_(0.0),
writer_cluster_(NULL),
writer_cues_(NULL),
writer_header_(NULL) {
@@ -2833,6 +3163,10 @@ bool Segment::Init(IMkvWriter* ptr_writer) {
writer_cluster_ = ptr_writer;
writer_cues_ = ptr_writer;
writer_header_ = ptr_writer;
+ memset(&track_frames_written_, 0,
+ sizeof(track_frames_written_[0]) * kMaxTrackNumber);
+ memset(&last_track_timestamp_, 0,
+ sizeof(last_track_timestamp_[0]) * kMaxTrackNumber);
return segment_info_.Init();
}
@@ -2876,7 +3210,10 @@ bool Segment::Finalize() {
if (WriteFramesAll() < 0)
return false;
- if (cluster_list_size_ > 0) {
+ // In kLive mode, call Cluster::Finalize only if |accurate_cluster_duration_|
+ // is set. In all other modes, always call Cluster::Finalize.
+ if ((mode_ == kLive ? accurate_cluster_duration_ : true) &&
+ cluster_list_size_ > 0) {
// Update last cluster's size
Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1];
@@ -2892,9 +3229,30 @@ bool Segment::Finalize() {
chunk_count_++;
}
- const double duration =
+ double duration =
(static_cast<double>(last_timestamp_) + last_block_duration_) /
segment_info_.timecode_scale();
+ if (duration_ > 0.0) {
+ duration = duration_;
+ } else {
+ if (last_block_duration_ == 0 && estimate_file_duration_) {
+ const int num_tracks = static_cast<int>(tracks_.track_entries_size());
+ for (int i = 0; i < num_tracks; ++i) {
+ if (track_frames_written_[i] < 2)
+ continue;
+
+ // Estimate the duration for the last block of a Track.
+ const double nano_per_frame =
+ static_cast<double>(last_track_timestamp_[i]) /
+ (track_frames_written_[i] - 1);
+ const double track_duration =
+ (last_track_timestamp_[i] + nano_per_frame) /
+ segment_info_.timecode_scale();
+ if (track_duration > duration)
+ duration = track_duration;
+ }
+ }
+ }
segment_info_.set_duration(duration);
if (!segment_info_.Finalize(writer_header_))
return false;
@@ -2941,7 +3299,9 @@ bool Segment::Finalize() {
if (writer_header_->Position(0))
return false;
- if (!WriteEbmlHeader(writer_header_, doc_type_version_))
+ const char* const doc_type =
+ DocTypeIsWebm() ? kDocTypeWebm : kDocTypeMatroska;
+ if (!WriteEbmlHeader(writer_header_, doc_type_version_, doc_type))
return false;
if (writer_header_->Position() != ebml_header_size_)
return false;
@@ -3138,7 +3498,10 @@ bool Segment::AddGenericFrame(const Frame* frame) {
Frame* const new_frame = new (std::nothrow) Frame();
if (!new_frame || !new_frame->CopyFrom(*frame))
return false;
- return QueueFrame(new_frame);
+ if (!QueueFrame(new_frame))
+ return false;
+ track_frames_written_[frame->track_number() - 1]++;
+ return true;
}
if (!DoNewClusterProcessing(frame->track_number(), frame->timestamp(),
@@ -3178,10 +3541,10 @@ bool Segment::AddGenericFrame(const Frame* frame) {
last_timestamp_ = frame->timestamp();
last_track_timestamp_[frame->track_number() - 1] = frame->timestamp();
last_block_duration_ = frame->duration();
+ track_frames_written_[frame->track_number() - 1]++;
if (frame_created)
delete frame;
-
return true;
}
@@ -3292,8 +3655,9 @@ Track* Segment::GetTrackByNumber(uint64_t track_number) const {
bool Segment::WriteSegmentHeader() {
UpdateDocTypeVersion();
- // TODO(fgalligan): Support more than one segment.
- if (!WriteEbmlHeader(writer_header_, doc_type_version_))
+ const char* const doc_type =
+ DocTypeIsWebm() ? kDocTypeWebm : kDocTypeMatroska;
+ if (!WriteEbmlHeader(writer_header_, doc_type_version_, doc_type))
return false;
doc_type_version_written_ = doc_type_version_;
ebml_header_size_ = static_cast<int32_t>(writer_header_->Position());
@@ -3766,4 +4130,35 @@ bool Segment::WriteFramesLessThan(uint64_t timestamp) {
return true;
}
+bool Segment::DocTypeIsWebm() const {
+ const int kNumCodecIds = 9;
+
+ // TODO(vigneshv): Tweak .clang-format.
+ const char* kWebmCodecIds[kNumCodecIds] = {
+ Tracks::kOpusCodecId, Tracks::kVorbisCodecId,
+ Tracks::kVp8CodecId, Tracks::kVp9CodecId,
+ Tracks::kVp10CodecId, Tracks::kWebVttCaptionsId,
+ Tracks::kWebVttDescriptionsId, Tracks::kWebVttMetadataId,
+ Tracks::kWebVttSubtitlesId};
+
+ const int num_tracks = static_cast<int>(tracks_.track_entries_size());
+ for (int track_index = 0; track_index < num_tracks; ++track_index) {
+ const Track* const track = tracks_.GetTrackByIndex(track_index);
+ const std::string codec_id = track->codec_id();
+
+ bool id_is_webm = false;
+ for (int id_index = 0; id_index < kNumCodecIds; ++id_index) {
+ if (codec_id == kWebmCodecIds[id_index]) {
+ id_is_webm = true;
+ break;
+ }
+ }
+
+ if (!id_is_webm)
+ return false;
+ }
+
+ return true;
+}
+
} // namespace mkvmuxer
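A minimal caller-side sketch of the Projection plumbing introduced above. This is illustrative only: the writer, track dimensions, and codec choice are assumptions, and it relies on the existing Segment/VideoTrack helpers from mkvmuxer.h (Segment::Init, Segment::AddVideoTrack, Segment::GetTrackByNumber) rather than anything added by this change.

#include "mkvmuxer/mkvmuxer.h"

// Hypothetical helper: create a VP9 video track carrying an equirectangular
// Projection element.
bool AddEquirectangularVp9Track(mkvmuxer::IMkvWriter* writer) {
  mkvmuxer::Segment segment;
  if (!segment.Init(writer))
    return false;

  const uint64_t track_number = segment.AddVideoTrack(3840, 1920, 0);
  mkvmuxer::VideoTrack* const video_track =
      static_cast<mkvmuxer::VideoTrack*>(
          segment.GetTrackByNumber(track_number));
  if (!video_track)
    return false;
  video_track->set_codec_id(mkvmuxer::Tracks::kVp9CodecId);

  mkvmuxer::Projection projection;
  projection.set_type(mkvmuxer::Projection::kEquirectangular);
  projection.set_pose_yaw(0.0f);
  projection.set_pose_pitch(0.0f);
  projection.set_pose_roll(0.0f);
  // VideoTrack::SetProjection() deep-copies |projection|, so the local may
  // go out of scope after this call.
  return video_track->SetProjection(projection);
}

Because V_VP9 appears in the codec ID list checked by Segment::DocTypeIsWebm(), WriteSegmentHeader() passes the webm DocType to the new WriteEbmlHeader() overload; a track using any codec ID outside that list switches the header to the matroska DocType instead.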
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h
index 55ba07196df..46b0029dc47 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxer.h
@@ -64,6 +64,12 @@ class IMkvWriter {
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(IMkvWriter);
};
+// Writes out the EBML header for a WebM file, but allows caller to specify
+// DocType. This function must be called before any other libwebm writing
+// functions are called.
+bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version,
+ const char* const doc_type);
+
// Writes out the EBML header for a WebM file. This function must be called
// before any other libwebm writing functions are called.
bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version);
@@ -348,26 +354,42 @@ class ContentEncoding {
///////////////////////////////////////////////////////////////
// Colour element.
-struct PrimaryChromaticity {
- PrimaryChromaticity(float x_val, float y_val) : x(x_val), y(y_val) {}
- PrimaryChromaticity() : x(0), y(0) {}
+class PrimaryChromaticity {
+ public:
+ static const float kChromaticityMin;
+ static const float kChromaticityMax;
+
+ PrimaryChromaticity(float x_val, float y_val) : x_(x_val), y_(y_val) {}
+ PrimaryChromaticity() : x_(0), y_(0) {}
~PrimaryChromaticity() {}
- uint64_t PrimaryChromaticityPayloadSize(libwebm::MkvId x_id,
- libwebm::MkvId y_id) const;
+
+ // Returns sum of |x_id| and |y_id| element id sizes and payload sizes.
+ uint64_t PrimaryChromaticitySize(libwebm::MkvId x_id,
+ libwebm::MkvId y_id) const;
+ bool Valid() const;
bool Write(IMkvWriter* writer, libwebm::MkvId x_id,
libwebm::MkvId y_id) const;
- float x;
- float y;
+ float x() const { return x_; }
+ void set_x(float new_x) { x_ = new_x; }
+ float y() const { return y_; }
+ void set_y(float new_y) { y_ = new_y; }
+
+ private:
+ float x_;
+ float y_;
};
class MasteringMetadata {
public:
static const float kValueNotPresent;
+ static const float kMinLuminance;
+ static const float kMinLuminanceMax;
+ static const float kMaxLuminanceMax;
MasteringMetadata()
- : luminance_max(kValueNotPresent),
- luminance_min(kValueNotPresent),
+ : luminance_max_(kValueNotPresent),
+ luminance_min_(kValueNotPresent),
r_(NULL),
g_(NULL),
b_(NULL),
@@ -381,6 +403,7 @@ class MasteringMetadata {
// Returns total size of the MasteringMetadata element.
uint64_t MasteringMetadataSize() const;
+ bool Valid() const;
bool Write(IMkvWriter* writer) const;
// Copies non-null chromaticity.
@@ -393,13 +416,21 @@ class MasteringMetadata {
const PrimaryChromaticity* b() const { return b_; }
const PrimaryChromaticity* white_point() const { return white_point_; }
- float luminance_max;
- float luminance_min;
+ float luminance_max() const { return luminance_max_; }
+ void set_luminance_max(float luminance_max) {
+ luminance_max_ = luminance_max;
+ }
+ float luminance_min() const { return luminance_min_; }
+ void set_luminance_min(float luminance_min) {
+ luminance_min_ = luminance_min;
+ }
private:
// Returns size of MasteringMetadata child elements.
uint64_t PayloadSize() const;
+ float luminance_max_;
+ float luminance_min_;
PrimaryChromaticity* r_;
PrimaryChromaticity* g_;
PrimaryChromaticity* b_;
@@ -408,26 +439,90 @@ class MasteringMetadata {
class Colour {
public:
+ enum MatrixCoefficients {
+ kGbr = 0,
+ kBt709 = 1,
+ kUnspecifiedMc = 2,
+ kReserved = 3,
+ kFcc = 4,
+ kBt470bg = 5,
+ kSmpte170MMc = 6,
+ kSmpte240MMc = 7,
+ kYcocg = 8,
+ kBt2020NonConstantLuminance = 9,
+ kBt2020ConstantLuminance = 10,
+ };
+ enum ChromaSitingHorz {
+ kUnspecifiedCsh = 0,
+ kLeftCollocated = 1,
+ kHalfCsh = 2,
+ };
+ enum ChromaSitingVert {
+ kUnspecifiedCsv = 0,
+ kTopCollocated = 1,
+ kHalfCsv = 2,
+ };
+ enum Range {
+ kUnspecifiedCr = 0,
+ kBroadcastRange = 1,
+ kFullRange = 2,
+ kMcTcDefined = 3, // Defined by MatrixCoefficients/TransferCharacteristics.
+ };
+ enum TransferCharacteristics {
+ kIturBt709Tc = 1,
+ kUnspecifiedTc = 2,
+ kReservedTc = 3,
+ kGamma22Curve = 4,
+ kGamma28Curve = 5,
+ kSmpte170MTc = 6,
+ kSmpte240MTc = 7,
+ kLinear = 8,
+ kLog = 9,
+ kLogSqrt = 10,
+ kIec6196624 = 11,
+ kIturBt1361ExtendedColourGamut = 12,
+ kIec6196621 = 13,
+ kIturBt202010bit = 14,
+ kIturBt202012bit = 15,
+ kSmpteSt2084 = 16,
+ kSmpteSt4281Tc = 17,
+ kAribStdB67Hlg = 18,
+ };
+ enum Primaries {
+ kReservedP0 = 0,
+ kIturBt709P = 1,
+ kUnspecifiedP = 2,
+ kReservedP3 = 3,
+ kIturBt470M = 4,
+ kIturBt470Bg = 5,
+ kSmpte170MP = 6,
+ kSmpte240MP = 7,
+ kFilm = 8,
+ kIturBt2020 = 9,
+ kSmpteSt4281P = 10,
+ kJedecP22Phosphors = 22,
+ };
static const uint64_t kValueNotPresent;
Colour()
- : matrix_coefficients(kValueNotPresent),
- bits_per_channel(kValueNotPresent),
- chroma_subsampling_horz(kValueNotPresent),
- chroma_subsampling_vert(kValueNotPresent),
- cb_subsampling_horz(kValueNotPresent),
- cb_subsampling_vert(kValueNotPresent),
- chroma_siting_horz(kValueNotPresent),
- chroma_siting_vert(kValueNotPresent),
- range(kValueNotPresent),
- transfer_characteristics(kValueNotPresent),
- primaries(kValueNotPresent),
- max_cll(kValueNotPresent),
- max_fall(kValueNotPresent),
+ : matrix_coefficients_(kValueNotPresent),
+ bits_per_channel_(kValueNotPresent),
+ chroma_subsampling_horz_(kValueNotPresent),
+ chroma_subsampling_vert_(kValueNotPresent),
+ cb_subsampling_horz_(kValueNotPresent),
+ cb_subsampling_vert_(kValueNotPresent),
+ chroma_siting_horz_(kValueNotPresent),
+ chroma_siting_vert_(kValueNotPresent),
+ range_(kValueNotPresent),
+ transfer_characteristics_(kValueNotPresent),
+ primaries_(kValueNotPresent),
+ max_cll_(kValueNotPresent),
+ max_fall_(kValueNotPresent),
mastering_metadata_(NULL) {}
~Colour() { delete mastering_metadata_; }
// Returns total size of the Colour element.
uint64_t ColourSize() const;
+ bool Valid() const;
bool Write(IMkvWriter* writer) const;
// Deep copies |mastering_metadata|.
@@ -437,28 +532,125 @@ class Colour {
return mastering_metadata_;
}
- uint64_t matrix_coefficients;
- uint64_t bits_per_channel;
- uint64_t chroma_subsampling_horz;
- uint64_t chroma_subsampling_vert;
- uint64_t cb_subsampling_horz;
- uint64_t cb_subsampling_vert;
- uint64_t chroma_siting_horz;
- uint64_t chroma_siting_vert;
- uint64_t range;
- uint64_t transfer_characteristics;
- uint64_t primaries;
- uint64_t max_cll;
- uint64_t max_fall;
+ uint64_t matrix_coefficients() const { return matrix_coefficients_; }
+ void set_matrix_coefficients(uint64_t matrix_coefficients) {
+ matrix_coefficients_ = matrix_coefficients;
+ }
+ uint64_t bits_per_channel() const { return bits_per_channel_; }
+ void set_bits_per_channel(uint64_t bits_per_channel) {
+ bits_per_channel_ = bits_per_channel;
+ }
+ uint64_t chroma_subsampling_horz() const { return chroma_subsampling_horz_; }
+ void set_chroma_subsampling_horz(uint64_t chroma_subsampling_horz) {
+ chroma_subsampling_horz_ = chroma_subsampling_horz;
+ }
+ uint64_t chroma_subsampling_vert() const { return chroma_subsampling_vert_; }
+ void set_chroma_subsampling_vert(uint64_t chroma_subsampling_vert) {
+ chroma_subsampling_vert_ = chroma_subsampling_vert;
+ }
+ uint64_t cb_subsampling_horz() const { return cb_subsampling_horz_; }
+ void set_cb_subsampling_horz(uint64_t cb_subsampling_horz) {
+ cb_subsampling_horz_ = cb_subsampling_horz;
+ }
+ uint64_t cb_subsampling_vert() const { return cb_subsampling_vert_; }
+ void set_cb_subsampling_vert(uint64_t cb_subsampling_vert) {
+ cb_subsampling_vert_ = cb_subsampling_vert;
+ }
+ uint64_t chroma_siting_horz() const { return chroma_siting_horz_; }
+ void set_chroma_siting_horz(uint64_t chroma_siting_horz) {
+ chroma_siting_horz_ = chroma_siting_horz;
+ }
+ uint64_t chroma_siting_vert() const { return chroma_siting_vert_; }
+ void set_chroma_siting_vert(uint64_t chroma_siting_vert) {
+ chroma_siting_vert_ = chroma_siting_vert;
+ }
+ uint64_t range() const { return range_; }
+ void set_range(uint64_t range) { range_ = range; }
+ uint64_t transfer_characteristics() const {
+ return transfer_characteristics_;
+ }
+ void set_transfer_characteristics(uint64_t transfer_characteristics) {
+ transfer_characteristics_ = transfer_characteristics;
+ }
+ uint64_t primaries() const { return primaries_; }
+ void set_primaries(uint64_t primaries) { primaries_ = primaries; }
+ uint64_t max_cll() const { return max_cll_; }
+ void set_max_cll(uint64_t max_cll) { max_cll_ = max_cll; }
+ uint64_t max_fall() const { return max_fall_; }
+ void set_max_fall(uint64_t max_fall) { max_fall_ = max_fall; }
private:
// Returns size of Colour child elements.
uint64_t PayloadSize() const;
+ uint64_t matrix_coefficients_;
+ uint64_t bits_per_channel_;
+ uint64_t chroma_subsampling_horz_;
+ uint64_t chroma_subsampling_vert_;
+ uint64_t cb_subsampling_horz_;
+ uint64_t cb_subsampling_vert_;
+ uint64_t chroma_siting_horz_;
+ uint64_t chroma_siting_vert_;
+ uint64_t range_;
+ uint64_t transfer_characteristics_;
+ uint64_t primaries_;
+ uint64_t max_cll_;
+ uint64_t max_fall_;
+
MasteringMetadata* mastering_metadata_;
};
///////////////////////////////////////////////////////////////
+// Projection element.
+class Projection {
+ public:
+ enum ProjectionType {
+ kTypeNotPresent = -1,
+ kRectangular = 0,
+ kEquirectangular = 1,
+ kCubeMap = 2,
+ kMesh = 3,
+ };
+ static const uint64_t kValueNotPresent;
+ Projection()
+ : type_(kRectangular),
+ pose_yaw_(0.0),
+ pose_pitch_(0.0),
+ pose_roll_(0.0),
+ private_data_(NULL),
+ private_data_length_(0) {}
+ ~Projection() { delete[] private_data_; }
+
+ uint64_t ProjectionSize() const;
+ bool Write(IMkvWriter* writer) const;
+
+ bool SetProjectionPrivate(const uint8_t* private_data,
+ uint64_t private_data_length);
+
+ ProjectionType type() const { return type_; }
+ void set_type(ProjectionType type) { type_ = type; }
+ float pose_yaw() const { return pose_yaw_; }
+ void set_pose_yaw(float pose_yaw) { pose_yaw_ = pose_yaw; }
+ float pose_pitch() const { return pose_pitch_; }
+ void set_pose_pitch(float pose_pitch) { pose_pitch_ = pose_pitch; }
+ float pose_roll() const { return pose_roll_; }
+ void set_pose_roll(float pose_roll) { pose_roll_ = pose_roll; }
+ uint8_t* private_data() const { return private_data_; }
+ uint64_t private_data_length() const { return private_data_length_; }
+
+ private:
+ // Returns size of VideoProjection child elements.
+ uint64_t PayloadSize() const;
+
+ ProjectionType type_;
+ float pose_yaw_;
+ float pose_pitch_;
+ float pose_roll_;
+ uint8_t* private_data_;
+ uint64_t private_data_length_;
+};
+
+///////////////////////////////////////////////////////////////
// Track element.
class Track {
public:
@@ -581,6 +773,10 @@ class VideoTrack : public Track {
uint64_t display_height() const { return display_height_; }
void set_display_width(uint64_t width) { display_width_ = width; }
uint64_t display_width() const { return display_width_; }
+ void set_pixel_height(uint64_t height) { pixel_height_ = height; }
+ uint64_t pixel_height() const { return pixel_height_; }
+ void set_pixel_width(uint64_t width) { pixel_width_ = width; }
+ uint64_t pixel_width() const { return pixel_width_; }
void set_crop_left(uint64_t crop_left) { crop_left_ = crop_left; }
uint64_t crop_left() const { return crop_left_; }
@@ -605,6 +801,11 @@ class VideoTrack : public Track {
// Deep copies |colour|.
bool SetColour(const Colour& colour);
+ Projection* projection() { return projection_; }
+
+ // Deep copies |projection|.
+ bool SetProjection(const Projection& projection);
+
private:
// Returns the size in bytes of the Video element.
uint64_t VideoPayloadSize() const;
@@ -612,6 +813,8 @@ class VideoTrack : public Track {
// Video track element names.
uint64_t display_height_;
uint64_t display_width_;
+ uint64_t pixel_height_;
+ uint64_t pixel_width_;
uint64_t crop_left_;
uint64_t crop_right_;
uint64_t crop_top_;
@@ -623,6 +826,7 @@ class VideoTrack : public Track {
uint64_t width_;
Colour* colour_;
+ Projection* projection_;
LIBWEBM_DISALLOW_COPY_AND_ASSIGN(VideoTrack);
};
@@ -670,6 +874,10 @@ class Tracks {
static const char kVp8CodecId[];
static const char kVp9CodecId[];
static const char kVp10CodecId[];
+ static const char kWebVttCaptionsId[];
+ static const char kWebVttDescriptionsId[];
+ static const char kWebVttMetadataId[];
+ static const char kWebVttSubtitlesId[];
Tracks();
~Tracks();
@@ -1294,8 +1502,8 @@ class Segment {
kBeforeClusters = 0x1 // Position Cues before Clusters
};
- const static uint32_t kDefaultDocTypeVersion = 2;
- const static uint64_t kDefaultMaxClusterDuration = 30000000000ULL;
+ static const uint32_t kDefaultDocTypeVersion = 4;
+ static const uint64_t kDefaultMaxClusterDuration = 30000000000ULL;
Segment();
~Segment();
@@ -1481,7 +1689,16 @@ class Segment {
Mode mode() const { return mode_; }
CuesPosition cues_position() const { return cues_position_; }
bool output_cues() const { return output_cues_; }
+ void set_estimate_file_duration(bool estimate_duration) {
+ estimate_file_duration_ = estimate_duration;
+ }
+ bool estimate_file_duration() const { return estimate_file_duration_; }
const SegmentInfo* segment_info() const { return &segment_info_; }
+ void set_duration(double duration) { duration_ = duration; }
+ double duration() const { return duration_; }
+
+ // Returns true when codec IDs are valid for WebM.
+ bool DocTypeIsWebm() const;
private:
// Checks if header information has been output and initialized. If not it
@@ -1637,6 +1854,9 @@ class Segment {
// Last timestamp in nanoseconds by track number added to a cluster.
uint64_t last_track_timestamp_[kMaxTrackNumber];
+ // Number of frames written per track.
+ uint64_t track_frames_written_[kMaxTrackNumber];
+
// Maximum time in nanoseconds for a cluster duration. This variable is a
// guideline and some clusters may have a longer duration. Default is 30
// seconds.
@@ -1665,6 +1885,9 @@ class Segment {
// Flag whether or not to write the Cluster Timecode using exactly 8 bytes.
bool fixed_size_cluster_timecode_;
+ // Flag whether or not to estimate the file duration.
+ bool estimate_file_duration_;
+
// The size of the EBML header, used to validate the header if
// WriteEbmlHeader() is called more than once.
int32_t ebml_header_size_;
@@ -1682,6 +1905,9 @@ class Segment {
uint32_t doc_type_version_;
uint32_t doc_type_version_written_;
+ // If |duration_| is > 0, then explicitly set the duration of the segment.
+ double duration_;
+
// Pointer to the writer objects. Not owned by this class.
IMkvWriter* writer_cluster_;
IMkvWriter* writer_cues_;
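One behavioral note on the duration_ and estimate_file_duration_ members declared above: Segment::Finalize() now prefers an explicitly set duration and only falls back to the per-track frame estimate when the last block carries no duration. A caller-side sketch of the explicit path follows; the |segment| object and the 12.5-second value are assumptions, not part of this change.

// Pin the duration explicitly; Segment::set_duration() expects the value in
// timecode-scale ticks, the same unit Finalize() computes internally
// (nanoseconds divided by the SegmentInfo timecode scale).
const double duration_ns = 12.5 * 1000000000.0;  // illustrative value
segment.set_duration(duration_ns /
                     segment.segment_info()->timecode_scale());
// With an explicit duration the estimate is never used; it can also be
// disabled outright before Finalize().
segment.set_estimate_file_duration(false);
segment.Finalize();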
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
index 3562b8ab828..1ba17ac1ba0 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
@@ -31,20 +31,20 @@ namespace {
// Date elements are always 8 octets in size.
const int kDateElementSize = 8;
-uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
- int64_t timecode, uint64_t timecode_scale) {
- uint64_t block_additional_elem_size = 0;
- uint64_t block_addid_elem_size = 0;
- uint64_t block_more_payload_size = 0;
- uint64_t block_more_elem_size = 0;
- uint64_t block_additions_payload_size = 0;
- uint64_t block_additions_elem_size = 0;
+uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode,
+ uint64 timecode_scale) {
+ uint64 block_additional_elem_size = 0;
+ uint64 block_addid_elem_size = 0;
+ uint64 block_more_payload_size = 0;
+ uint64 block_more_elem_size = 0;
+ uint64 block_additions_payload_size = 0;
+ uint64 block_additions_elem_size = 0;
if (frame->additional()) {
block_additional_elem_size =
EbmlElementSize(libwebm::kMkvBlockAdditional, frame->additional(),
frame->additional_length());
- block_addid_elem_size =
- EbmlElementSize(libwebm::kMkvBlockAddID, frame->add_id());
+ block_addid_elem_size = EbmlElementSize(
+ libwebm::kMkvBlockAddID, static_cast<uint64>(frame->add_id()));
block_more_payload_size =
block_addid_elem_size + block_additional_elem_size;
@@ -58,32 +58,33 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
block_additions_payload_size;
}
- uint64_t discard_padding_elem_size = 0;
+ uint64 discard_padding_elem_size = 0;
if (frame->discard_padding() != 0) {
discard_padding_elem_size =
- EbmlElementSize(libwebm::kMkvDiscardPadding, frame->discard_padding());
+ EbmlElementSize(libwebm::kMkvDiscardPadding,
+ static_cast<int64>(frame->discard_padding()));
}
- const uint64_t reference_block_timestamp =
+ const uint64 reference_block_timestamp =
frame->reference_block_timestamp() / timecode_scale;
- uint64_t reference_block_elem_size = 0;
+ uint64 reference_block_elem_size = 0;
if (!frame->is_key()) {
reference_block_elem_size =
EbmlElementSize(libwebm::kMkvReferenceBlock, reference_block_timestamp);
}
- const uint64_t duration = frame->duration() / timecode_scale;
- uint64_t block_duration_elem_size = 0;
+ const uint64 duration = frame->duration() / timecode_scale;
+ uint64 block_duration_elem_size = 0;
if (duration > 0)
block_duration_elem_size =
EbmlElementSize(libwebm::kMkvBlockDuration, duration);
- const uint64_t block_payload_size = 4 + frame->length();
- const uint64_t block_elem_size =
+ const uint64 block_payload_size = 4 + frame->length();
+ const uint64 block_elem_size =
EbmlMasterElementSize(libwebm::kMkvBlock, block_payload_size) +
block_payload_size;
- const uint64_t block_group_payload_size =
+ const uint64 block_group_payload_size =
block_elem_size + block_additions_elem_size + block_duration_elem_size +
discard_padding_elem_size + reference_block_elem_size;
@@ -105,7 +106,7 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
if (SerializeInt(writer, 0, 1))
return 0;
- if (writer->Write(frame->frame(), static_cast<uint32_t>(frame->length())))
+ if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
return 0;
if (frame->additional()) {
@@ -118,7 +119,8 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
block_more_payload_size))
return 0;
- if (!WriteEbmlElement(writer, libwebm::kMkvBlockAddID, frame->add_id()))
+ if (!WriteEbmlElement(writer, libwebm::kMkvBlockAddID,
+ static_cast<uint64>(frame->add_id())))
return 0;
if (!WriteEbmlElement(writer, libwebm::kMkvBlockAdditional,
@@ -129,7 +131,7 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
if (frame->discard_padding() != 0 &&
!WriteEbmlElement(writer, libwebm::kMkvDiscardPadding,
- frame->discard_padding())) {
+ static_cast<int64>(frame->discard_padding()))) {
return false;
}
@@ -148,38 +150,38 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
block_group_payload_size;
}
-uint64_t WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame,
- int64_t timecode) {
+uint64 WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame,
+ int64 timecode) {
if (WriteID(writer, libwebm::kMkvSimpleBlock))
return 0;
- const int32_t size = static_cast<int32_t>(frame->length()) + 4;
+ const int32 size = static_cast<int32>(frame->length()) + 4;
if (WriteUInt(writer, size))
return 0;
- if (WriteUInt(writer, static_cast<uint64_t>(frame->track_number())))
+ if (WriteUInt(writer, static_cast<uint64>(frame->track_number())))
return 0;
if (SerializeInt(writer, timecode, 2))
return 0;
- uint64_t flags = 0;
+ uint64 flags = 0;
if (frame->is_key())
flags |= 0x80;
if (SerializeInt(writer, flags, 1))
return 0;
- if (writer->Write(frame->frame(), static_cast<uint32_t>(frame->length())))
+ if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
return 0;
- return static_cast<uint64_t>(GetUIntSize(libwebm::kMkvSimpleBlock) +
- GetCodedUIntSize(size) + 4 + frame->length());
+ return GetUIntSize(libwebm::kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 +
+ frame->length();
}
} // namespace
-int32_t GetCodedUIntSize(uint64_t value) {
+int32 GetCodedUIntSize(uint64 value) {
if (value < 0x000000000000007FULL)
return 1;
else if (value < 0x0000000000003FFFULL)
@@ -197,7 +199,7 @@ int32_t GetCodedUIntSize(uint64_t value) {
return 8;
}
-int32_t GetUIntSize(uint64_t value) {
+int32 GetUIntSize(uint64 value) {
if (value < 0x0000000000000100ULL)
return 1;
else if (value < 0x0000000000010000ULL)
@@ -215,26 +217,26 @@ int32_t GetUIntSize(uint64_t value) {
return 8;
}
-int32_t GetIntSize(int64_t value) {
+int32 GetIntSize(int64 value) {
// Doubling the requested value ensures positive values with their high bit
// set are written with 0-padding to avoid flipping the signedness.
- const uint64_t v = (value < 0) ? value ^ -1LL : value;
+ const uint64 v = (value < 0) ? value ^ -1LL : value;
return GetUIntSize(2 * v);
}
-uint64_t EbmlMasterElementSize(uint64_t type, uint64_t value) {
+uint64 EbmlMasterElementSize(uint64 type, uint64 value) {
// Size of EBML ID
- int32_t ebml_size = GetUIntSize(type);
+ int32 ebml_size = GetUIntSize(type);
// Datasize
ebml_size += GetCodedUIntSize(value);
- return static_cast<uint64_t>(ebml_size);
+ return ebml_size;
}
-uint64_t EbmlElementSize(uint64_t type, int64_t value) {
+uint64 EbmlElementSize(uint64 type, int64 value) {
// Size of EBML ID
- int32_t ebml_size = GetUIntSize(type);
+ int32 ebml_size = GetUIntSize(type);
// Datasize
ebml_size += GetIntSize(value);
@@ -242,20 +244,19 @@ uint64_t EbmlElementSize(uint64_t type, int64_t value) {
// Size of Datasize
ebml_size++;
- return static_cast<uint64_t>(ebml_size);
+ return ebml_size;
}
-uint64_t EbmlElementSize(uint64_t type, uint64_t value) {
+uint64 EbmlElementSize(uint64 type, uint64 value) {
return EbmlElementSize(type, value, 0);
}
-uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size) {
+uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size) {
// Size of EBML ID
- uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+ uint64 ebml_size = GetUIntSize(type);
// Datasize
- ebml_size +=
- (fixed_size > 0) ? fixed_size : static_cast<uint64_t>(GetUIntSize(value));
+ ebml_size += (fixed_size > 0) ? fixed_size : GetUIntSize(value);
// Size of Datasize
ebml_size++;
@@ -263,9 +264,9 @@ uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size) {
return ebml_size;
}
-uint64_t EbmlElementSize(uint64_t type, float /* value */) {
+uint64 EbmlElementSize(uint64 type, float /* value */) {
// Size of EBML ID
- uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+ uint64 ebml_size = GetUIntSize(type);
// Datasize
ebml_size += sizeof(float);
@@ -276,12 +277,12 @@ uint64_t EbmlElementSize(uint64_t type, float /* value */) {
return ebml_size;
}
-uint64_t EbmlElementSize(uint64_t type, const char* value) {
+uint64 EbmlElementSize(uint64 type, const char* value) {
if (!value)
return 0;
// Size of EBML ID
- uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+ uint64 ebml_size = GetUIntSize(type);
// Datasize
ebml_size += strlen(value);
@@ -292,12 +293,12 @@ uint64_t EbmlElementSize(uint64_t type, const char* value) {
return ebml_size;
}
-uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size) {
+uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) {
if (!value)
return 0;
// Size of EBML ID
- uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+ uint64 ebml_size = GetUIntSize(type);
// Datasize
ebml_size += size;
@@ -308,9 +309,9 @@ uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size) {
return ebml_size;
}
-uint64_t EbmlDateElementSize(uint64_t type) {
+uint64 EbmlDateElementSize(uint64 type) {
// Size of EBML ID
- uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+ uint64 ebml_size = GetUIntSize(type);
// Datasize
ebml_size += kDateElementSize;
@@ -321,18 +322,18 @@ uint64_t EbmlDateElementSize(uint64_t type) {
return ebml_size;
}
-int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size) {
+int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) {
if (!writer || size < 1 || size > 8)
return -1;
- for (int32_t i = 1; i <= size; ++i) {
- const int32_t byte_count = size - i;
- const int32_t bit_count = byte_count * 8;
+ for (int32 i = 1; i <= size; ++i) {
+ const int32 byte_count = size - i;
+ const int32 bit_count = byte_count * 8;
- const int64_t bb = value >> bit_count;
- const uint8_t b = static_cast<uint8_t>(bb);
+ const int64 bb = value >> bit_count;
+ const uint8 b = static_cast<uint8>(bb);
- const int32_t status = writer->Write(&b, 1);
+ const int32 status = writer->Write(&b, 1);
if (status < 0)
return status;
@@ -341,26 +342,26 @@ int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size) {
return 0;
}
-int32_t SerializeFloat(IMkvWriter* writer, float f) {
+int32 SerializeFloat(IMkvWriter* writer, float f) {
if (!writer)
return -1;
- assert(sizeof(uint32_t) == sizeof(float));
+ assert(sizeof(uint32) == sizeof(float));
// This union is merely used to avoid a reinterpret_cast from float& to
// uint32& which will result in violation of strict aliasing.
union U32 {
- uint32_t u32;
+ uint32 u32;
float f;
} value;
value.f = f;
- for (int32_t i = 1; i <= 4; ++i) {
- const int32_t byte_count = 4 - i;
- const int32_t bit_count = byte_count * 8;
+ for (int32 i = 1; i <= 4; ++i) {
+ const int32 byte_count = 4 - i;
+ const int32 bit_count = byte_count * 8;
- const uint8_t byte = static_cast<uint8_t>(value.u32 >> bit_count);
+ const uint8 byte = static_cast<uint8>(value.u32 >> bit_count);
- const int32_t status = writer->Write(&byte, 1);
+ const int32 status = writer->Write(&byte, 1);
if (status < 0)
return status;
@@ -369,21 +370,21 @@ int32_t SerializeFloat(IMkvWriter* writer, float f) {
return 0;
}
-int32_t WriteUInt(IMkvWriter* writer, uint64_t value) {
+int32 WriteUInt(IMkvWriter* writer, uint64 value) {
if (!writer)
return -1;
- int32_t size = GetCodedUIntSize(value);
+ int32 size = GetCodedUIntSize(value);
return WriteUIntSize(writer, value, size);
}
-int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size) {
+int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) {
if (!writer || size < 0 || size > 8)
return -1;
if (size > 0) {
- const uint64_t bit = 1LL << (size * 7);
+ const uint64 bit = 1LL << (size * 7);
if (value > (bit - 2))
return -1;
@@ -391,11 +392,11 @@ int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size) {
value |= bit;
} else {
size = 1;
- int64_t bit;
+ int64 bit;
for (;;) {
bit = 1LL << (size * 7);
- const uint64_t max = bit - 2;
+ const uint64 max = bit - 2;
if (value <= max)
break;
@@ -412,18 +413,18 @@ int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size) {
return SerializeInt(writer, value, size);
}
-int32_t WriteID(IMkvWriter* writer, uint64_t type) {
+int32 WriteID(IMkvWriter* writer, uint64 type) {
if (!writer)
return -1;
writer->ElementStartNotify(type, writer->Position());
- const int32_t size = GetUIntSize(type);
+ const int32 size = GetUIntSize(type);
return SerializeInt(writer, type, size);
}
-bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t type, uint64_t size) {
+bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) {
if (!writer)
return false;
@@ -436,19 +437,19 @@ bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t type, uint64_t size) {
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) {
return WriteEbmlElement(writer, type, value, 0);
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value,
- uint64_t fixed_size) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value,
+ uint64 fixed_size) {
if (!writer)
return false;
if (WriteID(writer, type))
return false;
- uint64_t size = static_cast<uint64_t>(GetUIntSize(value));
+ uint64 size = GetUIntSize(value);
if (fixed_size > 0) {
if (size > fixed_size)
return false;
@@ -457,30 +458,30 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value,
if (WriteUInt(writer, size))
return false;
- if (SerializeInt(writer, value, static_cast<int32_t>(size)))
+ if (SerializeInt(writer, value, static_cast<int32>(size)))
return false;
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, int64_t value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value) {
if (!writer)
return false;
if (WriteID(writer, type))
return 0;
- const uint64_t size = GetIntSize(value);
+ const uint64 size = GetIntSize(value);
if (WriteUInt(writer, size))
return false;
- if (SerializeInt(writer, value, static_cast<int32_t>(size)))
+ if (SerializeInt(writer, value, static_cast<int32>(size)))
return false;
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) {
if (!writer)
return false;
@@ -496,25 +497,25 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value) {
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const char* value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) {
if (!writer || !value)
return false;
if (WriteID(writer, type))
return false;
- const uint64_t length = strlen(value);
+ const uint64 length = strlen(value);
if (WriteUInt(writer, length))
return false;
- if (writer->Write(value, static_cast<const uint32_t>(length)))
+ if (writer->Write(value, static_cast<const uint32>(length)))
return false;
return true;
}
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value,
- uint64_t size) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
+ uint64 size) {
if (!writer || !value || size < 1)
return false;
@@ -524,13 +525,13 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value,
if (WriteUInt(writer, size))
return false;
- if (writer->Write(value, static_cast<uint32_t>(size)))
+ if (writer->Write(value, static_cast<uint32>(size)))
return false;
return true;
}
-bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value) {
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) {
if (!writer)
return false;
@@ -546,8 +547,8 @@ bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value) {
return true;
}
-uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
- Cluster* cluster) {
+uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
+ Cluster* cluster) {
if (!writer || !frame || !frame->IsValid() || !cluster ||
!cluster->timecode_scale())
return 0;
@@ -556,7 +557,7 @@ uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
// timecode for the cluster itself (remember that block timecode
// is a signed, 16-bit integer). However, as a simplification we
// only permit non-negative cluster-relative timecodes for blocks.
- const int64_t relative_timecode = cluster->GetRelativeTimecode(
+ const int64 relative_timecode = cluster->GetRelativeTimecode(
frame->timestamp() / cluster->timecode_scale());
if (relative_timecode < 0 || relative_timecode > kMaxBlockTimecode)
return 0;
@@ -567,20 +568,19 @@ uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
cluster->timecode_scale());
}
-uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size) {
+uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) {
if (!writer)
return false;
// Subtract one for the void ID and the coded size.
- uint64_t void_entry_size = size - 1 - GetCodedUIntSize(size - 1);
- uint64_t void_size =
- EbmlMasterElementSize(libwebm::kMkvVoid, void_entry_size) +
- void_entry_size;
+ uint64 void_entry_size = size - 1 - GetCodedUIntSize(size - 1);
+ uint64 void_size = EbmlMasterElementSize(libwebm::kMkvVoid, void_entry_size) +
+ void_entry_size;
if (void_size != size)
return 0;
- const int64_t payload_position = writer->Position();
+ const int64 payload_position = writer->Position();
if (payload_position < 0)
return 0;
@@ -590,30 +590,29 @@ uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size) {
if (WriteUInt(writer, void_entry_size))
return 0;
- const uint8_t value = 0;
- for (int32_t i = 0; i < static_cast<int32_t>(void_entry_size); ++i) {
+ const uint8 value = 0;
+ for (int32 i = 0; i < static_cast<int32>(void_entry_size); ++i) {
if (writer->Write(&value, 1))
return 0;
}
- const int64_t stop_position = writer->Position();
+ const int64 stop_position = writer->Position();
if (stop_position < 0 ||
- stop_position - payload_position != static_cast<int64_t>(void_size))
+ stop_position - payload_position != static_cast<int64>(void_size))
return 0;
return void_size;
}
-void GetVersion(int32_t* major, int32_t* minor, int32_t* build,
- int32_t* revision) {
+void GetVersion(int32* major, int32* minor, int32* build, int32* revision) {
*major = 0;
*minor = 2;
*build = 1;
*revision = 0;
}
-uint64_t MakeUID(unsigned int* seed) {
- uint64_t uid = 0;
+uint64 MakeUID(unsigned int* seed) {
+ uint64 uid = 0;
#ifdef __MINGW32__
srand(*seed);
@@ -625,21 +624,22 @@ uint64_t MakeUID(unsigned int* seed) {
// TODO(fgalligan): Move random number generation to platform specific code.
#ifdef _MSC_VER
(void)seed;
- const int32_t nn = rand();
+ const int32 nn = rand();
#elif __ANDROID__
- int32_t temp_num = 1;
+ (void)seed;
+ int32 temp_num = 1;
int fd = open("/dev/urandom", O_RDONLY);
if (fd != -1) {
read(fd, &temp_num, sizeof(temp_num));
close(fd);
}
- const int32_t nn = temp_num;
+ const int32 nn = temp_num;
#elif defined __MINGW32__
- const int32_t nn = rand();
+ const int32 nn = rand();
#else
- const int32_t nn = rand_r(seed);
+ const int32 nn = rand_r(seed);
#endif
- const int32_t n = 0xFF & (nn >> 4); // throw away low-order bits
+ const int32 n = 0xFF & (nn >> 4); // throw away low-order bits
uid |= n;
}
@@ -647,4 +647,97 @@ uint64_t MakeUID(unsigned int* seed) {
return uid;
}
+bool IsMatrixCoefficientsValueValid(uint64_t value) {
+ switch (value) {
+ case mkvmuxer::Colour::kGbr:
+ case mkvmuxer::Colour::kBt709:
+ case mkvmuxer::Colour::kUnspecifiedMc:
+ case mkvmuxer::Colour::kReserved:
+ case mkvmuxer::Colour::kFcc:
+ case mkvmuxer::Colour::kBt470bg:
+ case mkvmuxer::Colour::kSmpte170MMc:
+ case mkvmuxer::Colour::kSmpte240MMc:
+ case mkvmuxer::Colour::kYcocg:
+ case mkvmuxer::Colour::kBt2020NonConstantLuminance:
+ case mkvmuxer::Colour::kBt2020ConstantLuminance:
+ return true;
+ }
+ return false;
+}
+
+bool IsChromaSitingHorzValueValid(uint64_t value) {
+ switch (value) {
+ case mkvmuxer::Colour::kUnspecifiedCsh:
+ case mkvmuxer::Colour::kLeftCollocated:
+ case mkvmuxer::Colour::kHalfCsh:
+ return true;
+ }
+ return false;
+}
+
+bool IsChromaSitingVertValueValid(uint64_t value) {
+ switch (value) {
+ case mkvmuxer::Colour::kUnspecifiedCsv:
+ case mkvmuxer::Colour::kTopCollocated:
+ case mkvmuxer::Colour::kHalfCsv:
+ return true;
+ }
+ return false;
+}
+
+bool IsColourRangeValueValid(uint64_t value) {
+ switch (value) {
+ case mkvmuxer::Colour::kUnspecifiedCr:
+ case mkvmuxer::Colour::kBroadcastRange:
+ case mkvmuxer::Colour::kFullRange:
+ case mkvmuxer::Colour::kMcTcDefined:
+ return true;
+ }
+ return false;
+}
+
+bool IsTransferCharacteristicsValueValid(uint64_t value) {
+ switch (value) {
+ case mkvmuxer::Colour::kIturBt709Tc:
+ case mkvmuxer::Colour::kUnspecifiedTc:
+ case mkvmuxer::Colour::kReservedTc:
+ case mkvmuxer::Colour::kGamma22Curve:
+ case mkvmuxer::Colour::kGamma28Curve:
+ case mkvmuxer::Colour::kSmpte170MTc:
+ case mkvmuxer::Colour::kSmpte240MTc:
+ case mkvmuxer::Colour::kLinear:
+ case mkvmuxer::Colour::kLog:
+ case mkvmuxer::Colour::kLogSqrt:
+ case mkvmuxer::Colour::kIec6196624:
+ case mkvmuxer::Colour::kIturBt1361ExtendedColourGamut:
+ case mkvmuxer::Colour::kIec6196621:
+ case mkvmuxer::Colour::kIturBt202010bit:
+ case mkvmuxer::Colour::kIturBt202012bit:
+ case mkvmuxer::Colour::kSmpteSt2084:
+ case mkvmuxer::Colour::kSmpteSt4281Tc:
+ case mkvmuxer::Colour::kAribStdB67Hlg:
+ return true;
+ }
+ return false;
+}
+
+bool IsPrimariesValueValid(uint64_t value) {
+ switch (value) {
+ case mkvmuxer::Colour::kReservedP0:
+ case mkvmuxer::Colour::kIturBt709P:
+ case mkvmuxer::Colour::kUnspecifiedP:
+ case mkvmuxer::Colour::kReservedP3:
+ case mkvmuxer::Colour::kIturBt470M:
+ case mkvmuxer::Colour::kIturBt470Bg:
+ case mkvmuxer::Colour::kSmpte170MP:
+ case mkvmuxer::Colour::kSmpte240MP:
+ case mkvmuxer::Colour::kFilm:
+ case mkvmuxer::Colour::kIturBt2020:
+ case mkvmuxer::Colour::kSmpteSt4281P:
+ case mkvmuxer::Colour::kJedecP22Phosphors:
+ return true;
+ }
+ return false;
+}
+
} // namespace mkvmuxer
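
The WriteUInt()/WriteUIntSize() pair above emits EBML "coded" unsigned integers: an n-byte coded value carries a length-marker bit at bit position n*7 followed by the value itself in big-endian byte order, and the all-ones payload is reserved, which is why anything larger than (1 << n*7) - 2 has to move up to a wider encoding. A minimal standalone sketch of that encoding, independent of the patch (EncodeVint and its vector return type are illustrative only, not libwebm API):

#include <cstdint>
#include <vector>

// Encode |value| as an EBML coded (VINT) unsigned integer, big-endian,
// using the smallest width whose all-ones payload is not needed.
std::vector<uint8_t> EncodeVint(uint64_t value) {
  int size = 1;
  while (size <= 8 && value > (1ULL << (size * 7)) - 2) ++size;
  if (size > 8) return std::vector<uint8_t>();  // does not fit in 8 bytes
  const uint64_t marked = value | (1ULL << (size * 7));  // length-marker bit
  std::vector<uint8_t> out(size);
  for (int i = 0; i < size; ++i)  // most significant byte first
    out[i] = static_cast<uint8_t>(marked >> ((size - 1 - i) * 8));
  return out;
}

// EncodeVint(1) yields {0x81}; EncodeVint(500) yields {0x41, 0xF4}, which is
// what WriteUInt() above produces through WriteUIntSize() and SerializeInt().
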
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h
index 0e21a2dcbe5..132388da599 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvmuxerutil.h
@@ -8,87 +8,104 @@
#ifndef MKVMUXER_MKVMUXERUTIL_H_
#define MKVMUXER_MKVMUXERUTIL_H_
-#include <stdint.h>
+#include "mkvmuxertypes.h"
+
+#include "stdint.h"
namespace mkvmuxer {
class Cluster;
class Frame;
class IMkvWriter;
-const uint64_t kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL;
-const int64_t kMaxBlockTimecode = 0x07FFFLL;
+// TODO(tomfinegan): mkvmuxer:: integer types continue to be used here because
+// changing them causes pain for downstream projects. It would be nice to find
+// a solution that allows removal of the mkvmuxer:: integer types while
+// avoiding pain for downstream users of libwebm. Considering that
+// mkvmuxerutil.{cc,h} are really, for the great majority of cases, EBML size
+// calculation and writer functions, perhaps a more EBML-focused utility would
+// be the way to go as a first step.
+
+const uint64 kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL;
+const int64 kMaxBlockTimecode = 0x07FFFLL;
// Writes out |value| in Big Endian order. Returns 0 on success.
-int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size);
+int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size);
// Returns the size in bytes of the element.
-int32_t GetUIntSize(uint64_t value);
-int32_t GetIntSize(int64_t value);
-int32_t GetCodedUIntSize(uint64_t value);
-uint64_t EbmlMasterElementSize(uint64_t type, uint64_t value);
-uint64_t EbmlElementSize(uint64_t type, int64_t value);
-uint64_t EbmlElementSize(uint64_t type, uint64_t value);
-uint64_t EbmlElementSize(uint64_t type, float value);
-uint64_t EbmlElementSize(uint64_t type, const char* value);
-uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size);
-uint64_t EbmlDateElementSize(uint64_t type);
+int32 GetUIntSize(uint64 value);
+int32 GetIntSize(int64 value);
+int32 GetCodedUIntSize(uint64 value);
+uint64 EbmlMasterElementSize(uint64 type, uint64 value);
+uint64 EbmlElementSize(uint64 type, int64 value);
+uint64 EbmlElementSize(uint64 type, uint64 value);
+uint64 EbmlElementSize(uint64 type, float value);
+uint64 EbmlElementSize(uint64 type, const char* value);
+uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size);
+uint64 EbmlDateElementSize(uint64 type);
// Returns the size in bytes of the element assuming that the element was
// written using |fixed_size| bytes. If |fixed_size| is set to zero, then it
// computes the necessary number of bytes based on |value|.
-uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size);
+uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size);
// Creates an EBML coded number from |value| and writes it out. The size of
// the coded number is determined by the value of |value|. |value| must not
// be in a coded form. Returns 0 on success.
-int32_t WriteUInt(IMkvWriter* writer, uint64_t value);
+int32 WriteUInt(IMkvWriter* writer, uint64 value);
// Creates an EBML coded number from |value| and writes it out. The size of
// the coded number is determined by the value of |size|. |value| must not
// be in a coded form. Returns 0 on success.
-int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size);
+int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size);
// Output an Mkv master element. Returns true if the element was written.
-bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t value, uint64_t size);
+bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 value, uint64 size);
// Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the
// ID to |SerializeInt|. Returns 0 on success.
-int32_t WriteID(IMkvWriter* writer, uint64_t type);
+int32 WriteID(IMkvWriter* writer, uint64 type);
// Output an Mkv non-master element. Returns true if the element was written.
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, int64_t value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const char* value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value,
- uint64_t size);
-bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
+ uint64 size);
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value);
// Output an Mkv non-master element using fixed size. The element will be
// written out using exactly |fixed_size| bytes. If |fixed_size| is set to zero
// then it computes the necessary number of bytes based on |value|. Returns true
// if the element was written.
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value,
- uint64_t fixed_size);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value,
+ uint64 fixed_size);
// Output a Mkv Frame. It decides the correct element to write (Block vs
// SimpleBlock) based on the parameters of the Frame.
-uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
- Cluster* cluster);
+uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
+ Cluster* cluster);
// Output a void element. |size| must be the entire size in bytes that will be
// void. The function will calculate the size of the void header and subtract
// it from |size|.
-uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size);
+uint64 WriteVoidElement(IMkvWriter* writer, uint64 size);
// Returns the version number of the muxer in |major|, |minor|, |build|,
// and |revision|.
-void GetVersion(int32_t* major, int32_t* minor, int32_t* build,
- int32_t* revision);
+void GetVersion(int32* major, int32* minor, int32* build, int32* revision);
// Returns a random number to be used for UID, using |seed| to seed
// the random-number generator (see POSIX rand_r() for semantics).
-uint64_t MakeUID(unsigned int* seed);
+uint64 MakeUID(unsigned int* seed);
+
+// Colour field validation helpers. All return true when |value| is valid.
+bool IsMatrixCoefficientsValueValid(uint64_t value);
+bool IsChromaSitingHorzValueValid(uint64_t value);
+bool IsChromaSitingVertValueValid(uint64_t value);
+bool IsColourRangeValueValid(uint64_t value);
+bool IsTransferCharacteristicsValueValid(uint64_t value);
+bool IsPrimariesValueValid(uint64_t value);
} // namespace mkvmuxer
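
For the Void sizing described above, a quick worked example (assuming the usual 1-byte Void ID and a 1-byte coded size, which holds for small payloads): asking WriteVoidElement() for |size| = 100 gives a payload of 100 - 1 (ID) - 1 (coded size of 99) = 98 bytes, and the element written back out is 1 (ID) + 1 (coded size of 98) + 98 (payload) = 100 bytes, so the void_size == size consistency check in the implementation passes and the reserved region is exactly |size| bytes long.
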
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc
index ca48e149c6d..ec34e4df818 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvmuxer/mkvwriter.cc
@@ -77,7 +77,7 @@ int32 MkvWriter::Position(int64 position) {
#ifdef _MSC_VER
return _fseeki64(file_, position, SEEK_SET);
#else
- return fseek(file_, position, SEEK_SET);
+ return fseeko(file_, static_cast<off_t>(position), SEEK_SET);
#endif
}
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc
index 21801154d9f..e62d6f6075c 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.cc
@@ -25,6 +25,7 @@
namespace mkvparser {
const float MasteringMetadata::kValueNotPresent = FLT_MAX;
const long long Colour::kValueNotPresent = LLONG_MAX;
+const float Projection::kValueNotPresent = FLT_MAX;
#ifdef MSC_COMPAT
inline bool isnan(double val) { return !!_isnan(val); }
@@ -1475,6 +1476,8 @@ long Segment::Load() {
}
}
+SeekHead::Entry::Entry() : id(0), pos(0), element_start(0), element_size(0) {}
+
SeekHead::SeekHead(Segment* pSegment, long long start, long long size_,
long long element_start, long long element_size)
: m_pSegment(pSegment),
@@ -1766,18 +1769,7 @@ bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_,
if ((pos + seekIdSize) > stop)
return false;
- // Note that the SeekId payload really is serialized
- // as a "Matroska integer", not as a plain binary value.
- // In fact, Matroska requires that ID values in the
- // stream exactly match the binary representation as listed
- // in the Matroska specification.
- //
- // This parser is more liberal, and permits IDs to have
- // any width. (This could make the representation in the stream
- // different from what's in the spec, but it doesn't matter here,
- // since we always normalize "Matroska integer" values.)
-
- pEntry->id = ReadUInt(pReader, pos, len); // payload
+ pEntry->id = ReadID(pReader, pos, len); // payload
if (pEntry->id <= 0)
return false;
@@ -4125,7 +4117,7 @@ ContentEncoding::~ContentEncoding() {
}
const ContentEncoding::ContentCompression*
- ContentEncoding::GetCompressionByIndex(unsigned long idx) const {
+ContentEncoding::GetCompressionByIndex(unsigned long idx) const {
const ptrdiff_t count = compression_entries_end_ - compression_entries_;
assert(count >= 0);
@@ -5188,11 +5180,92 @@ bool Colour::Parse(IMkvReader* reader, long long colour_start,
return true;
}
+bool Projection::Parse(IMkvReader* reader, long long start, long long size,
+ Projection** projection) {
+ if (!reader || *projection)
+ return false;
+
+ std::auto_ptr<Projection> projection_ptr(new Projection());
+ if (!projection_ptr.get())
+ return false;
+
+ const long long end = start + size;
+ long long read_pos = start;
+
+ while (read_pos < end) {
+ long long child_id = 0;
+ long long child_size = 0;
+
+ const long long status =
+ ParseElementHeader(reader, read_pos, end, child_id, child_size);
+ if (status < 0)
+ return false;
+
+ if (child_id == libwebm::kMkvProjectionType) {
+ long long projection_type = kTypeNotPresent;
+ projection_type = UnserializeUInt(reader, read_pos, child_size);
+ if (projection_type < 0)
+ return false;
+
+ projection_ptr->type = static_cast<ProjectionType>(projection_type);
+ } else if (child_id == libwebm::kMkvProjectionPrivate) {
+ unsigned char* data = SafeArrayAlloc<unsigned char>(1, child_size);
+
+ if (data == NULL)
+ return false;
+
+ const int status =
+ reader->Read(read_pos, static_cast<long>(child_size), data);
+
+ if (status) {
+ delete[] data;
+ return false;
+ }
+
+ projection_ptr->private_data = data;
+ projection_ptr->private_data_length = static_cast<size_t>(child_size);
+ } else {
+ double value = 0;
+ const long long value_parse_status =
+ UnserializeFloat(reader, read_pos, child_size, value);
+ if (value_parse_status < 0) {
+ return false;
+ }
+
+ switch (child_id) {
+ case libwebm::kMkvProjectionPoseYaw:
+ projection_ptr->pose_yaw = static_cast<float>(value);
+ break;
+ case libwebm::kMkvProjectionPosePitch:
+ projection_ptr->pose_pitch = static_cast<float>(value);
+ break;
+ case libwebm::kMkvProjectionPoseRoll:
+ projection_ptr->pose_roll = static_cast<float>(value);
+ break;
+ default:
+ return false;
+ }
+ }
+
+ read_pos += child_size;
+ if (read_pos > end)
+ return false;
+ }
+
+ *projection = projection_ptr.release();
+ return true;
+}
+
VideoTrack::VideoTrack(Segment* pSegment, long long element_start,
long long element_size)
- : Track(pSegment, element_start, element_size), m_colour(NULL) {}
+ : Track(pSegment, element_start, element_size),
+ m_colour(NULL),
+ m_projection(NULL) {}
-VideoTrack::~VideoTrack() { delete m_colour; }
+VideoTrack::~VideoTrack() {
+ delete m_colour;
+ delete m_projection;
+}
long VideoTrack::Parse(Segment* pSegment, const Info& info,
long long element_start, long long element_size,
@@ -5224,6 +5297,7 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
const long long stop = pos + s.size;
Colour* colour = NULL;
+ Projection* projection = NULL;
while (pos < stop) {
long long id, size;
@@ -5274,6 +5348,9 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
} else if (id == libwebm::kMkvColour) {
if (!Colour::Parse(pReader, pos, size, &colour))
return E_FILE_FORMAT_INVALID;
+ } else if (id == libwebm::kMkvProjection) {
+ if (!Projection::Parse(pReader, pos, size, &projection))
+ return E_FILE_FORMAT_INVALID;
}
pos += size; // consume payload
@@ -5305,6 +5382,7 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
pTrack->m_stereo_mode = stereo_mode;
pTrack->m_rate = rate;
pTrack->m_colour = colour;
+ pTrack->m_projection = projection;
pResult = pTrack;
return 0; // success
@@ -5405,6 +5483,8 @@ long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const {
Colour* VideoTrack::GetColour() const { return m_colour; }
+Projection* VideoTrack::GetProjection() const { return m_projection; }
+
long long VideoTrack::GetWidth() const { return m_width; }
long long VideoTrack::GetHeight() const { return m_height; }
@@ -6698,8 +6778,10 @@ Cluster::Cluster(Segment* pSegment, long idx, long long element_start
{}
Cluster::~Cluster() {
- if (m_entries_count <= 0)
+ if (m_entries_count <= 0) {
+ delete[] m_entries;
return;
+ }
BlockEntry** i = m_entries;
BlockEntry** const j = m_entries + m_entries_count;
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.h b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.h
index 42e6e88ab46..26c2b7e5ebf 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.h
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvparser.h
@@ -473,6 +473,34 @@ struct Colour {
MasteringMetadata* mastering_metadata;
};
+struct Projection {
+ enum ProjectionType {
+ kTypeNotPresent = -1,
+ kRectangular = 0,
+ kEquirectangular = 1,
+ kCubeMap = 2,
+ kMesh = 3,
+ };
+ static const float kValueNotPresent;
+ Projection()
+ : type(kTypeNotPresent),
+ private_data(NULL),
+ private_data_length(0),
+ pose_yaw(kValueNotPresent),
+ pose_pitch(kValueNotPresent),
+ pose_roll(kValueNotPresent) {}
+ ~Projection() { delete[] private_data; }
+ static bool Parse(IMkvReader* reader, long long element_start,
+ long long element_size, Projection** projection);
+
+ ProjectionType type;
+ unsigned char* private_data;
+ size_t private_data_length;
+ float pose_yaw;
+ float pose_pitch;
+ float pose_roll;
+};
+
class VideoTrack : public Track {
VideoTrack(const VideoTrack&);
VideoTrack& operator=(const VideoTrack&);
@@ -497,6 +525,8 @@ class VideoTrack : public Track {
Colour* GetColour() const;
+ Projection* GetProjection() const;
+
private:
long long m_width;
long long m_height;
@@ -508,6 +538,7 @@ class VideoTrack : public Track {
double m_rate;
Colour* m_colour;
+ Projection* m_projection;
};
class AudioTrack : public Track {
@@ -813,6 +844,8 @@ class SeekHead {
long Parse();
struct Entry {
+ Entry();
+
// the SeekHead entry payload
long long id;
long long pos;
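
The new GetProjection() accessor mirrors GetColour() on VideoTrack. A hedged usage sketch, assuming |video_track| was already obtained through the normal mkvparser Segment/Tracks parsing path and that the header include path matches this layout; DumpProjection itself is illustrative, not part of the patch:

#include <cstdio>

#include "mkvparser/mkvparser.h"

// Print the Projection metadata added by this patch, if the track has any.
void DumpProjection(const mkvparser::VideoTrack* video_track) {
  const mkvparser::Projection* const projection = video_track->GetProjection();
  if (projection == NULL) {
    printf("no Projection element present\n");
    return;
  }
  printf("projection type: %d\n", static_cast<int>(projection->type));
  if (projection->private_data != NULL)
    printf("ProjectionPrivate: %u bytes\n",
           static_cast<unsigned>(projection->private_data_length));
  if (projection->pose_yaw != mkvparser::Projection::kValueNotPresent)
    printf("pose yaw: %f\n", projection->pose_yaw);
}
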
diff --git a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvreader.cc b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvreader.cc
index 9f90d8c4f86..b8fd00c2635 100644
--- a/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvreader.cc
+++ b/chromium/third_party/libvpx/source/libvpx/third_party/libwebm/mkvparser/mkvreader.cc
@@ -117,7 +117,7 @@ int MkvReader::Read(long long offset, long len, unsigned char* buffer) {
if (status)
return -1; // error
#else
- fseek(m_file, offset, SEEK_SET);
+ fseeko(m_file, static_cast<off_t>(offset), SEEK_SET);
#endif
const size_t size = fread(buffer, 1, len, m_file);
@@ -128,4 +128,4 @@ int MkvReader::Read(long long offset, long len, unsigned char* buffer) {
return 0; // success
}
-} // namespace mkvparser
\ No newline at end of file
+} // namespace mkvparser
diff --git a/chromium/third_party/libvpx/source/libvpx/tools.mk b/chromium/third_party/libvpx/source/libvpx/tools.mk
new file mode 100644
index 00000000000..3c660b1dfd5
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/tools.mk
@@ -0,0 +1,110 @@
+##
+## Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+##
+## Use of this source code is governed by a BSD-style license
+## that can be found in the LICENSE file in the root of the source
+## tree. An additional intellectual property rights grant can be found
+## in the file PATENTS. All contributing project authors may
+## be found in the AUTHORS file in the root of the source tree.
+##
+
+# List of tools to build.
+TOOLS-yes += tiny_ssim.c
+tiny_ssim.SRCS += vpx/vpx_integer.h
+tiny_ssim.GUID = 3afa9b05-940b-4d68-b5aa-55157d8ed7b4
+tiny_ssim.DESCRIPTION = Generate SSIM/PSNR from raw .yuv files
+
+#
+# End of specified files. The rest of the build rules should happen
+# automagically from here.
+#
+
+
+# Expand list of selected tools to build (as specified above)
+TOOLS = $(addprefix tools/,$(call enabled,TOOLS))
+ALL_SRCS = $(foreach ex,$(TOOLS),$($(notdir $(ex:.c=)).SRCS))
+
+
+# Expand each tool's sources into a variable containing all sources
+# for that tool (not just the main one specified in TOOLS)
+# and add this file to the list (for MSVS workspace generation)
+$(foreach ex,$(TOOLS),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) tools.mk))
+
+
+# Create build/install dependencies for all tools. The common case
+# is handled here. The MSVS case is handled below.
+NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
+DIST-BINS-$(NOT_MSVS) += $(addprefix bin/,$(TOOLS:.c=$(EXE_SFX)))
+DIST-SRCS-yes += $(ALL_SRCS)
+OBJS-$(NOT_MSVS) += $(call objs,$(ALL_SRCS))
+BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(TOOLS:.c=$(EXE_SFX)))
+
+
+# Instantiate linker template for all tools.
+$(foreach bin,$(BINS-yes),\
+ $(eval $(bin):)\
+ $(eval $(call linker_template,$(bin),\
+ $(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \
+ -lm\
+ )))
+
+
+# The following pairs define a mapping of locations in the distribution
+# tree to locations in the source/build trees.
+INSTALL_MAPS += src/%.c %.c
+INSTALL_MAPS += src/% $(SRC_PATH_BARE)/%
+INSTALL_MAPS += bin/% %
+INSTALL_MAPS += % %
+
+
+# Build Visual Studio Projects. We use a template here to instantiate
+# explicit rules rather than using an implicit rule because we want to
+# leverage make's VPATH searching rather than specifying the paths on
+# each file in TOOLS. This has the unfortunate side effect that
+# touching the source files triggers a rebuild of the project files
+# even though there is no real dependency there (the dependency is on
+# the makefiles). We may want to revisit this.
+define vcproj_template
+$(1): $($(1:.$(VCPROJ_SFX)=).SRCS) vpx.$(VCPROJ_SFX)
+ $(if $(quiet),@echo " [vcproj] $$@")
+ $(qexec)$$(GEN_VCPROJ)\
+ --exe\
+ --target=$$(TOOLCHAIN)\
+ --name=$$(@:.$(VCPROJ_SFX)=)\
+ --ver=$$(CONFIG_VS_VERSION)\
+ --proj-guid=$$($$(@:.$(VCPROJ_SFX)=).GUID)\
+ --src-path-bare="$(SRC_PATH_BARE)" \
+ $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \
+ --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \
+ $$(INTERNAL_LDFLAGS) $$(LDFLAGS) $$^
+endef
+TOOLS_BASENAME := $(notdir $(TOOLS))
+PROJECTS-$(CONFIG_MSVS) += $(TOOLS_BASENAME:.c=.$(VCPROJ_SFX))
+INSTALL-BINS-$(CONFIG_MSVS) += $(foreach p,$(VS_PLATFORMS),\
+ $(addprefix bin/$(p)/,$(TOOLS_BASENAME:.c=.exe)))
+$(foreach proj,$(call enabled,PROJECTS),\
+ $(eval $(call vcproj_template,$(proj))))
+
+#
+# Documentation Rules
+#
+%.dox: %.c
+ @echo " [DOXY] $@"
+ @mkdir -p $(dir $@)
+ @echo "/*!\page tools_$(@F:.dox=) $(@F:.dox=)" > $@
+ @echo " \includelineno $(<F)" >> $@
+ @echo "*/" >> $@
+
+tools.dox: tools.mk
+ @echo " [DOXY] $@"
+ @echo "/*!\page tools Tools" > $@
+ @echo " This SDK includes a number of tools/utilities."\
+ "The following tools are included: ">>$@
+ @$(foreach ex,$(sort $(notdir $(TOOLS:.c=))),\
+ echo " - \subpage tools_$(ex) $($(ex).DESCRIPTION)" >> $@;)
+ @echo "*/" >> $@
+
+CLEAN-OBJS += tools.doxy tools.dox $(TOOLS:.c=.dox)
+DOCS-yes += tools.doxy tools.dox
+tools.doxy: tools.dox $(TOOLS:.c=.dox)
+ @echo "INPUT += $^" > $@
diff --git a/chromium/third_party/libvpx/source/libvpx/tools/tiny_ssim.c b/chromium/third_party/libvpx/source/libvpx/tools/tiny_ssim.c
new file mode 100644
index 00000000000..28052e0a84d
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/tools/tiny_ssim.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <errno.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "vpx/vpx_integer.h"
+
+void vp8_ssim_parms_8x8_c(unsigned char *s, int sp, unsigned char *r, int rp,
+ uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s,
+ uint32_t *sum_sq_r, uint32_t *sum_sxr) {
+ int i, j;
+ for (i = 0; i < 8; i++, s += sp, r += rp) {
+ for (j = 0; j < 8; j++) {
+ *sum_s += s[j];
+ *sum_r += r[j];
+ *sum_sq_s += s[j] * s[j];
+ *sum_sq_r += r[j] * r[j];
+ *sum_sxr += s[j] * r[j];
+ }
+ }
+}
+
+static const int64_t cc1 = 26634;  // 64^2*(.01*255)^2
+static const int64_t cc2 = 239708;  // 64^2*(.03*255)^2
+
+static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s,
+ uint32_t sum_sq_r, uint32_t sum_sxr, int count) {
+ int64_t ssim_n, ssim_d;
+ int64_t c1, c2;
+
+ // scale the constants by number of pixels
+ c1 = (cc1 * count * count) >> 12;
+ c2 = (cc2 * count * count) >> 12;
+
+ ssim_n = (2 * sum_s * sum_r + c1) *
+ ((int64_t)2 * count * sum_sxr - (int64_t)2 * sum_s * sum_r + c2);
+
+ ssim_d = (sum_s * sum_s + sum_r * sum_r + c1) *
+ ((int64_t)count * sum_sq_s - (int64_t)sum_s * sum_s +
+ (int64_t)count * sum_sq_r - (int64_t)sum_r * sum_r + c2);
+
+ return ssim_n * 1.0 / ssim_d;
+}
+
+static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp) {
+ uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0;
+ vp8_ssim_parms_8x8_c(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r,
+ &sum_sxr);
+ return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64);
+}
+
+// We are using an 8x8 moving window with the starting location of each 8x8
+// window on the 4x4 pixel grid. Such an arrangement allows the windows to
+// overlap block boundaries to penalize blocking artifacts.
+double vp8_ssim2(unsigned char *img1, unsigned char *img2, int stride_img1,
+ int stride_img2, int width, int height) {
+ int i, j;
+ int samples = 0;
+ double ssim_total = 0;
+
+  // sample points start at each 4x4 location
+ for (i = 0; i <= height - 8;
+ i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) {
+ for (j = 0; j <= width - 8; j += 4) {
+ double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2);
+ ssim_total += v;
+ samples++;
+ }
+ }
+ ssim_total /= samples;
+ return ssim_total;
+}
+
+static uint64_t calc_plane_error(uint8_t *orig, int orig_stride, uint8_t *recon,
+ int recon_stride, unsigned int cols,
+ unsigned int rows) {
+ unsigned int row, col;
+ uint64_t total_sse = 0;
+ int diff;
+
+ for (row = 0; row < rows; row++) {
+ for (col = 0; col < cols; col++) {
+ diff = orig[col] - recon[col];
+ total_sse += diff * diff;
+ }
+
+ orig += orig_stride;
+ recon += recon_stride;
+ }
+
+ return total_sse;
+}
+
+#define MAX_PSNR 100
+
+double vp9_mse2psnr(double samples, double peak, double mse) {
+ double psnr;
+
+ if (mse > 0.0)
+ psnr = 10.0 * log10(peak * peak * samples / mse);
+ else
+ psnr = MAX_PSNR; // Limit to prevent / 0
+
+ if (psnr > MAX_PSNR) psnr = MAX_PSNR;
+
+ return psnr;
+}
+
+int main(int argc, char *argv[]) {
+ FILE *f[2];
+ uint8_t *buf[2];
+ int w, h, n_frames, tl_skip = 0, tl_skips_remaining = 0;
+ double ssim = 0, psnravg = 0, psnrglb = 0;
+ double ssimy, ssimu, ssimv;
+ uint64_t psnry, psnru, psnrv;
+
+ if (argc < 4) {
+ fprintf(stderr, "Usage: %s file1.yuv file2.yuv WxH [tl_skip={0,1,3}]\n",
+ argv[0]);
+ return 1;
+ }
+ f[0] = strcmp(argv[1], "-") ? fopen(argv[1], "rb") : stdin;
+ f[1] = strcmp(argv[2], "-") ? fopen(argv[2], "rb") : stdin;
+ sscanf(argv[3], "%dx%d", &w, &h);
+ // Number of frames to skip from file1.yuv for every frame used. Normal values
+ // 0, 1 and 3 correspond to TL2, TL1 and TL0 respectively for a 3TL encoding
+ // in mode 10. 7 would be reasonable for comparing TL0 of a 4-layer encoding.
+ if (argc > 4) {
+ sscanf(argv[4], "%d", &tl_skip);
+ }
+ if (!f[0] || !f[1]) {
+ fprintf(stderr, "Could not open input files: %s\n", strerror(errno));
+ return 1;
+ }
+ if (w <= 0 || h <= 0 || w & 1 || h & 1) {
+ fprintf(stderr, "Invalid size %dx%d\n", w, h);
+ return 1;
+ }
+ buf[0] = malloc(w * h * 3 / 2);
+ buf[1] = malloc(w * h * 3 / 2);
+ n_frames = 0;
+ while (1) {
+ size_t r1, r2;
+ r1 = fread(buf[0], w * h * 3 / 2, 1, f[0]);
+ if (r1) {
+ // Reading parts of file1.yuv that were not used in temporal layer.
+ if (tl_skips_remaining > 0) {
+ --tl_skips_remaining;
+ continue;
+ }
+ // Use frame, but skip |tl_skip| after it.
+ tl_skips_remaining = tl_skip;
+ }
+ r2 = fread(buf[1], w * h * 3 / 2, 1, f[1]);
+ if (r1 && r2 && r1 != r2) {
+ fprintf(stderr, "Failed to read data: %s [%d/%d]\n", strerror(errno),
+ (int)r1, (int)r2);
+ return 1;
+ } else if (r1 == 0 || r2 == 0) {
+ break;
+ }
+#define psnr_and_ssim(ssim, psnr, buf0, buf1, w, h) \
+ ssim = vp8_ssim2(buf0, buf1, w, w, w, h); \
+ psnr = calc_plane_error(buf0, w, buf1, w, w, h);
+ psnr_and_ssim(ssimy, psnry, buf[0], buf[1], w, h);
+ psnr_and_ssim(ssimu, psnru, buf[0] + w * h, buf[1] + w * h, w / 2, h / 2);
+ psnr_and_ssim(ssimv, psnrv, buf[0] + w * h * 5 / 4, buf[1] + w * h * 5 / 4,
+ w / 2, h / 2);
+ ssim += 0.8 * ssimy + 0.1 * (ssimu + ssimv);
+ psnravg +=
+ vp9_mse2psnr(w * h * 6 / 4, 255.0, (double)psnry + psnru + psnrv);
+ psnrglb += psnry + psnru + psnrv;
+ n_frames++;
+ }
+ free(buf[0]);
+ free(buf[1]);
+ ssim /= n_frames;
+ psnravg /= n_frames;
+ psnrglb = vp9_mse2psnr((double)n_frames * w * h * 6 / 4, 255.0, psnrglb);
+
+ printf("AvgPSNR: %lf\n", psnravg);
+ printf("GlbPSNR: %lf\n", psnrglb);
+ printf("SSIM: %lf\n", 100 * pow(ssim, 8.0));
+ printf("Nframes: %d\n", n_frames);
+
+ if (strcmp(argv[1], "-")) fclose(f[0]);
+ if (strcmp(argv[2], "-")) fclose(f[1]);
+
+ return 0;
+}
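
The constants in similarity() are the textbook SSIM stabilizers C1 = (0.01*255)^2 and C2 = (0.03*255)^2 pre-scaled by 64^2, since the function works on raw 8x8 pixel sums rather than means; each factor of the usual SSIM expression is multiplied through by count^2 so that only integer sums are needed. Worked out:

  cc1 = 64^2 * (0.01 * 255)^2 = 4096 * 6.5025  ~= 26634
  cc2 = 64^2 * (0.03 * 255)^2 = 4096 * 58.5225 ~= 239708
  c1  = (cc1 * count * count) >> 12 = (0.01 * 255)^2 * count^2   (count = 64 per 8x8 window)

A typical invocation, with hypothetical file names, follows the usage string printed by the tool:

  tiny_ssim original.yuv decoded.yuv 352x288
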
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/loopfilter_filters.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/loopfilter_filters.c
index 1f60721e1cd..2a7cde8788f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/loopfilter_filters.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/loopfilter_filters.c
@@ -63,8 +63,8 @@ static void vp8_filter(signed char mask, uc hev, uc *op1, uc *op0, uc *oq0,
filter_value &= mask;
/* save bottom 3 bits so that we round one side +4 and the other +3
- * if it equals 4 we'll set to adjust by -1 to account for the fact
- * we'd round 3 the other way
+ * if it equals 4 we'll set it to adjust by -1 to account for the fact
+ * we'd round it by 3 the other way
*/
Filter1 = vp8_signed_char_clamp(filter_value + 4);
Filter2 = vp8_signed_char_clamp(filter_value + 3);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/idct_msa.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/idct_msa.c
index e1759c875e4..3d516d0f81a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/idct_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/idct_msa.c
@@ -90,8 +90,7 @@ static void idct4x4_addblk_msa(int16_t *input, uint8_t *pred,
v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
v4i32 res0, res1, res2, res3;
v16i8 zero = { 0 };
- v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
- v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
+ v16i8 pred0, pred1, pred2, pred3;
LD_SH2(input, 8, input0, input1);
UNPCK_SH_SW(input0, in0, in1);
@@ -111,20 +110,17 @@ static void idct4x4_addblk_msa(int16_t *input, uint8_t *pred,
res1 = CLIP_SW_0_255(res1);
res2 = CLIP_SW_0_255(res2);
res3 = CLIP_SW_0_255(res3);
- LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
- VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
- VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
- ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
+ PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
+ res0 = (v4i32)__msa_pckev_b((v16i8)vt0, (v16i8)vt1);
+ ST4x4_UB(res0, res0, 3, 2, 1, 0, dest, dest_stride);
}
static void idct4x4_addconst_msa(int16_t in_dc, uint8_t *pred,
int32_t pred_stride, uint8_t *dest,
int32_t dest_stride) {
- v8i16 vec;
- v8i16 res0, res1, res2, res3;
+ v8i16 vec, res0, res1, res2, res3, dst0, dst1;
v16i8 zero = { 0 };
- v16i8 pred0, pred1, pred2, pred3, dest0, dest1, dest2, dest3;
- v16i8 mask = { 0, 2, 4, 6, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
+ v16i8 pred0, pred1, pred2, pred3;
vec = __msa_fill_h(in_dc);
vec = __msa_srari_h(vec, 3);
@@ -133,55 +129,59 @@ static void idct4x4_addconst_msa(int16_t in_dc, uint8_t *pred,
res2, res3);
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
- LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
- VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
- VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
- ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
+ PCKEV_B2_SH(res1, res0, res3, res2, dst0, dst1);
+ dst0 = (v8i16)__msa_pckev_w((v4i32)dst1, (v4i32)dst0);
+ ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dest, dest_stride);
}
void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dq_coeff) {
- v8i16 input0, input1;
- v4i32 in0, in1, in2, in3, a1, b1, c1, d1;
- v4i32 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
+ v8i16 input0, input1, tmp0, tmp1, tmp2, tmp3, out0, out1;
+ const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 };
+ const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 };
+ const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 };
+ const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 };
LD_SH2(input, 8, input0, input1);
- UNPCK_SH_SW(input0, in0, in1);
- UNPCK_SH_SW(input1, in2, in3);
- BUTTERFLY_4(in0, in1, in2, in3, a1, b1, c1, d1);
- BUTTERFLY_4(a1, d1, c1, b1, hz0, hz1, hz3, hz2);
- TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3);
- BUTTERFLY_4(hz0, hz1, hz2, hz3, a1, b1, c1, d1);
- BUTTERFLY_4(a1, d1, c1, b1, vt0, vt1, vt3, vt2);
- ADD4(vt0, 3, vt1, 3, vt2, 3, vt3, 3, vt0, vt1, vt2, vt3);
- SRA_4V(vt0, vt1, vt2, vt3, 3);
- mb_dq_coeff[0] = __msa_copy_s_h((v8i16)vt0, 0);
- mb_dq_coeff[16] = __msa_copy_s_h((v8i16)vt1, 0);
- mb_dq_coeff[32] = __msa_copy_s_h((v8i16)vt2, 0);
- mb_dq_coeff[48] = __msa_copy_s_h((v8i16)vt3, 0);
- mb_dq_coeff[64] = __msa_copy_s_h((v8i16)vt0, 2);
- mb_dq_coeff[80] = __msa_copy_s_h((v8i16)vt1, 2);
- mb_dq_coeff[96] = __msa_copy_s_h((v8i16)vt2, 2);
- mb_dq_coeff[112] = __msa_copy_s_h((v8i16)vt3, 2);
- mb_dq_coeff[128] = __msa_copy_s_h((v8i16)vt0, 4);
- mb_dq_coeff[144] = __msa_copy_s_h((v8i16)vt1, 4);
- mb_dq_coeff[160] = __msa_copy_s_h((v8i16)vt2, 4);
- mb_dq_coeff[176] = __msa_copy_s_h((v8i16)vt3, 4);
- mb_dq_coeff[192] = __msa_copy_s_h((v8i16)vt0, 6);
- mb_dq_coeff[208] = __msa_copy_s_h((v8i16)vt1, 6);
- mb_dq_coeff[224] = __msa_copy_s_h((v8i16)vt2, 6);
- mb_dq_coeff[240] = __msa_copy_s_h((v8i16)vt3, 6);
+ input1 = (v8i16)__msa_sldi_b((v16i8)input1, (v16i8)input1, 8);
+ tmp0 = input0 + input1;
+ tmp1 = input0 - input1;
+ VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask0, mask1, tmp2, tmp3);
+ out0 = tmp2 + tmp3;
+ out1 = tmp2 - tmp3;
+ VSHF_H2_SH(out0, out1, out0, out1, mask2, mask3, input0, input1);
+ tmp0 = input0 + input1;
+ tmp1 = input0 - input1;
+ VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask0, mask1, tmp2, tmp3);
+ tmp0 = tmp2 + tmp3;
+ tmp1 = tmp2 - tmp3;
+ ADD2(tmp0, 3, tmp1, 3, out0, out1);
+ out0 >>= 3;
+ out1 >>= 3;
+ mb_dq_coeff[0] = __msa_copy_s_h(out0, 0);
+ mb_dq_coeff[16] = __msa_copy_s_h(out0, 4);
+ mb_dq_coeff[32] = __msa_copy_s_h(out1, 0);
+ mb_dq_coeff[48] = __msa_copy_s_h(out1, 4);
+ mb_dq_coeff[64] = __msa_copy_s_h(out0, 1);
+ mb_dq_coeff[80] = __msa_copy_s_h(out0, 5);
+ mb_dq_coeff[96] = __msa_copy_s_h(out1, 1);
+ mb_dq_coeff[112] = __msa_copy_s_h(out1, 5);
+ mb_dq_coeff[128] = __msa_copy_s_h(out0, 2);
+ mb_dq_coeff[144] = __msa_copy_s_h(out0, 6);
+ mb_dq_coeff[160] = __msa_copy_s_h(out1, 2);
+ mb_dq_coeff[176] = __msa_copy_s_h(out1, 6);
+ mb_dq_coeff[192] = __msa_copy_s_h(out0, 3);
+ mb_dq_coeff[208] = __msa_copy_s_h(out0, 7);
+ mb_dq_coeff[224] = __msa_copy_s_h(out1, 3);
+ mb_dq_coeff[240] = __msa_copy_s_h(out1, 7);
}
static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input,
uint8_t *dest, int32_t dest_stride) {
v8i16 input0, input1, dequant_in0, dequant_in1, mul0, mul1;
- v8i16 in0, in1, in2, in3;
- v8i16 hz0_h, hz1_h, hz2_h, hz3_h;
- v16i8 dest0, dest1, dest2, dest3;
- v4i32 hz0_w, hz1_w, hz2_w, hz3_w;
- v4i32 vt0, vt1, vt2, vt3, res0, res1, res2, res3;
+ v8i16 in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h;
+ v16u8 dest0, dest1, dest2, dest3;
+ v4i32 hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3, res0, res1, res2, res3;
v2i64 zero = { 0 };
- v16i8 mask = { 0, 4, 8, 12, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
LD_SH2(input, 8, input0, input1);
LD_SH2(dequant_input, 8, dequant_in0, dequant_in1);
@@ -196,7 +196,7 @@ static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input,
VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3);
SRARI_W4_SW(vt0, vt1, vt2, vt3, 3);
TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3);
- LD_SB4(dest, dest_stride, dest0, dest1, dest2, dest3);
+ LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3);
ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1,
res2, res3);
ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2,
@@ -206,19 +206,17 @@ static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input,
res1 = CLIP_SW_0_255(res1);
res2 = CLIP_SW_0_255(res2);
res3 = CLIP_SW_0_255(res3);
- VSHF_B2_SB(res0, dest0, res1, dest1, mask, mask, dest0, dest1);
- VSHF_B2_SB(res2, dest2, res3, dest3, mask, mask, dest2, dest3);
- ST_SB4(dest0, dest1, dest2, dest3, dest, dest_stride);
+ PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1);
+ res0 = (v4i32)__msa_pckev_b((v16i8)vt0, (v16i8)vt1);
+ ST4x4_UB(res0, res0, 3, 2, 1, 0, dest, dest_stride);
}
static void dequant_idct4x4_addblk_2x_msa(int16_t *input,
int16_t *dequant_input, uint8_t *dest,
int32_t dest_stride) {
v16u8 dest0, dest1, dest2, dest3;
- v8i16 in0, in1, in2, in3;
- v8i16 mul0, mul1, mul2, mul3, dequant_in0, dequant_in1;
- v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3;
- v8i16 res0, res1, res2, res3;
+ v8i16 in0, in1, in2, in3, mul0, mul1, mul2, mul3, dequant_in0, dequant_in1;
+ v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3, res0, res1, res2, res3;
v4i32 hz0l, hz1l, hz2l, hz3l, hz0r, hz1r, hz2r, hz3r;
v4i32 vt0l, vt1l, vt2l, vt3l, vt0r, vt1r, vt2r, vt3r;
v16i8 zero = { 0 };
@@ -247,11 +245,8 @@ static void dequant_idct4x4_addblk_2x_msa(int16_t *input,
res2, res3);
ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
- PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1, res2,
- res3);
- PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
- PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
- ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
+ PCKEV_B2_SW(res1, res0, res3, res2, vt0l, vt1l);
+ ST8x4_UB(vt0l, vt1l, dest, dest_stride);
__asm__ __volatile__(
"sw $zero, 0(%[input]) \n\t"
@@ -276,10 +271,9 @@ static void dequant_idct4x4_addblk_2x_msa(int16_t *input,
static void dequant_idct_addconst_2x_msa(int16_t *input, int16_t *dequant_input,
uint8_t *dest, int32_t dest_stride) {
- v8i16 input_dc0, input_dc1, vec;
+ v8i16 input_dc0, input_dc1, vec, res0, res1, res2, res3;
v16u8 dest0, dest1, dest2, dest3;
v16i8 zero = { 0 };
- v8i16 res0, res1, res2, res3;
input_dc0 = __msa_fill_h(input[0] * dequant_input[0]);
input_dc1 = __msa_fill_h(input[16] * dequant_input[0]);
@@ -292,11 +286,8 @@ static void dequant_idct_addconst_2x_msa(int16_t *input, int16_t *dequant_input,
res2, res3);
ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3);
CLIP_SH4_0_255(res0, res1, res2, res3);
- PCKEV_B4_SH(res0, res0, res1, res1, res2, res2, res3, res3, res0, res1, res2,
- res3);
- PCKOD_D2_UB(dest0, res0, dest1, res1, dest0, dest1);
- PCKOD_D2_UB(dest2, res2, dest3, res3, dest2, dest3);
- ST_UB4(dest0, dest1, dest2, dest3, dest, dest_stride);
+ PCKEV_B2_SH(res1, res0, res3, res2, res0, res1);
+ ST8x4_UB(res0, res1, dest, dest_stride);
}
void vp8_short_idct4x4llm_msa(int16_t *input, uint8_t *pred_ptr,
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/loopfilter_filters_msa.c b/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/loopfilter_filters_msa.c
index f5f1790ef53..98a4fc09a35 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/loopfilter_filters_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/loopfilter_filters_msa.c
@@ -24,208 +24,145 @@
mask = ((v16u8)mask <= b_limit); \
}
-#define VP8_LPF_FILTER4_4W(p1_in_out, p0_in_out, q0_in_out, q1_in_out, \
- mask_in, hev_in) \
- { \
- v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
- v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
- v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
- \
- p1_m = (v16i8)__msa_xori_b(p1_in_out, 0x80); \
- p0_m = (v16i8)__msa_xori_b(p0_in_out, 0x80); \
- q0_m = (v16i8)__msa_xori_b(q0_in_out, 0x80); \
- q1_m = (v16i8)__msa_xori_b(q1_in_out, 0x80); \
- \
- filt = __msa_subs_s_b(p1_m, q1_m); \
- \
- filt = filt & (v16i8)hev_in; \
- \
- q0_sub_p0 = q0_m - p0_m; \
- filt_sign = __msa_clti_s_b(filt, 0); \
- \
- cnst3h = __msa_ldi_h(3); \
- q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
- filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
- filt_r += q0_sub_p0_r; \
- filt_r = __msa_sat_s_h(filt_r, 7); \
- \
- q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \
- filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
- filt_l += q0_sub_p0_l; \
- filt_l = __msa_sat_s_h(filt_l, 7); \
- \
- filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
- filt = filt & (v16i8)mask_in; \
- \
- cnst4b = __msa_ldi_b(4); \
- filt1 = __msa_adds_s_b(filt, cnst4b); \
- filt1 >>= 3; \
- \
- cnst3b = __msa_ldi_b(3); \
- filt2 = __msa_adds_s_b(filt, cnst3b); \
- filt2 >>= 3; \
- \
- q0_m = __msa_subs_s_b(q0_m, filt1); \
- q0_in_out = __msa_xori_b((v16u8)q0_m, 0x80); \
- p0_m = __msa_adds_s_b(p0_m, filt2); \
- p0_in_out = __msa_xori_b((v16u8)p0_m, 0x80); \
- \
- filt = __msa_srari_b(filt1, 1); \
- hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
- filt = filt & (v16i8)hev_in; \
- \
- q1_m = __msa_subs_s_b(q1_m, filt); \
- q1_in_out = __msa_xori_b((v16u8)q1_m, 0x80); \
- p1_m = __msa_adds_s_b(p1_m, filt); \
- p1_in_out = __msa_xori_b((v16u8)p1_m, 0x80); \
+#define VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev) \
+ { \
+ v16i8 p1_m, p0_m, q0_m, q1_m, filt, q0_sub_p0, t1, t2; \
+ const v16i8 cnst4b = __msa_ldi_b(4); \
+ const v16i8 cnst3b = __msa_ldi_b(3); \
+ \
+ p1_m = (v16i8)__msa_xori_b(p1, 0x80); \
+ p0_m = (v16i8)__msa_xori_b(p0, 0x80); \
+ q0_m = (v16i8)__msa_xori_b(q0, 0x80); \
+ q1_m = (v16i8)__msa_xori_b(q1, 0x80); \
+ \
+ filt = __msa_subs_s_b(p1_m, q1_m); \
+ filt &= hev; \
+ q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt &= mask; \
+ t1 = __msa_adds_s_b(filt, cnst4b); \
+ t1 >>= cnst3b; \
+ t2 = __msa_adds_s_b(filt, cnst3b); \
+ t2 >>= cnst3b; \
+ q0_m = __msa_subs_s_b(q0_m, t1); \
+ q0 = __msa_xori_b((v16u8)q0_m, 0x80); \
+ p0_m = __msa_adds_s_b(p0_m, t2); \
+ p0 = __msa_xori_b((v16u8)p0_m, 0x80); \
+ filt = __msa_srari_b(t1, 1); \
+ hev = __msa_xori_b(hev, 0xff); \
+ filt &= hev; \
+ q1_m = __msa_subs_s_b(q1_m, filt); \
+ q1 = __msa_xori_b((v16u8)q1_m, 0x80); \
+ p1_m = __msa_adds_s_b(p1_m, filt); \
+ p1 = __msa_xori_b((v16u8)p1_m, 0x80); \
}
-#define VP8_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) \
- { \
- v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, q0_sub_p0_sign; \
- v16i8 filt, filt1, filt2, cnst4b, cnst3b, filt_sign; \
- v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
- \
- p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
- p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
- q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
- q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
- \
- filt = __msa_subs_s_b(p1_m, q1_m); \
- \
- q0_sub_p0 = q0_m - p0_m; \
- filt_sign = __msa_clti_s_b(filt, 0); \
- \
- cnst3h = __msa_ldi_h(3); \
- q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0); \
- q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0); \
- q0_sub_p0_r *= cnst3h; \
- filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
- filt_r += q0_sub_p0_r; \
- filt_r = __msa_sat_s_h(filt_r, 7); \
- \
- q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0); \
- q0_sub_p0_l *= cnst3h; \
- filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
- filt_l += q0_sub_p0_l; \
- filt_l = __msa_sat_s_h(filt_l, 7); \
- \
- filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
- filt = filt & (v16i8)(mask); \
- \
- cnst4b = __msa_ldi_b(4); \
- filt1 = __msa_adds_s_b(filt, cnst4b); \
- filt1 >>= 3; \
- \
- cnst3b = __msa_ldi_b(3); \
- filt2 = __msa_adds_s_b(filt, cnst3b); \
- filt2 >>= 3; \
- \
- q0_m = __msa_subs_s_b(q0_m, filt1); \
- p0_m = __msa_adds_s_b(p0_m, filt2); \
- q0_in = __msa_xori_b((v16u8)q0_m, 0x80); \
- p0_in = __msa_xori_b((v16u8)p0_m, 0x80); \
+#define VP8_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) \
+ { \
+ v16i8 p1_m, p0_m, q0_m, q1_m, filt, filt1, filt2; \
+ v16i8 q0_sub_p0; \
+ const v16i8 cnst4b = __msa_ldi_b(4); \
+ const v16i8 cnst3b = __msa_ldi_b(3); \
+ \
+ p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
+ p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
+ q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
+ q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
+ \
+ filt = __msa_subs_s_b(p1_m, q1_m); \
+ q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt &= mask; \
+ filt1 = __msa_adds_s_b(filt, cnst4b); \
+ filt1 >>= cnst3b; \
+ filt2 = __msa_adds_s_b(filt, cnst3b); \
+ filt2 >>= cnst3b; \
+ q0_m = __msa_subs_s_b(q0_m, filt1); \
+ p0_m = __msa_adds_s_b(p0_m, filt2); \
+ q0_in = __msa_xori_b((v16u8)q0_m, 0x80); \
+ p0_in = __msa_xori_b((v16u8)p0_m, 0x80); \
}
-#define VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \
- { \
- v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \
- v16i8 filt, q0_sub_p0, cnst4b, cnst3b; \
- v16i8 u, filt1, filt2, filt_sign, q0_sub_p0_sign; \
- v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_r, u_r, u_l, filt_l; \
- v8i16 cnst3h, cnst27h, cnst18h, cnst63h; \
- \
- cnst3h = __msa_ldi_h(3); \
- \
- p2_m = (v16i8)__msa_xori_b(p2, 0x80); \
- p1_m = (v16i8)__msa_xori_b(p1, 0x80); \
- p0_m = (v16i8)__msa_xori_b(p0, 0x80); \
- q0_m = (v16i8)__msa_xori_b(q0, 0x80); \
- q1_m = (v16i8)__msa_xori_b(q1, 0x80); \
- q2_m = (v16i8)__msa_xori_b(q2, 0x80); \
- \
- filt = __msa_subs_s_b(p1_m, q1_m); \
- q0_sub_p0 = q0_m - p0_m; \
- q0_sub_p0_sign = __msa_clti_s_b(q0_sub_p0, 0); \
- filt_sign = __msa_clti_s_b(filt, 0); \
- \
- q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0_sign, q0_sub_p0); \
- q0_sub_p0_r *= cnst3h; \
- filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
- filt_r = filt_r + q0_sub_p0_r; \
- filt_r = __msa_sat_s_h(filt_r, 7); \
- \
- q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0_sign, q0_sub_p0); \
- q0_sub_p0_l *= cnst3h; \
- filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
- filt_l = filt_l + q0_sub_p0_l; \
- filt_l = __msa_sat_s_h(filt_l, 7); \
- \
- filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
- filt = filt & (v16i8)mask; \
- filt2 = filt & (v16i8)hev; \
- \
- hev = __msa_xori_b(hev, 0xff); \
- filt = filt & (v16i8)hev; \
- cnst4b = __msa_ldi_b(4); \
- filt1 = __msa_adds_s_b(filt2, cnst4b); \
- filt1 >>= 3; \
- cnst3b = __msa_ldi_b(3); \
- filt2 = __msa_adds_s_b(filt2, cnst3b); \
- filt2 >>= 3; \
- q0_m = __msa_subs_s_b(q0_m, filt1); \
- p0_m = __msa_adds_s_b(p0_m, filt2); \
- \
- filt_sign = __msa_clti_s_b(filt, 0); \
- ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \
- \
- cnst27h = __msa_ldi_h(27); \
- cnst63h = __msa_ldi_h(63); \
- \
- u_r = filt_r * cnst27h; \
- u_r += cnst63h; \
- u_r >>= 7; \
- u_r = __msa_sat_s_h(u_r, 7); \
- u_l = filt_l * cnst27h; \
- u_l += cnst63h; \
- u_l >>= 7; \
- u_l = __msa_sat_s_h(u_l, 7); \
- u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
- q0_m = __msa_subs_s_b(q0_m, u); \
- q0 = __msa_xori_b((v16u8)q0_m, 0x80); \
- p0_m = __msa_adds_s_b(p0_m, u); \
- p0 = __msa_xori_b((v16u8)p0_m, 0x80); \
- cnst18h = __msa_ldi_h(18); \
- u_r = filt_r * cnst18h; \
- u_r += cnst63h; \
- u_r >>= 7; \
- u_r = __msa_sat_s_h(u_r, 7); \
- \
- u_l = filt_l * cnst18h; \
- u_l += cnst63h; \
- u_l >>= 7; \
- u_l = __msa_sat_s_h(u_l, 7); \
- u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
- q1_m = __msa_subs_s_b(q1_m, u); \
- q1 = __msa_xori_b((v16u8)q1_m, 0x80); \
- p1_m = __msa_adds_s_b(p1_m, u); \
- p1 = __msa_xori_b((v16u8)p1_m, 0x80); \
- u_r = filt_r << 3; \
- u_r += filt_r + cnst63h; \
- u_r >>= 7; \
- u_r = __msa_sat_s_h(u_r, 7); \
- \
- u_l = filt_l << 3; \
- u_l += filt_l + cnst63h; \
- u_l >>= 7; \
- u_l = __msa_sat_s_h(u_l, 7); \
- u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
- q2_m = __msa_subs_s_b(q2_m, u); \
- q2 = __msa_xori_b((v16u8)q2_m, 0x80); \
- p2_m = __msa_adds_s_b(p2_m, u); \
- p2 = __msa_xori_b((v16u8)p2_m, 0x80); \
+#define VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \
+ { \
+ v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \
+ v16i8 u, filt, t1, t2, filt_sign, q0_sub_p0; \
+ v8i16 filt_r, filt_l, u_r, u_l; \
+ v8i16 temp0, temp1, temp2, temp3; \
+ const v16i8 cnst4b = __msa_ldi_b(4); \
+ const v16i8 cnst3b = __msa_ldi_b(3); \
+ const v8i16 cnst9h = __msa_ldi_h(9); \
+ const v8i16 cnst63h = __msa_ldi_h(63); \
+ \
+ p2_m = (v16i8)__msa_xori_b(p2, 0x80); \
+ p1_m = (v16i8)__msa_xori_b(p1, 0x80); \
+ p0_m = (v16i8)__msa_xori_b(p0, 0x80); \
+ q0_m = (v16i8)__msa_xori_b(q0, 0x80); \
+ q1_m = (v16i8)__msa_xori_b(q1, 0x80); \
+ q2_m = (v16i8)__msa_xori_b(q2, 0x80); \
+ \
+ filt = __msa_subs_s_b(p1_m, q1_m); \
+ q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt &= mask; \
+ \
+ t2 = filt & hev; \
+ hev = __msa_xori_b(hev, 0xff); \
+ filt &= hev; \
+ t1 = __msa_adds_s_b(t2, cnst4b); \
+ t1 >>= cnst3b; \
+ t2 = __msa_adds_s_b(t2, cnst3b); \
+ t2 >>= cnst3b; \
+ q0_m = __msa_subs_s_b(q0_m, t1); \
+ p0_m = __msa_adds_s_b(p0_m, t2); \
+ filt_sign = __msa_clti_s_b(filt, 0); \
+ ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \
+ temp0 = filt_r * cnst9h; \
+ temp1 = temp0 + cnst63h; \
+ temp2 = filt_l * cnst9h; \
+ temp3 = temp2 + cnst63h; \
+ \
+ u_r = temp1 >> 7; \
+ u_r = __msa_sat_s_h(u_r, 7); \
+ u_l = temp3 >> 7; \
+ u_l = __msa_sat_s_h(u_l, 7); \
+ u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
+ q2_m = __msa_subs_s_b(q2_m, u); \
+ p2_m = __msa_adds_s_b(p2_m, u); \
+ q2 = __msa_xori_b((v16u8)q2_m, 0x80); \
+ p2 = __msa_xori_b((v16u8)p2_m, 0x80); \
+ \
+ temp1 += temp0; \
+ temp3 += temp2; \
+ \
+ u_r = temp1 >> 7; \
+ u_r = __msa_sat_s_h(u_r, 7); \
+ u_l = temp3 >> 7; \
+ u_l = __msa_sat_s_h(u_l, 7); \
+ u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
+ q1_m = __msa_subs_s_b(q1_m, u); \
+ p1_m = __msa_adds_s_b(p1_m, u); \
+ q1 = __msa_xori_b((v16u8)q1_m, 0x80); \
+ p1 = __msa_xori_b((v16u8)p1_m, 0x80); \
+ \
+ temp1 += temp0; \
+ temp3 += temp2; \
+ \
+ u_r = temp1 >> 7; \
+ u_r = __msa_sat_s_h(u_r, 7); \
+ u_l = temp3 >> 7; \
+ u_l = __msa_sat_s_h(u_l, 7); \
+ u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \
+ q0_m = __msa_subs_s_b(q0_m, u); \
+ p0_m = __msa_adds_s_b(p0_m, u); \
+ q0 = __msa_xori_b((v16u8)q0_m, 0x80); \
+ p0 = __msa_xori_b((v16u8)p0_m, 0x80); \
}
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
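[Note: the rewritten VP8_SIMPLE_FILT above is the MSA-vectorized form of VP8's simple loop-filter adjustment. For readers not fluent in MSA intrinsics, a one-pixel scalar sketch of the same math follows; the function and helper names are illustrative, not part of libvpx, and an arithmetic right shift of signed values is assumed.]

#include <stdint.h>

/* Saturate to the int8_t range, mirroring the __msa_*_s_b saturating ops. */
static int8_t clamp8(int v) { return (int8_t)(v < -128 ? -128 : (v > 127 ? 127 : v)); }

/* One-pixel scalar equivalent of VP8_SIMPLE_FILT: adjust p0/q0 when mask is set. */
static void simple_filter_scalar(uint8_t *p1, uint8_t *p0, uint8_t *q0,
                                 uint8_t *q1, int mask) {
  /* XOR with 0x80 converts unsigned pixels to signed, zero-centered values. */
  const int8_t ps1 = (int8_t)(*p1 ^ 0x80), ps0 = (int8_t)(*p0 ^ 0x80);
  const int8_t qs0 = (int8_t)(*q0 ^ 0x80), qs1 = (int8_t)(*q1 ^ 0x80);
  const int8_t q0_sub_p0 = clamp8(qs0 - ps0);
  int8_t filt = clamp8(ps1 - qs1);
  int8_t filt1, filt2;
  /* Three saturating adds implement filt += 3 * (q0 - p0), as in the macro. */
  filt = clamp8(filt + q0_sub_p0);
  filt = clamp8(filt + q0_sub_p0);
  filt = clamp8(filt + q0_sub_p0);
  if (!mask) filt = 0;            /* the vector code ANDs filt with the mask */
  filt1 = clamp8(filt + 4) >> 3;  /* correction applied to q0 */
  filt2 = clamp8(filt + 3) >> 3;  /* correction applied to p0 */
  *q0 = (uint8_t)(clamp8(qs0 - filt1) ^ 0x80);
  *p0 = (uint8_t)(clamp8(ps0 + filt2) ^ 0x80);
}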
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/vp8_macros_msa.h b/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/vp8_macros_msa.h
index 65905f6c027..6bec3adec39 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/vp8_macros_msa.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/mips/msa/vp8_macros_msa.h
@@ -1221,6 +1221,8 @@
}
#define PCKEV_B2_SB(...) PCKEV_B2(v16i8, __VA_ARGS__)
#define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__)
+#define PCKEV_B2_SH(...) PCKEV_B2(v8i16, __VA_ARGS__)
+#define PCKEV_B2_SW(...) PCKEV_B2(v4i32, __VA_ARGS__)
#define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
out2, out3) \
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/common/onyx.h b/chromium/third_party/libvpx/source/libvpx/vp8/common/onyx.h
index 43e3c29b509..72fba2ec56b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/common/onyx.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/common/onyx.h
@@ -110,6 +110,8 @@ typedef struct {
int Sharpness;
int cpu_used;
unsigned int rc_max_intra_bitrate_pct;
+ /* percent of rate boost for golden frame in CBR mode. */
+ unsigned int gf_cbr_boost_pct;
unsigned int screen_content_mode;
/* mode ->
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c
index 87560f28b1e..c5389594553 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_if.c
@@ -1467,6 +1467,12 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) {
cpi->baseline_gf_interval =
cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL;
+ // GF behavior for 1 pass CBR, used when error_resilience is off.
+ if (!cpi->oxcf.error_resilient_mode &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER &&
+ cpi->oxcf.Mode == MODE_REALTIME)
+ cpi->baseline_gf_interval = cpi->gf_interval_onepass_cbr;
+
#if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
cpi->oxcf.token_partitions = 3;
#endif
@@ -1766,9 +1772,13 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->mse_source_denoised = 0;
/* Should we use the cyclic refresh method.
- * Currently this is tied to error resilliant mode
+ * Currently there is no external control for this.
+ * Enable it for error_resilient_mode, or for 1 pass CBR mode.
*/
- cpi->cyclic_refresh_mode_enabled = cpi->oxcf.error_resilient_mode;
+ cpi->cyclic_refresh_mode_enabled =
+ (cpi->oxcf.error_resilient_mode ||
+ (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER &&
+ cpi->oxcf.Mode <= 2));
cpi->cyclic_refresh_mode_max_mbs_perframe =
(cpi->common.mb_rows * cpi->common.mb_cols) / 7;
if (cpi->oxcf.number_of_layers == 1) {
@@ -1781,6 +1791,23 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->cyclic_refresh_mode_index = 0;
cpi->cyclic_refresh_q = 32;
+ // GF behavior for 1 pass CBR, used when error_resilience is off.
+ cpi->gf_update_onepass_cbr = 0;
+ cpi->gf_noboost_onepass_cbr = 0;
+ if (!cpi->oxcf.error_resilient_mode &&
+ cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && cpi->oxcf.Mode <= 2) {
+ cpi->gf_update_onepass_cbr = 1;
+ cpi->gf_noboost_onepass_cbr = 1;
+ cpi->gf_interval_onepass_cbr =
+ cpi->cyclic_refresh_mode_max_mbs_perframe > 0
+ ? (2 * (cpi->common.mb_rows * cpi->common.mb_cols) /
+ cpi->cyclic_refresh_mode_max_mbs_perframe)
+ : 10;
+ cpi->gf_interval_onepass_cbr =
+ VPXMIN(40, VPXMAX(6, cpi->gf_interval_onepass_cbr));
+ cpi->baseline_gf_interval = cpi->gf_interval_onepass_cbr;
+ }
+
if (cpi->cyclic_refresh_mode_enabled) {
CHECK_MEM_ERROR(cpi->cyclic_refresh_map,
vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
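[Note: a worked example of the interval computed above, with hypothetical dimensions; none of these numbers come from the patch.]

/* Illustrative: a 640x480 clip has mb_rows = 30, mb_cols = 40 -> MBs = 1200.
 *   cyclic_refresh_mode_max_mbs_perframe = 1200 / 7        = 171
 *   gf_interval_onepass_cbr              = 2 * 1200 / 171  = 14
 *   clamped to the [6, 40] range                           -> 14
 * so in this configuration a golden-frame update is scheduled roughly every
 * 14 frames, i.e. about two full cyclic-refresh sweeps of the frame.
 */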
@@ -3925,7 +3952,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
#else
/* transform / motion compensation build reconstruction frame */
vp8_encode_frame(cpi);
-
if (cpi->oxcf.screen_content_mode == 2) {
if (vp8_drop_encodedframe_overshoot(cpi, Q)) return;
}
@@ -4203,6 +4229,20 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
}
} while (Loop == 1);
+#if defined(DROP_UNCODED_FRAMES)
+ /* if there are no coded macroblocks at all drop this frame */
+ if (cpi->common.MBs == cpi->mb.skip_true_count &&
+ (cpi->drop_frame_count & 7) != 7 && cm->frame_type != KEY_FRAME) {
+ cpi->common.current_video_frame++;
+ cpi->frames_since_key++;
+ cpi->drop_frame_count++;
+ // We advance the temporal pattern for dropped frames.
+ cpi->temporal_pattern_counter++;
+ return;
+ }
+ cpi->drop_frame_count = 0;
+#endif
+
#if 0
/* Experimental code for lagged and one pass
* Update stats used for one pass GF selection
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h
index 59ad5773a64..bfcc6457c19 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/onyx_int.h
@@ -413,6 +413,9 @@ typedef struct VP8_COMP {
int drop_frames_allowed; /* Are we permitted to drop frames? */
int drop_frame; /* Drop this frame? */
+#if defined(DROP_UNCODED_FRAMES)
+ int drop_frame_count;
+#endif
vp8_prob frame_coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
[ENTROPY_NODES];
@@ -501,6 +504,11 @@ typedef struct VP8_COMP {
int force_maxqp;
+ // GF update for 1 pass cbr.
+ int gf_update_onepass_cbr;
+ int gf_interval_onepass_cbr;
+ int gf_noboost_onepass_cbr;
+
#if CONFIG_MULTITHREAD
/* multithread data */
int *mt_current_mb_col;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c
index 4d6afc19b35..e89247ae4ae 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/encoder/ratectrl.c
@@ -885,61 +885,61 @@ static void calc_pframe_target_size(VP8_COMP *cpi) {
/* Adjust target frame size for Golden Frames: */
if (cpi->oxcf.error_resilient_mode == 0 &&
(cpi->frames_till_gf_update_due == 0) && !cpi->drop_frame) {
- int Q =
- (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q;
-
- int gf_frame_useage = 0; /* Golden frame useage since last GF */
- int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] +
- cpi->recent_ref_frame_usage[LAST_FRAME] +
- cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
- cpi->recent_ref_frame_usage[ALTREF_FRAME];
-
- int pct_gf_active = (100 * cpi->gf_active_count) /
- (cpi->common.mb_rows * cpi->common.mb_cols);
-
- if (tot_mbs) {
- gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
- cpi->recent_ref_frame_usage[ALTREF_FRAME]) *
- 100 / tot_mbs;
- }
+ if (!cpi->gf_update_onepass_cbr) {
+ int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME]
+ : cpi->oxcf.fixed_q;
+
+ int gf_frame_useage = 0; /* Golden frame useage since last GF */
+ int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] +
+ cpi->recent_ref_frame_usage[LAST_FRAME] +
+ cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
+ cpi->recent_ref_frame_usage[ALTREF_FRAME];
+
+ int pct_gf_active = (100 * cpi->gf_active_count) /
+ (cpi->common.mb_rows * cpi->common.mb_cols);
+
+ if (tot_mbs) {
+ gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
+ cpi->recent_ref_frame_usage[ALTREF_FRAME]) *
+ 100 / tot_mbs;
+ }
- if (pct_gf_active > gf_frame_useage) gf_frame_useage = pct_gf_active;
+ if (pct_gf_active > gf_frame_useage) gf_frame_useage = pct_gf_active;
- /* Is a fixed manual GF frequency being used */
- if (cpi->auto_gold) {
- /* For one pass throw a GF if recent frame intra useage is
- * low or the GF useage is high
- */
- if ((cpi->pass == 0) &&
- (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5)) {
- cpi->common.refresh_golden_frame = 1;
+ /* Is a fixed manual GF frequency being used */
+ if (cpi->auto_gold) {
+ /* For one pass throw a GF if recent frame intra usage is
+ * low or the GF usage is high
+ */
+ if ((cpi->pass == 0) &&
+ (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5)) {
+ cpi->common.refresh_golden_frame = 1;
- /* Two pass GF descision */
- } else if (cpi->pass == 2) {
- cpi->common.refresh_golden_frame = 1;
+ /* Two pass GF decision */
+ } else if (cpi->pass == 2) {
+ cpi->common.refresh_golden_frame = 1;
+ }
}
- }
#if 0
- /* Debug stats */
- if (0)
- {
- FILE *f;
+ /* Debug stats */
+ if (0) {
+ FILE *f;
- f = fopen("gf_useaget.stt", "a");
- fprintf(f, " %8ld %10ld %10ld %10ld %10ld\n",
- cpi->common.current_video_frame, cpi->gfu_boost, GFQ_ADJUSTMENT, cpi->gfu_boost, gf_frame_useage);
- fclose(f);
- }
+ f = fopen("gf_useaget.stt", "a");
+ fprintf(f, " %8ld %10ld %10ld %10ld %10ld\n",
+ cpi->common.current_video_frame, cpi->gfu_boost,
+ GFQ_ADJUSTMENT, cpi->gfu_boost, gf_frame_useage);
+ fclose(f);
+ }
#endif
- if (cpi->common.refresh_golden_frame == 1) {
+ if (cpi->common.refresh_golden_frame == 1) {
#if 0
- if (0)
- {
+ if (0) {
FILE *f;
f = fopen("GFexit.stt", "a");
@@ -949,61 +949,76 @@ static void calc_pframe_target_size(VP8_COMP *cpi) {
#endif
- if (cpi->auto_adjust_gold_quantizer) {
- calc_gf_params(cpi);
- }
-
- /* If we are using alternate ref instead of gf then do not apply the
- * boost It will instead be applied to the altref update Jims
- * modified boost
- */
- if (!cpi->source_alt_ref_active) {
- if (cpi->oxcf.fixed_q < 0) {
- if (cpi->pass == 2) {
- /* The spend on the GF is defined in the two pass
- * code for two pass encodes
- */
- cpi->this_frame_target = cpi->per_frame_bandwidth;
- } else {
- int Boost = cpi->last_boost;
- int frames_in_section = cpi->frames_till_gf_update_due + 1;
- int allocation_chunks = (frames_in_section * 100) + (Boost - 100);
- int bits_in_section = cpi->inter_frame_target * frames_in_section;
-
- /* Normalize Altboost and allocations chunck down to
- * prevent overflow
- */
- while (Boost > 1000) {
- Boost /= 2;
- allocation_chunks /= 2;
- }
+ if (cpi->auto_adjust_gold_quantizer) {
+ calc_gf_params(cpi);
+ }
- /* Avoid loss of precision but avoid overflow */
- if ((bits_in_section >> 7) > allocation_chunks) {
- cpi->this_frame_target =
- Boost * (bits_in_section / allocation_chunks);
+ /* If we are using alternate ref instead of gf then do not apply the
+ * boost. It will instead be applied to the altref update (Jim's
+ * modified boost)
+ */
+ if (!cpi->source_alt_ref_active) {
+ if (cpi->oxcf.fixed_q < 0) {
+ if (cpi->pass == 2) {
+ /* The spend on the GF is defined in the two pass
+ * code for two pass encodes
+ */
+ cpi->this_frame_target = cpi->per_frame_bandwidth;
} else {
- cpi->this_frame_target =
- (Boost * bits_in_section) / allocation_chunks;
+ int Boost = cpi->last_boost;
+ int frames_in_section = cpi->frames_till_gf_update_due + 1;
+ int allocation_chunks = (frames_in_section * 100) + (Boost - 100);
+ int bits_in_section = cpi->inter_frame_target * frames_in_section;
+
+ /* Normalize Altboost and allocation chunks down to
+ * prevent overflow
+ */
+ while (Boost > 1000) {
+ Boost /= 2;
+ allocation_chunks /= 2;
+ }
+
+ /* Avoid loss of precision but avoid overflow */
+ if ((bits_in_section >> 7) > allocation_chunks) {
+ cpi->this_frame_target =
+ Boost * (bits_in_section / allocation_chunks);
+ } else {
+ cpi->this_frame_target =
+ (Boost * bits_in_section) / allocation_chunks;
+ }
}
+ } else {
+ cpi->this_frame_target =
+ (estimate_bits_at_q(1, Q, cpi->common.MBs, 1.0) *
+ cpi->last_boost) /
+ 100;
}
} else {
- cpi->this_frame_target =
- (estimate_bits_at_q(1, Q, cpi->common.MBs, 1.0) *
- cpi->last_boost) /
- 100;
+ /* If there is an active ARF at this location use the minimum
+ * bits on this frame even if it is a constructed arf.
+ * The active maximum quantizer ensures that an appropriate
+ * number of bits will be spent if needed for constructed ARFs.
+ */
+ cpi->this_frame_target = 0;
}
+ cpi->current_gf_interval = cpi->frames_till_gf_update_due;
}
- /* If there is an active ARF at this location use the minimum
- * bits on this frame even if it is a contructed arf.
- * The active maximum quantizer insures that an appropriate
- * number of bits will be spent if needed for contstructed ARFs.
- */
- else {
- cpi->this_frame_target = 0;
+ } else {
+ // Special case for 1 pass CBR: fixed gf period.
+ // TODO(marpan): Adjust this boost/interval logic.
+ // If gf_cbr_boost_pct is small (below threshold) set the flag
+ // gf_noboost_onepass_cbr = 1, which forces the gf to use the same
+ // rate correction factor as last.
+ cpi->gf_noboost_onepass_cbr = (cpi->oxcf.gf_cbr_boost_pct <= 100);
+ cpi->baseline_gf_interval = cpi->gf_interval_onepass_cbr;
+ // Skip this update if the zeromv_count is low.
+ if (cpi->zeromv_count > (cpi->common.MBs >> 1)) {
+ cpi->common.refresh_golden_frame = 1;
+ cpi->this_frame_target =
+ (cpi->this_frame_target * (100 + cpi->oxcf.gf_cbr_boost_pct)) / 100;
}
-
+ cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
cpi->current_gf_interval = cpi->frames_till_gf_update_due;
}
}
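[Note: hypothetical numbers for the new 1 pass CBR golden-frame branch above, to make the boost arithmetic concrete.]

/* Assume gf_cbr_boost_pct = 50 (so gf_noboost_onepass_cbr stays 0) and a
 * mostly static scene where zeromv_count > MBs / 2:
 *   this_frame_target = 20000 bits
 *   refresh_golden_frame = 1
 *   this_frame_target = 20000 * (100 + 50) / 100 = 30000 bits
 * With gf_cbr_boost_pct <= 100 the gf_noboost_onepass_cbr flag is set instead,
 * and the golden frame falls back to the regular rate correction factor.
 */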
@@ -1025,8 +1040,9 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) {
if (cpi->common.frame_type == KEY_FRAME) {
rate_correction_factor = cpi->key_frame_rate_correction_factor;
} else {
- if (cpi->oxcf.number_of_layers == 1 && (cpi->common.refresh_alt_ref_frame ||
- cpi->common.refresh_golden_frame)) {
+ if (cpi->oxcf.number_of_layers == 1 && !cpi->gf_noboost_onepass_cbr &&
+ (cpi->common.refresh_alt_ref_frame ||
+ cpi->common.refresh_golden_frame)) {
rate_correction_factor = cpi->gf_rate_correction_factor;
} else {
rate_correction_factor = cpi->rate_correction_factor;
@@ -1102,8 +1118,9 @@ void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) {
if (cpi->common.frame_type == KEY_FRAME) {
cpi->key_frame_rate_correction_factor = rate_correction_factor;
} else {
- if (cpi->oxcf.number_of_layers == 1 && (cpi->common.refresh_alt_ref_frame ||
- cpi->common.refresh_golden_frame)) {
+ if (cpi->oxcf.number_of_layers == 1 && !cpi->gf_noboost_onepass_cbr &&
+ (cpi->common.refresh_alt_ref_frame ||
+ cpi->common.refresh_golden_frame)) {
cpi->gf_rate_correction_factor = rate_correction_factor;
} else {
cpi->rate_correction_factor = rate_correction_factor;
@@ -1118,7 +1135,6 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) {
cpi->active_worst_quality = cpi->worst_quality;
return cpi->worst_quality;
}
-
/* Reset Zbin OQ value */
cpi->mb.zbin_over_quant = 0;
@@ -1128,10 +1144,12 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) {
if (cpi->common.frame_type == KEY_FRAME) {
Q = cpi->oxcf.key_q;
} else if (cpi->oxcf.number_of_layers == 1 &&
- cpi->common.refresh_alt_ref_frame) {
+ cpi->common.refresh_alt_ref_frame &&
+ !cpi->gf_noboost_onepass_cbr) {
Q = cpi->oxcf.alt_q;
} else if (cpi->oxcf.number_of_layers == 1 &&
- cpi->common.refresh_golden_frame) {
+ cpi->common.refresh_golden_frame &&
+ !cpi->gf_noboost_onepass_cbr) {
Q = cpi->oxcf.gold_q;
}
} else {
@@ -1145,7 +1163,7 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) {
if (cpi->common.frame_type == KEY_FRAME) {
correction_factor = cpi->key_frame_rate_correction_factor;
} else {
- if (cpi->oxcf.number_of_layers == 1 &&
+ if (cpi->oxcf.number_of_layers == 1 && !cpi->gf_noboost_onepass_cbr &&
(cpi->common.refresh_alt_ref_frame ||
cpi->common.refresh_golden_frame)) {
correction_factor = cpi->gf_rate_correction_factor;
@@ -1199,6 +1217,7 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) {
if (cpi->common.frame_type == KEY_FRAME) {
zbin_oqmax = 0;
} else if (cpi->oxcf.number_of_layers == 1 &&
+ !cpi->gf_noboost_onepass_cbr &&
(cpi->common.refresh_alt_ref_frame ||
(cpi->common.refresh_golden_frame &&
!cpi->source_alt_ref_active))) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
index fac237eec02..f8475ed61da 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp8/vp8_cx_iface.c
@@ -40,6 +40,7 @@ struct vp8_extracfg {
vp8e_tuning tuning;
unsigned int cq_level; /* constrained quality level */
unsigned int rc_max_intra_bitrate_pct;
+ unsigned int gf_cbr_boost_pct;
unsigned int screen_content_mode;
};
@@ -65,6 +66,7 @@ static struct vp8_extracfg default_extracfg = {
0, /* tuning*/
10, /* cq_level */
0, /* rc_max_intra_bitrate_pct */
+ 0, /* gf_cbr_boost_pct */
0, /* screen_content_mode */
};
@@ -315,6 +317,7 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
oxcf->target_bandwidth = cfg.rc_target_bitrate;
oxcf->rc_max_intra_bitrate_pct = vp8_cfg.rc_max_intra_bitrate_pct;
+ oxcf->gf_cbr_boost_pct = vp8_cfg.gf_cbr_boost_pct;
oxcf->best_allowed_q = cfg.rc_min_quantizer;
oxcf->worst_allowed_q = cfg.rc_max_quantizer;
@@ -558,6 +561,13 @@ static vpx_codec_err_t set_rc_max_intra_bitrate_pct(vpx_codec_alg_priv_t *ctx,
return update_extracfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_rc_gf_cbr_boost_pct(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp8_extracfg extra_cfg = ctx->vp8_cfg;
+ extra_cfg.gf_cbr_boost_pct = CAST(VP8E_SET_GF_CBR_BOOST_PCT, args);
+ return update_extracfg(ctx, &extra_cfg);
+}
+
static vpx_codec_err_t set_screen_content_mode(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp8_extracfg extra_cfg = ctx->vp8_cfg;
@@ -1159,6 +1169,7 @@ static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = {
{ VP8E_SET_CQ_LEVEL, set_cq_level },
{ VP8E_SET_MAX_INTRA_BITRATE_PCT, set_rc_max_intra_bitrate_pct },
{ VP8E_SET_SCREEN_CONTENT_MODE, set_screen_content_mode },
+ { VP8E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct },
{ -1, NULL },
};
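[Note: with the control mapping added above, an application can request the CBR golden-frame boost roughly as follows. This is a sketch; encoder setup and error handling are omitted, and the 50% value is only an example.]

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

/* ctx is a vpx_codec_ctx_t already initialized with vpx_codec_vp8_cx(). */
/* Request a 50% bit-allocation boost for golden frames in 1 pass CBR mode. */
vpx_codec_control(&ctx, VP8E_SET_GF_CBR_BOOST_PCT, 50);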
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_frame_buffers.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_frame_buffers.c
index efcf2bf885b..a254e79d20e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_frame_buffers.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_frame_buffers.c
@@ -52,14 +52,12 @@ int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
if (i == int_fb_list->num_internal_frame_buffers) return -1;
if (int_fb_list->int_fb[i].size < min_size) {
- int_fb_list->int_fb[i].data =
- (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size);
- if (!int_fb_list->int_fb[i].data) return -1;
-
- // This memset is needed for fixing valgrind error from C loop filter
+ vpx_free(int_fb_list->int_fb[i].data);
+ // The data must be zeroed to fix a valgrind error from the C loop filter
// due to access uninitialized memory in frame border. It could be
- // removed if border is totally removed.
- memset(int_fb_list->int_fb[i].data, 0, min_size);
+ // skipped if border were totally removed.
+ int_fb_list->int_fb[i].data = (uint8_t *)vpx_calloc(1, min_size);
+ if (!int_fb_list->int_fb[i].data) return -1;
int_fb_list->int_fb[i].size = min_size;
}
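[Note: the hunk above replaces realloc-plus-memset with free-plus-calloc so the buffer is zeroed even when a new allocation is returned. A minimal sketch of the same grow-and-zero pattern; the helper name is illustrative, not from libvpx.]

/* Grow *buf to at least min_size bytes with guaranteed zeroed contents. */
static int grow_zeroed(uint8_t **buf, size_t *size, size_t min_size) {
  if (*size < min_size) {
    vpx_free(*buf);                             /* old contents are discarded */
    *buf = (uint8_t *)vpx_calloc(1, min_size);  /* fresh, zero-filled block */
    if (!*buf) return -1;
    *size = min_size;
  }
  return 0;
}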
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c
index c6a39f85ca0..e3a088e2870 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_idct.c
@@ -331,8 +331,8 @@ void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
// DC only DCT coefficient
if (eob == 1) {
vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
- } else if (eob <= 10) {
- vpx_highbd_idct8x8_10_add(input, dest, stride, bd);
+ } else if (eob <= 12) {
+ vpx_highbd_idct8x8_12_add(input, dest, stride, bd);
} else {
vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
index fafc6598393..abef0676396 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -137,6 +137,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ specialize qw/vp9_fdct8x8_quant ssse3/;
} else {
add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
specialize qw/vp9_block_error avx2 msa sse2/;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c
index fde0b7e318c..628d1c8d2bc 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -1517,7 +1517,6 @@ static int tile_worker_hook(TileWorkerData *const tile_data,
return 0;
}
- tile_data->xd.error_info = &tile_data->error_info;
tile_data->xd.corrupted = 0;
do {
@@ -1529,6 +1528,8 @@ static int tile_worker_hook(TileWorkerData *const tile_data,
&tile_data->error_info, &tile_data->bit_reader,
pbi->decrypt_cb, pbi->decrypt_state);
vp9_init_macroblockd(&pbi->common, &tile_data->xd, tile_data->dqcoeff);
+ // vp9_init_macroblockd() resets xd.error_info, so it must be set after init.
+ tile_data->xd.error_info = &tile_data->error_info;
for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end;
mi_row += MI_BLOCK_SIZE) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c
index 4372ba0371d..1a4152436a2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/decoder/vp9_decodemv.c
@@ -770,6 +770,10 @@ static void read_inter_block_mode_info(VP9Decoder *const pbi,
int idx, idy;
PREDICTION_MODE b_mode;
int_mv best_sub8x8[2];
+ const uint32_t invalid_mv = 0x80008000;
+ // Initialize the 2nd element: even though it is not used meaningfully when
+ // is_compound is false, copying/clamping it may trigger an MSan warning.
+ best_sub8x8[1].as_int = invalid_mv;
for (idy = 0; idy < 2; idy += num_4x4_h) {
for (idx = 0; idx < 2; idx += num_4x4_w) {
const int j = idy * 2 + idx;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c
index 3f1c430f98d..49aea69ebd1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.c
@@ -80,8 +80,8 @@ static void prob_diff_update(const vpx_tree_index *tree,
vp9_cond_prob_diff_update(w, &probs[i], branch_ct[i]);
}
-static void write_selected_tx_size(const VP9_COMMON *cm, const MACROBLOCKD *xd,
- vpx_writer *w) {
+static void write_selected_tx_size(const VP9_COMMON *cm,
+ const MACROBLOCKD *const xd, vpx_writer *w) {
TX_SIZE tx_size = xd->mi[0]->tx_size;
BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
@@ -95,7 +95,7 @@ static void write_selected_tx_size(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
}
-static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *const xd,
int segment_id, const MODE_INFO *mi, vpx_writer *w) {
if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) {
return 1;
@@ -195,7 +195,7 @@ static void write_segment_id(vpx_writer *w, const struct segmentation *seg,
}
// This function encodes the reference frame
-static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *const xd,
vpx_writer *w) {
const MODE_INFO *const mi = xd->mi[0];
const int is_compound = has_second_ref(mi);
@@ -230,14 +230,16 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *xd,
}
}
-static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
- vpx_writer *w) {
+static void pack_inter_mode_mvs(VP9_COMP *cpi, const MACROBLOCKD *const xd,
+ const MB_MODE_INFO_EXT *const mbmi_ext,
+ vpx_writer *w,
+ unsigned int *const max_mv_magnitude,
+ int interp_filter_selected[MAX_REF_FRAMES]
+ [SWITCHABLE]) {
VP9_COMMON *const cm = &cpi->common;
const nmv_context *nmvc = &cm->fc->nmvc;
- const MACROBLOCK *const x = &cpi->td.mb;
- const MACROBLOCKD *const xd = &x->e_mbd;
const struct segmentation *const seg = &cm->seg;
- const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
+ const MODE_INFO *const mi = xd->mi[0];
const PREDICTION_MODE mode = mi->mode;
const int segment_id = mi->segment_id;
const BLOCK_SIZE bsize = mi->sb_type;
@@ -299,7 +301,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
vp9_write_token(w, vp9_switchable_interp_tree,
cm->fc->switchable_interp_prob[ctx],
&switchable_interp_encodings[mi->interp_filter]);
- ++cpi->interp_filter_selected[0][mi->interp_filter];
+ ++interp_filter_selected[0][mi->interp_filter];
} else {
assert(mi->interp_filter == cm->interp_filter);
}
@@ -317,7 +319,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
for (ref = 0; ref < 1 + is_compound; ++ref)
vp9_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv,
&mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv,
- nmvc, allow_hp);
+ nmvc, allow_hp, max_mv_magnitude);
}
}
}
@@ -326,16 +328,16 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi,
for (ref = 0; ref < 1 + is_compound; ++ref)
vp9_encode_mv(cpi, w, &mi->mv[ref].as_mv,
&mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv, nmvc,
- allow_hp);
+ allow_hp, max_mv_magnitude);
}
}
}
}
static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
- MODE_INFO **mi_8x8, vpx_writer *w) {
+ vpx_writer *w) {
const struct segmentation *const seg = &cm->seg;
- const MODE_INFO *const mi = mi_8x8[0];
+ const MODE_INFO *const mi = xd->mi[0];
const MODE_INFO *const above_mi = xd->above_mi;
const MODE_INFO *const left_mi = xd->left_mi;
const BLOCK_SIZE bsize = mi->sb_type;
@@ -366,27 +368,29 @@ static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd,
write_intra_mode(w, mi->uv_mode, vp9_kf_uv_mode_prob[mi->mode]);
}
-static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile,
- vpx_writer *w, TOKENEXTRA **tok,
- const TOKENEXTRA *const tok_end, int mi_row,
- int mi_col) {
+static void write_modes_b(VP9_COMP *cpi, MACROBLOCKD *const xd,
+ const TileInfo *const tile, vpx_writer *w,
+ TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
+ int mi_row, int mi_col,
+ unsigned int *const max_mv_magnitude,
+ int interp_filter_selected[MAX_REF_FRAMES]
+ [SWITCHABLE]) {
const VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ const MB_MODE_INFO_EXT *const mbmi_ext =
+ cpi->td.mb.mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
MODE_INFO *m;
xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col);
m = xd->mi[0];
- cpi->td.mb.mbmi_ext =
- cpi->td.mb.mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
-
set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->sb_type],
mi_col, num_8x8_blocks_wide_lookup[m->sb_type], cm->mi_rows,
cm->mi_cols);
if (frame_is_intra_only(cm)) {
- write_mb_modes_kf(cm, xd, xd->mi, w);
+ write_mb_modes_kf(cm, xd, w);
} else {
- pack_inter_mode_mvs(cpi, m, w);
+ pack_inter_mode_mvs(cpi, xd, mbmi_ext, w, max_mv_magnitude,
+ interp_filter_selected);
}
assert(*tok < tok_end);
@@ -415,13 +419,14 @@ static void write_partition(const VP9_COMMON *const cm,
}
}
-static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile,
- vpx_writer *w, TOKENEXTRA **tok,
- const TOKENEXTRA *const tok_end, int mi_row,
- int mi_col, BLOCK_SIZE bsize) {
+static void write_modes_sb(VP9_COMP *cpi, MACROBLOCKD *const xd,
+ const TileInfo *const tile, vpx_writer *w,
+ TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ unsigned int *const max_mv_magnitude,
+ int interp_filter_selected[MAX_REF_FRAMES]
+ [SWITCHABLE]) {
const VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
-
const int bsl = b_width_log2_lookup[bsize];
const int bs = (1 << bsl) / 4;
PARTITION_TYPE partition;
@@ -436,30 +441,37 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile,
write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w);
subsize = get_subsize(bsize, partition);
if (subsize < BLOCK_8X8) {
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
+ max_mv_magnitude, interp_filter_selected);
} else {
switch (partition) {
case PARTITION_NONE:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
+ max_mv_magnitude, interp_filter_selected);
break;
case PARTITION_HORZ:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
+ max_mv_magnitude, interp_filter_selected);
if (mi_row + bs < cm->mi_rows)
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col);
+ write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col,
+ max_mv_magnitude, interp_filter_selected);
break;
case PARTITION_VERT:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
+ max_mv_magnitude, interp_filter_selected);
if (mi_col + bs < cm->mi_cols)
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs);
+ write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs,
+ max_mv_magnitude, interp_filter_selected);
break;
case PARTITION_SPLIT:
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + bs,
- subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col,
- subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
- subsize);
+ write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, subsize,
+ max_mv_magnitude, interp_filter_selected);
+ write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs,
+ subsize, max_mv_magnitude, interp_filter_selected);
+ write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col,
+ subsize, max_mv_magnitude, interp_filter_selected);
+ write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
+ subsize, max_mv_magnitude, interp_filter_selected);
break;
default: assert(0);
}
@@ -471,11 +483,13 @@ static void write_modes_sb(VP9_COMP *cpi, const TileInfo *const tile,
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
-static void write_modes(VP9_COMP *cpi, const TileInfo *const tile,
- vpx_writer *w, TOKENEXTRA **tok,
- const TOKENEXTRA *const tok_end) {
+static void write_modes(VP9_COMP *cpi, MACROBLOCKD *const xd,
+ const TileInfo *const tile, vpx_writer *w,
+ TOKENEXTRA **tok, const TOKENEXTRA *const tok_end,
+ unsigned int *const max_mv_magnitude,
+ int interp_filter_selected[MAX_REF_FRAMES]
+ [SWITCHABLE]) {
const VP9_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
int mi_row, mi_col;
set_partition_probs(cm, xd);
@@ -485,7 +499,8 @@ static void write_modes(VP9_COMP *cpi, const TileInfo *const tile,
vp9_zero(xd->left_seg_context);
for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end;
mi_col += MI_BLOCK_SIZE)
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, BLOCK_64X64);
+ write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col,
+ BLOCK_64X64, max_mv_magnitude, interp_filter_selected);
}
}
@@ -900,8 +915,128 @@ int vp9_get_refresh_mask(VP9_COMP *cpi) {
}
}
+static int encode_tile_worker(VP9_COMP *cpi, VP9BitstreamWorkerData *data) {
+ MACROBLOCKD *const xd = &data->xd;
+ vpx_start_encode(&data->bit_writer, data->dest);
+ write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info,
+ &data->bit_writer, &data->tok, data->tok_end,
+ &data->max_mv_magnitude, data->interp_filter_selected);
+ assert(data->tok == data->tok_end);
+ vpx_stop_encode(&data->bit_writer);
+ return 1;
+}
+
+void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi) {
+ if (cpi->vp9_bitstream_worker_data) {
+ int i;
+ for (i = 1; i < cpi->num_workers; ++i) {
+ vpx_free(cpi->vp9_bitstream_worker_data[i].dest);
+ }
+ vpx_free(cpi->vp9_bitstream_worker_data);
+ cpi->vp9_bitstream_worker_data = NULL;
+ }
+}
+
+static int encode_tiles_buffer_alloc(VP9_COMP *const cpi) {
+ int i;
+ const size_t worker_data_size =
+ cpi->num_workers * sizeof(*cpi->vp9_bitstream_worker_data);
+ cpi->vp9_bitstream_worker_data = vpx_memalign(16, worker_data_size);
+ if (!cpi->vp9_bitstream_worker_data) return 1;
+ memset(cpi->vp9_bitstream_worker_data, 0, worker_data_size);
+ for (i = 1; i < cpi->num_workers; ++i) {
+ cpi->vp9_bitstream_worker_data[i].dest_size =
+ cpi->oxcf.width * cpi->oxcf.height;
+ cpi->vp9_bitstream_worker_data[i].dest =
+ vpx_malloc(cpi->vp9_bitstream_worker_data[i].dest_size);
+ if (!cpi->vp9_bitstream_worker_data[i].dest) return 1;
+ }
+ return 0;
+}
+
+static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) {
+ const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int num_workers = cpi->num_workers;
+ size_t total_size = 0;
+ int tile_col = 0;
+
+ if (!cpi->vp9_bitstream_worker_data ||
+ cpi->vp9_bitstream_worker_data[1].dest_size >
+ (cpi->oxcf.width * cpi->oxcf.height)) {
+ vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
+ if (encode_tiles_buffer_alloc(cpi)) return 0;
+ }
+
+ while (tile_col < tile_cols) {
+ int i, j;
+ for (i = 0; i < num_workers && tile_col < tile_cols; ++i) {
+ VPxWorker *const worker = &cpi->workers[i];
+ VP9BitstreamWorkerData *const data = &cpi->vp9_bitstream_worker_data[i];
+
+ // Populate the worker data.
+ data->xd = cpi->td.mb.e_mbd;
+ data->tile_idx = tile_col;
+ data->tok = cpi->tile_tok[0][tile_col];
+ data->tok_end = cpi->tile_tok[0][tile_col] + cpi->tok_count[0][tile_col];
+ data->max_mv_magnitude = cpi->max_mv_magnitude;
+ memset(data->interp_filter_selected, 0,
+ sizeof(data->interp_filter_selected[0][0]) * SWITCHABLE);
+
+ // First thread can directly write into the output buffer.
+ if (i == 0) {
+ // If this worker happens to be for the last tile, then do not offset it
+ // by 4 for the tile size.
+ data->dest =
+ data_ptr + total_size + (tile_col == tile_cols - 1 ? 0 : 4);
+ }
+ worker->data1 = cpi;
+ worker->data2 = data;
+ worker->hook = (VPxWorkerHook)encode_tile_worker;
+ worker->had_error = 0;
+
+ if (i < num_workers - 1) {
+ winterface->launch(worker);
+ } else {
+ winterface->execute(worker);
+ }
+ ++tile_col;
+ }
+ for (j = 0; j < i; ++j) {
+ VPxWorker *const worker = &cpi->workers[j];
+ VP9BitstreamWorkerData *const data =
+ (VP9BitstreamWorkerData *)worker->data2;
+ uint32_t tile_size;
+ int k;
+
+ if (!winterface->sync(worker)) return 0;
+ tile_size = data->bit_writer.pos;
+
+ // Aggregate per-thread bitstream stats.
+ cpi->max_mv_magnitude =
+ VPXMAX(cpi->max_mv_magnitude, data->max_mv_magnitude);
+ for (k = 0; k < SWITCHABLE; ++k) {
+ cpi->interp_filter_selected[0][k] += data->interp_filter_selected[0][k];
+ }
+
+ // Prefix the size of the tile on all but the last.
+ if (tile_col != tile_cols || j < i - 1) {
+ mem_put_be32(data_ptr + total_size, tile_size);
+ total_size += 4;
+ }
+ if (j > 0) {
+ memcpy(data_ptr + total_size, data->dest, tile_size);
+ }
+ total_size += tile_size;
+ }
+ }
+ return total_size;
+}
+
static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
vpx_writer residual_bc;
int tile_row, tile_col;
TOKENEXTRA *tok_end;
@@ -912,6 +1047,14 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
memset(cm->above_seg_context, 0,
sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols));
+ // Encoding tiles in parallel is done only for realtime mode now. In other
+ // modes the speed up is insignificant and requires further testing to ensure
+ // that it does not make the overall process worse in any case.
+ if (cpi->oxcf.mode == REALTIME && cpi->num_workers > 1 && tile_rows == 1 &&
+ tile_cols > 1) {
+ return encode_tiles_mt(cpi, data_ptr);
+ }
+
for (tile_row = 0; tile_row < tile_rows; tile_row++) {
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
int tile_idx = tile_row * tile_cols + tile_col;
@@ -925,8 +1068,9 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
else
vpx_start_encode(&residual_bc, data_ptr + total_size);
- write_modes(cpi, &cpi->tile_data[tile_idx].tile_info, &residual_bc, &tok,
- tok_end);
+ write_modes(cpi, xd, &cpi->tile_data[tile_idx].tile_info, &residual_bc,
+ &tok, tok_end, &cpi->max_mv_magnitude,
+ cpi->interp_filter_selected);
assert(tok == tok_end);
vpx_stop_encode(&residual_bc);
if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) {
@@ -938,7 +1082,6 @@ static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) {
total_size += residual_bc.pos;
}
}
-
return total_size;
}
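[Note: encode_tiles_mt above follows the usual VPxWorker pattern: launch num_workers - 1 jobs asynchronously, run the last one on the calling thread, then sync and fold per-worker stats back into the encoder. A stripped-down fragment of that pattern, with the batching over tile columns and the tile-size prefixing removed for brevity:]

/* Dispatch one job per worker; the last job reuses the calling thread. */
for (i = 0; i < num_workers; ++i) {
  VPxWorker *const worker = &cpi->workers[i];
  worker->data1 = cpi;
  worker->data2 = &cpi->vp9_bitstream_worker_data[i];
  worker->hook = (VPxWorkerHook)encode_tile_worker;
  if (i < num_workers - 1)
    winterface->launch(worker);   /* asynchronous */
  else
    winterface->execute(worker);  /* synchronous, on this thread */
}
/* Wait for every worker, then merge its thread-local bitstream stats. */
for (i = 0; i < num_workers; ++i) {
  VP9BitstreamWorkerData *const data = &cpi->vp9_bitstream_worker_data[i];
  if (!winterface->sync(&cpi->workers[i])) return 0;
  cpi->max_mv_magnitude = VPXMAX(cpi->max_mv_magnitude, data->max_mv_magnitude);
  for (k = 0; k < SWITCHABLE; ++k)
    cpi->interp_filter_selected[0][k] += data->interp_filter_selected[0][k];
}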
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h
index 8c97d37f77e..044a3bbc7bc 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_bitstream.h
@@ -17,8 +17,26 @@ extern "C" {
#include "vp9/encoder/vp9_encoder.h"
+typedef struct VP9BitstreamWorkerData {
+ uint8_t *dest;
+ int dest_size;
+ TOKENEXTRA *tok;
+ TOKENEXTRA *tok_end;
+ vpx_writer bit_writer;
+ int tile_idx;
+ unsigned int max_mv_magnitude;
+ // The size of interp_filter_selected in VP9_COMP is actually
+ // MAX_REFERENCE_FRAMES x SWITCHABLE. But when encoding tiles, all we ever do
+ // is increment the very first index (index 0) for the first dimension. Hence
+ // this is sufficient.
+ int interp_filter_selected[1][SWITCHABLE];
+ DECLARE_ALIGNED(16, MACROBLOCKD, xd);
+} VP9BitstreamWorkerData;
+
int vp9_get_refresh_mask(VP9_COMP *cpi);
+void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi);
+
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
index 335faca82b1..3ab05375ff7 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -795,7 +795,12 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
v16x16 vt2[16];
int force_split[21];
int avg_32x32;
+ int max_var_32x32 = 0;
+ int min_var_32x32 = INT_MAX;
+ int var_32x32;
int avg_16x16[4];
+ int64_t threshold_4x4avg;
+ NOISE_LEVEL noise_level = kLow;
uint8_t *s;
const uint8_t *d;
int sp;
@@ -829,6 +834,9 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
}
+ threshold_4x4avg =
+ (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : thresholds[2] >> 1;
+
memset(x->variance_low, 0, sizeof(x->variance_low));
if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
@@ -846,7 +854,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
// that the temporal reference frame will always be of type LAST_FRAME.
// TODO(marpan): If that assumption is broken, we need to revisit this code.
MODE_INFO *mi = xd->mi[0];
- const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+ YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
const YV12_BUFFER_CONFIG *yv12_g = NULL;
unsigned int y_sad_g, y_sad_thr;
@@ -871,9 +879,18 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
y_sad_g = UINT_MAX;
}
- vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
- &cm->frame_refs[LAST_FRAME - 1].sf);
- mi->ref_frame[0] = LAST_FRAME;
+ if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+ cpi->rc.is_src_frame_alt_ref) {
+ yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
+ vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[ALTREF_FRAME - 1].sf);
+ mi->ref_frame[0] = ALTREF_FRAME;
+ y_sad_g = UINT_MAX;
+ } else {
+ vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[LAST_FRAME - 1].sf);
+ mi->ref_frame[0] = LAST_FRAME;
+ }
mi->ref_frame[1] = NONE;
mi->sb_type = BLOCK_64X64;
mi->mv[0].as_int = 0;
@@ -986,7 +1003,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
if (is_key_frame || (low_res &&
vt.split[i].split[j].part_variances.none.variance >
- (thresholds[1] << 1))) {
+ threshold_4x4avg)) {
force_split[split_index] = 0;
// Go down to 4x4 down-sampling for variance.
variance4x4downsample[i2 + j] = 1;
@@ -1029,6 +1046,9 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
// (64x64) level.
if (!force_split[i + 1]) {
get_variance(&vt.split[i].part_variances.none);
+ var_32x32 = vt.split[i].part_variances.none.variance;
+ max_var_32x32 = VPXMAX(var_32x32, max_var_32x32);
+ min_var_32x32 = VPXMIN(var_32x32, min_var_32x32);
if (vt.split[i].part_variances.none.variance > thresholds[1] ||
(!is_key_frame &&
vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) &&
@@ -1036,15 +1056,27 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
force_split[i + 1] = 1;
force_split[0] = 1;
}
- avg_32x32 += vt.split[i].part_variances.none.variance;
+ avg_32x32 += var_32x32;
}
}
if (!force_split[0]) {
fill_variance_tree(&vt, BLOCK_64X64);
get_variance(&vt.part_variances.none);
+ if (cpi->noise_estimate.enabled)
+ noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
// If variance of this 64x64 block is above (some threshold of) the average
// variance over the sub-32x32 blocks, then force this block to split.
- if (!is_key_frame && vt.part_variances.none.variance > (5 * avg_32x32) >> 4)
+ // Only checking this for noise level >= medium for now.
+ if (!is_key_frame && noise_level >= kMedium &&
+ vt.part_variances.none.variance > (5 * avg_32x32) >> 4)
+ force_split[0] = 1;
+ // Else if the maximum 32x32 variance minus the minimum 32x32 variance in
+ // a 64x64 block is greater than the threshold and the maximum 32x32 variance
+ // is above a minimum threshold, then force the split of the 64x64 block.
+ // Only check this for low noise.
+ else if (!is_key_frame && noise_level < kMedium &&
+ (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) &&
+ max_var_32x32 > thresholds[0] >> 1)
force_split[0] = 1;
}
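[Note: a numeric illustration of the new 64x64 split conditions; all values are hypothetical.]

/* Low-noise branch, with thresholds[0] = 8000 and per-32x32 variances
 * 900, 1200, 7400, 9800 inside one 64x64 block:
 *   max_var_32x32 - min_var_32x32 = 9800 - 900 = 8900
 *   8900 > 3 * (thresholds[0] >> 3) = 3000           -> true
 *   max_var_32x32 = 9800 > thresholds[0] >> 1 = 4000 -> true
 *   => force_split[0] = 1, so the 64x64 block is coded as four 32x32s.
 * When the estimated noise level is kMedium or higher, the older check
 * (64x64 variance > (5 * avg_32x32) >> 4) is used instead.
 */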
@@ -1863,7 +1895,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
}
}
- if (cm->use_prev_frame_mvs ||
+ if (cm->use_prev_frame_mvs || !cm->error_resilient_mode ||
(cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 &&
cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) {
MV_REF *const frame_mvs =
@@ -3942,8 +3974,10 @@ void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row,
int mi_row;
// Set up pointers to per thread motion search counters.
- td->mb.m_search_count_ptr = &td->rd_counts.m_search_count;
- td->mb.ex_search_count_ptr = &td->rd_counts.ex_search_count;
+ this_tile->m_search_count = 0; // Count of motion search hits.
+ this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
+ td->mb.m_search_count_ptr = &this_tile->m_search_count;
+ td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) {
if (cpi->sf.use_nonrd_pick_mode)
@@ -4048,6 +4082,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_zero(x->zcoeff_blk);
if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 &&
+ !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) &&
!cpi->use_svc)
cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c
index 874a8e4b981..023d087c2ce 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.c
@@ -208,7 +208,8 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vpx_writer *w,
}
void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref,
- const nmv_context *mvctx, int usehp) {
+ const nmv_context *mvctx, int usehp,
+ unsigned int *const max_mv_magnitude) {
const MV diff = { mv->row - ref->row, mv->col - ref->col };
const MV_JOINT_TYPE j = vp9_get_mv_joint(&diff);
usehp = usehp && use_mv_hp(ref);
@@ -223,8 +224,8 @@ void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref,
// If auto_mv_step_size is enabled then keep track of the largest
// motion vector component used.
if (cpi->sf.mv.auto_mv_step_size) {
- unsigned int maxv = VPXMAX(abs(mv->row), abs(mv->col)) >> 3;
- cpi->max_mv_magnitude = VPXMAX(maxv, cpi->max_mv_magnitude);
+ const unsigned int maxv = VPXMAX(abs(mv->row), abs(mv->col)) >> 3;
+ *max_mv_magnitude = VPXMAX(maxv, *max_mv_magnitude);
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h
index ad77b8154f3..9fc7ab8dc45 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encodemv.h
@@ -23,7 +23,8 @@ void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vpx_writer *w,
nmv_context_counts *const counts);
void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref,
- const nmv_context *mvctx, int usehp);
+ const nmv_context *mvctx, int usehp,
+ unsigned int *const max_mv_magnitude);
void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
const nmv_context *mvctx, int usehp);
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
index 12f02e7c5d9..2a58003829c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.c
@@ -2030,7 +2030,10 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
vpx_free(cpi->tile_thr_data);
vpx_free(cpi->workers);
- if (cpi->num_workers > 1) vp9_loop_filter_dealloc(&cpi->lf_row_sync);
+ if (cpi->num_workers > 1) {
+ vp9_loop_filter_dealloc(&cpi->lf_row_sync);
+ vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
+ }
vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
@@ -2438,6 +2441,8 @@ static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
cpi->resize_pending = 1;
return 1;
}
+ // Force recode if projected_frame_size > max_frame_bandwidth
+ if (rc->projected_frame_size >= rc->max_frame_bandwidth) return 1;
// TODO(agrange) high_limit could be greater than the scale-down threshold.
if ((rc->projected_frame_size > high_limit && q < maxq) ||
@@ -2796,7 +2801,7 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
dc_quant_devisor = 4.0;
#endif
- fprintf(f, "%10u %dx%d %10d %10d %d %d %10d %10d %10d %10d"
+ fprintf(f, "%10u %dx%d %d %d %10d %10d %10d %10d"
"%10"PRId64" %10"PRId64" %5d %5d %10"PRId64" "
"%10"PRId64" %10"PRId64" %10d "
"%7.2lf %7.2lf %7.2lf %7.2lf %7.2lf"
@@ -2805,8 +2810,6 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
"%10lf %8u %10"PRId64" %10d %10d %10d %10d %10d\n",
cpi->common.current_video_frame,
cm->width, cm->height,
- cpi->td.rd_counts.m_search_count,
- cpi->td.rd_counts.ex_search_count,
cpi->rc.source_alt_ref_pending,
cpi->rc.source_alt_ref_active,
cpi->rc.this_frame_target,
@@ -3124,7 +3127,8 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
if (cpi->oxcf.pass == 0 && cpi->oxcf.mode == REALTIME &&
cpi->oxcf.speed >= 5 && cpi->resize_state == 0 &&
(cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
- cpi->oxcf.rc_mode == VPX_VBR))
+ cpi->oxcf.rc_mode == VPX_VBR) &&
+ cm->show_frame)
vp9_avg_source_sad(cpi);
// For 1 pass SVC, since only ZEROMV is allowed for upsampled reference
@@ -3214,6 +3218,13 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
vpx_clear_system_state();
}
+#define MAX_QSTEP_ADJ 4
+static int get_qstep_adj(int rate_excess, int rate_limit) {
+ int qstep =
+ rate_limit ? ((rate_excess + rate_limit / 2) / rate_limit) : INT_MAX;
+ return VPXMIN(qstep, MAX_QSTEP_ADJ);
+}
+
static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
uint8_t *dest) {
VP9_COMMON *const cm = &cpi->common;
@@ -3387,6 +3398,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
// to attempt to recode.
int last_q = q;
int retries = 0;
+ int qstep;
if (cpi->resize_pending == 1) {
// Change in frame size so go back around the recode loop.
@@ -3412,7 +3424,10 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
q_high = rc->worst_quality;
// Raise Qlow as to at least the current value
- q_low = q < q_high ? q + 1 : q_high;
+ qstep =
+ get_qstep_adj(rc->projected_frame_size, rc->this_frame_target);
+ q_low = VPXMIN(q + qstep, q_high);
+ // q_low = q < q_high ? q + 1 : q_high;
if (undershoot_seen || loop_at_this_size > 1) {
// Update rate_correction_factor unless
@@ -3437,7 +3452,10 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
overshoot_seen = 1;
} else {
// Frame is too small
- q_high = q > q_low ? q - 1 : q_low;
+ qstep =
+ get_qstep_adj(rc->this_frame_target, rc->projected_frame_size);
+ q_high = VPXMAX(q - qstep, q_low);
+ // q_high = q > q_low ? q - 1 : q_low;
if (overshoot_seen || loop_at_this_size > 1) {
vp9_rc_update_rate_correction_factors(cpi);
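[Note: a worked example of the new q-step adjustment used in the recode loop; the frame sizes are hypothetical.]

/* Overshoot case: projected_frame_size = 30000, this_frame_target = 10000.
 *   qstep = (30000 + 10000 / 2) / 10000 = 3   (MAX_QSTEP_ADJ caps this at 4)
 *   q_low = VPXMIN(q + 3, q_high)
 * The lower bound now jumps in proportion to the overshoot ratio instead of
 * always stepping by 1; the undershoot case adjusts q_high symmetrically.
 */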
@@ -4477,7 +4495,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->svc.layer_context[cpi->svc.spatial_layer_id].has_alt_frame = 1;
#endif
- if ((oxcf->arnr_max_frames > 0) && (oxcf->arnr_strength > 0)) {
+ if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) &&
+ (oxcf->arnr_strength > 0)) {
int bitrate = cpi->rc.avg_frame_bandwidth / 40;
int not_low_bitrate = bitrate > ALT_REF_AQ_LOW_BITRATE_BOUNDARY;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
index 66e41492b57..0007e6395da 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_encoder.h
@@ -267,14 +267,14 @@ typedef struct TileDataEnc {
TileInfo tile_info;
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
int mode_map[BLOCK_SIZES][MAX_MODES];
+ int m_search_count;
+ int ex_search_count;
} TileDataEnc;
typedef struct RD_COUNTS {
vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
int64_t comp_pred_diff[REFERENCE_MODES];
int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
- int m_search_count;
- int ex_search_count;
} RD_COUNTS;
typedef struct ThreadData {
@@ -601,6 +601,7 @@ typedef struct VP9_COMP {
VPxWorker *workers;
struct EncWorkerData *tile_thr_data;
VP9LfSync lf_row_sync;
+ struct VP9BitstreamWorkerData *vp9_bitstream_worker_data;
int keep_level_stats;
Vp9LevelInfo level_info;
@@ -735,7 +736,8 @@ static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
}
static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
- return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
+ return !(cpi->oxcf.mode == REALTIME && cpi->oxcf.rc_mode == VPX_CBR) &&
+ cpi->oxcf.lag_in_frames > 0 &&
(cpi->oxcf.enable_auto_arf &&
(!is_two_pass_svc(cpi) ||
cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]));
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c
index 7657573bbf0..f4f7c7baccd 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ethread.c
@@ -30,10 +30,6 @@ static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
for (n = 0; n < ENTROPY_TOKENS; n++)
td->rd_counts.coef_counts[i][j][k][l][m][n] +=
td_t->rd_counts.coef_counts[i][j][k][l][m][n];
-
- // Counts of all motion searches and exhuastive mesh searches.
- td->rd_counts.m_search_count += td_t->rd_counts.m_search_count;
- td->rd_counts.ex_search_count += td_t->rd_counts.ex_search_count;
}
static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
index 2f1fe360d85..788952d3467 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_firstpass.c
@@ -48,10 +48,8 @@
#define FIRST_PASS_Q 10.0
#define GF_MAX_BOOST 96.0
#define INTRA_MODE_PENALTY 1024
-#define KF_MAX_BOOST 128.0
#define MIN_ARF_GF_BOOST 240
#define MIN_DECAY_FACTOR 0.01
-#define MIN_KF_BOOST 300
#define NEW_MV_MODE_PENALTY 32
#define SVC_FACTOR_PT_LOW 0.45
#define DARK_THRESH 64
@@ -1578,7 +1576,7 @@ static double get_sr_decay_rate(const VP9_COMP *cpi,
sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) - motion_amplitude_part -
(INTRA_PART * modified_pcnt_intra);
}
- return VPXMAX(sr_decay, VPXMIN(DEFAULT_DECAY_LIMIT, modified_pct_inter));
+ return VPXMAX(sr_decay, DEFAULT_DECAY_LIMIT);
}
// This function gives an estimate of how badly we believe the prediction
@@ -1681,6 +1679,7 @@ static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
#define BASELINE_ERR_PER_MB 1000.0
static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame,
+ double *sr_accumulator,
double this_frame_mv_in_out, double max_boost) {
double frame_boost;
const double lq = vp9_convert_qindex_to_q(
@@ -1694,17 +1693,56 @@ static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame,
// Underlying boost factor is based on inter error ratio.
frame_boost = (BASELINE_ERR_PER_MB * num_mbs) /
- DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
+ DOUBLE_DIVIDE_CHECK(this_frame->coded_error + *sr_accumulator);
+
+ // Update the accumulator for second ref error difference.
+ // This is intended to give an indication of how much the coded error is
+ // increasing over time.
+ *sr_accumulator += (this_frame->sr_coded_error - this_frame->coded_error) / 1;
+ *sr_accumulator = VPXMAX(0.0, *sr_accumulator);
+
+ // Small adjustment for cases where there is a zoom out
+ if (this_frame_mv_in_out > 0.0)
+ frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);
+
+ // Q correction and scaling
frame_boost = frame_boost * BOOST_FACTOR * boost_q_correction;
- // Increase boost for frames where new data coming into frame (e.g. zoom out).
- // Slightly reduce boost if there is a net balance of motion out of the frame
- // (zoom in). The range for this_frame_mv_in_out is -1.0 to +1.0.
+ return VPXMIN(frame_boost, max_boost * boost_q_correction);
+}
+
+#define KF_BOOST_FACTOR 12.5
+static double calc_kf_frame_boost(VP9_COMP *cpi,
+ const FIRSTPASS_STATS *this_frame,
+ double *sr_accumulator,
+ double this_frame_mv_in_out,
+ double max_boost) {
+ double frame_boost;
+ const double lq = vp9_convert_qindex_to_q(
+ cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth);
+ const double boost_q_correction = VPXMIN((0.50 + (lq * 0.015)), 2.00);
+ int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs
+ : cpi->common.MBs;
+
+ // Correct for any inactive region in the image
+ num_mbs = (int)VPXMAX(1, num_mbs * calculate_active_area(cpi, this_frame));
+
+ // Underlying boost factor is based on inter error ratio.
+ frame_boost = (BASELINE_ERR_PER_MB * num_mbs) /
+ DOUBLE_DIVIDE_CHECK(this_frame->coded_error + *sr_accumulator);
+
+ // Update the accumulator for second ref error difference.
+ // This is intended to give an indication of how much the coded error is
+ // increasing over time.
+ *sr_accumulator += (this_frame->sr_coded_error - this_frame->coded_error) / 1;
+ *sr_accumulator = VPXMAX(0.0, *sr_accumulator);
+
+ // Small adjustment for cases where there is a zoom out
if (this_frame_mv_in_out > 0.0)
frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);
- // In the extreme case the boost is halved.
- else
- frame_boost += frame_boost * (this_frame_mv_in_out / 2.0);
+
+ // Q correction and scaling
+ frame_boost = frame_boost * KF_BOOST_FACTOR * boost_q_correction;
return VPXMIN(frame_boost, max_boost * boost_q_correction);
}
@@ -1719,6 +1757,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, int f_frames, int b_frames,
double this_frame_mv_in_out = 0.0;
double mv_in_out_accumulator = 0.0;
double abs_mv_in_out_accumulator = 0.0;
+ double sr_accumulator = 0.0;
int arf_boost;
int flash_detected = 0;
@@ -1745,9 +1784,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, int f_frames, int b_frames,
: decay_accumulator;
}
- boost_score +=
- decay_accumulator *
- calc_frame_boost(cpi, this_frame, this_frame_mv_in_out, GF_MAX_BOOST);
+ sr_accumulator = 0.0;
+ boost_score += decay_accumulator *
+ calc_frame_boost(cpi, this_frame, &sr_accumulator,
+ this_frame_mv_in_out, GF_MAX_BOOST);
}
*f_boost = (int)boost_score;
@@ -1759,6 +1799,7 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, int f_frames, int b_frames,
this_frame_mv_in_out = 0.0;
mv_in_out_accumulator = 0.0;
abs_mv_in_out_accumulator = 0.0;
+ sr_accumulator = 0.0;
// Search backward towards last gf position.
for (i = -1; i >= -b_frames; --i) {
@@ -1783,9 +1824,10 @@ static int calc_arf_boost(VP9_COMP *cpi, int offset, int f_frames, int b_frames,
: decay_accumulator;
}
- boost_score +=
- decay_accumulator *
- calc_frame_boost(cpi, this_frame, this_frame_mv_in_out, GF_MAX_BOOST);
+ sr_accumulator = 0.0;
+ boost_score += decay_accumulator *
+ calc_frame_boost(cpi, this_frame, &sr_accumulator,
+ this_frame_mv_in_out, GF_MAX_BOOST);
}
*b_boost = (int)boost_score;
@@ -2085,7 +2127,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double mv_ratio_accumulator = 0.0;
double decay_accumulator = 1.0;
double zero_motion_accumulator = 1.0;
-
double loop_decay_rate = 1.00;
double last_loop_decay_rate = 1.00;
@@ -2095,6 +2136,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double mv_ratio_accumulator_thresh;
double mv_in_out_thresh;
double abs_mv_in_out_thresh;
+ double sr_accumulator = 0.0;
unsigned int allow_alt_ref = is_altref_enabled(cpi);
int f_boost = 0;
@@ -2221,9 +2263,10 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Calculate a boost number for this frame.
- boost_score +=
- decay_accumulator *
- calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out, GF_MAX_BOOST);
+ sr_accumulator = 0.0;
+ boost_score += decay_accumulator *
+ calc_frame_boost(cpi, &next_frame, &sr_accumulator,
+ this_frame_mv_in_out, GF_MAX_BOOST);
// Break out conditions.
if (
@@ -2473,6 +2516,10 @@ static int test_candidate_kf(TWO_PASS *twopass,
}
#define FRAMES_TO_CHECK_DECAY 8
+#define KF_MAX_FRAME_BOOST 96.0
+#define MIN_KF_TOT_BOOST 300
+#define MAX_KF_TOT_BOOST 5400
+#define KF_BOOST_SCAN_MAX_FRAMES 32
static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int i, j;
@@ -2485,14 +2532,13 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
FIRSTPASS_STATS next_frame;
FIRSTPASS_STATS last_frame;
int kf_bits = 0;
- int loop_decay_counter = 0;
double decay_accumulator = 1.0;
- double av_decay_accumulator = 0.0;
double zero_motion_accumulator = 1.0;
double boost_score = 0.0;
double kf_mod_err = 0.0;
double kf_group_err = 0.0;
double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
+ double sr_accumulator = 0.0;
vp9_zero(next_frame);
@@ -2642,34 +2688,36 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Scan through the kf group collating various stats used to determine
// how many bits to spend on it.
- decay_accumulator = 1.0;
boost_score = 0.0;
+
for (i = 0; i < (rc->frames_to_key - 1); ++i) {
if (EOF == input_stats(twopass, &next_frame)) break;
- // Monitor for static sections.
- zero_motion_accumulator = VPXMIN(zero_motion_accumulator,
- get_zero_motion_factor(cpi, &next_frame));
-
- // Not all frames in the group are necessarily used in calculating boost.
- if ((i <= rc->max_gf_interval) ||
- ((i <= (rc->max_gf_interval * 4)) && (decay_accumulator > 0.5))) {
- const double frame_boost =
- calc_frame_boost(cpi, &next_frame, 0, KF_MAX_BOOST);
-
- // How fast is prediction quality decaying.
- if (!detect_flash(twopass, 0)) {
- const double loop_decay_rate =
- get_prediction_decay_rate(cpi, &next_frame);
- decay_accumulator *= loop_decay_rate;
- decay_accumulator = VPXMAX(decay_accumulator, MIN_DECAY_FACTOR);
- av_decay_accumulator += decay_accumulator;
- ++loop_decay_counter;
- }
- boost_score += (decay_accumulator * frame_boost);
+ if (i <= KF_BOOST_SCAN_MAX_FRAMES) {
+ double frame_boost;
+ double zm_factor;
+
+ // Monitor for static sections.
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+
+ // Factor 0.75-1.25 based on how much of frame is static.
+ zm_factor = (0.75 + (zero_motion_accumulator / 2.0));
+
+ // The second (lagging) ref error is not valid immediately after
+ // a key frame because either the lag has not built up (in the case of
+ // the first key frame) or it points to a reference before the new key
+ // frame.
+ if (i < 2) sr_accumulator = 0.0;
+ frame_boost = calc_kf_frame_boost(cpi, &next_frame, &sr_accumulator, 0,
+ KF_MAX_FRAME_BOOST * zm_factor);
+
+ boost_score += frame_boost;
+ if (frame_boost < 25.00) break;
+ } else {
+ break;
}
}
- av_decay_accumulator /= (double)loop_decay_counter;
reset_fpf_position(twopass, start_position);
@@ -2681,9 +2729,9 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
start_position, twopass->stats_in_end, rc->frames_to_key);
// Apply various clamps for min and max boost
- rc->kf_boost = (int)(av_decay_accumulator * boost_score);
- rc->kf_boost = VPXMAX(rc->kf_boost, (rc->frames_to_key * 3));
- rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_BOOST);
+ rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
+ rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
+ rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
// Work out how many bits to allocate for the key frame itself.
kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,
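To make the reworked key-frame boost clamps concrete, a worked example with hypothetical numbers (boost_score is the sum accumulated in the scan above):

    /* boost_score = 2000.0, rc->frames_to_key = 60:
     *   kf_boost = VPXMAX(2000, 60 * 3)  -> 2000
     *   kf_boost = VPXMAX(2000, 300)     -> 2000  (MIN_KF_TOT_BOOST floor)
     *   kf_boost = VPXMIN(2000, 5400)    -> 2000  (MAX_KF_TOT_BOOST ceiling)
     * A short or very static group, e.g. boost_score = 120 with
     * frames_to_key = 10, is raised to the 300 floor instead, while an
     * extremely high score is now capped at 5400 rather than growing
     * without bound. */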
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
index 76d2611d89f..2b7ddbcd948 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_pickmode.c
@@ -1080,12 +1080,14 @@ typedef struct {
PREDICTION_MODE pred_mode;
} REF_MODE;
-#define RT_INTER_MODES 8
+#define RT_INTER_MODES 12
static const REF_MODE ref_mode_set[RT_INTER_MODES] = {
{ LAST_FRAME, ZEROMV }, { LAST_FRAME, NEARESTMV },
{ GOLDEN_FRAME, ZEROMV }, { LAST_FRAME, NEARMV },
{ LAST_FRAME, NEWMV }, { GOLDEN_FRAME, NEARESTMV },
- { GOLDEN_FRAME, NEARMV }, { GOLDEN_FRAME, NEWMV }
+ { GOLDEN_FRAME, NEARMV }, { GOLDEN_FRAME, NEWMV },
+ { ALTREF_FRAME, ZEROMV }, { ALTREF_FRAME, NEARESTMV },
+ { ALTREF_FRAME, NEARMV }, { ALTREF_FRAME, NEWMV }
};
static const REF_MODE ref_mode_set_svc[RT_INTER_MODES] = {
{ LAST_FRAME, ZEROMV }, { GOLDEN_FRAME, ZEROMV },
@@ -1467,6 +1469,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
usable_ref_frame = GOLDEN_FRAME;
}
+ if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+ (cpi->rc.alt_ref_gf_group || cpi->rc.is_src_frame_alt_ref))
+ usable_ref_frame = ALTREF_FRAME;
+
// For svc mode, on spatial_layer_id > 0: if the reference has different scale
// constrain the inter mode to only test zero motion.
if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
@@ -1506,7 +1512,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int this_early_term = 0;
PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
- if (cpi->use_svc) this_mode = ref_mode_set_svc[idx].pred_mode;
+ ref_frame = ref_mode_set[idx].ref_frame;
+
+ if (cpi->use_svc) {
+ this_mode = ref_mode_set_svc[idx].pred_mode;
+ ref_frame = ref_mode_set_svc[idx].ref_frame;
+ }
+ if (ref_frame > usable_ref_frame) continue;
if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
this_mode != NEARESTMV) {
@@ -1515,9 +1527,23 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue;
- ref_frame = ref_mode_set[idx].ref_frame;
- if (cpi->use_svc) {
- ref_frame = ref_mode_set_svc[idx].ref_frame;
+ if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) {
+ if (cpi->rc.is_src_frame_alt_ref &&
+ (ref_frame != ALTREF_FRAME ||
+ frame_mv[this_mode][ref_frame].as_int != 0))
+ continue;
+
+ if (cpi->rc.alt_ref_gf_group &&
+ cpi->rc.frames_since_golden > (cpi->rc.baseline_gf_interval >> 1) &&
+ ref_frame == GOLDEN_FRAME &&
+ frame_mv[this_mode][ref_frame].as_int != 0)
+ continue;
+
+ if (cpi->rc.alt_ref_gf_group &&
+ cpi->rc.frames_since_golden < (cpi->rc.baseline_gf_interval >> 1) &&
+ ref_frame == ALTREF_FRAME &&
+ frame_mv[this_mode][ref_frame].as_int != 0)
+ continue;
}
if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
@@ -1543,13 +1569,27 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
continue;
}
- if (!force_skip_low_temp_var &&
+ if (sf->reference_masking &&
!(frame_mv[this_mode][ref_frame].as_int == 0 &&
ref_frame == LAST_FRAME)) {
- i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
- if ((cpi->ref_frame_flags & flag_list[i]) && sf->reference_masking)
- if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
+ if (usable_ref_frame < ALTREF_FRAME) {
+ if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
+ i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
+ if ((cpi->ref_frame_flags & flag_list[i]))
+ if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
+ ref_frame_skip_mask |= (1 << ref_frame);
+ }
+ } else if (!cpi->rc.is_src_frame_alt_ref &&
+ !(frame_mv[this_mode][ref_frame].as_int == 0 &&
+ ref_frame == ALTREF_FRAME)) {
+ int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
+ int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
+ if (((cpi->ref_frame_flags & flag_list[ref1]) &&
+ (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
+ ((cpi->ref_frame_flags & flag_list[ref2]) &&
+ (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
ref_frame_skip_mask |= (1 << ref_frame);
+ }
}
if (ref_frame_skip_mask & (1 << ref_frame)) continue;
@@ -1884,6 +1924,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
svc_force_zero_mode[best_ref_frame - 1]);
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
+ if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+ cpi->rc.is_src_frame_alt_ref)
+ perform_intra_pred = 0;
// Perform intra prediction search, if the best SAD is above a certain
// threshold.
if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) && perform_intra_pred &&
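The VBR-with-lag changes above prune the non-RD mode loop around alt-ref GF groups. Below is a condensed, self-contained restatement of that gating for readability; it is a sketch only (the in-tree code additionally conditions on lag_in_frames > 0 and rc_mode == VPX_VBR), and the reference-frame constants simply mirror the vp9 enum values.

    /* Sketch of the mode gating added above; not the in-tree code. */
    enum { LAST_FRAME = 1, GOLDEN_FRAME = 2, ALTREF_FRAME = 3 }; /* vp9 values */

    static int skip_mode_for_vbr_altref(int is_src_frame_alt_ref,
                                        int alt_ref_gf_group,
                                        int frames_since_golden,
                                        int gf_interval, int ref_frame,
                                        int mv_is_zero) {
      /* On the frame that overlays the alt-ref, keep only zero-MV ALTREF. */
      if (is_src_frame_alt_ref)
        return !(ref_frame == ALTREF_FRAME && mv_is_zero);
      if (!alt_ref_gf_group || mv_is_zero) return 0;
      /* Second half of the GF group: the alt-ref is temporally closer, so
       * drop non-zero-MV GOLDEN modes. */
      if (frames_since_golden > (gf_interval >> 1))
        return ref_frame == GOLDEN_FRAME;
      /* First half: GOLDEN is closer, so drop non-zero-MV ALTREF modes. */
      if (frames_since_golden < (gf_interval >> 1))
        return ref_frame == ALTREF_FRAME;
      return 0;
    }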
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
index 93eddd655ac..b5cfd5de6c6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -45,6 +45,9 @@
#define FRAME_OVERHEAD_BITS 200
+// Use this macro to turn on/off use of alt-refs in one-pass mode.
+#define USE_ALTREF_FOR_ONE_PASS 0
+
#if CONFIG_VP9_HIGHBITDEPTH
#define ASSIGN_MINQ_TABLE(bit_depth, name) \
do { \
@@ -327,6 +330,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->prev_avg_source_sad_lag = 0;
rc->high_source_sad = 0;
rc->high_source_sad_lagindex = -1;
+ rc->alt_ref_gf_group = 0;
rc->fac_active_worst_inter = 150;
rc->fac_active_worst_gf = 100;
rc->force_qpmin = 0;
@@ -561,6 +565,13 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
q = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
}
+#if USE_ALTREF_FOR_ONE_PASS
+ if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+ cpi->oxcf.lag_in_frames > 0 && cpi->rc.is_src_frame_alt_ref &&
+ !cpi->rc.alt_ref_gf_group) {
+ q = VPXMIN(q, (q + cpi->rc.last_boosted_qindex) >> 1);
+ }
+#endif
return q;
}
@@ -1429,24 +1440,16 @@ void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
cpi->rc.rc_1_frame = 0;
}
-// Use this macro to turn on/off use of alt-refs in one-pass mode.
-#define USE_ALTREF_FOR_ONE_PASS 1
-
static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
- int target;
const int af_ratio = rc->af_ratio_onepass_vbr;
-#if USE_ALTREF_FOR_ONE_PASS
- target =
+ int target =
(!rc->is_src_frame_alt_ref &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))
? (rc->avg_frame_bandwidth * rc->baseline_gf_interval * af_ratio) /
(rc->baseline_gf_interval + af_ratio - 1)
: (rc->avg_frame_bandwidth * rc->baseline_gf_interval) /
(rc->baseline_gf_interval + af_ratio - 1);
-#else
- target = rc->avg_frame_bandwidth;
-#endif
return vp9_rc_clamp_pframe_target_size(cpi, target);
}
@@ -1499,8 +1502,8 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) {
vp9_cyclic_refresh_set_golden_update(cpi);
} else {
- rc->baseline_gf_interval =
- (rc->min_gf_interval + rc->max_gf_interval) / 2;
+ rc->baseline_gf_interval = VPXMIN(
+ 20, VPXMAX(10, (rc->min_gf_interval + rc->max_gf_interval) / 2));
}
rc->af_ratio_onepass_vbr = 10;
if (rc->rolling_target_bits > 0)
@@ -1526,6 +1529,7 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
cpi->refresh_golden_frame = 1;
rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
+ rc->alt_ref_gf_group = USE_ALTREF_FOR_ONE_PASS;
}
if (cm->frame_type == KEY_FRAME)
target = calc_iframe_target_size_one_pass_vbr(cpi);
@@ -2088,8 +2092,8 @@ void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi, uint64_t avg_sad_current) {
rc->high_source_sad_lagindex = high_source_sad_lagindex;
// Adjust some factors for the next GF group, ignore initial key frame,
// and only for lag_in_frames not too small.
- if (cpi->refresh_golden_frame == 1 && cm->frame_type != KEY_FRAME &&
- cm->current_video_frame > 30 && cpi->oxcf.lag_in_frames > 8) {
+ if (cpi->refresh_golden_frame == 1 && cm->current_video_frame > 30 &&
+ cpi->oxcf.lag_in_frames > 8) {
int frame_constraint;
if (rc->rolling_target_bits > 0)
rate_err =
@@ -2110,6 +2114,8 @@ void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi, uint64_t avg_sad_current) {
? VPXMAX(10, rc->baseline_gf_interval >> 1)
: VPXMAX(6, rc->baseline_gf_interval >> 1);
}
+ if (rc->baseline_gf_interval > cpi->oxcf.lag_in_frames - 1)
+ rc->baseline_gf_interval = cpi->oxcf.lag_in_frames - 1;
// Check for constraining gf_interval for up-coming scene/content changes,
// or for up-coming key frame, whichever is closer.
frame_constraint = rc->frames_to_key;
@@ -2133,6 +2139,23 @@ void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi, uint64_t avg_sad_current) {
rc->af_ratio_onepass_vbr = 5;
rc->gfu_boost = DEFAULT_GF_BOOST >> 2;
}
+#if USE_ALTREF_FOR_ONE_PASS
+ // Don't use alt-ref if there is a scene cut within the group,
+ // or the content is not sufficiently low-motion.
+ if ((rc->high_source_sad_lagindex > 0 &&
+ rc->high_source_sad_lagindex <= rc->frames_till_gf_update_due) ||
+ (avg_source_sad_lag > 3 * sad_thresh1 >> 3)) {
+ rc->source_alt_ref_pending = 0;
+ rc->alt_ref_gf_group = 0;
+ } else {
+ rc->source_alt_ref_pending = 1;
+ rc->alt_ref_gf_group = 1;
+ // If alt-ref is used for this gf group, limit the interval.
+ if (rc->baseline_gf_interval > 10 &&
+ rc->baseline_gf_interval < rc->frames_to_key)
+ rc->baseline_gf_interval = 10;
+ }
+#endif
target = calc_pframe_target_size_one_pass_vbr(cpi);
vp9_rc_set_frame_target(cpi, target);
}
@@ -2261,6 +2284,7 @@ void vp9_avg_source_sad(VP9_COMP *cpi) {
cpi->ext_refresh_frame_flags_pending == 0) {
int target;
cpi->refresh_golden_frame = 1;
+ rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS;
rc->gfu_boost = DEFAULT_GF_BOOST >> 1;
rc->baseline_gf_interval =
VPXMIN(20, VPXMAX(10, rc->baseline_gf_interval));
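Since calc_pframe_target_size_one_pass_vbr() above now always applies the af_ratio weighting (the #if USE_ALTREF_FOR_ONE_PASS fallback to a flat per-frame target is gone), a worked example with hypothetical numbers shows that the weighting only redistributes the group budget:

    /* avg_frame_bandwidth = 10000 bits, baseline_gf_interval = 15,
     * af_ratio_onepass_vbr = 10:
     *   boosted frame (golden/alt-ref refresh, not an overlay):
     *     10000 * 15 * 10 / (15 + 10 - 1) = 62500 bits
     *   regular inter frame:
     *     10000 * 15      / (15 + 10 - 1) =  6250 bits
     * One boosted frame plus 14 regular frames totals 150000 bits, i.e. the
     * 15-frame group budget, before vp9_rc_clamp_pframe_target_size() is
     * applied. */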
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
index 6006e9b051a..70aef03ffb4 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -160,6 +160,7 @@ typedef struct {
uint64_t avg_source_sad[MAX_LAG_BUFFERS];
uint64_t prev_avg_source_sad_lag;
int high_source_sad_lagindex;
+ int alt_ref_gf_group;
int high_source_sad;
int count_last_scene_change;
int avg_frame_low_motion;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
index ea893609193..3e1ed50a6d2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/vp9_speed_features.c
@@ -421,6 +421,10 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed,
(frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = is_keyframe ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
+ if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
+ cpi->rc.is_src_frame_alt_ref) {
+ sf->partition_search_type = VAR_BASED_PARTITION;
+ }
sf->use_nonrd_pick_mode = 1;
sf->allow_skip_recode = 0;
sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO;
@@ -504,7 +508,6 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed,
sf->short_circuit_low_temp_var = 2;
}
sf->limit_newmv_early_exit = 0;
- sf->bias_golden = 0;
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
index fb2a9254172..b3c3d7beb9e 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -12,14 +12,17 @@
#include <tmmintrin.h> // SSSE3
#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/x86/fdct.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
void vp9_fdct8x8_quant_ssse3(
- const int16_t *input, int stride, int16_t *coeff_ptr, intptr_t n_coeffs,
+ const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr,
const int16_t *quant_ptr, const int16_t *quant_shift_ptr,
- int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
__m128i zero;
int pass;
@@ -328,15 +331,15 @@ void vp9_fdct8x8_quant_ssse3(
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+ store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs);
+ store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ store_tran_low(coeff0, dqcoeff_ptr + n_coeffs);
+ store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8);
}
{
@@ -398,20 +401,21 @@ void vp9_fdct8x8_quant_ssse3(
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+ store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs);
+ store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ store_tran_low(coeff0, dqcoeff_ptr + n_coeffs);
+ store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8);
} else {
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
+ // Maybe a more efficient way to store 0?
+ store_zero_tran_low(qcoeff_ptr + n_coeffs);
+ store_zero_tran_low(qcoeff_ptr + n_coeffs + 8);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ store_zero_tran_low(dqcoeff_ptr + n_coeffs);
+ store_zero_tran_low(dqcoeff_ptr + n_coeffs + 8);
}
}
@@ -452,10 +456,10 @@ void vp9_fdct8x8_quant_ssse3(
}
} else {
do {
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
+ store_zero_tran_low(dqcoeff_ptr + n_coeffs);
+ store_zero_tran_low(dqcoeff_ptr + n_coeffs + 8);
+ store_zero_tran_low(qcoeff_ptr + n_coeffs);
+ store_zero_tran_low(qcoeff_ptr + n_coeffs + 8);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c
index 3b5dc3ddac0..0a3e84a0da2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.c
@@ -467,8 +467,8 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
// as the size of the first intra frame be better? This will
// avoid too many deallocate and allocate.
if (frame_worker_data->scratch_buffer_size < data_sz) {
- frame_worker_data->scratch_buffer =
- (uint8_t *)vpx_realloc(frame_worker_data->scratch_buffer, data_sz);
+ vpx_free(frame_worker_data->scratch_buffer);
+ frame_worker_data->scratch_buffer = (uint8_t *)vpx_malloc(data_sz);
if (frame_worker_data->scratch_buffer == NULL) {
set_error_detail(ctx, "Failed to reallocate scratch buffer");
return VPX_CODEC_MEM_ERROR;
@@ -553,6 +553,9 @@ static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx,
ctx->decrypt_cb, ctx->decrypt_state);
if (res != VPX_CODEC_OK) return res;
+ if (ctx->svc_decoding && ctx->svc_spatial_layer < frame_count - 1)
+ frame_count = ctx->svc_spatial_layer + 1;
+
if (ctx->frame_parallel_decode) {
// Decode in frame parallel mode. When decoding in this mode, the frame
// passed to the decoder must be either a normal frame or a superframe with
@@ -1001,6 +1004,16 @@ static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_set_spatial_layer_svc(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->svc_decoding = 1;
+ ctx->svc_spatial_layer = va_arg(args, int);
+ if (ctx->svc_spatial_layer < 0)
+ return VPX_CODEC_INVALID_PARAM;
+ else
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP8_COPY_REFERENCE, ctrl_copy_reference },
@@ -1011,6 +1024,7 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VPXD_SET_DECRYPTOR, ctrl_set_decryptor },
{ VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment },
{ VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter },
+ { VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc },
// Getters
{ VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates },
diff --git a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h
index cc3d51842ac..c1559599b8c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h
+++ b/chromium/third_party/libvpx/source/libvpx/vp9/vp9_dx_iface.h
@@ -60,6 +60,10 @@ struct vpx_codec_alg_priv {
void *ext_priv; // Private data associated with the external frame buffers.
vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
+
+ // Allow decoding up to a given spatial layer of an SVC stream.
+ int svc_decoding;
+ int svc_spatial_layer;
};
#endif // VP9_VP9_DX_IFACE_H_
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c b/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c
index 5aa0b8ddb84..88b1531d8c4 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/src/svc_encodeframe.c
@@ -53,6 +53,10 @@ static const int DEFAULT_SCALE_FACTORS_NUM[VPX_SS_MAX_LAYERS] = { 4, 5, 7, 11,
static const int DEFAULT_SCALE_FACTORS_DEN[VPX_SS_MAX_LAYERS] = { 16, 16, 16,
16, 16 };
+static const int DEFAULT_SCALE_FACTORS_NUM_2x[VPX_SS_MAX_LAYERS] = { 1, 2, 4 };
+
+static const int DEFAULT_SCALE_FACTORS_DEN_2x[VPX_SS_MAX_LAYERS] = { 4, 4, 4 };
+
typedef enum {
QUANTIZER = 0,
BITRATE,
@@ -156,6 +160,9 @@ static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx,
char *token;
const char *delim = ",";
char *save_ptr;
+ int num_layers = svc_ctx->spatial_layers;
+ if (type == BITRATE)
+ num_layers = svc_ctx->spatial_layers * svc_ctx->temporal_layers;
if (input == NULL || option0 == NULL ||
(option1 == NULL && type == SCALE_FACTOR))
@@ -163,7 +170,7 @@ static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx,
input_string = strdup(input);
token = strtok_r(input_string, delim, &save_ptr);
- for (i = 0; i < svc_ctx->spatial_layers; ++i) {
+ for (i = 0; i < num_layers; ++i) {
if (token != NULL) {
res = extract_option(type, token, option0 + i, option1 + i);
if (res != VPX_CODEC_OK) break;
@@ -172,11 +179,11 @@ static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx,
break;
}
}
- if (res == VPX_CODEC_OK && i != svc_ctx->spatial_layers) {
+ if (res == VPX_CODEC_OK && i != num_layers) {
svc_log(svc_ctx, SVC_LOG_ERROR,
"svc: layer params type: %d %d values required, "
"but only %d specified\n",
- type, svc_ctx->spatial_layers, i);
+ type, num_layers, i);
res = VPX_CODEC_INVALID_PARAM;
}
free(input_string);
@@ -287,24 +294,30 @@ vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) {
return VPX_CODEC_OK;
}
-void assign_layer_bitrates(const SvcContext *svc_ctx,
- vpx_codec_enc_cfg_t *const enc_cfg) {
+vpx_codec_err_t assign_layer_bitrates(const SvcContext *svc_ctx,
+ vpx_codec_enc_cfg_t *const enc_cfg) {
int i;
const SvcInternal_t *const si = get_const_svc_internal(svc_ctx);
int sl, tl, spatial_layer_target;
if (svc_ctx->temporal_layering_mode != 0) {
if (si->bitrates[0] != 0) {
- enc_cfg->rc_target_bitrate = 0;
+ unsigned int total_bitrate = 0;
for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
- enc_cfg->ss_target_bitrate[sl * svc_ctx->temporal_layers] = 0;
+ total_bitrate += si->bitrates[sl * svc_ctx->temporal_layers +
+ svc_ctx->temporal_layers - 1];
for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) {
enc_cfg->ss_target_bitrate[sl * svc_ctx->temporal_layers] +=
(unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl];
enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + tl] =
si->bitrates[sl * svc_ctx->temporal_layers + tl];
+ if (tl > 0 && (si->bitrates[sl * svc_ctx->temporal_layers + tl] <=
+ si->bitrates[sl * svc_ctx->temporal_layers + tl - 1]))
+ return VPX_CODEC_INVALID_PARAM;
}
}
+ if (total_bitrate != enc_cfg->rc_target_bitrate)
+ return VPX_CODEC_INVALID_PARAM;
} else {
float total = 0;
float alloc_ratio[VPX_MAX_LAYERS] = { 0 };
@@ -341,11 +354,14 @@ void assign_layer_bitrates(const SvcContext *svc_ctx,
}
} else {
if (si->bitrates[0] != 0) {
- enc_cfg->rc_target_bitrate = 0;
+ unsigned int total_bitrate = 0;
for (i = 0; i < svc_ctx->spatial_layers; ++i) {
enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i];
- enc_cfg->rc_target_bitrate += si->bitrates[i];
+ enc_cfg->layer_target_bitrate[i] = (unsigned int)si->bitrates[i];
+ total_bitrate += si->bitrates[i];
}
+ if (total_bitrate != enc_cfg->rc_target_bitrate)
+ return VPX_CODEC_INVALID_PARAM;
} else {
float total = 0;
float alloc_ratio[VPX_MAX_LAYERS] = { 0 };
@@ -368,6 +384,7 @@ void assign_layer_bitrates(const SvcContext *svc_ctx,
}
}
}
+ return VPX_CODEC_OK;
}
vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
@@ -412,12 +429,24 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN[sl];
si->svc_params.speed_per_layer[sl] = svc_ctx->speed;
}
-
+ if (enc_cfg->rc_end_usage == VPX_CBR && enc_cfg->g_pass == VPX_RC_ONE_PASS &&
+ svc_ctx->spatial_layers <= 3) {
+ for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
+ int sl2 = (svc_ctx->spatial_layers == 2) ? sl + 1 : sl;
+ si->svc_params.scaling_factor_num[sl] = DEFAULT_SCALE_FACTORS_NUM_2x[sl2];
+ si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN_2x[sl2];
+ }
+ }
for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) {
for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
i = sl * svc_ctx->temporal_layers + tl;
si->svc_params.max_quantizers[i] = MAX_QUANTIZER;
si->svc_params.min_quantizers[i] = 0;
+ if (enc_cfg->rc_end_usage == VPX_CBR &&
+ enc_cfg->g_pass == VPX_RC_ONE_PASS) {
+ si->svc_params.max_quantizers[i] = 56;
+ si->svc_params.min_quantizers[i] = 2;
+ }
}
}
@@ -442,7 +471,15 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
(int)VPX_MAX_LAYERS);
return VPX_CODEC_INVALID_PARAM;
}
- assign_layer_bitrates(svc_ctx, enc_cfg);
+ res = assign_layer_bitrates(svc_ctx, enc_cfg);
+ if (res != VPX_CODEC_OK) {
+ svc_log(svc_ctx, SVC_LOG_ERROR,
+ "layer bitrates incorrect: \n"
+ "1) spatial layer bitrates should sum up to target \n"
+ "2) temporal layer bitrates should be increasing within \n"
+ "a spatial layer \n");
+ return VPX_CODEC_INVALID_PARAM;
+ }
#if CONFIG_SPATIAL_SVC
for (i = 0; i < svc_ctx->spatial_layers; ++i)
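The reworked assign_layer_bitrates() above now validates explicitly supplied per-layer bitrates instead of silently overwriting rc_target_bitrate. A small worked example of what a configuration must now satisfy (hypothetical numbers, 2 spatial x 3 temporal layers):

    /* Per-layer bitrates, in kbps, in spatial-major order as parsed for the
     * BITRATE option:
     *   spatial layer 0: 100, 200, 400    (strictly increasing per temporal
     *   spatial layer 1: 300, 600, 1200    layer, or VPX_CODEC_INVALID_PARAM)
     * The spatial-layer totals are the top temporal entries, 400 and 1200,
     * so enc_cfg->rc_target_bitrate must equal 1600 for vpx_svc_init() to
     * succeed; any mismatch now fails instead of being silently corrected. */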
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h b/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h
index c8bde5832a5..462785075cb 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/svc_context.h
@@ -54,7 +54,7 @@ typedef struct SvcInternal {
// values extracted from option, quantizers
vpx_svc_extra_cfg_t svc_params;
int enable_auto_alt_ref[VPX_SS_MAX_LAYERS];
- int bitrates[VPX_SS_MAX_LAYERS];
+ int bitrates[VPX_MAX_LAYERS];
// accumulated statistics
double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
index 8fa25e8bc07..cc90159bc3a 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vp8cx.h
@@ -561,7 +561,22 @@ enum vp8e_enc_control_id {
*
* Supported in codecs: VP9
*/
- VP9E_SET_ALT_REF_AQ
+ VP9E_SET_ALT_REF_AQ,
+
+ /*!\brief Boost percentage for Golden Frame in CBR mode.
+ *
+ * This value controls the amount of boost given to Golden Frame in
+ * CBR mode. It is expressed as a percentage of the average
+ * per-frame bitrate, with the special (and default) value 0 meaning
+ * the feature is off, i.e., no golden frame boost in CBR mode and
+ * average bitrate target is used.
+ *
+ * For example, to allow 100% more bits, i.e., 2X, in a golden frame
+ * than in an average frame, set this to 100.
+ *
+ * Supported in codecs: VP8
+ */
+ VP8E_SET_GF_CBR_BOOST_PCT,
};
/*!\brief vpx 1-D scaling mode
@@ -769,6 +784,9 @@ VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int)
VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTER_BITRATE_PCT, unsigned int)
#define VPX_CTRL_VP8E_SET_MAX_INTER_BITRATE_PCT
+VPX_CTRL_USE_TYPE(VP8E_SET_GF_CBR_BOOST_PCT, unsigned int)
+#define VPX_CTRL_VP8E_SET_GF_CBR_BOOST_PCT
+
VPX_CTRL_USE_TYPE(VP8E_SET_SCREEN_CONTENT_MODE, unsigned int)
#define VPX_CTRL_VP8E_SET_SCREEN_CONTENT_MODE
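A minimal usage sketch for the control declared above, assuming a caller that already has an initialized VP8 encoder context (error handling omitted):

    #include "vpx/vpx_encoder.h"
    #include "vpx/vp8cx.h"

    /* Allow the golden frame to spend up to 100% more bits (2x) than an
     * average frame in CBR mode; 0 (the default) leaves the feature off. */
    static void enable_gf_cbr_boost(vpx_codec_ctx_t *encoder) {
      vpx_codec_control(encoder, VP8E_SET_GF_CBR_BOOST_PCT, 100);
    }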
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h b/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h
index 88204acd378..0d7759eb25b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx/vp8dx.h
@@ -111,6 +111,11 @@ enum vp8_dec_control_id {
*/
VP9_SET_SKIP_LOOP_FILTER,
+ /** control function to decode an SVC stream up to spatial layer x,
+ * where x is passed in through the control and is 0 for the base layer.
+ */
+ VP9_DECODE_SVC_SPATIAL_LAYER,
+
VP8_DECODER_CTRL_ID_MAX
};
@@ -162,6 +167,8 @@ VPX_CTRL_USE_TYPE(VP9D_GET_FRAME_SIZE, int *)
#define VPX_CTRL_VP9D_GET_FRAME_SIZE
VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int)
#define VPX_CTRL_VP9_INVERT_TILE_DECODE_ORDER
+#define VPX_CTRL_VP9_DECODE_SVC_SPATIAL_LAYER
+VPX_CTRL_USE_TYPE(VP9_DECODE_SVC_SPATIAL_LAYER, int)
/*!\endcond */
/*! @} - end defgroup vp8_decoder */
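A minimal usage sketch for the new decoder control, assuming an initialized VP9 decoder context fed with an SVC stream (error handling omitted):

    #include "vpx/vpx_decoder.h"
    #include "vpx/vp8dx.h"

    /* Decode only up to spatial layer 1; the layer index is 0-based, so 0
     * selects the base layer. */
    static void limit_decode_to_layer1(vpx_codec_ctx_t *decoder) {
      vpx_codec_control(decoder, VP9_DECODE_SVC_SPATIAL_LAYER, 1);
    }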
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/fwd_txfm_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/fwd_txfm_neon.c
index 7cb2ba90d2f..e9503f13d70 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/fwd_txfm_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/fwd_txfm_neon.c
@@ -52,10 +52,10 @@ void vpx_fdct8x8_neon(const int16_t *input, int16_t *final_output, int stride) {
v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), (int16_t)cospi_8_64);
v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), (int16_t)cospi_8_64);
v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x2), (int16_t)cospi_8_64);
- v_t0_lo = vmulq_n_s32(v_t0_lo, cospi_16_64);
- v_t0_hi = vmulq_n_s32(v_t0_hi, cospi_16_64);
- v_t1_lo = vmulq_n_s32(v_t1_lo, cospi_16_64);
- v_t1_hi = vmulq_n_s32(v_t1_hi, cospi_16_64);
+ v_t0_lo = vmulq_n_s32(v_t0_lo, (int32_t)cospi_16_64);
+ v_t0_hi = vmulq_n_s32(v_t0_hi, (int32_t)cospi_16_64);
+ v_t1_lo = vmulq_n_s32(v_t1_lo, (int32_t)cospi_16_64);
+ v_t1_hi = vmulq_n_s32(v_t1_hi, (int32_t)cospi_16_64);
{
const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_loopfilter_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_loopfilter_neon.c
new file mode 100644
index 00000000000..5530c6425b2
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_loopfilter_neon.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/arm/transpose_neon.h"
+
+static INLINE void load_thresh(const uint8_t *blimit, const uint8_t *limit,
+ const uint8_t *thresh, uint16x8_t *blimit_vec,
+ uint16x8_t *limit_vec, uint16x8_t *thresh_vec,
+ const int bd) {
+ const int16x8_t shift = vdupq_n_s16(bd - 8);
+ *blimit_vec = vmovl_u8(vld1_dup_u8(blimit));
+ *limit_vec = vmovl_u8(vld1_dup_u8(limit));
+ *thresh_vec = vmovl_u8(vld1_dup_u8(thresh));
+ *blimit_vec = vshlq_u16(*blimit_vec, shift);
+ *limit_vec = vshlq_u16(*limit_vec, shift);
+ *thresh_vec = vshlq_u16(*thresh_vec, shift);
+}
+
+// Here flat is 128-bit long, with each 16-bit chunk being a mask of
+// a pixel. When used to control filter branches, we only detect whether it is
+// all 0s or all 1s. We pairwise add flat to a 32-bit long number flat_status.
+// flat equals 0 if and only if flat_status equals 0.
+// flat equals -1 (all 1s) if and only if flat_status equals -4. (This is true
+// because each mask occupies more than 1 bit.)
+static INLINE uint32_t calc_flat_status(const uint16x8_t flat) {
+ const uint64x1_t t0 = vadd_u64(vreinterpret_u64_u16(vget_low_u16(flat)),
+ vreinterpret_u64_u16(vget_high_u16(flat)));
+ const uint64x1_t t1 = vpaddl_u32(vreinterpret_u32_u64(t0));
+ return vget_lane_u32(vreinterpret_u32_u64(t1), 0);
+}
+
+static INLINE uint16x8_t
+filter_hev_mask4(const uint16x8_t limit, const uint16x8_t blimit,
+ const uint16x8_t thresh, const uint16x8_t p3,
+ const uint16x8_t p2, const uint16x8_t p1, const uint16x8_t p0,
+ const uint16x8_t q0, const uint16x8_t q1, const uint16x8_t q2,
+ const uint16x8_t q3, uint16x8_t *hev, uint16x8_t *mask) {
+ uint16x8_t max, t0, t1;
+
+ max = vabdq_u16(p1, p0);
+ max = vmaxq_u16(max, vabdq_u16(q1, q0));
+ *hev = vcgtq_u16(max, thresh);
+ *mask = vmaxq_u16(max, vabdq_u16(p3, p2));
+ *mask = vmaxq_u16(*mask, vabdq_u16(p2, p1));
+ *mask = vmaxq_u16(*mask, vabdq_u16(q2, q1));
+ *mask = vmaxq_u16(*mask, vabdq_u16(q3, q2));
+ t0 = vabdq_u16(p0, q0);
+ t1 = vabdq_u16(p1, q1);
+ t0 = vaddq_u16(t0, t0);
+ t1 = vshrq_n_u16(t1, 1);
+ t0 = vaddq_u16(t0, t1);
+ *mask = vcleq_u16(*mask, limit);
+ t0 = vcleq_u16(t0, blimit);
+ *mask = vandq_u16(*mask, t0);
+
+ return max;
+}
+
+static INLINE uint16x8_t filter_flat_hev_mask(
+ const uint16x8_t limit, const uint16x8_t blimit, const uint16x8_t thresh,
+ const uint16x8_t p3, const uint16x8_t p2, const uint16x8_t p1,
+ const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1,
+ const uint16x8_t q2, const uint16x8_t q3, uint16x8_t *flat,
+ uint32_t *flat_status, uint16x8_t *hev, const int bd) {
+ uint16x8_t mask;
+ const uint16x8_t max = filter_hev_mask4(limit, blimit, thresh, p3, p2, p1, p0,
+ q0, q1, q2, q3, hev, &mask);
+ *flat = vmaxq_u16(max, vabdq_u16(p2, p0));
+ *flat = vmaxq_u16(*flat, vabdq_u16(q2, q0));
+ *flat = vmaxq_u16(*flat, vabdq_u16(p3, p0));
+ *flat = vmaxq_u16(*flat, vabdq_u16(q3, q0));
+ *flat = vcleq_u16(*flat, vdupq_n_u16(1 << (bd - 8))); /* flat_mask4() */
+ *flat = vandq_u16(*flat, mask);
+ *flat_status = calc_flat_status(*flat);
+
+ return mask;
+}
+
+static INLINE uint16x8_t flat_mask5(const uint16x8_t p4, const uint16x8_t p3,
+ const uint16x8_t p2, const uint16x8_t p1,
+ const uint16x8_t p0, const uint16x8_t q0,
+ const uint16x8_t q1, const uint16x8_t q2,
+ const uint16x8_t q3, const uint16x8_t q4,
+ const uint16x8_t flat,
+ uint32_t *flat2_status, const int bd) {
+ uint16x8_t flat2 = vabdq_u16(p4, p0);
+ flat2 = vmaxq_u16(flat2, vabdq_u16(p3, p0));
+ flat2 = vmaxq_u16(flat2, vabdq_u16(p2, p0));
+ flat2 = vmaxq_u16(flat2, vabdq_u16(p1, p0));
+ flat2 = vmaxq_u16(flat2, vabdq_u16(q1, q0));
+ flat2 = vmaxq_u16(flat2, vabdq_u16(q2, q0));
+ flat2 = vmaxq_u16(flat2, vabdq_u16(q3, q0));
+ flat2 = vmaxq_u16(flat2, vabdq_u16(q4, q0));
+ flat2 = vcleq_u16(flat2, vdupq_n_u16(1 << (bd - 8)));
+ flat2 = vandq_u16(flat2, flat);
+ *flat2_status = calc_flat_status(flat2);
+
+ return flat2;
+}
+
+static INLINE int16x8_t flip_sign(const uint16x8_t v, const int bd) {
+ const uint16x8_t offset = vdupq_n_u16(0x80 << (bd - 8));
+ return vreinterpretq_s16_u16(vsubq_u16(v, offset));
+}
+
+static INLINE uint16x8_t flip_sign_back(const int16x8_t v, const int bd) {
+ const int16x8_t offset = vdupq_n_s16(0x80 << (bd - 8));
+ return vreinterpretq_u16_s16(vaddq_s16(v, offset));
+}
+
+static INLINE void filter_update(const uint16x8_t sub0, const uint16x8_t sub1,
+ const uint16x8_t add0, const uint16x8_t add1,
+ uint16x8_t *sum) {
+ *sum = vsubq_u16(*sum, sub0);
+ *sum = vsubq_u16(*sum, sub1);
+ *sum = vaddq_u16(*sum, add0);
+ *sum = vaddq_u16(*sum, add1);
+}
+
+static INLINE uint16x8_t calc_7_tap_filter_kernel(const uint16x8_t sub0,
+ const uint16x8_t sub1,
+ const uint16x8_t add0,
+ const uint16x8_t add1,
+ uint16x8_t *sum) {
+ filter_update(sub0, sub1, add0, add1, sum);
+ return vrshrq_n_u16(*sum, 3);
+}
+
+static INLINE uint16x8_t apply_15_tap_filter_kernel(
+ const uint16x8_t flat, const uint16x8_t sub0, const uint16x8_t sub1,
+ const uint16x8_t add0, const uint16x8_t add1, const uint16x8_t in,
+ uint16x8_t *sum) {
+ filter_update(sub0, sub1, add0, add1, sum);
+ return vbslq_u16(flat, vrshrq_n_u16(*sum, 4), in);
+}
+
+// 7-tap filter [1, 1, 1, 2, 1, 1, 1]
+static INLINE void calc_7_tap_filter(const uint16x8_t p3, const uint16x8_t p2,
+ const uint16x8_t p1, const uint16x8_t p0,
+ const uint16x8_t q0, const uint16x8_t q1,
+ const uint16x8_t q2, const uint16x8_t q3,
+ uint16x8_t *op2, uint16x8_t *op1,
+ uint16x8_t *op0, uint16x8_t *oq0,
+ uint16x8_t *oq1, uint16x8_t *oq2) {
+ uint16x8_t sum;
+ sum = vaddq_u16(p3, p3); // 2*p3
+ sum = vaddq_u16(sum, p3); // 3*p3
+ sum = vaddq_u16(sum, p2); // 3*p3+p2
+ sum = vaddq_u16(sum, p2); // 3*p3+2*p2
+ sum = vaddq_u16(sum, p1); // 3*p3+2*p2+p1
+ sum = vaddq_u16(sum, p0); // 3*p3+2*p2+p1+p0
+ sum = vaddq_u16(sum, q0); // 3*p3+2*p2+p1+p0+q0
+ *op2 = vrshrq_n_u16(sum, 3);
+ *op1 = calc_7_tap_filter_kernel(p3, p2, p1, q1, &sum);
+ *op0 = calc_7_tap_filter_kernel(p3, p1, p0, q2, &sum);
+ *oq0 = calc_7_tap_filter_kernel(p3, p0, q0, q3, &sum);
+ *oq1 = calc_7_tap_filter_kernel(p2, q0, q1, q3, &sum);
+ *oq2 = calc_7_tap_filter_kernel(p1, q1, q2, q3, &sum);
+}
+
+static INLINE void apply_7_tap_filter(const uint16x8_t flat,
+ const uint16x8_t p3, const uint16x8_t p2,
+ const uint16x8_t p1, const uint16x8_t p0,
+ const uint16x8_t q0, const uint16x8_t q1,
+ const uint16x8_t q2, const uint16x8_t q3,
+ uint16x8_t *op2, uint16x8_t *op1,
+ uint16x8_t *op0, uint16x8_t *oq0,
+ uint16x8_t *oq1, uint16x8_t *oq2) {
+ uint16x8_t tp1, tp0, tq0, tq1;
+ calc_7_tap_filter(p3, p2, p1, p0, q0, q1, q2, q3, op2, &tp1, &tp0, &tq0, &tq1,
+ oq2);
+ *op2 = vbslq_u16(flat, *op2, p2);
+ *op1 = vbslq_u16(flat, tp1, *op1);
+ *op0 = vbslq_u16(flat, tp0, *op0);
+ *oq0 = vbslq_u16(flat, tq0, *oq0);
+ *oq1 = vbslq_u16(flat, tq1, *oq1);
+ *oq2 = vbslq_u16(flat, *oq2, q2);
+}
+
+// 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1]
+static INLINE void apply_15_tap_filter(
+ const uint16x8_t flat2, const uint16x8_t p7, const uint16x8_t p6,
+ const uint16x8_t p5, const uint16x8_t p4, const uint16x8_t p3,
+ const uint16x8_t p2, const uint16x8_t p1, const uint16x8_t p0,
+ const uint16x8_t q0, const uint16x8_t q1, const uint16x8_t q2,
+ const uint16x8_t q3, const uint16x8_t q4, const uint16x8_t q5,
+ const uint16x8_t q6, const uint16x8_t q7, uint16x8_t *op6, uint16x8_t *op5,
+ uint16x8_t *op4, uint16x8_t *op3, uint16x8_t *op2, uint16x8_t *op1,
+ uint16x8_t *op0, uint16x8_t *oq0, uint16x8_t *oq1, uint16x8_t *oq2,
+ uint16x8_t *oq3, uint16x8_t *oq4, uint16x8_t *oq5, uint16x8_t *oq6) {
+ uint16x8_t sum;
+ sum = vshlq_n_u16(p7, 3); // 8*p7
+ sum = vsubq_u16(sum, p7); // 7*p7
+ sum = vaddq_u16(sum, p6); // 7*p7+p6
+ sum = vaddq_u16(sum, p6); // 7*p7+2*p6
+ sum = vaddq_u16(sum, p5); // 7*p7+2*p6+p5
+ sum = vaddq_u16(sum, p4); // 7*p7+2*p6+p5+p4
+ sum = vaddq_u16(sum, p3); // 7*p7+2*p6+p5+p4+p3
+ sum = vaddq_u16(sum, p2); // 7*p7+2*p6+p5+p4+p3+p2
+ sum = vaddq_u16(sum, p1); // 7*p7+2*p6+p5+p4+p3+p2+p1
+ sum = vaddq_u16(sum, p0); // 7*p7+2*p6+p5+p4+p3+p2+p1+p0
+ sum = vaddq_u16(sum, q0); // 7*p7+2*p6+p5+p4+p3+p2+p1+p0+q0
+ *op6 = vbslq_u16(flat2, vrshrq_n_u16(sum, 4), p6);
+ *op5 = apply_15_tap_filter_kernel(flat2, p7, p6, p5, q1, p5, &sum);
+ *op4 = apply_15_tap_filter_kernel(flat2, p7, p5, p4, q2, p4, &sum);
+ *op3 = apply_15_tap_filter_kernel(flat2, p7, p4, p3, q3, p3, &sum);
+ *op2 = apply_15_tap_filter_kernel(flat2, p7, p3, p2, q4, *op2, &sum);
+ *op1 = apply_15_tap_filter_kernel(flat2, p7, p2, p1, q5, *op1, &sum);
+ *op0 = apply_15_tap_filter_kernel(flat2, p7, p1, p0, q6, *op0, &sum);
+ *oq0 = apply_15_tap_filter_kernel(flat2, p7, p0, q0, q7, *oq0, &sum);
+ *oq1 = apply_15_tap_filter_kernel(flat2, p6, q0, q1, q7, *oq1, &sum);
+ *oq2 = apply_15_tap_filter_kernel(flat2, p5, q1, q2, q7, *oq2, &sum);
+ *oq3 = apply_15_tap_filter_kernel(flat2, p4, q2, q3, q7, q3, &sum);
+ *oq4 = apply_15_tap_filter_kernel(flat2, p3, q3, q4, q7, q4, &sum);
+ *oq5 = apply_15_tap_filter_kernel(flat2, p2, q4, q5, q7, q5, &sum);
+ *oq6 = apply_15_tap_filter_kernel(flat2, p1, q5, q6, q7, q6, &sum);
+}
+
+static INLINE void filter4(const uint16x8_t mask, const uint16x8_t hev,
+ const uint16x8_t p1, const uint16x8_t p0,
+ const uint16x8_t q0, const uint16x8_t q1,
+ uint16x8_t *op1, uint16x8_t *op0, uint16x8_t *oq0,
+ uint16x8_t *oq1, const int bd) {
+ const int16x8_t max = vdupq_n_s16((1 << (bd - 1)) - 1);
+ const int16x8_t min = vdupq_n_s16((int16_t)(((uint32_t)-1) << (bd - 1)));
+ int16x8_t filter, filter1, filter2, t;
+ int16x8_t ps1 = flip_sign(p1, bd);
+ int16x8_t ps0 = flip_sign(p0, bd);
+ int16x8_t qs0 = flip_sign(q0, bd);
+ int16x8_t qs1 = flip_sign(q1, bd);
+
+ /* add outer taps if we have high edge variance */
+ filter = vsubq_s16(ps1, qs1);
+ filter = vmaxq_s16(filter, min);
+ filter = vminq_s16(filter, max);
+ filter = vandq_s16(filter, vreinterpretq_s16_u16(hev));
+ t = vsubq_s16(qs0, ps0);
+
+ /* inner taps */
+ filter = vaddq_s16(filter, t);
+ filter = vaddq_s16(filter, t);
+ filter = vaddq_s16(filter, t);
+ filter = vmaxq_s16(filter, min);
+ filter = vminq_s16(filter, max);
+ filter = vandq_s16(filter, vreinterpretq_s16_u16(mask));
+
+ /* save bottom 3 bits so that we round one side +4 and the other +3 */
+ /* if it equals 4 we'll set it to adjust by -1 to account for the fact */
+ /* we'd round it by 3 the other way */
+ t = vaddq_s16(filter, vdupq_n_s16(4));
+ t = vminq_s16(t, max);
+ filter1 = vshrq_n_s16(t, 3);
+ t = vaddq_s16(filter, vdupq_n_s16(3));
+ t = vminq_s16(t, max);
+ filter2 = vshrq_n_s16(t, 3);
+
+ qs0 = vsubq_s16(qs0, filter1);
+ qs0 = vmaxq_s16(qs0, min);
+ qs0 = vminq_s16(qs0, max);
+ ps0 = vaddq_s16(ps0, filter2);
+ ps0 = vmaxq_s16(ps0, min);
+ ps0 = vminq_s16(ps0, max);
+ *oq0 = flip_sign_back(qs0, bd);
+ *op0 = flip_sign_back(ps0, bd);
+
+ /* outer tap adjustments */
+ filter = vrshrq_n_s16(filter1, 1);
+ filter = vbicq_s16(filter, vreinterpretq_s16_u16(hev));
+
+ qs1 = vsubq_s16(qs1, filter);
+ qs1 = vmaxq_s16(qs1, min);
+ qs1 = vminq_s16(qs1, max);
+ ps1 = vaddq_s16(ps1, filter);
+ ps1 = vmaxq_s16(ps1, min);
+ ps1 = vminq_s16(ps1, max);
+ *oq1 = flip_sign_back(qs1, bd);
+ *op1 = flip_sign_back(ps1, bd);
+}
+
+static INLINE void filter8(const uint16x8_t mask, const uint16x8_t flat,
+ const uint32_t flat_status, const uint16x8_t hev,
+ const uint16x8_t p3, const uint16x8_t p2,
+ const uint16x8_t p1, const uint16x8_t p0,
+ const uint16x8_t q0, const uint16x8_t q1,
+ const uint16x8_t q2, const uint16x8_t q3,
+ uint16x8_t *op2, uint16x8_t *op1, uint16x8_t *op0,
+ uint16x8_t *oq0, uint16x8_t *oq1, uint16x8_t *oq2,
+ const int bd) {
+ if (flat_status != (uint32_t)-4) {
+ filter4(mask, hev, p1, p0, q0, q1, op1, op0, oq0, oq1, bd);
+ *op2 = p2;
+ *oq2 = q2;
+ if (flat_status) {
+ apply_7_tap_filter(flat, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0,
+ oq0, oq1, oq2);
+ }
+ } else {
+ calc_7_tap_filter(p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1,
+ oq2);
+ }
+}
+
+static INLINE void filter16(
+ const uint16x8_t mask, const uint16x8_t flat, const uint32_t flat_status,
+ const uint16x8_t flat2, const uint32_t flat2_status, const uint16x8_t hev,
+ const uint16x8_t p7, const uint16x8_t p6, const uint16x8_t p5,
+ const uint16x8_t p4, const uint16x8_t p3, const uint16x8_t p2,
+ const uint16x8_t p1, const uint16x8_t p0, const uint16x8_t q0,
+ const uint16x8_t q1, const uint16x8_t q2, const uint16x8_t q3,
+ const uint16x8_t q4, const uint16x8_t q5, const uint16x8_t q6,
+ const uint16x8_t q7, uint16x8_t *op6, uint16x8_t *op5, uint16x8_t *op4,
+ uint16x8_t *op3, uint16x8_t *op2, uint16x8_t *op1, uint16x8_t *op0,
+ uint16x8_t *oq0, uint16x8_t *oq1, uint16x8_t *oq2, uint16x8_t *oq3,
+ uint16x8_t *oq4, uint16x8_t *oq5, uint16x8_t *oq6, const int bd) {
+ if (flat_status != (uint32_t)-4) {
+ filter4(mask, hev, p1, p0, q0, q1, op1, op0, oq0, oq1, bd);
+ }
+
+ if (flat_status) {
+ *op2 = p2;
+ *oq2 = q2;
+ if (flat2_status != (uint32_t)-4) {
+ apply_7_tap_filter(flat, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0,
+ oq0, oq1, oq2);
+ }
+ if (flat2_status) {
+ apply_15_tap_filter(flat2, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3,
+ q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0,
+ oq0, oq1, oq2, oq3, oq4, oq5, oq6);
+ }
+ }
+}
+
+static INLINE void load_8x8(const uint16_t *s, const int p, uint16x8_t *p3,
+ uint16x8_t *p2, uint16x8_t *p1, uint16x8_t *p0,
+ uint16x8_t *q0, uint16x8_t *q1, uint16x8_t *q2,
+ uint16x8_t *q3) {
+ *p3 = vld1q_u16(s);
+ s += p;
+ *p2 = vld1q_u16(s);
+ s += p;
+ *p1 = vld1q_u16(s);
+ s += p;
+ *p0 = vld1q_u16(s);
+ s += p;
+ *q0 = vld1q_u16(s);
+ s += p;
+ *q1 = vld1q_u16(s);
+ s += p;
+ *q2 = vld1q_u16(s);
+ s += p;
+ *q3 = vld1q_u16(s);
+}
+
+static INLINE void load_8x16(const uint16_t *s, const int p, uint16x8_t *s0,
+ uint16x8_t *s1, uint16x8_t *s2, uint16x8_t *s3,
+ uint16x8_t *s4, uint16x8_t *s5, uint16x8_t *s6,
+ uint16x8_t *s7, uint16x8_t *s8, uint16x8_t *s9,
+ uint16x8_t *s10, uint16x8_t *s11, uint16x8_t *s12,
+ uint16x8_t *s13, uint16x8_t *s14,
+ uint16x8_t *s15) {
+ *s0 = vld1q_u16(s);
+ s += p;
+ *s1 = vld1q_u16(s);
+ s += p;
+ *s2 = vld1q_u16(s);
+ s += p;
+ *s3 = vld1q_u16(s);
+ s += p;
+ *s4 = vld1q_u16(s);
+ s += p;
+ *s5 = vld1q_u16(s);
+ s += p;
+ *s6 = vld1q_u16(s);
+ s += p;
+ *s7 = vld1q_u16(s);
+ s += p;
+ *s8 = vld1q_u16(s);
+ s += p;
+ *s9 = vld1q_u16(s);
+ s += p;
+ *s10 = vld1q_u16(s);
+ s += p;
+ *s11 = vld1q_u16(s);
+ s += p;
+ *s12 = vld1q_u16(s);
+ s += p;
+ *s13 = vld1q_u16(s);
+ s += p;
+ *s14 = vld1q_u16(s);
+ s += p;
+ *s15 = vld1q_u16(s);
+}
+
+static INLINE void store_8x4(uint16_t *s, const int p, const uint16x8_t s0,
+ const uint16x8_t s1, const uint16x8_t s2,
+ const uint16x8_t s3) {
+ vst1q_u16(s, s0);
+ s += p;
+ vst1q_u16(s, s1);
+ s += p;
+ vst1q_u16(s, s2);
+ s += p;
+ vst1q_u16(s, s3);
+}
+
+static INLINE void store_8x6(uint16_t *s, const int p, const uint16x8_t s0,
+ const uint16x8_t s1, const uint16x8_t s2,
+ const uint16x8_t s3, const uint16x8_t s4,
+ const uint16x8_t s5) {
+ vst1q_u16(s, s0);
+ s += p;
+ vst1q_u16(s, s1);
+ s += p;
+ vst1q_u16(s, s2);
+ s += p;
+ vst1q_u16(s, s3);
+ s += p;
+ vst1q_u16(s, s4);
+ s += p;
+ vst1q_u16(s, s5);
+}
+
+static INLINE void store_4x8(uint16_t *s, const int p, const uint16x8_t p1,
+ const uint16x8_t p0, const uint16x8_t q0,
+ const uint16x8_t q1) {
+ uint16x8x4_t o;
+
+ o.val[0] = p1;
+ o.val[1] = p0;
+ o.val[2] = q0;
+ o.val[3] = q1;
+ vst4q_lane_u16(s, o, 0);
+ s += p;
+ vst4q_lane_u16(s, o, 1);
+ s += p;
+ vst4q_lane_u16(s, o, 2);
+ s += p;
+ vst4q_lane_u16(s, o, 3);
+ s += p;
+ vst4q_lane_u16(s, o, 4);
+ s += p;
+ vst4q_lane_u16(s, o, 5);
+ s += p;
+ vst4q_lane_u16(s, o, 6);
+ s += p;
+ vst4q_lane_u16(s, o, 7);
+}
+
+static INLINE void store_6x8(uint16_t *s, const int p, const uint16x8_t s0,
+ const uint16x8_t s1, const uint16x8_t s2,
+ const uint16x8_t s3, const uint16x8_t s4,
+ const uint16x8_t s5) {
+ uint16x8x3_t o0, o1;
+
+ o0.val[0] = s0;
+ o0.val[1] = s1;
+ o0.val[2] = s2;
+ o1.val[0] = s3;
+ o1.val[1] = s4;
+ o1.val[2] = s5;
+ vst3q_lane_u16(s - 3, o0, 0);
+ vst3q_lane_u16(s + 0, o1, 0);
+ s += p;
+ vst3q_lane_u16(s - 3, o0, 1);
+ vst3q_lane_u16(s + 0, o1, 1);
+ s += p;
+ vst3q_lane_u16(s - 3, o0, 2);
+ vst3q_lane_u16(s + 0, o1, 2);
+ s += p;
+ vst3q_lane_u16(s - 3, o0, 3);
+ vst3q_lane_u16(s + 0, o1, 3);
+ s += p;
+ vst3q_lane_u16(s - 3, o0, 4);
+ vst3q_lane_u16(s + 0, o1, 4);
+ s += p;
+ vst3q_lane_u16(s - 3, o0, 5);
+ vst3q_lane_u16(s + 0, o1, 5);
+ s += p;
+ vst3q_lane_u16(s - 3, o0, 6);
+ vst3q_lane_u16(s + 0, o1, 6);
+ s += p;
+ vst3q_lane_u16(s - 3, o0, 7);
+ vst3q_lane_u16(s + 0, o1, 7);
+}
+
+static INLINE void store_7x8(uint16_t *s, const int p, const uint16x8_t s0,
+ const uint16x8_t s1, const uint16x8_t s2,
+ const uint16x8_t s3, const uint16x8_t s4,
+ const uint16x8_t s5, const uint16x8_t s6) {
+ uint16x8x4_t o0;
+ uint16x8x3_t o1;
+
+ o0.val[0] = s0;
+ o0.val[1] = s1;
+ o0.val[2] = s2;
+ o0.val[3] = s3;
+ o1.val[0] = s4;
+ o1.val[1] = s5;
+ o1.val[2] = s6;
+ vst4q_lane_u16(s - 4, o0, 0);
+ vst3q_lane_u16(s + 0, o1, 0);
+ s += p;
+ vst4q_lane_u16(s - 4, o0, 1);
+ vst3q_lane_u16(s + 0, o1, 1);
+ s += p;
+ vst4q_lane_u16(s - 4, o0, 2);
+ vst3q_lane_u16(s + 0, o1, 2);
+ s += p;
+ vst4q_lane_u16(s - 4, o0, 3);
+ vst3q_lane_u16(s + 0, o1, 3);
+ s += p;
+ vst4q_lane_u16(s - 4, o0, 4);
+ vst3q_lane_u16(s + 0, o1, 4);
+ s += p;
+ vst4q_lane_u16(s - 4, o0, 5);
+ vst3q_lane_u16(s + 0, o1, 5);
+ s += p;
+ vst4q_lane_u16(s - 4, o0, 6);
+ vst3q_lane_u16(s + 0, o1, 6);
+ s += p;
+ vst4q_lane_u16(s - 4, o0, 7);
+ vst3q_lane_u16(s + 0, o1, 7);
+}
+
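+// Write back the horizontal 16-wide filter output. p1..q1 are stored
+// unconditionally, p2/q2 only when the flat mask fired, and the outer
+// p6..p3 / q3..q6 rows only when flat2 fired as well.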
+static INLINE void store_8x14(uint16_t *s, const int p, const uint16x8_t p6,
+ const uint16x8_t p5, const uint16x8_t p4,
+ const uint16x8_t p3, const uint16x8_t p2,
+ const uint16x8_t p1, const uint16x8_t p0,
+ const uint16x8_t q0, const uint16x8_t q1,
+ const uint16x8_t q2, const uint16x8_t q3,
+ const uint16x8_t q4, const uint16x8_t q5,
+ const uint16x8_t q6, const uint32_t flat_status,
+ const uint32_t flat2_status) {
+ if (flat_status) {
+ if (flat2_status) {
+ vst1q_u16(s - 7 * p, p6);
+ vst1q_u16(s - 6 * p, p5);
+ vst1q_u16(s - 5 * p, p4);
+ vst1q_u16(s - 4 * p, p3);
+ vst1q_u16(s + 3 * p, q3);
+ vst1q_u16(s + 4 * p, q4);
+ vst1q_u16(s + 5 * p, q5);
+ vst1q_u16(s + 6 * p, q6);
+ }
+ vst1q_u16(s - 3 * p, p2);
+ vst1q_u16(s + 2 * p, q2);
+ }
+ vst1q_u16(s - 2 * p, p1);
+ vst1q_u16(s - 1 * p, p0);
+ vst1q_u16(s + 0 * p, q0);
+ vst1q_u16(s + 1 * p, q1);
+}
+
+void vpx_highbd_lpf_horizontal_4_neon(uint16_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd) {
+ uint16x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3,
+ mask, hev;
+
+ load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd);
+ load_8x8(s - 4 * p, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+ filter_hev_mask4(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1,
+ q2, q3, &hev, &mask);
+ filter4(mask, hev, p1, p0, q0, q1, &p1, &p0, &q0, &q1, bd);
+ store_8x4(s - 2 * p, p, p1, p0, q0, q1);
+}
+
+void vpx_highbd_lpf_horizontal_4_dual_neon(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ vpx_highbd_lpf_horizontal_4_neon(s, p, blimit0, limit0, thresh0, bd);
+ vpx_highbd_lpf_horizontal_4_neon(s + 8, p, blimit1, limit1, thresh1, bd);
+}
+
+void vpx_highbd_lpf_vertical_4_neon(uint16_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int bd) {
+ uint16x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3,
+ mask, hev;
+
+ load_8x8(s - 4, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+ transpose_s16_8x8((int16x8_t *)&p3, (int16x8_t *)&p2, (int16x8_t *)&p1,
+ (int16x8_t *)&p0, (int16x8_t *)&q0, (int16x8_t *)&q1,
+ (int16x8_t *)&q2, (int16x8_t *)&q3);
+ load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd);
+ filter_hev_mask4(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1,
+ q2, q3, &hev, &mask);
+ filter4(mask, hev, p1, p0, q0, q1, &p1, &p0, &q0, &q1, bd);
+ store_4x8(s - 2, p, p1, p0, q0, q1);
+}
+
+void vpx_highbd_lpf_vertical_4_dual_neon(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ vpx_highbd_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, bd);
+ vpx_highbd_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, bd);
+}
+
+void vpx_highbd_lpf_horizontal_8_neon(uint16_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd) {
+ uint16x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3,
+ op2, op1, op0, oq0, oq1, oq2, mask, flat, hev;
+ uint32_t flat_status;
+
+ load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd);
+ load_8x8(s - 4 * p, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+ mask = filter_flat_hev_mask(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0,
+ q0, q1, q2, q3, &flat, &flat_status, &hev, bd);
+ filter8(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2,
+ &op1, &op0, &oq0, &oq1, &oq2, bd);
+ store_8x6(s - 3 * p, p, op2, op1, op0, oq0, oq1, oq2);
+}
+
+void vpx_highbd_lpf_horizontal_8_dual_neon(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ vpx_highbd_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, bd);
+ vpx_highbd_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, bd);
+}
+
+void vpx_highbd_lpf_vertical_8_neon(uint16_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit, const uint8_t *thresh,
+ int bd) {
+ uint16x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3,
+ op2, op1, op0, oq0, oq1, oq2, mask, flat, hev;
+ uint32_t flat_status;
+
+ load_8x8(s - 4, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3);
+ transpose_s16_8x8((int16x8_t *)&p3, (int16x8_t *)&p2, (int16x8_t *)&p1,
+ (int16x8_t *)&p0, (int16x8_t *)&q0, (int16x8_t *)&q1,
+ (int16x8_t *)&q2, (int16x8_t *)&q3);
+ load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd);
+ mask = filter_flat_hev_mask(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0,
+ q0, q1, q2, q3, &flat, &flat_status, &hev, bd);
+ filter8(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2,
+ &op1, &op0, &oq0, &oq1, &oq2, bd);
+ // Note: store_6x8() is faster than transpose + store_8x8().
+ store_6x8(s, p, op2, op1, op0, oq0, oq1, oq2);
+}
+
+void vpx_highbd_lpf_vertical_8_dual_neon(
+ uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0,
+ const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1,
+ const uint8_t *thresh1, int bd) {
+ vpx_highbd_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, bd);
+ vpx_highbd_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, bd);
+}
+
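+// Filter a group of 8 pixels across a horizontal edge. The dual (16 pixel)
+// entry point below simply runs this kernel twice, 8 pixels apart.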
+static void lpf_horizontal_16_kernel(uint16_t *s, int p,
+ const uint16x8_t blimit_vec,
+ const uint16x8_t limit_vec,
+ const uint16x8_t thresh_vec,
+ const int bd) {
+ uint16x8_t mask, flat, flat2, hev, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2,
+ q3, q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3,
+ oq4, oq5, oq6;
+ uint32_t flat_status, flat2_status;
+
+ load_8x16(s - 8 * p, p, &p7, &p6, &p5, &p4, &p3, &p2, &p1, &p0, &q0, &q1, &q2,
+ &q3, &q4, &q5, &q6, &q7);
+ mask = filter_flat_hev_mask(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0,
+ q0, q1, q2, q3, &flat, &flat_status, &hev, bd);
+ flat2 = flat_mask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat,
+ &flat2_status, bd);
+ filter16(mask, flat, flat_status, flat2, flat2_status, hev, p7, p6, p5, p4,
+ p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, &op6, &op5, &op4,
+ &op3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &oq3, &oq4, &oq5, &oq6,
+ bd);
+ store_8x14(s, p, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4,
+ oq5, oq6, flat_status, flat2_status);
+}
+
+static void lpf_vertical_16_kernel(uint16_t *s, int p,
+ const uint16x8_t blimit_vec,
+ const uint16x8_t limit_vec,
+ const uint16x8_t thresh_vec, const int bd) {
+ uint16x8_t mask, flat, flat2, hev, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2,
+ q3, q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3,
+ oq4, oq5, oq6;
+ uint32_t flat_status, flat2_status;
+
+ load_8x8(s - 8, p, &p7, &p6, &p5, &p4, &p3, &p2, &p1, &p0);
+ transpose_s16_8x8((int16x8_t *)&p7, (int16x8_t *)&p6, (int16x8_t *)&p5,
+ (int16x8_t *)&p4, (int16x8_t *)&p3, (int16x8_t *)&p2,
+ (int16x8_t *)&p1, (int16x8_t *)&p0);
+ load_8x8(s, p, &q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7);
+ transpose_s16_8x8((int16x8_t *)&q0, (int16x8_t *)&q1, (int16x8_t *)&q2,
+ (int16x8_t *)&q3, (int16x8_t *)&q4, (int16x8_t *)&q5,
+ (int16x8_t *)&q6, (int16x8_t *)&q7);
+ mask = filter_flat_hev_mask(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0,
+ q0, q1, q2, q3, &flat, &flat_status, &hev, bd);
+ flat2 = flat_mask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat,
+ &flat2_status, bd);
+ filter16(mask, flat, flat_status, flat2, flat2_status, hev, p7, p6, p5, p4,
+ p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, &op6, &op5, &op4,
+ &op3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &oq3, &oq4, &oq5, &oq6,
+ bd);
+ if (flat_status) {
+ if (flat2_status) {
+ store_7x8(s - 3, p, op6, op5, op4, op3, op2, op1, op0);
+ store_7x8(s + 4, p, oq0, oq1, oq2, oq3, oq4, oq5, oq6);
+ } else {
+ // Note: store_6x8() is faster than transpose + store_8x8().
+ store_6x8(s, p, op2, op1, op0, oq0, oq1, oq2);
+ }
+ } else {
+ store_4x8(s - 2, p, op1, op0, oq0, oq1);
+ }
+}
+
+void vpx_highbd_lpf_horizontal_16_neon(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd) {
+ uint16x8_t blimit_vec, limit_vec, thresh_vec;
+ load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd);
+ lpf_horizontal_16_kernel(s, p, blimit_vec, limit_vec, thresh_vec, bd);
+}
+
+void vpx_highbd_lpf_horizontal_16_dual_neon(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd) {
+ uint16x8_t blimit_vec, limit_vec, thresh_vec;
+ load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd);
+ lpf_horizontal_16_kernel(s, p, blimit_vec, limit_vec, thresh_vec, bd);
+ lpf_horizontal_16_kernel(s + 8, p, blimit_vec, limit_vec, thresh_vec, bd);
+}
+
+void vpx_highbd_lpf_vertical_16_neon(uint16_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd) {
+ uint16x8_t blimit_vec, limit_vec, thresh_vec;
+ load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd);
+ lpf_vertical_16_kernel(s, p, blimit_vec, limit_vec, thresh_vec, bd);
+}
+
+void vpx_highbd_lpf_vertical_16_dual_neon(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh, int bd) {
+ uint16x8_t blimit_vec, limit_vec, thresh_vec;
+ load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd);
+ lpf_vertical_16_kernel(s, p, blimit_vec, limit_vec, thresh_vec, bd);
+ lpf_vertical_16_kernel(s + 8 * p, p, blimit_vec, limit_vec, thresh_vec, bd);
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve8_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve8_neon.c
new file mode 100644
index 00000000000..1fde13e8d6d
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve8_neon.c
@@ -0,0 +1,923 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/arm/transpose_neon.h"
+#include "vpx_ports/mem.h"
+
+static INLINE void load_4x4(const int16_t *s, ptrdiff_t p, int16x4_t *s0,
+ int16x4_t *s1, int16x4_t *s2, int16x4_t *s3) {
+ *s0 = vld1_s16(s);
+ s += p;
+ *s1 = vld1_s16(s);
+ s += p;
+ *s2 = vld1_s16(s);
+ s += p;
+ *s3 = vld1_s16(s);
+}
+
+static INLINE void load_8x4(const uint16_t *s, ptrdiff_t p, uint16x8_t *s0,
+ uint16x8_t *s1, uint16x8_t *s2, uint16x8_t *s3) {
+ *s0 = vld1q_u16(s);
+ s += p;
+ *s1 = vld1q_u16(s);
+ s += p;
+ *s2 = vld1q_u16(s);
+ s += p;
+ *s3 = vld1q_u16(s);
+}
+
+static INLINE void load_8x8(const int16_t *s, ptrdiff_t p, int16x8_t *s0,
+ int16x8_t *s1, int16x8_t *s2, int16x8_t *s3,
+ int16x8_t *s4, int16x8_t *s5, int16x8_t *s6,
+ int16x8_t *s7) {
+ *s0 = vld1q_s16(s);
+ s += p;
+ *s1 = vld1q_s16(s);
+ s += p;
+ *s2 = vld1q_s16(s);
+ s += p;
+ *s3 = vld1q_s16(s);
+ s += p;
+ *s4 = vld1q_s16(s);
+ s += p;
+ *s5 = vld1q_s16(s);
+ s += p;
+ *s6 = vld1q_s16(s);
+ s += p;
+ *s7 = vld1q_s16(s);
+}
+
+static INLINE void store_8x8(uint16_t *s, ptrdiff_t p, const uint16x8_t s0,
+ const uint16x8_t s1, const uint16x8_t s2,
+ const uint16x8_t s3, const uint16x8_t s4,
+ const uint16x8_t s5, const uint16x8_t s6,
+ const uint16x8_t s7) {
+ vst1q_u16(s, s0);
+ s += p;
+ vst1q_u16(s, s1);
+ s += p;
+ vst1q_u16(s, s2);
+ s += p;
+ vst1q_u16(s, s3);
+ s += p;
+ vst1q_u16(s, s4);
+ s += p;
+ vst1q_u16(s, s5);
+ s += p;
+ vst1q_u16(s, s6);
+ s += p;
+ vst1q_u16(s, s7);
+}
+
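+// 8-tap filter of 4 pixels. Products are accumulated in 32 bits; rounding,
+// narrowing and clamping to the bit-depth maximum are left to the caller.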
+static INLINE int32x4_t convolve8_4(const int16x4_t s0, const int16x4_t s1,
+ const int16x4_t s2, const int16x4_t s3,
+ const int16x4_t s4, const int16x4_t s5,
+ const int16x4_t s6, const int16x4_t s7,
+ const int16x8_t filters) {
+ const int16x4_t filters_lo = vget_low_s16(filters);
+ const int16x4_t filters_hi = vget_high_s16(filters);
+ int32x4_t sum = vdupq_n_s32(0);
+
+ sum = vmlal_lane_s16(sum, s0, filters_lo, 0);
+ sum = vmlal_lane_s16(sum, s1, filters_lo, 1);
+ sum = vmlal_lane_s16(sum, s2, filters_lo, 2);
+ sum = vmlal_lane_s16(sum, s3, filters_lo, 3);
+ sum = vmlal_lane_s16(sum, s4, filters_hi, 0);
+ sum = vmlal_lane_s16(sum, s5, filters_hi, 1);
+ sum = vmlal_lane_s16(sum, s6, filters_hi, 2);
+ sum = vmlal_lane_s16(sum, s7, filters_hi, 3);
+ return sum;
+}
+
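+// 8-tap filter of 8 pixels: accumulate in 32 bits, round and narrow with a
+// saturating shift by 7 (the filter precision), then clamp to 'max',
+// i.e. (1 << bd) - 1.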
+static INLINE uint16x8_t convolve8_8(const int16x8_t s0, const int16x8_t s1,
+ const int16x8_t s2, const int16x8_t s3,
+ const int16x8_t s4, const int16x8_t s5,
+ const int16x8_t s6, const int16x8_t s7,
+ const int16x8_t filters,
+ const uint16x8_t max) {
+ const int16x4_t filters_lo = vget_low_s16(filters);
+ const int16x4_t filters_hi = vget_high_s16(filters);
+ int32x4_t sum0 = vdupq_n_s32(0);
+ int32x4_t sum1 = vdupq_n_s32(0);
+ uint16x8_t d;
+
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s0), filters_lo, 0);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s1), filters_lo, 1);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s2), filters_lo, 2);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s3), filters_lo, 3);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s4), filters_hi, 0);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s5), filters_hi, 1);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s6), filters_hi, 2);
+ sum0 = vmlal_lane_s16(sum0, vget_low_s16(s7), filters_hi, 3);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s0), filters_lo, 0);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s1), filters_lo, 1);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s2), filters_lo, 2);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s3), filters_lo, 3);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s4), filters_hi, 0);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s5), filters_hi, 1);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s6), filters_hi, 2);
+ sum1 = vmlal_lane_s16(sum1, vget_high_s16(s7), filters_hi, 3);
+ d = vcombine_u16(vqrshrun_n_s32(sum0, 7), vqrshrun_n_s32(sum1, 7));
+ d = vminq_u16(d, max);
+ return d;
+}
+
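+// Horizontal 8-tap convolution. Only the unscaled case (x_step_q4 == 16) is
+// vectorized; any other step size falls back to the C implementation.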
+void vpx_highbd_convolve8_horiz_neon(const uint8_t *src8, ptrdiff_t src_stride,
+ uint8_t *dst8, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, // unused
+ int y_step_q4, // unused
+ int w, int h, int bd) {
+ if (x_step_q4 != 16) {
+ vpx_highbd_convolve8_horiz_c(src8, src_stride, dst8, dst_stride, filter_x,
+ x_step_q4, filter_y, y_step_q4, w, h, bd);
+ } else {
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int16x8_t filters = vld1q_s16(filter_x);
+ const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
+ uint16x8_t t0, t1, t2, t3;
+
+ assert(!((intptr_t)dst & 3));
+ assert(!(dst_stride & 3));
+
+ src -= 3;
+
+ if (h == 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int32x4_t d0, d1, d2, d3;
+ uint16x8_t d01, d23;
+
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ load_8x4(src, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u16_8x4(&t0, &t1, &t2, &t3);
+ s0 = vreinterpret_s16_u16(vget_low_u16(t0));
+ s1 = vreinterpret_s16_u16(vget_low_u16(t1));
+ s2 = vreinterpret_s16_u16(vget_low_u16(t2));
+ s3 = vreinterpret_s16_u16(vget_low_u16(t3));
+ s4 = vreinterpret_s16_u16(vget_high_u16(t0));
+ s5 = vreinterpret_s16_u16(vget_high_u16(t1));
+ s6 = vreinterpret_s16_u16(vget_high_u16(t2));
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ src += 7;
+
+ do {
+ load_4x4((const int16_t *)src, src_stride, &s7, &s8, &s9, &s10);
+ transpose_s16_4x4d(&s7, &s8, &s9, &s10);
+
+ d0 = convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters);
+ d1 = convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters);
+ d2 = convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters);
+ d3 = convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters);
+
+ d01 = vcombine_u16(vqrshrun_n_s32(d0, 7), vqrshrun_n_s32(d1, 7));
+ d23 = vcombine_u16(vqrshrun_n_s32(d2, 7), vqrshrun_n_s32(d3, 7));
+ d01 = vminq_u16(d01, max);
+ d23 = vminq_u16(d23, max);
+ transpose_u16_4x4q(&d01, &d23);
+
+ vst1_u16(dst + 0 * dst_stride, vget_low_u16(d01));
+ vst1_u16(dst + 1 * dst_stride, vget_low_u16(d23));
+ vst1_u16(dst + 2 * dst_stride, vget_high_u16(d01));
+ vst1_u16(dst + 3 * dst_stride, vget_high_u16(d23));
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ src += 4;
+ dst += 4;
+ w -= 4;
+ } while (w > 0);
+ } else {
+ int16x8_t t4, t5, t6, t7;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ uint16x8_t d0, d1, d2, d3;
+
+ if (w == 4) {
+ do {
+ load_8x8((const int16_t *)src, src_stride, &s0, &s1, &s2, &s3, &s4,
+ &s5, &s6, &s7);
+ transpose_s16_8x8(&s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+
+ load_8x8((const int16_t *)(src + 7), src_stride, &s7, &s8, &s9, &s10,
+ &t4, &t5, &t6, &t7);
+ src += 8 * src_stride;
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ __builtin_prefetch(dst + 4 * dst_stride);
+ __builtin_prefetch(dst + 5 * dst_stride);
+ __builtin_prefetch(dst + 6 * dst_stride);
+ __builtin_prefetch(dst + 7 * dst_stride);
+ transpose_s16_8x8(&s7, &s8, &s9, &s10, &t4, &t5, &t6, &t7);
+
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ __builtin_prefetch(src + 7 * src_stride);
+ d0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max);
+ d1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max);
+ d2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max);
+ d3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max);
+
+ transpose_u16_8x4(&d0, &d1, &d2, &d3);
+ vst1_u16(dst, vget_low_u16(d0));
+ dst += dst_stride;
+ vst1_u16(dst, vget_low_u16(d1));
+ dst += dst_stride;
+ vst1_u16(dst, vget_low_u16(d2));
+ dst += dst_stride;
+ vst1_u16(dst, vget_low_u16(d3));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d0));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d1));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d2));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d3));
+ dst += dst_stride;
+ h -= 8;
+ } while (h > 0);
+ } else {
+ int width;
+ const uint16_t *s;
+ uint16_t *d;
+ int16x8_t s11, s12, s13, s14;
+ uint16x8_t d4, d5, d6, d7;
+
+ do {
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ __builtin_prefetch(src + 7 * src_stride);
+ load_8x8((const int16_t *)src, src_stride, &s0, &s1, &s2, &s3, &s4,
+ &s5, &s6, &s7);
+ transpose_s16_8x8(&s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+
+ width = w;
+ s = src + 7;
+ d = dst;
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ __builtin_prefetch(dst + 4 * dst_stride);
+ __builtin_prefetch(dst + 5 * dst_stride);
+ __builtin_prefetch(dst + 6 * dst_stride);
+ __builtin_prefetch(dst + 7 * dst_stride);
+
+ do {
+ load_8x8((const int16_t *)s, src_stride, &s7, &s8, &s9, &s10, &s11,
+ &s12, &s13, &s14);
+ transpose_s16_8x8(&s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14);
+
+ d0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max);
+ d1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max);
+ d2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max);
+ d3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max);
+ d4 = convolve8_8(s4, s5, s6, s7, s8, s9, s10, s11, filters, max);
+ d5 = convolve8_8(s5, s6, s7, s8, s9, s10, s11, s12, filters, max);
+ d6 = convolve8_8(s6, s7, s8, s9, s10, s11, s12, s13, filters, max);
+ d7 = convolve8_8(s7, s8, s9, s10, s11, s12, s13, s14, filters, max);
+
+ transpose_u16_8x8(&d0, &d1, &d2, &d3, &d4, &d5, &d6, &d7);
+ store_8x8(d, dst_stride, d0, d1, d2, d3, d4, d5, d6, d7);
+
+ s0 = s8;
+ s1 = s9;
+ s2 = s10;
+ s3 = s11;
+ s4 = s12;
+ s5 = s13;
+ s6 = s14;
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+ src += 8 * src_stride;
+ dst += 8 * dst_stride;
+ h -= 8;
+ } while (h > 0);
+ }
+ }
+ }
+}
+
+void vpx_highbd_convolve8_avg_horiz_neon(const uint8_t *src8,
+ ptrdiff_t src_stride, uint8_t *dst8,
+ ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, // unused
+ int y_step_q4, // unused
+ int w, int h, int bd) {
+ if (x_step_q4 != 16) {
+ vpx_highbd_convolve8_avg_horiz_c(src8, src_stride, dst8, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4,
+ w, h, bd);
+ } else {
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int16x8_t filters = vld1q_s16(filter_x);
+ const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
+ uint16x8_t t0, t1, t2, t3;
+
+ assert(!((intptr_t)dst & 3));
+ assert(!(dst_stride & 3));
+
+ src -= 3;
+
+ if (h == 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int32x4_t d0, d1, d2, d3;
+ uint16x8_t d01, d23, t01, t23;
+
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ load_8x4(src, src_stride, &t0, &t1, &t2, &t3);
+ transpose_u16_8x4(&t0, &t1, &t2, &t3);
+ s0 = vreinterpret_s16_u16(vget_low_u16(t0));
+ s1 = vreinterpret_s16_u16(vget_low_u16(t1));
+ s2 = vreinterpret_s16_u16(vget_low_u16(t2));
+ s3 = vreinterpret_s16_u16(vget_low_u16(t3));
+ s4 = vreinterpret_s16_u16(vget_high_u16(t0));
+ s5 = vreinterpret_s16_u16(vget_high_u16(t1));
+ s6 = vreinterpret_s16_u16(vget_high_u16(t2));
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ src += 7;
+
+ do {
+ load_4x4((const int16_t *)src, src_stride, &s7, &s8, &s9, &s10);
+ transpose_s16_4x4d(&s7, &s8, &s9, &s10);
+
+ d0 = convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters);
+ d1 = convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters);
+ d2 = convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters);
+ d3 = convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters);
+
+ t01 = vcombine_u16(vqrshrun_n_s32(d0, 7), vqrshrun_n_s32(d1, 7));
+ t23 = vcombine_u16(vqrshrun_n_s32(d2, 7), vqrshrun_n_s32(d3, 7));
+ t01 = vminq_u16(t01, max);
+ t23 = vminq_u16(t23, max);
+ transpose_u16_4x4q(&t01, &t23);
+
+ d01 = vcombine_u16(vld1_u16(dst + 0 * dst_stride),
+ vld1_u16(dst + 2 * dst_stride));
+ d23 = vcombine_u16(vld1_u16(dst + 1 * dst_stride),
+ vld1_u16(dst + 3 * dst_stride));
+ d01 = vrhaddq_u16(d01, t01);
+ d23 = vrhaddq_u16(d23, t23);
+
+ vst1_u16(dst + 0 * dst_stride, vget_low_u16(d01));
+ vst1_u16(dst + 1 * dst_stride, vget_low_u16(d23));
+ vst1_u16(dst + 2 * dst_stride, vget_high_u16(d01));
+ vst1_u16(dst + 3 * dst_stride, vget_high_u16(d23));
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ src += 4;
+ dst += 4;
+ w -= 4;
+ } while (w > 0);
+ } else {
+ int16x8_t t4, t5, t6, t7;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ uint16x8_t d0, d1, d2, d3, t0, t1, t2, t3;
+
+ if (w == 4) {
+ do {
+ load_8x8((const int16_t *)src, src_stride, &s0, &s1, &s2, &s3, &s4,
+ &s5, &s6, &s7);
+ transpose_s16_8x8(&s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+
+ load_8x8((const int16_t *)(src + 7), src_stride, &s7, &s8, &s9, &s10,
+ &t4, &t5, &t6, &t7);
+ src += 8 * src_stride;
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ __builtin_prefetch(dst + 4 * dst_stride);
+ __builtin_prefetch(dst + 5 * dst_stride);
+ __builtin_prefetch(dst + 6 * dst_stride);
+ __builtin_prefetch(dst + 7 * dst_stride);
+ transpose_s16_8x8(&s7, &s8, &s9, &s10, &t4, &t5, &t6, &t7);
+
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ __builtin_prefetch(src + 7 * src_stride);
+ t0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max);
+ t1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max);
+ t2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max);
+ t3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max);
+ transpose_u16_8x4(&t0, &t1, &t2, &t3);
+
+ d0 = vcombine_u16(vld1_u16(dst + 0 * dst_stride),
+ vld1_u16(dst + 4 * dst_stride));
+ d1 = vcombine_u16(vld1_u16(dst + 1 * dst_stride),
+ vld1_u16(dst + 5 * dst_stride));
+ d2 = vcombine_u16(vld1_u16(dst + 2 * dst_stride),
+ vld1_u16(dst + 6 * dst_stride));
+ d3 = vcombine_u16(vld1_u16(dst + 3 * dst_stride),
+ vld1_u16(dst + 7 * dst_stride));
+ d0 = vrhaddq_u16(d0, t0);
+ d1 = vrhaddq_u16(d1, t1);
+ d2 = vrhaddq_u16(d2, t2);
+ d3 = vrhaddq_u16(d3, t3);
+
+ vst1_u16(dst, vget_low_u16(d0));
+ dst += dst_stride;
+ vst1_u16(dst, vget_low_u16(d1));
+ dst += dst_stride;
+ vst1_u16(dst, vget_low_u16(d2));
+ dst += dst_stride;
+ vst1_u16(dst, vget_low_u16(d3));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d0));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d1));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d2));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d3));
+ dst += dst_stride;
+ h -= 8;
+ } while (h > 0);
+ } else {
+ int width;
+ const uint16_t *s;
+ uint16_t *d;
+ int16x8_t s11, s12, s13, s14;
+ uint16x8_t d4, d5, d6, d7;
+
+ do {
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ __builtin_prefetch(src + 7 * src_stride);
+ load_8x8((const int16_t *)src, src_stride, &s0, &s1, &s2, &s3, &s4,
+ &s5, &s6, &s7);
+ transpose_s16_8x8(&s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7);
+
+ width = w;
+ s = src + 7;
+ d = dst;
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ __builtin_prefetch(dst + 4 * dst_stride);
+ __builtin_prefetch(dst + 5 * dst_stride);
+ __builtin_prefetch(dst + 6 * dst_stride);
+ __builtin_prefetch(dst + 7 * dst_stride);
+
+ do {
+ load_8x8((const int16_t *)s, src_stride, &s7, &s8, &s9, &s10, &s11,
+ &s12, &s13, &s14);
+ transpose_s16_8x8(&s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14);
+
+ d0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max);
+ d1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max);
+ d2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max);
+ d3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max);
+ d4 = convolve8_8(s4, s5, s6, s7, s8, s9, s10, s11, filters, max);
+ d5 = convolve8_8(s5, s6, s7, s8, s9, s10, s11, s12, filters, max);
+ d6 = convolve8_8(s6, s7, s8, s9, s10, s11, s12, s13, filters, max);
+ d7 = convolve8_8(s7, s8, s9, s10, s11, s12, s13, s14, filters, max);
+
+ transpose_u16_8x8(&d0, &d1, &d2, &d3, &d4, &d5, &d6, &d7);
+
+ d0 = vrhaddq_u16(d0, vld1q_u16(d + 0 * dst_stride));
+ d1 = vrhaddq_u16(d1, vld1q_u16(d + 1 * dst_stride));
+ d2 = vrhaddq_u16(d2, vld1q_u16(d + 2 * dst_stride));
+ d3 = vrhaddq_u16(d3, vld1q_u16(d + 3 * dst_stride));
+ d4 = vrhaddq_u16(d4, vld1q_u16(d + 4 * dst_stride));
+ d5 = vrhaddq_u16(d5, vld1q_u16(d + 5 * dst_stride));
+ d6 = vrhaddq_u16(d6, vld1q_u16(d + 6 * dst_stride));
+ d7 = vrhaddq_u16(d7, vld1q_u16(d + 7 * dst_stride));
+
+ store_8x8(d, dst_stride, d0, d1, d2, d3, d4, d5, d6, d7);
+
+ s0 = s8;
+ s1 = s9;
+ s2 = s10;
+ s3 = s11;
+ s4 = s12;
+ s5 = s13;
+ s6 = s14;
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+ src += 8 * src_stride;
+ dst += 8 * dst_stride;
+ h -= 8;
+ } while (h > 0);
+ }
+ }
+ }
+}
+
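+// Vertical 8-tap convolution. As with the horizontal path, only
+// y_step_q4 == 16 is handled here; rows are loaded directly, so no
+// transposes are needed.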
+void vpx_highbd_convolve8_vert_neon(const uint8_t *src8, ptrdiff_t src_stride,
+ uint8_t *dst8, ptrdiff_t dst_stride,
+ const int16_t *filter_x, // unused
+ int x_step_q4, // unused
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int bd) {
+ if (y_step_q4 != 16) {
+ vpx_highbd_convolve8_vert_c(src8, src_stride, dst8, dst_stride, filter_x,
+ x_step_q4, filter_y, y_step_q4, w, h, bd);
+ } else {
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int16x8_t filters = vld1q_s16(filter_y);
+ const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
+
+ assert(!((intptr_t)dst & 3));
+ assert(!(dst_stride & 3));
+
+ src -= 3 * src_stride;
+
+ if (w == 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int32x4_t d0, d1, d2, d3;
+ uint16x8_t d01, d23;
+
+ s0 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s1 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s2 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s3 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s4 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s5 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s6 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+
+ do {
+ s7 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s8 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s9 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s10 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ d0 = convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters);
+ d1 = convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters);
+ d2 = convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters);
+ d3 = convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters);
+
+ d01 = vcombine_u16(vqrshrun_n_s32(d0, 7), vqrshrun_n_s32(d1, 7));
+ d23 = vcombine_u16(vqrshrun_n_s32(d2, 7), vqrshrun_n_s32(d3, 7));
+ d01 = vminq_u16(d01, max);
+ d23 = vminq_u16(d23, max);
+ vst1_u16(dst, vget_low_u16(d01));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d01));
+ dst += dst_stride;
+ vst1_u16(dst, vget_low_u16(d23));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d23));
+ dst += dst_stride;
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ h -= 4;
+ } while (h > 0);
+ } else {
+ int height;
+ const uint16_t *s;
+ uint16_t *d;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ uint16x8_t d0, d1, d2, d3;
+
+ do {
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ s = src;
+ s0 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s1 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s2 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s3 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s4 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s5 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s6 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ d = dst;
+ height = h;
+
+ do {
+ s7 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s8 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s9 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s10 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+ __builtin_prefetch(s + 0 * src_stride);
+ __builtin_prefetch(s + 1 * src_stride);
+ __builtin_prefetch(s + 2 * src_stride);
+ __builtin_prefetch(s + 3 * src_stride);
+ d0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max);
+ d1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max);
+ d2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max);
+ d3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max);
+
+ vst1q_u16(d, d0);
+ d += dst_stride;
+ vst1q_u16(d, d1);
+ d += dst_stride;
+ vst1q_u16(d, d2);
+ d += dst_stride;
+ vst1q_u16(d, d3);
+ d += dst_stride;
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ height -= 4;
+ } while (height > 0);
+ src += 8;
+ dst += 8;
+ w -= 8;
+ } while (w > 0);
+ }
+ }
+}
+
+void vpx_highbd_convolve8_avg_vert_neon(const uint8_t *src8,
+ ptrdiff_t src_stride, uint8_t *dst8,
+ ptrdiff_t dst_stride,
+ const int16_t *filter_x, // unused
+ int x_step_q4, // unused
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int bd) {
+ if (y_step_q4 != 16) {
+ vpx_highbd_convolve8_avg_vert_c(src8, src_stride, dst8, dst_stride,
+ filter_x, x_step_q4, filter_y, y_step_q4, w,
+ h, bd);
+ } else {
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+ const int16x8_t filters = vld1q_s16(filter_y);
+ const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
+
+ assert(!((intptr_t)dst & 3));
+ assert(!(dst_stride & 3));
+
+ src -= 3 * src_stride;
+
+ if (w == 4) {
+ int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int32x4_t d0, d1, d2, d3;
+ uint16x8_t d01, d23, t01, t23;
+
+ s0 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s1 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s2 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s3 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s4 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s5 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s6 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+
+ do {
+ s7 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s8 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s9 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+ s10 = vreinterpret_s16_u16(vld1_u16(src));
+ src += src_stride;
+
+ __builtin_prefetch(dst + 0 * dst_stride);
+ __builtin_prefetch(dst + 1 * dst_stride);
+ __builtin_prefetch(dst + 2 * dst_stride);
+ __builtin_prefetch(dst + 3 * dst_stride);
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ d0 = convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters);
+ d1 = convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters);
+ d2 = convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters);
+ d3 = convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters);
+
+ t01 = vcombine_u16(vqrshrun_n_s32(d0, 7), vqrshrun_n_s32(d1, 7));
+ t23 = vcombine_u16(vqrshrun_n_s32(d2, 7), vqrshrun_n_s32(d3, 7));
+ t01 = vminq_u16(t01, max);
+ t23 = vminq_u16(t23, max);
+
+ d01 = vcombine_u16(vld1_u16(dst + 0 * dst_stride),
+ vld1_u16(dst + 1 * dst_stride));
+ d23 = vcombine_u16(vld1_u16(dst + 2 * dst_stride),
+ vld1_u16(dst + 3 * dst_stride));
+ d01 = vrhaddq_u16(d01, t01);
+ d23 = vrhaddq_u16(d23, t23);
+
+ vst1_u16(dst, vget_low_u16(d01));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d01));
+ dst += dst_stride;
+ vst1_u16(dst, vget_low_u16(d23));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d23));
+ dst += dst_stride;
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ h -= 4;
+ } while (h > 0);
+ } else {
+ int height;
+ const uint16_t *s;
+ uint16_t *d;
+ int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ uint16x8_t d0, d1, d2, d3, t0, t1, t2, t3;
+
+ do {
+ __builtin_prefetch(src + 0 * src_stride);
+ __builtin_prefetch(src + 1 * src_stride);
+ __builtin_prefetch(src + 2 * src_stride);
+ __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(src + 4 * src_stride);
+ __builtin_prefetch(src + 5 * src_stride);
+ __builtin_prefetch(src + 6 * src_stride);
+ s = src;
+ s0 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s1 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s2 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s3 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s4 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s5 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s6 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ d = dst;
+ height = h;
+
+ do {
+ s7 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s8 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s9 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+ s10 = vreinterpretq_s16_u16(vld1q_u16(s));
+ s += src_stride;
+
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+ __builtin_prefetch(s + 0 * src_stride);
+ __builtin_prefetch(s + 1 * src_stride);
+ __builtin_prefetch(s + 2 * src_stride);
+ __builtin_prefetch(s + 3 * src_stride);
+ t0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max);
+ t1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max);
+ t2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max);
+ t3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max);
+
+ d0 = vld1q_u16(d + 0 * dst_stride);
+ d1 = vld1q_u16(d + 1 * dst_stride);
+ d2 = vld1q_u16(d + 2 * dst_stride);
+ d3 = vld1q_u16(d + 3 * dst_stride);
+ d0 = vrhaddq_u16(d0, t0);
+ d1 = vrhaddq_u16(d1, t1);
+ d2 = vrhaddq_u16(d2, t2);
+ d3 = vrhaddq_u16(d3, t3);
+
+ vst1q_u16(d, d0);
+ d += dst_stride;
+ vst1q_u16(d, d1);
+ d += dst_stride;
+ vst1q_u16(d, d2);
+ d += dst_stride;
+ vst1q_u16(d, d3);
+ d += dst_stride;
+
+ s0 = s4;
+ s1 = s5;
+ s2 = s6;
+ s3 = s7;
+ s4 = s8;
+ s5 = s9;
+ s6 = s10;
+ height -= 4;
+ } while (height > 0);
+ src += 8;
+ dst += 8;
+ w -= 8;
+ } while (w > 0);
+ }
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c
new file mode 100644
index 00000000000..f4d70761eb3
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
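+// Average the source block into the destination with rounding halving adds,
+// specialized by block width (4, 8, 16, 32 and 64).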
+void vpx_highbd_convolve_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
+ uint8_t *dst8, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h, int bd) {
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+
+ (void)filter_x;
+ (void)filter_x_stride;
+ (void)filter_y;
+ (void)filter_y_stride;
+ (void)bd;
+
+ if (w < 8) { // avg4
+ uint16x4_t s0, s1, d0, d1;
+ uint16x8_t s01, d01;
+ do {
+ s0 = vld1_u16(src);
+ d0 = vld1_u16(dst);
+ src += src_stride;
+ s1 = vld1_u16(src);
+ d1 = vld1_u16(dst + dst_stride);
+ src += src_stride;
+ s01 = vcombine_u16(s0, s1);
+ d01 = vcombine_u16(d0, d1);
+ d01 = vrhaddq_u16(s01, d01);
+ vst1_u16(dst, vget_low_u16(d01));
+ dst += dst_stride;
+ vst1_u16(dst, vget_high_u16(d01));
+ dst += dst_stride;
+ h -= 2;
+ } while (h > 0);
+ } else if (w == 8) { // avg8
+ uint16x8_t s0, s1, d0, d1;
+ do {
+ s0 = vld1q_u16(src);
+ d0 = vld1q_u16(dst);
+ src += src_stride;
+ s1 = vld1q_u16(src);
+ d1 = vld1q_u16(dst + dst_stride);
+ src += src_stride;
+
+ d0 = vrhaddq_u16(s0, d0);
+ d1 = vrhaddq_u16(s1, d1);
+
+ vst1q_u16(dst, d0);
+ dst += dst_stride;
+ vst1q_u16(dst, d1);
+ dst += dst_stride;
+ h -= 2;
+ } while (h > 0);
+ } else if (w < 32) { // avg16
+ uint16x8_t s0l, s0h, s1l, s1h, d0l, d0h, d1l, d1h;
+ do {
+ s0l = vld1q_u16(src);
+ s0h = vld1q_u16(src + 8);
+ d0l = vld1q_u16(dst);
+ d0h = vld1q_u16(dst + 8);
+ src += src_stride;
+ s1l = vld1q_u16(src);
+ s1h = vld1q_u16(src + 8);
+ d1l = vld1q_u16(dst + dst_stride);
+ d1h = vld1q_u16(dst + dst_stride + 8);
+ src += src_stride;
+
+ d0l = vrhaddq_u16(s0l, d0l);
+ d0h = vrhaddq_u16(s0h, d0h);
+ d1l = vrhaddq_u16(s1l, d1l);
+ d1h = vrhaddq_u16(s1h, d1h);
+
+ vst1q_u16(dst, d0l);
+ vst1q_u16(dst + 8, d0h);
+ dst += dst_stride;
+ vst1q_u16(dst, d1l);
+ vst1q_u16(dst + 8, d1h);
+ dst += dst_stride;
+ h -= 2;
+ } while (h > 0);
+ } else if (w == 32) { // avg32
+ uint16x8_t s0, s1, s2, s3, d0, d1, d2, d3;
+ do {
+ s0 = vld1q_u16(src);
+ s1 = vld1q_u16(src + 8);
+ s2 = vld1q_u16(src + 16);
+ s3 = vld1q_u16(src + 24);
+ d0 = vld1q_u16(dst);
+ d1 = vld1q_u16(dst + 8);
+ d2 = vld1q_u16(dst + 16);
+ d3 = vld1q_u16(dst + 24);
+ src += src_stride;
+
+ d0 = vrhaddq_u16(s0, d0);
+ d1 = vrhaddq_u16(s1, d1);
+ d2 = vrhaddq_u16(s2, d2);
+ d3 = vrhaddq_u16(s3, d3);
+
+ vst1q_u16(dst, d0);
+ vst1q_u16(dst + 8, d1);
+ vst1q_u16(dst + 16, d2);
+ vst1q_u16(dst + 24, d3);
+ dst += dst_stride;
+
+ s0 = vld1q_u16(src);
+ s1 = vld1q_u16(src + 8);
+ s2 = vld1q_u16(src + 16);
+ s3 = vld1q_u16(src + 24);
+ d0 = vld1q_u16(dst);
+ d1 = vld1q_u16(dst + 8);
+ d2 = vld1q_u16(dst + 16);
+ d3 = vld1q_u16(dst + 24);
+ src += src_stride;
+
+ d0 = vrhaddq_u16(s0, d0);
+ d1 = vrhaddq_u16(s1, d1);
+ d2 = vrhaddq_u16(s2, d2);
+ d3 = vrhaddq_u16(s3, d3);
+
+ vst1q_u16(dst, d0);
+ vst1q_u16(dst + 8, d1);
+ vst1q_u16(dst + 16, d2);
+ vst1q_u16(dst + 24, d3);
+ dst += dst_stride;
+ h -= 2;
+ } while (h > 0);
+ } else { // avg64
+ uint16x8_t s0, s1, s2, s3, d0, d1, d2, d3;
+ do {
+ s0 = vld1q_u16(src);
+ s1 = vld1q_u16(src + 8);
+ s2 = vld1q_u16(src + 16);
+ s3 = vld1q_u16(src + 24);
+ d0 = vld1q_u16(dst);
+ d1 = vld1q_u16(dst + 8);
+ d2 = vld1q_u16(dst + 16);
+ d3 = vld1q_u16(dst + 24);
+
+ d0 = vrhaddq_u16(s0, d0);
+ d1 = vrhaddq_u16(s1, d1);
+ d2 = vrhaddq_u16(s2, d2);
+ d3 = vrhaddq_u16(s3, d3);
+
+ vst1q_u16(dst, d0);
+ vst1q_u16(dst + 8, d1);
+ vst1q_u16(dst + 16, d2);
+ vst1q_u16(dst + 24, d3);
+
+ s0 = vld1q_u16(src + 32);
+ s1 = vld1q_u16(src + 40);
+ s2 = vld1q_u16(src + 48);
+ s3 = vld1q_u16(src + 56);
+ d0 = vld1q_u16(dst + 32);
+ d1 = vld1q_u16(dst + 40);
+ d2 = vld1q_u16(dst + 48);
+ d3 = vld1q_u16(dst + 56);
+
+ d0 = vrhaddq_u16(s0, d0);
+ d1 = vrhaddq_u16(s1, d1);
+ d2 = vrhaddq_u16(s2, d2);
+ d3 = vrhaddq_u16(s3, d3);
+
+ vst1q_u16(dst + 32, d0);
+ vst1q_u16(dst + 40, d1);
+ vst1q_u16(dst + 48, d2);
+ vst1q_u16(dst + 56, d3);
+ src += src_stride;
+ dst += dst_stride;
+ } while (--h);
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c
new file mode 100644
index 00000000000..a980ab1a380
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx/vpx_integer.h"
+
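+// Straight block copy specialized by width; the wider cases use multi-vector
+// vld2q/vld4q loads and stores to move 16 or 32 pixels at a time.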
+void vpx_highbd_convolve_copy_neon(const uint8_t *src8, ptrdiff_t src_stride,
+ uint8_t *dst8, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int filter_x_stride,
+ const int16_t *filter_y, int filter_y_stride,
+ int w, int h, int bd) {
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+
+ (void)filter_x;
+ (void)filter_x_stride;
+ (void)filter_y;
+ (void)filter_y_stride;
+ (void)bd;
+
+ if (w < 8) { // copy4
+ do {
+ vst1_u16(dst, vld1_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ vst1_u16(dst, vld1_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h > 0);
+ } else if (w == 8) { // copy8
+ do {
+ vst1q_u16(dst, vld1q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ vst1q_u16(dst, vld1q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 2;
+ } while (h > 0);
+ } else if (w < 32) { // copy16
+ do {
+ vst2q_u16(dst, vld2q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ vst2q_u16(dst, vld2q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ vst2q_u16(dst, vld2q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ vst2q_u16(dst, vld2q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 4;
+ } while (h > 0);
+ } else if (w == 32) { // copy32
+ do {
+ vst4q_u16(dst, vld4q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ vst4q_u16(dst, vld4q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ vst4q_u16(dst, vld4q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ vst4q_u16(dst, vld4q_u16(src));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 4;
+ } while (h > 0);
+ } else { // copy64
+ do {
+ vst4q_u16(dst, vld4q_u16(src));
+ vst4q_u16(dst + 32, vld4q_u16(src + 32));
+ src += src_stride;
+ dst += dst_stride;
+ vst4q_u16(dst, vld4q_u16(src));
+ vst4q_u16(dst + 32, vld4q_u16(src + 32));
+ src += src_stride;
+ dst += dst_stride;
+ vst4q_u16(dst, vld4q_u16(src));
+ vst4q_u16(dst + 32, vld4q_u16(src + 32));
+ src += src_stride;
+ dst += dst_stride;
+ vst4q_u16(dst, vld4q_u16(src));
+ vst4q_u16(dst + 32, vld4q_u16(src + 32));
+ src += src_stride;
+ dst += dst_stride;
+ h -= 4;
+ } while (h > 0);
+ }
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_neon.c
new file mode 100644
index 00000000000..4e6e109920a
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/highbd_vpx_convolve_neon.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
+#include "vpx_ports/mem.h"
+
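+// 2-D 8-tap convolution: a horizontal pass into an intermediate buffer
+// followed by a vertical pass from that buffer into the destination.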
+void vpx_highbd_convolve8_neon(const uint8_t *src8, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4, int w,
+ int h, int bd) {
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
+ // + 1 to make it divisible by 4
+ DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
+ const int intermediate_height =
+ (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
+
+ /* Filter starting 3 lines back. The neon implementation will ignore the given
+ * height and filter a multiple of 4 lines. Since this goes into the temp
+ * buffer, which has lots of extra room and is subsequently discarded, this
+ * is safe if somewhat less than ideal. */
+ vpx_highbd_convolve8_horiz_neon(CONVERT_TO_BYTEPTR(src - src_stride * 3),
+ src_stride, CONVERT_TO_BYTEPTR(temp), w,
+ filter_x, x_step_q4, filter_y, y_step_q4, w,
+ intermediate_height, bd);
+
+ /* Step into the temp buffer 3 lines to get the actual frame data */
+ vpx_highbd_convolve8_vert_neon(CONVERT_TO_BYTEPTR(temp + w * 3), w, dst,
+ dst_stride, filter_x, x_step_q4, filter_y,
+ y_step_q4, w, h, bd);
+}
+
+void vpx_highbd_convolve8_avg_neon(const uint8_t *src8, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const int16_t *filter_x, int x_step_q4,
+ const int16_t *filter_y, int y_step_q4,
+ int w, int h, int bd) {
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
+ // + 1 to make it divisible by 4
+ DECLARE_ALIGNED(16, uint16_t, temp[64 * 136]);
+ const int intermediate_height =
+ (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
+
+ /* This implementation has the same issues as above. In addition, we only want
+ * to average the values after both passes.
+ */
+ vpx_highbd_convolve8_horiz_neon(CONVERT_TO_BYTEPTR(src - src_stride * 3),
+ src_stride, CONVERT_TO_BYTEPTR(temp), w,
+ filter_x, x_step_q4, filter_y, y_step_q4, w,
+ intermediate_height, bd);
+ vpx_highbd_convolve8_avg_vert_neon(CONVERT_TO_BYTEPTR(temp + w * 3), w, dst,
+ dst_stride, filter_x, x_step_q4, filter_y,
+ y_step_q4, w, h, bd);
+}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.asm
index dc459e20d9c..e3c0c5210d2 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.asm
@@ -25,9 +25,8 @@
|vpx_idct16x16_1_add_neon| PROC
ldrsh r0, [r0]
- ; generate cospi_16_64 = 11585
- mov r12, #0x2d00
- add r12, #0x41
+ ; cospi_16_64 = 11585
+ movw r12, #0x2d41
; out = dct_const_round_shift(input[0] * cospi_16_64)
mul r0, r0, r12 ; input[0] * cospi_16_64
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c
index 4035830f3c8..f1e49ff5178 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_1_add_neon.c
@@ -21,7 +21,7 @@ void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest,
uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16;
int16x8_t q0s16;
uint8_t *d1, *d2;
- int16_t i, j, a1, cospi_16_64 = 11585;
+ int16_t i, j, a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
a1 = ROUND_POWER_OF_TWO(out, 6);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.asm
index 22a0c95941a..5e64cea0ae7 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.asm
@@ -60,13 +60,11 @@
vld2.s16 {q1,q2}, [r0]!
vmov.s16 q15, q1
- ; generate cospi_28_64 = 3196
- mov r3, #0xc00
- add r3, #0x7c
+ ; cospi_28_64 = 3196
+ movw r3, #0x0c7c
- ; generate cospi_4_64 = 16069
- mov r12, #0x3e00
- add r12, #0xc5
+ ; cospi_4_64 = 16069
+ movw r12, #0x3ec5
; transpose the input data
TRANSPOSE8X8
@@ -76,13 +74,11 @@
vdup.16 d1, r12 ; duplicate cospi_4_64
; preloading to avoid stall
- ; generate cospi_12_64 = 13623
- mov r3, #0x3500
- add r3, #0x37
+ ; cospi_12_64 = 13623
+ movw r3, #0x3537
- ; generate cospi_20_64 = 9102
- mov r12, #0x2300
- add r12, #0x8e
+ ; cospi_20_64 = 9102
+ movw r12, #0x238e
; step2[4] * cospi_28_64
vmull.s16 q2, d18, d0
@@ -112,13 +108,11 @@
vqrshrn.s32 d15, q6, #14 ; >> 14
; preloading to avoid stall
- ; generate cospi_16_64 = 11585
- mov r3, #0x2d00
- add r3, #0x41
+ ; cospi_16_64 = 11585
+ movw r3, #0x2d41
- ; generate cospi_24_64 = 6270
- mov r12, #0x1800
- add r12, #0x7e
+ ; cospi_24_64 = 6270
+ movw r12, #0x187e
; step2[5] * cospi_12_64
vmull.s16 q2, d26, d2
@@ -155,9 +149,8 @@
vmull.s16 q0, d24, d30
vmull.s16 q1, d25, d30
- ; generate cospi_8_64 = 15137
- mov r3, #0x3b00
- add r3, #0x21
+ ; cospi_8_64 = 15137
+ movw r3, #0x3b21
vdup.16 d30, r12 ; duplicate cospi_24_64
vdup.16 d31, r3 ; duplicate cospi_8_64
@@ -208,9 +201,8 @@
vsub.s16 q14, q7, q6 ; step2[6] = -step1[6] + step1[7];
vadd.s16 q15, q6, q7 ; step2[7] = step1[6] + step1[7];
- ; generate cospi_16_64 = 11585
- mov r3, #0x2d00
- add r3, #0x41
+ ; cospi_16_64 = 11585
+ movw r3, #0x2d41
; stage 5
vadd.s16 q0, q8, q11 ; step1[0] = step2[0] + step2[3];
@@ -307,13 +299,11 @@
vld2.s16 {q0,q1}, [r0]!
vmov.s16 q15, q0;
- ; generate cospi_30_64 = 1606
- mov r3, #0x0600
- add r3, #0x46
+ ; cospi_30_64 = 1606
+ movw r3, #0x0646
- ; generate cospi_2_64 = 16305
- mov r12, #0x3f00
- add r12, #0xb1
+ ; cospi_2_64 = 16305
+ movw r12, #0x3fb1
; transpose the input data
TRANSPOSE8X8
@@ -323,13 +313,11 @@
vdup.16 d13, r12 ; duplicate cospi_2_64
; preloading to avoid stall
- ; generate cospi_14_64 = 12665
- mov r3, #0x3100
- add r3, #0x79
+ ; cospi_14_64 = 12665
+ movw r3, #0x3179
- ; generate cospi_18_64 = 10394
- mov r12, #0x2800
- add r12, #0x9a
+ ; cospi_18_64 = 10394
+ movw r12, #0x289a
; step1[8] * cospi_30_64
vmull.s16 q2, d16, d12
@@ -359,13 +347,11 @@
vqrshrn.s32 d15, q4, #14 ; >> 14
; preloading to avoid stall
- ; generate cospi_22_64 = 7723
- mov r3, #0x1e00
- add r3, #0x2b
+ ; cospi_22_64 = 7723
+ movw r3, #0x1e2b
- ; generate cospi_10_64 = 14449
- mov r12, #0x3800
- add r12, #0x71
+ ; cospi_10_64 = 14449
+ movw r12, #0x3871
; step1[9] * cospi_14_64
vmull.s16 q2, d24, d30
@@ -411,13 +397,11 @@
vmlal.s16 q5, d27, d30
; preloading to avoid stall
- ; generate cospi_6_64 = 15679
- mov r3, #0x3d00
- add r3, #0x3f
+ ; cospi_6_64 = 15679
+ movw r3, #0x3d3f
- ; generate cospi_26_64 = 4756
- mov r12, #0x1200
- add r12, #0x94
+ ; cospi_26_64 = 4756
+ movw r12, #0x1294
vdup.16 d30, r3 ; duplicate cospi_6_64
vdup.16 d31, r12 ; duplicate cospi_26_64
@@ -466,13 +450,11 @@
vadd.s16 q7, q6, q7 ; step1[15]=step2[14]+step2[15]
; stage 4
- ; generate cospi_24_64 = 6270
- mov r3, #0x1800
- add r3, #0x7e
+ ; cospi_24_64 = 6270
+ movw r3, #0x187e
- ; generate cospi_8_64 = 15137
- mov r12, #0x3b00
- add r12, #0x21
+ ; cospi_8_64 = 15137
+ movw r12, #0x3b21
; -step1[9] * cospi_8_64 + step1[14] * cospi_24_64
vdup.16 d30, r12 ; duplicate cospi_8_64
@@ -543,9 +525,8 @@
vadd.s16 q15, q7, q4 ; step1[15] =step2[12]+step2[15];
; stage 6.
- ; generate cospi_16_64 = 11585
- mov r12, #0x2d00
- add r12, #0x41
+ ; cospi_16_64 = 11585
+ movw r12, #0x2d41
vdup.16 d14, r12 ; duplicate cospi_16_64
@@ -810,13 +791,11 @@ end_idct16x16_pass2
vld2.s16 {q1,q2}, [r0]!
vmov.s16 q15, q1
- ; generate cospi_28_64*2 = 6392
- mov r3, #0x1800
- add r3, #0xf8
+ ; cospi_28_64*2 = 6392
+ movw r3, #0x18f8
- ; generate cospi_4_64*2 = 32138
- mov r12, #0x7d00
- add r12, #0x8a
+ ; cospi_4_64*2 = 32138
+ movw r12, #0x7d8a
; transpose the input data
TRANSPOSE8X8
@@ -833,9 +812,8 @@ end_idct16x16_pass2
vqrdmulh.s16 q4, q9, q0
; preloading to avoid stall
- ; generate cospi_16_64*2 = 23170
- mov r3, #0x5a00
- add r3, #0x82
+ ; cospi_16_64*2 = 23170
+ movw r3, #0x5a82
; dct_const_round_shift(step2[4] * cospi_4_64);
vqrdmulh.s16 q7, q9, q1
@@ -843,9 +821,8 @@ end_idct16x16_pass2
; stage 4
vdup.16 q1, r3 ; cospi_16_64*2
- ; generate cospi_16_64 = 11585
- mov r3, #0x2d00
- add r3, #0x41
+ ; cospi_16_64 = 11585
+ movw r3, #0x2d41
vdup.16 d4, r3; ; duplicate cospi_16_64
@@ -939,13 +916,11 @@ end_idct16x16_pass2
vld2.s16 {q0,q1}, [r0]!
vmov.s16 q15, q0;
- ; generate 2*cospi_30_64 = 3212
- mov r3, #0xc00
- add r3, #0x8c
+ ; 2*cospi_30_64 = 3212
+ movw r3, #0x0c8c
- ; generate 2*cospi_2_64 = 32610
- mov r12, #0x7f00
- add r12, #0x62
+ ; 2*cospi_2_64 = 32610
+ movw r12, #0x7f62
; transpose the input data
TRANSPOSE8X8
@@ -962,15 +937,13 @@ end_idct16x16_pass2
vqrdmulh.s16 q7, q8, q6
; preloading to avoid stall
- ; generate 2*cospi_26_64 = 9512
- mov r12, #0x2500
- add r12, #0x28
+ ; 2*cospi_26_64 = 9512
+ movw r12, #0x2528
rsb r12, #0
vdup.16 q15, r12 ; duplicate -2*cospi_26_64
- ; generate 2*cospi_6_64 = 31358
- mov r3, #0x7a00
- add r3, #0x7e
+ ; 2*cospi_6_64 = 31358
+ movw r3, #0x7a7e
vdup.16 q14, r3 ; duplicate 2*cospi_6_64
; dct_const_round_shift(- step1[12] * cospi_26_64)
@@ -980,14 +953,12 @@ end_idct16x16_pass2
vqrdmulh.s16 q4, q9, q14
; stage 4
- ; generate cospi_24_64 = 6270
- mov r3, #0x1800
- add r3, #0x7e
+ ; cospi_24_64 = 6270
+ movw r3, #0x187e
vdup.16 d31, r3 ; duplicate cospi_24_64
- ; generate cospi_8_64 = 15137
- mov r12, #0x3b00
- add r12, #0x21
+ ; cospi_8_64 = 15137
+ movw r12, #0x3b21
vdup.16 d30, r12 ; duplicate cospi_8_64
; step1[14] * cospi_24_64
@@ -1052,9 +1023,8 @@ end_idct16x16_pass2
vadd.s16 q15, q7, q4 ; step1[15] =step2[12]+step2[15];
; stage 6.
- ; generate cospi_16_64 = 11585
- mov r12, #0x2d00
- add r12, #0x41
+ ; cospi_16_64 = 11585
+ movw r12, #0x2d41
vdup.16 d14, r12 ; duplicate cospi_16_64
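The hunks above collapse each two-instruction constant load (a mov of the high
byte followed by an add of the low byte) into a single movw, which materializes
the full 16-bit immediate at once; the values are unchanged, e.g. 0x2d00 + 0x41
= 0x2d41 = 11585 for cospi_16_64. As an aside (not part of the patch), these
constants are the usual DCT cosines scaled by 2^14; a small C check of the
values loaded above, assuming cospi_n_64 == round(cos(n*pi/64) * (1 << 14)):

/* Illustrative check only, not part of the patch. */
#include <math.h>
#include <stdio.h>

int main(void) {
  const double pi = 3.14159265358979323846;
  const int n[5] = { 16, 30, 2, 24, 8 };
  const int table[5] = { 11585, 1606, 16305, 6270, 15137 };
  int i;
  for (i = 0; i < 5; ++i) {
    /* all values are positive, so +0.5 and truncation rounds to nearest */
    const int c = (int)(cos(n[i] * pi / 64.0) * (1 << 14) + 0.5);
    printf("cospi_%2d_64 = %5d (table: %5d)\n", n[i], c, table[i]);
  }
  return 0;
}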
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.c
index ce5cbcbcda5..f682afc7bf6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct16x16_add_neon.c
@@ -73,8 +73,8 @@ void vpx_idct16x16_256_add_neon_pass1(int16_t *in, int16_t *out,
d31s16 = vget_high_s16(q15s16);
// stage 3
- d0s16 = vdup_n_s16(cospi_28_64);
- d1s16 = vdup_n_s16(cospi_4_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_28_64);
+ d1s16 = vdup_n_s16((int16_t)cospi_4_64);
q2s32 = vmull_s16(d18s16, d0s16);
q3s32 = vmull_s16(d19s16, d0s16);
@@ -86,8 +86,8 @@ void vpx_idct16x16_256_add_neon_pass1(int16_t *in, int16_t *out,
q5s32 = vmlal_s16(q5s32, d30s16, d0s16);
q6s32 = vmlal_s16(q6s32, d31s16, d0s16);
- d2s16 = vdup_n_s16(cospi_12_64);
- d3s16 = vdup_n_s16(cospi_20_64);
+ d2s16 = vdup_n_s16((int16_t)cospi_12_64);
+ d3s16 = vdup_n_s16((int16_t)cospi_20_64);
d8s16 = vqrshrn_n_s32(q2s32, 14);
d9s16 = vqrshrn_n_s32(q3s32, 14);
@@ -114,15 +114,15 @@ void vpx_idct16x16_256_add_neon_pass1(int16_t *in, int16_t *out,
q6s16 = vcombine_s16(d12s16, d13s16);
// stage 4
- d30s16 = vdup_n_s16(cospi_16_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_16_64);
q2s32 = vmull_s16(d16s16, d30s16);
q11s32 = vmull_s16(d17s16, d30s16);
q0s32 = vmull_s16(d24s16, d30s16);
q1s32 = vmull_s16(d25s16, d30s16);
- d30s16 = vdup_n_s16(cospi_24_64);
- d31s16 = vdup_n_s16(cospi_8_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_24_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_8_64);
q3s32 = vaddq_s32(q2s32, q0s32);
q12s32 = vaddq_s32(q11s32, q1s32);
@@ -168,7 +168,7 @@ void vpx_idct16x16_256_add_neon_pass1(int16_t *in, int16_t *out,
q2s16 = vsubq_s16(q9s16, q10s16);
q3s16 = vsubq_s16(q8s16, q11s16);
- d16s16 = vdup_n_s16(cospi_16_64);
+ d16s16 = vdup_n_s16((int16_t)cospi_16_64);
q11s32 = vmull_s16(d26s16, d16s16);
q12s32 = vmull_s16(d27s16, d16s16);
@@ -313,8 +313,8 @@ void vpx_idct16x16_256_add_neon_pass2(int16_t *src, int16_t *out,
d31s16 = vget_high_s16(q15s16);
// stage 3
- d12s16 = vdup_n_s16(cospi_30_64);
- d13s16 = vdup_n_s16(cospi_2_64);
+ d12s16 = vdup_n_s16((int16_t)cospi_30_64);
+ d13s16 = vdup_n_s16((int16_t)cospi_2_64);
q2s32 = vmull_s16(d16s16, d12s16);
q3s32 = vmull_s16(d17s16, d12s16);
@@ -333,8 +333,8 @@ void vpx_idct16x16_256_add_neon_pass2(int16_t *src, int16_t *out,
q0s16 = vcombine_s16(d0s16, d1s16);
q7s16 = vcombine_s16(d14s16, d15s16);
- d30s16 = vdup_n_s16(cospi_14_64);
- d31s16 = vdup_n_s16(cospi_18_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_14_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_18_64);
q2s32 = vmull_s16(d24s16, d30s16);
q3s32 = vmull_s16(d25s16, d30s16);
@@ -353,8 +353,8 @@ void vpx_idct16x16_256_add_neon_pass2(int16_t *src, int16_t *out,
q1s16 = vcombine_s16(d2s16, d3s16);
q6s16 = vcombine_s16(d12s16, d13s16);
- d30s16 = vdup_n_s16(cospi_22_64);
- d31s16 = vdup_n_s16(cospi_10_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_22_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_10_64);
q11s32 = vmull_s16(d20s16, d30s16);
q12s32 = vmull_s16(d21s16, d30s16);
@@ -373,8 +373,8 @@ void vpx_idct16x16_256_add_neon_pass2(int16_t *src, int16_t *out,
q2s16 = vcombine_s16(d4s16, d5s16);
q5s16 = vcombine_s16(d10s16, d11s16);
- d30s16 = vdup_n_s16(cospi_6_64);
- d31s16 = vdup_n_s16(cospi_26_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_6_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_26_64);
q10s32 = vmull_s16(d28s16, d30s16);
q11s32 = vmull_s16(d29s16, d30s16);
@@ -413,8 +413,8 @@ void vpx_idct16x16_256_add_neon_pass2(int16_t *src, int16_t *out,
d28s16 = vget_low_s16(q14s16);
d29s16 = vget_high_s16(q14s16);
- d30s16 = vdup_n_s16(cospi_8_64);
- d31s16 = vdup_n_s16(cospi_24_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_8_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_24_64);
q2s32 = vmull_s16(d18s16, d31s16);
q3s32 = vmull_s16(d19s16, d31s16);
@@ -474,7 +474,7 @@ void vpx_idct16x16_256_add_neon_pass2(int16_t *src, int16_t *out,
d26s16 = vget_low_s16(q13s16);
d27s16 = vget_high_s16(q13s16);
- d14s16 = vdup_n_s16(cospi_16_64);
+ d14s16 = vdup_n_s16((int16_t)cospi_16_64);
q3s32 = vmull_s16(d26s16, d14s16);
q4s32 = vmull_s16(d27s16, d14s16);
@@ -837,15 +837,15 @@ void vpx_idct16x16_10_add_neon_pass1(int16_t *in, int16_t *out,
&q15s16);
// stage 3
- q0s16 = vdupq_n_s16(cospi_28_64 * 2);
- q1s16 = vdupq_n_s16(cospi_4_64 * 2);
+ q0s16 = vdupq_n_s16((int16_t)cospi_28_64 * 2);
+ q1s16 = vdupq_n_s16((int16_t)cospi_4_64 * 2);
q4s16 = vqrdmulhq_s16(q9s16, q0s16);
q7s16 = vqrdmulhq_s16(q9s16, q1s16);
// stage 4
- q1s16 = vdupq_n_s16(cospi_16_64 * 2);
- d4s16 = vdup_n_s16(cospi_16_64);
+ q1s16 = vdupq_n_s16((int16_t)cospi_16_64 * 2);
+ d4s16 = vdup_n_s16((int16_t)cospi_16_64);
q8s16 = vqrdmulhq_s16(q8s16, q1s16);
@@ -979,13 +979,13 @@ void vpx_idct16x16_10_add_neon_pass2(int16_t *src, int16_t *out,
&q15s16);
// stage 3
- q6s16 = vdupq_n_s16(cospi_30_64 * 2);
+ q6s16 = vdupq_n_s16((int16_t)cospi_30_64 * 2);
q0s16 = vqrdmulhq_s16(q8s16, q6s16);
- q6s16 = vdupq_n_s16(cospi_2_64 * 2);
+ q6s16 = vdupq_n_s16((int16_t)cospi_2_64 * 2);
q7s16 = vqrdmulhq_s16(q8s16, q6s16);
- q15s16 = vdupq_n_s16(-cospi_26_64 * 2);
- q14s16 = vdupq_n_s16(cospi_6_64 * 2);
+ q15s16 = vdupq_n_s16((int16_t)-cospi_26_64 * 2);
+ q14s16 = vdupq_n_s16((int16_t)cospi_6_64 * 2);
q3s16 = vqrdmulhq_s16(q9s16, q15s16);
q4s16 = vqrdmulhq_s16(q9s16, q14s16);
@@ -999,8 +999,8 @@ void vpx_idct16x16_10_add_neon_pass2(int16_t *src, int16_t *out,
d14s16 = vget_low_s16(q7s16);
d15s16 = vget_high_s16(q7s16);
- d30s16 = vdup_n_s16(cospi_8_64);
- d31s16 = vdup_n_s16(cospi_24_64);
+ d30s16 = vdup_n_s16((int16_t)cospi_8_64);
+ d31s16 = vdup_n_s16((int16_t)cospi_24_64);
q12s32 = vmull_s16(d14s16, d31s16);
q5s32 = vmull_s16(d15s16, d31s16);
@@ -1057,7 +1057,7 @@ void vpx_idct16x16_10_add_neon_pass2(int16_t *src, int16_t *out,
d26s16 = vget_low_s16(q13s16);
d27s16 = vget_high_s16(q13s16);
- d14s16 = vdup_n_s16(cospi_16_64);
+ d14s16 = vdup_n_s16((int16_t)cospi_16_64);
q3s32 = vmull_s16(d26s16, d14s16);
q4s32 = vmull_s16(d27s16, d14s16);
q0s32 = vmull_s16(d20s16, d14s16);
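The changes to the C counterpart above add explicit (int16_t) casts on the
cospi constants handed to vdup_n_s16/vdupq_n_s16. The constants are declared
with a wider integral type (tran_high_t) in vpx_dsp/txfm_common.h, so the cast
makes the narrowing to the intrinsic's int16_t lane argument explicit and keeps
implicit-conversion warnings quiet. A minimal sketch of the pattern; the
tran_high_t width below is an assumption made for this sketch only:

/* Sketch of the cast pattern, not the library's actual declarations. */
#include <arm_neon.h>
#include <stdint.h>

typedef int32_t tran_high_t;                  /* assumed width, for illustration */
static const tran_high_t cospi_16_64 = 11585;

static int16x4_t dup_cospi_16_64(void) {
  /* the value fits in 16 bits; the cast only spells the narrowing out */
  return vdup_n_s16((int16_t)cospi_16_64);
}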
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.asm
deleted file mode 100644
index 96d276b4d14..00000000000
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.asm
+++ /dev/null
@@ -1,144 +0,0 @@
-;
-; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license and patent
-; grant that can be found in the LICENSE file in the root of the source
-; tree. All contributing project authors may be found in the AUTHORS
-; file in the root of the source tree.
-;
-
- EXPORT |vpx_idct32x32_1_add_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
-    ;TODO(hkuang): put the following macros in a separate
-    ;file so other idct functions can also use them.
- MACRO
- LD_16x8 $src, $stride
- vld1.8 {q8}, [$src], $stride
- vld1.8 {q9}, [$src], $stride
- vld1.8 {q10}, [$src], $stride
- vld1.8 {q11}, [$src], $stride
- vld1.8 {q12}, [$src], $stride
- vld1.8 {q13}, [$src], $stride
- vld1.8 {q14}, [$src], $stride
- vld1.8 {q15}, [$src], $stride
- MEND
-
- MACRO
- ADD_DIFF_16x8 $diff
- vqadd.u8 q8, q8, $diff
- vqadd.u8 q9, q9, $diff
- vqadd.u8 q10, q10, $diff
- vqadd.u8 q11, q11, $diff
- vqadd.u8 q12, q12, $diff
- vqadd.u8 q13, q13, $diff
- vqadd.u8 q14, q14, $diff
- vqadd.u8 q15, q15, $diff
- MEND
-
- MACRO
- SUB_DIFF_16x8 $diff
- vqsub.u8 q8, q8, $diff
- vqsub.u8 q9, q9, $diff
- vqsub.u8 q10, q10, $diff
- vqsub.u8 q11, q11, $diff
- vqsub.u8 q12, q12, $diff
- vqsub.u8 q13, q13, $diff
- vqsub.u8 q14, q14, $diff
- vqsub.u8 q15, q15, $diff
- MEND
-
- MACRO
- ST_16x8 $dst, $stride
- vst1.8 {q8}, [$dst], $stride
- vst1.8 {q9}, [$dst], $stride
- vst1.8 {q10},[$dst], $stride
- vst1.8 {q11},[$dst], $stride
- vst1.8 {q12},[$dst], $stride
- vst1.8 {q13},[$dst], $stride
- vst1.8 {q14},[$dst], $stride
- vst1.8 {q15},[$dst], $stride
- MEND
-
-;void vpx_idct32x32_1_add_neon(int16_t *input, uint8_t *dest,
-; int dest_stride)
-;
-; r0 int16_t input
-; r1 uint8_t *dest
-; r2 int dest_stride
-
-|vpx_idct32x32_1_add_neon| PROC
- push {lr}
- pld [r1]
- add r3, r1, #16 ; r3 dest + 16 for second loop
- ldrsh r0, [r0]
-
- ; generate cospi_16_64 = 11585
- mov r12, #0x2d00
- add r12, #0x41
-
- ; out = dct_const_round_shift(input[0] * cospi_16_64)
- mul r0, r0, r12 ; input[0] * cospi_16_64
- add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
- asr r0, r0, #14 ; >> DCT_CONST_BITS
-
- ; out = dct_const_round_shift(out * cospi_16_64)
- mul r0, r0, r12 ; out * cospi_16_64
- mov r12, r1 ; save dest
- add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1))
- asr r0, r0, #14 ; >> DCT_CONST_BITS
-
- ; a1 = ROUND_POWER_OF_TWO(out, 6)
- add r0, r0, #32 ; + (1 <<((6) - 1))
- asrs r0, r0, #6 ; >> 6
- bge diff_positive_32_32
-
-diff_negative_32_32
- neg r0, r0
- usat r0, #8, r0
- vdup.u8 q0, r0
- mov r0, #4
-
-diff_negative_32_32_loop
- sub r0, #1
- LD_16x8 r1, r2
- SUB_DIFF_16x8 q0
- ST_16x8 r12, r2
-
- LD_16x8 r1, r2
- SUB_DIFF_16x8 q0
- ST_16x8 r12, r2
- cmp r0, #2
- moveq r1, r3
- moveq r12, r3
- cmp r0, #0
- bne diff_negative_32_32_loop
- pop {pc}
-
-diff_positive_32_32
- usat r0, #8, r0
- vdup.u8 q0, r0
- mov r0, #4
-
-diff_positive_32_32_loop
- sub r0, #1
- LD_16x8 r1, r2
- ADD_DIFF_16x8 q0
- ST_16x8 r12, r2
-
- LD_16x8 r1, r2
- ADD_DIFF_16x8 q0
- ST_16x8 r12, r2
- cmp r0, #2
- moveq r1, r3
- moveq r12, r3
- cmp r0, #0
- bne diff_positive_32_32_loop
- pop {pc}
-
- ENDP ; |vpx_idct32x32_1_add_neon|
- END
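The file removed above is the DC-only (single coefficient) 32x32 path; the same
arithmetic lives on in the C version patched just below. The add #0x2000 /
asr #14 pairs are dct_const_round_shift() with DCT_CONST_BITS = 14, and the
add #32 / asr #6 pair is ROUND_POWER_OF_TWO(out, 6). A scalar restatement of
that sequence, for reference only:

/* Scalar restatement of the deleted DC path; reference only, not the patch. */
#include <stdint.h>

#define DCT_CONST_BITS 14
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

static int32_t dct_const_round_shift(int32_t input) {
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}

static int16_t idct32x32_dc_value(int16_t input0) {
  const int16_t cospi_16_64 = 11585;
  int32_t out = dct_const_round_shift(input0 * cospi_16_64);
  out = dct_const_round_shift(out * cospi_16_64);
  return (int16_t)ROUND_POWER_OF_TWO(out, 6); /* a1, added to every pixel */
}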
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c
index 9dfdf8d6965..6be4b01229b 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_1_add_neon.c
@@ -94,7 +94,7 @@ void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest,
uint8x16_t q0u8, q8u8, q9u8, q10u8, q11u8, q12u8, q13u8, q14u8, q15u8;
int i, j, dest_stride8;
uint8_t *d;
- int16_t a1, cospi_16_64 = 11585;
+ int16_t a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
@@ -103,7 +103,7 @@ void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest,
dest_stride8 = dest_stride * 8;
if (a1 >= 0) { // diff_positive_32_32
a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1;
- q0u8 = vdupq_n_u8(a1);
+ q0u8 = vdupq_n_u8((uint8_t)a1);
for (i = 0; i < 2; i++, dest += 16) { // diff_positive_32_32_loop
d = dest;
for (j = 0; j < 4; j++) {
@@ -119,7 +119,7 @@ void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest,
} else { // diff_negative_32_32
a1 = -a1;
a1 = a1 < 0 ? 0 : a1 > 255 ? 255 : a1;
- q0u8 = vdupq_n_u8(a1);
+ q0u8 = vdupq_n_u8((uint8_t)a1);
for (i = 0; i < 2; i++, dest += 16) { // diff_negative_32_32_loop
d = dest;
for (j = 0; j < 4; j++) {
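After the clamp, a1 is known to lie in [0, 255], so the new (uint8_t) casts for
vdupq_n_u8 are value-preserving. The loop body outside these hunks
saturating-adds that duplicated DC value to eight rows of 16 pixels at a time
through q8u8..q15u8; a compact, hypothetical re-expression of one such strip:

/* Hypothetical compact form of one 16x8 strip of the DC add loop above;
 * the real function unrolls this across q8u8..q15u8. */
#include <arm_neon.h>
#include <stdint.h>

static void add_dc_16x8(uint8_t *d, int stride, uint8x16_t dc) {
  int r;
  for (r = 0; r < 8; ++r) {
    const uint8x16_t p = vld1q_u8(d);
    vst1q_u8(d, vqaddq_u8(p, dc)); /* saturating add, mirrors vqadd.u8 */
    d += stride;
  }
}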
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c
new file mode 100644
index 00000000000..ebec9df54ad
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_34_add_neon.c
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/txfm_common.h"
+
+// Only for the first pass of the _34_ variant. Since it only uses values from
+// the top left 8x8 it can safely assume all the remaining values are 0 and skip
+// an awful lot of calculations. In fact, only the first 6 columns make the cut.
+// None of the elements in the 7th or 8th column are used so it skips any calls
+// to input[67] too.
+// In C this does a single row of 32 for each call. Here it transposes the top
+// left 8x8 to allow using SIMD.
+
+// vp9/common/vp9_scan.c:vp9_default_iscan_32x32 arranges the first 34 non-zero
+// coefficients as follows:
+// 0 1 2 3 4 5 6 7
+// 0 0 2 5 10 17 25
+// 1 1 4 8 15 22 30
+// 2 3 7 12 18 28
+// 3 6 11 16 23 31
+// 4 9 14 19 29
+// 5 13 20 26
+// 6 21 27 33
+// 7 24 32
+static void idct32_6_neon(const int16_t *input, int16_t *output) {
+ int16x8_t in0, in1, in2, in3, in4, in5, in6, in7;
+ int16x8_t s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s1_9, s1_10,
+ s1_11, s1_12, s1_13, s1_14, s1_15, s1_16, s1_17, s1_18, s1_19, s1_20,
+ s1_21, s1_22, s1_23, s1_24, s1_25, s1_26, s1_27, s1_28, s1_29, s1_30,
+ s1_31;
+ int16x8_t s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s2_9, s2_10,
+ s2_11, s2_12, s2_13, s2_14, s2_15, s2_16, s2_17, s2_18, s2_19, s2_20,
+ s2_21, s2_22, s2_23, s2_24, s2_25, s2_26, s2_27, s2_28, s2_29, s2_30,
+ s2_31;
+ int16x8_t s3_24, s3_25, s3_26, s3_27;
+
+ load_and_transpose_s16_8x8(input, 32, &in0, &in1, &in2, &in3, &in4, &in5,
+ &in6, &in7);
+
+ // stage 1
+ // input[1] * cospi_31_64 - input[31] * cospi_1_64 (but input[31] == 0)
+ s1_16 = multiply_shift_and_narrow_s16(in1, cospi_31_64);
+ // input[1] * cospi_1_64 + input[31] * cospi_31_64 (but input[31] == 0)
+ s1_31 = multiply_shift_and_narrow_s16(in1, cospi_1_64);
+
+ s1_20 = multiply_shift_and_narrow_s16(in5, cospi_27_64);
+ s1_27 = multiply_shift_and_narrow_s16(in5, cospi_5_64);
+
+ s1_23 = multiply_shift_and_narrow_s16(in3, -cospi_29_64);
+ s1_24 = multiply_shift_and_narrow_s16(in3, cospi_3_64);
+
+ // stage 2
+ s2_8 = multiply_shift_and_narrow_s16(in2, cospi_30_64);
+ s2_15 = multiply_shift_and_narrow_s16(in2, cospi_2_64);
+
+ // stage 3
+ s1_4 = multiply_shift_and_narrow_s16(in4, cospi_28_64);
+ s1_7 = multiply_shift_and_narrow_s16(in4, cospi_4_64);
+
+ s1_17 = multiply_accumulate_shift_and_narrow_s16(s1_16, -cospi_4_64, s1_31,
+ cospi_28_64);
+ s1_30 = multiply_accumulate_shift_and_narrow_s16(s1_16, cospi_28_64, s1_31,
+ cospi_4_64);
+
+ s1_21 = multiply_accumulate_shift_and_narrow_s16(s1_20, -cospi_20_64, s1_27,
+ cospi_12_64);
+ s1_26 = multiply_accumulate_shift_and_narrow_s16(s1_20, cospi_12_64, s1_27,
+ cospi_20_64);
+
+ s1_22 = multiply_accumulate_shift_and_narrow_s16(s1_23, -cospi_12_64, s1_24,
+ -cospi_20_64);
+ s1_25 = multiply_accumulate_shift_and_narrow_s16(s1_23, -cospi_20_64, s1_24,
+ cospi_12_64);
+
+ // stage 4
+ s1_0 = multiply_shift_and_narrow_s16(in0, cospi_16_64);
+
+ s2_9 = multiply_accumulate_shift_and_narrow_s16(s2_8, -cospi_8_64, s2_15,
+ cospi_24_64);
+ s2_14 = multiply_accumulate_shift_and_narrow_s16(s2_8, cospi_24_64, s2_15,
+ cospi_8_64);
+
+ s2_20 = vsubq_s16(s1_23, s1_20);
+ s2_21 = vsubq_s16(s1_22, s1_21);
+ s2_22 = vaddq_s16(s1_21, s1_22);
+ s2_23 = vaddq_s16(s1_20, s1_23);
+ s2_24 = vaddq_s16(s1_24, s1_27);
+ s2_25 = vaddq_s16(s1_25, s1_26);
+ s2_26 = vsubq_s16(s1_25, s1_26);
+ s2_27 = vsubq_s16(s1_24, s1_27);
+
+ // stage 5
+ s1_5 = sub_multiply_shift_and_narrow_s16(s1_7, s1_4, cospi_16_64);
+ s1_6 = add_multiply_shift_and_narrow_s16(s1_4, s1_7, cospi_16_64);
+
+ s1_18 = multiply_accumulate_shift_and_narrow_s16(s1_17, -cospi_8_64, s1_30,
+ cospi_24_64);
+ s1_29 = multiply_accumulate_shift_and_narrow_s16(s1_17, cospi_24_64, s1_30,
+ cospi_8_64);
+
+ s1_19 = multiply_accumulate_shift_and_narrow_s16(s1_16, -cospi_8_64, s1_31,
+ cospi_24_64);
+ s1_28 = multiply_accumulate_shift_and_narrow_s16(s1_16, cospi_24_64, s1_31,
+ cospi_8_64);
+
+ s1_20 = multiply_accumulate_shift_and_narrow_s16(s2_20, -cospi_24_64, s2_27,
+ -cospi_8_64);
+ s1_27 = multiply_accumulate_shift_and_narrow_s16(s2_20, -cospi_8_64, s2_27,
+ cospi_24_64);
+
+ s1_21 = multiply_accumulate_shift_and_narrow_s16(s2_21, -cospi_24_64, s2_26,
+ -cospi_8_64);
+ s1_26 = multiply_accumulate_shift_and_narrow_s16(s2_21, -cospi_8_64, s2_26,
+ cospi_24_64);
+
+ // stage 6
+ s2_0 = vaddq_s16(s1_0, s1_7);
+ s2_1 = vaddq_s16(s1_0, s1_6);
+ s2_2 = vaddq_s16(s1_0, s1_5);
+ s2_3 = vaddq_s16(s1_0, s1_4);
+ s2_4 = vsubq_s16(s1_0, s1_4);
+ s2_5 = vsubq_s16(s1_0, s1_5);
+ s2_6 = vsubq_s16(s1_0, s1_6);
+ s2_7 = vsubq_s16(s1_0, s1_7);
+
+ s2_10 = sub_multiply_shift_and_narrow_s16(s2_14, s2_9, cospi_16_64);
+ s2_13 = add_multiply_shift_and_narrow_s16(s2_9, s2_14, cospi_16_64);
+
+ s2_11 = sub_multiply_shift_and_narrow_s16(s2_15, s2_8, cospi_16_64);
+ s2_12 = add_multiply_shift_and_narrow_s16(s2_8, s2_15, cospi_16_64);
+
+ s2_16 = vaddq_s16(s1_16, s2_23);
+ s2_17 = vaddq_s16(s1_17, s2_22);
+ s2_18 = vaddq_s16(s1_18, s1_21);
+ s2_19 = vaddq_s16(s1_19, s1_20);
+ s2_20 = vsubq_s16(s1_19, s1_20);
+ s2_21 = vsubq_s16(s1_18, s1_21);
+ s2_22 = vsubq_s16(s1_17, s2_22);
+ s2_23 = vsubq_s16(s1_16, s2_23);
+
+ s3_24 = vsubq_s16(s1_31, s2_24);
+ s3_25 = vsubq_s16(s1_30, s2_25);
+ s3_26 = vsubq_s16(s1_29, s1_26);
+ s3_27 = vsubq_s16(s1_28, s1_27);
+ s2_28 = vaddq_s16(s1_27, s1_28);
+ s2_29 = vaddq_s16(s1_26, s1_29);
+ s2_30 = vaddq_s16(s2_25, s1_30);
+ s2_31 = vaddq_s16(s2_24, s1_31);
+
+ // stage 7
+ s1_0 = vaddq_s16(s2_0, s2_15);
+ s1_1 = vaddq_s16(s2_1, s2_14);
+ s1_2 = vaddq_s16(s2_2, s2_13);
+ s1_3 = vaddq_s16(s2_3, s2_12);
+ s1_4 = vaddq_s16(s2_4, s2_11);
+ s1_5 = vaddq_s16(s2_5, s2_10);
+ s1_6 = vaddq_s16(s2_6, s2_9);
+ s1_7 = vaddq_s16(s2_7, s2_8);
+ s1_8 = vsubq_s16(s2_7, s2_8);
+ s1_9 = vsubq_s16(s2_6, s2_9);
+ s1_10 = vsubq_s16(s2_5, s2_10);
+ s1_11 = vsubq_s16(s2_4, s2_11);
+ s1_12 = vsubq_s16(s2_3, s2_12);
+ s1_13 = vsubq_s16(s2_2, s2_13);
+ s1_14 = vsubq_s16(s2_1, s2_14);
+ s1_15 = vsubq_s16(s2_0, s2_15);
+
+ s1_20 = sub_multiply_shift_and_narrow_s16(s3_27, s2_20, cospi_16_64);
+ s1_27 = add_multiply_shift_and_narrow_s16(s2_20, s3_27, cospi_16_64);
+
+ s1_21 = sub_multiply_shift_and_narrow_s16(s3_26, s2_21, cospi_16_64);
+ s1_26 = add_multiply_shift_and_narrow_s16(s2_21, s3_26, cospi_16_64);
+
+ s1_22 = sub_multiply_shift_and_narrow_s16(s3_25, s2_22, cospi_16_64);
+ s1_25 = add_multiply_shift_and_narrow_s16(s2_22, s3_25, cospi_16_64);
+
+ s1_23 = sub_multiply_shift_and_narrow_s16(s3_24, s2_23, cospi_16_64);
+ s1_24 = add_multiply_shift_and_narrow_s16(s2_23, s3_24, cospi_16_64);
+
+ // final stage
+ vst1q_s16(output, vaddq_s16(s1_0, s2_31));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_1, s2_30));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_2, s2_29));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_3, s2_28));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_4, s1_27));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_5, s1_26));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_6, s1_25));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_7, s1_24));
+ output += 8;
+
+ vst1q_s16(output, vaddq_s16(s1_8, s1_23));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_9, s1_22));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_10, s1_21));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_11, s1_20));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_12, s2_19));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_13, s2_18));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_14, s2_17));
+ output += 8;
+ vst1q_s16(output, vaddq_s16(s1_15, s2_16));
+ output += 8;
+
+ vst1q_s16(output, vsubq_s16(s1_15, s2_16));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_14, s2_17));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_13, s2_18));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_12, s2_19));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_11, s1_20));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_10, s1_21));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_9, s1_22));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_8, s1_23));
+ output += 8;
+
+ vst1q_s16(output, vsubq_s16(s1_7, s1_24));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_6, s1_25));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_5, s1_26));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_4, s1_27));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_3, s2_28));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_2, s2_29));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_1, s2_30));
+ output += 8;
+ vst1q_s16(output, vsubq_s16(s1_0, s2_31));
+}
+
+static void idct32_8_neon(const int16_t *input, uint8_t *output, int stride) {
+ int16x8_t in0, in1, in2, in3, in4, in5, in6, in7;
+ int16x8_t out0, out1, out2, out3, out4, out5, out6, out7;
+ int16x8_t s1_0, s1_1, s1_2, s1_3, s1_4, s1_5, s1_6, s1_7, s1_8, s1_9, s1_10,
+ s1_11, s1_12, s1_13, s1_14, s1_15, s1_16, s1_17, s1_18, s1_19, s1_20,
+ s1_21, s1_22, s1_23, s1_24, s1_25, s1_26, s1_27, s1_28, s1_29, s1_30,
+ s1_31;
+ int16x8_t s2_0, s2_1, s2_2, s2_3, s2_4, s2_5, s2_6, s2_7, s2_8, s2_9, s2_10,
+ s2_11, s2_12, s2_13, s2_14, s2_15, s2_16, s2_17, s2_18, s2_19, s2_20,
+ s2_21, s2_22, s2_23, s2_24, s2_25, s2_26, s2_27, s2_28, s2_29, s2_30,
+ s2_31;
+ int16x8_t s3_24, s3_25, s3_26, s3_27;
+
+ load_and_transpose_s16_8x8(input, 8, &in0, &in1, &in2, &in3, &in4, &in5, &in6,
+ &in7);
+
+ // stage 1
+ s1_16 = multiply_shift_and_narrow_s16(in1, cospi_31_64);
+ s1_31 = multiply_shift_and_narrow_s16(in1, cospi_1_64);
+
+ // Different for _8_
+ s1_19 = multiply_shift_and_narrow_s16(in7, -cospi_25_64);
+ s1_28 = multiply_shift_and_narrow_s16(in7, cospi_7_64);
+
+ s1_20 = multiply_shift_and_narrow_s16(in5, cospi_27_64);
+ s1_27 = multiply_shift_and_narrow_s16(in5, cospi_5_64);
+
+ s1_23 = multiply_shift_and_narrow_s16(in3, -cospi_29_64);
+ s1_24 = multiply_shift_and_narrow_s16(in3, cospi_3_64);
+
+ // stage 2
+ s2_8 = multiply_shift_and_narrow_s16(in2, cospi_30_64);
+ s2_15 = multiply_shift_and_narrow_s16(in2, cospi_2_64);
+
+ s2_11 = multiply_shift_and_narrow_s16(in6, -cospi_26_64);
+ s2_12 = multiply_shift_and_narrow_s16(in6, cospi_6_64);
+
+ // stage 3
+ s1_4 = multiply_shift_and_narrow_s16(in4, cospi_28_64);
+ s1_7 = multiply_shift_and_narrow_s16(in4, cospi_4_64);
+
+ s1_17 = multiply_accumulate_shift_and_narrow_s16(s1_16, -cospi_4_64, s1_31,
+ cospi_28_64);
+ s1_30 = multiply_accumulate_shift_and_narrow_s16(s1_16, cospi_28_64, s1_31,
+ cospi_4_64);
+
+ // Different for _8_
+ s1_18 = multiply_accumulate_shift_and_narrow_s16(s1_19, -cospi_28_64, s1_28,
+ -cospi_4_64);
+ s1_29 = multiply_accumulate_shift_and_narrow_s16(s1_19, -cospi_4_64, s1_28,
+ cospi_28_64);
+
+ s1_21 = multiply_accumulate_shift_and_narrow_s16(s1_20, -cospi_20_64, s1_27,
+ cospi_12_64);
+ s1_26 = multiply_accumulate_shift_and_narrow_s16(s1_20, cospi_12_64, s1_27,
+ cospi_20_64);
+
+ s1_22 = multiply_accumulate_shift_and_narrow_s16(s1_23, -cospi_12_64, s1_24,
+ -cospi_20_64);
+ s1_25 = multiply_accumulate_shift_and_narrow_s16(s1_23, -cospi_20_64, s1_24,
+ cospi_12_64);
+
+ // stage 4
+ s1_0 = multiply_shift_and_narrow_s16(in0, cospi_16_64);
+
+ s2_9 = multiply_accumulate_shift_and_narrow_s16(s2_8, -cospi_8_64, s2_15,
+ cospi_24_64);
+ s2_14 = multiply_accumulate_shift_and_narrow_s16(s2_8, cospi_24_64, s2_15,
+ cospi_8_64);
+
+ s2_10 = multiply_accumulate_shift_and_narrow_s16(s2_11, -cospi_24_64, s2_12,
+ -cospi_8_64);
+ s2_13 = multiply_accumulate_shift_and_narrow_s16(s2_11, -cospi_8_64, s2_12,
+ cospi_24_64);
+
+ s2_16 = vaddq_s16(s1_16, s1_19);
+
+ s2_17 = vaddq_s16(s1_17, s1_18);
+ s2_18 = vsubq_s16(s1_17, s1_18);
+
+ s2_19 = vsubq_s16(s1_16, s1_19);
+
+ s2_20 = vsubq_s16(s1_23, s1_20);
+ s2_21 = vsubq_s16(s1_22, s1_21);
+
+ s2_22 = vaddq_s16(s1_21, s1_22);
+ s2_23 = vaddq_s16(s1_20, s1_23);
+
+ s2_24 = vaddq_s16(s1_24, s1_27);
+ s2_25 = vaddq_s16(s1_25, s1_26);
+ s2_26 = vsubq_s16(s1_25, s1_26);
+ s2_27 = vsubq_s16(s1_24, s1_27);
+
+ s2_28 = vsubq_s16(s1_31, s1_28);
+ s2_29 = vsubq_s16(s1_30, s1_29);
+ s2_30 = vaddq_s16(s1_29, s1_30);
+ s2_31 = vaddq_s16(s1_28, s1_31);
+
+ // stage 5
+ s1_5 = sub_multiply_shift_and_narrow_s16(s1_7, s1_4, cospi_16_64);
+ s1_6 = add_multiply_shift_and_narrow_s16(s1_4, s1_7, cospi_16_64);
+
+ s1_8 = vaddq_s16(s2_8, s2_11);
+ s1_9 = vaddq_s16(s2_9, s2_10);
+ s1_10 = vsubq_s16(s2_9, s2_10);
+ s1_11 = vsubq_s16(s2_8, s2_11);
+ s1_12 = vsubq_s16(s2_15, s2_12);
+ s1_13 = vsubq_s16(s2_14, s2_13);
+ s1_14 = vaddq_s16(s2_13, s2_14);
+ s1_15 = vaddq_s16(s2_12, s2_15);
+
+ s1_18 = multiply_accumulate_shift_and_narrow_s16(s2_18, -cospi_8_64, s2_29,
+ cospi_24_64);
+ s1_29 = multiply_accumulate_shift_and_narrow_s16(s2_18, cospi_24_64, s2_29,
+ cospi_8_64);
+
+ s1_19 = multiply_accumulate_shift_and_narrow_s16(s2_19, -cospi_8_64, s2_28,
+ cospi_24_64);
+ s1_28 = multiply_accumulate_shift_and_narrow_s16(s2_19, cospi_24_64, s2_28,
+ cospi_8_64);
+
+ s1_20 = multiply_accumulate_shift_and_narrow_s16(s2_20, -cospi_24_64, s2_27,
+ -cospi_8_64);
+ s1_27 = multiply_accumulate_shift_and_narrow_s16(s2_20, -cospi_8_64, s2_27,
+ cospi_24_64);
+
+ s1_21 = multiply_accumulate_shift_and_narrow_s16(s2_21, -cospi_24_64, s2_26,
+ -cospi_8_64);
+ s1_26 = multiply_accumulate_shift_and_narrow_s16(s2_21, -cospi_8_64, s2_26,
+ cospi_24_64);
+
+ // stage 6
+ s2_0 = vaddq_s16(s1_0, s1_7);
+ s2_1 = vaddq_s16(s1_0, s1_6);
+ s2_2 = vaddq_s16(s1_0, s1_5);
+ s2_3 = vaddq_s16(s1_0, s1_4);
+ s2_4 = vsubq_s16(s1_0, s1_4);
+ s2_5 = vsubq_s16(s1_0, s1_5);
+ s2_6 = vsubq_s16(s1_0, s1_6);
+ s2_7 = vsubq_s16(s1_0, s1_7);
+
+ s2_10 = sub_multiply_shift_and_narrow_s16(s1_13, s1_10, cospi_16_64);
+ s2_13 = add_multiply_shift_and_narrow_s16(s1_10, s1_13, cospi_16_64);
+
+ s2_11 = sub_multiply_shift_and_narrow_s16(s1_12, s1_11, cospi_16_64);
+ s2_12 = add_multiply_shift_and_narrow_s16(s1_11, s1_12, cospi_16_64);
+
+ s1_16 = vaddq_s16(s2_16, s2_23);
+ s1_17 = vaddq_s16(s2_17, s2_22);
+ s2_18 = vaddq_s16(s1_18, s1_21);
+ s2_19 = vaddq_s16(s1_19, s1_20);
+ s2_20 = vsubq_s16(s1_19, s1_20);
+ s2_21 = vsubq_s16(s1_18, s1_21);
+ s1_22 = vsubq_s16(s2_17, s2_22);
+ s1_23 = vsubq_s16(s2_16, s2_23);
+
+ s3_24 = vsubq_s16(s2_31, s2_24);
+ s3_25 = vsubq_s16(s2_30, s2_25);
+ s3_26 = vsubq_s16(s1_29, s1_26);
+ s3_27 = vsubq_s16(s1_28, s1_27);
+ s2_28 = vaddq_s16(s1_27, s1_28);
+ s2_29 = vaddq_s16(s1_26, s1_29);
+ s2_30 = vaddq_s16(s2_25, s2_30);
+ s2_31 = vaddq_s16(s2_24, s2_31);
+
+ // stage 7
+ s1_0 = vaddq_s16(s2_0, s1_15);
+ s1_1 = vaddq_s16(s2_1, s1_14);
+ s1_2 = vaddq_s16(s2_2, s2_13);
+ s1_3 = vaddq_s16(s2_3, s2_12);
+ s1_4 = vaddq_s16(s2_4, s2_11);
+ s1_5 = vaddq_s16(s2_5, s2_10);
+ s1_6 = vaddq_s16(s2_6, s1_9);
+ s1_7 = vaddq_s16(s2_7, s1_8);
+ s1_8 = vsubq_s16(s2_7, s1_8);
+ s1_9 = vsubq_s16(s2_6, s1_9);
+ s1_10 = vsubq_s16(s2_5, s2_10);
+ s1_11 = vsubq_s16(s2_4, s2_11);
+ s1_12 = vsubq_s16(s2_3, s2_12);
+ s1_13 = vsubq_s16(s2_2, s2_13);
+ s1_14 = vsubq_s16(s2_1, s1_14);
+ s1_15 = vsubq_s16(s2_0, s1_15);
+
+ s1_20 = sub_multiply_shift_and_narrow_s16(s3_27, s2_20, cospi_16_64);
+ s1_27 = add_multiply_shift_and_narrow_s16(s2_20, s3_27, cospi_16_64);
+
+ s1_21 = sub_multiply_shift_and_narrow_s16(s3_26, s2_21, cospi_16_64);
+ s1_26 = add_multiply_shift_and_narrow_s16(s2_21, s3_26, cospi_16_64);
+
+ s2_22 = sub_multiply_shift_and_narrow_s16(s3_25, s1_22, cospi_16_64);
+ s1_25 = add_multiply_shift_and_narrow_s16(s1_22, s3_25, cospi_16_64);
+
+ s2_23 = sub_multiply_shift_and_narrow_s16(s3_24, s1_23, cospi_16_64);
+ s1_24 = add_multiply_shift_and_narrow_s16(s1_23, s3_24, cospi_16_64);
+
+ // final stage
+ out0 = vaddq_s16(s1_0, s2_31);
+ out1 = vaddq_s16(s1_1, s2_30);
+ out2 = vaddq_s16(s1_2, s2_29);
+ out3 = vaddq_s16(s1_3, s2_28);
+ out4 = vaddq_s16(s1_4, s1_27);
+ out5 = vaddq_s16(s1_5, s1_26);
+ out6 = vaddq_s16(s1_6, s1_25);
+ out7 = vaddq_s16(s1_7, s1_24);
+
+ add_and_store_u8_s16(out0, out1, out2, out3, out4, out5, out6, out7, output,
+ stride);
+
+ out0 = vaddq_s16(s1_8, s2_23);
+ out1 = vaddq_s16(s1_9, s2_22);
+ out2 = vaddq_s16(s1_10, s1_21);
+ out3 = vaddq_s16(s1_11, s1_20);
+ out4 = vaddq_s16(s1_12, s2_19);
+ out5 = vaddq_s16(s1_13, s2_18);
+ out6 = vaddq_s16(s1_14, s1_17);
+ out7 = vaddq_s16(s1_15, s1_16);
+
+ add_and_store_u8_s16(out0, out1, out2, out3, out4, out5, out6, out7,
+ output + (8 * stride), stride);
+
+ out0 = vsubq_s16(s1_15, s1_16);
+ out1 = vsubq_s16(s1_14, s1_17);
+ out2 = vsubq_s16(s1_13, s2_18);
+ out3 = vsubq_s16(s1_12, s2_19);
+ out4 = vsubq_s16(s1_11, s1_20);
+ out5 = vsubq_s16(s1_10, s1_21);
+ out6 = vsubq_s16(s1_9, s2_22);
+ out7 = vsubq_s16(s1_8, s2_23);
+
+ add_and_store_u8_s16(out0, out1, out2, out3, out4, out5, out6, out7,
+ output + (16 * stride), stride);
+
+ out0 = vsubq_s16(s1_7, s1_24);
+ out1 = vsubq_s16(s1_6, s1_25);
+ out2 = vsubq_s16(s1_5, s1_26);
+ out3 = vsubq_s16(s1_4, s1_27);
+ out4 = vsubq_s16(s1_3, s2_28);
+ out5 = vsubq_s16(s1_2, s2_29);
+ out6 = vsubq_s16(s1_1, s2_30);
+ out7 = vsubq_s16(s1_0, s2_31);
+
+ add_and_store_u8_s16(out0, out1, out2, out3, out4, out5, out6, out7,
+ output + (24 * stride), stride);
+}
+
+void vpx_idct32x32_34_add_neon(const int16_t *input, uint8_t *dest,
+ int stride) {
+ int i;
+ int16_t temp[32 * 8];
+ int16_t *t = temp;
+
+ idct32_6_neon(input, t);
+
+ for (i = 0; i < 32; i += 8) {
+ idct32_8_neon(t, dest, stride);
+ t += (8 * 8);
+ dest += 8;
+ }
+}
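The new file leans on helpers declared in vpx_dsp/arm/idct_neon.h
(load_and_transpose_s16_8x8, multiply_shift_and_narrow_s16 and its
accumulate/add/sub variants, add_and_store_u8_s16) whose bodies are not part of
this hunk. As a rough sketch, multiply_shift_and_narrow_s16 is presumably the
vector form of dct_const_round_shift(a * constant), mirroring the saturating
vqrshrn.s32 #14 sequences in the assembly above; this is an assumption, not the
header's actual code:

/* Assumed shape of multiply_shift_and_narrow_s16(); see idct_neon.h for the
 * real definition. Widen, multiply by the cosine constant, then
 * round-shift-narrow by DCT_CONST_BITS (14). */
#include <arm_neon.h>

static int16x8_t mul_shift_narrow_s16_sketch(const int16x8_t a,
                                             const int16_t a_const) {
  const int32x4_t lo = vmull_n_s16(vget_low_s16(a), a_const);
  const int32x4_t hi = vmull_n_s16(vget_high_s16(a), a_const);
  return vcombine_s16(vqrshrn_n_s32(lo, 14), vqrshrn_n_s32(hi, 14));
}

The driver at the bottom of the file then runs idct32_6_neon once for the row
pass into a 32x8 temporary and idct32_8_neon four times for the column passes,
producing eight output columns per call.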
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_add_neon.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_add_neon.asm
deleted file mode 100644
index 7483ee77e18..00000000000
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct32x32_add_neon.asm
+++ /dev/null
@@ -1,1299 +0,0 @@
-;
-; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-;
-; Use of this source code is governed by a BSD-style license
-; that can be found in the LICENSE file in the root of the source
-; tree. An additional intellectual property rights grant can be found
-; in the file PATENTS. All contributing project authors may
-; be found in the AUTHORS file in the root of the source tree.
-;
-
-;TODO(cd): adjust these constants to be able to use vqdmulh for faster
-; dct_const_round_shift(a * b) within butterfly calculations.
-cospi_1_64 EQU 16364
-cospi_2_64 EQU 16305
-cospi_3_64 EQU 16207
-cospi_4_64 EQU 16069
-cospi_5_64 EQU 15893
-cospi_6_64 EQU 15679
-cospi_7_64 EQU 15426
-cospi_8_64 EQU 15137
-cospi_9_64 EQU 14811
-cospi_10_64 EQU 14449
-cospi_11_64 EQU 14053
-cospi_12_64 EQU 13623
-cospi_13_64 EQU 13160
-cospi_14_64 EQU 12665
-cospi_15_64 EQU 12140
-cospi_16_64 EQU 11585
-cospi_17_64 EQU 11003
-cospi_18_64 EQU 10394
-cospi_19_64 EQU 9760
-cospi_20_64 EQU 9102
-cospi_21_64 EQU 8423
-cospi_22_64 EQU 7723
-cospi_23_64 EQU 7005
-cospi_24_64 EQU 6270
-cospi_25_64 EQU 5520
-cospi_26_64 EQU 4756
-cospi_27_64 EQU 3981
-cospi_28_64 EQU 3196
-cospi_29_64 EQU 2404
-cospi_30_64 EQU 1606
-cospi_31_64 EQU 804
-
-
- EXPORT |vpx_idct32x32_1024_add_neon|
- ARM
- REQUIRE8
- PRESERVE8
-
- AREA ||.text||, CODE, READONLY, ALIGN=2
-
- AREA Block, CODE, READONLY
-
- ; --------------------------------------------------------------------------
- ; Load from transposed_buffer
- ; q13 = transposed_buffer[first_offset]
- ; q14 = transposed_buffer[second_offset]
- ; for proper address calculation, the last offset used when manipulating
- ; transposed_buffer must be passed in. use 0 for first use.
- MACRO
- LOAD_FROM_TRANSPOSED $prev_offset, $first_offset, $second_offset
- ; address calculation with proper stride and loading
- add r0, #($first_offset - $prev_offset )*8*2
- vld1.s16 {q14}, [r0]
- add r0, #($second_offset - $first_offset)*8*2
- vld1.s16 {q13}, [r0]
- ; (used) two registers (q14, q13)
- MEND
- ; --------------------------------------------------------------------------
- ; Load from output (used as temporary storage)
- ; reg1 = output[first_offset]
- ; reg2 = output[second_offset]
- ; for proper address calculation, the last offset used when manipulating
-    ; output (whether reading or storing) must be passed in. use 0 for first
- ; use.
- MACRO
- LOAD_FROM_OUTPUT $prev_offset, $first_offset, $second_offset, $reg1, $reg2
- ; address calculation with proper stride and loading
- add r1, #($first_offset - $prev_offset )*32*2
- vld1.s16 {$reg1}, [r1]
- add r1, #($second_offset - $first_offset)*32*2
- vld1.s16 {$reg2}, [r1]
- ; (used) two registers ($reg1, $reg2)
- MEND
- ; --------------------------------------------------------------------------
-    ; Store into output (sometimes as temporary storage)
- ; output[first_offset] = reg1
- ; output[second_offset] = reg2
- ; for proper address calculation, the last offset used when manipulating
-    ; output (whether reading or storing) must be passed in. use 0 for first
- ; use.
- MACRO
- STORE_IN_OUTPUT $prev_offset, $first_offset, $second_offset, $reg1, $reg2
- ; address calculation with proper stride and storing
- add r1, #($first_offset - $prev_offset )*32*2
- vst1.16 {$reg1}, [r1]
- add r1, #($second_offset - $first_offset)*32*2
- vst1.16 {$reg2}, [r1]
- MEND
- ; --------------------------------------------------------------------------
- ; Combine-add results with current destination content
- ; q6-q9 contain the results (out[j * 32 + 0-31])
- MACRO
- STORE_COMBINE_CENTER_RESULTS
- ; load dest[j * dest_stride + 0-31]
- vld1.s16 {d8}, [r10], r2
- vld1.s16 {d11}, [r9], r11
- vld1.s16 {d9}, [r10]
- vld1.s16 {d10}, [r9]
- ; ROUND_POWER_OF_TWO
- vrshr.s16 q7, q7, #6
- vrshr.s16 q8, q8, #6
- vrshr.s16 q9, q9, #6
- vrshr.s16 q6, q6, #6
- ; add to dest[j * dest_stride + 0-31]
- vaddw.u8 q7, q7, d9
- vaddw.u8 q8, q8, d10
- vaddw.u8 q9, q9, d11
- vaddw.u8 q6, q6, d8
- ; clip pixel
- vqmovun.s16 d9, q7
- vqmovun.s16 d10, q8
- vqmovun.s16 d11, q9
- vqmovun.s16 d8, q6
- ; store back into dest[j * dest_stride + 0-31]
- vst1.16 {d9}, [r10], r11
- vst1.16 {d10}, [r9], r2
- vst1.16 {d8}, [r10]
- vst1.16 {d11}, [r9]
- ; update pointers (by dest_stride * 2)
- sub r9, r9, r2, lsl #1
- add r10, r10, r2, lsl #1
- MEND
- ; --------------------------------------------------------------------------
- ; Combine-add results with current destination content
- ; q6-q9 contain the results (out[j * 32 + 0-31])
- MACRO
- STORE_COMBINE_CENTER_RESULTS_LAST
- ; load dest[j * dest_stride + 0-31]
- vld1.s16 {d8}, [r10], r2
- vld1.s16 {d11}, [r9], r11
- vld1.s16 {d9}, [r10]
- vld1.s16 {d10}, [r9]
- ; ROUND_POWER_OF_TWO
- vrshr.s16 q7, q7, #6
- vrshr.s16 q8, q8, #6
- vrshr.s16 q9, q9, #6
- vrshr.s16 q6, q6, #6
- ; add to dest[j * dest_stride + 0-31]
- vaddw.u8 q7, q7, d9
- vaddw.u8 q8, q8, d10
- vaddw.u8 q9, q9, d11
- vaddw.u8 q6, q6, d8
- ; clip pixel
- vqmovun.s16 d9, q7
- vqmovun.s16 d10, q8
- vqmovun.s16 d11, q9
- vqmovun.s16 d8, q6
- ; store back into dest[j * dest_stride + 0-31]
- vst1.16 {d9}, [r10], r11
- vst1.16 {d10}, [r9], r2
- vst1.16 {d8}, [r10]!
- vst1.16 {d11}, [r9]!
- ; update pointers (by dest_stride * 2)
- sub r9, r9, r2, lsl #1
- add r10, r10, r2, lsl #1
- MEND
- ; --------------------------------------------------------------------------
- ; Combine-add results with current destination content
- ; q4-q7 contain the results (out[j * 32 + 0-31])
- MACRO
- STORE_COMBINE_EXTREME_RESULTS
- ; load dest[j * dest_stride + 0-31]
- vld1.s16 {d4}, [r7], r2
- vld1.s16 {d7}, [r6], r11
- vld1.s16 {d5}, [r7]
- vld1.s16 {d6}, [r6]
- ; ROUND_POWER_OF_TWO
- vrshr.s16 q5, q5, #6
- vrshr.s16 q6, q6, #6
- vrshr.s16 q7, q7, #6
- vrshr.s16 q4, q4, #6
- ; add to dest[j * dest_stride + 0-31]
- vaddw.u8 q5, q5, d5
- vaddw.u8 q6, q6, d6
- vaddw.u8 q7, q7, d7
- vaddw.u8 q4, q4, d4
- ; clip pixel
- vqmovun.s16 d5, q5
- vqmovun.s16 d6, q6
- vqmovun.s16 d7, q7
- vqmovun.s16 d4, q4
- ; store back into dest[j * dest_stride + 0-31]
- vst1.16 {d5}, [r7], r11
- vst1.16 {d6}, [r6], r2
- vst1.16 {d7}, [r6]
- vst1.16 {d4}, [r7]
- ; update pointers (by dest_stride * 2)
- sub r6, r6, r2, lsl #1
- add r7, r7, r2, lsl #1
- MEND
- ; --------------------------------------------------------------------------
- ; Combine-add results with current destination content
- ; q4-q7 contain the results (out[j * 32 + 0-31])
- MACRO
- STORE_COMBINE_EXTREME_RESULTS_LAST
- ; load dest[j * dest_stride + 0-31]
- vld1.s16 {d4}, [r7], r2
- vld1.s16 {d7}, [r6], r11
- vld1.s16 {d5}, [r7]
- vld1.s16 {d6}, [r6]
- ; ROUND_POWER_OF_TWO
- vrshr.s16 q5, q5, #6
- vrshr.s16 q6, q6, #6
- vrshr.s16 q7, q7, #6
- vrshr.s16 q4, q4, #6
- ; add to dest[j * dest_stride + 0-31]
- vaddw.u8 q5, q5, d5
- vaddw.u8 q6, q6, d6
- vaddw.u8 q7, q7, d7
- vaddw.u8 q4, q4, d4
- ; clip pixel
- vqmovun.s16 d5, q5
- vqmovun.s16 d6, q6
- vqmovun.s16 d7, q7
- vqmovun.s16 d4, q4
- ; store back into dest[j * dest_stride + 0-31]
- vst1.16 {d5}, [r7], r11
- vst1.16 {d6}, [r6], r2
- vst1.16 {d7}, [r6]!
- vst1.16 {d4}, [r7]!
- ; update pointers (by dest_stride * 2)
- sub r6, r6, r2, lsl #1
- add r7, r7, r2, lsl #1
- MEND
- ; --------------------------------------------------------------------------
- ; Touches q8-q12, q15 (q13-q14 are preserved)
- ; valid output registers are anything but q8-q11
- MACRO
- DO_BUTTERFLY $regC, $regD, $regA, $regB, $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4
- ; TODO(cd): have special case to re-use constants when they are similar for
- ; consecutive butterflies
- ; TODO(cd): have special case when both constants are the same, do the
- ; additions/subtractions before the multiplies.
- ; generate the constants
- ; generate scalar constants
- mov r8, #$first_constant & 0xFF00
- mov r12, #$second_constant & 0xFF00
- add r8, #$first_constant & 0x00FF
- add r12, #$second_constant & 0x00FF
- ; generate vector constants
- vdup.16 d30, r8
- vdup.16 d31, r12
- ; (used) two for inputs (regA-regD), one for constants (q15)
- ; do some multiplications (ordered for maximum latency hiding)
- vmull.s16 q8, $regC, d30
- vmull.s16 q10, $regA, d31
- vmull.s16 q9, $regD, d30
- vmull.s16 q11, $regB, d31
- vmull.s16 q12, $regC, d31
- ; (used) five for intermediate (q8-q12), one for constants (q15)
- ; do some addition/subtractions (to get back two register)
- vsub.s32 q8, q8, q10
- vsub.s32 q9, q9, q11
- ; do more multiplications (ordered for maximum latency hiding)
- vmull.s16 q10, $regD, d31
- vmull.s16 q11, $regA, d30
- vmull.s16 q15, $regB, d30
- ; (used) six for intermediate (q8-q12, q15)
- ; do more addition/subtractions
- vadd.s32 q11, q12, q11
- vadd.s32 q10, q10, q15
- ; (used) four for intermediate (q8-q11)
- ; dct_const_round_shift
- vqrshrn.s32 $reg1, q8, #14
- vqrshrn.s32 $reg2, q9, #14
- vqrshrn.s32 $reg3, q11, #14
- vqrshrn.s32 $reg4, q10, #14
- ; (used) two for results, well four d registers
- MEND
- ; --------------------------------------------------------------------------
- ; Touches q8-q12, q15 (q13-q14 are preserved)
- ; valid output registers are anything but q8-q11
- MACRO
- DO_BUTTERFLY_STD $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4
- DO_BUTTERFLY d28, d29, d26, d27, $first_constant, $second_constant, $reg1, $reg2, $reg3, $reg4
- MEND
- ; --------------------------------------------------------------------------
-
-;void vpx_idct32x32_1024_add_neon(int16_t *input, uint8_t *dest, int dest_stride);
-;
-; r0 int16_t *input,
-; r1 uint8_t *dest,
-; r2 int dest_stride)
-; loop counters
-; r4 bands loop counter
-; r5 pass loop counter
-; r8 transpose loop counter
-; combine-add pointers
-; r6 dest + 31 * dest_stride, descending (30, 29, 28, ...)
-; r7 dest + 0 * dest_stride, ascending (1, 2, 3, ...)
-; r9 dest + 15 * dest_stride, descending (14, 13, 12, ...)
-; r10 dest + 16 * dest_stride, ascending (17, 18, 19, ...)
-
-|vpx_idct32x32_1024_add_neon| PROC
- ; This function does one pass of idct32x32 transform.
- ;
- ; This is done by transposing the input and then doing a 1d transform on
- ; columns. In the first pass, the transposed columns are the original
-    ; rows. In the second pass, after the transposition, the columns are the
- ; original columns.
- ; The 1d transform is done by looping over bands of eight columns (the
- ; idct32_bands loop). For each band, the transform input transposition
- ; is done on demand, one band of four 8x8 matrices at a time. The four
- ; matrices are transposed by pairs (the idct32_transpose_pair loop).
- push {r4-r11}
- vpush {d8-d15}
- ; stack operation
- ; internal buffer used to transpose 8 lines into before transforming them
- ; int16_t transpose_buffer[32 * 8];
- ; at sp + [4096, 4607]
- ; results of the first pass (transpose and transform rows)
- ; int16_t pass1[32 * 32];
- ; at sp + [0, 2047]
- ; results of the second pass (transpose and transform columns)
- ; int16_t pass2[32 * 32];
- ; at sp + [2048, 4095]
- sub sp, sp, #512+2048+2048
-
- ; r6 = dest + 31 * dest_stride
- ; r7 = dest + 0 * dest_stride
- ; r9 = dest + 15 * dest_stride
- ; r10 = dest + 16 * dest_stride
- rsb r6, r2, r2, lsl #5
- rsb r9, r2, r2, lsl #4
- add r10, r1, r2, lsl #4
- mov r7, r1
- add r6, r6, r1
- add r9, r9, r1
- ; r11 = -dest_stride
- neg r11, r2
- ; r3 = input
- mov r3, r0
- ; parameters for first pass
- ; r0 = transpose_buffer[32 * 8]
- add r0, sp, #4096
- ; r1 = pass1[32 * 32]
- mov r1, sp
-
- mov r5, #0 ; initialize pass loop counter
-idct32_pass_loop
- mov r4, #4 ; initialize bands loop counter
-idct32_bands_loop
- mov r8, #2 ; initialize transpose loop counter
-idct32_transpose_pair_loop
- ; Load two horizontally consecutive 8x8 16bit data matrices. The first one
- ; into q0-q7 and the second one into q8-q15. There is a stride of 64,
- ; adjusted to 32 because of the two post-increments.
- vld1.s16 {q8}, [r3]!
- vld1.s16 {q0}, [r3]!
- add r3, #32
- vld1.s16 {q9}, [r3]!
- vld1.s16 {q1}, [r3]!
- add r3, #32
- vld1.s16 {q10}, [r3]!
- vld1.s16 {q2}, [r3]!
- add r3, #32
- vld1.s16 {q11}, [r3]!
- vld1.s16 {q3}, [r3]!
- add r3, #32
- vld1.s16 {q12}, [r3]!
- vld1.s16 {q4}, [r3]!
- add r3, #32
- vld1.s16 {q13}, [r3]!
- vld1.s16 {q5}, [r3]!
- add r3, #32
- vld1.s16 {q14}, [r3]!
- vld1.s16 {q6}, [r3]!
- add r3, #32
- vld1.s16 {q15}, [r3]!
- vld1.s16 {q7}, [r3]!
-
- ; Transpose the two 8x8 16bit data matrices.
- vswp d17, d24
- vswp d23, d30
- vswp d21, d28
- vswp d19, d26
- vswp d1, d8
- vswp d7, d14
- vswp d5, d12
- vswp d3, d10
- vtrn.32 q8, q10
- vtrn.32 q9, q11
- vtrn.32 q12, q14
- vtrn.32 q13, q15
- vtrn.32 q0, q2
- vtrn.32 q1, q3
- vtrn.32 q4, q6
- vtrn.32 q5, q7
- vtrn.16 q8, q9
- vtrn.16 q10, q11
- vtrn.16 q12, q13
- vtrn.16 q14, q15
- vtrn.16 q0, q1
- vtrn.16 q2, q3
- vtrn.16 q4, q5
- vtrn.16 q6, q7
-
- ; Store both matrices after each other. There is a stride of 32, which
- ; adjusts to nothing because of the post-increments.
- vst1.16 {q8}, [r0]!
- vst1.16 {q9}, [r0]!
- vst1.16 {q10}, [r0]!
- vst1.16 {q11}, [r0]!
- vst1.16 {q12}, [r0]!
- vst1.16 {q13}, [r0]!
- vst1.16 {q14}, [r0]!
- vst1.16 {q15}, [r0]!
- vst1.16 {q0}, [r0]!
- vst1.16 {q1}, [r0]!
- vst1.16 {q2}, [r0]!
- vst1.16 {q3}, [r0]!
- vst1.16 {q4}, [r0]!
- vst1.16 {q5}, [r0]!
- vst1.16 {q6}, [r0]!
- vst1.16 {q7}, [r0]!
-
- ; increment pointers by adjusted stride (not necessary for r0/out)
- ; go back by 7*32 for the seven lines moved fully by read and add
-    ; go back by 32 for the eighth line, which was only read
-    ; advance by 16*2 to go to the next pair
- sub r3, r3, #7*32*2 + 32 - 16*2
- ; transpose pair loop processing
- subs r8, r8, #1
- bne idct32_transpose_pair_loop
-
- ; restore r0/input to its original value
- sub r0, r0, #32*8*2
-
- ; Instead of doing the transforms stage by stage, it is done by loading
- ; some input values and doing as many stages as possible to minimize the
- ; storing/loading of intermediate results. To fit within registers, the
- ; final coefficients are cut into four blocks:
- ; BLOCK A: 16-19,28-31
- ; BLOCK B: 20-23,24-27
- ; BLOCK C: 8-10,11-15
- ; BLOCK D: 0-3,4-7
- ; Blocks A and C are straight calculation through the various stages. In
- ; block B, further calculations are performed using the results from
- ; block A. In block D, further calculations are performed using the results
- ; from block C and then the final calculations are done using results from
- ; block A and B which have been combined at the end of block B.
-
- ; --------------------------------------------------------------------------
- ; BLOCK A: 16-19,28-31
- ; --------------------------------------------------------------------------
- ; generate 16,17,30,31
- ; --------------------------------------------------------------------------
- ; part of stage 1
- ;temp1 = input[1 * 32] * cospi_31_64 - input[31 * 32] * cospi_1_64;
- ;temp2 = input[1 * 32] * cospi_1_64 + input[31 * 32] * cospi_31_64;
- ;step1b[16][i] = dct_const_round_shift(temp1);
- ;step1b[31][i] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 0, 1, 31
- DO_BUTTERFLY_STD cospi_31_64, cospi_1_64, d0, d1, d4, d5
- ; --------------------------------------------------------------------------
- ; part of stage 1
- ;temp1 = input[17 * 32] * cospi_15_64 - input[15 * 32] * cospi_17_64;
- ;temp2 = input[17 * 32] * cospi_17_64 + input[15 * 32] * cospi_15_64;
- ;step1b[17][i] = dct_const_round_shift(temp1);
- ;step1b[30][i] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 31, 17, 15
- DO_BUTTERFLY_STD cospi_15_64, cospi_17_64, d2, d3, d6, d7
- ; --------------------------------------------------------------------------
- ; part of stage 2
- ;step2[16] = step1b[16][i] + step1b[17][i];
- ;step2[17] = step1b[16][i] - step1b[17][i];
- ;step2[30] = -step1b[30][i] + step1b[31][i];
- ;step2[31] = step1b[30][i] + step1b[31][i];
- vadd.s16 q4, q0, q1
- vsub.s16 q13, q0, q1
- vadd.s16 q6, q2, q3
- vsub.s16 q14, q2, q3
- ; --------------------------------------------------------------------------
- ; part of stage 3
- ;temp1 = step1b[30][i] * cospi_28_64 - step1b[17][i] * cospi_4_64;
- ;temp2 = step1b[30][i] * cospi_4_64 - step1b[17][i] * cospi_28_64;
- ;step3[17] = dct_const_round_shift(temp1);
- ;step3[30] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_28_64, cospi_4_64, d10, d11, d14, d15
- ; --------------------------------------------------------------------------
- ; generate 18,19,28,29
- ; --------------------------------------------------------------------------
- ; part of stage 1
- ;temp1 = input[9 * 32] * cospi_23_64 - input[23 * 32] * cospi_9_64;
- ;temp2 = input[9 * 32] * cospi_9_64 + input[23 * 32] * cospi_23_64;
- ;step1b[18][i] = dct_const_round_shift(temp1);
- ;step1b[29][i] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 15, 9, 23
- DO_BUTTERFLY_STD cospi_23_64, cospi_9_64, d0, d1, d4, d5
- ; --------------------------------------------------------------------------
- ; part of stage 1
- ;temp1 = input[25 * 32] * cospi_7_64 - input[7 * 32] * cospi_25_64;
- ;temp2 = input[25 * 32] * cospi_25_64 + input[7 * 32] * cospi_7_64;
- ;step1b[19][i] = dct_const_round_shift(temp1);
- ;step1b[28][i] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 23, 25, 7
- DO_BUTTERFLY_STD cospi_7_64, cospi_25_64, d2, d3, d6, d7
- ; --------------------------------------------------------------------------
- ; part of stage 2
- ;step2[18] = -step1b[18][i] + step1b[19][i];
- ;step2[19] = step1b[18][i] + step1b[19][i];
- ;step2[28] = step1b[28][i] + step1b[29][i];
- ;step2[29] = step1b[28][i] - step1b[29][i];
- vsub.s16 q13, q3, q2
- vadd.s16 q3, q3, q2
- vsub.s16 q14, q1, q0
- vadd.s16 q2, q1, q0
- ; --------------------------------------------------------------------------
- ; part of stage 3
- ;temp1 = step1b[18][i] * (-cospi_4_64) - step1b[29][i] * (-cospi_28_64);
- ;temp2 = step1b[18][i] * (-cospi_28_64) + step1b[29][i] * (-cospi_4_64);
- ;step3[29] = dct_const_round_shift(temp1);
- ;step3[18] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD (-cospi_4_64), (-cospi_28_64), d2, d3, d0, d1
- ; --------------------------------------------------------------------------
- ; combine 16-19,28-31
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;step1[16] = step1b[16][i] + step1b[19][i];
- ;step1[17] = step1b[17][i] + step1b[18][i];
- ;step1[18] = step1b[17][i] - step1b[18][i];
- ;step1[29] = step1b[30][i] - step1b[29][i];
- ;step1[30] = step1b[30][i] + step1b[29][i];
- ;step1[31] = step1b[31][i] + step1b[28][i];
- vadd.s16 q8, q4, q2
- vadd.s16 q9, q5, q0
- vadd.s16 q10, q7, q1
- vadd.s16 q15, q6, q3
- vsub.s16 q13, q5, q0
- vsub.s16 q14, q7, q1
- STORE_IN_OUTPUT 0, 16, 31, q8, q15
- STORE_IN_OUTPUT 31, 17, 30, q9, q10
- ; --------------------------------------------------------------------------
- ; part of stage 5
- ;temp1 = step1b[29][i] * cospi_24_64 - step1b[18][i] * cospi_8_64;
- ;temp2 = step1b[29][i] * cospi_8_64 + step1b[18][i] * cospi_24_64;
- ;step2[18] = dct_const_round_shift(temp1);
- ;step2[29] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_24_64, cospi_8_64, d0, d1, d2, d3
- STORE_IN_OUTPUT 30, 29, 18, q1, q0
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;step1[19] = step1b[16][i] - step1b[19][i];
- ;step1[28] = step1b[31][i] - step1b[28][i];
- vsub.s16 q13, q4, q2
- vsub.s16 q14, q6, q3
- ; --------------------------------------------------------------------------
- ; part of stage 5
- ;temp1 = step1b[28][i] * cospi_24_64 - step1b[19][i] * cospi_8_64;
- ;temp2 = step1b[28][i] * cospi_8_64 + step1b[19][i] * cospi_24_64;
- ;step2[19] = dct_const_round_shift(temp1);
- ;step2[28] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_24_64, cospi_8_64, d8, d9, d12, d13
- STORE_IN_OUTPUT 18, 19, 28, q4, q6
- ; --------------------------------------------------------------------------
-
-
- ; --------------------------------------------------------------------------
- ; BLOCK B: 20-23,24-27
- ; --------------------------------------------------------------------------
- ; generate 20,21,26,27
- ; --------------------------------------------------------------------------
- ; part of stage 1
- ;temp1 = input[5 * 32] * cospi_27_64 - input[27 * 32] * cospi_5_64;
- ;temp2 = input[5 * 32] * cospi_5_64 + input[27 * 32] * cospi_27_64;
- ;step1b[20][i] = dct_const_round_shift(temp1);
- ;step1b[27][i] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 7, 5, 27
- DO_BUTTERFLY_STD cospi_27_64, cospi_5_64, d0, d1, d4, d5
- ; --------------------------------------------------------------------------
- ; part of stage 1
- ;temp1 = input[21 * 32] * cospi_11_64 - input[11 * 32] * cospi_21_64;
- ;temp2 = input[21 * 32] * cospi_21_64 + input[11 * 32] * cospi_11_64;
- ;step1b[21][i] = dct_const_round_shift(temp1);
- ;step1b[26][i] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 27, 21, 11
- DO_BUTTERFLY_STD cospi_11_64, cospi_21_64, d2, d3, d6, d7
- ; --------------------------------------------------------------------------
- ; part of stage 2
- ;step2[20] = step1b[20][i] + step1b[21][i];
- ;step2[21] = step1b[20][i] - step1b[21][i];
- ;step2[26] = -step1b[26][i] + step1b[27][i];
- ;step2[27] = step1b[26][i] + step1b[27][i];
- vsub.s16 q13, q0, q1
- vadd.s16 q0, q0, q1
- vsub.s16 q14, q2, q3
- vadd.s16 q2, q2, q3
- ; --------------------------------------------------------------------------
- ; part of stage 3
- ;temp1 = step1b[26][i] * cospi_12_64 - step1b[21][i] * cospi_20_64;
- ;temp2 = step1b[26][i] * cospi_20_64 + step1b[21][i] * cospi_12_64;
- ;step3[21] = dct_const_round_shift(temp1);
- ;step3[26] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_12_64, cospi_20_64, d2, d3, d6, d7
- ; --------------------------------------------------------------------------
- ; generate 22,23,24,25
- ; --------------------------------------------------------------------------
- ; part of stage 1
- ;temp1 = input[13 * 32] * cospi_19_64 - input[19 * 32] * cospi_13_64;
- ;temp2 = input[13 * 32] * cospi_13_64 + input[19 * 32] * cospi_19_64;
- ;step1b[22][i] = dct_const_round_shift(temp1);
- ;step1b[25][i] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 11, 13, 19
- DO_BUTTERFLY_STD cospi_19_64, cospi_13_64, d10, d11, d14, d15
- ; --------------------------------------------------------------------------
- ; part of stage 1
- ;temp1 = input[29 * 32] * cospi_3_64 - input[3 * 32] * cospi_29_64;
- ;temp2 = input[29 * 32] * cospi_29_64 + input[3 * 32] * cospi_3_64;
- ;step1b[23][i] = dct_const_round_shift(temp1);
- ;step1b[24][i] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 19, 29, 3
- DO_BUTTERFLY_STD cospi_3_64, cospi_29_64, d8, d9, d12, d13
- ; --------------------------------------------------------------------------
- ; part of stage 2
- ;step2[22] = -step1b[22][i] + step1b[23][i];
- ;step2[23] = step1b[22][i] + step1b[23][i];
- ;step2[24] = step1b[24][i] + step1b[25][i];
- ;step2[25] = step1b[24][i] - step1b[25][i];
- vsub.s16 q14, q4, q5
- vadd.s16 q5, q4, q5
- vsub.s16 q13, q6, q7
- vadd.s16 q6, q6, q7
- ; --------------------------------------------------------------------------
- ; part of stage 3
- ;temp1 = step1b[22][i] * (-cospi_20_64) - step1b[25][i] * (-cospi_12_64);
- ;temp2 = step1b[22][i] * (-cospi_12_64) + step1b[25][i] * (-cospi_20_64);
- ;step3[25] = dct_const_round_shift(temp1);
- ;step3[22] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD (-cospi_20_64), (-cospi_12_64), d8, d9, d14, d15
- ; --------------------------------------------------------------------------
- ; combine 20-23,24-27
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;step1[22] = step1b[22][i] + step1b[21][i];
- ;step1[23] = step1b[23][i] + step1b[20][i];
- vadd.s16 q10, q7, q1
- vadd.s16 q11, q5, q0
- ;step1[24] = step1b[24][i] + step1b[27][i];
- ;step1[25] = step1b[25][i] + step1b[26][i];
- vadd.s16 q12, q6, q2
- vadd.s16 q15, q4, q3
- ; --------------------------------------------------------------------------
- ; part of stage 6
- ;step3[16] = step1b[16][i] + step1b[23][i];
- ;step3[17] = step1b[17][i] + step1b[22][i];
- ;step3[22] = step1b[17][i] - step1b[22][i];
- ;step3[23] = step1b[16][i] - step1b[23][i];
- LOAD_FROM_OUTPUT 28, 16, 17, q14, q13
- vadd.s16 q8, q14, q11
- vadd.s16 q9, q13, q10
- vsub.s16 q13, q13, q10
- vsub.s16 q11, q14, q11
- STORE_IN_OUTPUT 17, 17, 16, q9, q8
- ; --------------------------------------------------------------------------
- ; part of stage 6
- ;step3[24] = step1b[31][i] - step1b[24][i];
- ;step3[25] = step1b[30][i] - step1b[25][i];
- ;step3[30] = step1b[30][i] + step1b[25][i];
- ;step3[31] = step1b[31][i] + step1b[24][i];
- LOAD_FROM_OUTPUT 16, 30, 31, q14, q9
- vsub.s16 q8, q9, q12
- vadd.s16 q10, q14, q15
- vsub.s16 q14, q14, q15
- vadd.s16 q12, q9, q12
- STORE_IN_OUTPUT 31, 30, 31, q10, q12
- ; --------------------------------------------------------------------------
- ; TODO(cd) do some register allocation change to remove these push/pop
- vpush {q8} ; [24]
- vpush {q11} ; [23]
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;temp1 = (step1b[25][i] - step1b[22][i]) * cospi_16_64;
- ;temp2 = (step1b[25][i] + step1b[22][i]) * cospi_16_64;
- ;step1[22] = dct_const_round_shift(temp1);
- ;step1[25] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d26, d27, d28, d29
- STORE_IN_OUTPUT 31, 25, 22, q14, q13
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;temp1 = (step1b[24][i] - step1b[23][i]) * cospi_16_64;
- ;temp2 = (step1b[24][i] + step1b[23][i]) * cospi_16_64;
- ;step1[23] = dct_const_round_shift(temp1);
- ;step1[24] = dct_const_round_shift(temp2);
- ; TODO(cd) do some register allocation change to remove these push/pop
- vpop {q13} ; [23]
- vpop {q14} ; [24]
- DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d26, d27, d28, d29
- STORE_IN_OUTPUT 22, 24, 23, q14, q13
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;step1[20] = step1b[23][i] - step1b[20][i];
- ;step1[27] = step1b[24][i] - step1b[27][i];
- vsub.s16 q14, q5, q0
- vsub.s16 q13, q6, q2
- ; --------------------------------------------------------------------------
- ; part of stage 5
- ;temp1 = step1b[20][i] * (-cospi_8_64) - step1b[27][i] * (-cospi_24_64);
- ;temp2 = step1b[20][i] * (-cospi_24_64) + step1b[27][i] * (-cospi_8_64);
- ;step2[27] = dct_const_round_shift(temp1);
- ;step2[20] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD (-cospi_8_64), (-cospi_24_64), d10, d11, d12, d13
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;step1[21] = step1b[22][i] - step1b[21][i];
- ;step1[26] = step1b[25][i] - step1b[26][i];
- vsub.s16 q14, q7, q1
- vsub.s16 q13, q4, q3
- ; --------------------------------------------------------------------------
- ; part of stage 5
- ;temp1 = step1b[21][i] * (-cospi_8_64) - step1b[26][i] * (-cospi_24_64);
- ;temp2 = step1b[21][i] * (-cospi_24_64) + step1b[26][i] * (-cospi_8_64);
- ;step2[26] = dct_const_round_shift(temp1);
- ;step2[21] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD (-cospi_8_64), (-cospi_24_64), d0, d1, d2, d3
- ; --------------------------------------------------------------------------
- ; part of stage 6
- ;step3[18] = step1b[18][i] + step1b[21][i];
- ;step3[19] = step1b[19][i] + step1b[20][i];
- ;step3[20] = step1b[19][i] - step1b[20][i];
- ;step3[21] = step1b[18][i] - step1b[21][i];
- LOAD_FROM_OUTPUT 23, 18, 19, q14, q13
- vadd.s16 q8, q14, q1
- vadd.s16 q9, q13, q6
- vsub.s16 q13, q13, q6
- vsub.s16 q1, q14, q1
- STORE_IN_OUTPUT 19, 18, 19, q8, q9
- ; --------------------------------------------------------------------------
- ; part of stage 6
- ;step3[27] = step1b[28][i] - step1b[27][i];
- ;step3[28] = step1b[28][i] + step1b[27][i];
- ;step3[29] = step1b[29][i] + step1b[26][i];
- ;step3[26] = step1b[29][i] - step1b[26][i];
- LOAD_FROM_OUTPUT 19, 28, 29, q8, q9
- vsub.s16 q14, q8, q5
- vadd.s16 q10, q8, q5
- vadd.s16 q11, q9, q0
- vsub.s16 q0, q9, q0
- STORE_IN_OUTPUT 29, 28, 29, q10, q11
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;temp1 = (step1b[27][i] - step1b[20][i]) * cospi_16_64;
- ;temp2 = (step1b[27][i] + step1b[20][i]) * cospi_16_64;
- ;step1[20] = dct_const_round_shift(temp1);
- ;step1[27] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d26, d27, d28, d29
- STORE_IN_OUTPUT 29, 20, 27, q13, q14
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;temp1 = (step1b[26][i] - step1b[21][i]) * cospi_16_64;
- ;temp2 = (step1b[26][i] + step1b[21][i]) * cospi_16_64;
- ;step1[21] = dct_const_round_shift(temp1);
- ;step1[26] = dct_const_round_shift(temp2);
- DO_BUTTERFLY d0, d1, d2, d3, cospi_16_64, cospi_16_64, d2, d3, d0, d1
- STORE_IN_OUTPUT 27, 21, 26, q1, q0
- ; --------------------------------------------------------------------------
-
-
- ; --------------------------------------------------------------------------
- ; BLOCK C: 8-10,11-15
- ; --------------------------------------------------------------------------
- ; generate 8,9,14,15
- ; --------------------------------------------------------------------------
- ; part of stage 2
- ;temp1 = input[2 * 32] * cospi_30_64 - input[30 * 32] * cospi_2_64;
- ;temp2 = input[2 * 32] * cospi_2_64 + input[30 * 32] * cospi_30_64;
- ;step2[8] = dct_const_round_shift(temp1);
- ;step2[15] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 3, 2, 30
- DO_BUTTERFLY_STD cospi_30_64, cospi_2_64, d0, d1, d4, d5
- ; --------------------------------------------------------------------------
- ; part of stage 2
- ;temp1 = input[18 * 32] * cospi_14_64 - input[14 * 32] * cospi_18_64;
- ;temp2 = input[18 * 32] * cospi_18_64 + input[14 * 32] * cospi_14_64;
- ;step2[9] = dct_const_round_shift(temp1);
- ;step2[14] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 30, 18, 14
- DO_BUTTERFLY_STD cospi_14_64, cospi_18_64, d2, d3, d6, d7
- ; --------------------------------------------------------------------------
- ; part of stage 3
- ;step3[8] = step1b[8][i] + step1b[9][i];
- ;step3[9] = step1b[8][i] - step1b[9][i];
- ;step3[14] = step1b[15][i] - step1b[14][i];
- ;step3[15] = step1b[15][i] + step1b[14][i];
- vsub.s16 q13, q0, q1
- vadd.s16 q0, q0, q1
- vsub.s16 q14, q2, q3
- vadd.s16 q2, q2, q3
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;temp1 = step1b[14][i] * cospi_24_64 - step1b[9][i] * cospi_8_64;
- ;temp2 = step1b[14][i] * cospi_8_64 + step1b[9][i] * cospi_24_64;
- ;step1[9] = dct_const_round_shift(temp1);
- ;step1[14] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_24_64, cospi_8_64, d2, d3, d6, d7
- ; --------------------------------------------------------------------------
- ; generate 10,11,12,13
- ; --------------------------------------------------------------------------
- ; part of stage 2
- ;temp1 = input[10 * 32] * cospi_22_64 - input[22 * 32] * cospi_10_64;
- ;temp2 = input[10 * 32] * cospi_10_64 + input[22 * 32] * cospi_22_64;
- ;step2[10] = dct_const_round_shift(temp1);
- ;step2[13] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 14, 10, 22
- DO_BUTTERFLY_STD cospi_22_64, cospi_10_64, d10, d11, d14, d15
- ; --------------------------------------------------------------------------
- ; part of stage 2
- ;temp1 = input[26 * 32] * cospi_6_64 - input[6 * 32] * cospi_26_64;
- ;temp2 = input[26 * 32] * cospi_26_64 + input[6 * 32] * cospi_6_64;
- ;step2[11] = dct_const_round_shift(temp1);
- ;step2[12] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 22, 26, 6
- DO_BUTTERFLY_STD cospi_6_64, cospi_26_64, d8, d9, d12, d13
- ; --------------------------------------------------------------------------
- ; part of stage 3
- ;step3[10] = step1b[11][i] - step1b[10][i];
- ;step3[11] = step1b[11][i] + step1b[10][i];
- ;step3[12] = step1b[12][i] + step1b[13][i];
- ;step3[13] = step1b[12][i] - step1b[13][i];
- vsub.s16 q14, q4, q5
- vadd.s16 q5, q4, q5
- vsub.s16 q13, q6, q7
- vadd.s16 q6, q6, q7
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;temp1 = step1b[10][i] * (-cospi_8_64) - step1b[13][i] * (-cospi_24_64);
- ;temp2 = step1b[10][i] * (-cospi_24_64) + step1b[13][i] * (-cospi_8_64);
- ;step1[13] = dct_const_round_shift(temp1);
- ;step1[10] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD (-cospi_8_64), (-cospi_24_64), d8, d9, d14, d15
- ; --------------------------------------------------------------------------
- ; combine 8-10,11-15
- ; --------------------------------------------------------------------------
- ; part of stage 5
- ;step2[8] = step1b[8][i] + step1b[11][i];
- ;step2[9] = step1b[9][i] + step1b[10][i];
- ;step2[10] = step1b[9][i] - step1b[10][i];
- vadd.s16 q8, q0, q5
- vadd.s16 q9, q1, q7
- vsub.s16 q13, q1, q7
- ;step2[13] = step1b[14][i] - step1b[13][i];
- ;step2[14] = step1b[14][i] + step1b[13][i];
- ;step2[15] = step1b[15][i] + step1b[12][i];
- vsub.s16 q14, q3, q4
- vadd.s16 q10, q3, q4
- vadd.s16 q15, q2, q6
- STORE_IN_OUTPUT 26, 8, 15, q8, q15
- STORE_IN_OUTPUT 15, 9, 14, q9, q10
- ; --------------------------------------------------------------------------
- ; part of stage 6
- ;temp1 = (step1b[13][i] - step1b[10][i]) * cospi_16_64;
- ;temp2 = (step1b[13][i] + step1b[10][i]) * cospi_16_64;
- ;step3[10] = dct_const_round_shift(temp1);
- ;step3[13] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d2, d3, d6, d7
- STORE_IN_OUTPUT 14, 13, 10, q3, q1
- ; --------------------------------------------------------------------------
- ; part of stage 5
- ;step2[11] = step1b[8][i] - step1b[11][i];
- ;step2[12] = step1b[15][i] - step1b[12][i];
- vsub.s16 q13, q0, q5
- vsub.s16 q14, q2, q6
- ; --------------------------------------------------------------------------
- ; part of stage 6
- ;temp1 = (step1b[12][i] - step1b[11][i]) * cospi_16_64;
- ;temp2 = (step1b[12][i] + step1b[11][i]) * cospi_16_64;
- ;step3[11] = dct_const_round_shift(temp1);
- ;step3[12] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d2, d3, d6, d7
- STORE_IN_OUTPUT 10, 11, 12, q1, q3
- ; --------------------------------------------------------------------------
-
-
- ; --------------------------------------------------------------------------
- ; BLOCK D: 0-3,4-7
- ; --------------------------------------------------------------------------
- ; generate 4,5,6,7
- ; --------------------------------------------------------------------------
- ; part of stage 3
- ;temp1 = input[4 * 32] * cospi_28_64 - input[28 * 32] * cospi_4_64;
- ;temp2 = input[4 * 32] * cospi_4_64 + input[28 * 32] * cospi_28_64;
- ;step3[4] = dct_const_round_shift(temp1);
- ;step3[7] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 6, 4, 28
- DO_BUTTERFLY_STD cospi_28_64, cospi_4_64, d0, d1, d4, d5
- ; --------------------------------------------------------------------------
- ; part of stage 3
- ;temp1 = input[20 * 32] * cospi_12_64 - input[12 * 32] * cospi_20_64;
- ;temp2 = input[20 * 32] * cospi_20_64 + input[12 * 32] * cospi_12_64;
- ;step3[5] = dct_const_round_shift(temp1);
- ;step3[6] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 28, 20, 12
- DO_BUTTERFLY_STD cospi_12_64, cospi_20_64, d2, d3, d6, d7
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;step1[4] = step1b[4][i] + step1b[5][i];
- ;step1[5] = step1b[4][i] - step1b[5][i];
- ;step1[6] = step1b[7][i] - step1b[6][i];
- ;step1[7] = step1b[7][i] + step1b[6][i];
- vsub.s16 q13, q0, q1
- vadd.s16 q0, q0, q1
- vsub.s16 q14, q2, q3
- vadd.s16 q2, q2, q3
- ; --------------------------------------------------------------------------
- ; part of stage 5
- ;temp1 = (step1b[6][i] - step1b[5][i]) * cospi_16_64;
- ;temp2 = (step1b[5][i] + step1b[6][i]) * cospi_16_64;
- ;step2[5] = dct_const_round_shift(temp1);
- ;step2[6] = dct_const_round_shift(temp2);
- DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d2, d3, d6, d7
- ; --------------------------------------------------------------------------
- ; generate 0,1,2,3
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;temp1 = (input[0 * 32] - input[16 * 32]) * cospi_16_64;
- ;temp2 = (input[0 * 32] + input[16 * 32]) * cospi_16_64;
- ;step1[1] = dct_const_round_shift(temp1);
- ;step1[0] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 12, 0, 16
- DO_BUTTERFLY_STD cospi_16_64, cospi_16_64, d10, d11, d14, d15
- ; --------------------------------------------------------------------------
- ; part of stage 4
- ;temp1 = input[8 * 32] * cospi_24_64 - input[24 * 32] * cospi_8_64;
- ;temp2 = input[8 * 32] * cospi_8_64 + input[24 * 32] * cospi_24_64;
- ;step1[2] = dct_const_round_shift(temp1);
- ;step1[3] = dct_const_round_shift(temp2);
- LOAD_FROM_TRANSPOSED 16, 8, 24
- DO_BUTTERFLY_STD cospi_24_64, cospi_8_64, d28, d29, d12, d13
- ; --------------------------------------------------------------------------
- ; part of stage 5
- ;step2[0] = step1b[0][i] + step1b[3][i];
- ;step2[1] = step1b[1][i] + step1b[2][i];
- ;step2[2] = step1b[1][i] - step1b[2][i];
- ;step2[3] = step1b[0][i] - step1b[3][i];
- vadd.s16 q4, q7, q6
- vsub.s16 q7, q7, q6
- vsub.s16 q6, q5, q14
- vadd.s16 q5, q5, q14
- ; --------------------------------------------------------------------------
- ; combine 0-3,4-7
- ; --------------------------------------------------------------------------
- ; part of stage 6
- ;step3[0] = step1b[0][i] + step1b[7][i];
- ;step3[1] = step1b[1][i] + step1b[6][i];
- ;step3[2] = step1b[2][i] + step1b[5][i];
- ;step3[3] = step1b[3][i] + step1b[4][i];
- vadd.s16 q8, q4, q2
- vadd.s16 q9, q5, q3
- vadd.s16 q10, q6, q1
- vadd.s16 q11, q7, q0
- ;step3[4] = step1b[3][i] - step1b[4][i];
- ;step3[5] = step1b[2][i] - step1b[5][i];
- ;step3[6] = step1b[1][i] - step1b[6][i];
- ;step3[7] = step1b[0][i] - step1b[7][i];
- vsub.s16 q12, q7, q0
- vsub.s16 q13, q6, q1
- vsub.s16 q14, q5, q3
- vsub.s16 q15, q4, q2
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;step1[0] = step1b[0][i] + step1b[15][i];
- ;step1[1] = step1b[1][i] + step1b[14][i];
- ;step1[14] = step1b[1][i] - step1b[14][i];
- ;step1[15] = step1b[0][i] - step1b[15][i];
- LOAD_FROM_OUTPUT 12, 14, 15, q0, q1
- vadd.s16 q2, q8, q1
- vadd.s16 q3, q9, q0
- vsub.s16 q4, q9, q0
- vsub.s16 q5, q8, q1
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[14 * 32] = step1b[14][i] + step1b[17][i];
- ;output[15 * 32] = step1b[15][i] + step1b[16][i];
- ;output[16 * 32] = step1b[15][i] - step1b[16][i];
- ;output[17 * 32] = step1b[14][i] - step1b[17][i];
- LOAD_FROM_OUTPUT 15, 16, 17, q0, q1
- vadd.s16 q8, q4, q1
- vadd.s16 q9, q5, q0
- vsub.s16 q6, q5, q0
- vsub.s16 q7, q4, q1
-
- cmp r5, #0
- bgt idct32_bands_end_2nd_pass
-
-idct32_bands_end_1st_pass
- STORE_IN_OUTPUT 17, 16, 17, q6, q7
- STORE_IN_OUTPUT 17, 14, 15, q8, q9
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 0 * 32] = step1b[0][i] + step1b[31][i];
- ;output[ 1 * 32] = step1b[1][i] + step1b[30][i];
- ;output[30 * 32] = step1b[1][i] - step1b[30][i];
- ;output[31 * 32] = step1b[0][i] - step1b[31][i];
- LOAD_FROM_OUTPUT 15, 30, 31, q0, q1
- vadd.s16 q4, q2, q1
- vadd.s16 q5, q3, q0
- vsub.s16 q6, q3, q0
- vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 31, 30, 31, q6, q7
- STORE_IN_OUTPUT 31, 0, 1, q4, q5
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;step1[2] = step1b[2][i] + step1b[13][i];
- ;step1[3] = step1b[3][i] + step1b[12][i];
- ;step1[12] = step1b[3][i] - step1b[12][i];
- ;step1[13] = step1b[2][i] - step1b[13][i];
- LOAD_FROM_OUTPUT 1, 12, 13, q0, q1
- vadd.s16 q2, q10, q1
- vadd.s16 q3, q11, q0
- vsub.s16 q4, q11, q0
- vsub.s16 q5, q10, q1
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[12 * 32] = step1b[12][i] + step1b[19][i];
- ;output[13 * 32] = step1b[13][i] + step1b[18][i];
- ;output[18 * 32] = step1b[13][i] - step1b[18][i];
- ;output[19 * 32] = step1b[12][i] - step1b[19][i];
- LOAD_FROM_OUTPUT 13, 18, 19, q0, q1
- vadd.s16 q8, q4, q1
- vadd.s16 q9, q5, q0
- vsub.s16 q6, q5, q0
- vsub.s16 q7, q4, q1
- STORE_IN_OUTPUT 19, 18, 19, q6, q7
- STORE_IN_OUTPUT 19, 12, 13, q8, q9
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 2 * 32] = step1b[2][i] + step1b[29][i];
- ;output[ 3 * 32] = step1b[3][i] + step1b[28][i];
- ;output[28 * 32] = step1b[3][i] - step1b[28][i];
- ;output[29 * 32] = step1b[2][i] - step1b[29][i];
- LOAD_FROM_OUTPUT 13, 28, 29, q0, q1
- vadd.s16 q4, q2, q1
- vadd.s16 q5, q3, q0
- vsub.s16 q6, q3, q0
- vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 29, 28, 29, q6, q7
- STORE_IN_OUTPUT 29, 2, 3, q4, q5
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;step1[4] = step1b[4][i] + step1b[11][i];
- ;step1[5] = step1b[5][i] + step1b[10][i];
- ;step1[10] = step1b[5][i] - step1b[10][i];
- ;step1[11] = step1b[4][i] - step1b[11][i];
- LOAD_FROM_OUTPUT 3, 10, 11, q0, q1
- vadd.s16 q2, q12, q1
- vadd.s16 q3, q13, q0
- vsub.s16 q4, q13, q0
- vsub.s16 q5, q12, q1
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[10 * 32] = step1b[10][i] + step1b[21][i];
- ;output[11 * 32] = step1b[11][i] + step1b[20][i];
- ;output[20 * 32] = step1b[11][i] - step1b[20][i];
- ;output[21 * 32] = step1b[10][i] - step1b[21][i];
- LOAD_FROM_OUTPUT 11, 20, 21, q0, q1
- vadd.s16 q8, q4, q1
- vadd.s16 q9, q5, q0
- vsub.s16 q6, q5, q0
- vsub.s16 q7, q4, q1
- STORE_IN_OUTPUT 21, 20, 21, q6, q7
- STORE_IN_OUTPUT 21, 10, 11, q8, q9
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 4 * 32] = step1b[4][i] + step1b[27][i];
- ;output[ 5 * 32] = step1b[5][i] + step1b[26][i];
- ;output[26 * 32] = step1b[5][i] - step1b[26][i];
- ;output[27 * 32] = step1b[4][i] - step1b[27][i];
- LOAD_FROM_OUTPUT 11, 26, 27, q0, q1
- vadd.s16 q4, q2, q1
- vadd.s16 q5, q3, q0
- vsub.s16 q6, q3, q0
- vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 27, 26, 27, q6, q7
- STORE_IN_OUTPUT 27, 4, 5, q4, q5
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;step1[6] = step1b[6][i] + step1b[9][i];
- ;step1[7] = step1b[7][i] + step1b[8][i];
- ;step1[8] = step1b[7][i] - step1b[8][i];
- ;step1[9] = step1b[6][i] - step1b[9][i];
- LOAD_FROM_OUTPUT 5, 8, 9, q0, q1
- vadd.s16 q2, q14, q1
- vadd.s16 q3, q15, q0
- vsub.s16 q4, q15, q0
- vsub.s16 q5, q14, q1
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 8 * 32] = step1b[8][i] + step1b[23][i];
- ;output[ 9 * 32] = step1b[9][i] + step1b[22][i];
- ;output[22 * 32] = step1b[9][i] - step1b[22][i];
- ;output[23 * 32] = step1b[8][i] - step1b[23][i];
- LOAD_FROM_OUTPUT 9, 22, 23, q0, q1
- vadd.s16 q8, q4, q1
- vadd.s16 q9, q5, q0
- vsub.s16 q6, q5, q0
- vsub.s16 q7, q4, q1
- STORE_IN_OUTPUT 23, 22, 23, q6, q7
- STORE_IN_OUTPUT 23, 8, 9, q8, q9
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 6 * 32] = step1b[6][i] + step1b[25][i];
- ;output[ 7 * 32] = step1b[7][i] + step1b[24][i];
- ;output[24 * 32] = step1b[7][i] - step1b[24][i];
- ;output[25 * 32] = step1b[6][i] - step1b[25][i];
- LOAD_FROM_OUTPUT 9, 24, 25, q0, q1
- vadd.s16 q4, q2, q1
- vadd.s16 q5, q3, q0
- vsub.s16 q6, q3, q0
- vsub.s16 q7, q2, q1
- STORE_IN_OUTPUT 25, 24, 25, q6, q7
- STORE_IN_OUTPUT 25, 6, 7, q4, q5
-
- ; restore r0 by removing the last offset from the last
- ; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2
- sub r0, r0, #24*8*2
- ; restore r1 by removing the last offset from the last
- ; operation (STORE_IN_OUTPUT 24, 6, 7) => 7*32*2
- ; advance by 8 columns => 8*2
- sub r1, r1, #7*32*2 - 8*2
- ; advance by 8 lines (8*32*2)
- ; go back by the two pairs from the loop (32*2)
- add r3, r3, #8*32*2 - 32*2
-
- ; bands loop processing
- subs r4, r4, #1
- bne idct32_bands_loop
-
- ; parameters for second pass
- ; the input of pass2 is the result of pass1. we have to remove the offset
- ; of 32 columns induced by the above idct32_bands_loop
- sub r3, r1, #32*2
- ; r1 = pass2[32 * 32]
- add r1, sp, #2048
-
- ; pass loop processing
- add r5, r5, #1
- b idct32_pass_loop
-
-idct32_bands_end_2nd_pass
- STORE_COMBINE_CENTER_RESULTS
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 0 * 32] = step1b[0][i] + step1b[31][i];
- ;output[ 1 * 32] = step1b[1][i] + step1b[30][i];
- ;output[30 * 32] = step1b[1][i] - step1b[30][i];
- ;output[31 * 32] = step1b[0][i] - step1b[31][i];
- LOAD_FROM_OUTPUT 17, 30, 31, q0, q1
- vadd.s16 q4, q2, q1
- vadd.s16 q5, q3, q0
- vsub.s16 q6, q3, q0
- vsub.s16 q7, q2, q1
- STORE_COMBINE_EXTREME_RESULTS
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;step1[2] = step1b[2][i] + step1b[13][i];
- ;step1[3] = step1b[3][i] + step1b[12][i];
- ;step1[12] = step1b[3][i] - step1b[12][i];
- ;step1[13] = step1b[2][i] - step1b[13][i];
- LOAD_FROM_OUTPUT 31, 12, 13, q0, q1
- vadd.s16 q2, q10, q1
- vadd.s16 q3, q11, q0
- vsub.s16 q4, q11, q0
- vsub.s16 q5, q10, q1
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[12 * 32] = step1b[12][i] + step1b[19][i];
- ;output[13 * 32] = step1b[13][i] + step1b[18][i];
- ;output[18 * 32] = step1b[13][i] - step1b[18][i];
- ;output[19 * 32] = step1b[12][i] - step1b[19][i];
- LOAD_FROM_OUTPUT 13, 18, 19, q0, q1
- vadd.s16 q8, q4, q1
- vadd.s16 q9, q5, q0
- vsub.s16 q6, q5, q0
- vsub.s16 q7, q4, q1
- STORE_COMBINE_CENTER_RESULTS
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 2 * 32] = step1b[2][i] + step1b[29][i];
- ;output[ 3 * 32] = step1b[3][i] + step1b[28][i];
- ;output[28 * 32] = step1b[3][i] - step1b[28][i];
- ;output[29 * 32] = step1b[2][i] - step1b[29][i];
- LOAD_FROM_OUTPUT 19, 28, 29, q0, q1
- vadd.s16 q4, q2, q1
- vadd.s16 q5, q3, q0
- vsub.s16 q6, q3, q0
- vsub.s16 q7, q2, q1
- STORE_COMBINE_EXTREME_RESULTS
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;step1[4] = step1b[4][i] + step1b[11][i];
- ;step1[5] = step1b[5][i] + step1b[10][i];
- ;step1[10] = step1b[5][i] - step1b[10][i];
- ;step1[11] = step1b[4][i] - step1b[11][i];
- LOAD_FROM_OUTPUT 29, 10, 11, q0, q1
- vadd.s16 q2, q12, q1
- vadd.s16 q3, q13, q0
- vsub.s16 q4, q13, q0
- vsub.s16 q5, q12, q1
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[10 * 32] = step1b[10][i] + step1b[21][i];
- ;output[11 * 32] = step1b[11][i] + step1b[20][i];
- ;output[20 * 32] = step1b[11][i] - step1b[20][i];
- ;output[21 * 32] = step1b[10][i] - step1b[21][i];
- LOAD_FROM_OUTPUT 11, 20, 21, q0, q1
- vadd.s16 q8, q4, q1
- vadd.s16 q9, q5, q0
- vsub.s16 q6, q5, q0
- vsub.s16 q7, q4, q1
- STORE_COMBINE_CENTER_RESULTS
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 4 * 32] = step1b[4][i] + step1b[27][i];
- ;output[ 5 * 32] = step1b[5][i] + step1b[26][i];
- ;output[26 * 32] = step1b[5][i] - step1b[26][i];
- ;output[27 * 32] = step1b[4][i] - step1b[27][i];
- LOAD_FROM_OUTPUT 21, 26, 27, q0, q1
- vadd.s16 q4, q2, q1
- vadd.s16 q5, q3, q0
- vsub.s16 q6, q3, q0
- vsub.s16 q7, q2, q1
- STORE_COMBINE_EXTREME_RESULTS
- ; --------------------------------------------------------------------------
- ; part of stage 7
- ;step1[6] = step1b[6][i] + step1b[9][i];
- ;step1[7] = step1b[7][i] + step1b[8][i];
- ;step1[8] = step1b[7][i] - step1b[8][i];
- ;step1[9] = step1b[6][i] - step1b[9][i];
- LOAD_FROM_OUTPUT 27, 8, 9, q0, q1
- vadd.s16 q2, q14, q1
- vadd.s16 q3, q15, q0
- vsub.s16 q4, q15, q0
- vsub.s16 q5, q14, q1
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 8 * 32] = step1b[8][i] + step1b[23][i];
- ;output[ 9 * 32] = step1b[9][i] + step1b[22][i];
- ;output[22 * 32] = step1b[9][i] - step1b[22][i];
- ;output[23 * 32] = step1b[8][i] - step1b[23][i];
- LOAD_FROM_OUTPUT 9, 22, 23, q0, q1
- vadd.s16 q8, q4, q1
- vadd.s16 q9, q5, q0
- vsub.s16 q6, q5, q0
- vsub.s16 q7, q4, q1
- STORE_COMBINE_CENTER_RESULTS_LAST
- ; --------------------------------------------------------------------------
- ; part of final stage
- ;output[ 6 * 32] = step1b[6][i] + step1b[25][i];
- ;output[ 7 * 32] = step1b[7][i] + step1b[24][i];
- ;output[24 * 32] = step1b[7][i] - step1b[24][i];
- ;output[25 * 32] = step1b[6][i] - step1b[25][i];
- LOAD_FROM_OUTPUT 23, 24, 25, q0, q1
- vadd.s16 q4, q2, q1
- vadd.s16 q5, q3, q0
- vsub.s16 q6, q3, q0
- vsub.s16 q7, q2, q1
- STORE_COMBINE_EXTREME_RESULTS_LAST
- ; --------------------------------------------------------------------------
- ; restore pointers to their initial indices for next band pass by
- ; removing/adding dest_stride * 8. The actual increment by eight
- ; is taken care of within the _LAST macros.
- add r6, r6, r2, lsl #3
- add r9, r9, r2, lsl #3
- sub r7, r7, r2, lsl #3
- sub r10, r10, r2, lsl #3
-
- ; restore r0 by removing the last offset from the last
- ; operation (LOAD_FROM_TRANSPOSED 16, 8, 24) => 24*8*2
- sub r0, r0, #24*8*2
- ; restore r1 by removing the last offset from the last
- ; operation (LOAD_FROM_OUTPUT 23, 24, 25) => 25*32*2
- ; advance by 8 columns => 8*2
- sub r1, r1, #25*32*2 - 8*2
- ; advance by 8 lines (8*32*2)
- ; go back by the two pairs from the loop (32*2)
- add r3, r3, #8*32*2 - 32*2
-
- ; bands loop processing
- subs r4, r4, #1
- bne idct32_bands_loop
-
- ; stack operation
- add sp, sp, #512+2048+2048
- vpop {d8-d15}
- pop {r4-r11}
- bx lr
- ENDP ; |vpx_idct32x32_1024_add_neon|
- END
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.asm
index adab715dde5..cbfab361af8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.asm
@@ -25,9 +25,8 @@
|vpx_idct4x4_1_add_neon| PROC
ldrsh r0, [r0]
- ; generate cospi_16_64 = 11585
- mov r12, #0x2d00
- add r12, #0x41
+ ; cospi_16_64 = 11585
+ movw r12, #0x2d41
; out = dct_const_round_shift(input[0] * cospi_16_64)
mul r0, r0, r12 ; input[0] * cospi_16_64
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c
index b37cb51a1a7..525aac05a84 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_1_add_neon.c
@@ -21,7 +21,7 @@ void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest,
uint16x8_t q8u16;
int16x8_t q0s16;
uint8_t *d1, *d2;
- int16_t i, a1, cospi_16_64 = 11585;
+ int16_t i, a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
a1 = ROUND_POWER_OF_TWO(out, 4);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.asm
index 877fbd63435..bd4e86ded25 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.asm
@@ -15,6 +15,8 @@
AREA ||.text||, CODE, READONLY, ALIGN=2
+ INCLUDE vpx_dsp/arm/idct_neon.asm.S
+
AREA Block, CODE, READONLY ; name this block of code
;void vpx_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
@@ -33,18 +35,15 @@
; So, two passes of a transpose followed by a column transform.
; load the inputs into q8-q9, d16-d19
- vld1.s16 {q8,q9}, [r0]!
+ LOAD_TRAN_LOW_TO_S16 d16, d17, d18, d19, r0
; generate scalar constants
- ; cospi_8_64 = 15137 = 0x3b21
- mov r0, #0x3b00
- add r0, #0x21
- ; cospi_16_64 = 11585 = 0x2d41
- mov r3, #0x2d00
- add r3, #0x41
- ; cospi_24_64 = 6270 = 0x 187e
- mov r12, #0x1800
- add r12, #0x7e
+ ; cospi_8_64 = 15137
+ movw r0, #0x3b21
+ ; cospi_16_64 = 11585
+ movw r3, #0x2d41
+ ; cospi_24_64 = 6270
+ movw r12, #0x187e
; transpose the input data
; 00 01 02 03 d16
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
index 1caa456987d..8f669c90765 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct4x4_add_neon.c
@@ -11,6 +11,8 @@
#include <arm_neon.h>
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/txfm_common.h"
void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
int dest_stride) {
@@ -24,14 +26,11 @@ void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
int16x4x2_t d0x2s16, d1x2s16;
int32x4x2_t q0x2s32;
uint8_t *d;
- int16_t cospi_8_64 = 15137;
- int16_t cospi_16_64 = 11585;
- int16_t cospi_24_64 = 6270;
d26u32 = d27u32 = vdup_n_u32(0);
- q8s16 = vld1q_s16(input);
- q9s16 = vld1q_s16(input + 8);
+ q8s16 = load_tran_low_to_s16(input);
+ q9s16 = load_tran_low_to_s16(input + 8);
d16s16 = vget_low_s16(q8s16);
d17s16 = vget_high_s16(q8s16);
@@ -43,8 +42,8 @@ void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
q8s16 = vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]);
q9s16 = vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]);
- d20s16 = vdup_n_s16(cospi_8_64);
- d21s16 = vdup_n_s16(cospi_16_64);
+ d20s16 = vdup_n_s16((int16_t)cospi_8_64);
+ d21s16 = vdup_n_s16((int16_t)cospi_16_64);
q0x2s32 =
vtrnq_s32(vreinterpretq_s32_s16(q8s16), vreinterpretq_s32_s16(q9s16));
@@ -53,7 +52,7 @@ void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
d18s16 = vget_low_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
d19s16 = vget_high_s16(vreinterpretq_s16_s32(q0x2s32.val[1]));
- d22s16 = vdup_n_s16(cospi_24_64);
+ d22s16 = vdup_n_s16((int16_t)cospi_24_64);
// stage 1
d23s16 = vadd_s16(d16s16, d18s16);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.asm
index dbbff364f37..e4531c6e97f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.asm
@@ -25,9 +25,8 @@
|vpx_idct8x8_1_add_neon| PROC
ldrsh r0, [r0]
- ; generate cospi_16_64 = 11585
- mov r12, #0x2d00
- add r12, #0x41
+ ; cospi_16_64 = 11585
+ movw r12, #0x2d41
; out = dct_const_round_shift(input[0] * cospi_16_64)
mul r0, r0, r12 ; input[0] * cospi_16_64
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c
index df557de8187..eee41e6c6b1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_1_add_neon.c
@@ -21,7 +21,7 @@ void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest,
uint16x8_t q0u16, q9u16, q10u16, q11u16, q12u16;
int16x8_t q0s16;
uint8_t *d1, *d2;
- int16_t i, a1, cospi_16_64 = 11585;
+ int16_t i, a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
out = dct_const_round_shift(out * cospi_16_64);
a1 = ROUND_POWER_OF_TWO(out, 5);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.asm
index 6ab59b41b74..a5c9c927d67 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.asm
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.asm
@@ -16,6 +16,8 @@
AREA ||.text||, CODE, READONLY, ALIGN=2
+ INCLUDE vpx_dsp/arm/idct_neon.asm.S
+
; Parallel 1D IDCT on all the columns of a 8x8 16bit data matrix which are
; loaded in q8-q15. The output will be stored back into q8-q15 registers.
; This macro will touch q0-q7 registers and use them as buffer during
@@ -207,41 +209,34 @@
|vpx_idct8x8_64_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
- vld1.s16 {q8,q9}, [r0]!
- vld1.s16 {q10,q11}, [r0]!
- vld1.s16 {q12,q13}, [r0]!
- vld1.s16 {q14,q15}, [r0]!
+ LOAD_TRAN_LOW_TO_S16 d16, d17, d18, d19, r0
+ LOAD_TRAN_LOW_TO_S16 d20, d21, d22, d23, r0
+ LOAD_TRAN_LOW_TO_S16 d24, d25, d26, d27, r0
+ LOAD_TRAN_LOW_TO_S16 d28, d29, d30, d31, r0
; transpose the input data
TRANSPOSE8X8
- ; generate cospi_28_64 = 3196
- mov r3, #0x0c00
- add r3, #0x7c
+ ; cospi_28_64 = 3196
+ movw r3, #0x0c7c
- ; generate cospi_4_64 = 16069
- mov r4, #0x3e00
- add r4, #0xc5
+ ; cospi_4_64 = 16069
+ movw r4, #0x3ec5
- ; generate cospi_12_64 = 13623
- mov r5, #0x3500
- add r5, #0x37
+ ; cospi_12_64 = 13623
+ movw r5, #0x3537
- ; generate cospi_20_64 = 9102
- mov r6, #0x2300
- add r6, #0x8e
+ ; cospi_20_64 = 9102
+ movw r6, #0x238e
- ; generate cospi_16_64 = 11585
- mov r7, #0x2d00
- add r7, #0x41
+ ; cospi_16_64 = 11585
+ movw r7, #0x2d41
- ; generate cospi_24_64 = 6270
- mov r8, #0x1800
- add r8, #0x7e
+ ; cospi_24_64 = 6270
+ movw r8, #0x187e
- ; generate cospi_8_64 = 15137
- mov r9, #0x3b00
- add r9, #0x21
+ ; cospi_8_64 = 15137
+ movw r9, #0x3b21
; First transform rows
IDCT8x8_1D
@@ -319,41 +314,34 @@
|vpx_idct8x8_12_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
- vld1.s16 {q8,q9}, [r0]!
- vld1.s16 {q10,q11}, [r0]!
- vld1.s16 {q12,q13}, [r0]!
- vld1.s16 {q14,q15}, [r0]!
+ LOAD_TRAN_LOW_TO_S16 d16, d17, d18, d19, r0
+ LOAD_TRAN_LOW_TO_S16 d20, d21, d22, d23, r0
+ LOAD_TRAN_LOW_TO_S16 d24, d25, d26, d27, r0
+ LOAD_TRAN_LOW_TO_S16 d28, d29, d30, d31, r0
; transpose the input data
TRANSPOSE8X8
- ; generate cospi_28_64 = 3196
- mov r3, #0x0c00
- add r3, #0x7c
+ ; cospi_28_64 = 3196
+ movw r3, #0x0c7c
- ; generate cospi_4_64 = 16069
- mov r4, #0x3e00
- add r4, #0xc5
+ ; cospi_4_64 = 16069
+ movw r4, #0x3ec5
- ; generate cospi_12_64 = 13623
- mov r5, #0x3500
- add r5, #0x37
+ ; cospi_12_64 = 13623
+ movw r5, #0x3537
- ; generate cospi_20_64 = 9102
- mov r6, #0x2300
- add r6, #0x8e
+ ; cospi_20_64 = 9102
+ movw r6, #0x238e
- ; generate cospi_16_64 = 11585
- mov r7, #0x2d00
- add r7, #0x41
+ ; cospi_16_64 = 11585
+ movw r7, #0x2d41
- ; generate cospi_24_64 = 6270
- mov r8, #0x1800
- add r8, #0x7e
+ ; cospi_24_64 = 6270
+ movw r8, #0x187e
- ; generate cospi_8_64 = 15137
- mov r9, #0x3b00
- add r9, #0x21
+ ; cospi_8_64 = 15137
+ movw r9, #0x3b21
; First transform rows
; stage 1
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c
index 7d65612417c..159a6ec9891 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct8x8_add_neon.c
@@ -12,6 +12,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
+#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/txfm_common.h"
@@ -27,10 +28,10 @@ static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
- d0s16 = vdup_n_s16(cospi_28_64);
- d1s16 = vdup_n_s16(cospi_4_64);
- d2s16 = vdup_n_s16(cospi_12_64);
- d3s16 = vdup_n_s16(cospi_20_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_28_64);
+ d1s16 = vdup_n_s16((int16_t)cospi_4_64);
+ d2s16 = vdup_n_s16((int16_t)cospi_12_64);
+ d3s16 = vdup_n_s16((int16_t)cospi_20_64);
d16s16 = vget_low_s16(*q8s16);
d17s16 = vget_high_s16(*q8s16);
@@ -83,7 +84,7 @@ static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
q6s16 = vcombine_s16(d12s16, d13s16);
q7s16 = vcombine_s16(d14s16, d15s16);
- d0s16 = vdup_n_s16(cospi_16_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_16_64);
q2s32 = vmull_s16(d16s16, d0s16);
q3s32 = vmull_s16(d17s16, d0s16);
@@ -95,8 +96,8 @@ static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
- d0s16 = vdup_n_s16(cospi_24_64);
- d1s16 = vdup_n_s16(cospi_8_64);
+ d0s16 = vdup_n_s16((int16_t)cospi_24_64);
+ d1s16 = vdup_n_s16((int16_t)cospi_8_64);
d18s16 = vqrshrn_n_s32(q2s32, 14);
d19s16 = vqrshrn_n_s32(q3s32, 14);
@@ -136,7 +137,7 @@ static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
d28s16 = vget_low_s16(*q14s16);
d29s16 = vget_high_s16(*q14s16);
- d16s16 = vdup_n_s16(cospi_16_64);
+ d16s16 = vdup_n_s16((int16_t)cospi_16_64);
q9s32 = vmull_s16(d28s16, d16s16);
q10s32 = vmull_s16(d29s16, d16s16);
@@ -173,14 +174,14 @@ void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest,
int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
uint16x8_t q8u16, q9u16, q10u16, q11u16;
- q8s16 = vld1q_s16(input);
- q9s16 = vld1q_s16(input + 8);
- q10s16 = vld1q_s16(input + 16);
- q11s16 = vld1q_s16(input + 24);
- q12s16 = vld1q_s16(input + 32);
- q13s16 = vld1q_s16(input + 40);
- q14s16 = vld1q_s16(input + 48);
- q15s16 = vld1q_s16(input + 56);
+ q8s16 = load_tran_low_to_s16(input);
+ q9s16 = load_tran_low_to_s16(input + 8);
+ q10s16 = load_tran_low_to_s16(input + 16);
+ q11s16 = load_tran_low_to_s16(input + 24);
+ q12s16 = load_tran_low_to_s16(input + 32);
+ q13s16 = load_tran_low_to_s16(input + 40);
+ q14s16 = load_tran_low_to_s16(input + 48);
+ q15s16 = load_tran_low_to_s16(input + 56);
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
@@ -279,43 +280,43 @@ void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest,
uint16x8_t q8u16, q9u16, q10u16, q11u16;
int32x4_t q9s32, q10s32, q11s32, q12s32;
- q8s16 = vld1q_s16(input);
- q9s16 = vld1q_s16(input + 8);
- q10s16 = vld1q_s16(input + 16);
- q11s16 = vld1q_s16(input + 24);
- q12s16 = vld1q_s16(input + 32);
- q13s16 = vld1q_s16(input + 40);
- q14s16 = vld1q_s16(input + 48);
- q15s16 = vld1q_s16(input + 56);
+ q8s16 = load_tran_low_to_s16(input);
+ q9s16 = load_tran_low_to_s16(input + 8);
+ q10s16 = load_tran_low_to_s16(input + 16);
+ q11s16 = load_tran_low_to_s16(input + 24);
+ q12s16 = load_tran_low_to_s16(input + 32);
+ q13s16 = load_tran_low_to_s16(input + 40);
+ q14s16 = load_tran_low_to_s16(input + 48);
+ q15s16 = load_tran_low_to_s16(input + 56);
transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
&q15s16);
// First transform rows
// stage 1
- q0s16 = vdupq_n_s16(cospi_28_64 * 2);
- q1s16 = vdupq_n_s16(cospi_4_64 * 2);
+ q0s16 = vdupq_n_s16((int16_t)cospi_28_64 * 2);
+ q1s16 = vdupq_n_s16((int16_t)cospi_4_64 * 2);
q4s16 = vqrdmulhq_s16(q9s16, q0s16);
- q0s16 = vdupq_n_s16(-cospi_20_64 * 2);
+ q0s16 = vdupq_n_s16(-(int16_t)cospi_20_64 * 2);
q7s16 = vqrdmulhq_s16(q9s16, q1s16);
- q1s16 = vdupq_n_s16(cospi_12_64 * 2);
+ q1s16 = vdupq_n_s16((int16_t)cospi_12_64 * 2);
q5s16 = vqrdmulhq_s16(q11s16, q0s16);
- q0s16 = vdupq_n_s16(cospi_16_64 * 2);
+ q0s16 = vdupq_n_s16((int16_t)cospi_16_64 * 2);
q6s16 = vqrdmulhq_s16(q11s16, q1s16);
// stage 2 & stage 3 - even half
- q1s16 = vdupq_n_s16(cospi_24_64 * 2);
+ q1s16 = vdupq_n_s16((int16_t)cospi_24_64 * 2);
q9s16 = vqrdmulhq_s16(q8s16, q0s16);
- q0s16 = vdupq_n_s16(cospi_8_64 * 2);
+ q0s16 = vdupq_n_s16((int16_t)cospi_8_64 * 2);
q13s16 = vqrdmulhq_s16(q10s16, q1s16);
@@ -337,7 +338,7 @@ void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest,
d28s16 = vget_low_s16(q14s16);
d29s16 = vget_high_s16(q14s16);
- d16s16 = vdup_n_s16(cospi_16_64);
+ d16s16 = vdup_n_s16((int16_t)cospi_16_64);
q9s32 = vmull_s16(d28s16, d16s16);
q10s32 = vmull_s16(d29s16, d16s16);
q11s32 = vmull_s16(d28s16, d16s16);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.asm b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.asm
new file mode 100644
index 00000000000..f39e8ddd4b4
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.asm
@@ -0,0 +1,30 @@
+;
+; Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+;
+; Use of this source code is governed by a BSD-style license
+; that can be found in the LICENSE file in the root of the source
+; tree. An additional intellectual property rights grant can be found
+; in the file PATENTS. All contributing project authors may
+; be found in the AUTHORS file in the root of the source tree.
+;
+
+ INCLUDE ./vpx_config.asm
+
+ ; Helper function used to load tran_low_t into int16, narrowing if
+ ; necessary.
+ ; $dst0..3 are d registers with the pairs assumed to be contiguous in
+ ; non-high-bitdepth builds. q0-q3 are used as temporaries in high-bitdepth.
+ MACRO
+ LOAD_TRAN_LOW_TO_S16 $dst0, $dst1, $dst2, $dst3, $src
+ IF CONFIG_VP9_HIGHBITDEPTH
+ vld1.s32 {q0,q1}, [$src]!
+ vld1.s32 {q2,q3}, [$src]!
+ vmovn.i32 $dst0, q0
+ vmovn.i32 $dst1, q1
+ vmovn.i32 $dst2, q2
+ vmovn.i32 $dst3, q3
+ ELSE
+ vld1.s16 {$dst0-$dst1,$dst2-$dst3}, [$src]!
+ ENDIF
+ MEND
+ END
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.h
new file mode 100644
index 00000000000..5c2a53c034f
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/idct_neon.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_DSP_ARM_IDCT_NEON_H_
+#define VPX_DSP_ARM_IDCT_NEON_H_
+
+#include <arm_neon.h>
+
+#include "./vpx_config.h"
+#include "vpx_dsp/arm/transpose_neon.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+//------------------------------------------------------------------------------
+
+// Helper function used to load tran_low_t into int16, narrowing if necessary.
+static INLINE int16x8_t load_tran_low_to_s16(const tran_low_t *buf) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int32x4_t v0 = vld1q_s32(buf);
+ const int32x4_t v1 = vld1q_s32(buf + 4);
+ const int16x4_t s0 = vmovn_s32(v0);
+ const int16x4_t s1 = vmovn_s32(v1);
+ return vcombine_s16(s0, s1);
+#else
+ return vld1q_s16(buf);
+#endif
+}
+
+// Multiply a by a_const. Saturate, shift and narrow by 14.
+static INLINE int16x8_t multiply_shift_and_narrow_s16(const int16x8_t a,
+ const int16_t a_const) {
+ // Shift by 14 + rounding will be within 16 bits for well formed streams.
+ // See WRAPLOW and dct_const_round_shift for details.
+ // This instruction doubles the result and returns the high half, essentially
+ // resulting in a right shift by 15. By multiplying the constant first that
+ // becomes a right shift by 14.
+ // The largest possible value used here is
+ // vpx_dsp/txfm_common.h:cospi_1_64 = 16364 (* 2 = 32728), which falls *just*
+ // within the range of int16_t (+32767 / -32768) even when negated.
+ return vqrdmulhq_n_s16(a, a_const * 2);
+}
+
+// Add a and b, then multiply by ab_const. Shift and narrow by 14.
+static INLINE int16x8_t add_multiply_shift_and_narrow_s16(
+ const int16x8_t a, const int16x8_t b, const int16_t ab_const) {
+ // In both add_ and its pair, sub_, the input for well-formed streams will be
+ // well within 16 bits (input to the idct is the difference between two frames
+ // and will be within -255 to 255, or 9 bits)
+ // However, for inputs over about 25,000 (valid for int16_t, but not for idct
+ // input) this function can not use vaddq_s16.
+ // In order to match existing behavior and intentionally out of range tests,
+ // expand the addition up to 32 bits to prevent truncation.
+ int32x4_t temp_low = vaddl_s16(vget_low_s16(a), vget_low_s16(b));
+ int32x4_t temp_high = vaddl_s16(vget_high_s16(a), vget_high_s16(b));
+ temp_low = vmulq_n_s32(temp_low, ab_const);
+ temp_high = vmulq_n_s32(temp_high, ab_const);
+ return vcombine_s16(vrshrn_n_s32(temp_low, 14), vrshrn_n_s32(temp_high, 14));
+}
+
+// Subtract b from a, then multiply by ab_const. Shift and narrow by 14.
+static INLINE int16x8_t sub_multiply_shift_and_narrow_s16(
+ const int16x8_t a, const int16x8_t b, const int16_t ab_const) {
+ int32x4_t temp_low = vsubl_s16(vget_low_s16(a), vget_low_s16(b));
+ int32x4_t temp_high = vsubl_s16(vget_high_s16(a), vget_high_s16(b));
+ temp_low = vmulq_n_s32(temp_low, ab_const);
+ temp_high = vmulq_n_s32(temp_high, ab_const);
+ return vcombine_s16(vrshrn_n_s32(temp_low, 14), vrshrn_n_s32(temp_high, 14));
+}
+
+// Multiply a by a_const and b by b_const, then accumulate. Shift and narrow by
+// 14.
+static INLINE int16x8_t multiply_accumulate_shift_and_narrow_s16(
+ const int16x8_t a, const int16_t a_const, const int16x8_t b,
+ const int16_t b_const) {
+ int32x4_t temp_low = vmull_n_s16(vget_low_s16(a), a_const);
+ int32x4_t temp_high = vmull_n_s16(vget_high_s16(a), a_const);
+ temp_low = vmlal_n_s16(temp_low, vget_low_s16(b), b_const);
+ temp_high = vmlal_n_s16(temp_high, vget_high_s16(b), b_const);
+ return vcombine_s16(vrshrn_n_s32(temp_low, 14), vrshrn_n_s32(temp_high, 14));
+}
+
+static INLINE void load_and_transpose_s16_8x8(const int16_t *a, int a_stride,
+ int16x8_t *a0, int16x8_t *a1,
+ int16x8_t *a2, int16x8_t *a3,
+ int16x8_t *a4, int16x8_t *a5,
+ int16x8_t *a6, int16x8_t *a7) {
+ *a0 = vld1q_s16(a);
+ a += a_stride;
+ *a1 = vld1q_s16(a);
+ a += a_stride;
+ *a2 = vld1q_s16(a);
+ a += a_stride;
+ *a3 = vld1q_s16(a);
+ a += a_stride;
+ *a4 = vld1q_s16(a);
+ a += a_stride;
+ *a5 = vld1q_s16(a);
+ a += a_stride;
+ *a6 = vld1q_s16(a);
+ a += a_stride;
+ *a7 = vld1q_s16(a);
+
+ transpose_s16_8x8(a0, a1, a2, a3, a4, a5, a6, a7);
+}
+
+// Shift the output down by 6 and add it to the destination buffer.
+static INLINE void add_and_store_u8_s16(const int16x8_t a0, const int16x8_t a1,
+ const int16x8_t a2, const int16x8_t a3,
+ const int16x8_t a4, const int16x8_t a5,
+ const int16x8_t a6, const int16x8_t a7,
+ uint8_t *b, const int b_stride) {
+ uint8x8_t b0, b1, b2, b3, b4, b5, b6, b7;
+ int16x8_t c0, c1, c2, c3, c4, c5, c6, c7;
+ b0 = vld1_u8(b);
+ b += b_stride;
+ b1 = vld1_u8(b);
+ b += b_stride;
+ b2 = vld1_u8(b);
+ b += b_stride;
+ b3 = vld1_u8(b);
+ b += b_stride;
+ b4 = vld1_u8(b);
+ b += b_stride;
+ b5 = vld1_u8(b);
+ b += b_stride;
+ b6 = vld1_u8(b);
+ b += b_stride;
+ b7 = vld1_u8(b);
+ b -= (7 * b_stride);
+
+ // c = b + (a >> 6)
+ c0 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b0)), a0, 6);
+ c1 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b1)), a1, 6);
+ c2 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b2)), a2, 6);
+ c3 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b3)), a3, 6);
+ c4 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b4)), a4, 6);
+ c5 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b5)), a5, 6);
+ c6 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b6)), a6, 6);
+ c7 = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b7)), a7, 6);
+
+ b0 = vqmovun_s16(c0);
+ b1 = vqmovun_s16(c1);
+ b2 = vqmovun_s16(c2);
+ b3 = vqmovun_s16(c3);
+ b4 = vqmovun_s16(c4);
+ b5 = vqmovun_s16(c5);
+ b6 = vqmovun_s16(c6);
+ b7 = vqmovun_s16(c7);
+
+ vst1_u8(b, b0);
+ b += b_stride;
+ vst1_u8(b, b1);
+ b += b_stride;
+ vst1_u8(b, b2);
+ b += b_stride;
+ vst1_u8(b, b3);
+ b += b_stride;
+ vst1_u8(b, b4);
+ b += b_stride;
+ vst1_u8(b, b5);
+ b += b_stride;
+ vst1_u8(b, b6);
+ b += b_stride;
+ vst1_u8(b, b7);
+}
+#endif // VPX_DSP_ARM_IDCT_NEON_H_
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c
index 38e79ed69dd..e150a5302d5 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/intrapred_neon.c
@@ -17,306 +17,254 @@
//------------------------------------------------------------------------------
// DC 4x4
-// 'do_above' and 'do_left' facilitate branch removal when inlined.
-static INLINE void dc_4x4(uint8_t *dst, ptrdiff_t stride, const uint8_t *above,
- const uint8_t *left, int do_above, int do_left) {
- uint16x4_t sum_top;
- uint16x4_t sum_left;
- uint16x4_t dc0;
-
- if (do_above) {
- const uint8x8_t A = vld1_u8(above); // top row
- const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
- sum_top = vpadd_u16(p0, p0);
- }
-
- if (do_left) {
- const uint8x8_t L = vld1_u8(left); // left border
- const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left
- sum_left = vpadd_u16(p0, p0);
- }
-
- if (do_above && do_left) {
- const uint16x4_t sum = vadd_u16(sum_left, sum_top);
- dc0 = vrshr_n_u16(sum, 3);
- } else if (do_above) {
- dc0 = vrshr_n_u16(sum_top, 2);
- } else if (do_left) {
- dc0 = vrshr_n_u16(sum_left, 2);
- } else {
- dc0 = vdup_n_u16(0x80);
- }
+static INLINE uint16x4_t dc_sum_4(const uint8_t *ref) {
+ const uint8x8_t ref_u8 = vld1_u8(ref);
+ const uint16x4_t p0 = vpaddl_u8(ref_u8);
+ return vpadd_u16(p0, p0);
+}
- {
- const uint8x8_t dc = vdup_lane_u8(vreinterpret_u8_u16(dc0), 0);
- int i;
- for (i = 0; i < 4; ++i) {
- vst1_lane_u32((uint32_t *)(dst + i * stride), vreinterpret_u32_u8(dc), 0);
- }
+static INLINE void dc_store_4x4(uint8_t *dst, ptrdiff_t stride,
+ const uint8x8_t dc) {
+ const uint8x8_t dc_dup = vdup_lane_u8(dc, 0);
+ int i;
+ for (i = 0; i < 4; ++i, dst += stride) {
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(dc_dup), 0);
}
}
void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- dc_4x4(dst, stride, above, left, 1, 1);
+ const uint8x8_t a = vld1_u8(above);
+ const uint8x8_t l = vld1_u8(left);
+ const uint16x8_t al = vaddl_u8(a, l);
+ uint16x4_t sum;
+ uint8x8_t dc;
+ sum = vpadd_u16(vget_low_u16(al), vget_low_u16(al));
+ sum = vpadd_u16(sum, sum);
+ dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 3));
+ dc_store_4x4(dst, stride, dc);
}
void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint16x4_t sum = dc_sum_4(left);
+ const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 2));
(void)above;
- dc_4x4(dst, stride, NULL, left, 0, 1);
+ dc_store_4x4(dst, stride, dc);
}
void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint16x4_t sum = dc_sum_4(above);
+ const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 2));
(void)left;
- dc_4x4(dst, stride, above, NULL, 1, 0);
+ dc_store_4x4(dst, stride, dc);
}
void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint8x8_t dc = vdup_n_u8(0x80);
(void)above;
(void)left;
- dc_4x4(dst, stride, NULL, NULL, 0, 0);
+ dc_store_4x4(dst, stride, dc);
}
//------------------------------------------------------------------------------
// DC 8x8
-// 'do_above' and 'do_left' facilitate branch removal when inlined.
-static INLINE void dc_8x8(uint8_t *dst, ptrdiff_t stride, const uint8_t *above,
- const uint8_t *left, int do_above, int do_left) {
- uint16x8_t sum_top;
- uint16x8_t sum_left;
- uint8x8_t dc0;
-
- if (do_above) {
- const uint8x8_t A = vld1_u8(above); // top row
- const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
- const uint16x4_t p1 = vpadd_u16(p0, p0);
- const uint16x4_t p2 = vpadd_u16(p1, p1);
- sum_top = vcombine_u16(p2, p2);
- }
-
- if (do_left) {
- const uint8x8_t L = vld1_u8(left); // left border
- const uint16x4_t p0 = vpaddl_u8(L); // cascading summation of the left
- const uint16x4_t p1 = vpadd_u16(p0, p0);
- const uint16x4_t p2 = vpadd_u16(p1, p1);
- sum_left = vcombine_u16(p2, p2);
- }
-
- if (do_above && do_left) {
- const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
- dc0 = vrshrn_n_u16(sum, 4);
- } else if (do_above) {
- dc0 = vrshrn_n_u16(sum_top, 3);
- } else if (do_left) {
- dc0 = vrshrn_n_u16(sum_left, 3);
- } else {
- dc0 = vdup_n_u8(0x80);
- }
+static INLINE uint16x4_t dc_sum_8(const uint8_t *ref) {
+ const uint8x8_t ref_u8 = vld1_u8(ref);
+ uint16x4_t sum = vpaddl_u8(ref_u8);
+ sum = vpadd_u16(sum, sum);
+ return vpadd_u16(sum, sum);
+}
- {
- const uint8x8_t dc = vdup_lane_u8(dc0, 0);
- int i;
- for (i = 0; i < 8; ++i) {
- vst1_u32((uint32_t *)(dst + i * stride), vreinterpret_u32_u8(dc));
- }
+static INLINE void dc_store_8x8(uint8_t *dst, ptrdiff_t stride,
+ const uint8x8_t dc) {
+ const uint8x8_t dc_dup = vdup_lane_u8(dc, 0);
+ int i;
+ for (i = 0; i < 8; ++i, dst += stride) {
+ vst1_u8(dst, dc_dup);
}
}
void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- dc_8x8(dst, stride, above, left, 1, 1);
+ const uint8x8_t above_u8 = vld1_u8(above);
+ const uint8x8_t left_u8 = vld1_u8(left);
+ const uint8x16_t above_and_left = vcombine_u8(above_u8, left_u8);
+ const uint16x8_t p0 = vpaddlq_u8(above_and_left);
+ uint16x4_t sum = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
+ uint8x8_t dc;
+ sum = vpadd_u16(sum, sum);
+ sum = vpadd_u16(sum, sum);
+ dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 4));
+ dc_store_8x8(dst, stride, dc);
}
void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint16x4_t sum = dc_sum_8(left);
+ const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 3));
(void)above;
- dc_8x8(dst, stride, NULL, left, 0, 1);
+ dc_store_8x8(dst, stride, dc);
}
void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint16x4_t sum = dc_sum_8(above);
+ const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 3));
(void)left;
- dc_8x8(dst, stride, above, NULL, 1, 0);
+ dc_store_8x8(dst, stride, dc);
}
void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint8x8_t dc = vdup_n_u8(0x80);
(void)above;
(void)left;
- dc_8x8(dst, stride, NULL, NULL, 0, 0);
+ dc_store_8x8(dst, stride, dc);
}
//------------------------------------------------------------------------------
// DC 16x16
-// 'do_above' and 'do_left' facilitate branch removal when inlined.
-static INLINE void dc_16x16(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left,
- int do_above, int do_left) {
- uint16x8_t sum_top;
- uint16x8_t sum_left;
- uint8x8_t dc0;
-
- if (do_above) {
- const uint8x16_t A = vld1q_u8(above); // top row
- const uint16x8_t p0 = vpaddlq_u8(A); // cascading summation of the top
- const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
- const uint16x4_t p2 = vpadd_u16(p1, p1);
- const uint16x4_t p3 = vpadd_u16(p2, p2);
- sum_top = vcombine_u16(p3, p3);
- }
-
- if (do_left) {
- const uint8x16_t L = vld1q_u8(left); // left row
- const uint16x8_t p0 = vpaddlq_u8(L); // cascading summation of the left
- const uint16x4_t p1 = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
- const uint16x4_t p2 = vpadd_u16(p1, p1);
- const uint16x4_t p3 = vpadd_u16(p2, p2);
- sum_left = vcombine_u16(p3, p3);
- }
-
- if (do_above && do_left) {
- const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
- dc0 = vrshrn_n_u16(sum, 5);
- } else if (do_above) {
- dc0 = vrshrn_n_u16(sum_top, 4);
- } else if (do_left) {
- dc0 = vrshrn_n_u16(sum_left, 4);
- } else {
- dc0 = vdup_n_u8(0x80);
- }
+static INLINE uint16x4_t dc_sum_16(const uint8_t *ref) {
+ const uint8x16_t ref_u8 = vld1q_u8(ref);
+ const uint16x8_t p0 = vpaddlq_u8(ref_u8);
+ uint16x4_t sum = vadd_u16(vget_low_u16(p0), vget_high_u16(p0));
+ sum = vpadd_u16(sum, sum);
+ return vpadd_u16(sum, sum);
+}
- {
- const uint8x16_t dc = vdupq_lane_u8(dc0, 0);
- int i;
- for (i = 0; i < 16; ++i) {
- vst1q_u8(dst + i * stride, dc);
- }
+static INLINE void dc_store_16x16(uint8_t *dst, ptrdiff_t stride,
+ const uint8x8_t dc) {
+ const uint8x16_t dc_dup = vdupq_lane_u8(dc, 0);
+ int i;
+ for (i = 0; i < 16; ++i, dst += stride) {
+ vst1q_u8(dst, dc_dup);
}
}
void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- dc_16x16(dst, stride, above, left, 1, 1);
+ const uint8x16_t ref0 = vld1q_u8(above);
+ const uint8x16_t ref1 = vld1q_u8(left);
+ const uint16x8_t p0 = vpaddlq_u8(ref0);
+ const uint16x8_t p1 = vpaddlq_u8(ref1);
+ const uint16x8_t p2 = vaddq_u16(p0, p1);
+ uint16x4_t sum = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
+ uint8x8_t dc;
+ sum = vpadd_u16(sum, sum);
+ sum = vpadd_u16(sum, sum);
+ dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 5));
+ dc_store_16x16(dst, stride, dc);
}
void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
+ const uint16x4_t sum = dc_sum_16(left);
+ const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 4));
(void)above;
- dc_16x16(dst, stride, NULL, left, 0, 1);
+ dc_store_16x16(dst, stride, dc);
}
void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
+ const uint16x4_t sum = dc_sum_16(above);
+ const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 4));
(void)left;
- dc_16x16(dst, stride, above, NULL, 1, 0);
+ dc_store_16x16(dst, stride, dc);
}
void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
+ const uint8x8_t dc = vdup_n_u8(0x80);
(void)above;
(void)left;
- dc_16x16(dst, stride, NULL, NULL, 0, 0);
+ dc_store_16x16(dst, stride, dc);
}
//------------------------------------------------------------------------------
// DC 32x32
-// 'do_above' and 'do_left' facilitate branch removal when inlined.
-static INLINE void dc_32x32(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left,
- int do_above, int do_left) {
- uint16x8_t sum_top;
- uint16x8_t sum_left;
- uint8x8_t dc0;
-
- if (do_above) {
- const uint8x16_t A0 = vld1q_u8(above); // top row
- const uint8x16_t A1 = vld1q_u8(above + 16);
- const uint16x8_t p0 = vpaddlq_u8(A0); // cascading summation of the top
- const uint16x8_t p1 = vpaddlq_u8(A1);
- const uint16x8_t p2 = vaddq_u16(p0, p1);
- const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
- const uint16x4_t p4 = vpadd_u16(p3, p3);
- const uint16x4_t p5 = vpadd_u16(p4, p4);
- sum_top = vcombine_u16(p5, p5);
- }
-
- if (do_left) {
- const uint8x16_t L0 = vld1q_u8(left); // left row
- const uint8x16_t L1 = vld1q_u8(left + 16);
- const uint16x8_t p0 = vpaddlq_u8(L0); // cascading summation of the left
- const uint16x8_t p1 = vpaddlq_u8(L1);
- const uint16x8_t p2 = vaddq_u16(p0, p1);
- const uint16x4_t p3 = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
- const uint16x4_t p4 = vpadd_u16(p3, p3);
- const uint16x4_t p5 = vpadd_u16(p4, p4);
- sum_left = vcombine_u16(p5, p5);
- }
+static INLINE uint16x4_t dc_sum_32(const uint8_t *ref) {
+ const uint8x16x2_t r = vld2q_u8(ref);
+ const uint16x8_t p0 = vpaddlq_u8(r.val[0]);
+ const uint16x8_t p1 = vpaddlq_u8(r.val[1]);
+ const uint16x8_t p2 = vaddq_u16(p0, p1);
+ uint16x4_t sum = vadd_u16(vget_low_u16(p2), vget_high_u16(p2));
+ sum = vpadd_u16(sum, sum);
+ return vpadd_u16(sum, sum);
+}
- if (do_above && do_left) {
- const uint16x8_t sum = vaddq_u16(sum_left, sum_top);
- dc0 = vrshrn_n_u16(sum, 6);
- } else if (do_above) {
- dc0 = vrshrn_n_u16(sum_top, 5);
- } else if (do_left) {
- dc0 = vrshrn_n_u16(sum_left, 5);
- } else {
- dc0 = vdup_n_u8(0x80);
- }
+static INLINE void dc_store_32x32(uint8_t *dst, ptrdiff_t stride,
+ const uint8x8_t dc) {
+ uint8x16x2_t dc_dup;
+ int i;
+ dc_dup.val[0] = dc_dup.val[1] = vdupq_lane_u8(dc, 0);
- {
- const uint8x16_t dc = vdupq_lane_u8(dc0, 0);
- int i;
- for (i = 0; i < 32; ++i) {
- vst1q_u8(dst + i * stride, dc);
- vst1q_u8(dst + i * stride + 16, dc);
- }
+ for (i = 0; i < 32; ++i, dst += stride) {
+ vst2q_u8(dst, dc_dup);
}
}
void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- dc_32x32(dst, stride, above, left, 1, 1);
+ const uint8x16x2_t a = vld2q_u8(above);
+ const uint8x16x2_t l = vld2q_u8(left);
+ const uint16x8_t pa0 = vpaddlq_u8(a.val[0]);
+ const uint16x8_t pl0 = vpaddlq_u8(l.val[0]);
+ const uint16x8_t pa1 = vpaddlq_u8(a.val[1]);
+ const uint16x8_t pl1 = vpaddlq_u8(l.val[1]);
+ const uint16x8_t pa = vaddq_u16(pa0, pa1);
+ const uint16x8_t pl = vaddq_u16(pl0, pl1);
+ const uint16x8_t pal = vaddq_u16(pa, pl);
+ uint16x4_t sum = vadd_u16(vget_low_u16(pal), vget_high_u16(pal));
+ uint8x8_t dc;
+ sum = vpadd_u16(sum, sum);
+ sum = vpadd_u16(sum, sum);
+ dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 6));
+ dc_store_32x32(dst, stride, dc);
}
void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
+ const uint16x4_t sum = dc_sum_32(left);
+ const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 5));
(void)above;
- dc_32x32(dst, stride, NULL, left, 0, 1);
+ dc_store_32x32(dst, stride, dc);
}
void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
+ const uint16x4_t sum = dc_sum_32(above);
+ const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 5));
(void)left;
- dc_32x32(dst, stride, above, NULL, 1, 0);
+ dc_store_32x32(dst, stride, dc);
}
void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above,
const uint8_t *left) {
+ const uint8x8_t dc = vdup_n_u8(0x80);
(void)above;
(void)left;
- dc_32x32(dst, stride, NULL, NULL, 0, 0);
+ dc_store_32x32(dst, stride, dc);
}
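Taken together, the 16x16 and 32x32 variants implement the usual DC rule: fill the block with the rounded mean of whichever border samples are available. With both borders a 16x16 block averages 32 samples, hence vrshr_n_u16(sum, 5); with one border it is 16 samples and a shift of 4; the 32x32 paths use shifts of 6 and 5 in the same way. A minimal scalar sketch of the both-borders 16x16 case (the function name is hypothetical and the sketch is illustrative, not part of the patch):

static void dc_16x16_scalar(unsigned char *dst, int stride,
                            const unsigned char *above,
                            const unsigned char *left) {
  int r, c, sum = 0;
  unsigned char dc;
  for (r = 0; r < 16; ++r) sum += above[r] + left[r];  /* 32 border samples */
  dc = (unsigned char)((sum + 16) >> 5);               /* rounded mean */
  for (r = 0; r < 16; ++r)
    for (c = 0; c < 16; ++c) dst[r * stride + c] = dc;
}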
// -----------------------------------------------------------------------------
void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- const uint64x1_t A0 = vreinterpret_u64_u8(vld1_u8(above)); // top row
- const uint64x1_t A1 = vshr_n_u64(A0, 8);
- const uint64x1_t A2 = vshr_n_u64(A0, 16);
- const uint8x8_t ABCDEFGH = vreinterpret_u8_u64(A0);
+ const uint8x8_t ABCDEFGH = vld1_u8(above);
+ const uint64x1_t A1 = vshr_n_u64(vreinterpret_u64_u8(ABCDEFGH), 8);
+ const uint64x1_t A2 = vshr_n_u64(vreinterpret_u64_u8(ABCDEFGH), 16);
const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1);
const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2);
const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGH00);
@@ -331,485 +279,506 @@ void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0);
vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0);
vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0);
- dst[3 * stride + 3] = above[7];
+ vst1_lane_u8(dst + 3 * stride + 3, ABCDEFGH, 7);
+}
+
+static INLINE void d45_store_8(uint8_t **dst, const ptrdiff_t stride,
+ const uint8x8_t above_right, uint8x8_t *row) {
+ *row = vext_u8(*row, above_right, 1);
+ vst1_u8(*dst, *row);
+ *dst += stride;
}
void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- static const uint8_t shuffle1[8] = { 1, 2, 3, 4, 5, 6, 7, 7 };
- static const uint8_t shuffle2[8] = { 2, 3, 4, 5, 6, 7, 7, 7 };
- const uint8x8_t sh_12345677 = vld1_u8(shuffle1);
- const uint8x8_t sh_23456777 = vld1_u8(shuffle2);
- const uint8x8_t A0 = vld1_u8(above); // top row
- const uint8x8_t A1 = vtbl1_u8(A0, sh_12345677);
- const uint8x8_t A2 = vtbl1_u8(A0, sh_23456777);
+ const uint8x8_t A0 = vld1_u8(above);
+ const uint8x8_t above_right = vdup_lane_u8(A0, 7);
+ const uint8x8_t A1 = vext_u8(A0, above_right, 1);
+ const uint8x8_t A2 = vext_u8(A0, above_right, 2);
const uint8x8_t avg1 = vhadd_u8(A0, A2);
uint8x8_t row = vrhadd_u8(avg1, A1);
- int i;
(void)left;
- for (i = 0; i < 7; ++i) {
- vst1_u8(dst + i * stride, row);
- row = vtbl1_u8(row, sh_12345677);
- }
- vst1_u8(dst + i * stride, row);
+
+ vst1_u8(dst, row);
+ dst += stride;
+ d45_store_8(&dst, stride, above_right, &row);
+ d45_store_8(&dst, stride, above_right, &row);
+ d45_store_8(&dst, stride, above_right, &row);
+ d45_store_8(&dst, stride, above_right, &row);
+ d45_store_8(&dst, stride, above_right, &row);
+ d45_store_8(&dst, stride, above_right, &row);
+ vst1_u8(dst, above_right);
+}
+
+static INLINE void d45_store_16(uint8_t **dst, const ptrdiff_t stride,
+ const uint8x16_t above_right, uint8x16_t *row) {
+ *row = vextq_u8(*row, above_right, 1);
+ vst1q_u8(*dst, *row);
+ *dst += stride;
}
void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- const uint8x16_t A0 = vld1q_u8(above); // top row
- const uint8x16_t above_right = vld1q_dup_u8(above + 15);
+ const uint8x16_t A0 = vld1q_u8(above);
+ const uint8x16_t above_right = vdupq_lane_u8(vget_high_u8(A0), 7);
const uint8x16_t A1 = vextq_u8(A0, above_right, 1);
const uint8x16_t A2 = vextq_u8(A0, above_right, 2);
const uint8x16_t avg1 = vhaddq_u8(A0, A2);
uint8x16_t row = vrhaddq_u8(avg1, A1);
- int i;
(void)left;
- for (i = 0; i < 15; ++i) {
- vst1q_u8(dst + i * stride, row);
- row = vextq_u8(row, above_right, 1);
- }
- vst1q_u8(dst + i * stride, row);
+
+ vst1q_u8(dst, row);
+ dst += stride;
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ d45_store_16(&dst, stride, above_right, &row);
+ vst1q_u8(dst, above_right);
}
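Each D45 row comes from the standard 3-tap smoothing filter applied to the above row, and every following row is the previous one slid left by one pixel with the above-right pixel appended (vdupq_lane_u8 of lane 7 plus vextq_u8 in the code above). A hedged scalar sketch of the filter value the vhaddq_u8 + vrhaddq_u8 pair reproduces, rounding included (avg3 is a hypothetical helper name):

/* 3-tap average used for the first D45 row; the truncating half-add of a and
 * c followed by the rounding half-add with b yields the same +2 bias. */
static unsigned char avg3(unsigned char a, unsigned char b, unsigned char c) {
  return (unsigned char)((a + 2 * b + c + 2) >> 2);
}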
// -----------------------------------------------------------------------------
void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- const uint8x8_t XABCD_u8 = vld1_u8(above - 1);
- const uint64x1_t XABCD = vreinterpret_u64_u8(XABCD_u8);
- const uint64x1_t ____XABC = vshl_n_u64(XABCD, 32);
+ const uint8x8_t XABCD = vld1_u8(above - 1);
const uint32x2_t zero = vdup_n_u32(0);
const uint32x2_t IJKL = vld1_lane_u32((const uint32_t *)left, zero, 0);
- const uint8x8_t IJKL_u8 = vreinterpret_u8_u32(IJKL);
- const uint64x1_t LKJI____ = vreinterpret_u64_u8(vrev32_u8(IJKL_u8));
- const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
- const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
- const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
- const uint8_t D = vget_lane_u8(XABCD_u8, 4);
- const uint8x8_t JIXABCD_ = vset_lane_u8(D, JIXABC__, 6);
- const uint8x8_t LKJIXABC_u8 = vreinterpret_u8_u64(LKJIXABC);
- const uint8x8_t avg1 = vhadd_u8(JIXABCD_, LKJIXABC_u8);
- const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABC_);
+ const uint8x8_t LKJI = vrev64_u8(vreinterpret_u8_u32(IJKL));
+ const uint8x8_t LKJIXABC = vext_u8(LKJI, XABCD, 4);
+ const uint8x8_t KJIXABCD = vext_u8(LKJI, XABCD, 5);
+ const uint8x8_t JIXABCD0 =
+ vreinterpret_u8_u64(vshr_n_u64(vreinterpret_u64_u8(KJIXABCD), 8));
+ const uint8x8_t avg1 = vhadd_u8(JIXABCD0, LKJIXABC);
+ const uint8x8_t avg2 = vrhadd_u8(avg1, KJIXABCD);
const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2);
const uint32x2_t r3 = vreinterpret_u32_u8(avg2);
const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8));
const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16));
const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24));
- vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0);
- vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0);
- vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0);
- vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0);
+ vst1_lane_u32((uint32_t *)dst, r0, 0);
+ dst += stride;
+ vst1_lane_u32((uint32_t *)dst, r1, 0);
+ dst += stride;
+ vst1_lane_u32((uint32_t *)dst, r2, 0);
+ dst += stride;
+ vst1_lane_u32((uint32_t *)dst, r3, 0);
}
+// -----------------------------------------------------------------------------
+
#if !HAVE_NEON_ASM
void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint32_t d = *(const uint32_t *)above;
int i;
- uint32x2_t d0u32 = vdup_n_u32(0);
(void)left;
- d0u32 = vld1_lane_u32((const uint32_t *)above, d0u32, 0);
- for (i = 0; i < 4; i++, dst += stride)
- vst1_lane_u32((uint32_t *)dst, d0u32, 0);
+ for (i = 0; i < 4; i++, dst += stride) {
+ *(uint32_t *)dst = d;
+ }
}
void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint8x8_t d = vld1_u8(above);
int i;
- uint8x8_t d0u8 = vdup_n_u8(0);
(void)left;
- d0u8 = vld1_u8(above);
- for (i = 0; i < 8; i++, dst += stride) vst1_u8(dst, d0u8);
+ for (i = 0; i < 8; i++, dst += stride) {
+ vst1_u8(dst, d);
+ }
}
void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint8x16_t d = vld1q_u8(above);
int i;
- uint8x16_t q0u8 = vdupq_n_u8(0);
(void)left;
- q0u8 = vld1q_u8(above);
- for (i = 0; i < 16; i++, dst += stride) vst1q_u8(dst, q0u8);
+ for (i = 0; i < 16; i++, dst += stride) {
+ vst1q_u8(dst, d);
+ }
}
void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
+ const uint8x16_t d0 = vld1q_u8(above);
+ const uint8x16_t d1 = vld1q_u8(above + 16);
int i;
- uint8x16_t q0u8 = vdupq_n_u8(0);
- uint8x16_t q1u8 = vdupq_n_u8(0);
(void)left;
- q0u8 = vld1q_u8(above);
- q1u8 = vld1q_u8(above + 16);
- for (i = 0; i < 32; i++, dst += stride) {
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q1u8);
+ for (i = 0; i < 32; i++) {
+ // Note: performance was worse using vst2q_u8 under gcc-4.9 & clang-3.8.
+ // clang-3.8 unrolled the loop fully with no filler so the cause is likely
+ // the latency of the instruction.
+ vst1q_u8(dst, d0);
+ dst += 16;
+ vst1q_u8(dst, d1);
+ dst += stride - 16;
}
}
+// -----------------------------------------------------------------------------
+
void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- uint8x8_t d0u8 = vdup_n_u8(0);
- uint32x2_t d1u32 = vdup_n_u32(0);
+ const uint32x2_t zero = vdup_n_u32(0);
+ const uint8x8_t left_u8 =
+ vreinterpret_u8_u32(vld1_lane_u32((const uint32_t *)left, zero, 0));
+ uint8x8_t d;
(void)above;
- d1u32 = vld1_lane_u32((const uint32_t *)left, d1u32, 0);
-
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 0);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
+ d = vdup_lane_u8(left_u8, 0);
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 1);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
+ d = vdup_lane_u8(left_u8, 1);
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 2);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
+ d = vdup_lane_u8(left_u8, 2);
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u32(d1u32), 3);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
+ d = vdup_lane_u8(left_u8, 3);
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0);
}
void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- uint8x8_t d0u8 = vdup_n_u8(0);
- uint64x1_t d1u64 = vdup_n_u64(0);
+ const uint8x8_t left_u8 = vld1_u8(left);
+ uint8x8_t d;
(void)above;
- d1u64 = vld1_u64((const uint64_t *)left);
-
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 0);
- vst1_u8(dst, d0u8);
+ d = vdup_lane_u8(left_u8, 0);
+ vst1_u8(dst, d);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 1);
- vst1_u8(dst, d0u8);
+ d = vdup_lane_u8(left_u8, 1);
+ vst1_u8(dst, d);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 2);
- vst1_u8(dst, d0u8);
+ d = vdup_lane_u8(left_u8, 2);
+ vst1_u8(dst, d);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 3);
- vst1_u8(dst, d0u8);
+ d = vdup_lane_u8(left_u8, 3);
+ vst1_u8(dst, d);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 4);
- vst1_u8(dst, d0u8);
+ d = vdup_lane_u8(left_u8, 4);
+ vst1_u8(dst, d);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 5);
- vst1_u8(dst, d0u8);
+ d = vdup_lane_u8(left_u8, 5);
+ vst1_u8(dst, d);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 6);
- vst1_u8(dst, d0u8);
+ d = vdup_lane_u8(left_u8, 6);
+ vst1_u8(dst, d);
dst += stride;
- d0u8 = vdup_lane_u8(vreinterpret_u8_u64(d1u64), 7);
- vst1_u8(dst, d0u8);
+ d = vdup_lane_u8(left_u8, 7);
+ vst1_u8(dst, d);
}
void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- int j;
- uint8x8_t d2u8 = vdup_n_u8(0);
- uint8x16_t q0u8 = vdupq_n_u8(0);
- uint8x16_t q1u8 = vdupq_n_u8(0);
+ const uint8x16_t left_u8q = vld1q_u8(left);
+ uint8x8_t left_u8d = vget_low_u8(left_u8q);
+ uint8x16_t d;
+ int i;
(void)above;
- q1u8 = vld1q_u8(left);
- d2u8 = vget_low_u8(q1u8);
- for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) {
- q0u8 = vdupq_lane_u8(d2u8, 0);
- vst1q_u8(dst, q0u8);
+ for (i = 0; i < 2; i++, left_u8d = vget_high_u8(left_u8q)) {
+ d = vdupq_lane_u8(left_u8d, 0);
+ vst1q_u8(dst, d);
dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 1);
- vst1q_u8(dst, q0u8);
+ d = vdupq_lane_u8(left_u8d, 1);
+ vst1q_u8(dst, d);
dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 2);
- vst1q_u8(dst, q0u8);
+ d = vdupq_lane_u8(left_u8d, 2);
+ vst1q_u8(dst, d);
dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 3);
- vst1q_u8(dst, q0u8);
+ d = vdupq_lane_u8(left_u8d, 3);
+ vst1q_u8(dst, d);
dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 4);
- vst1q_u8(dst, q0u8);
+ d = vdupq_lane_u8(left_u8d, 4);
+ vst1q_u8(dst, d);
dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 5);
- vst1q_u8(dst, q0u8);
+ d = vdupq_lane_u8(left_u8d, 5);
+ vst1q_u8(dst, d);
dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 6);
- vst1q_u8(dst, q0u8);
+ d = vdupq_lane_u8(left_u8d, 6);
+ vst1q_u8(dst, d);
dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 7);
- vst1q_u8(dst, q0u8);
+ d = vdupq_lane_u8(left_u8d, 7);
+ vst1q_u8(dst, d);
dst += stride;
}
}
void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- int j, k;
- uint8x8_t d2u8 = vdup_n_u8(0);
- uint8x16_t q0u8 = vdupq_n_u8(0);
- uint8x16_t q1u8 = vdupq_n_u8(0);
+ uint8x16_t d;
+ int i;
(void)above;
- for (k = 0; k < 2; k++, left += 16) {
- q1u8 = vld1q_u8(left);
- d2u8 = vget_low_u8(q1u8);
- for (j = 0; j < 2; j++, d2u8 = vget_high_u8(q1u8)) {
- q0u8 = vdupq_lane_u8(d2u8, 0);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 1);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 2);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 3);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 4);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 5);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 6);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- q0u8 = vdupq_lane_u8(d2u8, 7);
- vst1q_u8(dst, q0u8);
- vst1q_u8(dst + 16, q0u8);
- dst += stride;
- }
+ for (i = 0; i < 2; i++, left += 16) {
+ const uint8x16_t left_u8 = vld1q_u8(left);
+ const uint8x8_t left_low = vget_low_u8(left_u8);
+ const uint8x8_t left_high = vget_high_u8(left_u8);
+ d = vdupq_lane_u8(left_low, 0);
+ vst1q_u8(dst, d); // Note clang-3.8 produced poor code w/vst2q_u8
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_low, 1);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_low, 2);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_low, 3);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_low, 4);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_low, 5);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_low, 6);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_low, 7);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+
+ d = vdupq_lane_u8(left_high, 0);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_high, 1);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_high, 2);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_high, 3);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_high, 4);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_high, 5);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_high, 6);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
+ d = vdupq_lane_u8(left_high, 7);
+ vst1q_u8(dst, d);
+ dst += 16;
+ vst1q_u8(dst, d);
+ dst += stride - 16;
}
}
+// -----------------------------------------------------------------------------
+
+static INLINE int16x8_t convert_u8_to_s16(uint8x8_t v) {
+ return vreinterpretq_s16_u16(vmovl_u8(v));
+}
+
void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- int i;
- uint16x8_t q1u16, q3u16;
- int16x8_t q1s16;
- uint8x8_t d0u8 = vdup_n_u8(0);
- uint32x2_t d2u32 = vdup_n_u32(0);
-
- d0u8 = vld1_dup_u8(above - 1);
- d2u32 = vld1_lane_u32((const uint32_t *)above, d2u32, 0);
- q3u16 = vsubl_u8(vreinterpret_u8_u32(d2u32), d0u8);
- for (i = 0; i < 4; i++, dst += stride) {
- q1u16 = vdupq_n_u16((uint16_t)left[i]);
- q1s16 =
- vaddq_s16(vreinterpretq_s16_u16(q1u16), vreinterpretq_s16_u16(q3u16));
- d0u8 = vqmovun_s16(q1s16);
- vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d0u8), 0);
- }
+ const uint8x8_t top_left = vld1_dup_u8(above - 1);
+ const uint8x8_t left_u8 = vld1_u8(left);
+ const uint8x8_t above_u8 = vld1_u8(above);
+ const int16x4_t left_s16 = vget_low_s16(convert_u8_to_s16(left_u8));
+ int16x8_t sub, sum;
+ uint32x2_t d;
+
+ sub = vreinterpretq_s16_u16(vsubl_u8(above_u8, top_left));
+ // Avoid vcombine_s16() which generates lots of redundant code with clang-3.8.
+ sub = vreinterpretq_s16_s64(
+ vdupq_lane_s64(vreinterpret_s64_s16(vget_low_s16(sub)), 0));
+
+ sum = vcombine_s16(vdup_lane_s16(left_s16, 0), vdup_lane_s16(left_s16, 1));
+ sum = vaddq_s16(sum, sub);
+ d = vreinterpret_u32_u8(vqmovun_s16(sum));
+ vst1_lane_u32((uint32_t *)dst, d, 0);
+ dst += stride;
+ vst1_lane_u32((uint32_t *)dst, d, 1);
+ dst += stride;
+
+ sum = vcombine_s16(vdup_lane_s16(left_s16, 2), vdup_lane_s16(left_s16, 3));
+ sum = vaddq_s16(sum, sub);
+ d = vreinterpret_u32_u8(vqmovun_s16(sum));
+ vst1_lane_u32((uint32_t *)dst, d, 0);
+ dst += stride;
+ vst1_lane_u32((uint32_t *)dst, d, 1);
+}
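The rewritten TM predictors all vectorize the same per-pixel rule: add the row's left pixel to the column's above pixel, subtract the shared top-left pixel, and saturate to [0, 255]; vqmovun_s16 performs that clamp in the NEON kernels. A minimal scalar sketch (tm_pixel is a hypothetical name, not part of the patch):

static unsigned char tm_pixel(unsigned char above, unsigned char left,
                              unsigned char top_left) {
  const int v = left + above - top_left;                  /* may leave [0, 255] */
  return (unsigned char)(v < 0 ? 0 : v > 255 ? 255 : v);  /* saturate */
}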
+
+static INLINE void tm_8_kernel(uint8_t **dst, const ptrdiff_t stride,
+ const int16x8_t left_dup, const int16x8_t sub) {
+ const int16x8_t sum = vaddq_s16(left_dup, sub);
+ const uint8x8_t d = vqmovun_s16(sum);
+ vst1_u8(*dst, d);
+ *dst += stride;
}
void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- int j;
- uint16x8_t q0u16, q3u16, q10u16;
- int16x8_t q0s16;
- uint16x4_t d20u16;
- uint8x8_t d0u8, d2u8, d30u8;
-
- d0u8 = vld1_dup_u8(above - 1);
- d30u8 = vld1_u8(left);
- d2u8 = vld1_u8(above);
- q10u16 = vmovl_u8(d30u8);
- q3u16 = vsubl_u8(d2u8, d0u8);
- d20u16 = vget_low_u16(q10u16);
- for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) {
- q0u16 = vdupq_lane_u16(d20u16, 0);
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q3u16), vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += stride;
- q0u16 = vdupq_lane_u16(d20u16, 1);
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q3u16), vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += stride;
- q0u16 = vdupq_lane_u16(d20u16, 2);
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q3u16), vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += stride;
- q0u16 = vdupq_lane_u16(d20u16, 3);
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q3u16), vreinterpretq_s16_u16(q0u16));
- d0u8 = vqmovun_s16(q0s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d0u8));
- dst += stride;
+ const uint8x8_t top_left = vld1_dup_u8(above - 1);
+ const uint8x8_t above_u8 = vld1_u8(above);
+ const uint8x8_t left_u8 = vld1_u8(left);
+ const int16x8_t left_s16q = convert_u8_to_s16(left_u8);
+ const int16x8_t sub = vreinterpretq_s16_u16(vsubl_u8(above_u8, top_left));
+ int16x4_t left_s16d = vget_low_s16(left_s16q);
+ int i;
+
+ for (i = 0; i < 2; i++, left_s16d = vget_high_s16(left_s16q)) {
+ int16x8_t left_dup;
+
+ left_dup = vdupq_lane_s16(left_s16d, 0);
+ tm_8_kernel(&dst, stride, left_dup, sub);
+ left_dup = vdupq_lane_s16(left_s16d, 1);
+ tm_8_kernel(&dst, stride, left_dup, sub);
+ left_dup = vdupq_lane_s16(left_s16d, 2);
+ tm_8_kernel(&dst, stride, left_dup, sub);
+ left_dup = vdupq_lane_s16(left_s16d, 3);
+ tm_8_kernel(&dst, stride, left_dup, sub);
}
}
+static INLINE void tm_16_kernel(uint8_t **dst, const ptrdiff_t stride,
+ const int16x8_t left_dup, const int16x8_t sub0,
+ const int16x8_t sub1) {
+ const int16x8_t sum0 = vaddq_s16(left_dup, sub0);
+ const int16x8_t sum1 = vaddq_s16(left_dup, sub1);
+ const uint8x8_t d0 = vqmovun_s16(sum0);
+ const uint8x8_t d1 = vqmovun_s16(sum1);
+ vst1_u8(*dst, d0);
+ *dst += 8;
+ vst1_u8(*dst, d1);
+ *dst += stride - 8;
+}
+
void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- int j, k;
- uint16x8_t q0u16, q2u16, q3u16, q8u16, q10u16;
- uint8x16_t q0u8, q1u8;
- int16x8_t q0s16, q1s16, q8s16, q11s16;
- uint16x4_t d20u16;
- uint8x8_t d2u8, d3u8, d18u8, d22u8, d23u8;
-
- q0u8 = vld1q_dup_u8(above - 1);
- q1u8 = vld1q_u8(above);
- q2u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8));
- q3u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8));
- for (k = 0; k < 2; k++, left += 8) {
- d18u8 = vld1_u8(left);
- q10u16 = vmovl_u8(d18u8);
- d20u16 = vget_low_u16(q10u16);
- for (j = 0; j < 2; j++, d20u16 = vget_high_u16(q10u16)) {
- q0u16 = vdupq_lane_u16(d20u16, 0);
- q8u16 = vdupq_lane_u16(d20u16, 1);
- q1s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q2u16));
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q3u16));
- q11s16 =
- vaddq_s16(vreinterpretq_s16_u16(q8u16), vreinterpretq_s16_u16(q2u16));
- q8s16 =
- vaddq_s16(vreinterpretq_s16_u16(q8u16), vreinterpretq_s16_u16(q3u16));
- d2u8 = vqmovun_s16(q1s16);
- d3u8 = vqmovun_s16(q0s16);
- d22u8 = vqmovun_s16(q11s16);
- d23u8 = vqmovun_s16(q8s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8));
- dst += stride;
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8));
- dst += stride;
-
- q0u16 = vdupq_lane_u16(d20u16, 2);
- q8u16 = vdupq_lane_u16(d20u16, 3);
- q1s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q2u16));
- q0s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q3u16));
- q11s16 =
- vaddq_s16(vreinterpretq_s16_u16(q8u16), vreinterpretq_s16_u16(q2u16));
- q8s16 =
- vaddq_s16(vreinterpretq_s16_u16(q8u16), vreinterpretq_s16_u16(q3u16));
- d2u8 = vqmovun_s16(q1s16);
- d3u8 = vqmovun_s16(q0s16);
- d22u8 = vqmovun_s16(q11s16);
- d23u8 = vqmovun_s16(q8s16);
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d2u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d3u8));
- dst += stride;
- vst1_u64((uint64_t *)dst, vreinterpret_u64_u8(d22u8));
- vst1_u64((uint64_t *)(dst + 8), vreinterpret_u64_u8(d23u8));
- dst += stride;
- }
+ const uint8x16_t top_left = vld1q_dup_u8(above - 1);
+ const uint8x16_t above_u8 = vld1q_u8(above);
+ const int16x8_t sub0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(above_u8), vget_low_u8(top_left)));
+ const int16x8_t sub1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(above_u8), vget_high_u8(top_left)));
+ int16x8_t left_dup;
+ int i;
+
+ for (i = 0; i < 2; i++, left += 8) {
+ const uint8x8_t left_u8 = vld1_u8(left);
+ const int16x8_t left_s16q = convert_u8_to_s16(left_u8);
+ const int16x4_t left_low = vget_low_s16(left_s16q);
+ const int16x4_t left_high = vget_high_s16(left_s16q);
+
+ left_dup = vdupq_lane_s16(left_low, 0);
+ tm_16_kernel(&dst, stride, left_dup, sub0, sub1);
+ left_dup = vdupq_lane_s16(left_low, 1);
+ tm_16_kernel(&dst, stride, left_dup, sub0, sub1);
+ left_dup = vdupq_lane_s16(left_low, 2);
+ tm_16_kernel(&dst, stride, left_dup, sub0, sub1);
+ left_dup = vdupq_lane_s16(left_low, 3);
+ tm_16_kernel(&dst, stride, left_dup, sub0, sub1);
+
+ left_dup = vdupq_lane_s16(left_high, 0);
+ tm_16_kernel(&dst, stride, left_dup, sub0, sub1);
+ left_dup = vdupq_lane_s16(left_high, 1);
+ tm_16_kernel(&dst, stride, left_dup, sub0, sub1);
+ left_dup = vdupq_lane_s16(left_high, 2);
+ tm_16_kernel(&dst, stride, left_dup, sub0, sub1);
+ left_dup = vdupq_lane_s16(left_high, 3);
+ tm_16_kernel(&dst, stride, left_dup, sub0, sub1);
}
}
+static INLINE void tm_32_kernel(uint8_t **dst, const ptrdiff_t stride,
+ const int16x8_t left_dup, const int16x8_t sub0,
+ const int16x8_t sub1, const int16x8_t sub2,
+ const int16x8_t sub3) {
+ const int16x8_t sum0 = vaddq_s16(left_dup, sub0);
+ const int16x8_t sum1 = vaddq_s16(left_dup, sub1);
+ const int16x8_t sum2 = vaddq_s16(left_dup, sub2);
+ const int16x8_t sum3 = vaddq_s16(left_dup, sub3);
+ const uint8x8_t d0 = vqmovun_s16(sum0);
+ const uint8x8_t d1 = vqmovun_s16(sum1);
+ const uint8x8_t d2 = vqmovun_s16(sum2);
+ const uint8x8_t d3 = vqmovun_s16(sum3);
+
+ vst1q_u8(*dst, vcombine_u8(d0, d1));
+ *dst += 16;
+ vst1q_u8(*dst, vcombine_u8(d2, d3));
+ *dst += stride - 16;
+}
+
void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
- int j, k;
- uint16x8_t q0u16, q3u16, q8u16, q9u16, q10u16, q11u16;
- uint8x16_t q0u8, q1u8, q2u8;
- int16x8_t q12s16, q13s16, q14s16, q15s16;
- uint16x4_t d6u16;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d26u8;
-
- q0u8 = vld1q_dup_u8(above - 1);
- q1u8 = vld1q_u8(above);
- q2u8 = vld1q_u8(above + 16);
- q8u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q0u8));
- q9u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q0u8));
- q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q0u8));
- q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q0u8));
- for (k = 0; k < 4; k++, left += 8) {
- d26u8 = vld1_u8(left);
- q3u16 = vmovl_u8(d26u8);
- d6u16 = vget_low_u16(q3u16);
- for (j = 0; j < 2; j++, d6u16 = vget_high_u16(q3u16)) {
- q0u16 = vdupq_lane_u16(d6u16, 0);
- q12s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q8u16));
- q13s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += stride;
-
- q0u16 = vdupq_lane_u16(d6u16, 1);
- q12s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q8u16));
- q13s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += stride;
-
- q0u16 = vdupq_lane_u16(d6u16, 2);
- q12s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q8u16));
- q13s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += stride;
-
- q0u16 = vdupq_lane_u16(d6u16, 3);
- q12s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q8u16));
- q13s16 =
- vaddq_s16(vreinterpretq_s16_u16(q0u16), vreinterpretq_s16_u16(q9u16));
- q14s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q10u16));
- q15s16 = vaddq_s16(vreinterpretq_s16_u16(q0u16),
- vreinterpretq_s16_u16(q11u16));
- d0u8 = vqmovun_s16(q12s16);
- d1u8 = vqmovun_s16(q13s16);
- d2u8 = vqmovun_s16(q14s16);
- d3u8 = vqmovun_s16(q15s16);
- q0u8 = vcombine_u8(d0u8, d1u8);
- q1u8 = vcombine_u8(d2u8, d3u8);
- vst1q_u64((uint64_t *)dst, vreinterpretq_u64_u8(q0u8));
- vst1q_u64((uint64_t *)(dst + 16), vreinterpretq_u64_u8(q1u8));
- dst += stride;
+ const uint8x16_t top_left = vld1q_dup_u8(above - 1);
+ const uint8x16_t above_low = vld1q_u8(above);
+ const uint8x16_t above_high = vld1q_u8(above + 16);
+ const int16x8_t sub0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(above_low), vget_low_u8(top_left)));
+ const int16x8_t sub1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(above_low), vget_high_u8(top_left)));
+ const int16x8_t sub2 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(above_high), vget_low_u8(top_left)));
+ const int16x8_t sub3 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(above_high), vget_high_u8(top_left)));
+ int16x8_t left_dup;
+ int i, j;
+
+ for (j = 0; j < 4; j++, left += 8) {
+ const uint8x8_t left_u8 = vld1_u8(left);
+ const int16x8_t left_s16q = convert_u8_to_s16(left_u8);
+ int16x4_t left_s16d = vget_low_s16(left_s16q);
+ for (i = 0; i < 2; i++, left_s16d = vget_high_s16(left_s16q)) {
+ left_dup = vdupq_lane_s16(left_s16d, 0);
+ tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3);
+ left_dup = vdupq_lane_s16(left_s16d, 1);
+ tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3);
+ left_dup = vdupq_lane_s16(left_s16d, 2);
+ tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3);
+ left_dup = vdupq_lane_s16(left_s16d, 3);
+ tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3);
}
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/loopfilter_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/loopfilter_neon.c
index fc080163bb4..7419cea022d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/loopfilter_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/loopfilter_neon.c
@@ -423,8 +423,8 @@ static INLINE void apply_15_tap_filter_16(
filter = vand##r##s8(filter, vreinterpret##r##s8_u8(mask)); \
\
/* save bottom 3 bits so that we round one side +4 and the other +3 */ \
- /* if it equals 4 we'll set to adjust by -1 to account for the fact */ \
- /* we'd round 3 the other way */ \
+ /* if it equals 4 we'll set it to adjust by -1 to account for the fact */ \
+ /* we'd round it by 3 the other way */ \
filter1 = vshr##r##n_s8(vqadd##r##s8(filter, vdup##r##n_s8(4)), 3); \
filter2 = vshr##r##n_s8(vqadd##r##s8(filter, vdup##r##n_s8(3)), 3); \
\
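The reworded comment describes the asymmetric rounding in this macro: the same saturated filter value is rounded with +4 before the arithmetic shift on one side of the edge and with +3 on the other, so the two per-pixel adjustments differ by at most one. A hedged per-lane sketch of just the two lines shown (clamp8 and split_rounding are hypothetical names; how filter1 and filter2 are applied to q0 and p0 lives outside this hunk):

static signed char clamp8(int v) {  /* saturate to int8, as vqadd_s8 does per lane */
  return (signed char)(v < -128 ? -128 : v > 127 ? 127 : v);
}

static void split_rounding(int filter, int *filter1, int *filter2) {
  *filter1 = clamp8(filter + 4) >> 3;  /* rounds one side up a little more */
  *filter2 = clamp8(filter + 3) >> 3;  /* ...and the other a little less */
}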
@@ -909,7 +909,7 @@ void vpx_lpf_vertical_8_neon(uint8_t *s, int p, const uint8_t *blimit,
p0, q0, q1, q2, q3, &flat, &flat_status, &hev);
filter8_8(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2,
&op1, &op0, &oq0, &oq1, &oq2);
- // Note: tranpose + store_8x8() is faster than store_6x8().
+ // Note: transpose + store_8x8() is faster than store_6x8().
transpose_u8_8x8(&p3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &q3);
store_8x8(s - 4, p, p3, op2, op1, op0, oq0, oq1, oq2, q3);
}
@@ -934,7 +934,7 @@ void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0,
p0, q0, q1, q2, q3, &flat, &flat_status, &hev);
filter8_16(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2,
&op1, &op0, &oq0, &oq1, &oq2);
- // Note: store_6x8() twice is faster than tranpose + store_8x16().
+ // Note: store_6x8() twice is faster than transpose + store_8x16().
store_6x8(s, p, vget_low_u8(op2), vget_low_u8(op1), vget_low_u8(op0),
vget_low_u8(oq0), vget_low_u8(oq1), vget_low_u8(oq2));
store_6x8(s + 8 * p, p, vget_high_u8(op2), vget_high_u8(op1),
@@ -1037,7 +1037,7 @@ void vpx_lpf_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
&s6, &s7);
store_16x8(s, p, s0, s1, s2, s3, s4, s5, s6, s7);
} else {
- // Note: tranpose + store_8x8() is faster than store_6x8().
+ // Note: transpose + store_8x8() is faster than store_6x8().
transpose_u8_8x8(&p3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &q3);
store_8x8(s + 4, p, p3, op2, op1, op0, oq0, oq1, oq2, q3);
}
@@ -1074,7 +1074,7 @@ void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
store_16x16(s, p, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
s13, s14, s15);
} else {
- // Note: store_6x8() twice is faster than tranpose + store_8x16().
+ // Note: store_6x8() twice is faster than transpose + store_8x16().
s += 8;
store_6x8(s, p, vget_low_u8(op2), vget_low_u8(op1), vget_low_u8(op0),
vget_low_u8(oq0), vget_low_u8(oq1), vget_low_u8(oq2));
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/transpose_neon.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/transpose_neon.h
index 55188c5bc21..445add29689 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/transpose_neon.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/transpose_neon.h
@@ -39,6 +39,15 @@ static INLINE uint8x16x2_t vpx_vtrnq_u64(uint32x4_t a0, uint32x4_t a1) {
return b0;
}
+static INLINE uint16x8x2_t vpx_vtrnq_u64_to_u16(uint32x4_t a0, uint32x4_t a1) {
+ uint16x8x2_t b0;
+ b0.val[0] = vcombine_u16(vreinterpret_u16_u32(vget_low_u32(a0)),
+ vreinterpret_u16_u32(vget_low_u32(a1)));
+ b0.val[1] = vcombine_u16(vreinterpret_u16_u32(vget_high_u32(a0)),
+ vreinterpret_u16_u32(vget_high_u32(a1)));
+ return b0;
+}
+
static INLINE void transpose_u8_4x4(uint8x8_t *a0, uint8x8_t *a1) {
// Swap 16 bit elements. Goes from:
// a0: 00 01 02 03 10 11 12 13
@@ -68,6 +77,70 @@ static INLINE void transpose_u8_4x4(uint8x8_t *a0, uint8x8_t *a1) {
*a1 = d0.val[1];
}
+static INLINE void transpose_s16_4x4d(int16x4_t *a0, int16x4_t *a1,
+ int16x4_t *a2, int16x4_t *a3) {
+ // Swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03
+ // a1: 10 11 12 13
+ // a2: 20 21 22 23
+ // a3: 30 31 32 33
+ // to:
+ // b0.val[0]: 00 10 02 12
+ // b0.val[1]: 01 11 03 13
+ // b1.val[0]: 20 30 22 32
+ // b1.val[1]: 21 31 23 33
+
+ const int16x4x2_t b0 = vtrn_s16(*a0, *a1);
+ const int16x4x2_t b1 = vtrn_s16(*a2, *a3);
+
+ // Swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30
+ // c0.val[1]: 02 12 22 32
+ // c1.val[0]: 01 11 21 31
+ // c1.val[1]: 03 13 23 33
+
+ const int32x2x2_t c0 = vtrn_s32(vreinterpret_s32_s16(b0.val[0]),
+ vreinterpret_s32_s16(b1.val[0]));
+ const int32x2x2_t c1 = vtrn_s32(vreinterpret_s32_s16(b0.val[1]),
+ vreinterpret_s32_s16(b1.val[1]));
+
+ *a0 = vreinterpret_s16_s32(c0.val[0]);
+ *a1 = vreinterpret_s16_s32(c1.val[0]);
+ *a2 = vreinterpret_s16_s32(c0.val[1]);
+ *a3 = vreinterpret_s16_s32(c1.val[1]);
+}
+
+static INLINE void transpose_u16_4x4q(uint16x8_t *a0, uint16x8_t *a1) {
+ // Swap 32 bit elements. Goes from:
+ // a0: 00 01 02 03 10 11 12 13
+ // a1: 20 21 22 23 30 31 32 33
+ // to:
+ // b0.val[0]: 00 01 20 21 10 11 30 31
+ // b0.val[1]: 02 03 22 23 12 13 32 33
+
+ const uint32x4x2_t b0 =
+ vtrnq_u32(vreinterpretq_u32_u16(*a0), vreinterpretq_u32_u16(*a1));
+
+ // Swap 64 bit elements resulting in:
+ // c0.val[0]: 00 01 20 21 02 03 22 23
+ // c0.val[1]: 10 11 30 31 12 13 32 33
+
+ const uint32x4_t c0 =
+ vcombine_u32(vget_low_u32(b0.val[0]), vget_low_u32(b0.val[1]));
+ const uint32x4_t c1 =
+ vcombine_u32(vget_high_u32(b0.val[0]), vget_high_u32(b0.val[1]));
+
+ // Swap 16 bit elements resulting in:
+ // d0.val[0]: 00 10 20 30 02 12 22 32
+ // d0.val[1]: 01 11 21 31 03 13 23 33
+
+ const uint16x8x2_t d0 =
+ vtrnq_u16(vreinterpretq_u16_u32(c0), vreinterpretq_u16_u32(c1));
+
+ *a0 = d0.val[0];
+ *a1 = d0.val[1];
+}
+
static INLINE void transpose_u8_8x4(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,
uint8x8_t *a3) {
// Swap 8 bit elements. Goes from:
@@ -101,6 +174,39 @@ static INLINE void transpose_u8_8x4(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,
*a3 = vreinterpret_u8_u16(c1.val[1]);
}
+static INLINE void transpose_u16_8x4(uint16x8_t *a0, uint16x8_t *a1,
+ uint16x8_t *a2, uint16x8_t *a3) {
+ // Swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 10 11 12 13 14 15 16 17
+ // a2: 20 21 22 23 24 25 26 27
+ // a3: 30 31 32 33 34 35 36 37
+ // to:
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // b0.val[1]: 01 11 03 13 05 15 07 17
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // b1.val[1]: 21 31 23 33 25 35 27 37
+
+ const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1);
+ const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3);
+
+ // Swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30 04 14 24 34
+ // c0.val[1]: 02 12 22 32 06 16 26 36
+ // c1.val[0]: 01 11 21 31 05 15 25 35
+ // c1.val[1]: 03 13 23 33 07 17 27 37
+
+ const uint32x4x2_t c0 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[0]),
+ vreinterpretq_u32_u16(b1.val[0]));
+ const uint32x4x2_t c1 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[1]),
+ vreinterpretq_u32_u16(b1.val[1]));
+
+ *a0 = vreinterpretq_u16_u32(c0.val[0]);
+ *a1 = vreinterpretq_u16_u32(c1.val[0]);
+ *a2 = vreinterpretq_u16_u32(c0.val[1]);
+ *a3 = vreinterpretq_u16_u32(c1.val[1]);
+}
+
// Note: Using 'd' registers or 'q' registers has almost identical speed. We use
// 'q' registers here to save some instructions.
static INLINE void transpose_u8_8x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2,
@@ -228,6 +334,77 @@ static INLINE void transpose_s16_8x8(int16x8_t *a0, int16x8_t *a1,
*a7 = d3.val[1];
}
+static INLINE void transpose_u16_8x8(uint16x8_t *a0, uint16x8_t *a1,
+ uint16x8_t *a2, uint16x8_t *a3,
+ uint16x8_t *a4, uint16x8_t *a5,
+ uint16x8_t *a6, uint16x8_t *a7) {
+ // Swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 10 11 12 13 14 15 16 17
+ // a2: 20 21 22 23 24 25 26 27
+ // a3: 30 31 32 33 34 35 36 37
+ // a4: 40 41 42 43 44 45 46 47
+ // a5: 50 51 52 53 54 55 56 57
+ // a6: 60 61 62 63 64 65 66 67
+ // a7: 70 71 72 73 74 75 76 77
+ // to:
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // b0.val[1]: 01 11 03 13 05 15 07 17
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // b1.val[1]: 21 31 23 33 25 35 27 37
+ // b2.val[0]: 40 50 42 52 44 54 46 56
+ // b2.val[1]: 41 51 43 53 45 55 47 57
+ // b3.val[0]: 60 70 62 72 64 74 66 76
+ // b3.val[1]: 61 71 63 73 65 75 67 77
+
+ const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1);
+ const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3);
+ const uint16x8x2_t b2 = vtrnq_u16(*a4, *a5);
+ const uint16x8x2_t b3 = vtrnq_u16(*a6, *a7);
+
+ // Swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30 04 14 24 34
+ // c0.val[1]: 02 12 22 32 06 16 26 36
+ // c1.val[0]: 01 11 21 31 05 15 25 35
+ // c1.val[1]: 03 13 23 33 07 17 27 37
+ // c2.val[0]: 40 50 60 70 44 54 64 74
+ // c2.val[1]: 42 52 62 72 46 56 66 76
+ // c3.val[0]: 41 51 61 71 45 55 65 75
+ // c3.val[1]: 43 53 63 73 47 57 67 77
+
+ const uint32x4x2_t c0 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[0]),
+ vreinterpretq_u32_u16(b1.val[0]));
+ const uint32x4x2_t c1 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[1]),
+ vreinterpretq_u32_u16(b1.val[1]));
+ const uint32x4x2_t c2 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[0]),
+ vreinterpretq_u32_u16(b3.val[0]));
+ const uint32x4x2_t c3 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[1]),
+ vreinterpretq_u32_u16(b3.val[1]));
+
+ // Swap 64 bit elements resulting in:
+ // d0.val[0]: 00 10 20 30 40 50 60 70
+ // d0.val[1]: 04 14 24 34 44 54 64 74
+ // d1.val[0]: 01 11 21 31 41 51 61 71
+ // d1.val[1]: 05 15 25 35 45 55 65 75
+ // d2.val[0]: 02 12 22 32 42 52 62 72
+ // d2.val[1]: 06 16 26 36 46 56 66 76
+ // d3.val[0]: 03 13 23 33 43 53 63 73
+ // d3.val[1]: 07 17 27 37 47 57 67 77
+ const uint16x8x2_t d0 = vpx_vtrnq_u64_to_u16(c0.val[0], c2.val[0]);
+ const uint16x8x2_t d1 = vpx_vtrnq_u64_to_u16(c1.val[0], c3.val[0]);
+ const uint16x8x2_t d2 = vpx_vtrnq_u64_to_u16(c0.val[1], c2.val[1]);
+ const uint16x8x2_t d3 = vpx_vtrnq_u64_to_u16(c1.val[1], c3.val[1]);
+
+ *a0 = d0.val[0];
+ *a1 = d1.val[0];
+ *a2 = d2.val[0];
+ *a3 = d3.val[0];
+ *a4 = d0.val[1];
+ *a5 = d1.val[1];
+ *a6 = d2.val[1];
+ *a7 = d3.val[1];
+}
+
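A short usage sketch for the new transpose_u16_8x8() helper, assuming <arm_neon.h> as already included by this header; the block below, its name, and its src/dst parameters are illustrative only:

static INLINE void transpose_8x8_u16_block(const uint16_t *src, int src_stride,
                                           uint16_t *dst, int dst_stride) {
  // Load eight rows of eight 16-bit values.
  uint16x8_t a0 = vld1q_u16(src + 0 * src_stride);
  uint16x8_t a1 = vld1q_u16(src + 1 * src_stride);
  uint16x8_t a2 = vld1q_u16(src + 2 * src_stride);
  uint16x8_t a3 = vld1q_u16(src + 3 * src_stride);
  uint16x8_t a4 = vld1q_u16(src + 4 * src_stride);
  uint16x8_t a5 = vld1q_u16(src + 5 * src_stride);
  uint16x8_t a6 = vld1q_u16(src + 6 * src_stride);
  uint16x8_t a7 = vld1q_u16(src + 7 * src_stride);
  // Transpose in registers, then write the columns back out as rows.
  transpose_u16_8x8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);
  vst1q_u16(dst + 0 * dst_stride, a0);
  vst1q_u16(dst + 1 * dst_stride, a1);
  vst1q_u16(dst + 2 * dst_stride, a2);
  vst1q_u16(dst + 3 * dst_stride, a3);
  vst1q_u16(dst + 4 * dst_stride, a4);
  vst1q_u16(dst + 5 * dst_stride, a5);
  vst1q_u16(dst + 6 * dst_stride, a6);
  vst1q_u16(dst + 7 * dst_stride, a7);
}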
static INLINE void transpose_u8_16x8(
const uint8x16_t i0, const uint8x16_t i1, const uint8x16_t i2,
const uint8x16_t i3, const uint8x16_t i4, const uint8x16_t i5,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c
index e16d33718aa..1386838eea6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve8_neon.c
@@ -820,14 +820,14 @@ void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
s10 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
s += src_stride;
- __builtin_prefetch(dst + 0 * dst_stride);
- __builtin_prefetch(dst + 1 * dst_stride);
- __builtin_prefetch(dst + 2 * dst_stride);
- __builtin_prefetch(dst + 3 * dst_stride);
- __builtin_prefetch(src + 0 * src_stride);
- __builtin_prefetch(src + 1 * src_stride);
- __builtin_prefetch(src + 2 * src_stride);
- __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+ __builtin_prefetch(s + 0 * src_stride);
+ __builtin_prefetch(s + 1 * src_stride);
+ __builtin_prefetch(s + 2 * src_stride);
+ __builtin_prefetch(s + 3 * src_stride);
d0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3,
filter4);
d1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3,
@@ -1002,14 +1002,14 @@ void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
s10 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s)));
s += src_stride;
- __builtin_prefetch(dst + 0 * dst_stride);
- __builtin_prefetch(dst + 1 * dst_stride);
- __builtin_prefetch(dst + 2 * dst_stride);
- __builtin_prefetch(dst + 3 * dst_stride);
- __builtin_prefetch(src + 0 * src_stride);
- __builtin_prefetch(src + 1 * src_stride);
- __builtin_prefetch(src + 2 * src_stride);
- __builtin_prefetch(src + 3 * src_stride);
+ __builtin_prefetch(d + 0 * dst_stride);
+ __builtin_prefetch(d + 1 * dst_stride);
+ __builtin_prefetch(d + 2 * dst_stride);
+ __builtin_prefetch(d + 3 * dst_stride);
+ __builtin_prefetch(s + 0 * src_stride);
+ __builtin_prefetch(s + 1 * src_stride);
+ __builtin_prefetch(s + 2 * src_stride);
+ __builtin_prefetch(s + 3 * src_stride);
d0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3,
filter4);
d1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c
index 5d7fa54fcd4..6ca0e501b3c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/arm/vpx_convolve_neon.c
@@ -24,16 +24,15 @@ void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);
// Account for the vertical phase needing 3 lines prior and 4 lines post
- int intermediate_height = h + 7;
+ const int intermediate_height = h + 7;
assert(y_step_q4 == 16);
assert(x_step_q4 == 16);
- /* Filter starting 3 lines back. The neon implementation will ignore the
- * given height and filter a multiple of 4 lines. Since this goes in to
- * the temp buffer which has lots of extra room and is subsequently discarded
- * this is safe if somewhat less than ideal.
- */
+ /* Filter starting 3 lines back. The neon implementation will ignore the given
+   * height and filter a multiple of 4 lines. Since this goes into the temp
+ * buffer which has lots of extra room and is subsequently discarded this is
+ * safe if somewhat less than ideal. */
vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter_x,
x_step_q4, filter_y, y_step_q4, w,
intermediate_height);
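The h + 7 intermediate height is the arithmetic spelled out in the comment above: the 8-tap vertical filter reads 3 source rows before and 4 after each output row, so the horizontal pass must produce h + 3 + 4 rows into the temp buffer. A trivial illustrative sketch of that bookkeeping (the function name is hypothetical):

static int convolve_intermediate_rows(int h) {
  const int taps_above = 3;  /* rows the vertical pass needs before row 0 */
  const int taps_below = 4;  /* rows it needs after row h - 1 */
  return h + taps_above + taps_below;  /* == h + 7 */
}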
@@ -49,7 +48,7 @@ void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_y, int y_step_q4, int w,
int h) {
DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);
- int intermediate_height = h + 7;
+ const int intermediate_height = h + 7;
assert(y_step_q4 == 16);
assert(x_step_q4 == 16);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/fwd_txfm.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/fwd_txfm.c
index 4e7d4053ea9..aa59601094d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/fwd_txfm.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/fwd_txfm.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <assert.h>
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/fwd_txfm.h"
@@ -21,36 +22,37 @@ void vpx_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
int pass;
// We need an intermediate buffer between passes.
tran_low_t intermediate[4 * 4];
- const int16_t *in_pass0 = input;
- const tran_low_t *in = NULL;
+ const tran_low_t *in_low = NULL;
tran_low_t *out = intermediate;
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
- tran_high_t input[4]; // canbe16
+ tran_high_t in_high[4]; // canbe16
tran_high_t step[4]; // canbe16
tran_high_t temp1, temp2; // needs32
int i;
for (i = 0; i < 4; ++i) {
// Load inputs.
- if (0 == pass) {
- input[0] = in_pass0[0 * stride] * 16;
- input[1] = in_pass0[1 * stride] * 16;
- input[2] = in_pass0[2 * stride] * 16;
- input[3] = in_pass0[3 * stride] * 16;
- if (i == 0 && input[0]) {
- input[0] += 1;
+ if (pass == 0) {
+ in_high[0] = input[0 * stride] * 16;
+ in_high[1] = input[1 * stride] * 16;
+ in_high[2] = input[2 * stride] * 16;
+ in_high[3] = input[3 * stride] * 16;
+ if (i == 0 && in_high[0]) {
+ ++in_high[0];
}
} else {
- input[0] = in[0 * 4];
- input[1] = in[1 * 4];
- input[2] = in[2 * 4];
- input[3] = in[3 * 4];
+ assert(in_low != NULL);
+ in_high[0] = in_low[0 * 4];
+ in_high[1] = in_low[1 * 4];
+ in_high[2] = in_low[2 * 4];
+ in_high[3] = in_low[3 * 4];
+ ++in_low;
}
// Transform.
- step[0] = input[0] + input[3];
- step[1] = input[1] + input[2];
- step[2] = input[1] - input[2];
- step[3] = input[0] - input[3];
+ step[0] = in_high[0] + in_high[3];
+ step[1] = in_high[1] + in_high[2];
+ step[2] = in_high[1] - in_high[2];
+ step[3] = in_high[0] - in_high[3];
temp1 = (step[0] + step[1]) * cospi_16_64;
temp2 = (step[0] - step[1]) * cospi_16_64;
out[0] = (tran_low_t)fdct_round_shift(temp1);
@@ -60,12 +62,11 @@ void vpx_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) {
out[1] = (tran_low_t)fdct_round_shift(temp1);
out[3] = (tran_low_t)fdct_round_shift(temp2);
// Do next column (which is a transposed row in second/horizontal pass)
- in_pass0++;
- in++;
+ ++input;
out += 4;
}
// Setup in/out for next pass.
- in = intermediate;
+ in_low = intermediate;
out = output;
}
@@ -99,7 +100,6 @@ void vpx_fdct8x8_c(const int16_t *input, tran_low_t *final_output, int stride) {
tran_high_t t0, t1, t2, t3; // needs32
tran_high_t x0, x1, x2, x3; // canbe16
- int i;
for (i = 0; i < 8; i++) {
// stage 1
if (pass == 0) {
@@ -190,56 +190,57 @@ void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
int pass;
// We need an intermediate buffer between passes.
tran_low_t intermediate[256];
- const int16_t *in_pass0 = input;
- const tran_low_t *in = NULL;
+ const tran_low_t *in_low = NULL;
tran_low_t *out = intermediate;
// Do the two transform/transpose passes
for (pass = 0; pass < 2; ++pass) {
tran_high_t step1[8]; // canbe16
tran_high_t step2[8]; // canbe16
tran_high_t step3[8]; // canbe16
- tran_high_t input[8]; // canbe16
+ tran_high_t in_high[8]; // canbe16
tran_high_t temp1, temp2; // needs32
int i;
for (i = 0; i < 16; i++) {
if (0 == pass) {
// Calculate input for the first 8 results.
- input[0] = (in_pass0[0 * stride] + in_pass0[15 * stride]) * 4;
- input[1] = (in_pass0[1 * stride] + in_pass0[14 * stride]) * 4;
- input[2] = (in_pass0[2 * stride] + in_pass0[13 * stride]) * 4;
- input[3] = (in_pass0[3 * stride] + in_pass0[12 * stride]) * 4;
- input[4] = (in_pass0[4 * stride] + in_pass0[11 * stride]) * 4;
- input[5] = (in_pass0[5 * stride] + in_pass0[10 * stride]) * 4;
- input[6] = (in_pass0[6 * stride] + in_pass0[9 * stride]) * 4;
- input[7] = (in_pass0[7 * stride] + in_pass0[8 * stride]) * 4;
+ in_high[0] = (input[0 * stride] + input[15 * stride]) * 4;
+ in_high[1] = (input[1 * stride] + input[14 * stride]) * 4;
+ in_high[2] = (input[2 * stride] + input[13 * stride]) * 4;
+ in_high[3] = (input[3 * stride] + input[12 * stride]) * 4;
+ in_high[4] = (input[4 * stride] + input[11 * stride]) * 4;
+ in_high[5] = (input[5 * stride] + input[10 * stride]) * 4;
+ in_high[6] = (input[6 * stride] + input[9 * stride]) * 4;
+ in_high[7] = (input[7 * stride] + input[8 * stride]) * 4;
// Calculate input for the next 8 results.
- step1[0] = (in_pass0[7 * stride] - in_pass0[8 * stride]) * 4;
- step1[1] = (in_pass0[6 * stride] - in_pass0[9 * stride]) * 4;
- step1[2] = (in_pass0[5 * stride] - in_pass0[10 * stride]) * 4;
- step1[3] = (in_pass0[4 * stride] - in_pass0[11 * stride]) * 4;
- step1[4] = (in_pass0[3 * stride] - in_pass0[12 * stride]) * 4;
- step1[5] = (in_pass0[2 * stride] - in_pass0[13 * stride]) * 4;
- step1[6] = (in_pass0[1 * stride] - in_pass0[14 * stride]) * 4;
- step1[7] = (in_pass0[0 * stride] - in_pass0[15 * stride]) * 4;
+ step1[0] = (input[7 * stride] - input[8 * stride]) * 4;
+ step1[1] = (input[6 * stride] - input[9 * stride]) * 4;
+ step1[2] = (input[5 * stride] - input[10 * stride]) * 4;
+ step1[3] = (input[4 * stride] - input[11 * stride]) * 4;
+ step1[4] = (input[3 * stride] - input[12 * stride]) * 4;
+ step1[5] = (input[2 * stride] - input[13 * stride]) * 4;
+ step1[6] = (input[1 * stride] - input[14 * stride]) * 4;
+ step1[7] = (input[0 * stride] - input[15 * stride]) * 4;
} else {
// Calculate input for the first 8 results.
- input[0] = ((in[0 * 16] + 1) >> 2) + ((in[15 * 16] + 1) >> 2);
- input[1] = ((in[1 * 16] + 1) >> 2) + ((in[14 * 16] + 1) >> 2);
- input[2] = ((in[2 * 16] + 1) >> 2) + ((in[13 * 16] + 1) >> 2);
- input[3] = ((in[3 * 16] + 1) >> 2) + ((in[12 * 16] + 1) >> 2);
- input[4] = ((in[4 * 16] + 1) >> 2) + ((in[11 * 16] + 1) >> 2);
- input[5] = ((in[5 * 16] + 1) >> 2) + ((in[10 * 16] + 1) >> 2);
- input[6] = ((in[6 * 16] + 1) >> 2) + ((in[9 * 16] + 1) >> 2);
- input[7] = ((in[7 * 16] + 1) >> 2) + ((in[8 * 16] + 1) >> 2);
+ assert(in_low != NULL);
+ in_high[0] = ((in_low[0 * 16] + 1) >> 2) + ((in_low[15 * 16] + 1) >> 2);
+ in_high[1] = ((in_low[1 * 16] + 1) >> 2) + ((in_low[14 * 16] + 1) >> 2);
+ in_high[2] = ((in_low[2 * 16] + 1) >> 2) + ((in_low[13 * 16] + 1) >> 2);
+ in_high[3] = ((in_low[3 * 16] + 1) >> 2) + ((in_low[12 * 16] + 1) >> 2);
+ in_high[4] = ((in_low[4 * 16] + 1) >> 2) + ((in_low[11 * 16] + 1) >> 2);
+ in_high[5] = ((in_low[5 * 16] + 1) >> 2) + ((in_low[10 * 16] + 1) >> 2);
+ in_high[6] = ((in_low[6 * 16] + 1) >> 2) + ((in_low[9 * 16] + 1) >> 2);
+ in_high[7] = ((in_low[7 * 16] + 1) >> 2) + ((in_low[8 * 16] + 1) >> 2);
// Calculate input for the next 8 results.
- step1[0] = ((in[7 * 16] + 1) >> 2) - ((in[8 * 16] + 1) >> 2);
- step1[1] = ((in[6 * 16] + 1) >> 2) - ((in[9 * 16] + 1) >> 2);
- step1[2] = ((in[5 * 16] + 1) >> 2) - ((in[10 * 16] + 1) >> 2);
- step1[3] = ((in[4 * 16] + 1) >> 2) - ((in[11 * 16] + 1) >> 2);
- step1[4] = ((in[3 * 16] + 1) >> 2) - ((in[12 * 16] + 1) >> 2);
- step1[5] = ((in[2 * 16] + 1) >> 2) - ((in[13 * 16] + 1) >> 2);
- step1[6] = ((in[1 * 16] + 1) >> 2) - ((in[14 * 16] + 1) >> 2);
- step1[7] = ((in[0 * 16] + 1) >> 2) - ((in[15 * 16] + 1) >> 2);
+ step1[0] = ((in_low[7 * 16] + 1) >> 2) - ((in_low[8 * 16] + 1) >> 2);
+ step1[1] = ((in_low[6 * 16] + 1) >> 2) - ((in_low[9 * 16] + 1) >> 2);
+ step1[2] = ((in_low[5 * 16] + 1) >> 2) - ((in_low[10 * 16] + 1) >> 2);
+ step1[3] = ((in_low[4 * 16] + 1) >> 2) - ((in_low[11 * 16] + 1) >> 2);
+ step1[4] = ((in_low[3 * 16] + 1) >> 2) - ((in_low[12 * 16] + 1) >> 2);
+ step1[5] = ((in_low[2 * 16] + 1) >> 2) - ((in_low[13 * 16] + 1) >> 2);
+ step1[6] = ((in_low[1 * 16] + 1) >> 2) - ((in_low[14 * 16] + 1) >> 2);
+ step1[7] = ((in_low[0 * 16] + 1) >> 2) - ((in_low[15 * 16] + 1) >> 2);
+ in_low++;
}
// Work on the first eight values; fdct8(input, even_results);
{
@@ -248,14 +249,14 @@ void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
tran_high_t x0, x1, x2, x3; // canbe16
// stage 1
- s0 = input[0] + input[7];
- s1 = input[1] + input[6];
- s2 = input[2] + input[5];
- s3 = input[3] + input[4];
- s4 = input[3] - input[4];
- s5 = input[2] - input[5];
- s6 = input[1] - input[6];
- s7 = input[0] - input[7];
+ s0 = in_high[0] + in_high[7];
+ s1 = in_high[1] + in_high[6];
+ s2 = in_high[2] + in_high[5];
+ s3 = in_high[3] + in_high[4];
+ s4 = in_high[3] - in_high[4];
+ s5 = in_high[2] - in_high[5];
+ s6 = in_high[1] - in_high[6];
+ s7 = in_high[0] - in_high[7];
// fdct4(step, step);
x0 = s0 + s3;
@@ -350,12 +351,11 @@ void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) {
out[15] = (tran_low_t)fdct_round_shift(temp2);
}
// Do next column (which is a transposed row in second/horizontal pass)
- in++;
- in_pass0++;
+ input++;
out += 16;
}
// Setup in/out for next pass.
- in = intermediate;
+ in_low = intermediate;
out = output;
}
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.c
index 46ddd1da0d0..f3f543ddfe8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.c
@@ -96,6 +96,7 @@ void vpx_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) {
void idct4_c(const tran_low_t *input, tran_low_t *output) {
tran_low_t step[4];
tran_high_t temp1, temp2;
+
// stage 1
temp1 = (input[0] + input[2]) * cospi_16_64;
temp2 = (input[0] - input[2]) * cospi_16_64;
@@ -114,9 +115,9 @@ void idct4_c(const tran_low_t *input, tran_low_t *output) {
}
void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+ int i, j;
tran_low_t out[4 * 4];
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[4], temp_out[4];
// Rows
@@ -142,6 +143,7 @@ void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
int i;
tran_high_t a1;
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 4);
@@ -157,6 +159,7 @@ void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest,
void idct8_c(const tran_low_t *input, tran_low_t *output) {
tran_low_t step1[8], step2[8];
tran_high_t temp1, temp2;
+
// stage 1
step1[0] = input[0];
step1[2] = input[4];
@@ -209,9 +212,9 @@ void idct8_c(const tran_low_t *input, tran_low_t *output) {
}
void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+ int i, j;
tran_low_t out[8 * 8];
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[8], temp_out[8];
// First transform rows
@@ -236,6 +239,7 @@ void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
int i, j;
tran_high_t a1;
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 5);
for (j = 0; j < 8; ++j) {
@@ -246,14 +250,13 @@ void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
void iadst4_c(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
-
tran_low_t x0 = input[0];
tran_low_t x1 = input[1];
tran_low_t x2 = input[2];
tran_low_t x3 = input[3];
if (!(x0 | x1 | x2 | x3)) {
- output[0] = output[1] = output[2] = output[3] = 0;
+ memset(output, 0, 4 * sizeof(*output));
return;
}
@@ -283,7 +286,6 @@ void iadst4_c(const tran_low_t *input, tran_low_t *output) {
void iadst8_c(const tran_low_t *input, tran_low_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7;
-
tran_high_t x0 = input[7];
tran_high_t x1 = input[0];
tran_high_t x2 = input[5];
@@ -294,8 +296,7 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) {
tran_high_t x7 = input[6];
if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
- output[0] = output[1] = output[2] = output[3] = output[4] = output[5] =
- output[6] = output[7] = 0;
+ memset(output, 0, 8 * sizeof(*output));
return;
}
@@ -359,13 +360,13 @@ void iadst8_c(const tran_low_t *input, tran_low_t *output) {
}
void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
+ int i, j;
tran_low_t out[8 * 8] = { 0 };
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[8], temp_out[8];
// First transform rows
- // only first 4 row has non-zero coefs
+ // Only first 4 row has non-zero coefs
for (i = 0; i < 4; ++i) {
idct8_c(input, outptr);
input += 8;
@@ -550,9 +551,9 @@ void idct16_c(const tran_low_t *input, tran_low_t *output) {
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
+ int i, j;
tran_low_t out[16 * 16];
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[16], temp_out[16];
// First transform rows
@@ -576,7 +577,6 @@ void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
void iadst16_c(const tran_low_t *input, tran_low_t *output) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
tran_high_t s9, s10, s11, s12, s13, s14, s15;
-
tran_high_t x0 = input[15];
tran_high_t x1 = input[0];
tran_high_t x2 = input[13];
@@ -596,9 +596,7 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) {
if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 |
x13 | x14 | x15)) {
- output[0] = output[1] = output[2] = output[3] = output[4] = output[5] =
- output[6] = output[7] = output[8] = output[9] = output[10] =
- output[11] = output[12] = output[13] = output[14] = output[15] = 0;
+ memset(output, 0, 16 * sizeof(*output));
return;
}
@@ -746,9 +744,9 @@ void iadst16_c(const tran_low_t *input, tran_low_t *output) {
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
+ int i, j;
tran_low_t out[16 * 16] = { 0 };
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[16], temp_out[16];
// First transform rows. Since all non-zero dct coefficients are in
@@ -774,6 +772,7 @@ void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
int i, j;
tran_high_t a1;
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 6);
for (j = 0; j < 16; ++j) {
@@ -1151,9 +1150,9 @@ void idct32_c(const tran_low_t *input, tran_low_t *output) {
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
+ int i, j;
tran_low_t out[32 * 32];
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[32], temp_out[32];
// Rows
@@ -1188,13 +1187,13 @@ void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
+ int i, j;
tran_low_t out[32 * 32] = { 0 };
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[32], temp_out[32];
// Rows
- // only upper-left 16x16 has non-zero coeff
+ // Only upper-left 16x16 has non-zero coeff
for (i = 0; i < 16; ++i) {
idct32_c(input, outptr);
input += 32;
@@ -1214,13 +1213,13 @@ void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest,
void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
int stride) {
+ int i, j;
tran_low_t out[32 * 32] = { 0 };
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[32], temp_out[32];
// Rows
- // only upper-left 8x8 has non-zero coeff
+ // Only upper-left 8x8 has non-zero coeff
for (i = 0; i < 8; ++i) {
idct32_c(input, outptr);
input += 32;
@@ -1241,8 +1240,8 @@ void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest,
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
int i, j;
tran_high_t a1;
-
tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
+
out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
a1 = ROUND_POWER_OF_TWO(out, 6);
@@ -1373,12 +1372,12 @@ void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
// stage 1
temp1 = (input[0] + input[2]) * cospi_16_64;
temp2 = (input[0] - input[2]) * cospi_16_64;
- step[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
- step[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
// stage 2
output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd);
@@ -1389,9 +1388,9 @@ void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
+ int i, j;
tran_low_t out[4 * 4];
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[4], temp_out[4];
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -1418,10 +1417,10 @@ void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
int i;
tran_high_t a1;
tran_low_t out =
- HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+ HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
+ out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 4);
for (i = 0; i < 4; i++) {
@@ -1452,12 +1451,12 @@ void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
step1[3] = input[6];
temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
- step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
- step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
// stage 2 & stage 3 - even half
vpx_highbd_idct4_c(step1, step1, bd);
@@ -1472,8 +1471,8 @@ void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step1[7] = step2[7];
// stage 4
@@ -1489,20 +1488,20 @@ void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) {
void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
+ int i, j;
tran_low_t out[8 * 8];
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[8], temp_out[8];
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- // First transform rows.
+ // First transform rows
for (i = 0; i < 8; ++i) {
vpx_highbd_idct8_c(input, outptr, bd);
input += 8;
outptr += 8;
}
- // Then transform columns.
+ // Then transform columns
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
vpx_highbd_idct8_c(temp_in, temp_out, bd);
@@ -1518,9 +1517,10 @@ void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
int i, j;
tran_high_t a1;
tran_low_t out =
- HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+ HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
+
+ out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 5);
for (j = 0; j < 8; ++j) {
for (i = 0; i < 8; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
@@ -1567,10 +1567,10 @@ void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) {
// The overall dynamic range is 14b (input) + 14b (multiplication scaling)
// + 1b (addition) = 29b.
// Hence the output bit depth is 15b.
- output[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s3), bd);
- output[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s3), bd);
- output[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
- output[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3), bd);
+ output[0] = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s3), bd);
+ output[1] = HIGHBD_WRAPLOW(dct_const_round_shift(s1 + s3), bd);
+ output[2] = HIGHBD_WRAPLOW(dct_const_round_shift(s2), bd);
+ output[3] = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd);
}
void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
@@ -1608,14 +1608,14 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
- x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s4), bd);
- x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s5), bd);
- x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s6), bd);
- x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s7), bd);
- x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s4), bd);
- x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s5), bd);
- x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s6), bd);
- x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s7), bd);
+ x0 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s4), bd);
+ x1 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 + s5), bd);
+ x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 + s6), bd);
+ x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 + s7), bd);
+ x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 - s4), bd);
+ x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 - s5), bd);
+ x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 - s6), bd);
+ x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 - s7), bd);
// stage 2
s0 = x0;
@@ -1631,10 +1631,10 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
x1 = HIGHBD_WRAPLOW(s1 + s3, bd);
x2 = HIGHBD_WRAPLOW(s0 - s2, bd);
x3 = HIGHBD_WRAPLOW(s1 - s3, bd);
- x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd);
- x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd);
- x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd);
- x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd);
+ x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 + s6), bd);
+ x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 + s7), bd);
+ x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 - s6), bd);
+ x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 - s7), bd);
// stage 3
s2 = cospi_16_64 * (x2 + x3);
@@ -1642,10 +1642,10 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
s6 = cospi_16_64 * (x6 + x7);
s7 = cospi_16_64 * (x6 - x7);
- x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
- x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd);
- x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd);
- x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd);
+ x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2), bd);
+ x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3), bd);
+ x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s6), bd);
+ x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s7), bd);
output[0] = HIGHBD_WRAPLOW(x0, bd);
output[1] = HIGHBD_WRAPLOW(-x4, bd);
@@ -1657,22 +1657,23 @@ void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
output[7] = HIGHBD_WRAPLOW(-x1, bd);
}
-void vpx_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
+ int i, j;
tran_low_t out[8 * 8] = { 0 };
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[8], temp_out[8];
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- // First transform rows.
- // Only first 4 row has non-zero coefs.
+ // First transform rows
+ // Only first 4 row has non-zero coefs
for (i = 0; i < 4; ++i) {
vpx_highbd_idct8_c(input, outptr, bd);
input += 8;
outptr += 8;
}
- // Then transform columns.
+
+ // Then transform columns
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
vpx_highbd_idct8_c(temp_in, temp_out, bd);
@@ -1726,23 +1727,23 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
- step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[15] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
- step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
- step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
- step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
// stage 3
step1[0] = step2[0];
@@ -1752,12 +1753,12 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
- step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
- step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
@@ -1771,12 +1772,12 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
// stage 4
temp1 = (step1[0] + step1[1]) * cospi_16_64;
temp2 = (step1[0] - step1[1]) * cospi_16_64;
- step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
@@ -1786,12 +1787,12 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
step2[15] = step1[15];
temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
- step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
- step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step2[11] = step1[11];
step2[12] = step1[12];
@@ -1803,8 +1804,8 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step1[7] = step2[7];
step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
@@ -1829,12 +1830,12 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
step2[9] = step1[9];
temp1 = (-step1[10] + step1[13]) * cospi_16_64;
temp2 = (step1[10] + step1[13]) * cospi_16_64;
- step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = (-step1[11] + step1[12]) * cospi_16_64;
temp2 = (step1[11] + step1[12]) * cospi_16_64;
- step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step2[14] = step1[14];
step2[15] = step1[15];
@@ -1859,20 +1860,20 @@ void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
+ int i, j;
tran_low_t out[16 * 16];
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[16], temp_out[16];
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- // First transform rows.
+ // First transform rows
for (i = 0; i < 16; ++i) {
vpx_highbd_idct16_c(input, outptr, bd);
input += 16;
outptr += 16;
}
- // Then transform columns.
+ // Then transform columns
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
vpx_highbd_idct16_c(temp_in, temp_out, bd);
@@ -1936,22 +1937,22 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
- x0 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 + s8), bd);
- x1 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 + s9), bd);
- x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 + s10), bd);
- x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 + s11), bd);
- x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s12), bd);
- x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s13), bd);
- x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 + s14), bd);
- x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 + s15), bd);
- x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s0 - s8), bd);
- x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s1 - s9), bd);
- x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2 - s10), bd);
- x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3 - s11), bd);
- x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s12), bd);
- x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s13), bd);
- x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6 - s14), bd);
- x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7 - s15), bd);
+ x0 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s8), bd);
+ x1 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 + s9), bd);
+ x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 + s10), bd);
+ x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 + s11), bd);
+ x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 + s12), bd);
+ x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 + s13), bd);
+ x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s6 + s14), bd);
+ x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s7 + s15), bd);
+ x8 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 - s8), bd);
+ x9 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 - s9), bd);
+ x10 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 - s10), bd);
+ x11 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 - s11), bd);
+ x12 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 - s12), bd);
+ x13 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 - s13), bd);
+ x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s6 - s14), bd);
+ x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s7 - s15), bd);
// stage 2
s0 = x0;
@@ -1979,14 +1980,14 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
x5 = HIGHBD_WRAPLOW(s1 - s5, bd);
x6 = HIGHBD_WRAPLOW(s2 - s6, bd);
x7 = HIGHBD_WRAPLOW(s3 - s7, bd);
- x8 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 + s12), bd);
- x9 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 + s13), bd);
- x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 + s14), bd);
- x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 + s15), bd);
- x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s8 - s12), bd);
- x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s9 - s13), bd);
- x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10 - s14), bd);
- x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11 - s15), bd);
+ x8 = HIGHBD_WRAPLOW(dct_const_round_shift(s8 + s12), bd);
+ x9 = HIGHBD_WRAPLOW(dct_const_round_shift(s9 + s13), bd);
+ x10 = HIGHBD_WRAPLOW(dct_const_round_shift(s10 + s14), bd);
+ x11 = HIGHBD_WRAPLOW(dct_const_round_shift(s11 + s15), bd);
+ x12 = HIGHBD_WRAPLOW(dct_const_round_shift(s8 - s12), bd);
+ x13 = HIGHBD_WRAPLOW(dct_const_round_shift(s9 - s13), bd);
+ x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s10 - s14), bd);
+ x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s11 - s15), bd);
// stage 3
s0 = x0;
@@ -2010,18 +2011,18 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
x1 = HIGHBD_WRAPLOW(s1 + s3, bd);
x2 = HIGHBD_WRAPLOW(s0 - s2, bd);
x3 = HIGHBD_WRAPLOW(s1 - s3, bd);
- x4 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 + s6), bd);
- x5 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 + s7), bd);
- x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s4 - s6), bd);
- x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s5 - s7), bd);
+ x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 + s6), bd);
+ x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 + s7), bd);
+ x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 - s6), bd);
+ x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 - s7), bd);
x8 = HIGHBD_WRAPLOW(s8 + s10, bd);
x9 = HIGHBD_WRAPLOW(s9 + s11, bd);
x10 = HIGHBD_WRAPLOW(s8 - s10, bd);
x11 = HIGHBD_WRAPLOW(s9 - s11, bd);
- x12 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 + s14), bd);
- x13 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 + s15), bd);
- x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s12 - s14), bd);
- x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s13 - s15), bd);
+ x12 = HIGHBD_WRAPLOW(dct_const_round_shift(s12 + s14), bd);
+ x13 = HIGHBD_WRAPLOW(dct_const_round_shift(s13 + s15), bd);
+ x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s12 - s14), bd);
+ x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s13 - s15), bd);
// stage 4
s2 = (-cospi_16_64) * (x2 + x3);
@@ -2033,14 +2034,14 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
s14 = (-cospi_16_64) * (x14 + x15);
s15 = cospi_16_64 * (x14 - x15);
- x2 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s2), bd);
- x3 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s3), bd);
- x6 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s6), bd);
- x7 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s7), bd);
- x10 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s10), bd);
- x11 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s11), bd);
- x14 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s14), bd);
- x15 = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(s15), bd);
+ x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2), bd);
+ x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3), bd);
+ x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s6), bd);
+ x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s7), bd);
+ x10 = HIGHBD_WRAPLOW(dct_const_round_shift(s10), bd);
+ x11 = HIGHBD_WRAPLOW(dct_const_round_shift(s11), bd);
+ x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s14), bd);
+ x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s15), bd);
output[0] = HIGHBD_WRAPLOW(x0, bd);
output[1] = HIGHBD_WRAPLOW(-x8, bd);
@@ -2062,9 +2063,9 @@ void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
+ int i, j;
tran_low_t out[16 * 16] = { 0 };
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[16], temp_out[16];
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -2076,7 +2077,7 @@ void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
outptr += 16;
}
- // Then transform columns.
+ // Then transform columns
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
vpx_highbd_idct16_c(temp_in, temp_out, bd);
@@ -2092,10 +2093,10 @@ void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
int i, j;
tran_high_t a1;
tran_low_t out =
- HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
+ HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
- out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
+ out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 6);
for (j = 0; j < 16; ++j) {
for (i = 0; i < 16; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
@@ -2137,43 +2138,43 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
- step1[16] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[31] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[16] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[31] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
- step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[17] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[30] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
- step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[18] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[29] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
- step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[19] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[28] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
- step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[20] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[27] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
- step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
- step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[22] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[25] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
- step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[23] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[24] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
// stage 2
step2[0] = step1[0];
@@ -2187,23 +2188,23 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
- step2[8] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[15] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[15] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
- step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
- step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
- step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd);
step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd);
@@ -2230,12 +2231,12 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
- step1[4] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[7] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
- step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd);
step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd);
@@ -2250,22 +2251,22 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
step1[31] = step2[31];
temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
- step1[17] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[30] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[17] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[30] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
- step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[18] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[29] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step1[19] = step2[19];
step1[20] = step2[20];
temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
- step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
- step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[22] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[25] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step1[23] = step2[23];
step1[24] = step2[24];
step1[27] = step2[27];
@@ -2274,12 +2275,12 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
// stage 4
temp1 = (step1[0] + step1[1]) * cospi_16_64;
temp2 = (step1[0] - step1[1]) * cospi_16_64;
- step2[0] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[1] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
- step2[2] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[3] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd);
step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd);
step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd);
@@ -2289,12 +2290,12 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
step2[15] = step1[15];
temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
- step2[9] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[14] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
- step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step2[11] = step1[11];
step2[12] = step1[12];
@@ -2324,8 +2325,8 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
step1[4] = step2[4];
temp1 = (step2[6] - step2[5]) * cospi_16_64;
temp2 = (step2[5] + step2[6]) * cospi_16_64;
- step1[5] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[6] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step1[7] = step2[7];
step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd);
@@ -2341,20 +2342,20 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
step1[17] = step2[17];
temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
- step1[18] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[29] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[18] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[29] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
- step1[19] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[28] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[19] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[28] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
- step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[20] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[27] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
- step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step1[22] = step2[22];
step1[23] = step2[23];
step1[24] = step2[24];
@@ -2375,12 +2376,12 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
step2[9] = step1[9];
temp1 = (-step1[10] + step1[13]) * cospi_16_64;
temp2 = (step1[10] + step1[13]) * cospi_16_64;
- step2[10] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[13] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = (-step1[11] + step1[12]) * cospi_16_64;
temp2 = (step1[11] + step1[12]) * cospi_16_64;
- step2[11] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step2[12] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step2[14] = step1[14];
step2[15] = step1[15];
@@ -2426,20 +2427,20 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
step1[19] = step2[19];
temp1 = (-step2[20] + step2[27]) * cospi_16_64;
temp2 = (step2[20] + step2[27]) * cospi_16_64;
- step1[20] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[27] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[20] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[27] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = (-step2[21] + step2[26]) * cospi_16_64;
temp2 = (step2[21] + step2[26]) * cospi_16_64;
- step1[21] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[26] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = (-step2[22] + step2[25]) * cospi_16_64;
temp2 = (step2[22] + step2[25]) * cospi_16_64;
- step1[22] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[25] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[22] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[25] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
temp1 = (-step2[23] + step2[24]) * cospi_16_64;
temp2 = (step2[23] + step2[24]) * cospi_16_64;
- step1[23] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp1), bd);
- step1[24] = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(temp2), bd);
+ step1[23] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd);
+ step1[24] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd);
step1[28] = step2[28];
step1[29] = step2[29];
step1[30] = step2[30];
@@ -2482,9 +2483,9 @@ static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output,
void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
+ int i, j;
tran_low_t out[32 * 32];
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[32], temp_out[32];
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
@@ -2520,19 +2521,20 @@ void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
+ int i, j;
tran_low_t out[32 * 32] = { 0 };
tran_low_t *outptr = out;
- int i, j;
tran_low_t temp_in[32], temp_out[32];
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
// Rows
- // Only upper-left 8x8 has non-zero coeff.
+ // Only upper-left 8x8 has non-zero coeff
for (i = 0; i < 8; ++i) {
highbd_idct32_c(input, outptr, bd);
input += 32;
outptr += 32;
}
+
// Columns
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i];
@@ -2549,10 +2551,10 @@ void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
int i, j;
int a1;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
-
tran_low_t out =
- HIGHBD_WRAPLOW(highbd_dct_const_round_shift(input[0] * cospi_16_64), bd);
- out = HIGHBD_WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64), bd);
+ HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
+
+ out = HIGHBD_WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
a1 = ROUND_POWER_OF_TWO(out, 6);
for (j = 0; j < 32; ++j) {
@@ -2560,4 +2562,5 @@ void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
dest += stride;
}
}
+
#endif // CONFIG_VP9_HIGHBITDEPTH
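
In the inv_txfm.c hunks above, the zero-input early-outs in iadst4/8/16 now clear the output with a single memset instead of element-by-element stores, and the loop counters are declared ahead of the arrays. A minimal sketch of the early-out pattern (hypothetical helper name; tran_low_t here stands in for the libvpx typedef):

#include <string.h>
#include <stdint.h>

typedef int32_t tran_low_t; /* stand-in for the libvpx type */

static void iadst4_sketch(const tran_low_t *input, tran_low_t *output) {
  const tran_low_t x0 = input[0], x1 = input[1], x2 = input[2], x3 = input[3];

  if (!(x0 | x1 | x2 | x3)) {              /* all-zero block: nothing to do */
    memset(output, 0, 4 * sizeof(*output));
    return;
  }
  /* placeholder for the sinusoidal butterflies of the real iadst4_c */
  output[0] = x0; output[1] = x1; output[2] = x2; output[3] = x3;
}

int main(void) {
  tran_low_t in[4] = { 0, 0, 0, 0 }, out[4] = { 7, 7, 7, 7 };
  iadst4_sketch(in, out);
  return out[0] | out[1] | out[2] | out[3]; /* 0: the early-out cleared it */
}
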
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h
index e530730d575..13137659fae 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/inv_txfm.h
@@ -57,11 +57,6 @@ static INLINE tran_high_t highbd_check_range(tran_high_t input, int bd) {
(void)bd;
return input;
}
-
-static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) {
- tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
- return (tran_high_t)rv;
-}
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_EMULATE_HARDWARE
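
The inv_txfm.h hunk above deletes highbd_dct_const_round_shift(), whose entire body was ROUND_POWER_OF_TWO(input, DCT_CONST_BITS), and the .c hunks switch every high-bitdepth call site to dct_const_round_shift(), which presumably performs the same rounding shift. A minimal sketch of that operation (DCT_CONST_BITS = 14 and the macro body are assumptions copied from common libvpx definitions, not from this diff):

#include <stdint.h>
#include <assert.h>

#define DCT_CONST_BITS 14
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

static int64_t round_shift_sketch(int64_t input) {
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}

int main(void) {
  /* Products with the cospi_*_64 constants carry an extra 2^14 scale;
     the shift rounds that scale back out. */
  assert(round_shift_sketch(3 << DCT_CONST_BITS) == 3);
  assert(round_shift_sketch((3 << DCT_CONST_BITS) + (1 << 13)) == 4);
  return 0;
}
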
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/loopfilter.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/loopfilter.c
index 60a15e23bcf..9866ea37d6d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/loopfilter.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/loopfilter.c
@@ -94,8 +94,8 @@ static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1,
filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask;
// save bottom 3 bits so that we round one side +4 and the other +3
- // if it equals 4 we'll set to adjust by -1 to account for the fact
- // we'd round 3 the other way
+ // if it equals 4 we'll set it to adjust by -1 to account for the fact
+ // we'd round it by 3 the other way
filter1 = signed_char_clamp(filter + 4) >> 3;
filter2 = signed_char_clamp(filter + 3) >> 3;
@@ -425,8 +425,8 @@ static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1,
filter = signed_char_clamp_high(filter + 3 * (qs0 - ps0), bd) & mask;
// Save bottom 3 bits so that we round one side +4 and the other +3
- // if it equals 4 we'll set to adjust by -1 to account for the fact
- // we'd round 3 the other way.
+ // if it equals 4 we'll set it to adjust by -1 to account for the fact
+ // we'd round it by 3 the other way.
filter1 = signed_char_clamp_high(filter + 4, bd) >> 3;
filter2 = signed_char_clamp_high(filter + 3, bd) >> 3;
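
The reworded comments above describe the asymmetric rounding in filter4: the same filter value is shifted down by 3 after adding 4 on one side and 3 on the other, so the q0 and p0 adjustments differ by at most one. A standalone scalar illustration (not the libvpx API):

#include <stdio.h>

static int clamp8(int v) { return v < -128 ? -128 : (v > 127 ? 127 : v); }

int main(void) {
  int filter = 20;                        /* example masked filter value */
  int filter1 = clamp8(filter + 4) >> 3;  /* subtracted from q0 */
  int filter2 = clamp8(filter + 3) >> 3;  /* added to p0 */
  printf("filter1=%d filter2=%d\n", filter1, filter2);  /* prints 3 and 2 */
  return 0;
}
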
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c
index 402d7ed9979..cc633c6698d 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/deblock_msa.c
@@ -454,7 +454,7 @@ void vpx_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch,
v16u8 tmp = { 0 };
v16i8 zero = { 0 };
v8u16 sum_h, src_r_h, src_l_h;
- v4u32 src_r_w, src_l_w;
+ v4u32 src_r_w;
v4i32 flimit_vec;
flimit_vec = __msa_fill_w(flimit);
@@ -473,9 +473,8 @@ void vpx_mbpost_proc_across_ip_msa(uint8_t *src_ptr, int32_t pitch,
src[15] = 0;
ILVRL_B2_UH(zero, src, src_r_h, src_l_h);
src_r_w = __msa_dotp_u_w(src_r_h, src_r_h);
- src_l_w = __msa_dotp_u_w(src_l_h, src_l_h);
+ src_r_w += __msa_dotp_u_w(src_l_h, src_l_h);
sum_sq = HADD_SW_S32(src_r_w);
- sum_sq += HADD_SW_S32(src_l_w);
sum_h = __msa_hadd_u_h(src, src);
sum = HADD_UH_U32(sum_h);
{
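
The deblock_msa.c hunk above folds the second dot product into the src_r_w accumulator and drops src_l_w, so only one horizontal add (HADD_SW_S32) is needed; this relies on the horizontal sum distributing over element-wise vector addition. Scalar sketch (hypothetical helper, assuming the partial sums stay in 32-bit range as they do for 8-bit pixels):

#include <assert.h>
#include <stdint.h>

static int32_t hadd4(const int32_t v[4]) { return v[0] + v[1] + v[2] + v[3]; }

int main(void) {
  const int32_t lo[4] = { 1, 4, 9, 16 }, hi[4] = { 25, 36, 49, 64 };
  int32_t acc[4];
  int i;
  for (i = 0; i < 4; ++i) acc[i] = lo[i] + hi[i];  /* src_r_w += dotp(...) */
  assert(hadd4(lo) + hadd4(hi) == hadd4(acc));     /* same sum_sq either way */
  return 0;
}
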
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/inv_txfm_msa.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/inv_txfm_msa.h
index 8b8a4173d2f..1fe9b28e8ad 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/inv_txfm_msa.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/inv_txfm_msa.h
@@ -196,18 +196,18 @@
out2, out3) \
{ \
v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \
- v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
+ v4i32 tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd; \
\
ILVRL_H2_SH(inp1, inp0, madd_s1_m, madd_s0_m); \
ILVRL_H2_SH(inp3, inp2, madd_s3_m, madd_s2_m); \
DOTP_SH4_SW(madd_s1_m, madd_s0_m, madd_s1_m, madd_s0_m, cst0, cst0, cst1, \
- cst1, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
- SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
- PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out1); \
+ cst1, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd); \
+ SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS); \
+ PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out0, out1); \
DOTP_SH4_SW(madd_s3_m, madd_s2_m, madd_s3_m, madd_s2_m, cst2, cst2, cst3, \
- cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \
- SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \
- PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out2, out3); \
+ cst3, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd); \
+ SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS); \
+ PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out2, out3); \
}
/* idct 8x8 macro */
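
The inv_txfm_msa.h hunk above only renames the macro-local temporaries (tmp0_m ... tmp3_m become tmp0_madd ... tmp3_madd). A plausible reason, though it is not stated in the diff, is to avoid shadowing when this statement macro expands inside another macro that also declares tmp0_m; a minimal illustration of that pattern:

/* The inner macro uses its own temporary name, so it can expand inside
   OUTER without shadowing OUTER's tmp_m. */
#define INNER(x, out)        \
  {                          \
    int tmp_madd = (x) * 2;  \
    (out) = tmp_madd;        \
  }

#define OUTER(x, out)        \
  {                          \
    int tmp_m = (x);         \
    INNER(tmp_m, out);       \
  }

int main(void) {
  int r;
  OUTER(21, r); /* expands both macros; r == 42 */
  return r == 42 ? 0 : 1;
}
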
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_16_msa.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_16_msa.c
index 6ee2456ca5d..b73d56bd558 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_16_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_16_msa.c
@@ -449,7 +449,7 @@ static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask, flat);
VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
q1_out);
flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
@@ -779,7 +779,7 @@ int32_t vpx_vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48,
/* flat4 */
VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
/* filter4 */
- VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_4_msa.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_4_msa.c
index e0079665f71..9500cd2fd86 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_4_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_4_msa.c
@@ -27,7 +27,7 @@ void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask, flat);
- VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
p1_d = __msa_copy_u_d((v2i64)p1_out, 0);
p0_d = __msa_copy_u_d((v2i64)p0_out, 0);
@@ -86,7 +86,7 @@ void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
q3);
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask, flat);
- VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1);
ILVR_B2_SH(p0, p1, q1, q0, vec0, vec1);
ILVRL_H2_SH(vec1, vec0, vec2, vec3);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_8_msa.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_8_msa.c
index 403e5dc51b3..a22c62bb3a3 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_8_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_8_msa.c
@@ -32,7 +32,7 @@ void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
mask, flat);
VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
- VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
@@ -177,7 +177,7 @@ void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
/* flat4 */
VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
/* filter4 */
- VP9_LPF_FILTER4_8W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
+ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out);
flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);
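
The MSA loop-filter call sites above now all use the single full-width VP9_LPF_FILTER4_4W macro; the dropped _8W variant processed only the low half of the vectors. The rewritten macro (in the loopfilter_msa.h hunk below) also builds 3 * (q0 - p0) with three saturating byte adds instead of widening to 16 bits. A scalar sketch of why the two forms agree under saturation (illustration only, not the libvpx code):

#include <assert.h>

static int sat8(int v) { return v < -128 ? -128 : (v > 127 ? 127 : v); }

static int wide(int filt, int d)   { return sat8(filt + 3 * d); }
static int narrow(int filt, int d) {
  return sat8(sat8(sat8(filt + d) + d) + d);  /* three __msa_adds_s_b steps */
}

int main(void) {
  assert(wide(-6, 5)  == narrow(-6, 5));   /* 9 == 9, no saturation */
  assert(wide(10, 45) == narrow(10, 45));  /* both clip to 127 */
  return 0;
}
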
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_msa.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_msa.h
index 4063e5e6b87..49fd74c25a4 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_msa.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/loopfilter_msa.h
@@ -13,144 +13,71 @@
#include "vpx_dsp/mips/macros_msa.h"
-#define VP9_LPF_FILTER4_8W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
- p1_out, p0_out, q0_out, q1_out) \
- { \
- v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
- v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
- v8i16 q0_sub_p0_r, filt_r, cnst3h; \
- \
- p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
- p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
- q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
- q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
- \
- filt = __msa_subs_s_b(p1_m, q1_m); \
- filt = filt & (v16i8)hev_in; \
- q0_sub_p0 = q0_m - p0_m; \
- filt_sign = __msa_clti_s_b(filt, 0); \
- \
- cnst3h = __msa_ldi_h(3); \
- q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
- filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
- filt_r += q0_sub_p0_r; \
- filt_r = __msa_sat_s_h(filt_r, 7); \
- \
- /* combine left and right part */ \
- filt = __msa_pckev_b((v16i8)filt_r, (v16i8)filt_r); \
- \
- filt = filt & (v16i8)mask_in; \
- cnst4b = __msa_ldi_b(4); \
- filt1 = __msa_adds_s_b(filt, cnst4b); \
- filt1 >>= 3; \
- \
- cnst3b = __msa_ldi_b(3); \
- filt2 = __msa_adds_s_b(filt, cnst3b); \
- filt2 >>= 3; \
- \
- q0_m = __msa_subs_s_b(q0_m, filt1); \
- q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
- p0_m = __msa_adds_s_b(p0_m, filt2); \
- p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
- \
- filt = __msa_srari_b(filt1, 1); \
- hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
- filt = filt & (v16i8)hev_in; \
- \
- q1_m = __msa_subs_s_b(q1_m, filt); \
- q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
- p1_m = __msa_adds_s_b(p1_m, filt); \
- p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
- }
-
-#define VP9_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask_in, hev_in, \
- p1_out, p0_out, q0_out, q1_out) \
- { \
- v16i8 p1_m, p0_m, q0_m, q1_m, q0_sub_p0, filt_sign; \
- v16i8 filt, filt1, filt2, cnst4b, cnst3b; \
- v8i16 q0_sub_p0_r, q0_sub_p0_l, filt_l, filt_r, cnst3h; \
- \
- p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
- p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
- q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
- q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
- \
- filt = __msa_subs_s_b(p1_m, q1_m); \
- \
- filt = filt & (v16i8)hev_in; \
- \
- q0_sub_p0 = q0_m - p0_m; \
- filt_sign = __msa_clti_s_b(filt, 0); \
- \
- cnst3h = __msa_ldi_h(3); \
- q0_sub_p0_r = (v8i16)__msa_ilvr_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_r = __msa_dotp_s_h((v16i8)q0_sub_p0_r, (v16i8)cnst3h); \
- filt_r = (v8i16)__msa_ilvr_b(filt_sign, filt); \
- filt_r += q0_sub_p0_r; \
- filt_r = __msa_sat_s_h(filt_r, 7); \
- \
- q0_sub_p0_l = (v8i16)__msa_ilvl_b(q0_sub_p0, q0_sub_p0); \
- q0_sub_p0_l = __msa_dotp_s_h((v16i8)q0_sub_p0_l, (v16i8)cnst3h); \
- filt_l = (v8i16)__msa_ilvl_b(filt_sign, filt); \
- filt_l += q0_sub_p0_l; \
- filt_l = __msa_sat_s_h(filt_l, 7); \
- \
- filt = __msa_pckev_b((v16i8)filt_l, (v16i8)filt_r); \
- filt = filt & (v16i8)mask_in; \
- \
- cnst4b = __msa_ldi_b(4); \
- filt1 = __msa_adds_s_b(filt, cnst4b); \
- filt1 >>= 3; \
- \
- cnst3b = __msa_ldi_b(3); \
- filt2 = __msa_adds_s_b(filt, cnst3b); \
- filt2 >>= 3; \
- \
- q0_m = __msa_subs_s_b(q0_m, filt1); \
- q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
- p0_m = __msa_adds_s_b(p0_m, filt2); \
- p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
- \
- filt = __msa_srari_b(filt1, 1); \
- hev_in = __msa_xori_b((v16u8)hev_in, 0xff); \
- filt = filt & (v16i8)hev_in; \
- \
- q1_m = __msa_subs_s_b(q1_m, filt); \
- q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
- p1_m = __msa_adds_s_b(p1_m, filt); \
- p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
+#define VP9_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask, hev, p1_out, \
+ p0_out, q0_out, q1_out) \
+ { \
+ v16i8 p1_m, p0_m, q0_m, q1_m, filt, q0_sub_p0, t1, t2; \
+ const v16i8 cnst4b = __msa_ldi_b(4); \
+ const v16i8 cnst3b = __msa_ldi_b(3); \
+ \
+ p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \
+ p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \
+ q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \
+ q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \
+ \
+ filt = __msa_subs_s_b(p1_m, q1_m); \
+ filt &= hev; \
+ q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt = __msa_adds_s_b(filt, q0_sub_p0); \
+ filt &= mask; \
+ t1 = __msa_adds_s_b(filt, cnst4b); \
+ t1 >>= cnst3b; \
+ t2 = __msa_adds_s_b(filt, cnst3b); \
+ t2 >>= cnst3b; \
+ q0_m = __msa_subs_s_b(q0_m, t1); \
+ q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \
+ p0_m = __msa_adds_s_b(p0_m, t2); \
+ p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \
+ filt = __msa_srari_b(t1, 1); \
+ hev = __msa_xori_b(hev, 0xff); \
+ filt &= hev; \
+ q1_m = __msa_subs_s_b(q1_m, filt); \
+ q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \
+ p1_m = __msa_adds_s_b(p1_m, filt); \
+ p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \
}
-#define VP9_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) \
- { \
- v16u8 tmp, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
- v16u8 zero_in = { 0 }; \
- \
- tmp = __msa_ori_b(zero_in, 1); \
- p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \
- q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \
- p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \
- q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \
- \
- p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \
- flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \
- p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \
- flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \
- \
- flat_out = (tmp < (v16u8)flat_out); \
- flat_out = __msa_xori_b(flat_out, 0xff); \
- flat_out = flat_out & (mask); \
+#define VP9_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) \
+ { \
+ v16u8 tmp_flat4, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \
+ v16u8 zero_in = { 0 }; \
+ \
+ tmp_flat4 = __msa_ori_b(zero_in, 1); \
+ p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \
+ q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \
+ p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \
+ q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \
+ \
+ p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \
+ flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \
+ p3_a_sub_p0 = __msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \
+ flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \
+ \
+ flat_out = (tmp_flat4 < (v16u8)flat_out); \
+ flat_out = __msa_xori_b(flat_out, 0xff); \
+ flat_out = flat_out & (mask); \
}
#define VP9_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, q5_in, \
q6_in, q7_in, flat_in, flat2_out) \
{ \
- v16u8 tmp, zero_in = { 0 }; \
+ v16u8 tmp_flat5, zero_in = { 0 }; \
v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0; \
v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0; \
\
- tmp = __msa_ori_b(zero_in, 1); \
+ tmp_flat5 = __msa_ori_b(zero_in, 1); \
p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in); \
q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in); \
p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in); \
@@ -168,7 +95,7 @@
p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0); \
flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out); \
\
- flat2_out = (tmp < (v16u8)flat2_out); \
+ flat2_out = (tmp_flat5 < (v16u8)flat2_out); \
flat2_out = __msa_xori_b(flat2_out, 0xff); \
flat2_out = flat2_out & flat_in; \
}
@@ -177,38 +104,38 @@
p2_filt8_out, p1_filt8_out, p0_filt8_out, q0_filt8_out, \
q1_filt8_out, q2_filt8_out) \
{ \
- v8u16 tmp0, tmp1, tmp2; \
+ v8u16 tmp_filt8_0, tmp_filt8_1, tmp_filt8_2; \
\
- tmp2 = p2_in + p1_in + p0_in; \
- tmp0 = p3_in << 1; \
+ tmp_filt8_2 = p2_in + p1_in + p0_in; \
+ tmp_filt8_0 = p3_in << 1; \
\
- tmp0 = tmp0 + tmp2 + q0_in; \
- tmp1 = tmp0 + p3_in + p2_in; \
- p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_0 = tmp_filt8_0 + tmp_filt8_2 + q0_in; \
+ tmp_filt8_1 = tmp_filt8_0 + p3_in + p2_in; \
+ p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
\
- tmp1 = tmp0 + p1_in + q1_in; \
- p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_1 = tmp_filt8_0 + p1_in + q1_in; \
+ p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
\
- tmp1 = q2_in + q1_in + q0_in; \
- tmp2 = tmp2 + tmp1; \
- tmp0 = tmp2 + (p0_in); \
- tmp0 = tmp0 + (p3_in); \
- p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp0, 3); \
+ tmp_filt8_1 = q2_in + q1_in + q0_in; \
+ tmp_filt8_2 = tmp_filt8_2 + tmp_filt8_1; \
+ tmp_filt8_0 = tmp_filt8_2 + (p0_in); \
+ tmp_filt8_0 = tmp_filt8_0 + (p3_in); \
+ p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_0, 3); \
\
- tmp0 = q2_in + q3_in; \
- tmp0 = p0_in + tmp1 + tmp0; \
- tmp1 = q3_in + q3_in; \
- tmp1 = tmp1 + tmp0; \
- q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_0 = q2_in + q3_in; \
+ tmp_filt8_0 = p0_in + tmp_filt8_1 + tmp_filt8_0; \
+ tmp_filt8_1 = q3_in + q3_in; \
+ tmp_filt8_1 = tmp_filt8_1 + tmp_filt8_0; \
+ q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
\
- tmp0 = tmp2 + q3_in; \
- tmp1 = tmp0 + q0_in; \
- q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_0 = tmp_filt8_2 + q3_in; \
+ tmp_filt8_1 = tmp_filt8_0 + q0_in; \
+ q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
\
- tmp1 = tmp0 - p2_in; \
- tmp0 = q1_in + q3_in; \
- tmp1 = tmp0 + tmp1; \
- q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp1, 3); \
+ tmp_filt8_1 = tmp_filt8_0 - p2_in; \
+ tmp_filt8_0 = q1_in + q3_in; \
+ tmp_filt8_1 = tmp_filt8_0 + tmp_filt8_1; \
+ q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \
}
#define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/macros_msa.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/macros_msa.h
index f498fbe9de2..002e574aa8f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/macros_msa.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/macros_msa.h
@@ -168,20 +168,20 @@
val_m; \
})
#else // !(__mips == 64)
-#define LD(psrc) \
- ({ \
- const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \
- uint32_t val0_m, val1_m; \
- uint64_t val_m = 0; \
- \
- val0_m = LW(psrc_m1); \
- val1_m = LW(psrc_m1 + 4); \
- \
- val_m = (uint64_t)(val1_m); \
- val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \
- val_m = (uint64_t)(val_m | (uint64_t)val0_m); \
- \
- val_m; \
+#define LD(psrc) \
+ ({ \
+ const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \
+ uint32_t val0_m, val1_m; \
+ uint64_t val_m_combined = 0; \
+ \
+ val0_m = LW(psrc_m1); \
+ val1_m = LW(psrc_m1 + 4); \
+ \
+ val_m_combined = (uint64_t)(val1_m); \
+ val_m_combined = (uint64_t)((val_m_combined << 32) & 0xFFFFFFFF00000000); \
+ val_m_combined = (uint64_t)(val_m_combined | (uint64_t)val0_m); \
+ \
+ val_m_combined; \
})
#endif // (__mips == 64)
@@ -909,27 +909,42 @@
sum_m; \
})
-/* Description : Horizontal addition of 8 unsigned halfword elements
- Arguments : Inputs - in (unsigned halfword vector)
- Outputs - sum_m (u32 sum)
- Return Type - unsigned word
- Details : 8 unsigned halfword elements of input vector are added
- together and the resulting integer sum is returned
+/* Description : Horizontal addition of 4 unsigned word elements
+ Arguments : Input - in (unsigned word vector)
+ Output - sum_m (u32 sum)
+ Return Type - unsigned word (GP)
+ Details : 4 unsigned word elements of 'in' vector are added together and
+ the resulting integer sum is returned
*/
-#define HADD_UH_U32(in) \
+#define HADD_UW_U32(in) \
({ \
- v4u32 res_m; \
v2u64 res0_m, res1_m; \
uint32_t sum_m; \
\
- res_m = __msa_hadd_u_w((v8u16)in, (v8u16)in); \
- res0_m = __msa_hadd_u_d(res_m, res_m); \
+ res0_m = __msa_hadd_u_d((v4u32)in, (v4u32)in); \
res1_m = (v2u64)__msa_splati_d((v2i64)res0_m, 1); \
- res0_m = res0_m + res1_m; \
+ res0_m += res1_m; \
sum_m = __msa_copy_u_w((v4i32)res0_m, 0); \
sum_m; \
})
+/* Description : Horizontal addition of 8 unsigned halfword elements
+ Arguments : Input - in (unsigned halfword vector)
+ Output - sum_m (u32 sum)
+ Return Type - unsigned word
+ Details : 8 unsigned halfword elements of 'in' vector are added
+ together and the resulting integer sum is returned
+*/
+#define HADD_UH_U32(in) \
+ ({ \
+ v4u32 res_m; \
+ uint32_t sum_m; \
+ \
+ res_m = __msa_hadd_u_w((v8u16)in, (v8u16)in); \
+ sum_m = HADD_UW_U32(res_m); \
+ sum_m; \
+ })
+
/* Description : Horizontal addition of unsigned byte vector elements
Arguments : Inputs - in0, in1
Outputs - out0, out1
@@ -2019,13 +2034,12 @@
pdst, stride) \
{ \
v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
\
tmp0_m = PCKEV_XORI128_UB(in0, in1); \
tmp1_m = PCKEV_XORI128_UB(in2, in3); \
ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
- ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
+ ST8x4_UB(tmp0_m, tmp1_m, pdst, stride); \
}
/* Description : Pack even byte elements and store byte vector in destination
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/sad_msa.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/sad_msa.c
index 6455814e1b8..e295123acf0 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/sad_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/sad_msa.c
@@ -1030,6 +1030,7 @@ static void sad_64width_x4d_msa(const uint8_t *src, int32_t src_stride,
v8u16 sad2_1 = { 0 };
v8u16 sad3_0 = { 0 };
v8u16 sad3_1 = { 0 };
+ v4u32 sad;
ref0_ptr = aref_ptr[0];
ref1_ptr = aref_ptr[1];
@@ -1061,14 +1062,21 @@ static void sad_64width_x4d_msa(const uint8_t *src, int32_t src_stride,
sad3_1 += SAD_UB2_UH(src2, src3, ref2, ref3);
}
- sad_array[0] = HADD_UH_U32(sad0_0);
- sad_array[0] += HADD_UH_U32(sad0_1);
- sad_array[1] = HADD_UH_U32(sad1_0);
- sad_array[1] += HADD_UH_U32(sad1_1);
- sad_array[2] = HADD_UH_U32(sad2_0);
- sad_array[2] += HADD_UH_U32(sad2_1);
- sad_array[3] = HADD_UH_U32(sad3_0);
- sad_array[3] += HADD_UH_U32(sad3_1);
+ sad = __msa_hadd_u_w(sad0_0, sad0_0);
+ sad += __msa_hadd_u_w(sad0_1, sad0_1);
+ sad_array[0] = HADD_UW_U32(sad);
+
+ sad = __msa_hadd_u_w(sad1_0, sad1_0);
+ sad += __msa_hadd_u_w(sad1_1, sad1_1);
+ sad_array[1] = HADD_UW_U32(sad);
+
+ sad = __msa_hadd_u_w(sad2_0, sad2_0);
+ sad += __msa_hadd_u_w(sad2_1, sad2_1);
+ sad_array[2] = HADD_UW_U32(sad);
+
+ sad = __msa_hadd_u_w(sad3_0, sad3_0);
+ sad += __msa_hadd_u_w(sad3_1, sad3_1);
+ sad_array[3] = HADD_UW_U32(sad);
}
static uint32_t avgsad_4width_msa(const uint8_t *src_ptr, int32_t src_stride,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/variance_msa.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/variance_msa.c
index 085990e4845..49b2f99230f 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/variance_msa.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/variance_msa.c
@@ -489,27 +489,19 @@ static uint32_t sse_64width_msa(const uint8_t *src_ptr, int32_t src_stride,
uint32_t vpx_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride,
const uint8_t *ref_ptr, int32_t ref_stride) {
- uint32_t err = 0;
uint32_t src0, src1, src2, src3;
uint32_t ref0, ref1, ref2, ref3;
v16i8 src = { 0 };
v16i8 ref = { 0 };
- v16u8 src_vec0, src_vec1;
- v8i16 diff0, diff1;
v4i32 err0 = { 0 };
- v4i32 err1 = { 0 };
LW4(src_ptr, src_stride, src0, src1, src2, src3);
LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
INSERT_W4_SB(src0, src1, src2, src3, src);
INSERT_W4_SB(ref0, ref1, ref2, ref3, ref);
- ILVRL_B2_UB(src, ref, src_vec0, src_vec1);
- HSUB_UB2_SH(src_vec0, src_vec1, diff0, diff1);
- DPADD_SH2_SW(diff0, diff1, diff0, diff1, err0, err1);
- err = HADD_SW_S32(err0);
- err += HADD_SW_S32(err1);
+ CALC_MSE_B(src, ref, err0);
- return err;
+ return HADD_SW_S32(err0);
}
#define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve_msa.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve_msa.h
index 198c21ed20a..f75679521a4 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve_msa.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/mips/vpx_convolve_msa.h
@@ -16,18 +16,18 @@
extern const uint8_t mc_filt_mask_arr[16 * 3];
-#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \
- filt3) \
- ({ \
- v8i16 tmp0, tmp1; \
- \
- tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
- tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)vec1, (v16i8)filt1); \
- tmp1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \
- tmp1 = __msa_dpadd_s_h(tmp1, (v16i8)vec3, (v16i8)filt3); \
- tmp0 = __msa_adds_s_h(tmp0, tmp1); \
- \
- tmp0; \
+#define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \
+ filt3) \
+ ({ \
+ v8i16 tmp_dpadd_0, tmp_dpadd_1; \
+ \
+ tmp_dpadd_0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
+ tmp_dpadd_0 = __msa_dpadd_s_h(tmp_dpadd_0, (v16i8)vec1, (v16i8)filt1); \
+ tmp_dpadd_1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \
+ tmp_dpadd_1 = __msa_dpadd_s_h(tmp_dpadd_1, (v16i8)vec3, (v16i8)filt3); \
+ tmp_dpadd_0 = __msa_adds_s_h(tmp_dpadd_0, tmp_dpadd_1); \
+ \
+ tmp_dpadd_0; \
})
#define HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_h0, \
@@ -114,11 +114,10 @@ extern const uint8_t mc_filt_mask_arr[16 * 3];
stride) \
{ \
v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
- uint8_t *pdst_m = (uint8_t *)(pdst); \
\
PCKEV_B2_UB(in2, in1, in4, in3, tmp0_m, tmp1_m); \
PCKEV_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
- ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
+ ST8x4_UB(tmp0_m, tmp1_m, pdst, stride); \
}
#endif /* VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ */
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_convolve.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_convolve.c
index f17281b3698..cab6368e606 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_convolve.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_convolve.c
@@ -25,6 +25,7 @@ static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
int x_step_q4, int w, int h) {
int x, y;
src -= SUBPEL_TAPS / 2 - 1;
+
for (y = 0; y < h; ++y) {
int x_q4 = x0_q4;
for (x = 0; x < w; ++x) {
@@ -46,6 +47,7 @@ static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
int x_step_q4, int w, int h) {
int x, y;
src -= SUBPEL_TAPS / 2 - 1;
+
for (y = 0; y < h; ++y) {
int x_q4 = x0_q4;
for (x = 0; x < w; ++x) {
@@ -72,7 +74,7 @@ static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
for (x = 0; x < w; ++x) {
int y_q4 = y0_q4;
for (y = 0; y < h; ++y) {
- const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
+ const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
int k, sum = 0;
for (k = 0; k < SUBPEL_TAPS; ++k)
@@ -95,7 +97,7 @@ static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
for (x = 0; x < w; ++x) {
int y_q4 = y0_q4;
for (y = 0; y < h; ++y) {
- const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
+ const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
int k, sum = 0;
for (k = 0; k < SUBPEL_TAPS; ++k)
@@ -128,8 +130,8 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
- uint8_t temp[135 * 64];
- int intermediate_height =
+ uint8_t temp[64 * 135];
+ const int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
assert(w <= 64);
@@ -143,16 +145,6 @@ static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
y_filters, y0_q4, y_step_q4, w, h);
}
-static const InterpKernel *get_filter_base(const int16_t *filter) {
- // NOTE: This assumes that the filter table is 256-byte aligned.
- // TODO(agrange) Modify to make independent of table alignment.
- return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
-}
-
-static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
- return (int)((const InterpKernel *)(intptr_t)f - base);
-}
-
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4,
@@ -219,7 +211,6 @@ void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
int w, int h) {
const InterpKernel *const filters_x = get_filter_base(filter_x);
const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
const InterpKernel *const filters_y = get_filter_base(filter_y);
const int y0_q4 = get_filter_offset(filter_y, filters_y);
@@ -231,7 +222,7 @@ void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const int16_t *filter_x,
int x_step_q4, const int16_t *filter_y, int y_step_q4,
int w, int h) {
- /* Fixed size intermediate buffer places limits on parameters. */
+ // Fixed size intermediate buffer places limits on parameters.
DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
assert(w <= 64);
assert(h <= 64);
@@ -272,7 +263,6 @@ void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
for (y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
-
src += src_stride;
dst += dst_stride;
}
@@ -334,9 +324,10 @@ static void highbd_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride,
const InterpKernel *x_filters, int x0_q4,
int x_step_q4, int w, int h, int bd) {
int x, y;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
src -= SUBPEL_TAPS / 2 - 1;
+
for (y = 0; y < h; ++y) {
int x_q4 = x0_q4;
for (x = 0; x < w; ++x) {
@@ -357,9 +348,10 @@ static void highbd_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride,
const InterpKernel *x_filters, int x0_q4,
int x_step_q4, int w, int h, int bd) {
int x, y;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
src -= SUBPEL_TAPS / 2 - 1;
+
for (y = 0; y < h; ++y) {
int x_q4 = x0_q4;
for (x = 0; x < w; ++x) {
@@ -382,9 +374,10 @@ static void highbd_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride,
const InterpKernel *y_filters, int y0_q4,
int y_step_q4, int w, int h, int bd) {
int x, y;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
+
for (x = 0; x < w; ++x) {
int y_q4 = y0_q4;
for (y = 0; y < h; ++y) {
@@ -407,9 +400,10 @@ static void highbd_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride,
const InterpKernel *y_filters, int y0_q4,
int y_step_q4, int w, int h, int bd) {
int x, y;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
src -= src_stride * (SUBPEL_TAPS / 2 - 1);
+
for (x = 0; x < w; ++x) {
int y_q4 = y0_q4;
for (y = 0; y < h; ++y) {
@@ -447,7 +441,7 @@ static void highbd_convolve(const uint8_t *src, ptrdiff_t src_stride,
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
uint16_t temp[64 * 135];
- int intermediate_height =
+ const int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
assert(w <= 64);
@@ -470,6 +464,7 @@ void vpx_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
int h, int bd) {
const InterpKernel *const filters_x = get_filter_base(filter_x);
const int x0_q4 = get_filter_offset(filter_x, filters_x);
+
(void)filter_y;
(void)y_step_q4;
@@ -484,6 +479,7 @@ void vpx_highbd_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
int w, int h, int bd) {
const InterpKernel *const filters_x = get_filter_base(filter_x);
const int x0_q4 = get_filter_offset(filter_x, filters_x);
+
(void)filter_y;
(void)y_step_q4;
@@ -498,6 +494,7 @@ void vpx_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
int h, int bd) {
const InterpKernel *const filters_y = get_filter_base(filter_y);
const int y0_q4 = get_filter_offset(filter_y, filters_y);
+
(void)filter_x;
(void)x_step_q4;
@@ -512,6 +509,7 @@ void vpx_highbd_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
int w, int h, int bd) {
const InterpKernel *const filters_y = get_filter_base(filter_y);
const int y0_q4 = get_filter_offset(filter_y, filters_y);
+
(void)filter_x;
(void)x_step_q4;
@@ -526,7 +524,6 @@ void vpx_highbd_convolve8_c(const uint8_t *src, ptrdiff_t src_stride,
int h, int bd) {
const InterpKernel *const filters_x = get_filter_base(filter_x);
const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
const InterpKernel *const filters_y = get_filter_base(filter_y);
const int y0_q4 = get_filter_offset(filter_y, filters_y);
@@ -556,11 +553,12 @@ void vpx_highbd_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
int r;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+
(void)filter_x;
- (void)filter_y;
(void)filter_x_stride;
+ (void)filter_y;
(void)filter_y_stride;
(void)bd;
@@ -577,18 +575,17 @@ void vpx_highbd_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
const int16_t *filter_y, int filter_y_stride,
int w, int h, int bd) {
int x, y;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+
(void)filter_x;
- (void)filter_y;
(void)filter_x_stride;
+ (void)filter_y;
(void)filter_y_stride;
(void)bd;
for (y = 0; y < h; ++y) {
- for (x = 0; x < w; ++x) {
- dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
- }
+ for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
src += src_stride;
dst += dst_stride;
}
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
index 66062b6e78e..2909beb0f6c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp.mk
@@ -86,6 +86,10 @@ DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_intrin_ssse3.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/vpx_high_subpixel_8t_sse2.asm
DSP_SRCS-$(HAVE_SSE2) += x86/vpx_high_subpixel_bilinear_sse2.asm
+DSP_SRCS-$(HAVE_NEON) += arm/highbd_vpx_convolve_copy_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/highbd_vpx_convolve_avg_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/highbd_vpx_convolve8_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/highbd_vpx_convolve_neon.c
endif
DSP_SRCS-$(HAVE_SSE2) += x86/vpx_convolve_copy_sse2.asm
@@ -159,6 +163,7 @@ DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_mb_horiz_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_mb_vert_dspr2.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+DSP_SRCS-$(HAVE_NEON) += arm/highbd_loopfilter_neon.c
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_loopfilter_sse2.c
endif # CONFIG_VP9_HIGHBITDEPTH
@@ -199,27 +204,15 @@ DSP_SRCS-$(HAVE_NEON_ASM) += arm/save_reg_neon$(ASM)
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
ifeq ($(HAVE_NEON_ASM),yes)
-DSP_SRCS-yes += arm/idct4x4_1_add_neon$(ASM)
-DSP_SRCS-yes += arm/idct4x4_add_neon$(ASM)
-DSP_SRCS-yes += arm/idct8x8_1_add_neon$(ASM)
-DSP_SRCS-yes += arm/idct8x8_add_neon$(ASM)
-DSP_SRCS-yes += arm/idct16x16_1_add_neon$(ASM)
DSP_SRCS-yes += arm/idct16x16_add_neon$(ASM)
-DSP_SRCS-yes += arm/idct32x32_1_add_neon$(ASM)
-DSP_SRCS-yes += arm/idct32x32_add_neon$(ASM)
else
ifeq ($(HAVE_NEON),yes)
-DSP_SRCS-yes += arm/idct4x4_1_add_neon.c
-DSP_SRCS-yes += arm/idct4x4_add_neon.c
-DSP_SRCS-yes += arm/idct8x8_1_add_neon.c
-DSP_SRCS-yes += arm/idct8x8_add_neon.c
-DSP_SRCS-yes += arm/idct16x16_1_add_neon.c
DSP_SRCS-yes += arm/idct16x16_add_neon.c
-DSP_SRCS-yes += arm/idct32x32_1_add_neon.c
-DSP_SRCS-yes += arm/idct32x32_add_neon.c
endif # HAVE_NEON
endif # HAVE_NEON_ASM
DSP_SRCS-$(HAVE_NEON) += arm/idct16x16_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/idct32x32_add_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/idct32x32_34_add_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/inv_txfm_msa.h
DSP_SRCS-$(HAVE_MSA) += mips/idct4x4_msa.c
@@ -233,7 +226,25 @@ DSP_SRCS-$(HAVE_DSPR2) += mips/itrans8_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/itrans16_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_dspr2.c
DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_cols_dspr2.c
-endif # CONFIG_VP9_HIGHBITDEPTH
+endif # !CONFIG_VP9_HIGHBITDEPTH
+
+ifeq ($(HAVE_NEON_ASM),yes)
+DSP_SRCS-yes += arm/idct_neon$(ASM)
+DSP_SRCS-yes += arm/idct4x4_1_add_neon$(ASM)
+DSP_SRCS-yes += arm/idct4x4_add_neon$(ASM)
+DSP_SRCS-yes += arm/idct8x8_1_add_neon$(ASM)
+DSP_SRCS-yes += arm/idct8x8_add_neon$(ASM)
+DSP_SRCS-yes += arm/idct16x16_1_add_neon$(ASM)
+else
+DSP_SRCS-$(HAVE_NEON) += arm/idct4x4_1_add_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/idct4x4_add_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/idct8x8_1_add_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/idct8x8_add_neon.c
+DSP_SRCS-$(HAVE_NEON) += arm/idct16x16_1_add_neon.c
+endif # HAVE_NEON_ASM
+DSP_SRCS-$(HAVE_NEON) += arm/idct_neon.h
+DSP_SRCS-$(HAVE_NEON) += arm/idct32x32_1_add_neon.c
+
endif # CONFIG_VP9
# quantization
@@ -241,6 +252,7 @@ ifeq ($(CONFIG_VP9_ENCODER),yes)
DSP_SRCS-yes += quantize.c
DSP_SRCS-yes += quantize.h
+DSP_SRCS-$(HAVE_SSE2) += x86/fdct.h
DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
index d148642e37b..ee403be3975 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -392,28 +392,28 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Sub Pixel Filters
#
add_proto qw/void vpx_highbd_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve_copy sse2/;
+ specialize qw/vpx_highbd_convolve_copy sse2 neon/;
add_proto qw/void vpx_highbd_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve_avg sse2/;
+ specialize qw/vpx_highbd_convolve_avg sse2 neon/;
add_proto qw/void vpx_highbd_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve8/, "$sse2_x86_64";
+ specialize qw/vpx_highbd_convolve8 neon/, "$sse2_x86_64";
add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve8_horiz/, "$sse2_x86_64";
+ specialize qw/vpx_highbd_convolve8_horiz neon/, "$sse2_x86_64";
add_proto qw/void vpx_highbd_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve8_vert/, "$sse2_x86_64";
+ specialize qw/vpx_highbd_convolve8_vert neon/, "$sse2_x86_64";
add_proto qw/void vpx_highbd_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve8_avg/, "$sse2_x86_64";
+ specialize qw/vpx_highbd_convolve8_avg neon/, "$sse2_x86_64";
add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve8_avg_horiz/, "$sse2_x86_64";
+ specialize qw/vpx_highbd_convolve8_avg_horiz neon/, "$sse2_x86_64";
add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
- specialize qw/vpx_highbd_convolve8_avg_vert/, "$sse2_x86_64";
+ specialize qw/vpx_highbd_convolve8_avg_vert neon/, "$sse2_x86_64";
} # CONFIG_VP9_HIGHBITDEPTH
#
@@ -457,40 +457,40 @@ specialize qw/vpx_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_lpf_vertical_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vpx_highbd_lpf_vertical_16 sse2/;
+ specialize qw/vpx_highbd_lpf_vertical_16 sse2 neon/;
add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
+ specialize qw/vpx_highbd_lpf_vertical_16_dual sse2 neon/;
add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
+ specialize qw/vpx_highbd_lpf_vertical_8 sse2 neon/;
add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
+ specialize qw/vpx_highbd_lpf_vertical_8_dual sse2 neon/;
add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
+ specialize qw/vpx_highbd_lpf_vertical_4 sse2 neon/;
add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
+ specialize qw/vpx_highbd_lpf_vertical_4_dual sse2 neon/;
add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
+ specialize qw/vpx_highbd_lpf_horizontal_16 sse2 neon/;
add_proto qw/void vpx_highbd_lpf_horizontal_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vpx_highbd_lpf_horizontal_16_dual sse2/;
+ specialize qw/vpx_highbd_lpf_horizontal_16_dual sse2 neon/;
add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
+ specialize qw/vpx_highbd_lpf_horizontal_8 sse2 neon/;
add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
+ specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2 neon/;
add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
- specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
+ specialize qw/vpx_highbd_lpf_horizontal_4 sse2 neon/;
add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
- specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2/;
+ specialize qw/vpx_highbd_lpf_horizontal_4_dual sse2 neon/;
} # CONFIG_VP9_HIGHBITDEPTH
#
@@ -637,26 +637,26 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void vpx_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
} else {
add_proto qw/void vpx_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct4x4_16_add sse2/;
+ specialize qw/vpx_idct4x4_16_add neon sse2/;
add_proto qw/void vpx_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct4x4_1_add sse2/;
+ specialize qw/vpx_idct4x4_1_add neon sse2/;
add_proto qw/void vpx_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_64_add sse2/, "$ssse3_x86_64";
+ specialize qw/vpx_idct8x8_64_add neon sse2/, "$ssse3_x86_64";
add_proto qw/void vpx_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_12_add sse2/, "$ssse3_x86_64";
+ specialize qw/vpx_idct8x8_12_add neon sse2/, "$ssse3_x86_64";
add_proto qw/void vpx_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct8x8_1_add sse2/;
+ specialize qw/vpx_idct8x8_1_add neon sse2/;
add_proto qw/void vpx_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct16x16_256_add sse2/;
@@ -665,7 +665,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct16x16_10_add sse2/;
add_proto qw/void vpx_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct16x16_1_add sse2/;
+ specialize qw/vpx_idct16x16_1_add neon sse2/;
add_proto qw/void vpx_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1024_add sse2/, "$ssse3_x86_64";
@@ -679,7 +679,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_idct32x32_34_add sse2/, "$ssse3_x86_64";
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
- specialize qw/vpx_idct32x32_1_add sse2/;
+ specialize qw/vpx_idct32x32_1_add neon sse2/;
add_proto qw/void vpx_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct4x4_16_add sse2/;
@@ -687,8 +687,8 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct8x8_64_add sse2/;
- add_proto qw/void vpx_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
- specialize qw/vpx_highbd_idct8x8_10_add sse2/;
+ add_proto qw/void vpx_highbd_idct8x8_12_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
+ specialize qw/vpx_highbd_idct8x8_12_add sse2/;
add_proto qw/void vpx_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vpx_highbd_idct16x16_256_add sse2/;
@@ -764,8 +764,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_34_add sse2 neon dspr2 msa/, "$ssse3_x86_64";
- # Need to add 34 eob idct32x32 neon implementation.
- $vpx_idct32x32_34_add_neon=vpx_idct32x32_1024_add_neon;
add_proto qw/void vpx_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/vpx_idct32x32_1_add sse2 neon dspr2 msa/;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_filter.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_filter.h
index 6cea251bcca..26d690501b6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_filter.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/vpx_filter.h
@@ -26,6 +26,17 @@ extern "C" {
typedef int16_t InterpKernel[SUBPEL_TAPS];
+static INLINE const InterpKernel *get_filter_base(const int16_t *filter) {
+ // NOTE: This assumes that the filter table is 256-byte aligned.
+ // TODO(agrange) Modify to make independent of table alignment.
+ return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
+}
+
+static INLINE int get_filter_offset(const int16_t *f,
+ const InterpKernel *base) {
+ return (int)((const InterpKernel *)(intptr_t)f - base);
+}
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fdct.h b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fdct.h
new file mode 100644
index 00000000000..54a6d81fcbc
--- /dev/null
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/fdct.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VPX_DSP_X86_FDCT_H_
+#define VPX_DSP_X86_FDCT_H_
+
+#include <xmmintrin.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+// Load 8 16 bit values. If the source is 32 bits then cast down.
+// This does not saturate values. It only truncates.
+static INLINE __m128i load_tran_low(const tran_low_t *a) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ return _mm_setr_epi16((int16_t)a[0], (int16_t)a[1], (int16_t)a[2],
+ (int16_t)a[3], (int16_t)a[4], (int16_t)a[5],
+ (int16_t)a[6], (int16_t)a[7]);
+#else
+ return _mm_load_si128((const __m128i *)a);
+#endif
+}
+
+// Store 8 16 bit values. If the destination is 32 bits then sign extend the
+// values by multiplying by 1.
+static INLINE void store_tran_low(__m128i a, tran_low_t *b) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ const __m128i one = _mm_set1_epi16(1);
+ const __m128i a_hi = _mm_mulhi_epi16(a, one);
+ const __m128i a_lo = _mm_mullo_epi16(a, one);
+ const __m128i a_1 = _mm_unpacklo_epi16(a_lo, a_hi);
+ const __m128i a_2 = _mm_unpackhi_epi16(a_lo, a_hi);
+ _mm_store_si128((__m128i *)(b), a_1);
+ _mm_store_si128((__m128i *)(b + 4), a_2);
+#else
+ _mm_store_si128((__m128i *)(b), a);
+#endif
+}
+
+// Zero fill 8 positions in the output buffer.
+static INLINE void store_zero_tran_low(tran_low_t *a) {
+ const __m128i zero = _mm_setzero_si128();
+#if CONFIG_VP9_HIGHBITDEPTH
+ _mm_store_si128((__m128i *)(a), zero);
+ _mm_store_si128((__m128i *)(a + 4), zero);
+#else
+ _mm_store_si128((__m128i *)(a), zero);
+#endif
+}
+#endif // VPX_DSP_X86_FDCT_H_
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_quantize_intrin_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
index 64b56223ede..2362476c1f1 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/highbd_quantize_intrin_sse2.c
@@ -77,10 +77,10 @@ void vpx_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
for (j = 0; j < 4; j++) {
if (test & (1 << (4 * j))) {
int k = 4 * i + j;
- const int64_t tmp1 = abs_coeff[j] + round_ptr[k != 0];
- const int64_t tmp2 = ((tmp1 * quant_ptr[k != 0]) >> 16) + tmp1;
+ const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
+ const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
const uint32_t abs_qcoeff =
- (uint32_t)((tmp2 * quant_shift_ptr[k != 0]) >> 16);
+ (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
if (abs_qcoeff) eob_i = iscan[k] > eob_i ? iscan[k] : eob_i;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
index cb56ad0789c..d5fc1440c41 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/inv_txfm_sse2.c
@@ -2379,7 +2379,6 @@ void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest,
#define IDCT32_34 \
/* Stage1 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_1_31 = _mm_unpacklo_epi16(in[1], zero); \
const __m128i hi_1_31 = _mm_unpackhi_epi16(in[1], zero); \
\
@@ -2404,7 +2403,6 @@ void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest,
\
/* Stage2 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_2_30 = _mm_unpacklo_epi16(in[2], zero); \
const __m128i hi_2_30 = _mm_unpackhi_epi16(in[2], zero); \
\
@@ -2431,7 +2429,6 @@ void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest,
\
/* Stage3 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_4_28 = _mm_unpacklo_epi16(in[4], zero); \
const __m128i hi_4_28 = _mm_unpackhi_epi16(in[4], zero); \
\
@@ -2472,7 +2469,6 @@ void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest,
\
/* Stage4 */ \
{ \
- const __m128i zero = _mm_setzero_si128(); \
const __m128i lo_0_16 = _mm_unpacklo_epi16(in[0], zero); \
const __m128i hi_0_16 = _mm_unpackhi_epi16(in[0], zero); \
\
@@ -3009,6 +3005,7 @@ void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest,
// Only upper-left 8x8 has non-zero coeff
void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest,
int stride) {
+ const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1 << 5);
@@ -3104,7 +3101,6 @@ void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest,
col[31] = _mm_sub_epi16(stp1_0, stp1_31);
for (i = 0; i < 4; i++) {
int j;
- const __m128i zero = _mm_setzero_si128();
// Transpose 32x8 block to 8x32 block
array_transpose_8x8(col + i * 8, in);
IDCT32_34
@@ -3677,7 +3673,7 @@ void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
}
}
-void vpx_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
+void vpx_highbd_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[8 * 8] = { 0 };
tran_low_t *outptr = out;
@@ -4021,8 +4017,8 @@ void vpx_highbd_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest8,
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
tran_low_t out;
- out = highbd_dct_const_round_shift(input[0] * cospi_16_64);
- out = highbd_dct_const_round_shift(out * cospi_16_64);
+ out = dct_const_round_shift(input[0] * cospi_16_64);
+ out = dct_const_round_shift(out * cospi_16_64);
a = ROUND_POWER_OF_TWO(out, 6);
d = _mm_set1_epi32(a);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_sse2.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_sse2.c
index 2c7e431c745..0580a7bd7b6 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_sse2.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/quantize_sse2.c
@@ -13,32 +13,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
-
-static INLINE __m128i load_coefficients(const tran_low_t *coeff_ptr) {
-#if CONFIG_VP9_HIGHBITDEPTH
- return _mm_setr_epi16((int16_t)coeff_ptr[0], (int16_t)coeff_ptr[1],
- (int16_t)coeff_ptr[2], (int16_t)coeff_ptr[3],
- (int16_t)coeff_ptr[4], (int16_t)coeff_ptr[5],
- (int16_t)coeff_ptr[6], (int16_t)coeff_ptr[7]);
-#else
- return _mm_load_si128((const __m128i *)coeff_ptr);
-#endif
-}
-
-static INLINE void store_coefficients(__m128i coeff_vals,
- tran_low_t *coeff_ptr) {
-#if CONFIG_VP9_HIGHBITDEPTH
- __m128i one = _mm_set1_epi16(1);
- __m128i coeff_vals_hi = _mm_mulhi_epi16(coeff_vals, one);
- __m128i coeff_vals_lo = _mm_mullo_epi16(coeff_vals, one);
- __m128i coeff_vals_1 = _mm_unpacklo_epi16(coeff_vals_lo, coeff_vals_hi);
- __m128i coeff_vals_2 = _mm_unpackhi_epi16(coeff_vals_lo, coeff_vals_hi);
- _mm_store_si128((__m128i *)(coeff_ptr), coeff_vals_1);
- _mm_store_si128((__m128i *)(coeff_ptr + 4), coeff_vals_2);
-#else
- _mm_store_si128((__m128i *)(coeff_ptr), coeff_vals);
-#endif
-}
+#include "vpx_dsp/x86/fdct.h"
void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *zbin_ptr,
@@ -81,8 +56,8 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
__m128i qtmp0, qtmp1;
__m128i cmp_mask0, cmp_mask1;
// Do DC and first 15 AC
- coeff0 = load_coefficients(coeff_ptr + n_coeffs);
- coeff1 = load_coefficients(coeff_ptr + n_coeffs + 8);
+ coeff0 = load_tran_low(coeff_ptr + n_coeffs);
+ coeff1 = load_tran_low(coeff_ptr + n_coeffs + 8);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -117,15 +92,15 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
- store_coefficients(qcoeff0, qcoeff_ptr + n_coeffs);
- store_coefficients(qcoeff1, qcoeff_ptr + n_coeffs + 8);
+ store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs);
+ store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- store_coefficients(coeff0, dqcoeff_ptr + n_coeffs);
- store_coefficients(coeff1, dqcoeff_ptr + n_coeffs + 8);
+ store_tran_low(coeff0, dqcoeff_ptr + n_coeffs);
+ store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8);
}
{
@@ -159,8 +134,8 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
__m128i qtmp0, qtmp1;
__m128i cmp_mask0, cmp_mask1;
- coeff0 = load_coefficients(coeff_ptr + n_coeffs);
- coeff1 = load_coefficients(coeff_ptr + n_coeffs + 8);
+ coeff0 = load_tran_low(coeff_ptr + n_coeffs);
+ coeff1 = load_tran_low(coeff_ptr + n_coeffs + 8);
// Poor man's sign extract
coeff0_sign = _mm_srai_epi16(coeff0, 15);
@@ -191,14 +166,14 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0);
qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1);
- store_coefficients(qcoeff0, qcoeff_ptr + n_coeffs);
- store_coefficients(qcoeff1, qcoeff_ptr + n_coeffs + 8);
+ store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs);
+ store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- store_coefficients(coeff0, dqcoeff_ptr + n_coeffs);
- store_coefficients(coeff1, dqcoeff_ptr + n_coeffs + 8);
+ store_tran_low(coeff0, dqcoeff_ptr + n_coeffs);
+ store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8);
}
{
@@ -237,10 +212,10 @@ void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
}
} else {
do {
- store_coefficients(zero, dqcoeff_ptr + n_coeffs);
- store_coefficients(zero, dqcoeff_ptr + n_coeffs + 8);
- store_coefficients(zero, qcoeff_ptr + n_coeffs);
- store_coefficients(zero, qcoeff_ptr + n_coeffs + 8);
+ store_tran_low(zero, dqcoeff_ptr + n_coeffs);
+ store_tran_low(zero, dqcoeff_ptr + n_coeffs + 8);
+ store_tran_low(zero, qcoeff_ptr + n_coeffs);
+ store_tran_low(zero, qcoeff_ptr + n_coeffs + 8);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
index b26d97b4551..09c75d455ca 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -860,16 +860,6 @@ static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride,
}
}
-static const InterpKernel *get_filter_base(const int16_t *filter) {
- // NOTE: This assumes that the filter table is 256-byte aligned.
- // TODO(agrange) Modify to make independent of table alignment.
- return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
-}
-
-static int get_filter_offset(const int16_t *f, const InterpKernel *base) {
- return (int)((const InterpKernel *)(intptr_t)f - base);
-}
-
void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const int16_t *filter_x,
int x_step_q4, const int16_t *filter_y, int y_step_q4,
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c b/chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c
index c94ed52d16d..a9be0868066 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.c
@@ -76,38 +76,6 @@ void *vpx_calloc(size_t num, size_t size) {
return x;
}
-void *vpx_realloc(void *memblk, size_t size) {
- void *new_addr = NULL;
-
- /*
- The realloc() function changes the size of the object pointed to by
- ptr to the size specified by size, and returns a pointer to the
- possibly moved block. The contents are unchanged up to the lesser
- of the new and old sizes. If ptr is null, realloc() behaves like
- malloc() for the specified size. If size is zero (0) and ptr is
- not a null pointer, the object pointed to is freed.
- */
- if (!memblk)
- new_addr = vpx_malloc(size);
- else if (!size)
- vpx_free(memblk);
- else {
- void *addr = get_actual_malloc_address(memblk);
- const uint64_t aligned_size =
- get_aligned_malloc_size(size, DEFAULT_ALIGNMENT);
- if (!check_size_argument_overflow(1, aligned_size)) return NULL;
-
- addr = realloc(addr, (size_t)aligned_size);
- if (addr) {
- new_addr = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE,
- DEFAULT_ALIGNMENT);
- set_actual_malloc_address(new_addr, addr);
- }
- }
-
- return new_addr;
-}
-
void vpx_free(void *memblk) {
if (memblk) {
void *addr = get_actual_malloc_address(memblk);
diff --git a/chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.h b/chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.h
index c14f288b895..733aff4885c 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.h
+++ b/chromium/third_party/libvpx/source/libvpx/vpx_mem/vpx_mem.h
@@ -26,7 +26,6 @@ extern "C" {
void *vpx_memalign(size_t align, size_t size);
void *vpx_malloc(size_t size);
void *vpx_calloc(size_t num, size_t size);
-void *vpx_realloc(void *memblk, size_t size);
void vpx_free(void *memblk);
#if CONFIG_VP9_HIGHBITDEPTH
diff --git a/chromium/third_party/libvpx/source/libvpx/vpxdec.c b/chromium/third_party/libvpx/source/libvpx/vpxdec.c
index d1ed3e6cae0..2cdb69d5a31 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpxdec.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpxdec.c
@@ -9,11 +9,11 @@
*/
#include <assert.h>
+#include <limits.h>
+#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
-#include <stdarg.h>
#include <string.h>
-#include <limits.h>
#include "./vpx_config.h"
@@ -92,31 +92,19 @@ static const arg_def_t md5arg =
static const arg_def_t outbitdeptharg =
ARG_DEF(NULL, "output-bit-depth", 1, "Output bit-depth for decoded frames");
#endif
-
-static const arg_def_t *all_args[] = { &codecarg,
- &use_yv12,
- &use_i420,
- &flipuvarg,
- &rawvideo,
- &noblitarg,
- &progressarg,
- &limitarg,
- &skiparg,
- &postprocarg,
- &summaryarg,
- &outputfile,
- &threadsarg,
- &frameparallelarg,
- &verbosearg,
- &scalearg,
- &fb_arg,
- &md5arg,
- &error_concealment,
- &continuearg,
+static const arg_def_t svcdecodingarg = ARG_DEF(
+ NULL, "svc-decode-layer", 1, "Decode SVC stream up to given spatial layer");
+
+static const arg_def_t *all_args[] = {
+ &codecarg, &use_yv12, &use_i420, &flipuvarg, &rawvideo,
+ &noblitarg, &progressarg, &limitarg, &skiparg, &postprocarg,
+ &summaryarg, &outputfile, &threadsarg, &frameparallelarg, &verbosearg,
+ &scalearg, &fb_arg, &md5arg, &error_concealment, &continuearg,
#if CONFIG_VP9_HIGHBITDEPTH
- &outbitdeptharg,
+ &outbitdeptharg,
#endif
- NULL };
+ &svcdecodingarg, NULL
+};
#if CONFIG_VP8_DECODER
static const arg_def_t addnoise_level =
@@ -519,6 +507,8 @@ static int main_loop(int argc, const char **argv_) {
#if CONFIG_VP9_HIGHBITDEPTH
unsigned int output_bit_depth = 0;
#endif
+ int svc_decoding = 0;
+ int svc_spatial_layer = 0;
#if CONFIG_VP8_DECODER
vp8_postproc_cfg_t vp8_pp_cfg = { 0, 0, 0 };
#endif
@@ -610,6 +600,10 @@ static int main_loop(int argc, const char **argv_) {
output_bit_depth = arg_parse_uint(&arg);
}
#endif
+ else if (arg_match(&arg, &svcdecodingarg, argi)) {
+ svc_decoding = 1;
+ svc_spatial_layer = arg_parse_uint(&arg);
+ }
#if CONFIG_VP8_DECODER
else if (arg_match(&arg, &addnoise_level, argi)) {
postproc = 1;
@@ -726,7 +720,14 @@ static int main_loop(int argc, const char **argv_) {
vpx_codec_error(&decoder));
goto fail2;
}
-
+ if (svc_decoding) {
+ if (vpx_codec_control(&decoder, VP9_DECODE_SVC_SPATIAL_LAYER,
+ svc_spatial_layer)) {
+ fprintf(stderr, "Failed to set spatial layer for svc decode: %s\n",
+ vpx_codec_error(&decoder));
+ goto fail;
+ }
+ }
if (!quiet) fprintf(stderr, "%s\n", decoder.name);
#if CONFIG_VP8_DECODER
@@ -780,8 +781,8 @@ static int main_loop(int argc, const char **argv_) {
const char *detail = vpx_codec_error_detail(&decoder);
warn("Failed to decode frame %d: %s", frame_in,
vpx_codec_error(&decoder));
-
if (detail) warn("Additional information: %s", detail);
+ corrupted = 1;
if (!keep_going) goto fail;
}
@@ -800,6 +801,8 @@ static int main_loop(int argc, const char **argv_) {
// Flush the decoder in frame parallel decode.
if (vpx_codec_decode(&decoder, NULL, 0, NULL, 0)) {
warn("Failed to flush decoder: %s", vpx_codec_error(&decoder));
+ corrupted = 1;
+ if (!keep_going) goto fail;
}
}
@@ -812,7 +815,7 @@ static int main_loop(int argc, const char **argv_) {
vpx_usec_timer_mark(&timer);
dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer);
- if (!frame_parallel &&
+ if (!frame_parallel && !corrupted &&
vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted)) {
warn("Failed VP8_GET_FRAME_CORRUPTED: %s", vpx_codec_error(&decoder));
if (!keep_going) goto fail;
diff --git a/chromium/third_party/libvpx/source/libvpx/vpxenc.c b/chromium/third_party/libvpx/source/libvpx/vpxenc.c
index 6e0af57a42c..a0f760574c8 100644
--- a/chromium/third_party/libvpx/source/libvpx/vpxenc.c
+++ b/chromium/third_party/libvpx/source/libvpx/vpxenc.c
@@ -355,6 +355,8 @@ static const arg_def_t cq_level =
ARG_DEF(NULL, "cq-level", 1, "Constant/Constrained Quality level");
static const arg_def_t max_intra_rate_pct =
ARG_DEF(NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)");
+static const arg_def_t gf_cbr_boost_pct = ARG_DEF(
+ NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)");
#if CONFIG_VP8_ENCODER
static const arg_def_t cpu_used_vp8 =
@@ -363,12 +365,21 @@ static const arg_def_t token_parts =
ARG_DEF(NULL, "token-parts", 1, "Number of token partitions to use, log2");
static const arg_def_t screen_content_mode =
ARG_DEF(NULL, "screen-content-mode", 1, "Screen content mode");
-static const arg_def_t *vp8_args[] = {
- &cpu_used_vp8, &auto_altref, &noise_sens, &sharpness,
- &static_thresh, &token_parts, &arnr_maxframes, &arnr_strength,
- &arnr_type, &tune_ssim, &cq_level, &max_intra_rate_pct,
- &screen_content_mode, NULL
-};
+static const arg_def_t *vp8_args[] = { &cpu_used_vp8,
+ &auto_altref,
+ &noise_sens,
+ &sharpness,
+ &static_thresh,
+ &token_parts,
+ &arnr_maxframes,
+ &arnr_strength,
+ &arnr_type,
+ &tune_ssim,
+ &cq_level,
+ &max_intra_rate_pct,
+ &gf_cbr_boost_pct,
+ &screen_content_mode,
+ NULL };
static const int vp8_arg_ctrl_map[] = { VP8E_SET_CPUUSED,
VP8E_SET_ENABLEAUTOALTREF,
VP8E_SET_NOISE_SENSITIVITY,
@@ -381,6 +392,7 @@ static const int vp8_arg_ctrl_map[] = { VP8E_SET_CPUUSED,
VP8E_SET_TUNING,
VP8E_SET_CQ_LEVEL,
VP8E_SET_MAX_INTRA_BITRATE_PCT,
+ VP8E_SET_GF_CBR_BOOST_PCT,
VP8E_SET_SCREEN_CONTENT_MODE,
0 };
#endif
@@ -407,8 +419,6 @@ static const arg_def_t alt_ref_aq = ARG_DEF(NULL, "alt-ref-aq", 1,
static const arg_def_t frame_periodic_boost =
ARG_DEF(NULL, "frame-boost", 1,
"Enable frame periodic boost (0: off (default), 1: on)");
-static const arg_def_t gf_cbr_boost_pct = ARG_DEF(
- NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)");
static const arg_def_t max_inter_rate_pct =
ARG_DEF(NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)");
static const arg_def_t min_gf_interval = ARG_DEF(