diff options
author | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-08-31 12:20:52 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2020-08-31 13:07:02 +0000 |
commit | a9c2e7190bbf1da1133d8d80f0208dc320e003cc (patch) | |
tree | 566ce7d595111d029c7bb9dd782041d4cdd5e438 | |
parent | dabae54d81d96768c355fd3e1671e48340bf906f (diff) | |
download | qtwebengine-chromium-a9c2e7190bbf1da1133d8d80f0208dc320e003cc.tar.gz |
[Backport] Security issue 1108639
Pull in more recent OpenH264 sources from Chromium 85
Change-Id: Iad5293f5eb3332c35a823a5b3a76f66ecf9afa2b
Reviewed-by: Michal Klocek <michal.klocek@qt.io>
85 files changed, 9852 insertions, 2622 deletions
diff --git a/chromium/third_party/openh264/BUILD.gn b/chromium/third_party/openh264/BUILD.gn index f3b9d997a82..8afb73a4dc4 100644 --- a/chromium/third_party/openh264/BUILD.gn +++ b/chromium/third_party/openh264/BUILD.gn @@ -5,9 +5,9 @@ import("//build/config/linux/pkg_config.gni") import("//build/config/sanitizers/sanitizers.gni") import("//build/shim_headers.gni") +import("//third_party/nasm/nasm_assemble.gni") import("//third_party/openh264/openh264_args.gni") import("//third_party/openh264/openh264_sources.gni") -import("//third_party/yasm/yasm_assemble.gni") # Config shared by all openh264 targets. config("config") { @@ -42,7 +42,7 @@ config("config") { } } -# YASM assembly is only checked to be working on Windows and Linux. +# NASM assembly is only checked to be working on Windows and Linux. # Mac is known to fail certain tests when building, but actual assembly # is believed to work. # MSAN builds are flaky with assembler. crbug.com/685168 @@ -53,19 +53,19 @@ use_assembler = (is_win || is_linux) && # This IF statement will make the targets visible only on specific builds, # which will lead to failures on other platforms if accidentally invoked. 
if (use_assembler) { - yasm_defines = [] + asm_defines = [] if (!is_component_build) { if (is_mac || is_ios) { - yasm_defines += [ "WELS_PRIVATE_EXTERN=:private_extern" ] + asm_defines += [ "WELS_PRIVATE_EXTERN=private_extern" ] } else if (is_linux || is_android || is_fuchsia) { - yasm_defines += [ "WELS_PRIVATE_EXTERN=:hidden" ] + asm_defines += [ "WELS_PRIVATE_EXTERN=hidden" ] } } - yasm_assemble("openh264_common_yasm") { + nasm_assemble("openh264_common_asm") { include_dirs = openh264_common_include_dirs sources = openh264_common_sources_asm_x86 - defines = yasm_defines + defines = asm_defines if (target_cpu == "x86") { defines += [ "X86_32", "X86_32_PICASM" ] } else { # x64 @@ -82,11 +82,11 @@ if (use_assembler) { } } - yasm_assemble("openh264_processing_yasm") { + nasm_assemble("openh264_processing_asm") { include_dirs = openh264_processing_include_dirs include_dirs += [ "./src/codec/common/x86" ] sources = openh264_processing_sources_asm_x86 - defines = yasm_defines + defines = asm_defines if (target_cpu == "x86") { defines += [ "X86_32", "X86_32_PICASM" ] } else { # x64 @@ -103,11 +103,11 @@ if (use_assembler) { } } - yasm_assemble("openh264_encoder_yasm") { + nasm_assemble("openh264_encoder_asm") { include_dirs = openh264_encoder_include_dirs include_dirs += [ "./src/codec/common/x86" ] sources = openh264_encoder_sources_asm_x86 - defines = yasm_defines + defines = asm_defines if (target_cpu == "x86") { defines += [ "X86_32", "X86_32_PICASM" ] } else { # x64 @@ -138,7 +138,7 @@ source_set("bundled_common") { deps = [] if (use_assembler) { defines = [ "X86_ASM" ] - deps += [ ":openh264_common_yasm" ] + deps += [ ":openh264_common_asm" ] } if (is_android) { deps += [ @@ -166,7 +166,7 @@ source_set("bundled_processing") { ] if (use_assembler) { defines = [ "X86_ASM" ] - deps += [ ":openh264_processing_yasm" ] + deps += [ ":openh264_processing_asm" ] } } @@ -192,7 +192,7 @@ source_set("bundled_encoder") { ] if (use_assembler) { defines = [ "X86_ASM" ] - deps += 
[ ":openh264_encoder_yasm" ] + deps += [ ":openh264_encoder_asm" ] } } diff --git a/chromium/third_party/openh264/openh264_sources.gni b/chromium/third_party/openh264/openh264_sources.gni index 0f9b77bcc76..e714dad9ef7 100644 --- a/chromium/third_party/openh264/openh264_sources.gni +++ b/chromium/third_party/openh264/openh264_sources.gni @@ -4,6 +4,7 @@ openh264_common_include_dirs = [ "//third_party/openh264/src/codec/common/arm", "//third_party/openh264/src/codec/common/inc", "//third_party/openh264/src/codec/common/src", + "//third_party/openh264/src/codec/common/x86", ] openh264_common_sources = [ diff --git a/chromium/third_party/openh264/src/.travis.yml b/chromium/third_party/openh264/src/.travis.yml index 6e79ec14288..5eec0c291f1 100644 --- a/chromium/third_party/openh264/src/.travis.yml +++ b/chromium/third_party/openh264/src/.travis.yml @@ -1,5 +1,5 @@ language: cpp -dist: trusty +dist: xenial compiler: - g++ @@ -7,8 +7,8 @@ compiler: before_install: - sudo apt-get update -qq - - sudo apt-get install -qq nasm g++-multilib gcc-multilib libc6-dev-i386 python3-pip unzip - - sudo python3 -m pip install meson==0.44.1 + - sudo apt-get install -qq nasm g++-multilib gcc-multilib libc6-dev-i386 python3-pip python3-setuptools unzip + - sudo python3 -m pip install meson==0.47.0 - wget https://github.com/ninja-build/ninja/releases/download/v1.8.2/ninja-linux.zip - unzip ninja-linux.zip - export PATH=$PATH:$PWD diff --git a/chromium/third_party/openh264/src/Makefile b/chromium/third_party/openh264/src/Makefile index e70c966d34d..53d16ccb65e 100644 --- a/chromium/third_party/openh264/src/Makefile +++ b/chromium/third_party/openh264/src/Makefile @@ -34,9 +34,10 @@ GTEST_VER=release-1.8.1 CCASFLAGS=$(CFLAGS) STATIC_LDFLAGS=-lstdc++ STRIP ?= strip +USE_STACK_PROTECTOR = Yes -SHAREDLIB_MAJORVERSION=5 -FULL_VERSION := 1.9.0 +SHAREDLIB_MAJORVERSION=6 +FULL_VERSION := 2.1.0 ifeq (,$(wildcard $(SRC_PATH)gmp-api)) HAVE_GMP_API=No @@ -285,10 +286,10 @@ endif endif 
$(PROJECT_NAME).pc: $(PROJECT_NAME).pc.in - @sed -e 's;@prefix@;$(PREFIX);' -e 's;@VERSION@;$(FULL_VERSION);' -e 's;@LIBS@;;' -e 's;@LIBS_PRIVATE@;$(STATIC_LDFLAGS);' < $< > $@ + @sed -e 's;@prefix@;$(PREFIX);' -e 's;@libdir@;$(PREFIX)/lib;' -e 's;@VERSION@;$(FULL_VERSION);' -e 's;@LIBS@;;' -e 's;@LIBS_PRIVATE@;$(STATIC_LDFLAGS);' < $< > $@ $(PROJECT_NAME)-static.pc: $(PROJECT_NAME).pc.in - @sed -e 's;@prefix@;$(PREFIX);' -e 's;@VERSION@;$(FULL_VERSION);' -e 's;@LIBS@;$(STATIC_LDFLAGS);' -e 's;@LIBS_PRIVATE@;;' < $< > $@ + @sed -e 's;@prefix@;$(PREFIX);' -e 's;@libdir@;$(PREFIX)/lib;' -e 's;@VERSION@;$(FULL_VERSION);' -e 's;@LIBS@;$(STATIC_LDFLAGS);' -e 's;@LIBS_PRIVATE@;;' < $< > $@ install-headers: mkdir -p $(DESTDIR)$(PREFIX)/include/wels diff --git a/chromium/third_party/openh264/src/RELEASES b/chromium/third_party/openh264/src/RELEASES index 632f1374d6a..349549db7d5 100644 --- a/chromium/third_party/openh264/src/RELEASES +++ b/chromium/third_party/openh264/src/RELEASES @@ -1,6 +1,24 @@ Releases ----------- +v2.1.0 +------ +- Experimentally support for multi-thread decoding(default disabled,and may result in random problems if enabled) +- Assembly optimization for loongson platform +- Update meson version to 5 +- Some minor bug fixes + +v2.0.0 +------ +- B-frame decoding support for Main and High Profile with two test cases +- Add support for loongson(https://en.wikipedia.org/wiki/Loongson) platform +- Add clang support for arm/arm64/x86 for NDK version over 17 +- Enable stack protector +- Add some test cases +- Avoid using C++/CX code for threads for Windows Phone/Windows Store/UWP +- Remove extra visual studio projects for the decoder +- Remove check for working compiler in NDK +- Bug fixes v1.8.0 ------ @@ -180,6 +198,37 @@ Binaries These binary releases are distributed under this license: http://www.openh264.org/BINARY_LICENSE.txt +v2.1.0 +http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm.so.bz2 
+http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm64.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-android-arm64.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-ios.a.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-ios.a.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-linux32.5.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-linux32.5.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-linux64.5.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-linux64.5.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-osx32.5.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-2.1.0-osx64.5.dylib.bz2 +http://ciscobinary.openh264.org/openh264-2.1.0-win32.dll.bz2 +http://ciscobinary.openh264.org/openh264-2.1.0-win64.dll.bz2 + +v2.0.0 +------ +http://ciscobinary.openh264.org/libopenh264-2.0.0-android.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-android.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-ios.a.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-ios.a.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-linux32.5.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-linux32.5.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-linux64.5.so.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-linux64.5.so.sig.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-osx32.5.dylib.bz2 +http://ciscobinary.openh264.org/libopenh264-2.0.0-osx64.5.dylib.bz2 +http://ciscobinary.openh264.org/openh264-2.0.0-win32.dll.bz2 +http://ciscobinary.openh264.org/openh264-2.0.0-win64.dll.bz2 + v1.8.0 ------ http://ciscobinary.openh264.org/libopenh264-1.8.0-android19.so.bz2 diff --git a/chromium/third_party/openh264/src/build/arch.mk b/chromium/third_party/openh264/src/build/arch.mk index 8ac3e70a5ad..c6570ed4e59 100644 --- a/chromium/third_party/openh264/src/build/arch.mk 
+++ b/chromium/third_party/openh264/src/build/arch.mk @@ -30,14 +30,26 @@ CFLAGS += -DHAVE_NEON_AARCH64 endif endif -#for loongson +#for mips ifneq ($(filter mips mips64, $(ARCH)),) ifeq ($(USE_ASM), Yes) +ENABLE_MMI=Yes +ENABLE_MSA=Yes ASM_ARCH = mips ASMFLAGS += -I$(SRC_PATH)codec/common/mips/ -LOONGSON3A = $(shell g++ -dM -E - < /dev/null | grep '_MIPS_TUNE ' | cut -f 3 -d " ") -ifeq ($(LOONGSON3A), "loongson3a") -CFLAGS += -DHAVE_MMI +#mmi +ifeq ($(ENABLE_MMI), Yes) +ENABLE_MMI = $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) mmi) +ifeq ($(ENABLE_MMI), Yes) +CFLAGS += -DHAVE_MMI -march=loongson3a +endif +endif +#msa +ifeq ($(ENABLE_MSA), Yes) +ENABLE_MSA = $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) msa) +ifeq ($(ENABLE_MSA), Yes) +CFLAGS += -DHAVE_MSA -mmsa +endif endif endif endif diff --git a/chromium/third_party/openh264/src/build/mips-simd-check.sh b/chromium/third_party/openh264/src/build/mips-simd-check.sh new file mode 100755 index 00000000000..d0d72f9edd6 --- /dev/null +++ b/chromium/third_party/openh264/src/build/mips-simd-check.sh @@ -0,0 +1,32 @@ +#!/bin/bash +#********************************************************************************** +# This script is using in build/arch.mk for mips to detect the simd instructions: +# mmi, msa (maybe more in the future). 
+# +# --usage: +# ./mips-simd-check.sh $(CC) mmi +# or ./mips-simd-check.sh $(CC) msa +# +# date: 10/17/2019 Created +#********************************************************************************** + +TMPC=$(mktemp tmp.XXXXXX.c) +TMPO=$(mktemp tmp.XXXXXX.o) +if [ $2 == "mmi" ] +then + echo "void main(void){ __asm__ volatile(\"punpcklhw \$f0, \$f0, \$f0\"); }" > $TMPC + $1 -march=loongson3a $TMPC -o $TMPO &> /dev/null + if test -s $TMPO + then + echo "Yes" + fi +elif [ $2 == "msa" ] +then + echo "void main(void){ __asm__ volatile(\"addvi.b \$w0, \$w1, 1\"); }" > $TMPC + $1 -mmsa $TMPC -o $TMPO &> /dev/null + if test -s $TMPO + then + echo "Yes" + fi +fi +rm -f $TMPC $TMPO diff --git a/chromium/third_party/openh264/src/build/mktargets.py b/chromium/third_party/openh264/src/build/mktargets.py index 593280c0991..518909d3dfd 100755 --- a/chromium/third_party/openh264/src/build/mktargets.py +++ b/chromium/third_party/openh264/src/build/mktargets.py @@ -119,9 +119,9 @@ for file in sfiles: armfiles.append(file) mipsfiles = [] for file in cfiles: - c = file.split('/') - if 'mips' in c: - mipsfiles.append(file) + c = file.split('/') + if 'mips' in c: + mipsfiles.append(file) cfiles = [x for x in cfiles if x not in mipsfiles] @@ -181,15 +181,34 @@ if len(arm64files) > 0: f.write("OBJS += $(%s_OBJSARM64)\n\n"%(PREFIX)) if len(mipsfiles) > 0: - f.write("%s_ASM_MIPS_SRCS=\\\n"%(PREFIX)) - for c in mipsfiles: - f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c)) - f.write("\n") - f.write("%s_OBJSMIPS += $(%s_ASM_MIPS_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX)) - f.write("ifeq ($(ASM_ARCH), mips)\n") - f.write("%s_OBJS += $(%s_OBJSMIPS)\n"%(PREFIX,PREFIX)) - f.write("endif\n") - f.write("OBJS += $(%s_OBJSMIPS)\n\n"%(PREFIX)) + mmifiles = [] + for file in mipsfiles: + if '_mmi' in file: + mmifiles.append(file) + f.write("%s_ASM_MIPS_MMI_SRCS=\\\n"%(PREFIX)) + for c in mmifiles: + f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c)) + f.write("\n") + f.write("%s_OBJSMIPS_MMI += 
$(%s_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))\n\n"%(PREFIX, PREFIX)) + msafiles = [] + for file in mipsfiles: + if '_msa' in file: + msafiles.append(file) + f.write("%s_ASM_MIPS_MSA_SRCS=\\\n"%(PREFIX)) + for c in msafiles: + f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c)) + f.write("\n") + f.write("%s_OBJSMIPS_MSA += $(%s_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX)) + f.write("ifeq ($(ASM_ARCH), mips)\n") + f.write("ifeq ($(ENABLE_MMI), Yes)\n") + f.write("%s_OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX,PREFIX)) + f.write("endif\n") + f.write("ifeq ($(ENABLE_MSA), Yes)\n") + f.write("%s_OBJS += $(%s_OBJSMIPS_MSA)\n"%(PREFIX,PREFIX)) + f.write("endif\n") + f.write("endif\n") + f.write("OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX)) + f.write("OBJS += $(%s_OBJSMIPS_MSA)\n\n"%(PREFIX)) f.write("OBJS += $(%s_OBJS)\n\n"%(PREFIX)) write_cpp_rule_pattern(f) diff --git a/chromium/third_party/openh264/src/build/platform-android.mk b/chromium/third_party/openh264/src/build/platform-android.mk index 7f50eec6350..0c442dfac13 100644 --- a/chromium/third_party/openh264/src/build/platform-android.mk +++ b/chromium/third_party/openh264/src/build/platform-android.mk @@ -45,10 +45,14 @@ CXX = $(TOOLCHAINPREFIX)g++ CC = $(TOOLCHAINPREFIX)gcc AR = $(TOOLCHAINPREFIX)ar CFLAGS += -DANDROID_NDK -fpic --sysroot=$(SYSROOT) -MMD -MP +ifeq ($(USE_STACK_PROTECTOR), Yes) +CFLAGS += -fstack-protector-all +endif CFLAGS += -isystem $(NDKROOT)/sysroot/usr/include -isystem $(NDKROOT)/sysroot/usr/include/$(TOOLCHAIN_NAME) -D__ANDROID_API__=$(NDKLEVEL) CXXFLAGS += -fno-rtti -fno-exceptions LDFLAGS += --sysroot=$(SYSROOT) SHLDFLAGS = -Wl,--no-undefined -Wl,-z,relro -Wl,-z,now -Wl,-soname,lib$(PROJECT_NAME).so +UTSHLDFLAGS = -Wl,-soname,libut.so ifeq ($(NDK_TOOLCHAIN_VERSION), clang) HOST_OS = $(shell uname -s | tr [A-Z] [a-z]) @@ -70,19 +74,31 @@ ifeq ($(NDK_TOOLCHAIN_VERSION), clang) CFLAGS += -target $(TARGET_NAME) LDFLAGS += -target $(TARGET_NAME) -gcc-toolchain $(GCC_TOOLCHAIN_PATH) - LDFLAGS += 
-Wl,--exclude-libs,libgcc.a -Wl,--exclude-libs,libunwind.a endif +# background reading: https://android.googlesource.com/platform/ndk/+/master/docs/BuildSystemMaintainers.md#unwinding +LDFLAGS += -Wl,--exclude-libs,libgcc.a -Wl,--exclude-libs,libunwind.a + +ifneq ($(findstring /,$(CXX)),$(findstring \,$(CXX))) ifneq ($(CXX),$(wildcard $(CXX))) ifneq ($(CXX).exe,$(wildcard $(CXX).exe)) $(error Compiler not found, bad NDKROOT or ARCH?) endif endif +endif +ifeq ($(NDK_TOOLCHAIN_VERSION), clang) +STL_INCLUDES = \ + -I$(NDKROOT)/sources/cxx-stl/llvm-libc++/include \ + -I$(NDKROOT)/sources/cxx-stl/llvm-libc++abi/include +STL_LIB = \ + $(NDKROOT)/sources/cxx-stl/llvm-libc++/libs/$(APP_ABI)/libc++_static.a +else STL_INCLUDES = \ -I$(NDKROOT)/sources/cxx-stl/stlport/stlport STL_LIB = \ $(NDKROOT)/sources/cxx-stl/stlport/libs/$(APP_ABI)/libstlport_static.a +endif GTEST_INCLUDES = $(STL_INCLUDES) CODEC_UNITTEST_INCLUDES = $(STL_INCLUDES) diff --git a/chromium/third_party/openh264/src/build/platform-bsd.mk b/chromium/third_party/openh264/src/build/platform-bsd.mk index cade69aac6f..2e0bf2ccd60 100644 --- a/chromium/third_party/openh264/src/build/platform-bsd.mk +++ b/chromium/third_party/openh264/src/build/platform-bsd.mk @@ -4,6 +4,9 @@ SHAREDLIBSUFFIXFULLVER=$(SHAREDLIBSUFFIX).$(FULL_VERSION) SHAREDLIBSUFFIXMAJORVER=$(SHAREDLIBSUFFIX).$(SHAREDLIB_MAJORVERSION) SHLDFLAGS = -Wl,-soname,$(LIBPREFIX)$(PROJECT_NAME).$(SHAREDLIBSUFFIXMAJORVER) CFLAGS += -fPIC +ifeq ($(USE_STACK_PROTECTOR), Yes) +CFLAGS += -fstack-protector-all +endif LDFLAGS += -lpthread STATIC_LDFLAGS += -lpthread -lm ifeq ($(ASM_ARCH), x86) diff --git a/chromium/third_party/openh264/src/build/platform-darwin.mk b/chromium/third_party/openh264/src/build/platform-darwin.mk index 95947427753..6f91dafb2af 100644 --- a/chromium/third_party/openh264/src/build/platform-darwin.mk +++ b/chromium/third_party/openh264/src/build/platform-darwin.mk @@ -3,14 +3,17 @@ SHAREDLIB_DIR = $(PREFIX)/lib SHAREDLIBSUFFIX = dylib 
SHAREDLIBSUFFIXFULLVER=$(FULL_VERSION).$(SHAREDLIBSUFFIX) SHAREDLIBSUFFIXMAJORVER=$(SHAREDLIB_MAJORVERSION).$(SHAREDLIBSUFFIX) -CURRENT_VERSION := 1.9.0 -COMPATIBILITY_VERSION := 1.9.0 +CURRENT_VERSION := 2.1.0 +COMPATIBILITY_VERSION := 2.1.0 SHLDFLAGS = -dynamiclib -twolevel_namespace -undefined dynamic_lookup \ -fno-common -headerpad_max_install_names -install_name \ $(SHAREDLIB_DIR)/$(LIBPREFIX)$(PROJECT_NAME).$(SHAREDLIBSUFFIXMAJORVER) SHARED = -dynamiclib SHARED += -current_version $(CURRENT_VERSION) -compatibility_version $(COMPATIBILITY_VERSION) CFLAGS += -Wall -fPIC -MMD -MP +ifeq ($(USE_STACK_PROTECTOR), Yes) +CFLAGS += -fstack-protector-all +endif ifeq ($(ASM_ARCH), x86) ASMFLAGS += -DPREFIX ifeq ($(ARCH), x86_64) diff --git a/chromium/third_party/openh264/src/build/platform-linux.mk b/chromium/third_party/openh264/src/build/platform-linux.mk index 52230a2a6f3..b5c006b2325 100644 --- a/chromium/third_party/openh264/src/build/platform-linux.mk +++ b/chromium/third_party/openh264/src/build/platform-linux.mk @@ -4,6 +4,9 @@ SHAREDLIBSUFFIXFULLVER=$(SHAREDLIBSUFFIX).$(FULL_VERSION) SHAREDLIBSUFFIXMAJORVER=$(SHAREDLIBSUFFIX).$(SHAREDLIB_MAJORVERSION) SHLDFLAGS = -Wl,-soname,$(LIBPREFIX)$(PROJECT_NAME).$(SHAREDLIBSUFFIXMAJORVER) CFLAGS += -Wall -fno-strict-aliasing -fPIC -MMD -MP +ifeq ($(USE_STACK_PROTECTOR), Yes) +CFLAGS += -fstack-protector-all +endif LDFLAGS += -lpthread STATIC_LDFLAGS += -lpthread -lm AR_OPTS = crD $@ diff --git a/chromium/third_party/openh264/src/codec/api/svc/codec_api.h b/chromium/third_party/openh264/src/codec/api/svc/codec_api.h index cea05329ed9..a1326c8f054 100644 --- a/chromium/third_party/openh264/src/codec/api/svc/codec_api.h +++ b/chromium/third_party/openh264/src/codec/api/svc/codec_api.h @@ -330,7 +330,7 @@ class ISVCEncoder { virtual int EXTAPI SetOption (ENCODER_OPTION eOptionId, void* pOption) = 0; /** - * @brief Set option for encoder, detail option type, please refer to enumurate ENCODER_OPTION. 
+ * @brief Get option for encoder, detail option type, please refer to enumurate ENCODER_OPTION. * @param pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,... * @return CM_RETURN: 0 - success; otherwise - failed; */ diff --git a/chromium/third_party/openh264/src/codec/api/svc/codec_app_def.h b/chromium/third_party/openh264/src/codec/api/svc/codec_app_def.h index e5ee3cc46bf..bb3c3d67b78 100644 --- a/chromium/third_party/openh264/src/codec/api/svc/codec_app_def.h +++ b/chromium/third_party/openh264/src/codec/api/svc/codec_app_def.h @@ -78,13 +78,14 @@ typedef enum { /** * Errors derived from bitstream parsing */ - dsErrorFree = 0x00, ///< bit stream error-free - dsFramePending = 0x01, ///< need more throughput to generate a frame output, - dsRefLost = 0x02, ///< layer lost at reference frame with temporal id 0 - dsBitstreamError = 0x04, ///< error bitstreams(maybe broken internal frame) the decoder cared - dsDepLayerLost = 0x08, ///< dependented layer is ever lost - dsNoParamSets = 0x10, ///< no parameter set NALs involved - dsDataErrorConcealed = 0x20, ///< current data error concealed specified + dsErrorFree = 0x00, ///< bit stream error-free + dsFramePending = 0x01, ///< need more throughput to generate a frame output, + dsRefLost = 0x02, ///< layer lost at reference frame with temporal id 0 + dsBitstreamError = 0x04, ///< error bitstreams(maybe broken internal frame) the decoder cared + dsDepLayerLost = 0x08, ///< dependented layer is ever lost + dsNoParamSets = 0x10, ///< no parameter set NALs involved + dsDataErrorConcealed = 0x20, ///< current data error concealed specified + dsRefListNullPtrs = 0x40, ///<ref picure list contains null ptrs within uiRefCount range /** * Errors derived from logic level @@ -166,8 +167,8 @@ typedef enum { DECODER_OPTION_LEVEL, ///< get current AU level info,only is used in GetOption DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval DECODER_OPTION_IS_REF_PIC, 
///< feedback current frame is ref pic or not - DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order. - + DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order. + DECODER_OPTION_NUM_OF_THREADS, ///< number of decoding threads. The maximum thread count is equal or less than lesser of (cpu core counts and 16). } DECODER_OPTION; /** diff --git a/chromium/third_party/openh264/src/codec/api/svc/codec_def.h b/chromium/third_party/openh264/src/codec/api/svc/codec_def.h index 4f7eb9d88d0..edde5f4a2e9 100644 --- a/chromium/third_party/openh264/src/codec/api/svc/codec_def.h +++ b/chromium/third_party/openh264/src/codec/api/svc/codec_def.h @@ -201,6 +201,7 @@ typedef struct TagBufferInfo { union { SSysMEMBuffer sSystemBuffer; ///< memory info for one picture } UsrData; ///< output buffer info + unsigned char* pDst[3]; //point to picture YUV data } SBufferInfo; diff --git a/chromium/third_party/openh264/src/codec/api/svc/codec_ver.h b/chromium/third_party/openh264/src/codec/api/svc/codec_ver.h index 1c366f139d6..a4e494f6b09 100644 --- a/chromium/third_party/openh264/src/codec/api/svc/codec_ver.h +++ b/chromium/third_party/openh264/src/codec/api/svc/codec_ver.h @@ -4,12 +4,12 @@ #include "codec_app_def.h" -static const OpenH264Version g_stCodecVersion = {1, 9, 0, 1806}; -static const char* const g_strCodecVer = "OpenH264 version:1.9.0.1806"; +static const OpenH264Version g_stCodecVersion = {2, 1, 0, 2002}; +static const char* const g_strCodecVer = "OpenH264 version:2.1.0.2002"; -#define OPENH264_MAJOR (1) -#define OPENH264_MINOR (9) +#define OPENH264_MAJOR (2) +#define OPENH264_MINOR (1) #define OPENH264_REVISION (0) -#define OPENH264_RESERVED (1806) +#define OPENH264_RESERVED (2002) #endif // CODEC_VER_H diff --git 
a/chromium/third_party/openh264/src/codec/build/win32/dec/WelsDecCore.vcproj b/chromium/third_party/openh264/src/codec/build/win32/dec/WelsDecCore.vcproj index a697fce1f95..de7f119f332 100644 --- a/chromium/third_party/openh264/src/codec/build/win32/dec/WelsDecCore.vcproj +++ b/chromium/third_party/openh264/src/codec/build/win32/dec/WelsDecCore.vcproj @@ -860,6 +860,10 @@ RelativePath="..\..\..\common\inc\wels_const_common.h" > </File> + <File + RelativePath="..\..\..\decoder\core\inc\wels_decoder_thread.h" + > + </File> </Filter> <Filter Name="Source Files" @@ -977,6 +981,14 @@ RelativePath="..\..\..\common\src\utils.cpp" > </File> + <File + RelativePath="..\..\..\common\src\WelsThreadLib.cpp" + > + </File> + <File + RelativePath="..\..\..\decoder\core\src\wels_decoder_thread.cpp" + > + </File> </Filter> </Files> <Globals> diff --git a/chromium/third_party/openh264/src/codec/common/inc/WelsThreadLib.h b/chromium/third_party/openh264/src/codec/common/inc/WelsThreadLib.h index 494a076c031..cd26dbdd91e 100644 --- a/chromium/third_party/openh264/src/codec/common/inc/WelsThreadLib.h +++ b/chromium/third_party/openh264/src/codec/common/inc/WelsThreadLib.h @@ -60,6 +60,19 @@ typedef HANDLE WELS_EVENT; #define WELS_THREAD_ROUTINE_TYPE DWORD WINAPI #define WELS_THREAD_ROUTINE_RETURN(rc) return (DWORD)rc; +#ifdef WINAPI_FAMILY +#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +#define WP80 + +#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0) +#define GetSystemInfo(x) GetNativeSystemInfo(x) +#define CreateEvent(attr, reset, init, name) CreateEventEx(attr, name, ((reset) ? CREATE_EVENT_MANUAL_RESET : 0) | ((init) ? 
CREATE_EVENT_INITIAL_SET : 0), EVENT_ALL_ACCESS) +#define CreateSemaphore(a, b, c, d) CreateSemaphoreEx(a, b, c, d, 0, SEMAPHORE_ALL_ACCESS) +#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE) +#define WaitForMultipleObjects(a, b, c, d) WaitForMultipleObjectsEx(a, b, c, d, FALSE) +#endif +#endif + #else // NON-WINDOWS #include <stdlib.h> diff --git a/chromium/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h b/chromium/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h index 5baa823e2b6..69a7ae3981f 100644 --- a/chromium/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h +++ b/chromium/third_party/openh264/src/codec/common/inc/asmdefs_mmi.h @@ -289,7 +289,7 @@ * backup register */ #define BACKUP_REG \ - double __back_temp[8]; \ + double __attribute__((aligned(16))) __back_temp[8]; \ if (_MIPS_SIM == _ABI64) \ __asm__ volatile ( \ "gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \ diff --git a/chromium/third_party/openh264/src/codec/common/inc/copy_mb.h b/chromium/third_party/openh264/src/codec/common/inc/copy_mb.h index 56bef626faa..532702a9edc 100644 --- a/chromium/third_party/openh264/src/codec/common/inc/copy_mb.h +++ b/chromium/third_party/openh264/src/codec/common/inc/copy_mb.h @@ -82,6 +82,13 @@ void WelsCopy16x8NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, void WelsCopy16x16_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); void WelsCopy16x16NotAligned_mmi (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); #endif//HAVE_MMI + +#if defined (HAVE_MSA) +void WelsCopy8x8_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy8x16_msa (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS); +void WelsCopy16x8_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); +void WelsCopy16x16_msa (uint8_t* Dst, int32_t iStrideD, uint8_t* Src, int32_t iStrideS); +#endif//HAVE_MSA #if defined(__cplusplus) } #endif//__cplusplus diff --git 
a/chromium/third_party/openh264/src/codec/common/inc/cpu_core.h b/chromium/third_party/openh264/src/codec/common/inc/cpu_core.h index e5906c62b99..f25787b04a5 100644 --- a/chromium/third_party/openh264/src/codec/common/inc/cpu_core.h +++ b/chromium/third_party/openh264/src/codec/common/inc/cpu_core.h @@ -86,6 +86,7 @@ /* For loongson */ #define WELS_CPU_MMI 0x00000001 /* mmi */ +#define WELS_CPU_MSA 0x00000002 /* msa */ /* * Interfaces for CPU core feature detection as below diff --git a/chromium/third_party/openh264/src/codec/common/inc/deblocking_common.h b/chromium/third_party/openh264/src/codec/common/inc/deblocking_common.h index a605a6a224f..3ec9b2e5d8c 100644 --- a/chromium/third_party/openh264/src/codec/common/inc/deblocking_common.h +++ b/chromium/third_party/openh264/src/codec/common/inc/deblocking_common.h @@ -91,6 +91,20 @@ void DeblockChromaLt4H_mmi (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, i int8_t* pTC); void WelsNonZeroCount_mmi (int8_t* pNonZeroCount); #endif//HAVE_MMI + +#if defined(HAVE_MSA) +void DeblockLumaLt4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4V_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockLumaLt4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc); +void DeblockLumaEq4H_msa (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaEq4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4V_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void DeblockChromaEq4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta); +void DeblockChromaLt4H_msa (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, + int8_t* pTC); +void WelsNonZeroCount_msa (int8_t* pNonZeroCount); +#endif//HAVE_MSA #if 
defined(__cplusplus) } #endif//__cplusplus diff --git a/chromium/third_party/openh264/src/codec/common/inc/expand_pic.h b/chromium/third_party/openh264/src/codec/common/inc/expand_pic.h index cbb69b421ff..2b06d9e47fe 100644 --- a/chromium/third_party/openh264/src/codec/common/inc/expand_pic.h +++ b/chromium/third_party/openh264/src/codec/common/inc/expand_pic.h @@ -47,6 +47,7 @@ extern "C" { #endif//__cplusplus #define PADDING_LENGTH 32 // reference extension +#define CHROMA_PADDING_LENGTH 16 // chroma reference extension #if defined(X86_ASM) void ExpandPictureLuma_sse2 (uint8_t* pDst, @@ -89,6 +90,10 @@ typedef struct TagExpandPicFunc { PExpandPictureFunc pfExpandChromaPicture[2]; } SExpandPicFunc; +void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH, + const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight); +void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH, + const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight); void ExpandReferencingPicture (uint8_t* pData[3], int32_t iWidth, int32_t iHeight, int32_t iStride[3], PExpandPictureFunc pExpLuma, PExpandPictureFunc pExpChrom[2]); diff --git a/chromium/third_party/openh264/src/codec/common/inc/msa_macros.h b/chromium/third_party/openh264/src/codec/common/inc/msa_macros.h new file mode 100644 index 00000000000..2eef0e5b838 --- /dev/null +++ b/chromium/third_party/openh264/src/codec/common/inc/msa_macros.h @@ -0,0 +1,2393 @@ +/* + * Copyright © 2020 Loongson Technology Co. Ltd. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Yin Shiyou (yinshiyou-hf@loongson.cn) + * Gu Xiwei (guxiwei-hf@loongson.cn) + */ + +/* + * This header file is copied from loongson LSOM project. + * MSA macros is implemented with msa intrinsics in msa.h, + * and used for simplifing MSA optimization. 
+ */ + +#ifndef _MSA_MACROS_H +#define _MSA_MACROS_H 1 +#define MSA_MACROS_VERSION 18 +#include <msa.h> + +#if (__mips_isa_rev >= 6) + #define LH(psrc) \ + ( { \ + uint16_t val_lh_m = *(uint16_t *)(psrc); \ + val_lh_m; \ + } ) + + #define LW(psrc) \ + ( { \ + uint32_t val_lw_m = *(uint32_t *)(psrc); \ + val_lw_m; \ + } ) + + #if (__mips == 64) + #define LD(psrc) \ + ( { \ + uint64_t val_ld_m = *(uint64_t *)(psrc); \ + val_ld_m; \ + } ) + #else // !(__mips == 64) + #define LD(psrc) \ + ( { \ + uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ + uint32_t val0_ld_m, val1_ld_m; \ + uint64_t val_ld_m = 0; \ + \ + val0_ld_m = LW(psrc_ld_m); \ + val1_ld_m = LW(psrc_ld_m + 4); \ + \ + val_ld_m = (uint64_t) (val1_ld_m); \ + val_ld_m = (uint64_t) ((val_ld_m << 32) & 0xFFFFFFFF00000000); \ + val_ld_m = (uint64_t) (val_ld_m | (uint64_t) val0_ld_m); \ + \ + val_ld_m; \ + } ) + #endif // (__mips == 64) + + #define SH(val, pdst) *(uint16_t *)(pdst) = (val); + #define SW(val, pdst) *(uint32_t *)(pdst) = (val); + #define SD(val, pdst) *(uint64_t *)(pdst) = (val); + +#else // !(__mips_isa_rev >= 6) + #define LH(psrc) \ + ( { \ + uint8_t *psrc_lh_m = (uint8_t *) (psrc); \ + uint16_t val_lh_m; \ + \ + __asm__ volatile ( \ + "ulh %[val_lh_m], %[psrc_lh_m] \n\t" \ + \ + : [val_lh_m] "=r" (val_lh_m) \ + : [psrc_lh_m] "m" (*psrc_lh_m) \ + ); \ + \ + val_lh_m; \ + } ) + + #define LW(psrc) \ + ( { \ + uint8_t *psrc_lw_m = (uint8_t *) (psrc); \ + uint32_t val_lw_m; \ + \ + __asm__ volatile ( \ + "ulw %[val_lw_m], %[psrc_lw_m] \n\t" \ + \ + : [val_lw_m] "=r" (val_lw_m) \ + : [psrc_lw_m] "m" (*psrc_lw_m) \ + ); \ + \ + val_lw_m; \ + } ) + + #if (__mips == 64) + #define LD(psrc) \ + ( { \ + uint8_t *psrc_ld_m = (uint8_t *) (psrc); \ + uint64_t val_ld_m = 0; \ + \ + __asm__ volatile ( \ + "uld %[val_ld_m], %[psrc_ld_m] \n\t" \ + \ + : [val_ld_m] "=r" (val_ld_m) \ + : [psrc_ld_m] "m" (*psrc_ld_m) \ + ); \ + \ + val_ld_m; \ + } ) + #else // !(__mips == 64) + #define LD(psrc) \ + ( { \ + uint8_t 
*psrc_ld_m = (uint8_t *) (psrc); \ + uint32_t val0_ld_m, val1_ld_m; \ + uint64_t val_ld_m = 0; \ + \ + val0_ld_m = LW(psrc_ld_m); \ + val1_ld_m = LW(psrc_ld_m + 4); \ + \ + val_ld_m = (uint64_t) (val1_ld_m); \ + val_ld_m = (uint64_t) ((val_ld_m << 32) & 0xFFFFFFFF00000000); \ + val_ld_m = (uint64_t) (val_ld_m | (uint64_t) val0_ld_m); \ + \ + val_ld_m; \ + } ) + #endif // (__mips == 64) + + #define SH(val, pdst) \ + { \ + uint8_t *pdst_sh_m = (uint8_t *) (pdst); \ + uint16_t val_sh_m = (val); \ + \ + __asm__ volatile ( \ + "ush %[val_sh_m], %[pdst_sh_m] \n\t" \ + \ + : [pdst_sh_m] "=m" (*pdst_sh_m) \ + : [val_sh_m] "r" (val_sh_m) \ + ); \ + } + + #define SW(val, pdst) \ + { \ + uint8_t *pdst_sw_m = (uint8_t *) (pdst); \ + uint32_t val_sw_m = (val); \ + \ + __asm__ volatile ( \ + "usw %[val_sw_m], %[pdst_sw_m] \n\t" \ + \ + : [pdst_sw_m] "=m" (*pdst_sw_m) \ + : [val_sw_m] "r" (val_sw_m) \ + ); \ + } + + #define SD(val, pdst) \ + { \ + uint8_t *pdst_sd_m = (uint8_t *) (pdst); \ + uint32_t val0_sd_m, val1_sd_m; \ + \ + val0_sd_m = (uint32_t) ((val) & 0x00000000FFFFFFFF); \ + val1_sd_m = (uint32_t) (((val) >> 32) & 0x00000000FFFFFFFF); \ + \ + SW(val0_sd_m, pdst_sd_m); \ + SW(val1_sd_m, pdst_sd_m + 4); \ + } +#endif // (__mips_isa_rev >= 6) + + + + + + +/* Description : Load vector elements with stride. + * Arguments : Inputs - psrc (source pointer to load from) + * - stride + * Outputs - out0, out1... + * Return Type - as per RTYPE + * Details : Loads elements in 'out0' from (psrc). + * Loads elements in 'out1' from (psrc + stride). 
+ */ +#define MSA_LD_V(RTYPE, psrc, out) (out) = *((RTYPE *)(psrc)); + +#define MSA_LD_V2(RTYPE, psrc, stride, out0, out1) \ +{ \ + MSA_LD_V(RTYPE, (psrc), out0); \ + MSA_LD_V(RTYPE, (psrc) + (stride), out1); \ +} + +#define MSA_LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \ +{ \ + MSA_LD_V2(RTYPE, (psrc), stride, out0, out1); \ + MSA_LD_V2(RTYPE, (psrc) + 2 * (stride) , stride, out2, out3); \ +} + +#define MSA_LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, \ + out4, out5, out6, out7) \ +{ \ + MSA_LD_V4(RTYPE, (psrc), stride, out0, out1, out2, out3); \ + MSA_LD_V4(RTYPE, (psrc) + 4 * (stride), stride, out4, out5, out6, out7); \ +} + +/* Description : Store vectors with stride. + * Arguments : Inputs - in0, in1... (source vector to be stored) + * - stride + * Outputs - pdst (destination pointer to store to) + * Details : Stores elements from 'in0' to (pdst). + * Stores elements from 'in1' to (pdst + stride). + */ +#define MSA_ST_V(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in); + +#define MSA_ST_V2(RTYPE, in0, in1, pdst, stride) \ +{ \ + MSA_ST_V(RTYPE, in0, (pdst)); \ + MSA_ST_V(RTYPE, in1, (pdst) + (stride)); \ +} + +#define MSA_ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \ +{ \ + MSA_ST_V2(RTYPE, in0, in1, (pdst), stride); \ + MSA_ST_V2(RTYPE, in2, in3, (pdst) + 2 * (stride), stride); \ +} + +#define MSA_ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \ +{ \ + MSA_ST_V4(RTYPE, in0, in1, in2, in3, (pdst), stride); \ + MSA_ST_V4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * (stride), stride); \ +} + +/* Description : Store half word elements of vector with stride. + * Arguments : Inputs - in (source vector) + * - pdst (destination pointer to store to) + * - stride + * Details : Stores half word 'idx0' from 'in' to (pdst). + * Stores half word 'idx1' from 'in' to (pdst + stride). + * Similar for other elements. 
+ */ +#define MSA_ST_H(in, idx, pdst) \ +{ \ + uint16_t out0_m; \ + out0_m = __msa_copy_u_h((v8i16) in, idx); \ + SH(out0_m, (pdst)); \ +} +#define MSA_ST_H2(in, idx0, idx1, pdst, stride) \ +{ \ + uint16_t out0_m, out1_m; \ + out0_m = __msa_copy_u_h((v8i16) in, idx0); \ + out1_m = __msa_copy_u_h((v8i16) in, idx1); \ + SH(out0_m, (pdst)); \ + SH(out1_m, (pdst) + stride); \ +} +#define MSA_ST_H4(in, idx0, idx1, idx2, idx3, pdst, stride) \ +{ \ + uint16_t out0_m, out1_m, out2_m, out3_m; \ + out0_m = __msa_copy_u_h((v8i16) in, idx0); \ + out1_m = __msa_copy_u_h((v8i16) in, idx1); \ + out2_m = __msa_copy_u_h((v8i16) in, idx2); \ + out3_m = __msa_copy_u_h((v8i16) in, idx3); \ + SH(out0_m, (pdst)); \ + SH(out1_m, (pdst) + stride); \ + SH(out2_m, (pdst) + 2 * stride); \ + SH(out3_m, (pdst) + 3 * stride); \ +} +#define MSA_ST_H8(in, idx0, idx1, idx2, idx3, idx4, idx5, \ + idx6, idx7, pdst, stride) \ +{ \ + MSA_ST_H4(in, idx0, idx1, idx2, idx3, pdst, stride) \ + MSA_ST_H4(in, idx4, idx5, idx6, idx7, (pdst) + 4*stride, stride) \ +} + +/* Description : Store word elements of vector with stride. + * Arguments : Inputs - in (source vector) + * - pdst (destination pointer to store to) + * - stride + * Details : Stores word 'idx0' from 'in' to (pdst). + * Stores word 'idx1' from 'in' to (pdst + stride). + * Similar for other elements. 
+ */ +#define MSA_ST_W(in, idx, pdst) \ +{ \ + uint32_t out0_m; \ + out0_m = __msa_copy_u_w((v4i32) in, idx); \ + SW(out0_m, (pdst)); \ +} +#define MSA_ST_W2(in, idx0, idx1, pdst, stride) \ +{ \ + uint32_t out0_m, out1_m; \ + out0_m = __msa_copy_u_w((v4i32) in, idx0); \ + out1_m = __msa_copy_u_w((v4i32) in, idx1); \ + SW(out0_m, (pdst)); \ + SW(out1_m, (pdst) + stride); \ +} +#define MSA_ST_W4(in, idx0, idx1, idx2, idx3, pdst, stride) \ +{ \ + uint32_t out0_m, out1_m, out2_m, out3_m; \ + out0_m = __msa_copy_u_w((v4i32) in, idx0); \ + out1_m = __msa_copy_u_w((v4i32) in, idx1); \ + out2_m = __msa_copy_u_w((v4i32) in, idx2); \ + out3_m = __msa_copy_u_w((v4i32) in, idx3); \ + SW(out0_m, (pdst)); \ + SW(out1_m, (pdst) + stride); \ + SW(out2_m, (pdst) + 2*stride); \ + SW(out3_m, (pdst) + 3*stride); \ +} +#define MSA_ST_W8(in0, in1, idx0, idx1, idx2, idx3, \ + idx4, idx5, idx6, idx7, pdst, stride) \ +{ \ + MSA_ST_W4(in0, idx0, idx1, idx2, idx3, pdst, stride) \ + MSA_ST_W4(in1, idx4, idx5, idx6, idx7, pdst + 4*stride, stride) \ +} + +/* Description : Store double word elements of vector with stride. + * Arguments : Inputs - in (source vector) + * - pdst (destination pointer to store to) + * - stride + * Details : Stores double word 'idx0' from 'in' to (pdst). + * Stores double word 'idx1' from 'in' to (pdst + stride). + * Similar for other elements. 
+ */ +#define MSA_ST_D(in, idx, pdst) \ +{ \ + uint64_t out0_m; \ + out0_m = __msa_copy_u_d((v2i64) in, idx); \ + SD(out0_m, (pdst)); \ +} +#define MSA_ST_D2(in, idx0, idx1, pdst, stride) \ +{ \ + uint64_t out0_m, out1_m; \ + out0_m = __msa_copy_u_d((v2i64) in, idx0); \ + out1_m = __msa_copy_u_d((v2i64) in, idx1); \ + SD(out0_m, (pdst)); \ + SD(out1_m, (pdst) + stride); \ +} +#define MSA_ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \ +{ \ + uint64_t out0_m, out1_m, out2_m, out3_m; \ + out0_m = __msa_copy_u_d((v2i64) in0, idx0); \ + out1_m = __msa_copy_u_d((v2i64) in0, idx1); \ + out2_m = __msa_copy_u_d((v2i64) in1, idx2); \ + out3_m = __msa_copy_u_d((v2i64) in1, idx3); \ + SD(out0_m, (pdst)); \ + SD(out1_m, (pdst) + stride); \ + SD(out2_m, (pdst) + 2 * stride); \ + SD(out3_m, (pdst) + 3 * stride); \ +} +#define MSA_ST_D8(in0, in1, in2, in3, idx0, idx1, idx2, idx3, \ + idx4, idx5, idx6, idx7, pdst, stride) \ +{ \ + MSA_ST_D4(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \ + MSA_ST_D4(in2, in3, idx4, idx5, idx6, idx7, pdst + 4 * stride, stride) \ +} + +/* Description : Shuffle byte vector elements as per mask vector. + * Arguments : Inputs - in0, in1 (source vectors) + * - mask (mask vectors) + * Outputs - out (dstination vectors) + * Return Type - as per RTYPE + * Details : Selective byte elements from 'in0' & 'in1' are copied to 'out' as + * per control vector 'mask'. 
+ */ +#define MSA_VSHF_B(RTYPE, in0, in1, mask, out) \ +{ \ + out = (RTYPE) __msa_vshf_b((v16i8) mask, (v16i8) in0, (v16i8) in1); \ +} + +#define MSA_VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ +{ \ + MSA_VSHF_B(RTYPE, in0, in1, mask0, out0) \ + MSA_VSHF_B(RTYPE, in2, in3, mask1, out1) \ +} + +#define MSA_VSHF_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + mask0, mask1, mask2, mask3, out0, out1, out2, out3) \ +{ \ + MSA_VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \ + MSA_VSHF_B2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3); \ +} + +/* Description : Shuffle halfword vector elements as per mask vector. + * Arguments : Inputs - in0, in1 (source vectors) + * - mask (mask vectors) + * Outputs - out (dstination vectors) + * Return Type - as per RTYPE + * Details : Selective halfword elements from 'in0' & 'in1' are copied to 'out' as + * per control vector 'mask'. + */ +#define MSA_VSHF_H(RTYPE, in0, in1, mask, out) \ +{ \ + out = (RTYPE) __msa_vshf_h((v8i16) mask, (v8i16) in0, (v8i16) in1); \ +} + +#define MSA_VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ +{ \ + MSA_VSHF_H(RTYPE, in0, in1, mask0, out0) \ + MSA_VSHF_H(RTYPE, in2, in3, mask1, out1) \ +} + +#define MSA_VSHF_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + mask0, mask1, mask2, mask3, out0, out1, out2, out3) \ +{ \ + MSA_VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \ + MSA_VSHF_H2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3); \ +} + +/* Description : Shuffle word vector elements as per mask vector. + * Arguments : Inputs - in0, in1 (source vectors) + * - mask (mask vectors) + * Outputs - out (dstination vectors) + * Return Type - as per RTYPE + * Details : Selective word elements from 'in0' & 'in1' are copied to 'out' as + * per control vector 'mask'. 
+ */ +#define MSA_VSHF_W(RTYPE, in0, in1, mask, out) \ +{ \ + out = (RTYPE) __msa_vshf_w((v4i32) mask, (v4i32) in0, (v4i32) in1); \ +} + +#define MSA_VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ +{ \ + MSA_VSHF_W(RTYPE, in0, in1, mask0, out0) \ + MSA_VSHF_W(RTYPE, in2, in3, mask1, out1) \ +} + +#define MSA_VSHF_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + mask0, mask1, mask2, mask3, out0, out1, out2, out3) \ +{ \ + MSA_VSHF_W2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \ + MSA_VSHF_W2(RTYPE, in4, in5, in6, in7, mask2, mask3, out2, out3); \ +} + +/* Description : Interleave even byte elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even byte elements of 'in0' and even byte + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVEV_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvev_b((v16i8) in0, (v16i8) in1); \ +} + +#define MSA_ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVEV_B(RTYPE, in0, in1, out0); \ + MSA_ILVEV_B(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave even half word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even half word elements of 'in0' and even half word + * elements of 'in1' are interleaved and copied to 'out'. 
+ */ +#define MSA_ILVEV_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvev_h((v8i16) in0, (v8i16) in1); \ +} + +#define MSA_ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVEV_H(RTYPE, in0, in1, out0); \ + MSA_ILVEV_H(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave even word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even word elements of 'in0' and even word + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVEV_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvev_w((v2i64) in0, (v2i64) in1); \ +} + +#define MSA_ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVEV_W(RTYPE, in0, in1, out0); \ + MSA_ILVEV_W(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVEV_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVEV_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave even double word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even double word elements of 'in0' and even double word + * elements of 'in1' are interleaved and copied to 'out'. 
+ */ +#define MSA_ILVEV_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvev_d((v2i64) in0, (v2i64) in1); \ +} + +#define MSA_ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVEV_D(RTYPE, in0, in1, out0); \ + MSA_ILVEV_D(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVEV_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave odd byte elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd byte elements of 'in0' and odd byte + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVOD_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvod_b((v16i8) in0, (v16i8) in1); \ +} + +#define MSA_ILVOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVOD_B(RTYPE, in0, in1, out0); \ + MSA_ILVOD_B(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVOD_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVOD_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVOD_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave odd half word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd half word elements of 'in0' and odd half word + * elements of 'in1' are interleaved and copied to 'out'. 
+ */ +#define MSA_ILVOD_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvod_h((v8i16) in0, (v8i16) in1); \ +} + +#define MSA_ILVOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVOD_H(RTYPE, in0, in1, out0); \ + MSA_ILVOD_H(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVOD_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVOD_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave odd word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd word elements of 'in0' and odd word + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVOD_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvod_w((v4i32) in0, (v4i32) in1); \ +} + +#define MSA_ILVOD_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVOD_W(RTYPE, in0, in1, out0); \ + MSA_ILVOD_W(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVOD_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVOD_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVOD_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave odd double word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd double word elements of 'in0' and odd double word + * elements of 'in1' are interleaved and copied to 'out'. 
+ */ +#define MSA_ILVOD_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvod_d((v2i64) in0, (v2i64) in1); \ +} + +#define MSA_ILVOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVOD_D(RTYPE, in0, in1, out0); \ + MSA_ILVOD_D(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVOD_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVOD_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVOD_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave left half of byte elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Left half of byte elements of 'in0' and left half of byte + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVL_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \ +} + +#define MSA_ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVL_B(RTYPE, in0, in1, out0); \ + MSA_ILVL_B(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave left half of halfword elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Left half of halfword elements of 'in0' and left half of halfword + * elements of 'in1' are interleaved and copied to 'out'. 
+ */ +#define MSA_ILVL_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvl_h((v8i16) in0, (v8i16) in1); \ +} + +#define MSA_ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVL_H(RTYPE, in0, in1, out0); \ + MSA_ILVL_H(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVL_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVL_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave left half of word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Left half of word elements of 'in0' and left half of word + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVL_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvl_w((v4i32) in0, (v4i32) in1); \ +} + +#define MSA_ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVL_W(RTYPE, in0, in1, out0); \ + MSA_ILVL_W(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVL_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVL_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave left half of double word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Left half of double word elements of 'in0' and left half of + * double word elements of 'in1' are interleaved and copied to 'out'. 
+ */ +#define MSA_ILVL_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvl_d((v2i64) in0, (v2i64) in1); \ +} + +#define MSA_ILVL_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVL_D(RTYPE, in0, in1, out0); \ + MSA_ILVL_D(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVL_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVL_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVL_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave right half of byte elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Right half of byte elements of 'in0' and right half of byte + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVR_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \ +} + +#define MSA_ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVR_B(RTYPE, in0, in1, out0); \ + MSA_ILVR_B(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave right half of halfword elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Right half of halfword elements of 'in0' and right half of halfword + * elements of 'in1' are interleaved and copied to 'out'. 
+ */ +#define MSA_ILVR_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvr_h((v8i16) in0, (v8i16) in1); \ +} + +#define MSA_ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVR_H(RTYPE, in0, in1, out0); \ + MSA_ILVR_H(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave right half of word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Right half of word elements of 'in0' and right half of word + * elements of 'in1' are interleaved and copied to 'out'. + */ +#define MSA_ILVR_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvr_w((v4i32) in0, (v4i32) in1); \ +} + +#define MSA_ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVR_W(RTYPE, in0, in1, out0); \ + MSA_ILVR_W(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave right half of double word elements from vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Right half of double word elements of 'in0' and right half of + * double word elements of 'in1' are interleaved and copied to 'out'. 
+ */ +#define MSA_ILVR_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_ilvr_d((v2i64) in0, (v2i64) in1); \ +} + +#define MSA_ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_ILVR_D(RTYPE, in0, in1, out0); \ + MSA_ILVR_D(RTYPE, in2, in3, out1); \ +} + +#define MSA_ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ + MSA_ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ +} + +/* Description : Interleave both left and right half of input vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out0, out1 + * Return Type - as per RTYPE + * Details : Right half of byte elements from 'in0' and 'in1' are + * interleaved and stored to 'out0'. + * Left half of byte elements from 'in0' and 'in1' are + * interleaved and stored to 'out1'. + */ +#define MSA_ILVRL_B2(RTYPE, in0, in1, out0, out1) \ +{ \ + MSA_ILVR_B(RTYPE, in0, in1, out0); \ + MSA_ILVL_B(RTYPE, in0, in1, out1); \ +} + +#define MSA_ILVRL_B4(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVRL_B2(RTYPE, in0, in1, out0, out1); \ + MSA_ILVRL_B2(RTYPE, in2, in3, out2, out3); \ +} + +/* Description : Interleave both left and right half of input vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out0, out1 + * Return Type - as per RTYPE + * Details : Right half of halfword elements from 'in0' and 'in1' are + * interleaved and stored to 'out0'. + * Left half of halfword elements from 'in0' and 'in1' are + * interleaved and stored to 'out1'. + */ +#define MSA_ILVRL_H2(RTYPE, in0, in1, out0, out1) \ +{ \ + MSA_ILVR_H(RTYPE, in0, in1, out0); \ + MSA_ILVL_H(RTYPE, in0, in1, out1); \ +} + +#define MSA_ILVRL_H4(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVRL_H2(RTYPE, in0, in1, out0, out1); \ + MSA_ILVRL_H2(RTYPE, in2, in3, out2, out3); \ +} + +/* Description : Interleave both left and right half of input vectors. 
+ * Arguments : Inputs - in0, in1 + * Outputs - out0, out1 + * Return Type - as per RTYPE + * Details : Right half of word elements from 'in0' and 'in1' are + * interleaved and stored to 'out0'. + * Left half of word elements from 'in0' and 'in1' are + * interleaved and stored to 'out1'. + */ +#define MSA_ILVRL_W2(RTYPE, in0, in1, out0, out1) \ +{ \ + MSA_ILVR_W(RTYPE, in0, in1, out0); \ + MSA_ILVL_W(RTYPE, in0, in1, out1); \ +} + +#define MSA_ILVRL_W4(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVRL_W2(RTYPE, in0, in1, out0, out1); \ + MSA_ILVRL_W2(RTYPE, in2, in3, out2, out3); \ +} + +/* Description : Interleave both left and right half of input vectors. + * Arguments : Inputs - in0, in1 + * Outputs - out0, out1 + * Return Type - as per RTYPE + * Details : Right half of double word elements from 'in0' and 'in1' are + * interleaved and stored to 'out0'. + * Left half of double word elements from 'in0' and 'in1' are + * interleaved and stored to 'out1'. + */ +#define MSA_ILVRL_D2(RTYPE, in0, in1, out0, out1) \ +{ \ + MSA_ILVR_D(RTYPE, in0, in1, out0); \ + MSA_ILVL_D(RTYPE, in0, in1, out1); \ +} + +#define MSA_ILVRL_D4(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVRL_D2(RTYPE, in0, in1, out0, out1); \ + MSA_ILVRL_D2(RTYPE, in2, in3, out2, out3); \ +} + +/* Description : Indexed byte elements are replicated to all elements in + * output vector. + * Arguments : Inputs - in, idx + * Outputs - out + * Return Type - as per RTYPE + * Details : 'idx' element value from 'in' vector is replicated to all + * elements in 'out' vector. + * Valid index range for halfword operation is 0-7. 
+ */ +#define MSA_SPLATI_B(RTYPE, in, idx, out) \ +{ \ + out = (RTYPE) __msa_splati_b((v16i8) in, idx); \ +} + +#define MSA_SPLATI_B2(RTYPE, in, idx0, idx1, out0, out1) \ +{ \ + MSA_SPLATI_B(RTYPE, in, idx0, out0) \ + MSA_SPLATI_B(RTYPE, in, idx1, out1) \ +} + +#define MSA_SPLATI_B4(RTYPE, in, idx0, idx1, idx2, idx3, \ + out0, out1, out2, out3) \ +{ \ + MSA_SPLATI_B2(RTYPE, in, idx0, idx1, out0, out1) \ + MSA_SPLATI_B2(RTYPE, in, idx2, idx3, out2, out3) \ +} + +/* Description : Indexed halfword elements are replicated to all elements in + * output vector. + * Arguments : Inputs - in, idx + * Outputs - out + * Return Type - as per RTYPE + * Details : 'idx' element value from 'in' vector is replicated to all + * elements in 'out' vector. + * Valid index range for halfword operation is 0-7. + */ +#define MSA_SPLATI_H(RTYPE, in, idx, out) \ +{ \ + out = (RTYPE) __msa_splati_h((v8i16) in, idx); \ +} + +#define MSA_SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \ +{ \ + MSA_SPLATI_H(RTYPE, in, idx0, out0) \ + MSA_SPLATI_H(RTYPE, in, idx1, out1) \ +} + +#define MSA_SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, \ + out0, out1, out2, out3) \ +{ \ + MSA_SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \ + MSA_SPLATI_H2(RTYPE, in, idx2, idx3, out2, out3) \ +} + +/* Description : Indexed word elements are replicated to all elements in + * output vector. + * Arguments : Inputs - in, idx + * Outputs - out + * Return Type - as per RTYPE + * Details : 'idx' element value from 'in' vector is replicated to all + * elements in 'out' vector. + * Valid index range for halfword operation is 0-3. 
+ */ +#define MSA_SPLATI_W(RTYPE, in, idx, out) \ +{ \ + out = (RTYPE) __msa_splati_w((v4i32) in, idx); \ +} + +#define MSA_SPLATI_W2(RTYPE, in, idx0, idx1, out0, out1) \ +{ \ + MSA_SPLATI_W(RTYPE, in, idx0, out0) \ + MSA_SPLATI_W(RTYPE, in, idx1, out1) \ +} + +#define MSA_SPLATI_W4(RTYPE, in, idx0, idx1, idx2, idx3, \ + out0, out1, out2, out3) \ +{ \ + MSA_SPLATI_W2(RTYPE, in, idx0, idx1, out0, out1) \ + MSA_SPLATI_W2(RTYPE, in, idx2, idx3, out2, out3) \ +} + +/* Description : Pack even byte elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even byte elements of 'in0' are copied to the left half of + * 'out' & even byte elements of 'in1' are copied to the right + * half of 'out'. + */ +#define MSA_PCKEV_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckev_b((v16i8) in0, (v16i8) in1); \ +} + +#define MSA_PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKEV_B(RTYPE, in0, in1, out0) \ + MSA_PCKEV_B(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack even halfword elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even halfword elements of 'in0' are copied to the left half of + * 'out' & even halfword elements of 'in1' are copied to the right + * half of 'out'. 
+ */ +#define MSA_PCKEV_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckev_h((v8i16) in0, (v8i16) in1); \ +} + +#define MSA_PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKEV_H(RTYPE, in0, in1, out0) \ + MSA_PCKEV_H(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack even word elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even word elements of 'in0' are copied to the left half of + * 'out' & even word elements of 'in1' are copied to the right + * half of 'out'. + */ +#define MSA_PCKEV_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckev_w((v4i32) in0, (v4i32) in1); \ +} + +#define MSA_PCKEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKEV_W(RTYPE, in0, in1, out0) \ + MSA_PCKEV_W(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKEV_W4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKEV_W2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack even double word elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Even double word elements of 'in0' are copied to the left + * half of 'out' & even double word elements of 'in1' are + * copied to the right half of 'out'. 
+ */ +#define MSA_PCKEV_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckev_d((v2i64) in0, (v2i64) in1); \ +} + +#define MSA_PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKEV_D(RTYPE, in0, in1, out0) \ + MSA_PCKEV_D(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKEV_D2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack odd byte elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd byte elements of 'in0' are copied to the left half of + * 'out' & odd byte elements of 'in1' are copied to the right + * half of 'out'. + */ +#define MSA_PCKOD_B(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckod_b((v16i8) in0, (v16i8) in1); \ +} + +#define MSA_PCKOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKOD_B(RTYPE, in0, in1, out0) \ + MSA_PCKOD_B(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKOD_B4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKOD_B2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack odd halfword elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd halfword elements of 'in0' are copied to the left half of + * 'out' & odd halfword elements of 'in1' are copied to the right + * half of 'out'. 
+ */ +#define MSA_PCKOD_H(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckod_h((v8i16) in0, (v8i16) in1); \ +} + +#define MSA_PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKOD_H(RTYPE, in0, in1, out0) \ + MSA_PCKOD_H(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKOD_H2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack odd word elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd word elements of 'in0' are copied to the left half of + * 'out' & odd word elements of 'in1' are copied to the right + * half of 'out'. + */ +#define MSA_PCKOD_W(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckod_w((v4i32) in0, (v4i32) in1); \ +} + +#define MSA_PCKOD_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKOD_W(RTYPE, in0, in1, out0) \ + MSA_PCKOD_W(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKOD_W4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKOD_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKOD_W2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Pack odd double word elements of vector pairs. + * Arguments : Inputs - in0, in1 + * Outputs - out + * Return Type - as per RTYPE + * Details : Odd double word elements of 'in0' are copied to the left + * half of 'out' & odd double word elements of 'in1' are + * copied to the right half of 'out'. 
+ */ +#define MSA_PCKOD_D(RTYPE, in0, in1, out) \ +{ \ + out = (RTYPE) __msa_pckod_d((v2i64) in0, (v2i64) in1); \ +} + +#define MSA_PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ +{ \ + MSA_PCKOD_D(RTYPE, in0, in1, out0) \ + MSA_PCKOD_D(RTYPE, in2, in3, out1) \ +} + +#define MSA_PCKOD_D4(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + MSA_PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ + MSA_PCKOD_D2(RTYPE, in4, in5, in6, in7, out2, out3) \ +} + +/* Description : Dot product of unsigned byte vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Unsigned byte elements from 'mult' are multiplied with + * unsigned byte elements from 'cnst' producing a result + * twice the size of input i.e. unsigned halfword. + * Then this multiplication results of adjacent odd-even elements + * are added together and stored to the out vector. + */ +#define MSA_DOTP_UB(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dotp_u_h((v16u8) mult, (v16u8) cnst); \ +} + +#define MSA_DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DOTP_UB(RTYPE, mult0, cnst0, out0) \ + MSA_DOTP_UB(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) \ +{ \ + MSA_DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product of signed byte vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Signed byte elements from 'mult' are multiplied with + * signed byte elements from 'cnst' producing a result + * twice the size of input i.e. signed halfword. + * Then this multiplication results of adjacent odd-even elements + * are added together and stored to the out vector. 
+ */ +#define MSA_DOTP_SB(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dotp_s_h((v16i8) mult, (v16i8) cnst); \ +} + +#define MSA_DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DOTP_SB(RTYPE, mult0, cnst0, out0) \ + MSA_DOTP_SB(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DOTP_SB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) \ +{ \ + MSA_DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DOTP_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product of unsigned halfword vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Unsigned halfword elements from 'mult' are multiplied with + * unsigned halfword elements from 'cnst' producing a result + * twice the size of input i.e. unsigned word. + * Then this multiplication results of adjacent odd-even elements + * are added together and stored to the out vector. + */ +#define MSA_DOTP_UH(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dotp_u_w((v8u16) mult, (v8u16) cnst); \ +} + +#define MSA_DOTP_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DOTP_UH(RTYPE, mult0, cnst0, out0) \ + MSA_DOTP_UH(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DOTP_UH4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) \ +{ \ + MSA_DOTP_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DOTP_UH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product of signed halfword vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Signed halfword elements from 'mult' are multiplied with + * signed halfword elements from 'cnst' producing a result + * twice the size of input i.e. signed word. 
+ * Then this multiplication results of adjacent odd-even elements + * are added together and stored to the out vector. + */ +#define MSA_DOTP_SH(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dotp_s_w((v8i16) mult, (v8i16) cnst); \ +} + +#define MSA_DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DOTP_SH(RTYPE, mult0, cnst0, out0) \ + MSA_DOTP_SH(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, \ + out0, out1, out2, out3) \ +{ \ + MSA_DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product & addition of unsigned byte vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Unsigned byte elements from 'mult' are multiplied with + * unsigned byte elements from 'cnst' producing a result + * twice the size of input i.e. unsigned halfword. + * Then this multiplication results of adjacent odd-even elements + * are added to the out vector. + */ +#define MSA_DPADD_UB(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dpadd_u_h((v8u16) out, \ + (v16u8) mult, (v16u8) cnst); \ +} + +#define MSA_DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DPADD_UB(RTYPE, mult0, cnst0, out0) \ + MSA_DPADD_UB(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DPADD_UB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \ +{ \ + MSA_DPADD_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DPADD_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product & addition of signed byte vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Signed byte elements from 'mult' are multiplied with + * signed byte elements from 'cnst' producing a result + * twice the size of input i.e. 
signed halfword. + * Then this multiplication results of adjacent odd-even elements + * are added to the out vector. + */ +#define MSA_DPADD_SB(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dpadd_s_h((v8i16) out, \ + (v16i8) mult, (v16i8) cnst); \ +} + +#define MSA_DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DPADD_SB(RTYPE, mult0, cnst0, out0) \ + MSA_DPADD_SB(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DPADD_SB4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \ +{ \ + MSA_DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DPADD_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product & addition of unsigned halfword vector elements. + * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Unsigned halfword elements from 'mult' are multiplied with + * unsigned halfword elements from 'cnst' producing a result + * twice the size of input i.e. unsigned word. + * Then this multiplication results of adjacent odd-even elements + * are added to the out vector. + */ +#define MSA_DPADD_UH(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dpadd_u_w((v4u32) out, \ + (v8u16) mult, (v8u16) cnst); \ +} + +#define MSA_DPADD_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DPADD_UH(RTYPE, mult0, cnst0, out0) \ + MSA_DPADD_UH(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DPADD_UH4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \ +{ \ + MSA_DPADD_UH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DPADD_UH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Dot product & addition of signed halfword vector elements. 
+ * Arguments : Inputs - mult + * cnst + * Outputs - out + * Return Type - as per RTYPE + * Details : Signed halfword elements from 'mult' are multiplied with + * signed halfword elements from 'cnst' producing a result + * twice the size of input i.e. signed word. + * Then this multiplication results of adjacent odd-even elements + * are added to the out vector. + */ +#define MSA_DPADD_SH(RTYPE, mult, cnst, out) \ +{ \ + out = (RTYPE) __msa_dpadd_s_w((v4i32) out, \ + (v8i16) mult, (v8i16) cnst); \ +} + +#define MSA_DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ +{ \ + MSA_DPADD_SH(RTYPE, mult0, cnst0, out0) \ + MSA_DPADD_SH(RTYPE, mult1, cnst1, out1) \ +} + +#define MSA_DPADD_SH4(RTYPE, mult0, mult1, mult2, mult3, \ + cnst0, cnst1, cnst2, cnst3, out0, out1, out2, out3) \ +{ \ + MSA_DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ + MSA_DPADD_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ +} + +/* Description : Clip all signed halfword elements of input vector between min & max. + * out = ((in) < (min)) ? (min) : (((in) > (max)) ? (max) : (in)). + * Arguments : Inputs - in (input vector) + * - min (min threshold) + * - max (max threshold) + * Outputs - in (output vector with clipped elements) + * Note : type of 'in' must be v8i16. + */ +#define MSA_CLIP_SH(in, min, max) \ +{ \ + in = __msa_max_s_h((v8i16) min, (v8i16) in); \ + in = __msa_min_s_h((v8i16) max, (v8i16) in); \ +} + +/* Description : Clip all signed halfword elements of input vector between 0 & 255. + * Arguments : Inputs - in (input vector) + * Outputs - in (output vector with clipped elements) + * Note : type of 'in' must be v8i16. 
+ */ +#define MSA_CLIP_SH_0_255(in) \ +{ \ + in = __msa_maxi_s_h((v8i16) in, 0); \ + in = (v8i16) __msa_sat_u_h((v8u16) in, 7); \ +} + +#define MSA_CLIP_SH2_0_255(in0, in1) \ +{ \ + MSA_CLIP_SH_0_255(in0); \ + MSA_CLIP_SH_0_255(in1); \ +} + +#define MSA_CLIP_SH4_0_255(in0, in1, in2, in3) \ +{ \ + MSA_CLIP_SH2_0_255(in0, in1); \ + MSA_CLIP_SH2_0_255(in2, in3); \ +} + +#define MSA_CLIP_SH8_0_255(in0, in1, in2, in3, \ + in4, in5, in6, in7) \ +{ \ + MSA_CLIP_SH4_0_255(in0, in1, in2, in3); \ + MSA_CLIP_SH4_0_255(in4, in5, in6, in7); \ +} + +/* Description : Clip all signed word elements of input vector between 0 & 255. + * Arguments : Inputs - in (input vector) + * Outputs - in (output vector with clipped elements) + * Note : type of 'in' must be v4i32. + */ +#define MSA_CLIP_SW_0_255(in) \ +{ \ + in = __msa_maxi_s_w((v4i32) in, 0); \ + in = (v4i32) __msa_sat_u_w((v4u32) in, 7); \ +} + +#define MSA_CLIP_SW2_0_255(in0, in1) \ +{ \ + MSA_CLIP_SW_0_255(in0); \ + MSA_CLIP_SW_0_255(in1); \ +} + +#define MSA_CLIP_SW4_0_255(in0, in1, in2, in3) \ +{ \ + MSA_CLIP_SW2_0_255(in0, in1); \ + MSA_CLIP_SW2_0_255(in2, in3); \ +} + +#define MSA_CLIP_SW8_0_255(in0, in1, in2, in3, \ + in4, in5, in6, in7) \ +{ \ + MSA_CLIP_SW4_0_255(in0, in1, in2, in3); \ + MSA_CLIP_SW4_0_255(in4, in5, in6, in7); \ +} + +/* Description : Addition of 16 unsigned byte elements. + * 16 unsigned byte elements of input vector are added + * together and resulted integer sum is returned. 
+ * Arguments : Inputs - in (unsigned byte vector) + * Outputs - sum_m (u32 sum) + * Return Type - unsigned word + */ +#define MSA_HADD_UB_U32(in, sum_m) \ +{ \ + v8u16 res_m; \ + v4u32 res0_m; \ + v2u64 res1_m, res2_m; \ + \ + res_m = __msa_hadd_u_h((v16u8) in, (v16u8) in); \ + res0_m = __msa_hadd_u_w(res_m, res_m); \ + res1_m = __msa_hadd_u_d(res0_m, res0_m); \ + res2_m = (v2u64) __msa_splati_d((v2i64) res1_m, 1); \ + res1_m += res2_m; \ + sum_m = __msa_copy_u_w((v4i32) res1_m, 0); \ +} + +/* Description : Addition of 8 unsigned halfword elements. + * 8 unsigned halfword elements of input vector are added + * together and resulted integer sum is returned. + * Arguments : Inputs - in (unsigned halfword vector) + * Outputs - sum_m (u32 sum) + * Return Type - unsigned word + */ +#define MSA_HADD_UH_U32(in, sum_m) \ +{ \ + v4u32 res_m; \ + v2u64 res0_m, res1_m; \ + \ + res_m = __msa_hadd_u_w((v8u16) in, (v8u16) in); \ + res0_m = __msa_hadd_u_d(res_m, res_m); \ + res1_m = (v2u64) __msa_splati_d((v2i64) res0_m, 1); \ + res0_m += res1_m; \ + sum_m = __msa_copy_u_w((v4i32) res0_m, 0); \ +} + +/* Description : Addition of 4 unsigned word elements. + * 4 unsigned word elements of input vector are added together and + * resulted integer sum is returned. + * Arguments : Inputs - in (unsigned word vector) + * Outputs - sum_m (u32 sum) + * Return Type - unsigned word + */ +#define MSA_HADD_UW_U32(in, sum_m) \ +{ \ + v2u64 res0_m, res1_m; \ + \ + res0_m = __msa_hadd_u_d((v4u32) in, (v4u32) in); \ + res1_m = (v2u64) __msa_splati_d((v2i64) res0_m, 1); \ + res0_m += res1_m; \ + sum_m = __msa_copy_u_w((v4i32) res0_m, 0); \ +} + +/* Description : Addition of 16 signed byte elements. + * 16 signed byte elements of input vector are added + * together and resulted integer sum is returned. 
+ * Arguments : Inputs - in (signed byte vector) + * Outputs - sum_m (i32 sum) + * Return Type - signed word + */ +#define MSA_HADD_SB_S32(in, sum_m) \ +{ \ + v8i16 res_m; \ + v4i32 res0_m; \ + v2i64 res1_m, res2_m; \ + \ + res_m = __msa_hadd_s_h((v16i8) in, (v16i8) in); \ + res0_m = __msa_hadd_s_w(res_m, res_m); \ + res1_m = __msa_hadd_s_d(res0_m, res0_m); \ + res2_m = __msa_splati_d(res1_m, 1); \ + res1_m += res2_m; \ + sum_m = __msa_copy_s_w((v4i32) res1_m, 0); \ +} + +/* Description : Addition of 8 signed halfword elements. + * 8 signed halfword elements of input vector are added + * together and resulted integer sum is returned. + * Arguments : Inputs - in (signed halfword vector) + * Outputs - sum_m (i32 sum) + * Return Type - signed word + */ +#define MSA_HADD_SH_S32(in, sum_m) \ +{ \ + v4i32 res_m; \ + v2i64 res0_m, res1_m; \ + \ + res_m = __msa_hadd_s_w((v8i16) in, (v8i16) in); \ + res0_m = __msa_hadd_s_d(res_m, res_m); \ + res1_m = __msa_splati_d(res0_m, 1); \ + res0_m += res1_m; \ + sum_m = __msa_copy_s_w((v4i32) res0_m, 0); \ +} + +/* Description : Addition of 4 signed word elements. + * 4 signed word elements of input vector are added together and + * resulted integer sum is returned. + * Arguments : Inputs - in (signed word vector) + * Outputs - sum_m (i32 sum) + * Return Type - signed word + */ +#define MSA_HADD_SW_S32(in, sum_m) \ +{ \ + v2i64 res0_m, res1_m; \ + \ + res0_m = __msa_hadd_s_d((v4i32) in, (v4i32) in); \ + res1_m = __msa_splati_d(res0_m, 1); \ + res0_m += res1_m; \ + sum_m = __msa_copy_s_w((v4i32) res0_m, 0); \ +} + +/* Description : Saturate the unsigned halfword element values to the max + * unsigned value of (sat_val+1 bits). + * The element data width remains unchanged. + * Arguments : Inputs - in, sat_val + * Outputs - in (in place) + * Return Type - v8u16 + * Details : Each unsigned halfword element from 'in' is saturated to the + * value generated with (sat_val+1) bit range. + * Results are in placed to original vectors. 
+ */ +#define MSA_SAT_UH(in, sat_val) \ +{ \ + in = __msa_sat_u_h(in, sat_val); \ +} + +#define MSA_SAT_UH2(in0, in1, sat_val) \ +{ \ + MSA_SAT_UH(in0, sat_val) \ + MSA_SAT_UH(in1, sat_val) \ +} + +#define MSA_SAT_UH4(in0, in1, in2, in3, sat_val) \ +{ \ + MSA_SAT_UH2(in0, in1, sat_val) \ + MSA_SAT_UH2(in2, in3, sat_val) \ +} + +/* Description : Saturate the signed halfword element values to the max + * signed value of (sat_val+1 bits). + * The element data width remains unchanged. + * Arguments : Inputs - in, sat_val + * Outputs - in (in place) + * Return Type - v8i16 + * Details : Each signed halfword element from 'in' is saturated to the + * value generated with (sat_val+1) bit range. + * Results are in placed to original vectors. + */ +#define MSA_SAT_SH(in, sat_val) \ +{ \ + in = __msa_sat_s_h(in, sat_val); \ +} + +#define MSA_SAT_SH2(in0, in1, sat_val) \ +{ \ + MSA_SAT_SH(in0, sat_val) \ + MSA_SAT_SH(in1, sat_val) \ +} + +#define MSA_SAT_SH4(in0, in1, in2, in3, sat_val) \ +{ \ + MSA_SAT_SH2(in0, in1, sat_val) \ + MSA_SAT_SH2(in2, in3, sat_val) \ +} + +/* Description : Saturate the unsigned word element values to the max + * unsigned value of (sat_val+1 bits). + * The element data width remains unchanged. + * Arguments : Inputs - in, sat_val + * Outputs - in (in place) + * Return Type - v4u32 + * Details : Each unsigned word element from 'in' is saturated to the + * value generated with (sat_val+1) bit range. + * Results are in placed to original vectors. + */ +#define MSA_SAT_UW(in, sat_val) \ +{ \ + in = __msa_sat_u_w(in, sat_val); \ +} + +#define MSA_SAT_UW2(in0, in1, sat_val) \ +{ \ + MSA_SAT_UW(in0, sat_val) \ + MSA_SAT_UW(in1, sat_val) \ +} + +#define MSA_SAT_UW4(in0, in1, in2, in3, sat_val) \ +{ \ + MSA_SAT_UW2(in0, in1, sat_val) \ + MSA_SAT_UW2(in2, in3, sat_val) \ +} + +/* Description : Saturate the signed word element values to the max + * signed value of (sat_val+1 bits). + * The element data width remains unchanged. 
+ * Arguments : Inputs - in, sat_val + * Outputs - in (in place) + * Return Type - v4i32 + * Details : Each signed word element from 'in' is saturated to the + * value generated with (sat_val+1) bit range. + * Results are in placed to original vectors. + */ +#define MSA_SAT_SW(in, sat_val) \ +{ \ + in = __msa_sat_s_w(in, sat_val); \ +} + +#define MSA_SAT_SW2(in0, in1, sat_val) \ +{ \ + MSA_SAT_SW(in0, sat_val) \ + MSA_SAT_SW(in1, sat_val) \ +} + +#define MSA_SAT_SW4(in0, in1, in2, in3, sat_val) \ +{ \ + MSA_SAT_SW2(in0, in1, sat_val) \ + MSA_SAT_SW2(in2, in3, sat_val) \ +} + +/* Description : Each byte element is logically xor'ed with immediate 128. + * Arguments : Inputs - in + * Outputs - in (in-place) + * Return Type - as per RTYPE + * Details : Each unsigned byte element from input vector 'in' is + * logically xor'ed with 128 and result is in-place stored in + * 'in' vector. + */ +#define MSA_XORI_B_128(RTYPE, in) \ +{ \ + in = (RTYPE) __msa_xori_b((v16u8) in, 128); \ +} + +#define MSA_XORI_B2_128(RTYPE, in0, in1) \ +{ \ + MSA_XORI_B_128(RTYPE, in0); \ + MSA_XORI_B_128(RTYPE, in1); \ +} + +#define MSA_XORI_B4_128(RTYPE, in0, in1, in2, in3) \ +{ \ + MSA_XORI_B2_128(RTYPE, in0, in1); \ + MSA_XORI_B2_128(RTYPE, in2, in3); \ +} + +/* Description : Shift right logical all byte elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical by + * number of bits respective element holds in vector 'shift' and + * result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRL_B(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srl_b((v16i8) in, (v16i8) shift); \ +} + +#define MSA_SRL_B2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRL_B(RTYPE, in0, shift); \ + MSA_SRL_B(RTYPE, in1, shift); \ +} + +#define MSA_SRL_B4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRL_B2(RTYPE, in0, in1, shift); \ + MSA_SRL_B2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical all halfword elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical by + * number of bits respective element holds in vector 'shift' and + * result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRL_H(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srl_h((v8i16) in, (v8i16) shift); \ +} + +#define MSA_SRL_H2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRL_H(RTYPE, in0, shift); \ + MSA_SRL_H(RTYPE, in1, shift); \ +} + +#define MSA_SRL_H4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRL_H2(RTYPE, in0, in1, shift); \ + MSA_SRL_H2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical all word elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical by + * number of bits respective element holds in vector 'shift' and + * result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRL_W(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srl_w((v4i32) in, (v4i32) shift); \ +} + +#define MSA_SRL_W2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRL_W(RTYPE, in0, shift); \ + MSA_SRL_W(RTYPE, in1, shift); \ +} + +#define MSA_SRL_W4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRL_W2(RTYPE, in0, in1, shift); \ + MSA_SRL_W2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical all double word elements of vector. 
+ * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical by + * number of bits respective element holds in vector 'shift' and + * result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRL_D(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srl_d((v2i64) in, (v2i64) shift); \ +} + +#define MSA_SRL_D2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRL_D(RTYPE, in0, shift); \ + MSA_SRL_D(RTYPE, in1, shift); \ +} + +#define MSA_SRL_D4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRL_D2(RTYPE, in0, in1, shift); \ + MSA_SRL_D2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical rounded all byte elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical rounded + * by number of bits respective element holds in vector 'shift' + * and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRLR_B(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srlr_b((v16i8) in, (v16i8) shift); \ +} + +#define MSA_SRLR_B2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRLR_B(RTYPE, in0, shift); \ + MSA_SRLR_B(RTYPE, in1, shift); \ +} + +#define MSA_SRLR_B4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRLR_B2(RTYPE, in0, in1, shift); \ + MSA_SRLR_B2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical rounded all halfword elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical rounded + * by number of bits respective element holds in vector 'shift' + * and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRLR_H(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srlr_h((v8i16) in, (v8i16) shift); \ +} + +#define MSA_SRLR_H2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRLR_H(RTYPE, in0, shift); \ + MSA_SRLR_H(RTYPE, in1, shift); \ +} + +#define MSA_SRLR_H4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRLR_H2(RTYPE, in0, in1, shift); \ + MSA_SRLR_H2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical rounded all word elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical rounded + * by number of bits respective element holds in vector 'shift' + * and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRLR_W(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srlr_w((v4i32) in, (v4i32) shift); \ +} + +#define MSA_SRLR_W2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRLR_W(RTYPE, in0, shift); \ + MSA_SRLR_W(RTYPE, in1, shift); \ +} + +#define MSA_SRLR_W4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRLR_W2(RTYPE, in0, in1, shift); \ + MSA_SRLR_W2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right logical rounded all double word elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right logical rounded + * by number of bits respective element holds in vector 'shift' + * and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRLR_D(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srlr_d((v2i64) in, (v2i64) shift); \ +} + +#define MSA_SRLR_D2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRLR_D(RTYPE, in0, shift); \ + MSA_SRLR_D(RTYPE, in1, shift); \ +} + +#define MSA_SRLR_D4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRLR_D2(RTYPE, in0, in1, shift); \ + MSA_SRLR_D2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all byte elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRAR_B(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srar_b((v16i8) in, (v16i8) shift); \ +} + +#define MSA_SRAR_B2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRAR_B(RTYPE, in0, shift); \ + MSA_SRAR_B(RTYPE, in1, shift); \ +} + +#define MSA_SRAR_B4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRAR_B2(RTYPE, in0, in1, shift); \ + MSA_SRAR_B2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all halfword elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRAR_H(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srar_h((v8i16) in, (v8i16) shift); \ +} + +#define MSA_SRAR_H2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRAR_H(RTYPE, in0, shift); \ + MSA_SRAR_H(RTYPE, in1, shift); \ +} + +#define MSA_SRAR_H4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRAR_H2(RTYPE, in0, in1, shift); \ + MSA_SRAR_H2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all word elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. + */ +#define MSA_SRAR_W(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srar_w((v4i32) in, (v4i32) shift); \ +} + +#define MSA_SRAR_W2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRAR_W(RTYPE, in0, shift); \ + MSA_SRAR_W(RTYPE, in1, shift); \ +} + +#define MSA_SRAR_W4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRAR_W2(RTYPE, in0, in1, shift); \ + MSA_SRAR_W2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all double word elements + * of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is a vector passed in. 
+ */ +#define MSA_SRAR_D(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srar_d((v2i64) in, (v2i64) shift); \ +} + +#define MSA_SRAR_D2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRAR_D(RTYPE, in0, shift); \ + MSA_SRAR_D(RTYPE, in1, shift); \ +} + +#define MSA_SRAR_D4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRAR_D2(RTYPE, in0, in1, shift); \ + MSA_SRAR_D2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all byte elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in vector + * 'shift' and result is in place written to 'in'. + * Here, 'shift' is a immediate number passed in. + */ +#define MSA_SRARI_B(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srari_b((v16i8) in, (v16i8) shift); \ +} + +#define MSA_SRARI_B2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRARI_B(RTYPE, in0, shift); \ + MSA_SRARI_B(RTYPE, in1, shift); \ +} + +#define MSA_SRARI_B4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRARI_B2(RTYPE, in0, in1, shift); \ + MSA_SRARI_B2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all halfword elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in vector + * 'shift' and result is in place written to 'in'. + * Here, 'shift' is a immediate number passed in. 
+ */ +#define MSA_SRARI_H(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srari_h((v8i16) in, (v8i16) shift); \ +} + +#define MSA_SRARI_H2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRARI_H(RTYPE, in0, shift); \ + MSA_SRARI_H(RTYPE, in1, shift); \ +} + +#define MSA_SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRARI_H2(RTYPE, in0, in1, shift); \ + MSA_SRARI_H2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all word elements of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in vector + * 'shift' and result is in place written to 'in'. + * Here, 'shift' is a immediate number passed in. + */ +#define MSA_SRARI_W(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srari_w((v4i32) in, (v4i32) shift); \ +} + +#define MSA_SRARI_W2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRARI_W(RTYPE, in0, shift); \ + MSA_SRARI_W(RTYPE, in1, shift); \ +} + +#define MSA_SRARI_W4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRARI_W2(RTYPE, in0, in1, shift); \ + MSA_SRARI_W2(RTYPE, in2, in3, shift); \ +} + +/* Description : Shift right arithmetic rounded all double word elements + * of vector. + * Arguments : Inputs - in, shift + * Outputs - in (in place) + * Return Type - as per RTYPE + * Details : Each element of vector 'in' is shifted right arithmetic + * rounded by number of bits respective element holds in + * vector 'shift' and result is in place written to 'in'. + * Here, 'shift' is a immediate number passed in. 
+ */ +#define MSA_SRARI_D(RTYPE, in, shift) \ +{ \ + in = (RTYPE) __msa_srari_d((v2i64) in, (v2i64) shift); \ +} + +#define MSA_SRARI_D2(RTYPE, in0, in1, shift) \ +{ \ + MSA_SRARI_D(RTYPE, in0, shift); \ + MSA_SRARI_D(RTYPE, in1, shift); \ +} + +#define MSA_SRARI_D4(RTYPE, in0, in1, in2, in3, shift) \ +{ \ + MSA_SRARI_D2(RTYPE, in0, in1, shift); \ + MSA_SRARI_D2(RTYPE, in2, in3, shift); \ +} + +/* Description : Transposes input 4x4 byte block. + * Arguments : Inputs - in0, in1, in2, in3 (input 4x4 byte block) + * Outputs - out0, out1, out2, out3 (output 4x4 byte block) + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE4x4_B(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + v16i8 zero_m = { 0 }; \ + \ + MSA_ILVR_B2(RTYPE, in2, in0, in3, in1, out2, out3); \ + out0 = (RTYPE) __msa_ilvr_b((v16i8) out3, (v16i8) out2); \ + out1 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out0, 4); \ + out2 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out1, 4); \ + out3 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out2, 4); \ +} + +/* Description : Transposes input 8x4 byte block into 4x8. + * Arguments : Inputs - in0, in1, in2 ~ in7 (input 8x4 byte block) + * Outputs - out0, out1, out2, out3 (output 4x8 byte block) + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE8x4_B(RTYPE, in0, in1, in2, in3, in4, in5, \ + in6, in7, out0, out1, out2, out3) \ +{ \ + v16i8 zero_m = { 0 }; \ + \ + MSA_ILVR_B4(RTYPE, in2, in0, in3, in1, in6, in4, in7, in5, \ + out0, out1, out2, out3); \ + MSA_ILVR_H2(RTYPE, out2, out0, out3, out1, out2, out3); \ + out0 = (RTYPE) __msa_ilvr_b((v16i8) out3, (v16i8) out2); \ + out1 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out0, 8); \ + out2 = (RTYPE) __msa_ilvl_b((v16i8) out3, (v16i8) out2); \ + out3 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out2, 8); \ +} + +/* Description : Transposes 16x4 block into 4x16 with byte elements in vectors. 
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, + * in8, in9, in10, in11, in12, in13, in14, in15 + * Outputs - out0, out1, out2, out3 + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE16x4_B(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + in8, in9, in10, in11, in12, in13, in14, in15, \ + out0, out1, out2, out3) \ +{ \ + v2i64 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + MSA_ILVR_B4(RTYPE, in2, in0, in3, in1, in6, in4, in7, in5, \ + out0, out1, out2, out3); \ + MSA_ILVR_H2(RTYPE, out2, out0, out3, out1, out2, out3); \ + MSA_ILVRL_B2(v2i64, out3, out2, tmp0_m, tmp1_m); \ + \ + MSA_ILVR_B4(RTYPE, in10, in8, in11, in9, in14, in12, in15, in13, \ + out0, out1, out2, out3); \ + MSA_ILVR_H2(RTYPE, out2, out0, out3, out1, out2, out3); \ + MSA_ILVRL_B2(v2i64, out3, out2, tmp2_m, tmp3_m); \ + \ + MSA_ILVRL_D4(RTYPE, tmp2_m, tmp0_m, tmp3_m, tmp1_m, \ + out0, out1, out2, out3); \ +} + +/* Description : Transposes input 8x8 byte block. + * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + * (input 8x8 byte block) + * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + * (output 8x8 byte block) + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE8x8_B(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) \ +{ \ + v16i8 zero_m = {0}; \ + \ + MSA_ILVR_B4(RTYPE, in2, in0, in3, in1, in6, in4, in7, in5, \ + out0, out1, out2, out3); \ + MSA_ILVRL_B4(RTYPE, out1, out0, out3, out2, out4, out5, out6, out7); \ + MSA_ILVRL_W4(RTYPE, out6, out4, out7, out5, out0, out2, out4, out6); \ + out1 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out0, 8); \ + out3 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out2, 8); \ + out5 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out4, 8); \ + out7 = (RTYPE) __msa_sldi_b(zero_m, (v16i8) out6, 8); \ +} + +/* Description : Transposes 16x8 block into 8x16 with byte elements in vectors. 
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, + * in8, in9, in10, in11, in12, in13, in14, in15 + * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE16x8_B(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + in8, in9, in10, in11, in12, in13, in14, in15, \ + out0, out1, out2, out3, out4, out5, out6, out7) \ +{ \ + v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + MSA_ILVEV_D4(RTYPE, in8, in0, in9, in1, in10, in2, in11, in3, \ + out7, out6, out5, out4); \ + MSA_ILVEV_D4(RTYPE, in12, in4, in13, in5, in14, in6, in15, in7, \ + out3, out2, out1, out0); \ + \ + tmp0_m = __msa_ilvev_b((v16i8) out6, (v16i8) out7); \ + tmp1_m = __msa_ilvod_b((v16i8) out6, (v16i8) out7); \ + out6 = (RTYPE) __msa_ilvev_b((v16i8) out4, (v16i8) out5); \ + out5 = (RTYPE) __msa_ilvod_b((v16i8) out4, (v16i8) out5); \ + tmp2_m = __msa_ilvev_b((v16i8) out2, (v16i8) out3); \ + tmp3_m = __msa_ilvod_b((v16i8) out2, (v16i8) out3); \ + out2 = (RTYPE) __msa_ilvev_b((v16i8) out0, (v16i8) out1); \ + out1 = (RTYPE) __msa_ilvod_b((v16i8) out0, (v16i8) out1); \ + \ + MSA_ILVEV_H2(RTYPE, out6, tmp0_m, out2, tmp2_m, out3, out7); \ + out0 = (RTYPE) __msa_ilvev_w((v4i32) out7, (v4i32) out3); \ + out4 = (RTYPE) __msa_ilvod_w((v4i32) out7, (v4i32) out3); \ + \ + MSA_ILVOD_H2(RTYPE, out6, tmp0_m, out2, tmp2_m, out3, out7); \ + out2 = (RTYPE) __msa_ilvev_w((v4i32) out7, (v4i32) out3); \ + out6 = (RTYPE) __msa_ilvod_w((v4i32) out7, (v4i32) out3); \ + \ + MSA_ILVOD_H2(v16i8, out5, tmp1_m, out1, tmp3_m, tmp0_m, tmp2_m); \ + out3 = (RTYPE) __msa_ilvev_w((v4i32) tmp2_m, (v4i32) tmp0_m); \ + out7 = (RTYPE) __msa_ilvod_w((v4i32) tmp2_m, (v4i32) tmp0_m); \ + \ + MSA_ILVEV_H2(v16i8, out5, tmp1_m, out1, tmp3_m, tmp0_m, tmp2_m); \ + out1 = (RTYPE) __msa_ilvev_w((v4i32) tmp2_m, (v4i32) tmp0_m); \ + out5 = (RTYPE) __msa_ilvod_w((v4i32) tmp2_m, (v4i32) tmp0_m); \ +} + +/* Description : Transposes 4x4 block with half word elements in vectors. 
+ * Arguments : Inputs - in0, in1, in2, in3 + * Outputs - out0, out1, out2, out3 + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE4x4_H(RTYPE, in0, in1, in2, in3, \ + out0, out1, out2, out3) \ +{ \ + MSA_ILVR_H2(RTYPE, in1, in0, in3, in2, out1, out3); \ + MSA_ILVRL_W2(RTYPE, out3, out1, out0, out2); \ + MSA_ILVL_D2(RTYPE, out0, out0, out2, out2, out1, out3); \ +} + +/* Description : Transposes 8x4 block with half word elements in vectors. + * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + * Outputs - out0, out1, out2, out3 + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE8x4_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3) \ +{ \ + v8i16 s0_m, s1_m; \ + v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + \ + MSA_ILVR_H2(v8i16, in6, in4, in7, in5, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp0_m, tmp1_m); \ + MSA_ILVR_H2(v8i16, in2, in0, in3, in1, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp2_m, tmp3_m); \ + MSA_PCKEV_D2(RTYPE, tmp0_m, tmp2_m, tmp1_m, tmp3_m, out0, out2); \ + MSA_PCKOD_D2(RTYPE, tmp0_m, tmp2_m, tmp1_m, tmp3_m, out1, out3); \ +} + +/* Description : Transposes 8x8 block with half word elements in vectors. 
+ * Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 + * Outputs - out0, out1, out2, out3, out4, out5, out6, out7 + * Return Type - RTYPE + * Details : + */ +#define MSA_TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ + out0, out1, out2, out3, out4, out5, out6, out7) \ +{ \ + v8i16 s0_m, s1_m; \ + v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ + v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ + \ + MSA_ILVR_H2(v8i16, in6, in4, in7, in5, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp0_m, tmp1_m); \ + MSA_ILVL_H2(v8i16, in6, in4, in7, in5, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp2_m, tmp3_m); \ + MSA_ILVR_H2(v8i16, in2, in0, in3, in1, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp4_m, tmp5_m); \ + MSA_ILVL_H2(v8i16, in2, in0, in3, in1, s0_m, s1_m); \ + MSA_ILVRL_H2(v8i16, s1_m, s0_m, tmp6_m, tmp7_m); \ + MSA_PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \ + tmp3_m, tmp7_m, out0, out2, out4, out6); \ + MSA_PCKOD_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, \ + tmp3_m, tmp7_m, out1, out3, out5, out7); \ +} + +#endif /* _MSA_MACROS_H */ diff --git a/chromium/third_party/openh264/src/codec/common/meson.build b/chromium/third_party/openh264/src/codec/common/meson.build index d7d15a61ee9..7f8acb685be 100644 --- a/chromium/third_party/openh264/src/codec/common/meson.build +++ b/chromium/third_party/openh264/src/codec/common/meson.build @@ -17,21 +17,41 @@ cpp_sources = [ 'src/WelsThreadPool.cpp', ] -asm_sources = [ - 'x86/cpuid.asm', - 'x86/dct.asm', - 'x86/deblock.asm', - 'x86/expand_picture.asm', - 'x86/intra_pred_com.asm', - 'x86/mb_copy.asm', - 'x86/mc_chroma.asm', - 'x86/mc_luma.asm', - 'x86/satd_sad.asm', - 'x86/vaa.asm', -] - -objs_asm = asm_gen.process(asm_sources) +objs_asm = [] +if ['x86', 'x86_64'].contains(cpu_family) + asm_sources = [ + 'x86/cpuid.asm', + 'x86/dct.asm', + 'x86/deblock.asm', + 'x86/expand_picture.asm', + 'x86/intra_pred_com.asm', + 'x86/mb_copy.asm', + 'x86/mc_chroma.asm', + 
'x86/mc_luma.asm', + 'x86/satd_sad.asm', + 'x86/vaa.asm', + ] + objs_asm += asm_gen.process(asm_sources) +elif cpu_family == 'arm' + cpp_sources += [ + 'arm/copy_mb_neon.S', + 'arm/deblocking_neon.S', + 'arm/expand_picture_neon.S', + 'arm/intra_pred_common_neon.S', + 'arm/mc_neon.S', + ] +elif cpu_family == 'aarch64' + cpp_sources += [ + 'arm64/copy_mb_aarch64_neon.S', + 'arm64/deblocking_aarch64_neon.S', + 'arm64/expand_picture_aarch64_neon.S', + 'arm64/intra_pred_common_aarch64_neon.S', + 'arm64/mc_aarch64_neon.S', + ] +else + error('Unsupported cpu_family @0@'.format(cpu_family)) +endif libcommon = static_library('common', cpp_sources, objs_asm, - include_directories: inc, + include_directories: [inc, casm_inc], dependencies: deps) diff --git a/chromium/third_party/openh264/src/codec/common/mips/copy_mb_msa.c b/chromium/third_party/openh264/src/codec/common/mips/copy_mb_msa.c new file mode 100644 index 00000000000..4ba01edc3bd --- /dev/null +++ b/chromium/third_party/openh264/src/codec/common/mips/copy_mb_msa.c @@ -0,0 +1,80 @@ +/*! + * \copy + * Copyright (C) 2020 Loongson Technology Co. Ltd. + * Contributed by Gu Xiwei(guxiwei-hf@loongson.cn) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file copy_mb_msa.c + * + * \brief MIPS MSA optimizations + * + * \date 14/05/2020 Created + * + ************************************************************************************* + */ + +#include <stdint.h> +#include "msa_macros.h" + +void WelsCopy8x8_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, + int32_t iStrideS ) { + v16u8 src0, src1; + for (int i = 0; i < 4; i++) { + MSA_LD_V2(v16u8, pSrc, iStrideS, src0, src1); + MSA_ST_D(src0, 0, pDst); + MSA_ST_D(src1, 0, pDst + iStrideD); + pSrc += 2 * iStrideS; + pDst += 2 * iStrideD; + } +} + +void WelsCopy8x16_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, + int32_t iStrideS) { + WelsCopy8x8_msa(pDst, iStrideD, pSrc, iStrideS); + WelsCopy8x8_msa(pDst + 8 * iStrideD, iStrideD, + pSrc + 8 * iStrideS, iStrideS); +} + +void WelsCopy16x8_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, + int32_t iStrideS) { + v16u8 src0, src1; + for (int i = 0; i < 4; i++) { + MSA_LD_V2(v16u8, pSrc, iStrideS, src0, src1); + MSA_ST_V2(v16u8, src0, src1, pDst, iStrideD); + pSrc += 2 * iStrideS; + pDst += 2 * iStrideD; + } +} + +void WelsCopy16x16_msa(uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, + int32_t iStrideS) { + WelsCopy16x8_msa(pDst, iStrideD, pSrc, iStrideS); + WelsCopy16x8_msa(pDst + 8 * iStrideD, iStrideD, + pSrc + 8 * iStrideS, iStrideS); +}; diff --git a/chromium/third_party/openh264/src/codec/common/mips/deblock_msa.c 
b/chromium/third_party/openh264/src/codec/common/mips/deblock_msa.c new file mode 100644 index 00000000000..0d3dfcb798e --- /dev/null +++ b/chromium/third_party/openh264/src/codec/common/mips/deblock_msa.c @@ -0,0 +1,1024 @@ +/*! + * \copy + * Copyright (C) 2019 Loongson Technology Co. Ltd. + * Contributed by Gu Xiwei(guxiwei-hf@loongson.cn) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file deblock_msa.c + * + * \brief MIPS MSA optimizations + * + * \date 15/05/2020 Created + * + ************************************************************************************* + */ + +#include <stdint.h> +#include "msa_macros.h" + +void DeblockLumaLt4V_msa(uint8_t *pPix, int32_t iStride, int32_t iAlpha, + int32_t iBeta, int8_t *pTc) { + v16u8 p0, p1, p2, q0, q1, q2; + v16i8 iTc, negiTc, negTc, flags, f; + v8i16 p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r, q1_l, q1_r, q2_l, q2_r; + v8i16 tc_l, tc_r, negTc_l, negTc_r; + v8i16 iTc_l, iTc_r, negiTc_l, negiTc_r; + // Use for temporary variable + v8i16 t0, t1, t2, t3; + v16u8 alpha, beta; + v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0, bDetaP2P0, bDetaQ2Q0; + v16i8 const_1_b = __msa_ldi_b(1); + v8i16 const_1_h = __msa_ldi_h(1); + v8i16 const_4_h = __msa_ldi_h(4); + v8i16 const_not_255_h = __msa_ldi_h(~255); + v16i8 zero = { 0 }; + v16i8 tc = { pTc[0 >> 2], pTc[1 >> 2], pTc[2 >> 2], pTc[3 >> 2], + pTc[4 >> 2], pTc[5 >> 2], pTc[6 >> 2], pTc[7 >> 2], + pTc[8 >> 2], pTc[9 >> 2], pTc[10 >> 2], pTc[11 >> 2], + pTc[12 >> 2], pTc[13 >> 2], pTc[14 >> 2], pTc[15 >> 2] }; + negTc = zero - tc; + iTc = tc; + + // Load data from pPix + MSA_LD_V4(v16u8, pPix - 3 * iStride, iStride, p2, p1, p0, q0); + MSA_LD_V2(v16u8, pPix + iStride, iStride, q1, q2); + alpha = (v16u8)__msa_fill_b(iAlpha); + beta = (v16u8)__msa_fill_b(iBeta); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP2P0 = __msa_asub_u_b(p2, p0); + bDetaQ2Q0 = __msa_asub_u_b(q2, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + bDetaP2P0 = (v16u8)__msa_clt_u_b(bDetaP2P0, beta); + bDetaQ2Q0 = (v16u8)__msa_clt_u_b(bDetaQ2Q0, beta); + + // Unsigned extend p0, p1, p2, q0, q1, q2 from 8 bits to 16 bits + MSA_ILVRL_B4(v8i16, zero, p0, zero, p1, + p0_r, p0_l, p1_r, p1_l); + 
MSA_ILVRL_B4(v8i16, zero, p2, zero, q0, + p2_r, p2_l, q0_r, q0_l); + MSA_ILVRL_B4(v8i16, zero, q1, zero, q2, + q1_r, q1_l, q2_r, q2_l); + // Signed extend tc, negTc from 8 bits to 16 bits + flags = __msa_clt_s_b(tc, zero); + MSA_ILVRL_B2(v8i16, flags, tc, tc_r, tc_l); + flags = __msa_clt_s_b(negTc, zero); + MSA_ILVRL_B2(v8i16, flags, negTc, negTc_r, negTc_l); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + flags = f & (v16i8)bDetaP2P0; + flags = __msa_ceq_b(flags, zero); + iTc += ((~flags) & const_1_b); + flags = f & (v16i8)bDetaQ2Q0; + flags = __msa_ceq_b(flags, zero); + iTc += ((~flags) & const_1_b); + negiTc = zero - iTc; + // Signed extend iTc, negiTc from 8 bits to 16 bits + flags = __msa_clt_s_b(iTc, zero); + MSA_ILVRL_B2(v8i16, flags, iTc, iTc_r, iTc_l); + flags = __msa_clt_s_b(negiTc, zero); + MSA_ILVRL_B2(v8i16, flags, negiTc, negiTc_r, negiTc_l); + + // Calculate the left part + // p1 + t0 = (p2_l + ((p0_l + q0_l + const_1_h) >> 1) - (p1_l << 1)) >> 1; + t0 = __msa_max_s_h(negTc_l, t0); + t0 = __msa_min_s_h(tc_l, t0); + t1 = p1_l + t0; + // q1 + t0 = (q2_l + ((p0_l + q0_l + const_1_h) >> 1) - (q1_l << 1)) >> 1; + t0 = __msa_max_s_h(negTc_l, t0); + t0 = __msa_min_s_h(tc_l, t0); + t2 = q1_l + t0; + // iDeta + t0 = (((q0_l - p0_l) << 2) + (p1_l - q1_l) + const_4_h) >> 3; + t0 = __msa_max_s_h(negiTc_l, t0); + t0 = __msa_min_s_h(iTc_l, t0); + p1_l = t1; + q1_l = t2; + // p0 + t1 = p0_l + t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + p0_l = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + // q0 + t1 = q0_l - t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + q0_l = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + + // Calculate the right part + // p1 + t0 = (p2_r + ((p0_r + q0_r + const_1_h) >> 1) - (p1_r << 1)) >> 1; + t0 = __msa_max_s_h(negTc_r, t0); + t0 = __msa_min_s_h(tc_r, t0); + t1 = p1_r + t0; + 
// q1 + t0 = (q2_r + ((p0_r + q0_r + const_1_h) >> 1) - (q1_r << 1)) >> 1; + t0 = __msa_max_s_h(negTc_r, t0); + t0 = __msa_min_s_h(tc_r, t0); + t2 = q1_r + t0; + // iDeta + t0 = (((q0_r - p0_r) << 2) + (p1_r - q1_r) + const_4_h) >> 3; + t0 = __msa_max_s_h(negiTc_r, t0); + t0 = __msa_min_s_h(iTc_r, t0); + p1_r = t1; + q1_r = t2; + // p0 + t1 = p0_r + t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + p0_r = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + // q0 + t1 = q0_r - t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + q0_r = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + + // Combined left and right + MSA_PCKEV_B4(v8i16, p1_l, p1_r, p0_l, p0_r, q0_l, q0_r, q1_l, q1_r, + t0, t1, t2, t3); + flags = (v16i8)__msa_cle_s_b(zero, tc); + flags &= f; + p0 = (v16u8)(((v16i8)t1 & flags) + (p0 & (~flags))); + q0 = (v16u8)(((v16i8)t2 & flags) + (q0 & (~flags))); + // Using t1, t2 as temporary flags + t1 = (v8i16)(flags & (~(__msa_ceq_b((v16i8)bDetaP2P0, zero)))); + p1 = (v16u8)(t0 & t1) + (p1 & (v16u8)(~t1)); + t2 = (v8i16)(flags & (~(__msa_ceq_b((v16i8)bDetaQ2Q0, zero)))); + q1 = (v16u8)(t3 & t2) + (q1 & (v16u8)(~t2)); + + // Store data to pPix + MSA_ST_V4(v16u8, p1, p0, q0, q1, pPix - 2 * iStride, iStride); +} + +void DeblockLumaEq4V_msa(uint8_t *pPix, int32_t iStride, int32_t iAlpha, + int32_t iBeta) { + v16u8 p0, p1, p2, p3, q0, q1, q2, q3; + v8i16 p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, p3_l, p3_r, + q0_l, q0_r, q1_l, q1_r, q2_l, q2_r, q3_l, q3_r; + v8i16 t0, t1, t2, t0_con1; + v8i16 s0, s1, s2, s0_con1; + v16u8 alpha, beta; + v16u8 iDetaP0Q0, bDetaP1P0, bDetaQ1Q0, bDetaP2P0, bDetaQ2Q0; + // Condition mask + v16u8 mask0, mask1; + v16i8 const_2_b = __msa_ldi_b(2); + v8i16 const_2_h = __msa_ldi_h(2); + v8i16 const_4_h = __msa_ldi_h(4); + v16i8 zero = { 0 }; + + // Load data from pPix + MSA_LD_V8(v16u8, pPix - 4 * iStride, iStride, p3, 
p2, p1, p0, + q0, q1, q2, q3); + // iAlpha and beta are uint8_t type + alpha = (v16u8)__msa_fill_b(iAlpha); + beta = (v16u8)__msa_fill_b(iBeta); + + // iDetaP0Q0 is not bool type + iDetaP0Q0 = __msa_asub_u_b(p0, q0); + + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP2P0 = __msa_asub_u_b(p2, p0); + bDetaQ2Q0 = __msa_asub_u_b(q2, q0); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + bDetaP2P0 = (v16u8)__msa_clt_u_b(bDetaP2P0, beta); + bDetaQ2Q0 = (v16u8)__msa_clt_u_b(bDetaQ2Q0, beta); + + // Unsigned extend p0, p1, p2, p3, q0, q1, q2, q3 from 8 bits to 16 bits + MSA_ILVRL_B4(v8i16, zero, p0, zero, p1, + p0_r, p0_l, p1_r, p1_l); + MSA_ILVRL_B4(v8i16, zero, p2, zero, p3, + p2_r, p2_l, p3_r, p3_l); + MSA_ILVRL_B4(v8i16, zero, q0, zero, q1, + q0_r, q0_l, q1_r, q1_l); + MSA_ILVRL_B4(v8i16, zero, q2, zero, q3, + q2_r, q2_l, q3_r, q3_l); + + // Calculate condition mask + // (iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0 + mask0 = (v16u8)__msa_clt_u_b(iDetaP0Q0, alpha); + mask0 &= bDetaP1P0; + mask0 &= bDetaQ1Q0; + // iDetaP0Q0 < ((iAlpha >> 2) + 2) + mask1 = (v16u8)((alpha >> 2) + const_2_b); + mask1 = (v16u8)__msa_clt_u_b(iDetaP0Q0, mask1); + + // Calculate the left part + // p0 + t0 = (p2_l + (p1_l << 1) + (p0_l << 1) + (q0_l << 1) + q1_l + const_4_h) >> 3; + // p1 + t1 = (p2_l + p1_l + p0_l + q0_l + const_2_h) >> 2; + // p2 + t2 = ((p3_l << 1) + p2_l + (p2_l << 1) + p1_l + p0_l + q0_l + const_4_h) >> 3; + // p0 condition 1 + t0_con1 = ((p1_l << 1) + p0_l + q1_l + const_2_h) >> 2; + // q0 + s0 = (p1_l + (p0_l << 1) + (q0_l << 1) + (q1_l << 1) + q2_l + const_4_h) >> 3; + // q1 + s1 = (p0_l + q0_l + q1_l + q2_l + const_2_h) >> 2; + // q2 + s2 = ((q3_l << 1) + q2_l + (q2_l << 1) + q1_l + q0_l + p0_l + const_4_h) >> 3; + // q0 condition 1 + s0_con1 = ((q1_l << 1) + q0_l + p1_l + const_2_h) >> 2; + // Move back + p0_l = t0; + p1_l = t1; + p2_l = t2; + q0_l = s0; + q1_l = s1; + q2_l = 
s2; + // Use p3_l, q3_l as tmp + p3_l = t0_con1; + q3_l = s0_con1; + + // Calculate the right part + // p0 + t0 = (p2_r + (p1_r << 1) + (p0_r << 1) + (q0_r << 1) + q1_r + const_4_h) >> 3; + // p1 + t1 = (p2_r + p1_r + p0_r + q0_r + const_2_h) >> 2; + // p2 + t2 = ((p3_r << 1) + p2_r + (p2_r << 1) + p1_r + p0_r + q0_r + const_4_h) >> 3; + // p0 condition 1 + t0_con1 = ((p1_r << 1) + p0_r + q1_r + const_2_h) >> 2; + // q0 + s0 = (p1_r + (p0_r << 1) + (q0_r << 1) + (q1_r << 1) + q2_r + const_4_h) >> 3; + // q1 + s1 = (p0_r + q0_r + q1_r + q2_r + const_2_h) >> 2; + // q2 + s2 = ((q3_r << 1) + q2_r + (q2_r << 1) + q1_r + q0_r + p0_r + const_4_h) >> 3; + // q0 condition 1 + s0_con1 = ((q1_r << 1) + q0_r + p1_r + const_2_h) >> 2; + // Move back + p0_r = t0; + p1_r = t1; + p2_r = t2; + q0_r = s0; + q1_r = s1; + q2_r = s2; + // Use p3_r, q3_r as tmp + p3_r = t0_con1; + q3_r = s0_con1; + + // Combined left and right + MSA_PCKEV_B4(v8i16, p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r, + t0, t1, t2, s0); + MSA_PCKEV_B4(v8i16, q1_l, q1_r, q2_l, q2_r, p3_l, p3_r, q3_l, q3_r, + s1, s2, t0_con1, s0_con1); + t0 = (v8i16)(((v16u8)t0 & mask0 & mask1 & bDetaP2P0) + ((v16u8)t0_con1 & + mask0 & mask1 & (~bDetaP2P0)) + ((v16u8)t0_con1 & mask0 & (~mask1))); + t1 = (v8i16)((v16u8)t1 & mask0 & mask1 & bDetaP2P0); + t2 = (v8i16)((v16u8)t2 & mask0 & mask1 & bDetaP2P0); + s0 = (v8i16)(((v16u8)s0 & mask0 & mask1 & bDetaQ2Q0) + ((v16u8)s0_con1 & + mask0 & mask1 & (~bDetaQ2Q0)) + ((v16u8)s0_con1 & mask0 & (~mask1))); + s1 = (v8i16)((v16u8)s1 & mask0 & mask1 & bDetaQ2Q0); + s2 = (v8i16)((v16u8)s2 & mask0 & mask1 & bDetaQ2Q0); + p0 = (v16u8)t0 + (p0 & (~mask0)); + p1 = (v16u8)t1 + (p1 & ~(mask0 & mask1 & bDetaP2P0)); + p2 = (v16u8)t2 + (p2 & ~(mask0 & mask1 & bDetaP2P0)); + q0 = (v16u8)s0 + (q0 & (~mask0)); + q1 = (v16u8)s1 + (q1 & ~(mask0 & mask1 & bDetaQ2Q0)); + q2 = (v16u8)s2 + (q2 & ~(mask0 & mask1 & bDetaQ2Q0)); + + // Store data to pPix + MSA_ST_V4(v16u8, p2, p1, p0, q0, pPix - 3 * iStride, 
iStride); + MSA_ST_V2(v16u8, q1, q2, pPix + iStride, iStride); +} + + +void DeblockLumaLt4H_msa(uint8_t* pPix, int32_t iStride, int32_t iAlpha, + int32_t iBeta, int8_t* pTc) { + v16u8 p0, p1, p2, q0, q1, q2; + v16i8 iTc, negiTc, negTc, flags, f; + v8i16 p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r, q1_l, q1_r, q2_l, q2_r; + v8i16 tc_l, tc_r, negTc_l, negTc_r; + v8i16 iTc_l, iTc_r, negiTc_l, negiTc_r; + // Use for temporary variable + v8i16 t0, t1, t2, t3; + v16u8 alpha, beta; + v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0, bDetaP2P0, bDetaQ2Q0; + v16i8 const_1_b = __msa_ldi_b(1); + v8i16 const_1_h = __msa_ldi_h(1); + v8i16 const_4_h = __msa_ldi_h(4); + v8i16 const_not_255_h = __msa_ldi_h(~255); + v16i8 zero = { 0 }; + v16i8 tc = { pTc[0 >> 2], pTc[1 >> 2], pTc[2 >> 2], pTc[3 >> 2], + pTc[4 >> 2], pTc[5 >> 2], pTc[6 >> 2], pTc[7 >> 2], + pTc[8 >> 2], pTc[9 >> 2], pTc[10 >> 2], pTc[11 >> 2], + pTc[12 >> 2], pTc[13 >> 2], pTc[14 >> 2], pTc[15 >> 2] }; + negTc = zero - tc; + iTc = tc; + + // Load data from pPix + MSA_LD_V8(v8i16, pPix - 3, iStride, t0, t1, t2, t3, q1_l, q1_r, q2_l, q2_r); + MSA_LD_V8(v8i16, pPix + 8 * iStride - 3, iStride, p0_l, p0_r, p1_l, p1_r, + p2_l, p2_r, q0_l, q0_r); + // Transpose 16x8 to 8x16, we just need p0, p1, p2, q0, q1, q2 + MSA_TRANSPOSE16x8_B(v16u8, t0, t1, t2, t3, q1_l, q1_r, q2_l, q2_r, + p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r, + p2, p1, p0, q0, q1, q2, alpha, beta); + + alpha = (v16u8)__msa_fill_b(iAlpha); + beta = (v16u8)__msa_fill_b(iBeta); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP2P0 = __msa_asub_u_b(p2, p0); + bDetaQ2Q0 = __msa_asub_u_b(q2, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + bDetaP2P0 = (v16u8)__msa_clt_u_b(bDetaP2P0, beta); + bDetaQ2Q0 = (v16u8)__msa_clt_u_b(bDetaQ2Q0, beta); + + // Unsigned extend p0, p1, p2, q0, 
q1, q2 from 8 bits to 16 bits + MSA_ILVRL_B4(v8i16, zero, p0, zero, p1, + p0_r, p0_l, p1_r, p1_l); + MSA_ILVRL_B4(v8i16, zero, p2, zero, q0, + p2_r, p2_l, q0_r, q0_l); + MSA_ILVRL_B4(v8i16, zero, q1, zero, q2, + q1_r, q1_l, q2_r, q2_l); + // Signed extend tc, negTc from 8 bits to 16 bits + flags = __msa_clt_s_b(tc, zero); + MSA_ILVRL_B2(v8i16, flags, tc, tc_r, tc_l); + flags = __msa_clt_s_b(negTc, zero); + MSA_ILVRL_B2(v8i16, flags, negTc, negTc_r, negTc_l); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + flags = f & (v16i8)bDetaP2P0; + flags = __msa_ceq_b(flags, zero); + iTc += ((~flags) & const_1_b); + flags = f & (v16i8)bDetaQ2Q0; + flags = __msa_ceq_b(flags, zero); + iTc += ((~flags) & const_1_b); + negiTc = zero - iTc; + // Signed extend iTc, negiTc from 8 bits to 16 bits + flags = __msa_clt_s_b(iTc, zero); + MSA_ILVRL_B2(v8i16, flags, iTc, iTc_r, iTc_l); + flags = __msa_clt_s_b(negiTc, zero); + MSA_ILVRL_B2(v8i16, flags, negiTc, negiTc_r, negiTc_l); + + // Calculate the left part + // p1 + t0 = (p2_l + ((p0_l + q0_l + const_1_h) >> 1) - (p1_l << 1)) >> 1; + t0 = __msa_max_s_h(negTc_l, t0); + t0 = __msa_min_s_h(tc_l, t0); + t1 = p1_l + t0; + // q1 + t0 = (q2_l + ((p0_l + q0_l + const_1_h) >> 1) - (q1_l << 1)) >> 1; + t0 = __msa_max_s_h(negTc_l, t0); + t0 = __msa_min_s_h(tc_l, t0); + t2 = q1_l + t0; + // iDeta + t0 = (((q0_l - p0_l) << 2) + (p1_l - q1_l) + const_4_h) >> 3; + t0 = __msa_max_s_h(negiTc_l, t0); + t0 = __msa_min_s_h(iTc_l, t0); + p1_l = t1; + q1_l = t2; + // p0 + t1 = p0_l + t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + p0_l = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + // q0 + t1 = q0_l - t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + q0_l = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + + // Calculate the right part + // p1 + t0 = (p2_r + ((p0_r + q0_r + const_1_h) >> 1) - 
(p1_r << 1)) >> 1; + t0 = __msa_max_s_h(negTc_r, t0); + t0 = __msa_min_s_h(tc_r, t0); + t1 = p1_r + t0; + // q1 + t0 = (q2_r + ((p0_r + q0_r + const_1_h) >> 1) - (q1_r << 1)) >> 1; + t0 = __msa_max_s_h(negTc_r, t0); + t0 = __msa_min_s_h(tc_r, t0); + t2 = q1_r + t0; + // iDeta + t0 = (((q0_r - p0_r) << 2) + (p1_r - q1_r) + const_4_h) >> 3; + t0 = __msa_max_s_h(negiTc_r, t0); + t0 = __msa_min_s_h(iTc_r, t0); + p1_r = t1; + q1_r = t2; + // p0 + t1 = p0_r + t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + p0_r = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + // q0 + t1 = q0_r - t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + q0_r = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + + // Combined left and right + MSA_PCKEV_B4(v8i16, p1_l, p1_r, p0_l, p0_r, q0_l, q0_r, q1_l, q1_r, + t0, t1, t2, t3); + flags = (v16i8)__msa_cle_s_b(zero, tc); + flags &= f; + p0 = (v16u8)(((v16i8)t1 & flags) + (p0 & (~flags))); + q0 = (v16u8)(((v16i8)t2 & flags) + (q0 & (~flags))); + // Using t1, t2 as temporary flags + t1 = (v8i16)(flags & (~(__msa_ceq_b((v16i8)bDetaP2P0, zero)))); + p1 = (v16u8)(t0 & t1) + (p1 & (v16u8)(~t1)); + t2 = (v8i16)(flags & (~(__msa_ceq_b((v16i8)bDetaQ2Q0, zero)))); + q1 = (v16u8)(t3 & t2) + (q1 & (v16u8)(~t2)); + + MSA_ILVRL_B4(v8i16, p0, p1, q1, q0, t0, t1, t2, t3); + MSA_ILVRL_H4(v16u8, t2, t0, t3, t1, p1, p0, q0, q1); + // Store data to pPix + MSA_ST_W8(p1, p0, 0, 1, 2, 3, 0, 1, 2, 3, pPix - 2, iStride); + MSA_ST_W8(q0, q1, 0, 1, 2, 3, 0, 1, 2, 3, pPix + 8 * iStride - 2, iStride); +} + +void DeblockLumaEq4H_msa(uint8_t *pPix, int32_t iStride, int32_t iAlpha, + int32_t iBeta) { + v16u8 p0, p1, p2, p3, q0, q1, q2, q3; + v8i16 p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, p3_l, p3_r, + q0_l, q0_r, q1_l, q1_r, q2_l, q2_r, q3_l, q3_r; + v8i16 t0, t1, t2, t0_con1; + v8i16 s0, s1, s2, s0_con1; + v16u8 alpha, beta; + v16u8 iDetaP0Q0, 
bDetaP1P0, bDetaQ1Q0, bDetaP2P0, bDetaQ2Q0; + // Condition mask + v16u8 mask0, mask1; + v16i8 const_2_b = __msa_ldi_b(2); + v8i16 const_2_h = __msa_ldi_h(2); + v8i16 const_4_h = __msa_ldi_h(4); + v16i8 zero = { 0 }; + + // Load data from pPix + MSA_LD_V8(v8i16, pPix - 4, iStride, p0_l, p0_r, p1_l, p1_r, + p2_l, p2_r, p3_l, p3_r); + MSA_LD_V8(v8i16, pPix + 8 * iStride - 4, iStride, + q0_l, q0_r, q1_l, q1_r, q2_l, q2_r, q3_l, q3_r); + // Transpose 16x8 to 8x16, we just need p0, p1, p2, p3, q0, q1, q2, q3 + MSA_TRANSPOSE16x8_B(v16u8, p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, p3_l, p3_r, + q0_l, q0_r, q1_l, q1_r, q2_l, q2_r, q3_l, q3_r, + p3, p2, p1, p0, q0, q1, q2, q3); + // iAlpha and beta are uint8_t type + alpha = (v16u8)__msa_fill_b(iAlpha); + beta = (v16u8)__msa_fill_b(iBeta); + + // iDetaP0Q0 is not bool type + iDetaP0Q0 = __msa_asub_u_b(p0, q0); + + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP2P0 = __msa_asub_u_b(p2, p0); + bDetaQ2Q0 = __msa_asub_u_b(q2, q0); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + bDetaP2P0 = (v16u8)__msa_clt_u_b(bDetaP2P0, beta); + bDetaQ2Q0 = (v16u8)__msa_clt_u_b(bDetaQ2Q0, beta); + + // Unsigned extend p0, p1, p2, p3, q0, q1, q2, q3 from 8 bits to 16 bits + MSA_ILVRL_B4(v8i16, zero, p0, zero, p1, + p0_r, p0_l, p1_r, p1_l); + MSA_ILVRL_B4(v8i16, zero, p2, zero, p3, + p2_r, p2_l, p3_r, p3_l); + MSA_ILVRL_B4(v8i16, zero, q0, zero, q1, + q0_r, q0_l, q1_r, q1_l); + MSA_ILVRL_B4(v8i16, zero, q2, zero, q3, + q2_r, q2_l, q3_r, q3_l); + + // Calculate condition mask + // (iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0 + mask0 = (v16u8)__msa_clt_u_b(iDetaP0Q0, alpha); + mask0 &= bDetaP1P0; + mask0 &= bDetaQ1Q0; + // iDetaP0Q0 < ((iAlpha >> 2) + 2) + mask1 = (v16u8)((alpha >> 2) + const_2_b); + mask1 = (v16u8)__msa_clt_u_b(iDetaP0Q0, mask1); + + // Calculate the left part + // p0 + t0 = (p2_l + (p1_l << 1) + (p0_l << 1) + (q0_l << 1) + q1_l + const_4_h) 
>> 3; + // p1 + t1 = (p2_l + p1_l + p0_l + q0_l + const_2_h) >> 2; + // p2 + t2 = ((p3_l << 1) + p2_l + (p2_l << 1) + p1_l + p0_l + q0_l + const_4_h) >> 3; + // p0 condition 1 + t0_con1 = ((p1_l << 1) + p0_l + q1_l + const_2_h) >> 2; + // q0 + s0 = (p1_l + (p0_l << 1) + (q0_l << 1) + (q1_l << 1) + q2_l + const_4_h) >> 3; + // q1 + s1 = (p0_l + q0_l + q1_l + q2_l + const_2_h) >> 2; + // q2 + s2 = ((q3_l << 1) + q2_l + (q2_l << 1) + q1_l + q0_l + p0_l + const_4_h) >> 3; + // q0 condition 1 + s0_con1 = ((q1_l << 1) + q0_l + p1_l + const_2_h) >> 2; + // Move back + p0_l = t0; + p1_l = t1; + p2_l = t2; + q0_l = s0; + q1_l = s1; + q2_l = s2; + // Use p3_l, q3_l as tmp + p3_l = t0_con1; + q3_l = s0_con1; + + // Calculate the right part + // p0 + t0 = (p2_r + (p1_r << 1) + (p0_r << 1) + (q0_r << 1) + q1_r + const_4_h) >> 3; + // p1 + t1 = (p2_r + p1_r + p0_r + q0_r + const_2_h) >> 2; + // p2 + t2 = ((p3_r << 1) + p2_r + (p2_r << 1) + p1_r + p0_r + q0_r + const_4_h) >> 3; + // p0 condition 1 + t0_con1 = ((p1_r << 1) + p0_r + q1_r + const_2_h) >> 2; + // q0 + s0 = (p1_r + (p0_r << 1) + (q0_r << 1) + (q1_r << 1) + q2_r + const_4_h) >> 3; + // q1 + s1 = (p0_r + q0_r + q1_r + q2_r + const_2_h) >> 2; + // q2 + s2 = ((q3_r << 1) + q2_r + (q2_r << 1) + q1_r + q0_r + p0_r + const_4_h) >> 3; + // q0 condition 1 + s0_con1 = ((q1_r << 1) + q0_r + p1_r + const_2_h) >> 2; + // Move back + p0_r = t0; + p1_r = t1; + p2_r = t2; + q0_r = s0; + q1_r = s1; + q2_r = s2; + // Use p3_r, q3_r as tmp + p3_r = t0_con1; + q3_r = s0_con1; + + // Combined left and right + MSA_PCKEV_B4(v8i16, p0_l, p0_r, p1_l, p1_r, p2_l, p2_r, q0_l, q0_r, + t0, t1, t2, s0); + MSA_PCKEV_B4(v8i16, q1_l, q1_r, q2_l, q2_r, p3_l, p3_r, q3_l, q3_r, + s1, s2, t0_con1, s0_con1); + t0 = (v8i16)(((v16u8)t0 & mask0 & mask1 & bDetaP2P0) + ((v16u8)t0_con1 & + mask0 & mask1 & (~bDetaP2P0)) + ((v16u8)t0_con1 & mask0 & (~mask1))); + t1 = (v8i16)((v16u8)t1 & mask0 & mask1 & bDetaP2P0); + t2 = (v8i16)((v16u8)t2 & mask0 & mask1 & 
bDetaP2P0); + s0 = (v8i16)(((v16u8)s0 & mask0 & mask1 & bDetaQ2Q0) + ((v16u8)s0_con1 & + mask0 & mask1 & (~bDetaQ2Q0)) + ((v16u8)s0_con1 & mask0 & (~mask1))); + s1 = (v8i16)((v16u8)s1 & mask0 & mask1 & bDetaQ2Q0); + s2 = (v8i16)((v16u8)s2 & mask0 & mask1 & bDetaQ2Q0); + p0 = (v16u8)t0 + (p0 & (~mask0)); + p1 = (v16u8)t1 + (p1 & ~(mask0 & mask1 & bDetaP2P0)); + p2 = (v16u8)t2 + (p2 & ~(mask0 & mask1 & bDetaP2P0)); + q0 = (v16u8)s0 + (q0 & (~mask0)); + q1 = (v16u8)s1 + (q1 & ~(mask0 & mask1 & bDetaQ2Q0)); + q2 = (v16u8)s2 + (q2 & ~(mask0 & mask1 & bDetaQ2Q0)); + + MSA_ILVRL_B4(v8i16, p1, p2, q0, p0, t0, s0, t1, s1); + MSA_ILVRL_B2(v8i16, q2, q1, t2, s2); + MSA_ILVRL_H4(v16u8, t1, t0, s1, s0, p2, p1, p0, q0); + // Store data to pPix + MSA_ST_W8(p2, p1, 0, 1, 2, 3, 0, 1, 2, 3, pPix - 3, iStride); + MSA_ST_W8(p0, q0, 0, 1, 2, 3, 0, 1, 2, 3, pPix + 8 * iStride - 3, iStride); + MSA_ST_H8(t2, 0, 1, 2, 3, 4, 5, 6, 7, pPix + 1, iStride); + MSA_ST_H8(s2, 0, 1, 2, 3, 4, 5, 6, 7, pPix + 8 * iStride + 1, iStride); +} + +void DeblockChromaLt4V_msa(uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, + int32_t iAlpha, int32_t iBeta, int8_t* pTc) { + v16u8 p0, p1, q0, q1; + v8i16 p0_e, p1_e, q0_e, q1_e; + v16i8 negTc, flags, f; + v8i16 tc_e, negTc_e; + // Use for temporary variable + v8i16 t0, t1, t2, t3; + v16u8 alpha, beta; + v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; + v8i16 const_4_h = __msa_ldi_h(4); + v8i16 const_not_255_h = __msa_ldi_h(~255); + v16i8 zero = { 0 }; + v16i8 tc = { pTc[0 >> 1], pTc[1 >> 1], pTc[2 >> 1], pTc[3 >> 1], + pTc[4 >> 1], pTc[5 >> 1], pTc[6 >> 1], pTc[7 >> 1] }; + negTc = zero - tc; + + alpha = (v16u8)__msa_fill_b(iAlpha); + beta = (v16u8)__msa_fill_b(iBeta); + // Signed extend tc, negTc from 8 bits to 16 bits + flags = __msa_clt_s_b(tc, zero); + MSA_ILVR_B(v8i16, flags, tc, tc_e); + flags = __msa_clt_s_b(negTc, zero); + MSA_ILVR_B(v8i16, flags, negTc, negTc_e); + + // Cb + // Load data from pPixCb + MSA_LD_V4(v16u8, pPixCb - 2 * iStride, iStride, p1, p0, 
q0, q1); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + + // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits + MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1, + p0_e, p1_e, q0_e, q1_e); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + + // iDeta + t0 = (((q0_e - p0_e) << 2) + (p1_e - q1_e) + const_4_h) >> 3; + t0 = __msa_max_s_h(negTc_e, t0); + t0 = __msa_min_s_h(tc_e, t0); + // p0 + t1 = p0_e + t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + p0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + // q0 + t1 = q0_e - t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + q0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + + MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1); + flags = (v16i8)__msa_cle_s_b(zero, tc); + flags &= f; + p0 = (v16u8)(((v16i8)t0 & flags) + (p0 & (~flags))); + q0 = (v16u8)(((v16i8)t1 & flags) + (q0 & (~flags))); + // Store data to pPixCb + MSA_ST_D(p0, 0, pPixCb - iStride); + MSA_ST_D(q0, 0, pPixCb); + + // Cr + // Load data from pPixCr + MSA_LD_V4(v16u8, pPixCr - 2 * iStride, iStride, p1, p0, q0, q1); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + + // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits + MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1, + p0_e, p1_e, q0_e, q1_e); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + + // iDeta + t0 = (((q0_e - p0_e) << 2) + (p1_e - q1_e) + 
const_4_h) >> 3; + t0 = __msa_max_s_h(negTc_e, t0); + t0 = __msa_min_s_h(tc_e, t0); + // p0 + t1 = p0_e + t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + p0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + // q0 + t1 = q0_e - t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + q0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + + MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1); + flags = (v16i8)__msa_cle_s_b(zero, tc); + flags &= f; + p0 = (v16u8)(((v16i8)t0 & flags) + (p0 & (~flags))); + q0 = (v16u8)(((v16i8)t1 & flags) + (q0 & (~flags))); + // Store data to pPixCr + MSA_ST_D(p0, 0, pPixCr - iStride); + MSA_ST_D(q0, 0, pPixCr); +} + +void DeblockChromaEq4V_msa(uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, + int32_t iAlpha, int32_t iBeta) { + v16u8 p0, p1, q0, q1; + v8i16 p0_e, p1_e, q0_e, q1_e; + v16i8 f; + // Use for temporary variable + v8i16 t0, t1; + v16u8 alpha, beta; + v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; + v8i16 const_2_h = __msa_ldi_h(2); + v16i8 zero = { 0 }; + + alpha = (v16u8)__msa_fill_b(iAlpha); + beta = (v16u8)__msa_fill_b(iBeta); + + // Cb + // Load data from pPixCb + MSA_LD_V4(v16u8, pPixCb - 2 * iStride, iStride, p1, p0, q0, q1); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + + // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits + MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1, + p0_e, p1_e, q0_e, q1_e); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + + // p0 + p0_e = ((p1_e << 1) + p0_e + q1_e + const_2_h) >> 2; + // q0 + q0_e = ((q1_e << 1) + q0_e + p1_e + const_2_h) >> 2; + + MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1); 
+ p0 = (v16u8)(((v16i8)t0 & f) + (p0 & (~f))); + q0 = (v16u8)(((v16i8)t1 & f) + (q0 & (~f))); + // Store data to pPixCb + MSA_ST_D(p0, 0, pPixCb - iStride); + MSA_ST_D(q0, 0, pPixCb); + + // Cr + // Load data from pPixCr + MSA_LD_V4(v16u8, pPixCr - 2 * iStride, iStride, p1, p0, q0, q1); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + + // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits + MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1, + p0_e, p1_e, q0_e, q1_e); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + + // p0 + p0_e = ((p1_e << 1) + p0_e + q1_e + const_2_h) >> 2; + // q0 + q0_e = ((q1_e << 1) + q0_e + p1_e + const_2_h) >> 2; + + MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1); + p0 = (v16u8)(((v16i8)t0 & f) + (p0 & (~f))); + q0 = (v16u8)(((v16i8)t1 & f) + (q0 & (~f))); + // Store data to pPixCr + MSA_ST_D(p0, 0, pPixCr - iStride); + MSA_ST_D(q0, 0, pPixCr); +} + +void DeblockChromaLt4H_msa(uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, + int32_t iAlpha, int32_t iBeta, int8_t* pTc) { + v16u8 p0, p1, q0, q1; + v8i16 p0_e, p1_e, q0_e, q1_e; + v16i8 negTc, flags, f; + v8i16 tc_e, negTc_e; + // Use for temporary variable + v8i16 t0, t1, t2, t3; + v16u8 alpha, beta; + v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; + v8i16 const_4_h = __msa_ldi_h(4); + v8i16 const_not_255_h = __msa_ldi_h(~255); + v16i8 zero = { 0 }; + v16i8 tc = { pTc[0 >> 1], pTc[1 >> 1], pTc[2 >> 1], pTc[3 >> 1], + pTc[4 >> 1], pTc[5 >> 1], pTc[6 >> 1], pTc[7 >> 1] }; + negTc = zero - tc; + + alpha = (v16u8)__msa_fill_b(iAlpha); + beta = (v16u8)__msa_fill_b(iBeta); + // Signed extend tc, negTc from 8 bits to 16 bits + flags = __msa_clt_s_b(tc, zero); + MSA_ILVR_B(v8i16, flags, tc, tc_e); + flags = __msa_clt_s_b(negTc, zero); + 
MSA_ILVR_B(v8i16, flags, negTc, negTc_e); + + // Cb + // Load data from pPixCb + MSA_LD_V8(v8i16, pPixCb - 2, iStride, p1_e, p0_e, q0_e, q1_e, + t0, t1, t2, t3); + // Transpose 8x4 to 4x8, we just need p0, p1, q0, q1 + MSA_TRANSPOSE8x4_B(v16u8, p1_e, p0_e, q0_e, q1_e, t0, t1, t2, t3, + p1, p0, q0, q1); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + + // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits + MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1, + p0_e, p1_e, q0_e, q1_e); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + + // iDeta + t0 = (((q0_e - p0_e) << 2) + (p1_e - q1_e) + const_4_h) >> 3; + t0 = __msa_max_s_h(negTc_e, t0); + t0 = __msa_min_s_h(tc_e, t0); + // p0 + t1 = p0_e + t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + p0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + // q0 + t1 = q0_e - t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + q0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + + MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1); + flags = (v16i8)__msa_cle_s_b(zero, tc); + flags &= f; + p0 = (v16u8)(((v16i8)t0 & flags) + (p0 & (~flags))); + q0 = (v16u8)(((v16i8)t1 & flags) + (q0 & (~flags))); + // Store data to pPixCb + MSA_ILVR_B(v16u8, q0, p0, p0); + MSA_ST_H8(p0, 0, 1, 2, 3, 4, 5, 6, 7, pPixCb - 1, iStride); + + // Cr + // Load data from pPixCr + MSA_LD_V8(v8i16, pPixCr - 2, iStride, p1_e, p0_e, q0_e, q1_e, + t0, t1, t2, t3); + // Transpose 8x4 to 4x8, we just need p0, p1, q0, q1 + MSA_TRANSPOSE8x4_B(v16u8, p1_e, p0_e, q0_e, q1_e, t0, t1, t2, t3, + p1, p0, q0, q1); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = 
__msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + + // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits + MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1, + p0_e, p1_e, q0_e, q1_e); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + + // iDeta + t0 = (((q0_e - p0_e) << 2) + (p1_e - q1_e) + const_4_h) >> 3; + t0 = __msa_max_s_h(negTc_e, t0); + t0 = __msa_min_s_h(tc_e, t0); + // p0 + t1 = p0_e + t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + p0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + // q0 + t1 = q0_e - t0; + t2 = t1 & const_not_255_h; + t3 = __msa_cle_s_h((v8i16)zero, t1); + flags = (v16i8)__msa_ceq_h(t2, (v8i16)zero); + q0_e = (t1 & (v8i16)flags) + (t3 & (v8i16)(~flags)); + + MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1); + flags = (v16i8)__msa_cle_s_b(zero, tc); + flags &= f; + p0 = (v16u8)(((v16i8)t0 & flags) + (p0 & (~flags))); + q0 = (v16u8)(((v16i8)t1 & flags) + (q0 & (~flags))); + // Store data to pPixCr + MSA_ILVR_B(v16u8, q0, p0, p0); + MSA_ST_H8(p0, 0, 1, 2, 3, 4, 5, 6, 7, pPixCr - 1, iStride); +} + +void DeblockChromaEq4H_msa(uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, + int32_t iAlpha, int32_t iBeta) { + v16u8 p0, p1, q0, q1; + v8i16 p0_e, p1_e, q0_e, q1_e; + v16i8 f; + // Use for temporary variable + v8i16 t0, t1, t2, t3; + v16u8 alpha, beta; + v16u8 bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; + v8i16 const_2_h = __msa_ldi_h(2); + v16i8 zero = { 0 }; + + alpha = (v16u8)__msa_fill_b(iAlpha); + beta = (v16u8)__msa_fill_b(iBeta); + + // Cb + // Load data from pPixCb + MSA_LD_V8(v8i16, pPixCb - 2, iStride, p1_e, p0_e, q0_e, q1_e, + t0, t1, t2, t3); + // Transpose 8x4 to 4x8, we just need p0, p1, q0, q1 + MSA_TRANSPOSE8x4_B(v16u8, p1_e, p0_e, q0_e, q1_e, t0, t1, t2, t3, + 
p1, p0, q0, q1); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + + // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits + MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1, + p0_e, p1_e, q0_e, q1_e); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + + // p0 + p0_e = ((p1_e << 1) + p0_e + q1_e + const_2_h) >> 2; + // q0 + q0_e = ((q1_e << 1) + q0_e + p1_e + const_2_h) >> 2; + + MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1); + p0 = (v16u8)(((v16i8)t0 & f) + (p0 & (~f))); + q0 = (v16u8)(((v16i8)t1 & f) + (q0 & (~f))); + // Store data to pPixCb + MSA_ILVR_B(v16u8, q0, p0, p0); + MSA_ST_H8(p0, 0, 1, 2, 3, 4, 5, 6, 7, pPixCb - 1, iStride); + + // Cr + // Load data from pPixCr + MSA_LD_V8(v8i16, pPixCr - 2, iStride, p1_e, p0_e, q0_e, q1_e, + t0, t1, t2, t3); + // Transpose 8x4 to 4x8, we just need p0, p1, q0, q1 + MSA_TRANSPOSE8x4_B(v16u8, p1_e, p0_e, q0_e, q1_e, t0, t1, t2, t3, + p1, p0, q0, q1); + + bDetaP0Q0 = __msa_asub_u_b(p0, q0); + bDetaP1P0 = __msa_asub_u_b(p1, p0); + bDetaQ1Q0 = __msa_asub_u_b(q1, q0); + bDetaP0Q0 = (v16u8)__msa_clt_u_b(bDetaP0Q0, alpha); + bDetaP1P0 = (v16u8)__msa_clt_u_b(bDetaP1P0, beta); + bDetaQ1Q0 = (v16u8)__msa_clt_u_b(bDetaQ1Q0, beta); + + // Unsigned extend p0, p1, q0, q1 from 8 bits to 16 bits + MSA_ILVR_B4(v8i16, zero, p0, zero, p1, zero, q0, zero, q1, + p0_e, p1_e, q0_e, q1_e); + + f = (v16i8)bDetaP0Q0 & (v16i8)bDetaP1P0 & (v16i8)bDetaQ1Q0; + + // p0 + p0_e = ((p1_e << 1) + p0_e + q1_e + const_2_h) >> 2; + // q0 + q0_e = ((q1_e << 1) + q0_e + p1_e + const_2_h) >> 2; + + MSA_PCKEV_B2(v8i16, p0_e, p0_e, q0_e, q0_e, t0, t1); + p0 = (v16u8)(((v16i8)t0 & f) + (p0 & (~f))); + q0 = (v16u8)(((v16i8)t1 & f) + (q0 & (~f))); + // Store data to pPixCr + MSA_ILVR_B(v16u8, q0, p0, p0); + 
MSA_ST_H8(p0, 0, 1, 2, 3, 4, 5, 6, 7, pPixCr - 1, iStride); +} + +void WelsNonZeroCount_msa(int8_t* pNonZeroCount) { + v16u8 src0, src1; + v16u8 zero = { 0 }; + v16u8 const_1 = (v16u8)__msa_fill_b(0x01); + + MSA_LD_V2(v16u8, pNonZeroCount, 16, src0, src1); + src0 = (v16u8)__msa_ceq_b((v16i8)zero, (v16i8)src0); + src1 = (v16u8)__msa_ceq_b((v16i8)zero, (v16i8)src1); + src0 += const_1; + src1 += const_1; + MSA_ST_V(v16u8, src0, pNonZeroCount); + MSA_ST_D(src1, 0, pNonZeroCount + 16); +} diff --git a/chromium/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp b/chromium/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp index 8aa67f11659..4477e34e34c 100644 --- a/chromium/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp +++ b/chromium/third_party/openh264/src/codec/common/src/WelsThreadLib.cpp @@ -71,18 +71,6 @@ #if defined(_WIN32) || defined(__CYGWIN__) -#ifdef WINAPI_FAMILY -#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) -#define WP80 - -#define InitializeCriticalSection(x) InitializeCriticalSectionEx(x, 0, 0) -#define GetSystemInfo(x) GetNativeSystemInfo(x) -#define CreateEvent(attr, reset, init, name) CreateEventEx(attr, name, ((reset) ? CREATE_EVENT_MANUAL_RESET : 0) | ((init) ? 
CREATE_EVENT_INITIAL_SET : 0), EVENT_ALL_ACCESS) -#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE) -#define WaitForMultipleObjects(a, b, c, d) WaitForMultipleObjectsEx(a, b, c, d, FALSE) -#endif -#endif - WELS_THREAD_ERROR_CODE WelsMutexInit (WELS_MUTEX* mutex) { InitializeCriticalSection (mutex); diff --git a/chromium/third_party/openh264/src/codec/common/src/cpu.cpp b/chromium/third_party/openh264/src/codec/common/src/cpu.cpp index a39fd064578..fb5d3dae4c8 100644 --- a/chromium/third_party/openh264/src/codec/common/src/cpu.cpp +++ b/chromium/third_party/openh264/src/codec/common/src/cpu.cpp @@ -309,12 +309,45 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { #elif defined(mips) /* for loongson */ +static uint32_t get_cpu_flags_from_cpuinfo(void) +{ + uint32_t flags = 0; + +# ifdef __linux__ + FILE* fp = fopen("/proc/cpuinfo", "r"); + if (!fp) + return flags; + + char buf[200]; + memset(buf, 0, sizeof(buf)); + while (fgets(buf, sizeof(buf), fp)) { + if (!strncmp(buf, "model name", strlen("model name"))) { + if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B") || + strstr(buf, "Loongson-2K")) { + flags |= WELS_CPU_MMI; + } + break; + } + } + while (fgets(buf, sizeof(buf), fp)) { + if(!strncmp(buf, "ASEs implemented", strlen("ASEs implemented"))) { + if (strstr(buf, "loongson-mmi") && strstr(buf, "loongson-ext")) { + flags |= WELS_CPU_MMI; + } + if (strstr(buf, "msa")) { + flags |= WELS_CPU_MSA; + } + break; + } + } + fclose(fp); +# endif + + return flags; +} + uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { -#if defined(HAVE_MMI) - return WELS_CPU_MMI; -#else - return 0; -#endif + return get_cpu_flags_from_cpuinfo(); } #else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */ @@ -324,5 +357,3 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) { } #endif - - diff --git a/chromium/third_party/openh264/src/codec/common/src/expand_pic.cpp 
b/chromium/third_party/openh264/src/codec/common/src/expand_pic.cpp index 65ffb3fddc2..fcd96540e92 100644 --- a/chromium/third_party/openh264/src/codec/common/src/expand_pic.cpp +++ b/chromium/third_party/openh264/src/codec/common/src/expand_pic.cpp @@ -33,6 +33,240 @@ #include "expand_pic.h" #include "cpu_core.h" +static inline void MBPadTopLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride) { + const uint8_t kuiTL = pDst[0]; + int32_t i = 0; + uint8_t* pTopLeft = pDst; + do { + pTopLeft -= kiStride; + // pad pTop + memcpy (pTopLeft, pDst, 16); // confirmed_safe_unsafe_usage + memset (pTopLeft - PADDING_LENGTH, kuiTL, PADDING_LENGTH); //pTop left + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadTopLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) { + uint8_t* pTopLine = pDst + (kiMbX << 4); + int32_t i = 0; + uint8_t* pTop = pTopLine; + do { + pTop -= kiStride; + // pad pTop + memcpy (pTop, pTopLine, 16); // confirmed_safe_unsafe_usage + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadBottomLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX, + const int32_t& kiPicH) { + uint8_t* pBottomLine = pDst + (kiPicH - 1) * kiStride + (kiMbX << 4); + int32_t i = 0; + uint8_t* pBottom = pBottomLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom, pBottomLine, 16); // confirmed_safe_unsafe_usage + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadTopRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) { + uint8_t* pTopRight = pDst + kiPicW; + const uint8_t kuiTR = pTopRight[-1]; + int32_t i = 0; + uint8_t* pTop = pTopRight; + do { + pTop -= kiStride; + // pad pTop + memcpy (pTop - 16, pTopRight - 16, 16); // confirmed_safe_unsafe_usage + memset (pTop, kuiTR, PADDING_LENGTH); //pTop Right + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadBottomLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) { + uint8_t* 
pDstLastLine = pDst + (kiPicH - 1) * kiStride; + const uint8_t kuiBL = pDstLastLine[0]; + int32_t i = 0; + uint8_t* pBottom = pDstLastLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom, pDstLastLine, 16); // confirmed_safe_unsafe_usage + memset (pBottom - PADDING_LENGTH, kuiBL, PADDING_LENGTH); //pBottom left + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadBottomRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, + const int32_t& kiPicH) { + uint8_t* pDstLastLine = pDst + (kiPicH - 1) * kiStride + kiPicW; + const uint8_t kuiBR = pDstLastLine[-1]; + int32_t i = 0; + uint8_t* pBottom = pDstLastLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom - 16, pDstLastLine - 16, 16); // confirmed_safe_unsafe_usage + memset (pBottom, kuiBR, PADDING_LENGTH); //pBottom Right + } while (++i < PADDING_LENGTH); +} + +static inline void MBPadLeftLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) { + uint8_t* pTmp = pDst + (kiMbY << 4) * kiStride; + for (int32_t i = 0; i < 16; ++i) { + // pad left + memset (pTmp - PADDING_LENGTH, pTmp[0], PADDING_LENGTH); + pTmp += kiStride; + } +} + +static inline void MBPadRightLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY, + const int32_t& kiPicW) { + uint8_t* pTmp = pDst + (kiMbY << 4) * kiStride + kiPicW; + for (int32_t i = 0; i < 16; ++i) { + // pad right + memset (pTmp, pTmp[-1], PADDING_LENGTH); + pTmp += kiStride; + } +} + +static inline void MBPadTopChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX) { + uint8_t* pTopLine = pDst + (kiMbX << 3); + int32_t i = 0; + uint8_t* pTop = pTopLine; + do { + pTop -= kiStride; + // pad pTop + memcpy (pTop, pTopLine, 8); // confirmed_safe_unsafe_usage + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadBottomChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbX, + const int32_t& kiPicH) { + uint8_t* pBottomLine = pDst + 
(kiPicH - 1) * kiStride + (kiMbX << 3); + int32_t i = 0; + uint8_t* pBottom = pBottomLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom, pBottomLine, 8); // confirmed_safe_unsafe_usage + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadTopLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride) { + const uint8_t kuiTL = pDst[0]; + int32_t i = 0; + uint8_t* pTopLeft = pDst; + do { + pTopLeft -= kiStride; + // pad pTop + memcpy (pTopLeft, pDst, 8); // confirmed_safe_unsafe_usage + memset (pTopLeft - CHROMA_PADDING_LENGTH, kuiTL, CHROMA_PADDING_LENGTH); //pTop left + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadTopRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW) { + uint8_t* pTopRight = pDst + kiPicW; + const uint8_t kuiTR = pTopRight[-1]; + int32_t i = 0; + uint8_t* pTop = pTopRight; + do { + pTop -= kiStride; + // pad pTop + memcpy (pTop - 8, pTopRight - 8, 8); // confirmed_safe_unsafe_usage + memset (pTop, kuiTR, CHROMA_PADDING_LENGTH); //pTop Right + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadBottomLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicH) { + uint8_t* pDstLastLine = pDst + (kiPicH - 1) * kiStride; + const uint8_t kuiBL = pDstLastLine[0]; + int32_t i = 0; + uint8_t* pBottom = pDstLastLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom, pDstLastLine, 8); // confirmed_safe_unsafe_usage + memset (pBottom - CHROMA_PADDING_LENGTH, kuiBL, CHROMA_PADDING_LENGTH); //pBottom left + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadBottomRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, + const int32_t kiPicH) { + uint8_t* pDstLastLine = pDst + (kiPicH - 1) * kiStride + kiPicW; + const uint8_t kuiBR = pDstLastLine[-1]; + int32_t i = 0; + uint8_t* pBottom = pDstLastLine; + do { + pBottom += kiStride; + // pad pBottom + memcpy (pBottom - 8, pDstLastLine - 8, 
8); // confirmed_safe_unsafe_usage + memset (pBottom, kuiBR, CHROMA_PADDING_LENGTH); //pBottom Right + } while (++i < CHROMA_PADDING_LENGTH); +} + +static inline void MBPadLeftChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY) { + uint8_t* pTmp = pDst + (kiMbY << 3) * kiStride; + for (int32_t i = 0; i < 8; ++i) { + // pad left + memset (pTmp - CHROMA_PADDING_LENGTH, pTmp[0], CHROMA_PADDING_LENGTH); + pTmp += kiStride; + } +} + +static inline void MBPadRightChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiMbY, + const int32_t& kiPicW) { + uint8_t* pTmp = pDst + (kiMbY << 3) * kiStride + kiPicW; + for (int32_t i = 0; i < 8; ++i) { + // pad right + memset (pTmp, pTmp[-1], CHROMA_PADDING_LENGTH); + pTmp += kiStride; + } +} + +void PadMBLuma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH, + const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) { + if (kiMbX == 0 && kiMbY == 0) { + MBPadTopLeftLuma_c (pDst, kiStride); + } else if (kiMbY == 0 && kiMbX == kiMBWidth - 1) { + MBPadTopRightLuma_c (pDst, kiStride, kiPicW); + } else if (kiMbY == kiMBHeight - 1 && kiMbX == 0) { + MBPadBottomLeftLuma_c (pDst, kiStride, kiPicH); + } else if (kiMbY == kiMBHeight - 1 && kiMbX == kiMBWidth - 1) { + MBPadBottomRightLuma_c (pDst, kiStride, kiPicW, kiPicH); + } + if (kiMbX == 0) { + MBPadLeftLuma_c (pDst, kiStride, kiMbY); + } else if (kiMbX == kiMBWidth - 1) { + MBPadRightLuma_c (pDst, kiStride, kiMbY, kiPicW); + } + if (kiMbY == 0 && kiMbX > 0 && kiMbX < kiMBWidth - 1) { + MBPadTopLuma_c (pDst, kiStride, kiMbX); + } else if (kiMbY == kiMBHeight - 1 && kiMbX > 0 && kiMbX < kiMBWidth - 1) { + MBPadBottomLuma_c (pDst, kiStride, kiMbX, kiPicH); + } +} + +void PadMBChroma_c (uint8_t*& pDst, const int32_t& kiStride, const int32_t& kiPicW, const int32_t& kiPicH, + const int32_t& kiMbX, const int32_t& kiMbY, const int32_t& kiMBWidth, const int32_t& kiMBHeight) { + if 
(kiMbX == 0 && kiMbY == 0) { + MBPadTopLeftChroma_c (pDst, kiStride); + } else if (kiMbY == 0 && kiMbX == kiMBWidth - 1) { + MBPadTopRightChroma_c (pDst, kiStride, kiPicW); + } else if (kiMbY == kiMBHeight - 1 && kiMbX == 0) { + MBPadBottomLeftChroma_c (pDst, kiStride, kiPicH); + } else if (kiMbY == kiMBHeight - 1 && kiMbX == kiMBWidth - 1) { + MBPadBottomRightChroma_c (pDst, kiStride, kiPicW, kiPicH); + } + if (kiMbX == 0) { + MBPadLeftChroma_c (pDst, kiStride, kiMbY); + } else if (kiMbX == kiMBWidth - 1) { + MBPadRightChroma_c (pDst, kiStride, kiMbY, kiPicW); + } + if (kiMbY == 0 && kiMbX > 0 && kiMbX < kiMBWidth - 1) { + MBPadTopChroma_c (pDst, kiStride, kiMbX); + } else if (kiMbY == kiMBHeight - 1 && kiMbX > 0 && kiMbX < kiMBWidth - 1) { + MBPadBottomChroma_c (pDst, kiStride, kiMbX, kiPicH); + } +} + // rewrite it (split into luma & chroma) that is helpful for mmx/sse2 optimization perform, 9/27/2009 static inline void ExpandPictureLuma_c (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW, const int32_t kiPicH) { diff --git a/chromium/third_party/openh264/src/codec/common/src/utils.cpp b/chromium/third_party/openh264/src/codec/common/src/utils.cpp index 2edd73a84e5..fc0fbf90470 100644 --- a/chromium/third_party/openh264/src/codec/common/src/utils.cpp +++ b/chromium/third_party/openh264/src/codec/common/src/utils.cpp @@ -76,7 +76,7 @@ void WelsLog (SLogContext* logCtx, int32_t iLevel, const char* kpFmt, ...) 
{ #ifndef CALC_PSNR #define CONST_FACTOR_PSNR (10.0 / log(10.0)) // for good computation -#define CALC_PSNR(w, h, s) ((float)(CONST_FACTOR_PSNR * log( 65025.0 * w * h / iSqe ))) +#define CALC_PSNR(w, h, s) ((float)(CONST_FACTOR_PSNR * log( 65025.0 * w * h / s ))) #endif//CALC_PSNR /* diff --git a/chromium/third_party/openh264/src/codec/common/targets.mk b/chromium/third_party/openh264/src/codec/common/targets.mk index 96843cd9df0..43de4ce4cf4 100644 --- a/chromium/third_party/openh264/src/codec/common/targets.mk +++ b/chromium/third_party/openh264/src/codec/common/targets.mk @@ -66,18 +66,30 @@ COMMON_OBJS += $(COMMON_OBJSARM64) endif OBJS += $(COMMON_OBJSARM64) -COMMON_ASM_MIPS_SRCS=\ +COMMON_ASM_MIPS_MMI_SRCS=\ $(COMMON_SRCDIR)/mips/copy_mb_mmi.c\ $(COMMON_SRCDIR)/mips/deblock_mmi.c\ $(COMMON_SRCDIR)/mips/expand_picture_mmi.c\ $(COMMON_SRCDIR)/mips/intra_pred_com_mmi.c\ $(COMMON_SRCDIR)/mips/satd_sad_mmi.c\ -COMMON_OBJSMIPS += $(COMMON_ASM_MIPS_SRCS:.c=.$(OBJ)) +COMMON_OBJSMIPS_MMI += $(COMMON_ASM_MIPS_MMI_SRCS:.c=.$(OBJ)) + +COMMON_ASM_MIPS_MSA_SRCS=\ + $(COMMON_SRCDIR)/mips/copy_mb_msa.c\ + $(COMMON_SRCDIR)/mips/deblock_msa.c\ + +COMMON_OBJSMIPS_MSA += $(COMMON_ASM_MIPS_MSA_SRCS:.c=.$(OBJ)) ifeq ($(ASM_ARCH), mips) -COMMON_OBJS += $(COMMON_OBJSMIPS) +ifeq ($(ENABLE_MMI), Yes) +COMMON_OBJS += $(COMMON_OBJSMIPS_MMI) +endif +ifeq ($(ENABLE_MSA), Yes) +COMMON_OBJS += $(COMMON_OBJSMIPS_MSA) +endif endif -OBJS += $(COMMON_OBJSMIPS) +OBJS += $(COMMON_OBJSMIPS_MMI) +OBJS += $(COMMON_OBJSMIPS_MSA) OBJS += $(COMMON_OBJS) diff --git a/chromium/third_party/openh264/src/codec/common/x86/asm_inc.asm b/chromium/third_party/openh264/src/codec/common/x86/asm_inc.asm index 11253ce7c7f..56366fb6299 100644 --- a/chromium/third_party/openh264/src/codec/common/x86/asm_inc.asm +++ b/chromium/third_party/openh264/src/codec/common/x86/asm_inc.asm @@ -485,15 +485,20 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits ; Mark the stack as non- %endmacro %macro WELS_EXTERN 1 - 
%ifndef WELS_PRIVATE_EXTERN - %define WELS_PRIVATE_EXTERN - %endif ALIGN 16, nop %ifdef PREFIX - global _%1 WELS_PRIVATE_EXTERN + %ifdef WELS_PRIVATE_EXTERN + global _%1: WELS_PRIVATE_EXTERN + %else + global _%1 + %endif %define %1 _%1 %else - global %1 WELS_PRIVATE_EXTERN + %ifdef WELS_PRIVATE_EXTERN + global %1: WELS_PRIVATE_EXTERN + %else + global %1 + %endif %endif %1: %endmacro diff --git a/chromium/third_party/openh264/src/codec/console/dec/src/h264dec.cpp b/chromium/third_party/openh264/src/codec/console/dec/src/h264dec.cpp index 88223dce772..0b59ec105a7 100644 --- a/chromium/third_party/openh264/src/codec/console/dec/src/h264dec.cpp +++ b/chromium/third_party/openh264/src/codec/console/dec/src/h264dec.cpp @@ -52,7 +52,6 @@ #include "measure_time.h" #include "d3d9_utils.h" - using namespace std; #if defined (WINDOWS_PHONE) @@ -69,6 +68,105 @@ int g_iDecodedFrameNum = 0; #endif //using namespace WelsDec; +int32_t readPicture (uint8_t* pBuf, const int32_t& iFileSize, const int32_t& bufPos, uint8_t*& pSpsBuf, + int32_t& sps_byte_count) { + int32_t bytes_available = iFileSize - bufPos; + if (bytes_available < 4) { + return bytes_available; + } + uint8_t* ptr = pBuf + bufPos; + int32_t read_bytes = 0; + int32_t sps_count = 0; + int32_t pps_count = 0; + int32_t non_idr_pict_count = 0; + int32_t idr_pict_count = 0; + pSpsBuf = NULL; + sps_byte_count = 0; + while (read_bytes < bytes_available - 4) { + bool has4ByteStartCode = ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 1; + bool has3ByteStartCode = false; + if (!has4ByteStartCode) { + has3ByteStartCode = ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 1; + } + if (has4ByteStartCode || has3ByteStartCode) { + uint8_t nal_unit_type = has4ByteStartCode ? 
(ptr[4] & 0x1F) : (ptr[3] & 0x1F); + if (nal_unit_type == 1) { + if (++non_idr_pict_count == 1 && idr_pict_count == 1) { + return read_bytes; + } + if (non_idr_pict_count == 2) { + return read_bytes; + } + } else if (nal_unit_type == 5) { + if (++idr_pict_count == 1 && non_idr_pict_count == 1) { + return read_bytes; + } + if (idr_pict_count == 2) { + return read_bytes; + } + } else if (nal_unit_type == 7) { + pSpsBuf = ptr + (has4ByteStartCode ? 4 : 3); + if ((++sps_count == 1) && (non_idr_pict_count == 1 || idr_pict_count == 1)) { + return read_bytes; + } + } else if (nal_unit_type == 8) { + if (++pps_count == 1 && sps_count == 1) { + sps_byte_count = int32_t (ptr - pSpsBuf); + } + } + if (read_bytes >= bytes_available - 4) { + return bytes_available; + } + read_bytes += 4; + ptr += 4; + } else { + ++ptr; + ++read_bytes; + } + } + return bytes_available; +} + +void FlushFrames (ISVCDecoder* pDecoder, int64_t& iTotal, FILE* pYuvFile, FILE* pOptionFile, int32_t& iFrameCount, + unsigned long long& uiTimeStamp, int32_t& iWidth, int32_t& iHeight, int32_t& iLastWidth, int32_t iLastHeight) { + uint8_t* pData[3] = { NULL }; + uint8_t* pDst[3] = { NULL }; + SBufferInfo sDstBufInfo; + int32_t num_of_frames_in_buffer = 0; + CUtils cOutputModule; + pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer); + for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) { + int64_t iStart = WelsTime(); + pData[0] = NULL; + pData[1] = NULL; + pData[2] = NULL; + memset (&sDstBufInfo, 0, sizeof (SBufferInfo)); + sDstBufInfo.uiInBsTimeStamp = uiTimeStamp; + pDecoder->FlushFrame (pData, &sDstBufInfo); + if (sDstBufInfo.iBufferStatus == 1) { + pDst[0] = sDstBufInfo.pDst[0]; + pDst[1] = sDstBufInfo.pDst[1]; + pDst[2] = sDstBufInfo.pDst[2]; + } + int64_t iEnd = WelsTime(); + iTotal += iEnd - iStart; + if (sDstBufInfo.iBufferStatus == 1) { + cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile); + iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth; 
+ iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight; + if (pOptionFile != NULL) { + if (iWidth != iLastWidth && iHeight != iLastHeight) { + fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile); + fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile); + fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile); + iLastWidth = iWidth; + iLastHeight = iHeight; + } + } + ++iFrameCount; + } + } +} void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, const char* kpOuputFileName, int32_t& iWidth, int32_t& iHeight, const char* pOptionFileName, const char* pLengthFileName, int32_t iErrorConMethod, @@ -95,14 +193,17 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons int32_t iBufPos = 0; int32_t iFileSize; - int32_t i = 0; int32_t iLastWidth = 0, iLastHeight = 0; int32_t iFrameCount = 0; int32_t iEndOfStreamFlag = 0; - int32_t num_of_frames_in_buffer = 0; pDecoder->SetOption (DECODER_OPTION_ERROR_CON_IDC, &iErrorConMethod); CUtils cOutputModule; double dElapsed = 0; + uint8_t uLastSpsBuf[32]; + int32_t iLastSpsByteCount = 0; + + int32_t iThreadCount = 1; + pDecoder->GetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount); if (kpH264FileName) { pH264File = fopen (kpH264FileName, "rb"); @@ -148,7 +249,7 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons fseek (pH264File, 0L, SEEK_END); iFileSize = (int32_t) ftell (pH264File); - if (iFileSize <= 0) { + if (iFileSize <= 4) { fprintf (stderr, "Current Bit Stream File is too small, read error!!!!\n"); goto label_exit; } @@ -181,13 +282,32 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons goto label_exit; iSliceSize = static_cast<int32_t> (pInfo[2]); } else { - for (i = 0; i < iFileSize; i++) { - if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1 - && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && 
i > 0)) { - break; + if (iThreadCount >= 1) { + uint8_t* uSpsPtr = NULL; + int32_t iSpsByteCount = 0; + iSliceSize = readPicture (pBuf, iFileSize, iBufPos, uSpsPtr, iSpsByteCount); + if (iLastSpsByteCount > 0 && iSpsByteCount > 0) { + if (iSpsByteCount != iLastSpsByteCount || memcmp (uSpsPtr, uLastSpsBuf, iLastSpsByteCount) != 0) { + //whenever new sequence is different from preceding sequence. All pending frames must be flushed out before the new sequence can start to decode. + FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth, + iLastHeight); + } + } + if (iSpsByteCount > 0 && uSpsPtr != NULL) { + if (iSpsByteCount > 32) iSpsByteCount = 32; + iLastSpsByteCount = iSpsByteCount; + memcpy (uLastSpsBuf, uSpsPtr, iSpsByteCount); + } + } else { + int i = 0; + for (i = 0; i < iFileSize; i++) { + if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1 + && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) { + break; + } } + iSliceSize = i; } - iSliceSize = i; } if (iSliceSize < 4) { //too small size, no effective data, ignore iBufPos += iSliceSize; @@ -225,9 +345,9 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons } if (sDstBufInfo.iBufferStatus == 1) { - pDst[0] = pData[0]; - pDst[1] = pData[1]; - pDst[2] = pData[2]; + pDst[0] = sDstBufInfo.pDst[0]; + pDst[1] = sDstBufInfo.pDst[1]; + pDst[2] = sDstBufInfo.pDst[2]; } iEnd = WelsTime(); iTotal += iEnd - iStart; @@ -257,9 +377,9 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons sDstBufInfo.uiInBsTimeStamp = uiTimeStamp; pDecoder->DecodeFrame2 (NULL, 0, pData, &sDstBufInfo); if (sDstBufInfo.iBufferStatus == 1) { - pDst[0] = pData[0]; - pDst[1] = pData[1]; - pDst[2] = pData[2]; + pDst[0] = sDstBufInfo.pDst[0]; + pDst[1] = sDstBufInfo.pDst[1]; + pDst[2] = sDstBufInfo.pDst[2]; } iEnd = 
WelsTime(); iTotal += iEnd - iStart; @@ -283,41 +403,8 @@ void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, cons iBufPos += iSliceSize; ++ iSliceIndex; } - - pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer); - for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) { - iStart = WelsTime(); - pData[0] = NULL; - pData[1] = NULL; - pData[2] = NULL; - memset (&sDstBufInfo, 0, sizeof (SBufferInfo)); - sDstBufInfo.uiInBsTimeStamp = uiTimeStamp; - sDstBufInfo.iBufferStatus = 1; - pDecoder->FlushFrame (pData, &sDstBufInfo); - if (sDstBufInfo.iBufferStatus == 1) { - pDst[0] = pData[0]; - pDst[1] = pData[1]; - pDst[2] = pData[2]; - } - iEnd = WelsTime(); - iTotal += iEnd - iStart; - if (sDstBufInfo.iBufferStatus == 1) { - cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile); - iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth; - iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight; - - if (pOptionFile != NULL) { - if (iWidth != iLastWidth && iHeight != iLastHeight) { - fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile); - fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile); - fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile); - iLastWidth = iWidth; - iLastHeight = iHeight; - } - } - ++iFrameCount; - } - } + FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth, + iLastHeight); dElapsed = iTotal / 1e6; fprintf (stderr, "-------------------------------------------------------\n"); fprintf (stderr, "iWidth:\t\t%d\nheight:\t\t%d\nFrames:\t\t%d\ndecode time:\t%f sec\nFPS:\t\t%f fps\n", @@ -489,6 +576,9 @@ int32_t main (int32_t iArgC, char* pArgV[]) { pDecoder->SetOption (DECODER_OPTION_TRACE_LEVEL, &iLevelSetting); } + int32_t iThreadCount = 0; + pDecoder->SetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount); + if (pDecoder->Initialize (&sDecParam)) { printf ("Decoder initialization failed.\n"); return 1; diff --git 
a/chromium/third_party/openh264/src/codec/decoder/core/inc/deblocking.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/deblocking.h index 3fb3a2c7dd4..e84114a83cd 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/deblocking.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/deblocking.h @@ -67,6 +67,26 @@ void DeblockingInit (PDeblockingFunc pDeblockingFunc, int32_t iCpu); void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb); /*! +* \brief AVC slice init deblocking filtering target layer +* +* \in and out param SDeblockingFilter +* \in and out param iFilterIdc +* +* \return NONE +*/ +void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc); + +/*! +* \brief AVC MB deblocking filtering target layer +* +* \param DqLayer which has the current location of MB to be deblocked. +* +* \return NONE +*/ +void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc, + PDeblockingFilterMbFunc pDeblockMb); + +/*! 
* \brief pixel deblocking filtering * * \param filter deblocking filter @@ -77,16 +97,21 @@ void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFun * \return NONE */ -uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy); +uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge, + int32_t iNeighMb, int32_t iMbXy); uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy); int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc); -void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag); -void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_t nBS[2][4][4], int32_t iBoundryFlag); - void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag); +inline int8_t* GetPNzc (PDqLayer pCurDqLayer, int32_t iMbXy) { + if (pCurDqLayer->pDec != NULL && pCurDqLayer->pDec->pNzc != NULL) { + return pCurDqLayer->pDec->pNzc[iMbXy]; + } + return pCurDqLayer->pNzc[iMbXy]; +} + } // namespace WelsDec #endif //WELS_DEBLOCKING_H__ diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h index 23d27591f66..f7197abbf13 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/decode_slice.h @@ -42,6 +42,10 @@ int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx); int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); + +int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx); +int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, 
uint32_t& uiEosFlag); + typedef int32_t (*PWelsDecMbFunc) (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag); @@ -54,17 +58,18 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx); //construction based on slice int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur); +int32_t WelsDecodeAndConstructSlice (PWelsDecoderContext pCtx); int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx); -int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput); -int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, +int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, bool bOutput); +int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC); -int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer); +int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer); void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx); -int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer); +int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer); void WelsChromaDcIdct (int16_t* pBlock); -bool ComputeColocated (PWelsDecoderContext pCtx); +bool ComputeColocatedTemporalScaling (PWelsDecoderContext pCtx); #ifdef __cplusplus extern "C" { diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder.h index 3cc514faa2d..0c84739d51e 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder.h +++ 
b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder.h @@ -59,6 +59,21 @@ int32_t DecoderConfigParam (PWelsDecoderContext pCtx, const SDecodingParam* kpPa */ void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx); +/* +* fill last decoded picture info +*/ +void WelsDecoderLastDecPicInfoDefaults (SWelsLastDecPicInfo& sLastDecPicInfo); + +/*! +* \brief fill data fields in SPS and PPS default for decoder context +*/ +void WelsDecoderSpsPpsDefaults (SWelsDecoderSpsPpsCTX& sSpsPpsCtx); + +/*! +* \brief copy SpsPps from one Ctx to another ctx for threaded code +*/ +void CopySpsPps (PWelsDecoderContext pFromCtx, PWelsDecoderContext pToCtx); + /*! ************************************************************************************* * \brief Initialize Wels decoder parameters and memory @@ -157,7 +172,11 @@ void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx); //update decoder statistics information void UpdateDecStat (PWelsDecoderContext pCtx, const bool kbOutput); //Destroy picutre buffer -void DestroyPicBuff (PPicBuff* ppPicBuf, CMemoryAlign* pMa); +void DestroyPicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, CMemoryAlign* pMa); +//reset picture reodering buffer list +void ResetReorderingPictureBuffers (PPictReoderingStatus pPictReoderingStatus, PPictInfo pPictInfo, + const bool& bFullReset); + #ifdef __cplusplus } #endif//__cplusplus diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h index 0b9401de13c..2ad2aeda2f8 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_context.h @@ -56,6 +56,7 @@ #include "expand_pic.h" #include "mc.h" #include "memory_align.h" +#include "wels_decoder_thread.h" namespace WelsDec { #define MAX_PRED_MODE_ID_I16x16 3 @@ -64,6 +65,7 @@ namespace WelsDec { #define WELS_QP_MAX 51 
#define LONG_TERM_REF +#define IMinInt32 -0x7FFFFFFF typedef struct SWels_Cabac_Element { uint8_t uiState; uint8_t uiMPS; @@ -172,6 +174,7 @@ typedef struct tagDeblockingFilter { int8_t iChromaQP[2]; int8_t iLumaQP; struct TagDeblockingFunc* pLoopf; + PPicture* pRefPics[LIST_A]; } SDeblockingFilter, *PDeblockingFilter; typedef void (*PDeblockingFilterMbFunc) (PDqLayer pCurDqLayer, PDeblockingFilter filter, int32_t boundry_flag); @@ -214,7 +217,7 @@ typedef struct TagBlockFunc { } SBlockFunc; typedef void (*PWelsFillNeighborMbInfoIntra4x4Func) (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, - int8_t* pIntraPredMode, PDqLayer pCurLayer); + int8_t* pIntraPredMode, PDqLayer pCurDqLayer); typedef void (*PWelsMapNeighToSample) (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail); typedef void (*PWelsMap16NeighToSample) (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail); typedef int32_t (*PWelsParseIntra4x4ModeFunc) (PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs, @@ -228,6 +231,72 @@ enum { OVERWRITE_SUBSETSPS = 1 << 2 }; + +//Decoder SPS and PPS global CTX +typedef struct tagWelsWelsDecoderSpsPpsCTX { + SPosOffset sFrameCrop; + + SSps sSpsBuffer[MAX_SPS_COUNT + 1]; + SPps sPpsBuffer[MAX_PPS_COUNT + 1]; + + SSubsetSps sSubsetSpsBuffer[MAX_SPS_COUNT + 1]; + SNalUnit sPrefixNal; + + PSps pActiveLayerSps[MAX_LAYER_NUM]; + bool bAvcBasedFlag; // For decoding bitstream: + + // for EC parameter sets + bool bSpsExistAheadFlag; // whether does SPS NAL exist ahead of sequence? + bool bSubspsExistAheadFlag;// whether does Subset SPS NAL exist ahead of sequence? + bool bPpsExistAheadFlag; // whether does PPS NAL exist ahead of sequence? 
+ + int32_t iSpsErrorIgnored; + int32_t iSubSpsErrorIgnored; + int32_t iPpsErrorIgnored; + + bool bSpsAvailFlags[MAX_SPS_COUNT]; + bool bSubspsAvailFlags[MAX_SPS_COUNT]; + bool bPpsAvailFlags[MAX_PPS_COUNT]; + int32_t iPPSLastInvalidId; + int32_t iPPSInvalidNum; + int32_t iSPSLastInvalidId; + int32_t iSPSInvalidNum; + int32_t iSubSPSLastInvalidId; + int32_t iSubSPSInvalidNum; + int32_t iSeqId; //sequence id + int iOverwriteFlags; +} SWelsDecoderSpsPpsCTX, *PWelsDecoderSpsPpsCTX; + +//Last Decoded Picture Info +typedef struct tagSWelsLastDecPicInfo { + // Save the last nal header info + SNalUnitHeaderExt sLastNalHdrExt; + SSliceHeader sLastSliceHeader; + int32_t iPrevPicOrderCntMsb; + int32_t iPrevPicOrderCntLsb; + PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment + int32_t iPrevFrameNum;// frame number of previous frame well decoded for non-truncated mode yet + bool bLastHasMmco5; + uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps +} SWelsLastDecPicInfo, *PWelsLastDecPicInfo; + +typedef struct tagPictInfo { + SBufferInfo sBufferInfo; + int32_t iPOC; + int32_t iPicBuffIdx; + uint32_t uiDecodingTimeStamp; + bool bLastGOP; +} SPictInfo, *PPictInfo; + +typedef struct tagPictReoderingStatus { + int32_t iPictInfoIndex; + int32_t iMinPOC; + int32_t iNumOfPicts; + int32_t iLastGOPRemainPicts; + int32_t iLastWrittenPOC; + int32_t iLargestBufferedPicIndex; +} SPictReoderingStatus, *PPictReoderingStatus; + /* * SWelsDecoderContext: to maintail all modules data over decoder@framework */ @@ -262,9 +331,6 @@ typedef struct TagWelsDecoderContext { EWelsSliceType eSliceType; // Slice type bool bUsedAsRef; //flag as ref int32_t iFrameNum; - int32_t - iPrevFrameNum; // frame number of previous frame well decoded for non-truncated mode yet - bool bLastHasMmco5; // int32_t iErrorCode; // error code return while decoding in case packets lost SFmo sFmoList[MAX_PPS_COUNT]; // list for FMO storage PFmo pFmo; 
// current fmo context after parsed slice_header @@ -304,6 +370,7 @@ typedef struct TagWelsDecoderContext { uint32_t iMbHeight; } sMb; + // reconstruction picture PPicture pDec; //pointer to current picture being reconstructed @@ -312,65 +379,44 @@ typedef struct TagWelsDecoderContext { // reference pictures SRefPic sRefPic; - - SVlcTable sVlcTable; // vlc table + SRefPic sTmpRefPic; //used to temporarily save RefPic for next active thread + SVlcTable* pVlcTable; // vlc table SBitStringAux sBs; int32_t iMaxBsBufferSizeInByte; //actual memory size for BS buffer /* Global memory external */ + SWelsDecoderSpsPpsCTX sSpsPpsCtx; + bool bHasNewSps; SPosOffset sFrameCrop; - SSps sSpsBuffer[MAX_SPS_COUNT + 1]; - SPps sPpsBuffer[MAX_PPS_COUNT + 1]; PSliceHeader pSliceHeader; PPicBuff pPicBuff; // Initially allocated memory for pictures which are used in decoding. int32_t iPicQueueNumber; - SSubsetSps sSubsetSpsBuffer[MAX_SPS_COUNT + 1]; - SNalUnit sPrefixNal; - PAccessUnit pAccessUnitList; // current access unit list to be performed - PSps pActiveLayerSps[MAX_LAYER_NUM]; + //PSps pActiveLayerSps[MAX_LAYER_NUM]; PSps pSps; // used by current AU PPps pPps; // used by current AU // Memory for pAccessUnitList is dynamically held till decoder destruction. 
PDqLayer pCurDqLayer; // current DQ layer representation, also carry reference base layer if applicable PDqLayer pDqLayersList[LAYER_NUM_EXCHANGEABLE]; // DQ layers list with memory allocated - + PNalUnit pNalCur; // point to current NAL Nnit + uint8_t uiNalRefIdc; // NalRefIdc for easy access; int32_t iPicWidthReq; // picture width have requested the memory int32_t iPicHeightReq; // picture height have requested the memory uint8_t uiTargetDqId; // maximal DQ ID in current access unit, meaning target layer ID - bool bAvcBasedFlag; // For decoding bitstream: + //bool bAvcBasedFlag; // For decoding bitstream: bool bEndOfStreamFlag; // Flag on end of stream requested by external application layer bool bInstantDecFlag; // Flag for no-delay decoding bool bInitialDqLayersMem; // dq layers related memory is available? bool bOnlyOneLayerInCurAuFlag; //only one layer in current AU: 1 -// for EC parameter sets - bool bSpsExistAheadFlag; // whether does SPS NAL exist ahead of sequence? - bool bSubspsExistAheadFlag;// whether does Subset SPS NAL exist ahead of sequence? - bool bPpsExistAheadFlag; // whether does PPS NAL exist ahead of sequence? 
- - int32_t iSpsErrorIgnored; - int32_t iSubSpsErrorIgnored; - int32_t iPpsErrorIgnored; - - bool bSpsAvailFlags[MAX_SPS_COUNT]; - bool bSubspsAvailFlags[MAX_SPS_COUNT]; - bool bPpsAvailFlags[MAX_PPS_COUNT]; - int32_t iPPSLastInvalidId; - int32_t iPPSInvalidNum; - int32_t iSPSLastInvalidId; - int32_t iSPSInvalidNum; - int32_t iSubSPSLastInvalidId; - int32_t iSubSPSInvalidNum; - bool bReferenceLostAtT0Flag; int32_t iTotalNumMbRec; //record current number of decoded MB #ifdef LONG_TERM_REF @@ -384,7 +430,6 @@ typedef struct TagWelsDecoderContext { #endif bool bNewSeqBegin; bool bNextNewSeqBegin; - int iOverwriteFlags; //for Parse only bool bFramePending; @@ -396,7 +441,7 @@ typedef struct TagWelsDecoderContext { SPpsBsInfo sPpsBsInfo [MAX_PPS_COUNT]; SParserBsInfo* pParserBsInfo; - PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment + //PPicture pPreviousDecodedPictureInDpb; //pointer to previously decoded picture in DPB for error concealment PGetIntraPredFunc pGetI16x16LumaPredFunc[7]; //h264_predict_copy_16x16; PGetIntraPredFunc pGetI4x4LumaPredFunc[14]; // h264_predict_4x4_t PGetIntraPredFunc pGetIChromaPredFunc[7]; // h264_predict_8x8_t @@ -436,18 +481,14 @@ typedef struct TagWelsDecoderContext { //trace handle void* pTraceHandle; -//Save the last nal header info - SNalUnitHeaderExt sLastNalHdrExt; - SSliceHeader sLastSliceHeader; - int32_t iPrevPicOrderCntMsb; - int32_t iPrevPicOrderCntLsb; + PWelsLastDecPicInfo pLastDecPicInfo; SWelsCabacCtx sWelsCabacContexts[4][WELS_QP_MAX + 1][WELS_CONTEXT_COUNT]; bool bCabacInited; SWelsCabacCtx pCabacCtx[WELS_CONTEXT_COUNT]; PWelsCabacDecEngine pCabacDecEngine; double dDecTime; - SDecoderStatistics sDecoderStatistics;// For real time debugging + SDecoderStatistics* pDecoderStatistics; // For real time debugging int32_t iMbEcedNum; int32_t iMbEcedPropNum; int32_t iMbNum; @@ -456,6 +497,7 @@ typedef struct TagWelsDecoderContext { int32_t iECMVs[16][2]; PPicture 
pECRefPic[16]; unsigned long long uiTimeStamp; + uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps // To support scaling list HP uint16_t pDequant_coeff_buffer4x4[6][52][16]; uint16_t pDequant_coeff_buffer8x8[6][52][64]; @@ -465,15 +507,56 @@ typedef struct TagWelsDecoderContext { bool bDequantCoeff4x4Init; bool bUseScalingList; CMemoryAlign* pMemAlign; + void* pThreadCtx; + void* pLastThreadCtx; + WELS_MUTEX* pCsDecoder; + int16_t lastReadyHeightOffset[LIST_A][MAX_REF_PIC_COUNT]; //last ready reference MB offset + PPictInfo pPictInfoList; + PPictReoderingStatus pPictReoderingStatus; } SWelsDecoderContext, *PWelsDecoderContext; +typedef struct tagSWelsDecThread { + SWelsDecSemphore* sIsBusy; + SWelsDecSemphore sIsActivated; + SWelsDecSemphore sIsIdle; + SWelsDecThread sThrHandle; + uint32_t uiCommand; + uint32_t uiThrNum; + uint32_t uiThrMaxNum; + uint32_t uiThrStackSize; + DECLARE_PROCTHREAD_PTR (pThrProcMain); +} SWelsDecThreadInfo, *PWelsDecThreadInfo; + +typedef struct tagSWelsDecThreadCtx { + SWelsDecThreadInfo sThreadInfo; + PWelsDecoderContext pCtx; + void* threadCtxOwner; + uint8_t* kpSrc; + int32_t kiSrcLen; + uint8_t** ppDst; + SBufferInfo sDstInfo; + PPicture pDec; + SWelsDecEvent sImageReady; + SWelsDecEvent sSliceDecodeStart; + SWelsDecEvent sSliceDecodeFinish; + int32_t iPicBuffIdx; //picBuff Index +} SWelsDecoderThreadCTX, *PWelsDecoderThreadCTX; + static inline void ResetActiveSPSForEachLayer (PWelsDecoderContext pCtx) { if (pCtx->iTotalNumMbRec == 0) { for (int i = 0; i < MAX_LAYER_NUM; i++) { - pCtx->pActiveLayerSps[i] = NULL; + pCtx->sSpsPpsCtx.pActiveLayerSps[i] = NULL; } } } +static inline int32_t GetThreadCount (PWelsDecoderContext pCtx) { + int32_t iThreadCount = 0; + if (pCtx->pThreadCtx != NULL) { + PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx; + iThreadCount = pThreadCtx->sThreadInfo.uiThrMaxNum; + } + return iThreadCount; +} //#ifdef __cplusplus //} //#endif//__cplusplus diff --git 
a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h index 41cd1d6a98a..df253399671 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/decoder_core.h @@ -129,6 +129,34 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co */ bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kpDst, PNalUnit const kpSrc); +/* +* WelsDecodeInitAccessUnitStart +* check and (re)allocate picture buffers on new sequence begin +* bit_len: size in bit length of data +* buf_len: size in byte length of data +* coded_au: mark an Access Unit decoding finished +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t WelsDecodeInitAccessUnitStart (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo); +/* +* AllocPicBuffOnNewSeqBegin +* check and (re)allocate picture buffers on new sequence begin +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx); + +/* +* InitConstructAccessUnit +* Init before constructing an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to +* joint a collective access unit. 
+* parameter\ +* SBufferInfo: Buffer info +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t InitConstructAccessUnit (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo); /* * ConstructAccessUnit diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h index 7be76f291b1..164ae15d6c7 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/manage_dec_ref.h @@ -48,11 +48,12 @@ namespace WelsDec { void WelsResetRefPic (PWelsDecoderContext pCtx); +void WelsResetRefPicWithoutUnRef (PWelsDecoderContext pCtx); int32_t WelsInitRefList (PWelsDecoderContext pCtx, int32_t iPoc); int32_t WelsInitBSliceRefList (PWelsDecoderContext pCtx, int32_t iPoc); int32_t WelsReorderRefList (PWelsDecoderContext pCtx); int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx); -int32_t WelsMarkAsRef (PWelsDecoderContext pCtx); +int32_t WelsMarkAsRef (PWelsDecoderContext pCtx, PPicture pLastDec = NULL); } // namespace WelsDec diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h index daf640155c9..bdbc7cd20b8 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/mv_pred.h @@ -91,7 +91,7 @@ void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][ * \param * \param output iMvp[] */ -void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]); +void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]); /*! * \brief get the motion predictor and reference for B-slice direct mode version 2 @@ -111,7 +111,8 @@ int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& sub /*! 
* \brief get the motion predictor for B-slice temporal direct mode 16x16 */ -int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A]); +int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A], + SubMbType& subMbType); /*! * \brief get the motion params for B-slice spatial direct mode @@ -143,6 +144,48 @@ void PredInter16x8Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[ void PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30], int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]); +/*! +* \brief Fill the spatial direct motion vectors for 8x8 direct MB +* \param +* \param output motion vector cache and motion vector deviation cache +*/ +void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW, + const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A], + int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]); + +/*! +* \brief Fill the temporal direct motion vectors for 8x8 direct MB +* \param +* \param output motion vector cache and motion vector deviation cache +*/ +void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, + const int8_t& iPartW, + const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2], + int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]); + +/*! +* \brief returns ref_index in List_0 from the colocated ref_index in LIST_0. +* \param +* returns ref_index in List_0 of ref picture LIST_0 +*/ +int8_t MapColToList0 (PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0, + const int32_t& ref0Count); //ISO/IEC 14496-10:2009(E) (8-193) + +/*! 
+* \brief update ref_index cache for current MB, for 8x8 +* \param +* \param +*/ +void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef); + +inline uint32_t* GetMbType (PDqLayer& pCurDqLayer) { + if (pCurDqLayer->pDec != NULL) { + return pCurDqLayer->pDec->pMbType; + } else { + return pCurDqLayer->pMbType; + } +} + } // namespace WelsDec #endif//WELS_MV_PRED_H__ diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h index 1aca12002c9..8f31543ae40 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cabac.h @@ -82,6 +82,8 @@ void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30 const int8_t iListIdx); void UpdateP8x8DirectCabac (PDqLayer pCurDqLayer, int32_t iPartIdx); void UpdateP16x16DirectCabac (PDqLayer pCurDqLayer); +void UpdateP8x8RefCacheIdxCabac (int8_t pRefIndex[LIST_A][30], const int16_t& iPartIdx, const int32_t& listIdx, + const int8_t& iRef); } //#pragma pack() #endif diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h index ab09bbe0c18..d048551640f 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/parse_mb_syn_cavlc.h @@ -51,18 +51,18 @@ namespace WelsDec { -void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer); -void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurLayer); +void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurDqLayer); +void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurDqLayer); 
void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode, - PDqLayer pCurLayer); + PDqLayer pCurDqLayer); void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode, - PDqLayer pCurLayer); + PDqLayer pCurDqLayer); void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], - PDqLayer pCurLayer); -void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurLayer); + PDqLayer pCurDqLayer); +void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurDqLayer); void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, - int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer); + int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer); /*! 
* \brief check iPredMode for intra16x16 eligible or not @@ -131,6 +131,7 @@ int32_t WelsResidualBlockCavlc8x8 (SVlcTable* pVlcTable, */ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PBitStringAux pBs); - +int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], + int8_t iRefIdxArray[LIST_A][30], PBitStringAux pBs); } // namespace WelsDec #endif//WELS_PARSE_MB_SYN_CAVLC_H__ diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h index 7e7e17ff9b7..473f80ff119 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/pic_queue.h @@ -53,6 +53,9 @@ typedef struct TagPicBuff { */ PPicture PrefetchPic (PPicBuff pPicBuff); // To get current node applicable +PPicture PrefetchPicForThread (PPicBuff pPicBuff); // To get current node applicable in the case of threaded mode +PPicture PrefetchLastPicForThread (PPicBuff pPicBuff, + const int32_t& iLast); // To get last node applicable in the case of threaded mode } // namespace WelsDec diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/picture.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/picture.h index 46bdd3b360f..bdacc364cf6 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/picture.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/picture.h @@ -37,6 +37,7 @@ #include "typedefs.h" #include "wels_common_defs.h" #include "wels_const_common.h" +#include "wels_decoder_thread.h" using namespace WelsCommon; @@ -68,8 +69,7 @@ struct SPicture { /*******************************sef_definition for misc use****************************/ bool bUsedAsRef; //for ref pic management bool bIsLongRef; // long term reference frame flag //for ref pic management - uint8_t uiRefCount; - bool 
bAvailableFlag; // indicate whether it is available in this picture memory block. + int8_t iRefCount; bool bIsComplete; // indicate whether current picture is complete, not from EC /*******************************for future use****************************/ @@ -85,15 +85,22 @@ struct SPicture { int32_t iSpsId; //against mosaic caused by cross-IDR interval reference. int32_t iPpsId; unsigned long long uiTimeStamp; + uint32_t uiDecodingTimeStamp; //represent relative decoding time stamps + int32_t iPicBuffIdx; + EWelsSliceType eSliceType; + bool bIsUngroupedMultiSlice; //multi-slice picture with each each slice group contains one slice. bool bNewSeqBegin; int32_t iMbEcedNum; int32_t iMbEcedPropNum; int32_t iMbNum; + bool* pMbCorrectlyDecodedFlag; + int8_t (*pNzc)[24]; uint32_t* pMbType; // mb type used for direct mode int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A]; // used for direct mode int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM]; //used for direct mode struct SPicture* pRefPic[LIST_A][17]; //ref pictures used for direct mode + SWelsDecEvent* pReadyEvent; //MB line ready event };// "Picture" declaration is comflict with Mac system diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h index 8d7a9ea1333..be0c4a7407a 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/rec_mb.h @@ -74,10 +74,11 @@ typedef struct TagMCRefMember { int32_t iPicHeight; } sMCRefMember; -void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc, +void BaseMC (PWelsDecoderContext pCtx, sMCRefMember* pMCRefMem, const int32_t& listIdx, const int8_t& iRefIdx, + int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc, int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]); -void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurLayer); +void 
WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurDqLayer); int32_t RecI4x4Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer); @@ -93,7 +94,7 @@ int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLe int32_t RecChroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer); -void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx); +int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx); int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWelsDecoderContext pCtx); diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/slice.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/slice.h index 75ad646b4de..9295eb71b14 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/slice.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/slice.h @@ -122,7 +122,7 @@ typedef struct TagSliceHeaders { PPps pPps; int32_t iSpsId; int32_t iPpsId; - bool bIdrFlag; + bool bIdrFlag; /*********************got from other layer for efficency if possible*********************/ SRefPicListReorderSyn pRefPicListReordering; // Reference picture list reordering syntaxs diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h index 833bc8bca3e..19aae110e33 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_common_basis.h @@ -275,7 +275,7 @@ static const SPartMbInfo g_ksInterBMbTypeInfo[] = { { MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 4, 4 } //B_8x8 }; -//Table 7.17 – Sub-macroblock types in B macroblocks. +//Table 7.17 Sub-macroblock types in B macroblocks. 
static const SPartMbInfo g_ksInterPSubMbTypeInfo[4] = { {SUB_MB_TYPE_8x8, 1, 2}, {SUB_MB_TYPE_8x4, 2, 2}, @@ -283,7 +283,7 @@ static const SPartMbInfo g_ksInterPSubMbTypeInfo[4] = { {SUB_MB_TYPE_4x4, 4, 1}, }; -//Table 7.18 – Sub-macroblock types in B macroblocks. +//Table 7.18 Sub-macroblock types in B macroblocks. static const SPartMbInfo g_ksInterBSubMbTypeInfo[] = { { MB_TYPE_DIRECT, 1, 2 }, //B_Direct_8x8 { SUB_MB_TYPE_8x8 | MB_TYPE_P0L0, 1, 2 }, //B_L0_8x8 diff --git a/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h b/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h new file mode 100644 index 00000000000..ebb8015c9ca --- /dev/null +++ b/chromium/third_party/openh264/src/codec/decoder/core/inc/wels_decoder_thread.h @@ -0,0 +1,170 @@ +/*! + * \copy + * Copyright (c) 2009-2019, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * + * \file wels_decoder_thread.h + * + * \brief Interfaces introduced in thread programming + * + * \date 08/06/2018 Created + * + ************************************************************************************* + */ + +#ifndef _WELS_DECODER_THREAD_H_ +#define _WELS_DECODER_THREAD_H_ + +#include "WelsThreadLib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define WELS_DEC_MAX_NUM_CPU 16 +#define WELS_DEC_MAX_THREAD_STACK_SIZE 4096 +#define WELS_DEC_THREAD_COMMAND_RUN 0 +#define WELS_DEC_THREAD_COMMAND_ABORT 1 + +#if defined(_WIN32) || defined(__CYGWIN__) +typedef struct tagWelsDecSemphore { + WELS_THREAD_HANDLE h; +} SWelsDecSemphore; + +typedef struct tagWelsDecEvent { + WELS_THREAD_HANDLE h; + int isSignaled; +} SWelsDecEvent; + +typedef struct tagWelsDecThread { + WELS_THREAD_HANDLE h; +} SWelsDecThread; + +#define WelsDecThreadFunc(fn,a) DWORD WINAPI fn(LPVOID a) +#define WelsDecThreadFuncArg(a) LPWELS_THREAD_ROUTINE a +#define WELS_DEC_THREAD_WAIT_TIMEDOUT WAIT_TIMEOUT +#define WELS_DEC_THREAD_WAIT_SIGNALED WAIT_OBJECT_0 +#define WELS_DEC_THREAD_WAIT_INFINITE INFINITE + +#else // NON-WINDOWS + +typedef pthread_mutexattr_t WELS_MUTEX_ATTR; + +typedef struct tagWelsDecSemphore { + long max; + long v; + WELS_EVENT e; + WELS_MUTEX m; +} SWelsDecSemphore; + +typedef struct tagWelsDecEvent { + int manualReset; + int isSignaled; + pthread_cond_t c; + WELS_MUTEX m; +} SWelsDecEvent; + +typedef struct 
tagWelsDecThread { + WELS_THREAD_HANDLE h; +} SWelsDecThread; + +#define WelsDecThreadFunc(fn,a) void* fn(void* a) +#define WelsDecThreadFuncArg(a) void* (*a)(void*) + +#define WELS_DEC_THREAD_WAIT_TIMEDOUT ETIMEDOUT +#define WELS_DEC_THREAD_WAIT_SIGNALED EINTR +#define WELS_DEC_THREAD_WAIT_INFINITE -1 + +#endif//_WIN32 + +#define WelsDecThreadReturn WELS_THREAD_ROUTINE_RETURN(0); + +int32_t GetCPUCount(); + +// Event +int EventCreate (SWelsDecEvent* e, int manualReset, int initialState); +void EventPost (SWelsDecEvent* e); +int EventWait (SWelsDecEvent* e, int32_t timeout); +void EventReset (SWelsDecEvent* e); +void EventDestroy (SWelsDecEvent* e); + +// Semaphore +int SemCreate (SWelsDecSemphore* s, long value, long max); +int SemWait (SWelsDecSemphore* s, int32_t timeout); +void SemRelease (SWelsDecSemphore* s, long* prev_count); +void SemDestroy (SWelsDecSemphore* s); + +// Thread +int ThreadCreate (SWelsDecThread* t, LPWELS_THREAD_ROUTINE tf, void* ta); +int ThreadWait (SWelsDecThread* t); + +#define DECLARE_PROCTHREAD(name, argument) \ + WelsDecThreadFunc(name,argument) + +#define DECLARE_PROCTHREAD_PTR(name) \ + LPWELS_THREAD_ROUTINE name + +#define CREATE_THREAD(ph, threadproc,argument) \ + ThreadCreate(ph, threadproc, (void*)argument) + +#define CREATE_EVENT(ph, manualreset,initial_state,name) \ + EventCreate(ph,(int)(manualreset),(int)(initial_state)) + +#define CREATE_SEMAPHORE(ph, initial_count,max_count, name) \ + SemCreate(ph, (long)initial_count,(long)(max_count)) + +#define CLOSE_EVENT(ph) \ + EventDestroy(ph) + +#define CLOSE_SEMAPHORE(ph) \ + SemDestroy(ph) + +#define SET_EVENT(ph) \ + EventPost(ph) + +#define RESET_EVENT(ph) \ + EventReset(ph) + +#define RELEASE_SEMAPHORE(ph) \ + SemRelease(ph,NULL) + +#define WAIT_EVENT(ph,timeout) \ + EventWait(ph, (int32_t)timeout) + +#define WAIT_THREAD(ph) \ + ThreadWait(ph) + +#define WAIT_SEMAPHORE(ph,timeout) \ + SemWait(ph,(int32_t)timeout) + +#ifdef __cplusplus +} +#endif + +#endif diff --git 
a/chromium/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp index 9f7091e4781..91f89b4374b 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/au_parser.cpp @@ -148,48 +148,50 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade ++ (*pConsumedBytes); if (! (IS_SEI_NAL (pNalUnitHeader->eNalUnitType) || IS_SPS_NAL (pNalUnitHeader->eNalUnitType) - || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->bSpsExistAheadFlag)) { - if (pCtx->bPrintFrameErrorTraceFlag && pCtx->iSpsErrorIgnored == 0) { + || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->sSpsPpsCtx.bSpsExistAheadFlag)) { + if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iSpsErrorIgnored == 0) { WelsLog (pLogCtx, WELS_LOG_WARNING, "parse_nal(), no exist Sequence Parameter Sets ahead of sequence when try to decode NAL(type:%d).", pNalUnitHeader->eNalUnitType); } else { - pCtx->iSpsErrorIgnored++; + pCtx->sSpsPpsCtx.iSpsErrorIgnored++; } - pCtx->sDecoderStatistics.iSpsNoExistNalNum++; + pCtx->pDecoderStatistics->iSpsNoExistNalNum++; pCtx->iErrorCode = dsNoParamSets; return NULL; } - pCtx->iSpsErrorIgnored = 0; + pCtx->sSpsPpsCtx.iSpsErrorIgnored = 0; if (! 
(IS_SEI_NAL (pNalUnitHeader->eNalUnitType) || IS_PARAM_SETS_NALS (pNalUnitHeader->eNalUnitType) - || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->bPpsExistAheadFlag)) { - if (pCtx->bPrintFrameErrorTraceFlag && pCtx->iPpsErrorIgnored == 0) { + || IS_AU_DELIMITER_NAL (pNalUnitHeader->eNalUnitType) || pCtx->sSpsPpsCtx.bPpsExistAheadFlag)) { + if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iPpsErrorIgnored == 0) { WelsLog (pLogCtx, WELS_LOG_WARNING, "parse_nal(), no exist Picture Parameter Sets ahead of sequence when try to decode NAL(type:%d).", pNalUnitHeader->eNalUnitType); } else { - pCtx->iPpsErrorIgnored++; + pCtx->sSpsPpsCtx.iPpsErrorIgnored++; } - pCtx->sDecoderStatistics.iPpsNoExistNalNum++; + pCtx->pDecoderStatistics->iPpsNoExistNalNum++; pCtx->iErrorCode = dsNoParamSets; return NULL; } - pCtx->iPpsErrorIgnored = 0; - if ((IS_VCL_NAL_AVC_BASE (pNalUnitHeader->eNalUnitType) && ! (pCtx->bSpsExistAheadFlag || pCtx->bPpsExistAheadFlag)) || - (IS_NEW_INTRODUCED_SVC_NAL (pNalUnitHeader->eNalUnitType) && ! (pCtx->bSpsExistAheadFlag || pCtx->bSubspsExistAheadFlag - || pCtx->bPpsExistAheadFlag))) { - if (pCtx->bPrintFrameErrorTraceFlag && pCtx->iSubSpsErrorIgnored == 0) { + pCtx->sSpsPpsCtx.iPpsErrorIgnored = 0; + if ((IS_VCL_NAL_AVC_BASE (pNalUnitHeader->eNalUnitType) && ! (pCtx->sSpsPpsCtx.bSpsExistAheadFlag + || pCtx->sSpsPpsCtx.bPpsExistAheadFlag)) || + (IS_NEW_INTRODUCED_SVC_NAL (pNalUnitHeader->eNalUnitType) && ! 
(pCtx->sSpsPpsCtx.bSpsExistAheadFlag + || pCtx->sSpsPpsCtx.bSubspsExistAheadFlag + || pCtx->sSpsPpsCtx.bPpsExistAheadFlag))) { + if (pCtx->bPrintFrameErrorTraceFlag && pCtx->sSpsPpsCtx.iSubSpsErrorIgnored == 0) { WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseNalHeader(), no exist Parameter Sets ahead of sequence when try to decode slice(type:%d).", pNalUnitHeader->eNalUnitType); } else { - pCtx->iSubSpsErrorIgnored++; + pCtx->sSpsPpsCtx.iSubSpsErrorIgnored++; } - pCtx->sDecoderStatistics.iSubSpsNoExistNalNum++; + pCtx->pDecoderStatistics->iSubSpsNoExistNalNum++; pCtx->iErrorCode |= dsNoParamSets; return NULL; } - pCtx->iSubSpsErrorIgnored = 0; + pCtx->sSpsPpsCtx.iSubSpsErrorIgnored = 0; switch (pNalUnitHeader->eNalUnitType) { case NAL_UNIT_AU_DELIMITER: @@ -201,7 +203,7 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade break; case NAL_UNIT_PREFIX: - pCurNal = &pCtx->sPrefixNal; + pCurNal = &pCtx->sSpsPpsCtx.sPrefixNal; pCurNal->uiTimeStamp = pCtx->uiTimeStamp; if (iNalSize < NAL_UNIT_HEADER_EXT_SIZE) { @@ -365,9 +367,9 @@ uint8_t* ParseNalHeader (PWelsDecoderContext pCtx, SNalUnitHeader* pNalUnitHeade memcpy (pSavedData->pCurPos + iStartDeltaByte, pSrcNal, iActualLen); pSavedData->pCurPos += iStartDeltaByte + iActualLen; } - if (NAL_UNIT_PREFIX == pCtx->sPrefixNal.sNalHeaderExt.sNalUnitHeader.eNalUnitType) { - if (pCtx->sPrefixNal.sNalData.sPrefixNal.bPrefixNalCorrectFlag) { - PrefetchNalHeaderExtSyntax (pCtx, pCurNal, &pCtx->sPrefixNal); + if (NAL_UNIT_PREFIX == pCtx->sSpsPpsCtx.sPrefixNal.sNalHeaderExt.sNalUnitHeader.eNalUnitType) { + if (pCtx->sSpsPpsCtx.sPrefixNal.sNalData.sPrefixNal.bPrefixNalCorrectFlag) { + PrefetchNalHeaderExtSyntax (pCtx, pCurNal, &pCtx->sSpsPpsCtx.sPrefixNal); } } @@ -496,8 +498,8 @@ bool CheckAccessUnitBoundary (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PNalUnitHeaderExt kpCurNalHeaderExt = &kpCurNal->sNalHeaderExt; const SSliceHeader* kpLastSliceHeader = 
&kpLastNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader; const SSliceHeader* kpCurSliceHeader = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader; - if (pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL - && pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) { + if (pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL + && pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) { return true; // the active sps changed, new sequence begins, so the current au is ready } @@ -548,8 +550,8 @@ bool CheckAccessUnitBoundary (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, bool CheckNextAuNewSeq (PWelsDecoderContext pCtx, const PNalUnit kpCurNal, const PSps kpSps) { const PNalUnitHeaderExt kpCurNalHeaderExt = &kpCurNal->sNalHeaderExt; - if (pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL - && pCtx->pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) + if (pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != NULL + && pCtx->sSpsPpsCtx.pActiveLayerSps[kpCurNalHeaderExt->uiDependencyId] != kpSps) return true; if (kpCurNalHeaderExt->bIdrFlag) return true; @@ -606,7 +608,7 @@ int32_t ParseNonVclNal (PWelsDecoderContext pCtx, uint8_t* pRbsp, const int32_t pCtx->iErrorCode |= dsBitstreamError; return iErr; } - + pCtx->bHasNewSps = true; break; case NAL_UNIT_PPS: @@ -620,17 +622,18 @@ int32_t ParseNonVclNal (PWelsDecoderContext pCtx, uint8_t* pRbsp, const int32_t return iErr; } } - iErr = ParsePps (pCtx, &pCtx->sPpsBuffer[0], pBs, pSrcNal, kSrcNalLen); + iErr = ParsePps (pCtx, &pCtx->sSpsPpsCtx.sPpsBuffer[0], pBs, pSrcNal, kSrcNalLen); if (ERR_NONE != iErr) { // modified for pps invalid, 12/1/2009 if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) pCtx->iErrorCode |= dsNoParamSets; else pCtx->iErrorCode |= dsBitstreamError; + pCtx->bHasNewSps = false; return iErr; } - pCtx->bPpsExistAheadFlag = true; - + pCtx->sSpsPpsCtx.bPpsExistAheadFlag = true; + 
++ (pCtx->sSpsPpsCtx.iSeqId); break; case NAL_UNIT_SEI: @@ -683,7 +686,7 @@ int32_t ParseRefBasePicMarking (PBitStringAux pBs, PRefBasePicMarking pRefBasePi } int32_t ParsePrefixNalUnit (PWelsDecoderContext pCtx, PBitStringAux pBs) { - PNalUnit pCurNal = &pCtx->sPrefixNal; + PNalUnit pCurNal = &pCtx->sSpsPpsCtx.sPrefixNal; uint32_t uiCode; if (pCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) { @@ -834,12 +837,12 @@ const SLevelLimits* GetLevelLimits (int32_t iLevelIdx, bool bConstraint3) { bool CheckSpsActive (PWelsDecoderContext pCtx, PSps pSps, bool bUseSubsetFlag) { for (int i = 0; i < MAX_LAYER_NUM; i++) { - if (pCtx->pActiveLayerSps[i] == pSps) + if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] == pSps) return true; } // Pre-active, will be used soon if (bUseSubsetFlag) { - if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->bSubspsAvailFlags[pSps->iSpsId]) { + if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->sSpsPpsCtx.bSubspsAvailFlags[pSps->iSpsId]) { if (pCtx->iTotalNumMbRec > 0) { return true; } @@ -857,7 +860,7 @@ bool CheckSpsActive (PWelsDecoderContext pCtx, PSps pSps, bool bUseSubsetFlag) { } } } else { - if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->bSpsAvailFlags[pSps->iSpsId]) { + if (pSps->iMbWidth > 0 && pSps->iMbHeight > 0 && pCtx->sSpsPpsCtx.bSpsAvailFlags[pSps->iSpsId]) { if (pCtx->iTotalNumMbRec > 0) { return true; } @@ -1251,57 +1254,57 @@ int32_t ParseSps (PWelsDecoderContext pCtx, PBitStringAux pBsAux, int32_t* pPicW if (PRO_SCALABLE_BASELINE == uiProfileIdc || PRO_SCALABLE_HIGH == uiProfileIdc) - pCtx->bAvcBasedFlag = false; + pCtx->sSpsPpsCtx.bAvcBasedFlag = false; *pPicWidth = pSps->iMbWidth << 4; *pPicHeight = pSps->iMbHeight << 4; PSps pTmpSps = NULL; if (kbUseSubsetFlag) { - pTmpSps = &pCtx->sSubsetSpsBuffer[iSpsId].sSps; + pTmpSps = &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId].sSps; } else { - pTmpSps = &pCtx->sSpsBuffer[iSpsId]; + pTmpSps = &pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId]; } if (CheckSpsActive (pCtx, pTmpSps, 
kbUseSubsetFlag)) { // we are overwriting the active sps, copy a temp buffer if (kbUseSubsetFlag) { - if (memcmp (&pCtx->sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)) != 0) { + if (memcmp (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)) != 0) { if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) { - memcpy (&pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps)); + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps)); pCtx->bAuReadyFlag = true; pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1; - pCtx->iOverwriteFlags |= OVERWRITE_SUBSETSPS; + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SUBSETSPS; } else if ((pCtx->pSps != NULL) && (pCtx->pSps->iSpsId == pSubsetSps->sSps.iSpsId)) { - memcpy (&pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps)); - pCtx->iOverwriteFlags |= OVERWRITE_SUBSETSPS; + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], pSubsetSps, sizeof (SSubsetSps)); + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SUBSETSPS; } else { - memcpy (&pCtx->sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)); + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)); } } } else { - if (memcmp (&pCtx->sSpsBuffer[iSpsId], pSps, sizeof (SSps)) != 0) { + if (memcmp (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps)) != 0) { if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) { - memcpy (&pCtx->sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps)); - pCtx->iOverwriteFlags |= OVERWRITE_SPS; + memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps)); + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SPS; pCtx->bAuReadyFlag = true; pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1; } else if ((pCtx->pSps != NULL) && (pCtx->pSps->iSpsId == pSps->iSpsId)) { - memcpy (&pCtx->sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps)); - pCtx->iOverwriteFlags |= 
OVERWRITE_SPS; + memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], pSps, sizeof (SSps)); + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_SPS; } else { - memcpy (&pCtx->sSpsBuffer[iSpsId], pSps, sizeof (SSps)); + memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps)); } } } } // Not overwrite active sps, just copy to final place else if (kbUseSubsetFlag) { - memcpy (&pCtx->sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)); - pCtx->bSubspsAvailFlags[iSpsId] = true; - pCtx->bSubspsExistAheadFlag = true; + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[iSpsId], pSubsetSps, sizeof (SSubsetSps)); + pCtx->sSpsPpsCtx.bSubspsAvailFlags[iSpsId] = true; + pCtx->sSpsPpsCtx.bSubspsExistAheadFlag = true; } else { - memcpy (&pCtx->sSpsBuffer[iSpsId], pSps, sizeof (SSps)); - pCtx->bSpsAvailFlags[iSpsId] = true; - pCtx->bSpsExistAheadFlag = true; + memcpy (&pCtx->sSpsPpsCtx.sSpsBuffer[iSpsId], pSps, sizeof (SSps)); + pCtx->sSpsPpsCtx.bSpsAvailFlags[iSpsId] = true; + pCtx->sSpsPpsCtx.bSpsExistAheadFlag = true; } return ERR_NONE; } @@ -1421,8 +1424,8 @@ int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux, WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //pic_scaling_matrix_present_flag pPps->bPicScalingMatrixPresentFlag = !!uiCode; if (pPps->bPicScalingMatrixPresentFlag) { - if (pCtx->bSpsAvailFlags[pPps->iSpsId]) { - WELS_READ_VERIFY (ParseScalingList (&pCtx->sSpsBuffer[pPps->iSpsId], pBsAux, 1, pPps->bTransform8x8ModeFlag, + if (pCtx->sSpsPpsCtx.bSpsAvailFlags[pPps->iSpsId]) { + WELS_READ_VERIFY (ParseScalingList (&pCtx->sSpsPpsCtx.sSpsBuffer[pPps->iSpsId], pBsAux, 1, pPps->bTransform8x8ModeFlag, pPps->bPicScalingListPresentFlag, pPps->iScalingList4x4, pPps->iScalingList8x8)); } else { WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, @@ -1440,16 +1443,16 @@ int32_t ParsePps (PWelsDecoderContext pCtx, PPps pPpsList, PBitStringAux pBsAux, if (pCtx->pPps != NULL && pCtx->pPps->iPpsId == pPps->iPpsId) { if (memcmp (pCtx->pPps, pPps, sizeof 
(*pPps)) != 0) { - memcpy (&pCtx->sPpsBuffer[MAX_PPS_COUNT], pPps, sizeof (SPps)); - pCtx->iOverwriteFlags |= OVERWRITE_PPS; + memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT], pPps, sizeof (SPps)); + pCtx->sSpsPpsCtx.iOverwriteFlags |= OVERWRITE_PPS; if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) { pCtx->bAuReadyFlag = true; pCtx->pAccessUnitList->uiEndPos = pCtx->pAccessUnitList->uiAvailUnitsNum - 1; } } } else { - memcpy (&pCtx->sPpsBuffer[uiPpsId], pPps, sizeof (SPps)); - pCtx->bPpsAvailFlags[uiPpsId] = true; + memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[uiPpsId], pPps, sizeof (SPps)); + pCtx->sSpsPpsCtx.bPpsAvailFlags[uiPpsId] = true; } if (pCtx->pParam->bParseOnly) { if (kSrcNalLen >= SPS_PPS_BS_SIZE - 4) { //pps bs exceeds diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp index 40ad5efade9..1817ab5b44b 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/deblocking.cpp @@ -55,17 +55,17 @@ namespace WelsDec { #define g_kiBetaTable(x) g_kiBetaTable[(x)+12] #define g_kiTc0Table(x) g_kiTc0Table[(x)+12] -#define MB_BS_MV(iRefIndex, iMotionVector, iMbXy, iMbBn, iIndex, iNeighIndex) \ +#define MB_BS_MV(pRefPic0, pRefPic1, iMotionVector, iMbXy, iMbBn, iIndex, iNeighIndex) \ (\ - ( iRefIndex[iMbXy][iIndex] - iRefIndex[iMbBn][iNeighIndex] )||\ + ( pRefPic0 != pRefPic1) ||\ ( WELS_ABS( iMotionVector[iMbXy][iIndex][0] - iMotionVector[iMbBn][iNeighIndex][0] ) >= 4 ) ||\ ( WELS_ABS( iMotionVector[iMbXy][iIndex][1] - iMotionVector[iMbBn][iNeighIndex][1] ) >= 4 )\ ) #if defined(SAME_MB_DIFF_REFIDX) -#define SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex) \ +#define SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex) \ (\ - ( iRefIndex[iIndex] - iRefIndex[iNeighIndex] )||(\ + ( pRefPics[iIndex] != pRefPics[iNeighIndex] )||(\ ( WELS_ABS( iMotionVector[iIndex][0] 
- iMotionVector[iNeighIndex][0] ) &(~3) ) |\ ( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\ ) @@ -76,8 +76,8 @@ namespace WelsDec { ) #endif -#define BS_EDGE(bsx1, iRefIndex, iMotionVector, iIndex, iNeighIndex) \ -( (bsx1|SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!bsx1))) +#define BS_EDGE(bsx1, pRefPics, iMotionVector, iIndex, iNeighIndex) \ +( (bsx1|SMB_EDGE_MV(pRefPics, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!bsx1))) #define GET_ALPHA_BETA_FROM_QP(iQp, iAlphaOffset, iBetaOffset, iIndex, iAlpha, iBeta) \ {\ @@ -148,13 +148,13 @@ static const uint8_t g_kuiTableB8x8Idx[2][16] = { 8, 9, 12, 13, 10, 11, 14, 15 }, }; - +//fix Bugzilla 1486223 #define TC0_TBL_LOOKUP(tc, iIndexA, pBS, bChroma) \ {\ - tc[0] = g_kiTc0Table(iIndexA)[pBS[0]] + bChroma;\ - tc[1] = g_kiTc0Table(iIndexA)[pBS[1]] + bChroma;\ - tc[2] = g_kiTc0Table(iIndexA)[pBS[2]] + bChroma;\ - tc[3] = g_kiTc0Table(iIndexA)[pBS[3]] + bChroma;\ + tc[0] = g_kiTc0Table(iIndexA)[pBS[0] & 3] + bChroma;\ + tc[1] = g_kiTc0Table(iIndexA)[pBS[1] & 3] + bChroma;\ + tc[2] = g_kiTc0Table(iIndexA)[pBS[2] & 3] + bChroma;\ + tc[3] = g_kiTc0Table(iIndexA)[pBS[3] & 3] + bChroma;\ } void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) { @@ -201,14 +201,25 @@ void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4 nBS[1][2][2] = nBS[1][2][3] = (i8x8NnzTab[1] | i8x8NnzTab[3]) << iLShiftFactor; } -void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab, +void static inline DeblockingBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], + int8_t* pNnzTab, int32_t iMbXy) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; - int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy]; + int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy]; + void* iRefs[MB_BLOCK4x4_NUM]; + int i; 
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); int8_t i8x8NnzTab[4]; + /* Look up each reference picture based on indices */ + for (i = 0; i < MB_BLOCK4x4_NUM; i++) { + if (iRefIdx[i] > REF_NOT_IN_LIST) + iRefs[i] = pFilter->pRefPics[LIST_0][iRefIdx[i]]; + else + iRefs[i] = NULL; + } + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { for (int32_t i = 0; i < 4; i++) { int32_t iBlkIdx = i << 2; @@ -216,15 +227,15 @@ void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]); } //vertical - nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], + nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]); - nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], + nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]); //horizontal - nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], + nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]); - nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], + nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]); } else { uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); @@ -234,58 +245,70 @@ void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS for (int i = 0; i < 3; i++) uiBsx4[i] = 
pNnzTab[i] | pNnzTab[i + 1]; - nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0); - nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1); - nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2); + nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 1, 0); + nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 2, 1); + nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 3, 2); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; - nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4); - nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5); - nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6); + nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 4); + nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 5); + nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 6); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; - nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8); - nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9); - nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10); + nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 8); + nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 9); + nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 10); for (int i = 0; i < 3; i++) uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; - nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12); - nBS[0][2][3] = BS_EDGE (uiBsx4[1], 
iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13); - nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14); + nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 12); + nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 13); + nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 14); // horizontal * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); - nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0); - nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1); - nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2); - nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3); + nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 4, 0); + nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 5, 1); + nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 6, 2); + nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 7, 3); * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); - nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4); - nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5); - nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6); - nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7); + nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 8, 4); + nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 9, 5); + nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 10, 6); + nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 11, 7); * (uint32_t*)uiBsx4 = (uiNnz32b2 | 
uiNnz32b3); - nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8); - nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9); - nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10); - nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11); + nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 12, 8); + nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 13, 9); + nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 14, 10); + nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs, pCurDqLayer->pDec->pMv[LIST_0][iMbXy], 15, 11); } } -void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab, +void static inline DeblockingBSliceBSInsideMBNormal (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, + uint8_t nBS[2][4][4], int8_t* pNnzTab, int32_t iMbXy) { uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3; - int8_t* iRefIndex[LIST_A]; - iRefIndex[LIST_0] = pCurDqLayer->pRefIndex[LIST_0][iMbXy]; - iRefIndex[LIST_1] = pCurDqLayer->pRefIndex[LIST_1][iMbXy]; - ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); + void* iRefs[LIST_A][MB_BLOCK4x4_NUM]; + ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4); int8_t i8x8NnzTab[4]; + int l; + + for (l = 0; l < LIST_A; l++) { + int8_t* iRefIdx = pCurDqLayer->pDec->pRefIndex[l][iMbXy]; + int i; + /* Look up each reference picture based on indices */ + for (i = 0; i < MB_BLOCK4x4_NUM; i++) { + if (iRefIdx[i] > REF_NOT_IN_LIST) + iRefs[l][i] = pFilter->pRefPics[l][iRefIdx[i]]; + else + iRefs[l][i] = NULL; + } + } if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { for (int32_t i = 0; i < 4; i++) { @@ -298,9 +321,9 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 int8_t iNeigborIndex = g_kuiMbCountScan4Idx[0]; nBS[0][2][0] = nBS[0][2][1] = 1; for (int32_t 
listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) { - nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex[listIdx], - pCurDqLayer->pMv[listIdx][iMbXy], + if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) { + nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefs[listIdx], + pCurDqLayer->pDec->pMv[listIdx][iMbXy], iIndex, iNeigborIndex); break; } @@ -309,9 +332,9 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 iNeigborIndex = g_kuiMbCountScan4Idx[2 << 2]; nBS[0][2][2] = nBS[0][2][3] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) { - nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex[listIdx], - pCurDqLayer->pMv[listIdx][iMbXy], + if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) { + nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefs[listIdx], + pCurDqLayer->pDec->pMv[listIdx][iMbXy], iIndex, iNeigborIndex); break; } @@ -322,9 +345,9 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 iNeigborIndex = g_kuiMbCountScan4Idx[0]; nBS[1][2][0] = nBS[1][2][1] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) { - nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex[listIdx], - pCurDqLayer->pMv[listIdx][iMbXy], + if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) { + nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefs[listIdx], + pCurDqLayer->pDec->pMv[listIdx][iMbXy], iIndex, iNeigborIndex); break; } @@ -334,9 +357,9 @@ void static inline 
DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 iNeigborIndex = g_kuiMbCountScan4Idx[1 << 2]; nBS[1][2][2] = nBS[1][2][3] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) { - nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex[listIdx], - pCurDqLayer->pMv[listIdx][iMbXy], + if (iRefs[listIdx][iIndex] && iRefs[listIdx][iNeigborIndex]) { + nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefs[listIdx], + pCurDqLayer->pDec->pMv[listIdx][iMbXy], iIndex, iNeigborIndex); break; } @@ -351,22 +374,22 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1]; nBS[0][1][0] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][1] > REF_NOT_IN_LIST && iRefIndex[listIdx][0] > REF_NOT_IN_LIST) { - nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 1, 0); + if (iRefs[listIdx][1] && iRefs[listIdx][0]) { + nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 1, 0); break; } } nBS[0][2][0] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][2] > REF_NOT_IN_LIST && iRefIndex[listIdx][1] > REF_NOT_IN_LIST) { - nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 2, 1); + if (iRefs[listIdx][2] && iRefs[listIdx][1]) { + nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 2, 1); break; } } nBS[0][3][0] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][3] > REF_NOT_IN_LIST && iRefIndex[listIdx][2] > REF_NOT_IN_LIST) { - nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 3, 2); + if (iRefs[listIdx][3] && iRefs[listIdx][2]) { + 
nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 3, 2); break; } } @@ -375,22 +398,22 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1]; nBS[0][1][1] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][5] > REF_NOT_IN_LIST && iRefIndex[listIdx][4] > REF_NOT_IN_LIST) { - nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 5, 4); + if (iRefs[listIdx][5] && iRefs[listIdx][4]) { + nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 5, 4); break; } } nBS[0][2][1] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][6] > REF_NOT_IN_LIST && iRefIndex[listIdx][5] > REF_NOT_IN_LIST) { - nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 6, 5); + if (iRefs[listIdx][6] && iRefs[listIdx][5]) { + nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 6, 5); break; } } nBS[0][3][1] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][7] > REF_NOT_IN_LIST && iRefIndex[listIdx][6] > REF_NOT_IN_LIST) { - nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 7, 6); + if (iRefs[listIdx][7] && iRefs[listIdx][6]) { + nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 7, 6); break; } } @@ -399,22 +422,22 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1]; nBS[0][1][2] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][9] > REF_NOT_IN_LIST && iRefIndex[listIdx][8] > REF_NOT_IN_LIST) { - nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 9, 8); + if (iRefs[listIdx][9] && 
iRefs[listIdx][8]) { + nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 9, 8); break; } } nBS[0][2][2] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][10] > REF_NOT_IN_LIST && iRefIndex[listIdx][9] > REF_NOT_IN_LIST) { - nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 10, 9); + if (iRefs[listIdx][10] && iRefs[listIdx][9]) { + nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 10, 9); break; } } nBS[0][3][2] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][11] > REF_NOT_IN_LIST && iRefIndex[listIdx][10] > REF_NOT_IN_LIST) { - nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 11, 10); + if (iRefs[listIdx][11] && iRefs[listIdx][10]) { + nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 11, 10); break; } } @@ -423,22 +446,22 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1]; nBS[0][1][3] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][13] > REF_NOT_IN_LIST && iRefIndex[listIdx][12] > REF_NOT_IN_LIST) { - nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 13, 12); + if (iRefs[listIdx][13] && iRefs[listIdx][12]) { + nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 13, 12); break; } } nBS[0][2][3] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][14] > REF_NOT_IN_LIST && iRefIndex[listIdx][13] > REF_NOT_IN_LIST) { - nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 14, 13); + if (iRefs[listIdx][14] && iRefs[listIdx][13]) { + nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefs[listIdx], 
pCurDqLayer->pDec->pMv[listIdx][iMbXy], 14, 13); break; } } nBS[0][3][3] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][15] > REF_NOT_IN_LIST && iRefIndex[listIdx][14] > REF_NOT_IN_LIST) { - nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 15, 14); + if (iRefs[listIdx][15] && iRefs[listIdx][14]) { + nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 15, 14); break; } } @@ -447,29 +470,29 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); nBS[1][1][0] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][4] > REF_NOT_IN_LIST && iRefIndex[listIdx][0] > REF_NOT_IN_LIST) { - nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 4, 0); + if (iRefs[listIdx][4] && iRefs[listIdx][0]) { + nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 4, 0); break; } } nBS[1][1][1] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][5] > REF_NOT_IN_LIST && iRefIndex[listIdx][1] > REF_NOT_IN_LIST) { - nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 5, 1); + if (iRefs[listIdx][5] && iRefs[listIdx][1]) { + nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 5, 1); break; } } nBS[1][1][2] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][6] > REF_NOT_IN_LIST && iRefIndex[listIdx][2] > REF_NOT_IN_LIST) { - nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 6, 2); + if (iRefs[listIdx][6] && iRefs[listIdx][2]) { + nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 6, 2); break; } } nBS[1][1][3] = 1; for (int32_t listIdx = LIST_0; listIdx < 
LIST_A; ++listIdx) { - if (iRefIndex[listIdx][7] > REF_NOT_IN_LIST && iRefIndex[listIdx][3] > REF_NOT_IN_LIST) { - nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 7, 3); + if (iRefs[listIdx][7] && iRefs[listIdx][3]) { + nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 7, 3); break; } } @@ -477,29 +500,29 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2); nBS[1][2][0] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][8] > REF_NOT_IN_LIST && iRefIndex[listIdx][4] > REF_NOT_IN_LIST) { - nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 8, 4); + if (iRefs[listIdx][8] && iRefs[listIdx][4]) { + nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 8, 4); break; } } nBS[1][2][1] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][9] > REF_NOT_IN_LIST && iRefIndex[listIdx][5] > REF_NOT_IN_LIST) { - nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 9, 5); + if (iRefs[listIdx][9] && iRefs[listIdx][5]) { + nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 9, 5); break; } } nBS[1][2][2] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][10] > REF_NOT_IN_LIST && iRefIndex[listIdx][6] > REF_NOT_IN_LIST) { - nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 10, 6); + if (iRefs[listIdx][10] && iRefs[listIdx][6]) { + nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 10, 6); break; } } nBS[1][2][3] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][11] > REF_NOT_IN_LIST && iRefIndex[listIdx][7] > REF_NOT_IN_LIST) { - nBS[1][2][3] 
= BS_EDGE (uiBsx4[3], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 11, 7); + if (iRefs[listIdx][11] && iRefs[listIdx][7]) { + nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 11, 7); break; } } @@ -507,29 +530,29 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3); nBS[1][3][0] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][12] > REF_NOT_IN_LIST && iRefIndex[listIdx][8] > REF_NOT_IN_LIST) { - nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 12, 8); + if (iRefs[listIdx][12] && iRefs[listIdx][8]) { + nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 12, 8); break; } } nBS[1][3][1] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][13] > REF_NOT_IN_LIST && iRefIndex[listIdx][9] > REF_NOT_IN_LIST) { - nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 13, 9); + if (iRefs[listIdx][13] && iRefs[listIdx][9]) { + nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 13, 9); break; } } nBS[1][3][2] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][14] > REF_NOT_IN_LIST && iRefIndex[listIdx][10] > REF_NOT_IN_LIST) { - nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 14, 10); + if (iRefs[listIdx][14] && iRefs[listIdx][10]) { + nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 14, 10); break; } } nBS[1][3][3] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (iRefIndex[listIdx][15] > REF_NOT_IN_LIST && iRefIndex[listIdx][11] > REF_NOT_IN_LIST) { - nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 15, 11); + if (iRefs[listIdx][15] && 
iRefs[listIdx][11]) { + nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefs[listIdx], pCurDqLayer->pDec->pMv[listIdx][iMbXy], 15, 11); break; } } @@ -537,7 +560,8 @@ void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8 } -uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) { +uint32_t DeblockingBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge, + int32_t iNeighMb, int32_t iMbXy) { int32_t i, j; uint32_t uiBSx4; uint8_t* pBS = (uint8_t*) (&uiBSx4); @@ -545,17 +569,23 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4]; const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0]; const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8]; + int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pRefIndex[LIST_0] : + pCurDqLayer->pRefIndex[LIST_0]; if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) { for (i = 0; i < 2; i++) { uint8_t uiNzc = 0; for (j = 0; uiNzc == 0 && j < 4; j++) { - uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]); + uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]); } if (uiNzc) { pBS[i << 1] = pBS[1 + (i << 1)] = 2; } else { - pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, + PPicture ref0, ref1; + ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL; + ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? 
pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] : + NULL; + pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[LIST_0], iMbXy, iNeighMb, *pB8x8Idx, *pBn8x8Idx); } pB8x8Idx += 4; @@ -565,13 +595,17 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int for (i = 0; i < 2; i++) { uint8_t uiNzc = 0; for (j = 0; uiNzc == 0 && j < 4; j++) { - uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)]; + uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)]; } for (j = 0; j < 2; j++) { - if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) { + if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) { pBS[j + (i << 1)] = 2; } else { - pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pB8x8Idx, + PPicture ref0, ref1; + ref0 = (iRefIdx[iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pB8x8Idx]] : NULL; + ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL; + pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, + (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pB8x8Idx, *pBnIdx); } pBnIdx++; @@ -582,13 +616,18 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int for (i = 0; i < 2; i++) { uint8_t uiNzc = 0; for (j = 0; uiNzc == 0 && j < 4; j++) { - uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]; + uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]; } for (j = 0; j < 2; j++) { - if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) { + if (uiNzc | GetPNzc (pCurDqLayer, iMbXy)[*pBIdx]) { pBS[j + (i << 1)] = 2; } else { - pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx, + PPicture ref0, ref1; + ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? 
pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL; + ref1 = (iRefIdx[iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBn8x8Idx]] : + NULL; + pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, + (pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), iMbXy, iNeighMb, *pBIdx, *pBn8x8Idx); } pBIdx++; @@ -598,11 +637,14 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int } else { // only 4x4 transform for (i = 0; i < 4; i++) { - if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) { + if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) { pBS[i] = 2; } else { - pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx, - *pBnIdx); + PPicture ref0, ref1; + ref0 = (iRefIdx[iMbXy][*pBIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iMbXy][*pBIdx]] : NULL; + ref1 = (iRefIdx[iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) ? pFilter->pRefPics[LIST_0][iRefIdx[iNeighMb][*pBnIdx]] : NULL; + pBS[i] = MB_BS_MV (ref0, ref1, (pCurDqLayer->pDec != NULL ? 
pCurDqLayer->pDec->pMv[LIST_0] : pCurDqLayer->pMv[LIST_0]), + iMbXy, iNeighMb, *pBIdx, *pBnIdx); } pBIdx++; pBnIdx++; @@ -611,7 +653,8 @@ uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int return uiBSx4; } -uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) { +uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDeblockingFilter pFilter, PDqLayer pCurDqLayer, int32_t iEdge, + int32_t iNeighMb, int32_t iMbXy) { int32_t i, j; uint32_t uiBSx4; uint8_t* pBS = (uint8_t*) (&uiBSx4); @@ -619,21 +662,25 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdg const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4]; const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0]; const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8]; + PPicture ref0, ref1; if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) { for (i = 0; i < 2; i++) { uint8_t uiNzc = 0; for (j = 0; uiNzc == 0 && j < 4; j++) { - uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]); + uiNzc |= (GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)] | GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]); } if (uiNzc) { pBS[i << 1] = pBS[1 + (i << 1)] = 2; } else { pBS[i << 1] = pBS[1 + (i << 1)] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST - && pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) { - pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, + if (pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST + && pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) { + int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[listIdx]; + ref0 = 
pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pB8x8Idx]]; + ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBn8x8Idx]]; + pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[listIdx], iMbXy, iNeighMb, *pB8x8Idx, *pBn8x8Idx); break; } @@ -646,17 +693,20 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdg for (i = 0; i < 2; i++) { uint8_t uiNzc = 0; for (j = 0; uiNzc == 0 && j < 4; j++) { - uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)]; + uiNzc |= GetPNzc (pCurDqLayer, iMbXy)[* (pB8x8Idx + j)]; } for (j = 0; j < 2; j++) { - if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) { + if (uiNzc | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) { pBS[j + (i << 1)] = 2; } else { pBS[j + (i << 1)] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST - && pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) { - pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pB8x8Idx, + if (pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST + && pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) { + int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[listIdx]; + ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pB8x8Idx]]; + ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBnIdx]]; + pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[listIdx], iMbXy, iNeighMb, *pB8x8Idx, *pBnIdx); break; } @@ -670,18 +720,20 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdg for (i = 0; i < 2; i++) { uint8_t uiNzc = 0; for (j = 0; uiNzc == 0 && j < 4; j++) { - uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]; + uiNzc |= GetPNzc (pCurDqLayer, iNeighMb)[* (pBn8x8Idx + j)]; } for (j = 0; j < 2; j++) { - if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) { + if (uiNzc | GetPNzc 
(pCurDqLayer, iMbXy)[*pBIdx]) { pBS[j + (i << 1)] = 2; } else { pBS[j + (i << 1)] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST - && pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) { - pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, - *pBn8x8Idx); + if (pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST + && pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBn8x8Idx] > REF_NOT_IN_LIST) { + int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[listIdx]; + ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pBIdx]]; + ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBn8x8Idx]]; + pBS[j + (i << 1)] = MB_BS_MV (ref0, ref1, pCurDqLayer->pDec->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, *pBn8x8Idx); break; } } @@ -693,14 +745,17 @@ uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdg } else { // only 4x4 transform for (i = 0; i < 4; i++) { - if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) { + if (GetPNzc (pCurDqLayer, iMbXy)[*pBIdx] | GetPNzc (pCurDqLayer, iNeighMb)[*pBnIdx]) { pBS[i] = 2; } else { pBS[i] = 1; for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST - && pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) { - pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, *pBnIdx); + if (pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST + && pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][*pBnIdx] > REF_NOT_IN_LIST) { + int8_t (*iRefIdx)[MB_BLOCK4x4_NUM] = pCurDqLayer->pDec->pRefIndex[listIdx]; + ref0 = pFilter->pRefPics[listIdx][iRefIdx[iMbXy][*pBIdx]]; + ref1 = pFilter->pRefPics[listIdx][iRefIdx[iNeighMb][*pBnIdx]]; + pBS[i] = MB_BS_MV (ref0, 
ref1, pCurDqLayer->pDec->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, *pBnIdx); break; } } @@ -926,8 +981,8 @@ void FilteringEdgeChromaIntraV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uin } -void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_t nBS[2][4][4], - int32_t iBoundryFlag) { +static void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_t nBS[2][4][4], + int32_t iBoundryFlag) { int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex; int32_t iMbX = pCurDqLayer->iMbX; int32_t iMbY = pCurDqLayer->iMbY; @@ -943,7 +998,7 @@ void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, uint8_ pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSizeUV + iMbX) << 3); pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSizeUV + iMbX) << 3); -//Vertical margrin + //Vertical margin if (iBoundryFlag & LEFT_FLAG_MASK) { int32_t iLeftXyIndex = iMbXyIndex - 1; pFilter->iLumaQP = (iCurLumaQp + pCurDqLayer->pLumaQp[iLeftXyIndex] + 1) >> 1; @@ -1170,7 +1225,7 @@ void FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, in } // merge h&v lookup table operation to save performance -void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) { +static void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) { FilteringEdgeLumaHV (pCurDqLayer, pFilter, iBoundryFlag); FilteringEdgeChromaHV (pCurDqLayer, pFilter, iBoundryFlag); } @@ -1179,7 +1234,8 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t uint8_t nBS[2][4][4] = {{{ 0 }}}; int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex; - uint32_t iCurMbType = pCurDqLayer->pMbType[iMbXyIndex]; + uint32_t iCurMbType = pCurDqLayer->pDec != NULL ? 
pCurDqLayer->pDec->pMbType[iMbXyIndex] : + pCurDqLayer->pMbType[iMbXyIndex]; int32_t iMbNb; PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; @@ -1197,24 +1253,28 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t if (iBoundryFlag & LEFT_FLAG_MASK) { iMbNb = iMbXyIndex - 1; + uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb]; if (bBSlice) { - * (uint32_t*)nBS[0][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBSliceBsMarginalMBAvcbase ( - pCurDqLayer, 0, iMbNb, iMbXyIndex); + * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 : + DeblockingBSliceBsMarginalMBAvcbase ( + pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex); } else { - * (uint32_t*)nBS[0][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase ( - pCurDqLayer, 0, iMbNb, iMbXyIndex); + * (uint32_t*)nBS[0][0] = IS_INTRA (uiMbType) ? 0x04040404 : DeblockingBsMarginalMBAvcbase ( + pFilter, pCurDqLayer, 0, iMbNb, iMbXyIndex); } } else { * (uint32_t*)nBS[0][0] = 0; } if (iBoundryFlag & TOP_FLAG_MASK) { iMbNb = iMbXyIndex - pCurDqLayer->iMbWidth; + uint32_t uiMbType = pCurDqLayer->pDec != NULL ? pCurDqLayer->pDec->pMbType[iMbNb] : pCurDqLayer->pMbType[iMbNb]; if (bBSlice) { - * (uint32_t*)nBS[1][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBSliceBsMarginalMBAvcbase ( - pCurDqLayer, 1, iMbNb, iMbXyIndex); + * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 0x04040404 : + DeblockingBSliceBsMarginalMBAvcbase ( + pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex); } else { - * (uint32_t*)nBS[1][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase ( - pCurDqLayer, 1, iMbNb, iMbXyIndex); + * (uint32_t*)nBS[1][0] = IS_INTRA (uiMbType) ? 
0x04040404 : DeblockingBsMarginalMBAvcbase ( + pFilter, pCurDqLayer, 1, iMbNb, iMbXyIndex); } } else { * (uint32_t*)nBS[1][0] = 0; @@ -1226,16 +1286,16 @@ void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t } else { if (IS_INTER_16x16 (iCurMbType)) { if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) { - DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1); + DeblockingBSInsideMBAvsbase (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1); } else { - DeblockingBSInsideMBAvsbase8x8 (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1); + DeblockingBSInsideMBAvsbase8x8 (GetPNzc (pCurDqLayer, iMbXyIndex), nBS, 1); } } else { if (bBSlice) { - DeblockingBSliceBSInsideMBNormal (pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex); + DeblockingBSliceBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex); } else { - DeblockingBSInsideMBNormal (pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex); + DeblockingBSInsideMBNormal (pFilter, pCurDqLayer, nBS, GetPNzc (pCurDqLayer, iMbXyIndex), iMbXyIndex); } } } @@ -1280,6 +1340,8 @@ void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFun pFilter.iSliceBetaOffset = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset; pFilter.pLoopf = &pCtx->sDeblockingFunc; + pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0]; + pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1]; /* Step2: macroblock deblocking */ if (0 == iFilterIdc || 2 == iFilterIdc) { @@ -1313,6 +1375,56 @@ void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFun } while (1); } } + +/*! 
+* \brief AVC slice init deblocking filtering target layer +* +* \in and out param SDeblockingFilter +* \in and out param iFilterIdc +* +* \return NONE +*/ +void WelsDeblockingInitFilter (PWelsDecoderContext pCtx, SDeblockingFilter& pFilter, int32_t& iFilterIdc) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt; + + memset (&pFilter, 0, sizeof (pFilter)); + + iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc; + + /* Step1: parameters set */ + pFilter.pCsData[0] = pCtx->pDec->pData[0]; + pFilter.pCsData[1] = pCtx->pDec->pData[1]; + pFilter.pCsData[2] = pCtx->pDec->pData[2]; + + pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0]; + pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1]; + + pFilter.eSliceType = (EWelsSliceType)pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType; + + pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset; + pFilter.iSliceBetaOffset = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset; + + pFilter.pLoopf = &pCtx->sDeblockingFunc; + pFilter.pRefPics[0] = pCtx->sRefPic.pRefList[0]; + pFilter.pRefPics[1] = pCtx->sRefPic.pRefList[1]; +} + +/*! +* \brief AVC MB deblocking filtering target layer +* +* \param DqLayer which has the current location of MB to be deblocked. +* +* \return NONE +*/ +void WelsDeblockingFilterMB (PDqLayer pCurDqLayer, SDeblockingFilter& pFilter, int32_t& iFilterIdc, + PDeblockingFilterMbFunc pDeblockMb) { + /* macroblock deblocking */ + if (0 == iFilterIdc || 2 == iFilterIdc) { + int32_t iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc); + pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag); + } +} /*! 
* \brief deblocking module initialize * @@ -1391,6 +1503,19 @@ void DeblockingInit (SDeblockingFunc* pFunc, int32_t iCpu) { pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi; } #endif//HAVE_MMI + +#if defined(HAVE_MSA) + if (iCpu & WELS_CPU_MSA) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa; + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa; + pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa; + } +#endif//HAVE_MSA } } // namespace WelsDec diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp index 14935587275..d06a7d77f12 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/decode_slice.cpp @@ -54,45 +54,61 @@ namespace WelsDec { extern void FreePicture (PPicture pPic, CMemoryAlign* pMa); -static inline int32_t iAbs (int32_t x) { - static const int32_t INT_BITS = (sizeof (int) * CHAR_BIT) - 1; - int32_t y = x >> INT_BITS; - return (x ^ y) - y; -} - extern PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int32_t kiPicHeight); +static bool CheckRefPics (const PWelsDecoderContext& pCtx) { + int32_t listCount = 1; + if (pCtx->eSliceType == B_SLICE) { + ++listCount; + } + for (int32_t list = LIST_0; list < listCount; ++list) { + int32_t shortRefCount = pCtx->sRefPic.uiShortRefCount[list]; + for (int32_t refIdx = 0; refIdx < shortRefCount; ++refIdx) { + if (!pCtx->sRefPic.pShortRefList[list][refIdx]) { + return false; + } + } + int32_t longRefCount = pCtx->sRefPic.uiLongRefCount[list]; + for (int32_t refIdx = 0; refIdx < 
longRefCount; ++refIdx) { + if (!pCtx->sRefPic.pLongRefList[list][refIdx]) { + return false; + } + } + } + return true; +} + int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSlice pCurSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader; int32_t iTotalMbTargetLayer = pSliceHeader->pSps->uiTotalMbCount; - int32_t iCurLayerWidth = pCurLayer->iMbWidth << 4; - int32_t iCurLayerHeight = pCurLayer->iMbHeight << 4; + int32_t iCurLayerWidth = pCurDqLayer->iMbWidth << 4; + int32_t iCurLayerHeight = pCurDqLayer->iMbHeight << 4; int32_t iNextMbXyIndex = 0; PFmo pFmo = pCtx->pFmo; int32_t iTotalNumMb = pCurSlice->iTotalMbInCurSlice; int32_t iCountNumMb = 0; - PDeblockingFilterMbFunc pDeblockMb; + PDeblockingFilterMbFunc pDeblockMb = WelsDeblockingMb; - if (!pCtx->bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) { + if (!pCtx->sSpsPpsCtx.bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) { return ERR_INFO_WIDTH_MISMATCH; } iNextMbXyIndex = pSliceHeader->iFirstMbInSlice; - pCurLayer->iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; - pCurLayer->iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; - pCurLayer->iMbXyIndex = iNextMbXyIndex; + pCurDqLayer->iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + pCurDqLayer->iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; if (0 == iNextMbXyIndex) { - pCurLayer->pDec->iSpsId = pCtx->pSps->iSpsId; - pCurLayer->pDec->iPpsId = pCtx->pPps->iPpsId; + pCurDqLayer->pDec->iSpsId = pCtx->pSps->iSpsId; + pCurDqLayer->pDec->iPpsId = pCtx->pPps->iPpsId; - pCurLayer->pDec->uiQualityId = pCurLayer->sLayerInfo.sNalHeaderExt.uiQualityId; + pCurDqLayer->pDec->uiQualityId = pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId; } do { @@ -104,16 +120,16 @@ int32_t 
WelsTargetSliceConstruction (PWelsDecoderContext pCtx) { if (WelsTargetMbConstruction (pCtx)) { WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d", - pCurLayer->iMbX, pCurLayer->iMbY, pCurSlice->eSliceType); + pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurSlice->eSliceType); return ERR_INFO_MB_RECON_FAIL; } } ++iCountNumMb; - if (!pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite - pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true; - pCtx->pDec->iMbEcedPropNum += (pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0); + if (!pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite + pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true; + pCtx->pDec->iMbEcedPropNum += (pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0); ++pCtx->iTotalNumMbRec; } @@ -133,9 +149,9 @@ int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) { if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbTargetLayer) { // slice group boundary or end of a frame break; } - pCurLayer->iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; - pCurLayer->iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; - pCurLayer->iMbXyIndex = iNextMbXyIndex; + pCurDqLayer->iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + pCurDqLayer->iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; } while (1); pCtx->pDec->iWidthInPixel = iCurLayerWidth; @@ -147,8 +163,6 @@ int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) { if (pCtx->pParam->bParseOnly) //for parse only, deblocking should not go on return ERR_NONE; - pDeblockMb = WelsDeblockingMb; - if (1 == pSliceHeader->uiDisableDeblockingFilterIdc || pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice <= 0) { return ERR_NONE;//NO_SUPPORTED_FILTER_IDX @@ -160,32 +174,32 @@ int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) { return ERR_NONE; } -int32_t 
WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, +int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) { - int32_t iMbXy = pCurLayer->iMbXyIndex; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; int32_t i, iIndex, iOffset; - if (pCurLayer->pTransformSize8x8Flag[iMbXy]) { + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { for (i = 0; i < 4; i++) { iIndex = g_kuiMbCountScan4Idx[i << 2]; - if (pCurLayer->pNzc[iMbXy][iIndex] || pCurLayer->pNzc[iMbXy][iIndex + 1] || pCurLayer->pNzc[iMbXy][iIndex + 4] - || pCurLayer->pNzc[iMbXy][iIndex + 5]) { + if (pCurDqLayer->pNzc[iMbXy][iIndex] || pCurDqLayer->pNzc[iMbXy][iIndex + 1] || pCurDqLayer->pNzc[iMbXy][iIndex + 4] + || pCurDqLayer->pNzc[iMbXy][iIndex + 5]) { iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2); - pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 6)); + pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 6)); } } } else { // luma. - const int8_t* pNzc = pCurLayer->pNzc[iMbXy]; - int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy]; + const int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; + int16_t* pScaledTCoeff = pCurDqLayer->pScaledTCoeff[iMbXy]; pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 0, iStrideL, pScaledTCoeff + 0 * 64, pNzc + 0); pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 8, iStrideL, pScaledTCoeff + 1 * 64, pNzc + 2); pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 0, iStrideL, pScaledTCoeff + 2 * 64, pNzc + 8); pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 8, iStrideL, pScaledTCoeff + 3 * 64, pNzc + 10); } - const int8_t* pNzc = pCurLayer->pNzc[iMbXy]; - int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy]; + const int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; + int16_t* pScaledTCoeff = pCurDqLayer->pScaledTCoeff[iMbXy]; // Cb. 
pCtx->pIdctFourResAddPredFunc (pDstU, iStrideC, pScaledTCoeff + 4 * 64, pNzc + 16); // Cr. @@ -193,29 +207,23 @@ int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLa return ERR_NONE; } -int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) { - int32_t iMbX = pCurLayer->iMbX; - int32_t iMbY = pCurLayer->iMbY; +int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer) { + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; uint8_t* pDstY, *pDstCb, *pDstCr; int32_t iLumaStride = pCtx->pDec->iLinesize[0]; int32_t iChromaStride = pCtx->pDec->iLinesize[1]; - pDstY = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); - pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); - pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); + pDstY = pCurDqLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); + pDstCb = pCurDqLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); + pDstCr = pCurDqLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); if (pCtx->eSliceType == P_SLICE) { - GetInterPred (pDstY, pDstCb, pDstCr, pCtx); + WELS_B_MB_REC_VERIFY (GetInterPred (pDstY, pDstCb, pDstCr, pCtx)); } else { if (pCtx->pTempDec == NULL) pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); - else { - if (pCtx->pTempDec->iLinesize[0] != pCtx->pDec->iLinesize[0]) { - FreePicture (pCtx->pTempDec, pCtx->pMemAlign); - pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); - } - } uint8_t* pTempDstYCbCr[3]; uint8_t* pDstYCbCr[3]; pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); @@ -226,10 +234,12 @@ int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) { pDstYCbCr[2] = pDstCr; WELS_B_MB_REC_VERIFY (GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx)); } - WelsMbInterSampleConstruction (pCtx, 
pCurLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride); + WelsMbInterSampleConstruction (pCtx, pCurDqLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride); - pCtx->sBlockFunc.pWelsSetNonZeroCountFunc ( - pCurLayer->pNzc[pCurLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti! + if (GetThreadCount (pCtx) <= 1) { + pCtx->sBlockFunc.pWelsSetNonZeroCountFunc ( + pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti! + } return ERR_NONE; } @@ -275,36 +285,36 @@ void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pC #undef STRIDE } -int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput) { +int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer, bool bOutput) { //seems IPCM should not enter this path - int32_t iMbXy = pCurLayer->iMbXyIndex; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; - WelsFillRecNeededMbInfo (pCtx, bOutput, pCurLayer); + WelsFillRecNeededMbInfo (pCtx, bOutput, pCurDqLayer); - if (IS_INTRA16x16 (pCurLayer->pMbType[iMbXy])) { - RecI16x16Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer); - } else if (IS_INTRA8x8 (pCurLayer->pMbType[iMbXy])) { - RecI8x8Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer); - } else if (IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) { - RecI4x4Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer); + if (IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy])) { + RecI16x16Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer); + } else if (IS_INTRA8x8 (pCurDqLayer->pDec->pMbType[iMbXy])) { + RecI8x8Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer); + } else if (IS_INTRA4x4 (pCurDqLayer->pDec->pMbType[iMbXy])) { + RecI4x4Mb (iMbXy, pCtx, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer); } return ERR_NONE; } -int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) { - int32_t iMbX = pCurLayer->iMbX; - 
int32_t iMbY = pCurLayer->iMbY; +int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurDqLayer) { + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; uint8_t* pDstY, *pDstCb, *pDstCr; int32_t iLumaStride = pCtx->pDec->iLinesize[0]; int32_t iChromaStride = pCtx->pDec->iLinesize[1]; - pDstY = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); - pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); - pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); + pDstY = pCurDqLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); + pDstCb = pCurDqLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); + pDstCr = pCurDqLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); if (pCtx->eSliceType == P_SLICE) { - GetInterPred (pDstY, pDstCb, pDstCr, pCtx); + WELS_B_MB_REC_VERIFY (GetInterPred (pDstY, pDstCb, pDstCr, pCtx)); } else { if (pCtx->pTempDec == NULL) pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); @@ -316,27 +326,30 @@ int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) { pDstYCbCr[0] = pDstY; pDstYCbCr[1] = pDstCb; pDstYCbCr[2] = pDstCr; - GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx); + WELS_B_MB_REC_VERIFY (GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx)); } return ERR_NONE; } int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - if (MB_TYPE_INTRA_PCM == pCurLayer->pMbType[pCurLayer->iMbXyIndex]) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + if (MB_TYPE_INTRA_PCM == pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex]) { //already decoded and reconstructed when parsing return ERR_NONE; - } else if (IS_INTRA (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) { - WelsMbIntraPredictionConstruction (pCtx, pCurLayer, 1); - } else if (IS_INTER (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) { //InterMB - if (0 == pCurLayer->pCbp[pCurLayer->iMbXyIndex]) { 
//uiCbp==0 include SKIP - WelsMbInterPrediction (pCtx, pCurLayer); + } else if (IS_INTRA (pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex])) { + WelsMbIntraPredictionConstruction (pCtx, pCurDqLayer, 1); + } else if (IS_INTER (pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex])) { //InterMB + if (0 == pCurDqLayer->pCbp[pCurDqLayer->iMbXyIndex]) { //uiCbp==0 include SKIP + if (!CheckRefPics (pCtx)) { + return ERR_INFO_MB_RECON_FAIL; + } + return WelsMbInterPrediction (pCtx, pCurDqLayer); } else { - WelsMbInterConstruction (pCtx, pCurLayer); + WelsMbInterConstruction (pCtx, pCurDqLayer); } } else { WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d", - pCurLayer->pMbType[pCurLayer->iMbXyIndex]); + pCurDqLayer->pDec->pMbType[pCurDqLayer->iMbXyIndex]); return ERR_INFO_MB_RECON_FAIL; } @@ -631,25 +644,25 @@ int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAva } int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PBitStringAux pBsAux = pCurLayer->pBitStringAux; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBsAux = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; SWelsNeighAvail sNeighAvail; int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; - int32_t iMbXy = pCurLayer->iMbXyIndex; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; int32_t i; uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0; ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = 
false; - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; - pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; - GetNeighborAvailMbType (&sNeighAvail, pCurLayer); + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; + GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer); WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType)); if (uiMbType > 25) { return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); @@ -667,104 +680,104 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui return ERR_NONE; } else if (0 == uiMbType) { //I4x4 ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; if (pCtx->pPps->bTransform8x8ModeFlag) { // Transform 8x8 cabac will be added soon WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); } if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { - uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8; - pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer)); + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); } else { - pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer)); + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, 
&sNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); } //get uiCbp for I4x4 WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp)); - pCurLayer->pCbp[iMbXy] = uiCbp; + pCurDqLayer->pCbp[iMbXy] = uiCbp; pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp; uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? uiCbp >> 4 : 0; uiCbpLuma = uiCbp & 15; } else { //I16x16; - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; - pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; - uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ; - uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15; - WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer); - WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurLayer)); - } - - ST32 (&pCurLayer->pNzc[iMbXy][0], 0); - ST32 (&pCurLayer->pNzc[iMbXy][4], 0); - ST32 (&pCurLayer->pNzc[iMbXy][8], 0); - ST32 (&pCurLayer->pNzc[iMbXy][12], 0); - ST32 (&pCurLayer->pNzc[iMbXy][16], 0); - ST32 (&pCurLayer->pNzc[iMbXy][20], 0); - pCurLayer->pCbfDc[iMbXy] = 0; - - if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) { - pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0 ; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurDqLayer)); + } + + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + pCurDqLayer->pCbfDc[iMbXy] = 0; + + if (pCurDqLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurDqLayer->pDec->pMbType[iMbXy])) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] + - pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)]; } } - if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { - memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0])); + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0])); int32_t iQpDelta, iId8x8, iId4x4; WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta)); if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); } - pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp - pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy]; + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 
((pSlice->iLastMbQp + - pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)]; } - if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { //step1: Luma DC WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan, - I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)); + I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)); //step2: Luma AC if (uiCbpLuma) { for (i = 0; i < 16; i++) { WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, - pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx)); + pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurDqLayer->pLumaQp[iMbXy], pCtx)); } - ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); - ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); - ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); - ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } else { //pNonZeroCount = 0 - ST32 (&pCurLayer->pNzc[iMbXy][0], 0); - ST32 (&pCurLayer->pNzc[iMbXy][4], 0); - ST32 (&pCurLayer->pNzc[iMbXy][8], 0); - ST32 (&pCurLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 
0); } } else { //non-MB_TYPE_INTRA16x16 - if (pCurLayer->pTransformSize8x8Flag[iMbXy]) { + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { // Transform 8x8 support for CABAC for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { if (uiCbpLuma & (1 << iId8x8)) { WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2), iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8, - pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx)); + pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx)); } else { ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0); ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); } } - ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); - ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); - ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); - ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } else { for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { if (uiCbpLuma & (1 << iId8x8)) { @@ -772,8 +785,8 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { //Luma (DC and AC decoding together) WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1, - g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), - pCurLayer->pLumaQp[iMbXy], pCtx)); + g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)); iIdx++; } 
} else { @@ -781,10 +794,10 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); } } - ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); - ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); - ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); - ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } } int32_t iMbResProperty; @@ -795,7 +808,7 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui for (i = 0; i < 2; i++) { iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan, - iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)); + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); } } @@ -807,27 +820,27 @@ int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& ui for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, - pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx)); + pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); iIdx++; } } - ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); - ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); - ST16 
(&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); - ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); + ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); + ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); + ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); + ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); } else { - ST16 (&pCurLayer->pNzc[iMbXy][16], 0); - ST16 (&pCurLayer->pNzc[iMbXy][20], 0); - ST16 (&pCurLayer->pNzc[iMbXy][18], 0); - ST16 (&pCurLayer->pNzc[iMbXy][22], 0); + ST16 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST16 (&pCurDqLayer->pNzc[iMbXy][20], 0); + ST16 (&pCurDqLayer->pNzc[iMbXy][18], 0); + ST16 (&pCurDqLayer->pNzc[iMbXy][22], 0); } } else { - ST32 (&pCurLayer->pNzc[iMbXy][0], 0); - ST32 (&pCurLayer->pNzc[iMbXy][4], 0); - ST32 (&pCurLayer->pNzc[iMbXy][8], 0); - ST32 (&pCurLayer->pNzc[iMbXy][12], 0); - ST32 (&pCurLayer->pNzc[iMbXy][16], 0); - ST32 (&pCurLayer->pNzc[iMbXy][20], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); } WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); @@ -843,21 +856,21 @@ int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin } int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PBitStringAux pBsAux = pCurLayer->pBitStringAux; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBsAux = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; int32_t iScanIdxStart = 
pSlice->sSliceHeaderExt.uiScanIdxStart; int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; - int32_t iMbXy = pCurLayer->iMbXyIndex; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; int32_t iMbResProperty; int32_t i; uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0; ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType)); // uiMbType = 4 is not allowded. @@ -865,10 +878,10 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv int16_t pMotionVector[LIST_A][30][MV_A]; int16_t pMvdCache[LIST_A][30][MV_A]; int8_t pRefIndex[LIST_A][30]; - pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType; - WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer); + pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType; + WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurDqLayer); WELS_READ_VERIFY (ParseInterPMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex)); - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; } else { //Intra mode uiMbType -= 5; if (uiMbType > 25) @@ -888,58 +901,58 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv } else { //normal Intra mode if (0 == uiMbType) { //Intra4x4 ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; if (pCtx->pPps->bTransform8x8ModeFlag) { WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); } if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { - uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8; - 
pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer)); + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); } else { - pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer)); + pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); } } else { //Intra16x16 - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; - pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; - uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0; - uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15; - WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer); - WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer)); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurDqLayer)); } } } - ST32 (&pCurLayer->pNzc[iMbXy][0], 0); - ST32 (&pCurLayer->pNzc[iMbXy][4], 0); - ST32 (&pCurLayer->pNzc[iMbXy][8], 0); - ST32 (&pCurLayer->pNzc[iMbXy][12], 0); - ST32 (&pCurLayer->pNzc[iMbXy][16], 0); - ST32 (&pCurLayer->pNzc[iMbXy][20], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); - if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp)); - pCurLayer->pCbp[iMbXy] = uiCbp; + pCurDqLayer->pCbp[iMbXy] = uiCbp; pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp; - uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ; - uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0 ; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; } - if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { - if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { // Need modification when B picutre add in bool bNeedParseTransformSize8x8Flag = - (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16) - || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) - && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8) - && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4) - && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0) + (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16) + || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4) + && ((pCurDqLayer->pCbp[iMbXy] & 0x0F) > 0) && (pCtx->pPps->bTransform8x8ModeFlag)); if (bNeedParseTransformSize8x8Flag) { @@ -948,7 +961,7 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv } } - memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0])); + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0])); int32_t iQpDelta, iId8x8, iId4x4; @@ -956,33 +969,33 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); } - pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp - pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy]; + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + 
pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; } - if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { //step1: Luma DC WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan, - I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)); + I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)); //step2: Luma AC if (uiCbpLuma) { for (i = 0; i < 16; i++) { WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, - 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), - pCurLayer->pLumaQp[iMbXy], pCtx)); + 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)); } - ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); - ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); - ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); - ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } else { - ST32 (&pCurLayer->pNzc[iMbXy][0], 0); - ST32 (&pCurLayer->pNzc[iMbXy][4], 0); - ST32 (&pCurLayer->pNzc[iMbXy][8], 0); - ST32 
(&pCurLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); } } else { //non-MB_TYPE_INTRA16x16 if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { @@ -991,27 +1004,27 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv if (uiCbpLuma & (1 << iId8x8)) { WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2), iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, - IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8, - pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx)); + IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx)); } else { ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0); ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); } } - ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); - ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); - ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); - ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } else { - iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? 
LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { if (uiCbpLuma & (1 << iId8x8)) { int32_t iIdx = (iId8x8 << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { //Luma (DC and AC decoding together) WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1, - g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), - pCurLayer->pLumaQp[iMbXy], + g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)); iIdx++; } @@ -1020,10 +1033,10 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); } } - ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); - ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); - ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); - ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } } @@ -1031,19 +1044,19 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv //step1: DC if (1 == uiCbpChroma || 2 == uiCbpChroma) { for (i = 0; i < 2; i++) { - if (IS_INTRA (pCurLayer->pMbType[iMbXy])) + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; else iMbResProperty = i ? 
CHROMA_DC_V_INTER : CHROMA_DC_U_INTER; WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan, - iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)); + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); } } //step2: AC if (2 == uiCbpChroma) { for (i = 0; i < 2; i++) { - if (IS_INTRA (pCurLayer->pMbType[iMbXy])) + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U; else iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER; @@ -1051,23 +1064,23 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), - iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx)); + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); index++; } } - ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); - ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); - ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); - ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); + ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); + ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); + ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); + ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); } else { - ST32 (&pCurLayer->pNzc[iMbXy][16], 0); - ST32 (&pCurLayer->pNzc[iMbXy][20], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); } } else { - 
pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; } } @@ -1080,21 +1093,21 @@ int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv } int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PBitStringAux pBsAux = pCurLayer->pBitStringAux; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBsAux = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; - int32_t iMbXy = pCurLayer->iMbXyIndex; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; int32_t iMbResProperty; int32_t i; uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0; ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; WELS_READ_VERIFY (ParseMBTypeBSliceCabac (pCtx, pNeighAvail, uiMbType)); @@ -1103,12 +1116,12 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv int16_t pMvdCache[LIST_A][30][MV_A]; int8_t pRefIndex[LIST_A][30]; int8_t pDirect[30]; - pCurLayer->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType; - WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer); - WelsFillDirectCacheCabac (pNeighAvail, pDirect, pCurLayer); + 
pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType; + WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurDqLayer); + WelsFillDirectCacheCabac (pNeighAvail, pDirect, pCurDqLayer); WELS_READ_VERIFY (ParseInterBMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pDirect)); - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; } else { //Intra mode uiMbType -= 23; if (uiMbType > 25) @@ -1117,7 +1130,7 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); if (25 == uiMbType) { //I_PCM - WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!"); + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!"); WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx)); pSlice->iLastDeltaQp = 0; WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); @@ -1128,59 +1141,59 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv } else { //normal Intra mode if (0 == uiMbType) { //Intra4x4 ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; if (pCtx->pPps->bTransform8x8ModeFlag) { WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); } if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { - uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8; - pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer)); + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY 
(ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); } else { - pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer)); + pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurDqLayer)); } } else { //Intra16x16 - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; - pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; - uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0; - uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15; - WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer); - WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer)); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurDqLayer)); } } } - ST32 (&pCurLayer->pNzc[iMbXy][0], 0); - ST32 (&pCurLayer->pNzc[iMbXy][4], 0); - ST32 (&pCurLayer->pNzc[iMbXy][8], 0); - ST32 (&pCurLayer->pNzc[iMbXy][12], 0); - ST32 (&pCurLayer->pNzc[iMbXy][16], 0); - ST32 (&pCurLayer->pNzc[iMbXy][20], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); - if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp)); - pCurLayer->pCbp[iMbXy] = uiCbp; + pCurDqLayer->pCbp[iMbXy] = uiCbp; pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp; - uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0; - uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15; + uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpLuma = pCurDqLayer->pCbp[iMbXy] & 15; } - if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { - if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { // Need modification when B picutre add in bool bNeedParseTransformSize8x8Flag = - (((IS_INTER_16x16 (pCurLayer->pMbType[iMbXy]) || IS_DIRECT (pCurLayer->pMbType[iMbXy]) - || IS_INTER_16x8 (pCurLayer->pMbType[iMbXy]) || IS_INTER_8x16 (pCurLayer->pMbType[iMbXy])) - || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) - && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8) - && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4) - && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0) + (((IS_INTER_16x16 (pCurDqLayer->pDec->pMbType[iMbXy]) || IS_DIRECT (pCurDqLayer->pDec->pMbType[iMbXy]) + || IS_INTER_16x8 (pCurDqLayer->pDec->pMbType[iMbXy]) || IS_INTER_8x16 (pCurDqLayer->pDec->pMbType[iMbXy])) + || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4) + && ((pCurDqLayer->pCbp[iMbXy] & 0x0F) > 0) && (pCtx->pPps->bTransform8x8ModeFlag)); if (bNeedParseTransformSize8x8Flag) { @@ -1189,7 +1202,7 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv } } - memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0])); + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0])); int32_t iQpDelta, iId8x8, iId4x4; @@ -1197,33 +1210,33 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); } - pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + 
iQpDelta + 52) % 52; //update last_mb_qp - pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy]; + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; } - if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { //step1: Luma DC WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan, - I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)); + I16_LUMA_DC, pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)); //step2: Luma AC if (uiCbpLuma) { for (i = 0; i < 16; i++) { WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, - 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), - pCurLayer->pLumaQp[iMbXy], pCtx)); + 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)); } - ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); - ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); - ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); - ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 
LD32 (&pNonZeroCount[1 + 8 * 4])); } else { - ST32 (&pCurLayer->pNzc[iMbXy][0], 0); - ST32 (&pCurLayer->pNzc[iMbXy][4], 0); - ST32 (&pCurLayer->pNzc[iMbXy][8], 0); - ST32 (&pCurLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); } } else { //non-MB_TYPE_INTRA16x16 if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { @@ -1232,27 +1245,27 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv if (uiCbpLuma & (1 << iId8x8)) { WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2), iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, - IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8, - pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx)); + IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurDqLayer->pLumaQp[iMbXy], pCtx)); } else { ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0); ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); } } - ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); - ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); - ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); - ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } else { - iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? 
LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { if (uiCbpLuma & (1 << iId8x8)) { int32_t iIdx = (iId8x8 << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { //Luma (DC and AC decoding together) WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1, - g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), - pCurLayer->pLumaQp[iMbXy], + g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)); iIdx++; } @@ -1261,10 +1274,10 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0); } } - ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); - ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); - ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); - ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); + ST32 (&pCurDqLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4])); } } @@ -1272,19 +1285,19 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv //step1: DC if (1 == uiCbpChroma || 2 == uiCbpChroma) { for (i = 0; i < 2; i++) { - if (IS_INTRA (pCurLayer->pMbType[iMbXy])) + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; else iMbResProperty = i ? 
CHROMA_DC_V_INTER : CHROMA_DC_U_INTER; WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan, - iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)); + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); } } //step2: AC if (2 == uiCbpChroma) { for (i = 0; i < 2; i++) { - if (IS_INTRA (pCurLayer->pMbType[iMbXy])) + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U; else iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER; @@ -1292,23 +1305,23 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), - iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx)); + iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)); index++; } } - ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); - ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); - ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); - ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); + ST16 (&pCurDqLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); + ST16 (&pCurDqLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2])); + ST16 (&pCurDqLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4])); + ST16 (&pCurDqLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5])); } else { - ST32 (&pCurLayer->pNzc[iMbXy][16], 0); - ST32 (&pCurLayer->pNzc[iMbXy][20], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); } } else { - 
pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; } } @@ -1322,53 +1335,55 @@ int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAv int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0]; uint32_t uiCode; - int32_t iMbXy = pCurLayer->iMbXyIndex; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; int32_t i; SWelsNeighAvail uiNeighAvail; - pCurLayer->pCbp[iMbXy] = 0; - pCurLayer->pCbfDc[iMbXy] = 0; - pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC; + pCurDqLayer->pCbp[iMbXy] = 0; + pCurDqLayer->pCbfDc[iMbXy] = 0; + pCurDqLayer->pChromaPredMode[iMbXy] = C_PRED_DC; - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; - GetNeighborAvailMbType (&uiNeighAvail, pCurLayer); + GetNeighborAvailMbType (&uiNeighAvail, pCurDqLayer); WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode)); if (uiCode) { int16_t pMv[2] = {0}; - pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP; - ST32 (&pCurLayer->pNzc[iMbXy][0], 0); - ST32 (&pCurLayer->pNzc[iMbXy][4], 0); - ST32 (&pCurLayer->pNzc[iMbXy][8], 0); - ST32 (&pCurLayer->pNzc[iMbXy][12], 0); - ST32 
(&pCurLayer->pNzc[iMbXy][16], 0); - ST32 (&pCurLayer->pNzc[iMbXy][20], 0); - - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; - memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16); - pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP; + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16); + bool bIsPending = GetThreadCount (pCtx) > 1; + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete + || bIsPending)); //predict mv - PredPSkipMvFromNeighbor (pCurLayer, pMv); + PredPSkipMvFromNeighbor (pCurDqLayer, pMv); for (i = 0; i < 16; i++) { - ST32 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)pMv); - ST32 (pCurLayer->pMvd[0][iMbXy][i], 0); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][i], * (uint32_t*)pMv); + ST32 (pCurDqLayer->pMvd[0][iMbXy][i], 0); } //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) { - // memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t)); + // memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t)); //} //reset rS - pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? 
dqaunt of previous mb for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; } //for neighboring CABAC usage @@ -1385,57 +1400,63 @@ int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0]; PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1]; uint32_t uiCode; - int32_t iMbXy = pCurLayer->iMbXyIndex; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; int32_t i; SWelsNeighAvail uiNeighAvail; - pCurLayer->pCbp[iMbXy] = 0; - pCurLayer->pCbfDc[iMbXy] = 0; - pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC; + pCurDqLayer->pCbp[iMbXy] = 0; + pCurDqLayer->pCbfDc[iMbXy] = 0; + pCurDqLayer->pChromaPredMode[iMbXy] = C_PRED_DC; - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; - GetNeighborAvailMbType (&uiNeighAvail, pCurLayer); + GetNeighborAvailMbType (&uiNeighAvail, pCurDqLayer); WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode)); - memset (pCurLayer->pDirect[iMbXy], 0, sizeof (int8_t) * 16); + memset (pCurDqLayer->pDirect[iMbXy], 0, sizeof (int8_t) * 16); + + bool bIsPending = GetThreadCount (pCtx) > 1; if (uiCode) { int16_t pMv[LIST_A][2] = { {0, 0}, { 0, 0 } }; int8_t ref[LIST_A] = { 
0 }; - pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT; - ST32 (&pCurLayer->pNzc[iMbXy][0], 0); - ST32 (&pCurLayer->pNzc[iMbXy][4], 0); - ST32 (&pCurLayer->pNzc[iMbXy][8], 0); - ST32 (&pCurLayer->pNzc[iMbXy][12], 0); - ST32 (&pCurLayer->pNzc[iMbXy][16], 0); - ST32 (&pCurLayer->pNzc[iMbXy][20], 0); - - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; - memset (pCurLayer->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16); - memset (pCurLayer->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16); - pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && ppRefPicL0[0]->bIsComplete) - || ! (ppRefPicL1[0] && ppRefPicL1[0]->bIsComplete); - - + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT; + ST32 (&pCurDqLayer->pNzc[iMbXy][0], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][4], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][8], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][12], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][16], 0); + ST32 (&pCurDqLayer->pNzc[iMbXy][20], 0); + + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + memset (pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16); + memset (pCurDqLayer->pDec->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16); + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && (ppRefPicL0[0]->bIsComplete + || bIsPending)) || ! 
(ppRefPicL1[0] && (ppRefPicL1[0]->bIsComplete || bIsPending)); + + if (pCtx->bMbRefConcealed) { + SLogContext* pLogCtx = & (pCtx->sLogCtx); + WelsLog (pLogCtx, WELS_LOG_ERROR, "Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + } + + SubMbType subMbType; if (pSliceHeader->iDirectSpatialMvPredFlag) { //predict direct spatial mv - SubMbType subMbType; int32_t ret = PredMvBDirectSpatial (pCtx, pMv, ref, subMbType); if (ret != ERR_NONE) { return ret; } } else { //temporal direct mode - ComputeColocated (pCtx); - int32_t ret = PredBDirectTemporal (pCtx, pMv, ref); + int32_t ret = PredBDirectTemporal (pCtx, pMv, ref, subMbType); if (ret != ERR_NONE) { return ret; } @@ -1443,10 +1464,10 @@ int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin //reset rS - pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? dqaunt of previous mb + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; //??????????????? 
dqaunt of previous mb for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; } //for neighboring CABAC usage @@ -1492,12 +1513,12 @@ int32_t WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) { } int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; PFmo pFmo = pCtx->pFmo; int32_t iRet; int32_t iNextMbXyIndex, iSliceIdc; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt; PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader; int32_t iMbX, iMbY; @@ -1525,6 +1546,8 @@ int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNal } else { if (P_SLICE == pSliceHeader->eSliceType) { pDecMbFunc = WelsDecodeMbCavlcPSlice; + } else if (B_SLICE == pSliceHeader->eSliceType) { + pDecMbFunc = WelsDecodeMbCavlcBSlice; } else { //I_SLICE pDecMbFunc = WelsDecodeMbCavlcISlice; } @@ -1541,7 +1564,7 @@ int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNal } pCtx->eSliceType = pSliceHeader->eSliceType; - if (pCurLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) { + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) { int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp; int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc; WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp); @@ -1553,24 +1576,24 @@ int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNal WelsCalcDeqCoeffScalingList (pCtx); iNextMbXyIndex = 
pSliceHeader->iFirstMbInSlice; - iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; - iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009 + iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009 pSlice->iMbSkipRun = -1; - iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurLayer->uiLayerDqId; + iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurDqLayer->uiLayerDqId; - pCurLayer->iMbX = iMbX; - pCurLayer->iMbY = iMbY; - pCurLayer->iMbXyIndex = iNextMbXyIndex; + pCurDqLayer->iMbX = iMbX; + pCurDqLayer->iMbY = iMbY; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; do { if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame break; } - pCurLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc; + pCurDqLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc; pCtx->bMbRefConcealed = false; iRet = pDecMbFunc (pCtx, pNalCur, uiEosFlag); - pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed; + pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed; if (iRet != ERR_NONE) { return iRet; } @@ -1584,21 +1607,185 @@ int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNal } else { ++iNextMbXyIndex; } - iMbX = iNextMbXyIndex % pCurLayer->iMbWidth; - iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; - pCurLayer->iMbX = iMbX; - pCurLayer->iMbY = iMbY; - pCurLayer->iMbXyIndex = iNextMbXyIndex; + iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; + pCurDqLayer->iMbX = iMbX; + pCurDqLayer->iMbY = iMbY; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; } while (1); return ERR_NONE; } +int32_t WelsDecodeAndConstructSlice (PWelsDecoderContext pCtx) { + PNalUnit pNalCur = pCtx->pNalCur; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PFmo pFmo = pCtx->pFmo; + int32_t iRet; + int32_t iNextMbXyIndex, iSliceIdc; + + PSlice 
pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt; + PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader; + int32_t iMbX, iMbY; + const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice + int32_t iTotalMbTargetLayer = kiCountNumMb; + uint32_t uiEosFlag = 0; + PWelsDecMbFunc pDecMbFunc; + + pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding. + + if (pCtx->pPps->bEntropyCodingModeFlag) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag || + pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag || + pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, + "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!"); + pCtx->iErrorCode |= dsBitstreamError; + return dsBitstreamError; + } + if (P_SLICE == pSliceHeader->eSliceType) + pDecMbFunc = WelsDecodeMbCabacPSlice; + else if (B_SLICE == pSliceHeader->eSliceType) + pDecMbFunc = WelsDecodeMbCabacBSlice; + else //I_SLICE. 
B_SLICE is being supported + pDecMbFunc = WelsDecodeMbCabacISlice; + } else { + if (P_SLICE == pSliceHeader->eSliceType) { + pDecMbFunc = WelsDecodeMbCavlcPSlice; + } else if (B_SLICE == pSliceHeader->eSliceType) { + pDecMbFunc = WelsDecodeMbCavlcBSlice; + } else { //I_SLICE + pDecMbFunc = WelsDecodeMbCavlcISlice; + } + } + + if (pSliceHeader->pPps->bConstainedIntraPredFlag) { + pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN; + pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleConstrain1; + pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleConstrain1; + } else { + pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN; + pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleNormal; + pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleNormal; + } + + pCtx->eSliceType = pSliceHeader->eSliceType; + if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) { + int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp; + int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc; + WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp); + //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp); + pSlice->iLastDeltaQp = 0; + WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux)); + } + //try to calculate the dequant_coeff + WelsCalcDeqCoeffScalingList (pCtx); + + iNextMbXyIndex = pSliceHeader->iFirstMbInSlice; + iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009 + pSlice->iMbSkipRun = -1; + iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurDqLayer->uiLayerDqId; + + pCurDqLayer->iMbX = iMbX; + pCurDqLayer->iMbY = iMbY; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + + PDeblockingFilterMbFunc pDeblockMb = WelsDeblockingMb; + + SDeblockingFilter pFilter; + int32_t iFilterIdc = 1; + if 
(pSliceHeader->uiDisableDeblockingFilterIdc != 1) { + WelsDeblockingInitFilter (pCtx, pFilter, iFilterIdc); + } + + do { + if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame + break; + } + + pCurDqLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc; + pCtx->bMbRefConcealed = false; + iRet = pDecMbFunc (pCtx, pNalCur, uiEosFlag); + pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed; + if (iRet != ERR_NONE) { + return iRet; + } + if (WelsTargetMbConstruction (pCtx)) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d", + pCurDqLayer->iMbX, pCurDqLayer->iMbY, pSlice->eSliceType); + + return ERR_INFO_MB_RECON_FAIL; + } + memcpy (pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex], pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex], 24); + if (pCtx->eSliceType != I_SLICE) { + pCtx->sBlockFunc.pWelsSetNonZeroCountFunc ( + pCtx->pDec->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti! 
+ } + WelsDeblockingFilterMB (pCurDqLayer, pFilter, iFilterIdc, pDeblockMb); + if (pCtx->uiNalRefIdc > 0) { + if (pCurDqLayer->iMbX == 0 || pCurDqLayer->iMbX == pCurDqLayer->iMbWidth - 1 || pCurDqLayer->iMbY == 0 + || pCurDqLayer->iMbY == pCurDqLayer->iMbHeight - 1) { + PadMBLuma_c (pCurDqLayer->pDec->pData[0], pCurDqLayer->pDec->iLinesize[0], pCurDqLayer->pDec->iWidthInPixel, + pCurDqLayer->pDec->iHeightInPixel, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth, pCurDqLayer->iMbHeight); + PadMBChroma_c (pCurDqLayer->pDec->pData[1], pCurDqLayer->pDec->iLinesize[1], pCurDqLayer->pDec->iWidthInPixel / 2, + pCurDqLayer->pDec->iHeightInPixel / 2, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth, + pCurDqLayer->iMbHeight); + PadMBChroma_c (pCurDqLayer->pDec->pData[2], pCurDqLayer->pDec->iLinesize[2], pCurDqLayer->pDec->iWidthInPixel / 2, + pCurDqLayer->pDec->iHeightInPixel / 2, pCurDqLayer->iMbX, pCurDqLayer->iMbY, pCurDqLayer->iMbWidth, + pCurDqLayer->iMbHeight); + } + } + if (!pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite + pCurDqLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true; + pCtx->pDec->iMbEcedPropNum += (pCurDqLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 
1 : 0); + ++pCtx->iTotalNumMbRec; + } + + if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d", + pCtx->iTotalNumMbRec, iTotalMbTargetLayer); + + return ERR_INFO_MB_NUM_EXCEED_FAIL; + } + + ++pSlice->iTotalMbInCurSlice; + if (uiEosFlag) { //end of slice + SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]); + break; + } + if (pSliceHeader->pPps->uiNumSliceGroups > 1) { + iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex); + } else { + ++iNextMbXyIndex; + } + int32_t iLastMby = iMbY; + int32_t iLastMbx = iMbX; + iMbX = iNextMbXyIndex % pCurDqLayer->iMbWidth; + iMbY = iNextMbXyIndex / pCurDqLayer->iMbWidth; + pCurDqLayer->iMbX = iMbX; + pCurDqLayer->iMbY = iMbY; + pCurDqLayer->iMbXyIndex = iNextMbXyIndex; + if (GetThreadCount (pCtx) > 1) { + if ((iMbY > iLastMby) && (iLastMbx == pCurDqLayer->iMbWidth - 1)) { + SET_EVENT (&pCtx->pDec->pReadyEvent[iLastMby]); + } + } + } while (1); + if (GetThreadCount (pCtx) > 1) { + SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]); + } + return ERR_NONE; +} + int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { - SVlcTable* pVlcTable = &pCtx->sVlcTable; - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PBitStringAux pBs = pCurLayer->pBitStringAux; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + SVlcTable* pVlcTable = pCtx->pVlcTable; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; SWelsNeighAvail sNeighAvail; @@ -1607,10 +1794,10 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; - int32_t iMbX = pCurLayer->iMbX; - int32_t iMbY = pCurLayer->iMbY; - const int32_t iMbXy = 
pCurLayer->iMbXyIndex; - int8_t* pNzc = pCurLayer->pNzc[iMbXy]; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + const int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; int32_t i; int32_t iRet = ERR_NONE; uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; @@ -1618,12 +1805,12 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { int32_t iCode; ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); - GetNeighborAvailMbType (&sNeighAvail, pCurLayer); - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; - pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; + GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer); + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType uiMbType = uiCode; @@ -1634,15 +1821,15 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { if (25 == uiMbType) { WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!"); - int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0]; - int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1]; + int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0]; + int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1]; int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4; int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3; - uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL; - uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC; - uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC; + uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL; + uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC; + uint8_t* 
pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC; uint8_t* pTmpBsBuf; @@ -1653,7 +1840,7 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2; - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; //step 1: locating bit-stream pointer [must align into integer byte] pBs->pCurBuf -= iIndex; @@ -1681,27 +1868,27 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { pBs->pCurBuf += 384; //step 3: update QP and pNonZeroCount - pCurLayer->pLumaQp[iMbXy] = 0; - memset (pCurLayer->pChromaQp[iMbXy], 0, sizeof (pCurLayer->pChromaQp[iMbXy])); - memset (pNzc, 16, sizeof (pCurLayer->pNzc[iMbXy])); //Rec. 9.2.1 for PCM, nzc=16 + pCurDqLayer->pLumaQp[iMbXy] = 0; + memset (pCurDqLayer->pChromaQp[iMbXy], 0, sizeof (pCurDqLayer->pChromaQp[iMbXy])); + memset (pNzc, 16, sizeof (pCurDqLayer->pNzc[iMbXy])); //Rec. 9.2.1 for PCM, nzc=16 WELS_READ_VERIFY (InitReadBits (pBs, 0)); return ERR_NONE; } else if (0 == uiMbType) { //reference to JM ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; if (pCtx->pPps->bTransform8x8ModeFlag) { WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag - pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; - if (pCurLayer->pTransformSize8x8Flag[iMbXy]) { - uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; } } - if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) { - pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer)); + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + pCtx->pFillInfoCacheIntraNxNFunc 
(&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); } else { - pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer)); + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); } //uiCbp @@ -1717,19 +1904,19 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { uiCbp = g_kuiIntra4x4CbpTable[uiCbp]; else uiCbp = g_kuiIntra4x4CbpTable400[uiCbp]; - pCurLayer->pCbp[iMbXy] = uiCbp; + pCurDqLayer->pCbp[iMbXy] = uiCbp; uiCbpC = uiCbp >> 4; uiCbpL = uiCbp & 15; } else { //I_PCM exclude, we can ignore it - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; - pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; - uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0; - uiCbpL = pCurLayer->pCbp[iMbXy] & 15; - WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer); - WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpC = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)); } ST32A4 (&pNzc[0], 0); @@ -1739,17 +1926,17 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { ST32A4 (&pNzc[16], 0); ST32A4 (&pNzc[20], 0); - if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) { - pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + if (pCurDqLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurDqLayer->pDec->pMbType[iMbXy])) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; } } - if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { - memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0])); + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurDqLayer->pScaledTCoeff[iMbXy][0])); int32_t iQpDelta, iId8x8, iId4x4; WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta @@ -1759,29 +1946,29 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); } - pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp - pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy]; + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 
(pSlice->iLastMbQp + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, - 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, + 51)]; } BsStartCavlc (pBs); - if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { //step1: Luma DC if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC, - pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { return iRet;//abnormal } //step2: Luma AC if (uiCbpL) { for (i = 0; i < 16; i++) { if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, - g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), - pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { return iRet;//abnormal } } @@ -1791,15 +1978,15 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); } } else { //non-MB_TYPE_INTRA16x16 - if (pCurLayer->pTransformSize8x8Flag[iMbXy]) { + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { - iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8; + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? 
LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8; if (uiCbpL & (1 << iId8x8)) { int32_t iIndex = (iId8x8 << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, - g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, - pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { return iRet; } iIndex++; @@ -1820,8 +2007,8 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { //Luma (DC and AC decoding together) if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, - g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), - pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { return iRet;//abnormal } iIndex++; @@ -1844,7 +2031,7 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { for (i = 0; i < 2; i++) { //Cb Cr iMbResProperty = i ? 
CHROMA_DC_V : CHROMA_DC_U; if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty, - pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { + pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { return iRet;//abnormal } } @@ -1857,8 +2044,9 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { int32_t iIndex = 16 + (i << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart, - 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), - pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { + 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { return iRet;//abnormal } iIndex++; @@ -1876,9 +2064,9 @@ int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { } int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PBitStringAux pBs = pCurLayer->pBitStringAux; - PSliceHeaderExt pSliceHeaderExt = &pCurLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt; int32_t iBaseModeFlag; int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15 uint32_t uiCode; @@ -1903,7 +2091,7 @@ int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin // check whether there is left bits to read next time in case multiple slices iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 
3) - (16 - pBs->iLeftBits); // sub 1, for stop bit - if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary + if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary uiEosFlag = 1; } if (iUsedBits > (pBs->iBits - @@ -1917,20 +2105,20 @@ int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin } int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { - SVlcTable* pVlcTable = &pCtx->sVlcTable; - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PBitStringAux pBs = pCurLayer->pBitStringAux; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + SVlcTable* pVlcTable = pCtx->pVlcTable; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; SWelsNeighAvail sNeighAvail; - int32_t iMbX = pCurLayer->iMbX; - int32_t iMbY = pCurLayer->iMbY; - const int32_t iMbXy = pCurLayer->iMbXyIndex; - int8_t* pNzc = pCurLayer->pNzc[iMbXy]; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + const int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; int32_t i; int32_t iRet = ERR_NONE; uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; @@ -1938,16 +2126,16 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { int32_t iCode; int32_t iMbResProperty; - GetNeighborAvailMbType (&sNeighAvail, pCurLayer); + GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer); ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23 + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23 WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType 
uiMbType = uiCode; if (uiMbType < 5) { //inter MB type int16_t iMotionVector[LIST_A][30][MV_A]; int8_t iRefIndex[LIST_A][30]; - pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType; - WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer); + pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType; + WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurDqLayer); if ((iRet = ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) { return iRet;//abnormal @@ -1955,13 +2143,13 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) { WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag - pCurLayer->pResidualPredFlag[iMbXy] = uiCode; + pCurDqLayer->pResidualPredFlag[iMbXy] = uiCode; } else { - pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; + pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; } - if (pCurLayer->pResidualPredFlag[iMbXy] == 0) { - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; + if (pCurDqLayer->pResidualPredFlag[iMbXy] == 0) { + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; } else { WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported."); return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); @@ -1975,15 +2163,15 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { if (25 == uiMbType) { WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!"); - int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0]; - int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1]; + int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0]; + int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1]; int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4; int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3; - uint8_t* pDecY = 
pCurLayer->pDec->pData[0] + iOffsetL; - uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC; - uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC; + uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL; + uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC; + uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC; uint8_t* pTmpBsBuf; @@ -1993,7 +2181,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2; - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; //step 1: locating bit-stream pointer [must align into integer byte] pBs->pCurBuf -= iIndex; @@ -2022,8 +2210,8 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { pBs->pCurBuf += 384; //step 3: update QP and pNonZeroCount - pCurLayer->pLumaQp[iMbXy] = 0; - pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0; + pCurDqLayer->pLumaQp[iMbXy] = 0; + pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0; //Rec. 
9.2.1 for PCM, nzc=16 ST32A4 (&pNzc[0], 0x10101010); ST32A4 (&pNzc[4], 0x10101010); @@ -2036,38 +2224,38 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { } else { if (0 == uiMbType) { ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; if (pCtx->pPps->bTransform8x8ModeFlag) { WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag - pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; - if (pCurLayer->pTransformSize8x8Flag[iMbXy]) { - uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; } } - if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) { - pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer)); + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); } else { - pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer); - WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer)); + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); } } else { //I_PCM exclude, we can ignore it - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; - pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ 
(uiMbType - 1) >> 2]; - uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0; - uiCbpL = pCurLayer->pCbp[iMbXy] & 15; - WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer); - if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)) != ERR_NONE) { + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer); + if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)) != ERR_NONE) { return iRet; } } } } - if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern uiCbp = uiCode; { @@ -2075,29 +2263,29 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15)) return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); - if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA4x4 == pCurDqLayer->pDec->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurDqLayer->pDec->pMbType[iMbXy]) { uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp]; } else //inter uiCbp = pCtx->pSps->uiChromaFormatIdc ? 
g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp]; } - pCurLayer->pCbp[iMbXy] = uiCbp; - uiCbpC = pCurLayer->pCbp[iMbXy] >> 4; - uiCbpL = pCurLayer->pCbp[iMbXy] & 15; + pCurDqLayer->pCbp[iMbXy] = uiCbp; + uiCbpC = pCurDqLayer->pCbp[iMbXy] >> 4; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; // Need modification when B picutre add in bool bNeedParseTransformSize8x8Flag = - (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16) - || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) - && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8) - && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4) + (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16) + || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4) && (uiCbpL > 0) && (pCtx->pPps->bTransform8x8ModeFlag)); if (bNeedParseTransformSize8x8Flag) { WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag - pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; } } @@ -2107,17 +2295,18 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { ST32A4 (&pNzc[12], 0); ST32A4 (&pNzc[16], 0); ST32A4 (&pNzc[20], 0); - if (pCurLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurLayer->pMbType[iMbXy]) && !IS_I_BL (pCurLayer->pMbType[iMbXy])) { - pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + if (pCurDqLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurDqLayer->pDec->pMbType[iMbXy]) + && !IS_I_BL (pCurDqLayer->pDec->pMbType[iMbXy])) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 
(pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; } } - if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { int32_t iQpDelta, iId8x8, iId4x4; - memset (pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t)); + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t)); WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta iQpDelta = iCode; @@ -2125,28 +2314,28 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); } - pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp - pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy]; + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, - 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, + 51)]; } BsStartCavlc (pBs); - if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { //step1: Luma DC if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC, - pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { return iRet;//abnormal } //step2: Luma AC if (uiCbpL) { for (i = 0; i < 16; i++) { if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, - g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 
1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), - pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { return iRet;//abnormal } } @@ -2156,15 +2345,15 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); } } else { //non-MB_TYPE_INTRA16x16 - if (pCurLayer->pTransformSize8x8Flag[iMbXy]) { + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { - iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8; + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8; if (uiCbpL & (1 << iId8x8)) { int32_t iIndex = (iId8x8 << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, - g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, - pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { return iRet; } iIndex++; @@ -2180,14 +2369,14 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); } else { // Normal T4x4 for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { - iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? 
LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; if (uiCbpL & (1 << iId8x8)) { int32_t iIndex = (iId8x8 << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { //Luma (DC and AC decoding together) if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, - g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), - pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { return iRet;//abnormal } iIndex++; @@ -2209,13 +2398,13 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { //step1: DC if (1 == uiCbpC || 2 == uiCbpC) { for (i = 0; i < 2; i++) { //Cb Cr - if (IS_INTRA (pCurLayer->pMbType[iMbXy])) + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; else iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER; if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty, - pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { + pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { return iRet;//abnormal } } @@ -2224,7 +2413,7 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { //step2: AC if (2 == uiCbpC) { for (i = 0; i < 2; i++) { //Cb Cr - if (IS_INTRA (pCurLayer->pMbType[iMbXy])) + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U; else iMbResProperty = i ? 
CHROMA_AC_V_INTER : CHROMA_AC_U_INTER; @@ -2232,8 +2421,9 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { int32_t iIndex = 16 + (i << 2); for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart, - 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), - pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { + 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { return iRet;//abnormal } iIndex++; @@ -2251,20 +2441,20 @@ int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { } int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PBitStringAux pBs = pCurLayer->pBitStringAux; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0]; intX_t iUsedBits; - const int32_t iMbXy = pCurLayer->iMbXyIndex; - int8_t* pNzc = pCurLayer->pNzc[iMbXy]; + const int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; int32_t iBaseModeFlag, i; int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15 uint32_t uiCode; - pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; - pCurLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; if (-1 == pSlice->iMbSkipRun) { WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run @@ -2276,7 
+2466,7 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin if (pSlice->iMbSkipRun--) { int16_t iMv[2]; - pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP; ST32A4 (&pNzc[0], 0); ST32A4 (&pNzc[4], 0); ST32A4 (&pNzc[8], 0); @@ -2284,30 +2474,32 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin ST32A4 (&pNzc[16], 0); ST32A4 (&pNzc[20], 0); - pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; - memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16); - pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete); + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16); + bool bIsPending = GetThreadCount (pCtx) > 1; + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete + || bIsPending)); //predict iMv - PredPSkipMvFromNeighbor (pCurLayer, iMv); + PredPSkipMvFromNeighbor (pCurDqLayer, iMv); for (i = 0; i < 16; i++) { - ST32A2 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)iMv); + ST32A2 (pCurDqLayer->pDec->pMv[0][iMbXy][i], * (uint32_t*)iMv); } //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) { - // memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t)); + // memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t)); //} //reset rS if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag || (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) { - pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; for (i = 0; i < 2; i++) { - pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] + - pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + 
pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; } } - pCurLayer->pCbp[iMbXy] = 0; + pCurDqLayer->pCbp[iMbXy] = 0; } else { if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) { WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag @@ -2329,7 +2521,7 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin // check whether there is left bits to read next time in case multiple slices iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits); // sub 1, for stop bit - if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary + if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary uiEosFlag = 1; } if (iUsedBits > (pBs->iBits - @@ -2342,7 +2534,457 @@ int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uin return ERR_NONE; } -void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) { +int32_t WelsDecodeMbCavlcBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + PPicture* ppRefPicL0 = pCtx->sRefPic.pRefList[LIST_0]; + PPicture* ppRefPicL1 = pCtx->sRefPic.pRefList[LIST_1]; + intX_t iUsedBits; + const int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; + int32_t iBaseModeFlag, i; + int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15 + uint32_t uiCode; + + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + + if (-1 == pSlice->iMbSkipRun) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run + pSlice->iMbSkipRun = uiCode; + if (-1 == pSlice->iMbSkipRun) { + return 
GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN); + } + } + if (pSlice->iMbSkipRun--) { + int16_t iMv[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + int8_t ref[LIST_A] = { 0 }; + + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_SKIP | MB_TYPE_DIRECT; + ST32A4 (&pNzc[0], 0); + ST32A4 (&pNzc[4], 0); + ST32A4 (&pNzc[8], 0); + ST32A4 (&pNzc[12], 0); + ST32A4 (&pNzc[16], 0); + ST32A4 (&pNzc[20], 0); + + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + memset (pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16); + memset (pCurDqLayer->pDec->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16); + bool bIsPending = GetThreadCount (pCtx) > 1; + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && (ppRefPicL0[0]->bIsComplete + || bIsPending)) || ! (ppRefPicL1[0] && (ppRefPicL1[0]->bIsComplete || bIsPending)); + + /*if (pCtx->bMbRefConcealed) { + SLogContext* pLogCtx = & (pCtx->sLogCtx); + WelsLog (pLogCtx, WELS_LOG_ERROR, "Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + }*/ + //predict iMv + SubMbType subMbType; + if (pSliceHeader->iDirectSpatialMvPredFlag) { + + //predict direct spatial mv + int32_t ret = PredMvBDirectSpatial (pCtx, iMv, ref, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } else { + //temporal direct mode + int32_t ret = PredBDirectTemporal (pCtx, iMv, ref, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } + + //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) { + // memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t)); + //} + + //reset rS + if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag || + (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 
(pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + } + + pCurDqLayer->pCbp[iMbXy] = 0; + } else { + if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag + iBaseModeFlag = uiCode; + } else { + iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag; + } + if (!iBaseModeFlag) { + iRet = WelsActualDecodeMbCavlcBSlice (pCtx); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.", + iBaseModeFlag); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP); + } + if (iRet) { //occur error when parsing, MUST STOP decoding + return iRet; + } + } + // check whether there is left bits to read next time in case multiple slices + iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits); + // sub 1, for stop bit + if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurDqLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary + uiEosFlag = 1; + } + if (iUsedBits > (pBs->iBits - + 1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash. 
+ WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, + "WelsDecodeMbCavlcBSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.", + (int64_t)iUsedBits, pBs->iBits); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE); + } + return ERR_NONE; +} + +int32_t WelsActualDecodeMbCavlcBSlice (PWelsDecoderContext pCtx) { + SVlcTable* pVlcTable = pCtx->pVlcTable; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PBitStringAux pBs = pCurDqLayer->pBitStringAux; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + + int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart; + int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd; + + SWelsNeighAvail sNeighAvail; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + const int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int8_t* pNzc = pCurDqLayer->pNzc[iMbXy]; + int32_t i; + int32_t iRet = ERR_NONE; + uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0; + uint32_t uiCode; + int32_t iCode; + int32_t iMbResProperty; + + GetNeighborAvailMbType (&sNeighAvail, pCurDqLayer); + ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23 + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType + uiMbType = uiCode; + if (uiMbType < 23) { //inter MB type + int16_t iMotionVector[LIST_A][30][MV_A]; + int8_t iRefIndex[LIST_A][30]; + pCurDqLayer->pDec->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType; + WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurDqLayer); + + if ((iRet = ParseInterBInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) { + return iRet;//abnormal + } + + if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag + pCurDqLayer->pResidualPredFlag[iMbXy] = uiCode; + } else { + 
pCurDqLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag; + } + + if (pCurDqLayer->pResidualPredFlag[iMbXy] == 0) { + pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0; + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported."); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } else { //intra MB type + uiMbType -= 23; + if (uiMbType > 25) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE); + + if (25 == uiMbType) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!"); + int32_t iDecStrideL = pCurDqLayer->pDec->iLinesize[0]; + int32_t iDecStrideC = pCurDqLayer->pDec->iLinesize[1]; + + int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4; + int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3; + + uint8_t* pDecY = pCurDqLayer->pDec->pData[0] + iOffsetL; + uint8_t* pDecU = pCurDqLayer->pDec->pData[1] + iOffsetC; + uint8_t* pDecV = pCurDqLayer->pDec->pData[2] + iOffsetC; + + uint8_t* pTmpBsBuf; + + int32_t i; + int32_t iCopySizeY = (sizeof (uint8_t) << 4); + int32_t iCopySizeUV = (sizeof (uint8_t) << 3); + + int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2; + + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; + + //step 1: locating bit-stream pointer [must align into integer byte] + pBs->pCurBuf -= iIndex; + + //step 2: copy pixel from bit-stream into fdec [reconstruction] + pTmpBsBuf = pBs->pCurBuf; + if (!pCtx->pParam->bParseOnly) { + for (i = 0; i < 16; i++) { //luma + memcpy (pDecY, pTmpBsBuf, iCopySizeY); + pDecY += iDecStrideL; + pTmpBsBuf += 16; + } + + for (i = 0; i < 8; i++) { //cb + memcpy (pDecU, pTmpBsBuf, iCopySizeUV); + pDecU += iDecStrideC; + pTmpBsBuf += 8; + } + for (i = 0; i < 8; i++) { //cr + memcpy (pDecV, 
pTmpBsBuf, iCopySizeUV); + pDecV += iDecStrideC; + pTmpBsBuf += 8; + } + } + + pBs->pCurBuf += 384; + + //step 3: update QP and pNonZeroCount + pCurDqLayer->pLumaQp[iMbXy] = 0; + pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0; + //Rec. 9.2.1 for PCM, nzc=16 + ST32A4 (&pNzc[0], 0x10101010); + ST32A4 (&pNzc[4], 0x10101010); + ST32A4 (&pNzc[8], 0x10101010); + ST32A4 (&pNzc[12], 0x10101010); + ST32A4 (&pNzc[16], 0x10101010); + ST32A4 (&pNzc[20], 0x10101010); + WELS_READ_VERIFY (InitReadBits (pBs, 0)); + return ERR_NONE; + } else { + if (0 == uiMbType) { + ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16); + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA4x4; + if (pCtx->pPps->bTransform8x8ModeFlag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + uiMbType = pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA8x8; + } + } + if (!pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); + } else { + pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurDqLayer); + WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurDqLayer)); + } + } else { //I_PCM exclude, we can ignore it + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA16x16; + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = false; + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true; + pCurDqLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; + pCurDqLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; + uiCbpC = pCtx->pSps->uiChromaFormatIdc ? 
pCurDqLayer->pCbp[iMbXy] >> 4 : 0; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; + WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurDqLayer); + if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurDqLayer)) != ERR_NONE) { + return iRet; + } + } + } + } + + if (MB_TYPE_INTRA16x16 != pCurDqLayer->pDec->pMbType[iMbXy]) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern + uiCbp = uiCode; + { + if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47)) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); + if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15)) + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP); + if (MB_TYPE_INTRA4x4 == pCurDqLayer->pDec->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurDqLayer->pDec->pMbType[iMbXy]) { + + uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp]; + } else //inter + uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp]; + } + + pCurDqLayer->pCbp[iMbXy] = uiCbp; + uiCbpC = pCurDqLayer->pCbp[iMbXy] >> 4; + uiCbpL = pCurDqLayer->pCbp[iMbXy] & 15; + + // Need modification when B picutre add in + bool bNeedParseTransformSize8x8Flag = + (((pCurDqLayer->pDec->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurDqLayer->pDec->pMbType[iMbXy] <= MB_TYPE_8x16) + || pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy]) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA8x8) + && (pCurDqLayer->pDec->pMbType[iMbXy] != MB_TYPE_INTRA4x4) + && (uiCbpL > 0) + && (pCtx->pPps->bTransform8x8ModeFlag)); + + if (bNeedParseTransformSize8x8Flag) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag + pCurDqLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode; + } + } + + ST32A4 (&pNzc[0], 0); + ST32A4 (&pNzc[4], 0); + ST32A4 (&pNzc[8], 0); + ST32A4 (&pNzc[12], 0); + ST32A4 (&pNzc[16], 0); + ST32A4 (&pNzc[20], 0); + if (pCurDqLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 
(pCurDqLayer->pDec->pMbType[iMbXy]) + && !IS_I_BL (pCurDqLayer->pDec->pMbType[iMbXy])) { + pCurDqLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurDqLayer->pLumaQp[iMbXy] + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)]; + } + } + + if (pCurDqLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + int32_t iQpDelta, iId8x8, iId4x4; + memset (pCurDqLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t)); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta + iQpDelta = iCode; + + if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP); + } + + pCurDqLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp + pSlice->iLastMbQp = pCurDqLayer->pLumaQp[iMbXy]; + for (i = 0; i < 2; i++) { + pCurDqLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp + + pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, + 51)]; + } + + BsStartCavlc (pBs); + + if (MB_TYPE_INTRA16x16 == pCurDqLayer->pDec->pMbType[iMbXy]) { + //step1: Luma DC + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC, + pCurDqLayer->pScaledTCoeff[iMbXy], pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + //step2: Luma AC + if (uiCbpL) { + for (i = 0; i < 16; i++) { + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, + g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurDqLayer->pScaledTCoeff[iMbXy] + (i << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 
(&pNonZeroCount[1 + 8 * 4])); + } + } else { //non-MB_TYPE_INTRA16x16 + if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8; + if (uiCbpL & (1 << iId8x8)) { + int32_t iIndex = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet; + } + iIndex++; + } + } else { + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } else { // Normal T4x4 + for (iId8x8 = 0; iId8x8 < 4; iId8x8++) { + iMbResProperty = (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) ? 
LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER; + if (uiCbpL & (1 << iId8x8)) { + int32_t iIndex = (iId8x8 << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + //Luma (DC and AC decoding together) + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1, + g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + iIndex++; + } + } else { + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); + ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0); + } + } + ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); + ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2])); + ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3])); + ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4])); + } + } + + + //chroma + //step1: DC + if (1 == uiCbpC || 2 == uiCbpC) { + for (i = 0; i < 2; i++) { //Cb Cr + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) + iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U; + else + iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER; + + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty, + pCurDqLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + } + } else { + } + //step2: AC + if (2 == uiCbpC) { + for (i = 0; i < 2; i++) { //Cb Cr + if (IS_INTRA (pCurDqLayer->pDec->pMbType[iMbXy])) + iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U; + else + iMbResProperty = i ? 
CHROMA_AC_V_INTER : CHROMA_AC_U_INTER; + + int32_t iIndex = 16 + (i << 2); + for (iId4x4 = 0; iId4x4 < 4; iId4x4++) { + if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart, + 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, + pCurDqLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), + pCurDqLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) { + return iRet;//abnormal + } + iIndex++; + } + } + ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1])); + ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2])); + ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4])); + ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5])); + } + BsEndCavlc (pBs); + } + + return ERR_NONE; +} + +void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) { pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_c; pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_c; pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_c; @@ -2389,31 +3031,34 @@ void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) { void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) { WelsBlockInit (pBlock, 8, 8, iStride, 0); } -bool ComputeColocated (PWelsDecoderContext pCtx) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + +// Compute the temporal-direct scaling factor that's common +// to all direct MBs in this slice, as per clause 8.4.1.2.3 +// of T-REC H.264 201704 +bool ComputeColocatedTemporalScaling (PWelsDecoderContext pCtx) { + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + PSlice pCurSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader; if (!pSliceHeader->iDirectSpatialMvPredFlag) { - uint32_t uiShortRefCount = pCtx->sRefPic.uiShortRefCount[LIST_0]; - for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - for (uint32_t i = 0; i < uiShortRefCount; ++i) { - int32_t iTRb = WELS_CLIP3 (-128, 127, 
pSliceHeader->iPicOrderCntLsb - pCtx->sRefPic.pRefList[listIdx][i]->iFramePoc); - int32_t iTRp = WELS_CLIP3 (-128, 127, - pCtx->sRefPic.pRefList[LIST_1][i]->iFramePoc - pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc); - if (iTRp != 0) { - int32_t prescale = (16384 + iAbs (iTRp / 2)) / iTRp; - pCurSlice->iMvScale[listIdx][i] = WELS_CLIP3 (-1024, 1023, (iTRb * prescale + 32) >> 6); - } else { - pCurSlice->iMvScale[listIdx][i] = 0x03FFF; + uint32_t uiRefCount = pSliceHeader->uiRefCount[LIST_0]; + if (pCtx->sRefPic.pRefList[LIST_1][0] != NULL) { + for (uint32_t i = 0; i < uiRefCount; ++i) { + if (pCtx->sRefPic.pRefList[LIST_0][i] != NULL) { + const int32_t poc0 = pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc; + const int32_t poc1 = pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc; + const int32_t poc = pSliceHeader->iPicOrderCntLsb; + const int32_t td = WELS_CLIP3 (poc1 - poc0, -128, 127); + if (td == 0) { + pCurSlice->iMvScale[LIST_0][i] = 1 << 8; + } else { + int32_t tb = WELS_CLIP3 (poc - poc0, -128, 127); + int32_t tx = (16384 + (abs (td) >> 1)) / td; + pCurSlice->iMvScale[LIST_0][i] = WELS_CLIP3 ((tb * tx + 32) >> 6, -1024, 1023); + } } } } } - //Implement the following - //get Mv_colocated_L1 - //and do calculation - //iMvp[LIST_0] = Mv_colocated_L1 * (POC(cur) - POC(L0))/POC(L1) - POC(L0)) - //iMvp[LIST_1] = Mv_colocated_L1 * (POC(cur) - POC(L1))/POC(L1) - POC(L0)) return true; } } // namespace WelsDec diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/decoder.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/decoder.cpp index b957872b90b..a6f2da4374f 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/decoder.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/decoder.cpp @@ -52,6 +52,7 @@ #include "decode_slice.h" #include "error_concealment.h" #include "memory_align.h" +#include "wels_decoder_thread.h" namespace WelsDec { @@ -61,6 +62,7 @@ extern void FreePicture (PPicture pPic, CMemoryAlign* pMa); 
static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, const int32_t kiSize, const int32_t kiPicWidth, const int32_t kiPicHeight) { + PPicBuff pPicBuf = NULL; int32_t iPicIdx = 0; if (kiSize <= 0 || kiPicWidth <= 0 || kiPicHeight <= 0) { @@ -79,7 +81,7 @@ static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, cons if (NULL == pPicBuf->ppPic) { pPicBuf->iCapacity = 0; - DestroyPicBuff (&pPicBuf, pMa); + DestroyPicBuff (pCtx, &pPicBuf, pMa); return ERR_INFO_OUT_OF_MEMORY; } @@ -88,7 +90,7 @@ static int32_t CreatePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, cons if (NULL == pPic) { // init capacity first for free memory pPicBuf->iCapacity = iPicIdx; - DestroyPicBuff (&pPicBuf, pMa); + DestroyPicBuff (pCtx, &pPicBuf, pMa); return ERR_INFO_OUT_OF_MEMORY; } pPicBuf->ppPic[iPicIdx] = pPic; @@ -122,7 +124,7 @@ static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co if (NULL == pPicNewBuf->ppPic) { pPicNewBuf->iCapacity = 0; - DestroyPicBuff (&pPicNewBuf, pMa); + DestroyPicBuff (pCtx, &pPicNewBuf, pMa); return ERR_INFO_OUT_OF_MEMORY; } @@ -132,7 +134,7 @@ static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co if (NULL == pPic) { // Set maximum capacity as the new malloc memory at the tail pPicNewBuf->iCapacity = iPicIdx; - DestroyPicBuff (&pPicNewBuf, pMa); + DestroyPicBuff (pCtx, &pPicNewBuf, pMa); return ERR_INFO_OUT_OF_MEMORY; } pPicNewBuf->ppPic[iPicIdx] = pPic; @@ -149,8 +151,7 @@ static int32_t IncreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) { pPicNewBuf->ppPic[i]->bUsedAsRef = false; pPicNewBuf->ppPic[i]->bIsLongRef = false; - pPicNewBuf->ppPic[i]->uiRefCount = 0; - pPicNewBuf->ppPic[i]->bAvailableFlag = true; + pPicNewBuf->ppPic[i]->iRefCount = 0; pPicNewBuf->ppPic[i]->bIsComplete = false; } // remove old PicBuf @@ -186,13 +187,15 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, 
PPicBuff* ppPicBuf, co if (NULL == pPicNewBuf->ppPic) { pPicNewBuf->iCapacity = 0; - DestroyPicBuff (&pPicNewBuf, pMa); + DestroyPicBuff (pCtx, &pPicNewBuf, pMa); return ERR_INFO_OUT_OF_MEMORY; } + ResetReorderingPictureBuffers (pCtx->pPictReoderingStatus, pCtx->pPictInfoList, false); + int32_t iPrevPicIdx = -1; for (iPrevPicIdx = 0; iPrevPicIdx < kiOldSize; ++iPrevPicIdx) { - if (pCtx->pPreviousDecodedPictureInDpb == pPicOldBuf->ppPic[iPrevPicIdx]) { + if (pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb == pPicOldBuf->ppPic[iPrevPicIdx]) { break; } } @@ -209,6 +212,17 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co iDelIdx = kiNewSize; } + //update references due to allocation changes + //all references' references have to be reset oss-buzz 14423 + for (int32_t i = 0; i < kiNewSize; i++) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + int32_t j = -1; + while (++j < MAX_DPB_COUNT && pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] != NULL) { + pPicNewBuf->ppPic[i]->pRefPic[listIdx][j] = NULL; + } + } + } + for (iPicIdx = iDelIdx; iPicIdx < kiOldSize; iPicIdx++) { if (iPrevPicIdx != iPicIdx) { if (pPicOldBuf->ppPic[iPicIdx] != NULL) { @@ -220,13 +234,12 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co // initialize context in queue pPicNewBuf->iCapacity = kiNewSize; - *ppPicBuf = pPicNewBuf; + * ppPicBuf = pPicNewBuf; for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) { pPicNewBuf->ppPic[i]->bUsedAsRef = false; pPicNewBuf->ppPic[i]->bIsLongRef = false; - pPicNewBuf->ppPic[i]->uiRefCount = 0; - pPicNewBuf->ppPic[i]->bAvailableFlag = true; + pPicNewBuf->ppPic[i]->iRefCount = 0; pPicNewBuf->ppPic[i]->bIsComplete = false; } // remove old PicBuf @@ -242,9 +255,11 @@ static int32_t DecreasePicBuff (PWelsDecoderContext pCtx, PPicBuff* ppPicBuf, co return ERR_NONE; } -void DestroyPicBuff (PPicBuff* ppPicBuf, CMemoryAlign* pMa) { +void DestroyPicBuff (PWelsDecoderContext pCtx, PPicBuff* 
ppPicBuf, CMemoryAlign* pMa) { PPicBuff pPicBuf = NULL; + ResetReorderingPictureBuffers (pCtx->pPictReoderingStatus, pCtx->pPictInfoList, false); + if (NULL == ppPicBuf || NULL == *ppPicBuf) return; @@ -273,6 +288,24 @@ void DestroyPicBuff (PPicBuff* ppPicBuf, CMemoryAlign* pMa) { *ppPicBuf = NULL; } +//reset picture reodering buffer list +void ResetReorderingPictureBuffers (PPictReoderingStatus pPictReoderingStatus, PPictInfo pPictInfo, + const bool& fullReset) { + if (pPictReoderingStatus != NULL && pPictInfo != NULL) { + int32_t pictInfoListCount = fullReset ? 16 : (pPictReoderingStatus->iLargestBufferedPicIndex + 1); + pPictReoderingStatus->iPictInfoIndex = 0; + pPictReoderingStatus->iMinPOC = IMinInt32; + pPictReoderingStatus->iNumOfPicts = 0; + pPictReoderingStatus->iLastGOPRemainPicts = 0; + pPictReoderingStatus->iLastWrittenPOC = IMinInt32; + pPictReoderingStatus->iLargestBufferedPicIndex = 0; + for (int32_t i = 0; i < pictInfoListCount; ++i) { + pPictInfo[i].bLastGOP = false; + pPictInfo[i].iPOC = IMinInt32; + } + } +} + /* * fill data fields in default for decoder context */ @@ -297,7 +330,7 @@ void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx) { pCtx->bFreezeOutput = true; pCtx->iFrameNum = -1; - pCtx->iPrevFrameNum = -1; + pCtx->pLastDecPicInfo->iPrevFrameNum = -1; pCtx->iErrorCode = ERR_NONE; pCtx->pDec = NULL; @@ -310,31 +343,91 @@ void WelsDecoderDefaults (PWelsDecoderContext pCtx, SLogContext* pLogCtx) { pCtx->pPicBuff = NULL; - pCtx->bAvcBasedFlag = true; - pCtx->pPreviousDecodedPictureInDpb = NULL; - pCtx->sDecoderStatistics.iAvgLumaQp = -1; - pCtx->sDecoderStatistics.iStatisticsLogInterval = 1000; + //pCtx->sSpsPpsCtx.bAvcBasedFlag = true; + pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = NULL; + pCtx->pDecoderStatistics->iAvgLumaQp = -1; + pCtx->pDecoderStatistics->iStatisticsLogInterval = 1000; pCtx->bUseScalingList = false; - pCtx->iSpsErrorIgnored = 0; - pCtx->iSubSpsErrorIgnored = 0; - pCtx->iPpsErrorIgnored = 0; 
- pCtx->iPPSInvalidNum = 0; - pCtx->iPPSLastInvalidId = -1; - pCtx->iSPSInvalidNum = 0; - pCtx->iSPSLastInvalidId = -1; - pCtx->iSubSPSInvalidNum = 0; - pCtx->iSubSPSLastInvalidId = -1; + /*pCtx->sSpsPpsCtx.iSpsErrorIgnored = 0; + pCtx->sSpsPpsCtx.iSubSpsErrorIgnored = 0; + pCtx->sSpsPpsCtx.iPpsErrorIgnored = 0; + pCtx->sSpsPpsCtx.iPPSInvalidNum = 0; + pCtx->sSpsPpsCtx.iPPSLastInvalidId = -1; + pCtx->sSpsPpsCtx.iSPSInvalidNum = 0; + pCtx->sSpsPpsCtx.iSPSLastInvalidId = -1; + pCtx->sSpsPpsCtx.iSubSPSInvalidNum = 0; + pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = -1; + */ pCtx->iFeedbackNalRefIdc = -1; //initialize - pCtx->iPrevPicOrderCntMsb = 0; - pCtx->iPrevPicOrderCntLsb = 0; + pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0; + pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0; } /* +* fill data fields in SPS and PPS default for decoder context +*/ +void WelsDecoderSpsPpsDefaults (SWelsDecoderSpsPpsCTX& sSpsPpsCtx) { + sSpsPpsCtx.bSpsExistAheadFlag = false; + sSpsPpsCtx.bSubspsExistAheadFlag = false; + sSpsPpsCtx.bPpsExistAheadFlag = false; + sSpsPpsCtx.bAvcBasedFlag = true; + sSpsPpsCtx.iSpsErrorIgnored = 0; + sSpsPpsCtx.iSubSpsErrorIgnored = 0; + sSpsPpsCtx.iPpsErrorIgnored = 0; + sSpsPpsCtx.iPPSInvalidNum = 0; + sSpsPpsCtx.iPPSLastInvalidId = -1; + sSpsPpsCtx.iSPSInvalidNum = 0; + sSpsPpsCtx.iSPSLastInvalidId = -1; + sSpsPpsCtx.iSubSPSInvalidNum = 0; + sSpsPpsCtx.iSubSPSLastInvalidId = -1; + sSpsPpsCtx.iSeqId = -1; +} + +/* +* fill last decoded picture info +*/ +void WelsDecoderLastDecPicInfoDefaults (SWelsLastDecPicInfo& sLastDecPicInfo) { + sLastDecPicInfo.iPrevPicOrderCntMsb = 0; + sLastDecPicInfo.iPrevPicOrderCntLsb = 0; + sLastDecPicInfo.pPreviousDecodedPictureInDpb = NULL; + sLastDecPicInfo.iPrevFrameNum = -1; + sLastDecPicInfo.bLastHasMmco5 = false; + sLastDecPicInfo.uiDecodingTimeStamp = 0; +} + +/*! 
+* \brief copy SpsPps from one Ctx to another ctx for threaded code +*/ +void CopySpsPps (PWelsDecoderContext pFromCtx, PWelsDecoderContext pToCtx) { + pToCtx->sSpsPpsCtx = pFromCtx->sSpsPpsCtx; + PAccessUnit pFromCurAu = pFromCtx->pAccessUnitList; + PSps pTmpLayerSps[MAX_LAYER_NUM]; + for (int i = 0; i < MAX_LAYER_NUM; i++) { + pTmpLayerSps[i] = NULL; + } + // track the layer sps for the current au + for (unsigned int i = pFromCurAu->uiStartPos; i <= pFromCurAu->uiEndPos; i++) { + uint32_t uiDid = pFromCurAu->pNalUnitsList[i]->sNalHeaderExt.uiDependencyId; + pTmpLayerSps[uiDid] = pFromCurAu->pNalUnitsList[i]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps; + for (unsigned int j = 0; j < MAX_SPS_COUNT + 1; ++j) { + if (&pFromCtx->sSpsPpsCtx.sSpsBuffer[j] == pTmpLayerSps[uiDid]) { + pTmpLayerSps[uiDid] = &pToCtx->sSpsPpsCtx.sSpsBuffer[j]; + break; + } + } + } + for (int i = 0; i < MAX_LAYER_NUM; i++) { + if (pTmpLayerSps[i] != NULL) { + pToCtx->sSpsPpsCtx.pActiveLayerSps[i] = pTmpLayerSps[i]; + } + } +} + +/* * destory_mb_blocks */ - /* * get size of reference picture list in target layer incoming, = (iNumRefFrames */ @@ -345,6 +438,9 @@ static inline int32_t GetTargetRefListSize (PWelsDecoderContext pCtx) { iNumRefFrames = MAX_REF_PIC_COUNT + 2; } else { iNumRefFrames = pCtx->pSps->iNumRefFrames + 2; + if (GetThreadCount (pCtx) > 1) { + iNumRefFrames = MAX_REF_PIC_COUNT + 1; + } } #ifdef LONG_TERM_REF @@ -386,7 +482,9 @@ int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const && kiPicHeight == pCtx->iImgHeightInPixel) && (!bNeedChangePicQueue)) // have same scaled buffer // sync update pRefList - WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free + if (GetThreadCount (pCtx) <= 1) { + WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free + } if (pCtx->bHaveGotMemory && (kiPicWidth == pCtx->iImgWidthInPixel && kiPicHeight == pCtx->iImgHeightInPixel) && pCtx->pPicBuff != NULL && 
pCtx->pPicBuff->iCapacity != iPicQueueSize) { @@ -414,11 +512,11 @@ int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const // for Recycled_Pic_Queue PPicBuff* ppPic = &pCtx->pPicBuff; if (NULL != ppPic && NULL != *ppPic) { - DestroyPicBuff (ppPic, pMa); + DestroyPicBuff (pCtx, ppPic, pMa); } - pCtx->pPreviousDecodedPictureInDpb = NULL; + pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = NULL; // currently only active for LIST_0 due to have no B frames iErr = CreatePicBuff (pCtx, &pCtx->pPicBuff, iPicQueueSize, kiPicWidth, kiPicHeight); @@ -460,7 +558,18 @@ void WelsFreeDynamicMemory (PWelsDecoderContext pCtx) { PPicBuff* pPicBuff = &pCtx->pPicBuff; if (NULL != pPicBuff && NULL != *pPicBuff) { - DestroyPicBuff (pPicBuff, pMa); + DestroyPicBuff (pCtx, pPicBuff, pMa); + } + if (GetThreadCount (pCtx) > 1) { + //prevent from double destruction of PPicBuff + PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pThreadCtx); + int32_t threadCount = pThreadCtx->sThreadInfo.uiThrMaxNum; + int32_t id = pThreadCtx->sThreadInfo.uiThrNum; + for (int32_t i = 0; i < threadCount; ++i) { + if (pThreadCtx[i - id].pCtx != NULL) { + pThreadCtx[i - id].pCtx->pPicBuff = NULL; + } + } } if (pCtx->pTempDec) { @@ -489,7 +598,7 @@ int32_t WelsOpenDecoder (PWelsDecoderContext pCtx, SLogContext* pLogCtx) { InitDecFuncs (pCtx, pCtx->uiCpuFlag); // vlc tables - InitVlcTable (&pCtx->sVlcTable); + InitVlcTable (pCtx->pVlcTable); // static memory iRet = WelsInitStaticMemory (pCtx); @@ -704,7 +813,11 @@ int32_t WelsDecodeBs (PWelsDecoderContext pCtx, const uint8_t* kpBsBuf, const in } CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo); if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { - ConstructAccessUnit (pCtx, ppDst, pDstBufInfo); + if (GetThreadCount (pCtx) <= 1) { + ConstructAccessUnit (pCtx, ppDst, pDstBufInfo); + } else { + pCtx->pAccessUnitList->uiAvailUnitsNum = 1; + } } } DecodeFinishUpdate (pCtx); @@ -760,9 +873,15 @@ int32_t 
WelsDecodeBs (PWelsDecoderContext pCtx, const uint8_t* kpBsBuf, const in if (IS_PARAM_SETS_NALS (pCtx->sCurNalHead.eNalUnitType)) { iRet = ParseNonVclNal (pCtx, pNalPayload, iDstIdx - iConsumedBytes, pSrcNal - 3, iSrcIdx + 3); } - CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo); + if (GetThreadCount (pCtx) <= 1) { + CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo); + } if (pCtx->bAuReadyFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { - ConstructAccessUnit (pCtx, ppDst, pDstBufInfo); + if (GetThreadCount (pCtx) <= 1) { + ConstructAccessUnit (pCtx, ppDst, pDstBufInfo); + } else { + pCtx->pAccessUnitList->uiAvailUnitsNum = 1; + } } } DecodeFinishUpdate (pCtx); @@ -825,7 +944,12 @@ int32_t SyncPictureResolutionExt (PWelsDecoderContext pCtx, const int32_t kiMbWi int32_t iErr = ERR_NONE; const int32_t kiPicWidth = kiMbWidth << 4; const int32_t kiPicHeight = kiMbHeight << 4; - + //fix Bugzilla Bug1479656 reallocate temp dec picture + if (pCtx->pTempDec != NULL && (pCtx->pTempDec->iWidthInPixel != kiPicWidth + || pCtx->pTempDec->iHeightInPixel != kiPicHeight)) { + FreePicture (pCtx->pTempDec, pCtx->pMemAlign); + pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); + } bool bReallocFlag = false; iErr = WelsRequestMem (pCtx, kiMbWidth, kiMbHeight, bReallocFlag); // common memory used if (ERR_NONE != iErr) { @@ -1072,7 +1196,7 @@ void UpdateDecStatFreezingInfo (const bool kbIdrFlag, SDecoderStatistics* pDecSt void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx) { PDqLayer pCurDq = pCtx->pCurDqLayer; PPicture pPic = pCtx->pDec; - SDecoderStatistics* pDecStat = &pCtx->sDecoderStatistics; + SDecoderStatistics* pDecStat = pCtx->pDecoderStatistics; if (pDecStat->iAvgLumaQp == -1) //first correct frame received pDecStat->iAvgLumaQp = 0; @@ -1114,7 +1238,7 @@ void UpdateDecStatNoFreezingInfo (PWelsDecoderContext pCtx) { //update decoder statistics information void UpdateDecStat (PWelsDecoderContext pCtx, const bool kbOutput) { 
if (pCtx->bFreezeOutput) - UpdateDecStatFreezingInfo (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag, &pCtx->sDecoderStatistics); + UpdateDecStatFreezingInfo (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag, pCtx->pDecoderStatistics); else if (kbOutput) UpdateDecStatNoFreezingInfo (pCtx); } diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp index b286aa37ed6..32da38e382c 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/decoder_core.cpp @@ -77,11 +77,11 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { - if ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth) - || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight)) { - pCtx->sDecoderStatistics.uiResolutionChangeTimes++; - pCtx->sDecoderStatistics.uiWidth = kiActualWidth; - pCtx->sDecoderStatistics.uiHeight = kiActualHeight; + if ((pCtx->pDecoderStatistics->uiWidth != (unsigned int) kiActualWidth) + || (pCtx->pDecoderStatistics->uiHeight != (unsigned int) kiActualHeight)) { + pCtx->pDecoderStatistics->uiResolutionChangeTimes++; + pCtx->pDecoderStatistics->uiWidth = kiActualWidth; + pCtx->pDecoderStatistics->uiHeight = kiActualHeight; } UpdateDecStatNoFreezingInfo (pCtx); } @@ -194,8 +194,9 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t "DecodeFrameConstruction(): iTotalNumMbRec:%d, total_num_mb_sps:%d, cur_layer_mb_width:%d, cur_layer_mb_height:%d ", pCtx->iTotalNumMbRec, kiTotalNumMbInCurLayer, pCurDq->iMbWidth, pCurDq->iMbHeight); bFrameCompleteFlag = false; //return later after output buffer is done - if (pCtx->bInstantDecFlag) //no-delay decoding, wait for new slice + if (pCtx->bInstantDecFlag) { //no-delay decoding, wait for new slice 
return ERR_INFO_MB_NUM_INADEQUATE; + } } else if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag && (pCtx->iErrorCode == dsErrorFree)) { //complete non-ECed IDR frame done pCtx->pDec->bIsComplete = true; @@ -219,10 +220,30 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t ppDst[0] = ppDst[0] + pCtx->sFrameCrop.iTopOffset * 2 * pPic->iLinesize[0] + pCtx->sFrameCrop.iLeftOffset * 2; ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset; ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset; + for (int i = 0; i < 3; ++i) { + pDstInfo->pDst[i] = ppDst[i]; + } pDstInfo->iBufferStatus = 1; - - bool bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth) - || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight); + if (GetThreadCount (pCtx) > 1 && pPic->bIsComplete == false) { + pPic->bIsComplete = true; + } + if (GetThreadCount (pCtx) > 1) { + uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4; + for (uint32_t i = 0; i < uiMbHeight; ++i) { + SET_EVENT (&pCtx->pDec->pReadyEvent[i]); + } + } + bool bOutResChange = false; + if (GetThreadCount (pCtx) <= 1 || pCtx->pLastThreadCtx == NULL) { + bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth) + || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight); + } else { + if (pCtx->pLastThreadCtx != NULL) { + PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx); + bOutResChange = (pLastThreadCtx->pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth) + || (pLastThreadCtx->pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight); + } + } pCtx->iLastImgWidthInPixel = pDstInfo->UsrData.sSystemBuffer.iWidth; pCtx->iLastImgHeightInPixel = pDstInfo->UsrData.sSystemBuffer.iHeight; if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) //no 
buffer output if EC is disabled and frame incomplete @@ -250,11 +271,11 @@ static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t pCtx->iMbNum = pPic->iMbNum; pCtx->iMbEcedPropNum = pPic->iMbEcedPropNum; if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { - if (pDstInfo->iBufferStatus && ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth) - || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight))) { - pCtx->sDecoderStatistics.uiResolutionChangeTimes++; - pCtx->sDecoderStatistics.uiWidth = kiActualWidth; - pCtx->sDecoderStatistics.uiHeight = kiActualHeight; + if (pDstInfo->iBufferStatus && ((pCtx->pDecoderStatistics->uiWidth != (unsigned int) kiActualWidth) + || (pCtx->pDecoderStatistics->uiHeight != (unsigned int) kiActualHeight))) { + pCtx->pDecoderStatistics->uiResolutionChangeTimes++; + pCtx->pDecoderStatistics->uiWidth = kiActualWidth; + pCtx->pDecoderStatistics->uiHeight = kiActualHeight; } UpdateDecStat (pCtx, pDstInfo->iBufferStatus != 0); } @@ -381,10 +402,13 @@ void CreateImplicitWeightTable (PWelsDecoderContext pCtx) { if (pCurDqLayer->bUseWeightedBiPredIdc && pSliceHeader->pPps->uiWeightedBipredIdc == 2) { int32_t iPoc = pSliceHeader->iPicOrderCntLsb; - if (pSliceHeader->uiRefCount[0] == 1 && pSliceHeader->uiRefCount[1] == 1 - && pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc + pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc == 2 * iPoc) { - pCurDqLayer->bUseWeightedBiPredIdc = false; - return; + //fix Bugzilla 1485229 check if pointers are NULL + if (pCtx->sRefPic.pRefList[LIST_0][0] && pCtx->sRefPic.pRefList[LIST_1][0]) { + if (pSliceHeader->uiRefCount[0] == 1 && pSliceHeader->uiRefCount[1] == 1 + && pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc + pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc == 2 * iPoc) { + pCurDqLayer->bUseWeightedBiPredIdc = false; + return; + } } pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom = 5; @@ -524,8 +548,8 @@ int32_t ParseDecRefPicMarking (PWelsDecoderContext 
pCtx, PBitStringAux pBs, PSli WELS_VERIFY_RETURN_IF (-1, (!bAllowMmco5 || bMmco5Exist)); bMmco5Exist = true; - pCtx->iPrevPicOrderCntLsb = 0; - pCtx->iPrevPicOrderCntMsb = 0; + pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0; + pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0; pSh->iPicOrderCntLsb = 0; if (pCtx->pSliceHeader) pCtx->pSliceHeader->iPicOrderCntLsb = 0; @@ -843,8 +867,9 @@ void UpdateDecoderStatisticsForActiveParaset (SDecoderStatistics* pDecoderStatis * Parse slice header of bitstream in avc for storing data structure */ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag) { - PNalUnit const kpCurNal = pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum - - 1]; + PNalUnit const kpCurNal = + pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum - + 1]; PNalUnitHeaderExt pNalHeaderExt = NULL; PSliceHeader pSliceHead = NULL; @@ -921,22 +946,22 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co iPpsId = uiCode; //add check PPS available here - if (pCtx->bPpsAvailFlags[iPpsId] == false) { - pCtx->sDecoderStatistics.iPpsReportErrorNum++; - if (pCtx->iPPSLastInvalidId != iPpsId) { + if (pCtx->sSpsPpsCtx.bPpsAvailFlags[iPpsId] == false) { + pCtx->pDecoderStatistics->iPpsReportErrorNum++; + if (pCtx->sSpsPpsCtx.iPPSLastInvalidId != iPpsId) { WelsLog (pLogCtx, WELS_LOG_ERROR, "PPS id (%d) is invalid, previous id (%d) error ignored (%d)!", iPpsId, - pCtx->iPPSLastInvalidId, pCtx->iPPSInvalidNum); - pCtx->iPPSLastInvalidId = iPpsId; - pCtx->iPPSInvalidNum = 0; + pCtx->sSpsPpsCtx.iPPSLastInvalidId, pCtx->sSpsPpsCtx.iPPSInvalidNum); + pCtx->sSpsPpsCtx.iPPSLastInvalidId = iPpsId; + pCtx->sSpsPpsCtx.iPPSInvalidNum = 0; } else { - pCtx->iPPSInvalidNum++; + pCtx->sSpsPpsCtx.iPPSInvalidNum++; } pCtx->iErrorCode |= dsNoParamSets; return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_PPS_ID); } - pCtx->iPPSLastInvalidId = -1; + 
pCtx->sSpsPpsCtx.iPPSLastInvalidId = -1; - pPps = &pCtx->sPpsBuffer[iPpsId]; + pPps = &pCtx->sSpsPpsCtx.sPpsBuffer[iPpsId]; if (pPps->uiNumSliceGroups == 0) { WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid PPS referenced"); @@ -945,38 +970,38 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co } if (kbExtensionFlag) { - pSubsetSps = &pCtx->sSubsetSpsBuffer[pPps->iSpsId]; + pSubsetSps = &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[pPps->iSpsId]; pSps = &pSubsetSps->sSps; - if (pCtx->bSubspsAvailFlags[pPps->iSpsId] == false) { - pCtx->sDecoderStatistics.iSubSpsReportErrorNum++; - if (pCtx->iSubSPSLastInvalidId != pPps->iSpsId) { + if (pCtx->sSpsPpsCtx.bSubspsAvailFlags[pPps->iSpsId] == false) { + pCtx->pDecoderStatistics->iSubSpsReportErrorNum++; + if (pCtx->sSpsPpsCtx.iSubSPSLastInvalidId != pPps->iSpsId) { WelsLog (pLogCtx, WELS_LOG_ERROR, "Sub SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId, - pCtx->iSubSPSLastInvalidId, pCtx->iSubSPSInvalidNum); - pCtx->iSubSPSLastInvalidId = pPps->iSpsId; - pCtx->iSubSPSInvalidNum = 0; + pCtx->sSpsPpsCtx.iSubSPSLastInvalidId, pCtx->sSpsPpsCtx.iSubSPSInvalidNum); + pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = pPps->iSpsId; + pCtx->sSpsPpsCtx.iSubSPSInvalidNum = 0; } else { - pCtx->iSubSPSInvalidNum++; + pCtx->sSpsPpsCtx.iSubSPSInvalidNum++; } pCtx->iErrorCode |= dsNoParamSets; return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID); } - pCtx->iSubSPSLastInvalidId = -1; + pCtx->sSpsPpsCtx.iSubSPSLastInvalidId = -1; } else { - if (pCtx->bSpsAvailFlags[pPps->iSpsId] == false) { - pCtx->sDecoderStatistics.iSpsReportErrorNum++; - if (pCtx->iSPSLastInvalidId != pPps->iSpsId) { + if (pCtx->sSpsPpsCtx.bSpsAvailFlags[pPps->iSpsId] == false) { + pCtx->pDecoderStatistics->iSpsReportErrorNum++; + if (pCtx->sSpsPpsCtx.iSPSLastInvalidId != pPps->iSpsId) { WelsLog (pLogCtx, WELS_LOG_ERROR, "SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId, - 
pCtx->iSPSLastInvalidId, pCtx->iSPSInvalidNum); - pCtx->iSPSLastInvalidId = pPps->iSpsId; - pCtx->iSPSInvalidNum = 0; + pCtx->sSpsPpsCtx.iSPSLastInvalidId, pCtx->sSpsPpsCtx.iSPSInvalidNum); + pCtx->sSpsPpsCtx.iSPSLastInvalidId = pPps->iSpsId; + pCtx->sSpsPpsCtx.iSPSInvalidNum = 0; } else { - pCtx->iSPSInvalidNum++; + pCtx->sSpsPpsCtx.iSPSInvalidNum++; } pCtx->iErrorCode |= dsNoParamSets; return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID); } - pCtx->iSPSLastInvalidId = -1; - pSps = &pCtx->sSpsBuffer[pPps->iSpsId]; + pCtx->sSpsPpsCtx.iSPSLastInvalidId = -1; + pSps = &pCtx->sSpsPpsCtx.sSpsBuffer[pPps->iSpsId]; } pSliceHead->iPpsId = iPpsId; pSliceHead->iSpsId = pPps->iSpsId; @@ -1046,16 +1071,18 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co //Calculate poc if necessary int32_t pocLsb = pSliceHead->iPicOrderCntLsb; if (pSliceHead->bIdrFlag || kpCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR) { - pCtx->iPrevPicOrderCntMsb = 0; - pCtx->iPrevPicOrderCntLsb = 0; + pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = 0; + pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = 0; } int32_t pocMsb; - if (pocLsb < pCtx->iPrevPicOrderCntLsb && pCtx->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2) - pocMsb = pCtx->iPrevPicOrderCntMsb + iMaxPocLsb; - else if (pocLsb > pCtx->iPrevPicOrderCntLsb && pocLsb - pCtx->iPrevPicOrderCntLsb > iMaxPocLsb / 2) - pocMsb = pCtx->iPrevPicOrderCntMsb - iMaxPocLsb; + if (pocLsb < pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb + && pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2) + pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb + iMaxPocLsb; + else if (pocLsb > pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb + && pocLsb - pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb > iMaxPocLsb / 2) + pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb - iMaxPocLsb; else - pocMsb = pCtx->iPrevPicOrderCntMsb; + pocMsb = pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb; 
pSliceHead->iPicOrderCntLsb = pocMsb + pocLsb; if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) { @@ -1063,8 +1090,8 @@ int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, co } if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) { - pCtx->iPrevPicOrderCntLsb = pocLsb; - pCtx->iPrevPicOrderCntMsb = pocMsb; + pCtx->pLastDecPicInfo->iPrevPicOrderCntLsb = pocLsb; + pCtx->pLastDecPicInfo->iPrevPicOrderCntMsb = pocMsb; } //End of Calculating poc } else if (pSps->uiPocType == 1 && !pSps->bDeltaPicOrderAlwaysZeroFlag) { @@ -1373,7 +1400,7 @@ bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kppDst pNalHdrExtS = &kpSrc->sNalHeaderExt; pShExtD = &kppDst->sNalData.sVclNal.sSliceHeaderExt; pPrefixS = &kpSrc->sNalData.sPrefixNal; - pSps = &pCtx->sSpsBuffer[pCtx->sPpsBuffer[pShExtD->sSliceHeader.iPpsId].iSpsId]; + pSps = &pCtx->sSpsPpsCtx.sSpsBuffer[pCtx->sSpsPpsCtx.sPpsBuffer[pShExtD->sSliceHeader.iPpsId].iSpsId]; pNalHdrExtD->uiDependencyId = pNalHdrExtS->uiDependencyId; pNalHdrExtD->uiQualityId = pNalHdrExtS->uiQualityId; @@ -1435,7 +1462,7 @@ int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) { if (uiActualIdx == pCurAu->uiActualUnitsNum) { // no found IDR nal within incoming AU, need exit to avoid mosaic issue, 11/19/2009 - pCtx->sDecoderStatistics.uiIDRLostNum++; + pCtx->pDecoderStatistics->uiIDRLostNum++; if (!pCtx->bParamSetsLostFlag) WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "UpdateAccessUnit():::::Key frame lost.....CAN NOT find IDR from current AU."); @@ -1457,7 +1484,6 @@ int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) { int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight) { int32_t i = 0; - WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiMaxWidth <= 0 || kiMaxHeight <= 0)) pCtx->sMb.iMbWidth = (kiMaxWidth + 15) >> 4; pCtx->sMb.iMbHeight = (kiMaxHeight + 15) >> 4; @@ -1503,7 +1529,8 @@ int32_t 
InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid sizeof ( bool), "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]"); - pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool), + pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + bool), "pCtx->sMb.pTransformSize8x8Flag[]"); pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( int8_t) * 2, @@ -1514,9 +1541,11 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]"); pCtx->sMb.pCbfDc[i] = (uint16_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint16_t), "pCtx->sMb.pCbfDc[]"); - pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24, + pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int8_t) * 24, "pCtx->sMb.pNzc[]"); - pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24, + pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int8_t) * 24, "pCtx->sMb.pNzcRs[]"); pCtx->sMb.pScaledTCoeff[i] = (int16_t (*)[MB_COEFF_LIST_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * @@ -1534,20 +1563,24 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid "pCtx->sMb.pChromaPredMode[]"); pCtx->sMb.pCbp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t), "pCtx->sMb.pCbp[]"); - pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * + pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * + 
pCtx->sMb.iMbHeight * sizeof ( uint32_t) * MB_PARTITION_SIZE, "pCtx->sMb.pSubMbType[]"); pCtx->sMb.pSliceIdc[i] = (int32_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t), "pCtx->sMb.pSliceIdc[]"); // using int32_t for slice_idc, 4/21/2010 - pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t), + pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + int8_t), "pCtx->sMb.pResidualPredFlag[]"); - pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( - int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]"); + pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * + sizeof ( + int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]"); pCtx->sMb.pMbCorrectlyDecodedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( bool), "pCtx->sMb.pMbCorrectlyDecodedFlag[]"); - pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool), + pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof ( + bool), "pCtx->pMbRefConcealedFlag[]"); // check memory block valid due above allocated.. 
@@ -1594,6 +1627,8 @@ int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWid return ERR_NONE; } + + void UninitialDqLayersContext (PWelsDecoderContext pCtx) { int32_t i = 0; CMemoryAlign* pMa = pCtx->pMemAlign; @@ -1822,9 +1857,9 @@ void ForceClearCurrentNal (PAccessUnit pAu) { } void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx) { - pCtx->bSpsExistAheadFlag = false; - pCtx->bSubspsExistAheadFlag = false; - pCtx->bPpsExistAheadFlag = false; + pCtx->sSpsPpsCtx.bSpsExistAheadFlag = false; + pCtx->sSpsPpsCtx.bSubspsExistAheadFlag = false; + pCtx->sSpsPpsCtx.bPpsExistAheadFlag = false; // Force clear the AU list pCtx->pAccessUnitList->uiAvailUnitsNum = 0; @@ -2098,14 +2133,14 @@ int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx) { return iRet; pCtx->pAccessUnitList->uiStartPos = 0; - if (!pCtx->bAvcBasedFlag && !CheckIntegrityNalUnitsList (pCtx)) { + if (!pCtx->sSpsPpsCtx.bAvcBasedFlag && !CheckIntegrityNalUnitsList (pCtx)) { pCtx->iErrorCode |= dsBitstreamError; return dsBitstreamError; } //check current AU has only one layer or not //If YES, can use deblocking based on AVC - if (!pCtx->bAvcBasedFlag) { + if (!pCtx->sSpsPpsCtx.bAvcBasedFlag) { CheckOnlyOneLayerInAu (pCtx); } @@ -2116,8 +2151,8 @@ void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx) { //save previous header info PAccessUnit pCurAu = pCtx->pAccessUnitList; PNalUnit pCurNal = pCurAu->pNalUnitsList[pCurAu->uiEndPos]; - memcpy (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt)); - memcpy (&pCtx->sLastSliceHeader, + memcpy (&pCtx->pLastDecPicInfo->sLastNalHdrExt, &pCurNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt)); + memcpy (&pCtx->pLastDecPicInfo->sLastSliceHeader, &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader, sizeof (SSliceHeader)); // uninitialize context of current access unit and rbsp buffer clean ResetCurrentAccessUnit (pCtx); @@ -2144,7 +2179,7 @@ static bool CheckNewSeqBeginAndUpdateActiveLayerSps 
(PWelsDecoderContext pCtx) { } int iMaxActiveLayer = 0, iMaxCurrentLayer = 0; for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) { - if (pCtx->pActiveLayerSps[i] != NULL) { + if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] != NULL) { iMaxActiveLayer = i; break; } @@ -2156,37 +2191,39 @@ static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) { } } if ((iMaxCurrentLayer != iMaxActiveLayer) - || (pTmpLayerSps[iMaxCurrentLayer] != pCtx->pActiveLayerSps[iMaxActiveLayer])) { + || (pTmpLayerSps[iMaxCurrentLayer] != pCtx->sSpsPpsCtx.pActiveLayerSps[iMaxActiveLayer])) { bNewSeq = true; } // fill active sps if the current sps is not null while active layer is null if (!bNewSeq) { for (int i = 0; i < MAX_LAYER_NUM; i++) { - if (pCtx->pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) { - pCtx->pActiveLayerSps[i] = pTmpLayerSps[i]; + if (pCtx->sSpsPpsCtx.pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) { + pCtx->sSpsPpsCtx.pActiveLayerSps[i] = pTmpLayerSps[i]; } } } else { // UpdateActiveLayerSps if new sequence start - memcpy (&pCtx->pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps)); + memcpy (&pCtx->sSpsPpsCtx.pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps)); } return bNewSeq; } static void WriteBackActiveParameters (PWelsDecoderContext pCtx) { - if (pCtx->iOverwriteFlags & OVERWRITE_PPS) { - memcpy (&pCtx->sPpsBuffer[pCtx->sPpsBuffer[MAX_PPS_COUNT].iPpsId], &pCtx->sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps)); + if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_PPS) { + memcpy (&pCtx->sSpsPpsCtx.sPpsBuffer[pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT].iPpsId], + &pCtx->sSpsPpsCtx.sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps)); } - if (pCtx->iOverwriteFlags & OVERWRITE_SPS) { - memcpy (&pCtx->sSpsBuffer[pCtx->sSpsBuffer[MAX_SPS_COUNT].iSpsId], &pCtx->sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps)); + if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SPS) { + memcpy 
(&pCtx->sSpsPpsCtx.sSpsBuffer[pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT].iSpsId], + &pCtx->sSpsPpsCtx.sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps)); pCtx->bNewSeqBegin = true; } - if (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS) { - memcpy (&pCtx->sSubsetSpsBuffer[pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId], - &pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps)); + if (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SUBSETSPS) { + memcpy (&pCtx->sSpsPpsCtx.sSubsetSpsBuffer[pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId], + &pCtx->sSpsPpsCtx.sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps)); pCtx->bNewSeqBegin = true; } - pCtx->iOverwriteFlags = OVERWRITE_NONE; + pCtx->sSpsPpsCtx.iOverwriteFlags = OVERWRITE_NONE; } /* @@ -2205,22 +2242,19 @@ void DecodeFinishUpdate (PWelsDecoderContext pCtx) { } /* - * ConstructAccessUnit - * construct an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to - * joint a collective access unit. 
- * parameter\ - * buf: bitstream data buffer - * bit_len: size in bit length of data - * buf_len: size in byte length of data - * coded_au: mark an Access Unit decoding finished - * return: - * 0 - success; otherwise returned error_no defined in error_no.h - */ -int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) { - int32_t iErr; +* WelsDecodeInitAccessUnitStart +* check and (re)allocate picture buffers on new sequence begin +* bit_len: size in bit length of data +* buf_len: size in byte length of data +* coded_au: mark an Access Unit decoding finished +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t WelsDecodeInitAccessUnitStart (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo) { + int32_t iErr = ERR_NONE; PAccessUnit pCurAu = pCtx->pAccessUnitList; pCtx->bAuReadyFlag = false; - pCtx->bLastHasMmco5 = false; + pCtx->pLastDecPicInfo->bLastHasMmco5 = false; bool bTmpNewSeqBegin = CheckNewSeqBeginAndUpdateActiveLayerSps (pCtx); pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || bTmpNewSeqBegin; iErr = WelsDecodeAccessUnitStart (pCtx); @@ -2240,17 +2274,82 @@ int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferI pCtx->pSps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps; pCtx->pPps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pPps; + return iErr; +} + +/* +* AllocPicBuffOnNewSeqBegin +* check and (re)allocate picture buffers on new sequence begin +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx) { //try to allocate or relocate DPB memory only when new sequence is coming. 
- if (pCtx->bNewSeqBegin) { + if (GetThreadCount (pCtx) <= 1) { WelsResetRefPic (pCtx); //clear ref pPic when IDR NAL - iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight); + } + int32_t iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight); + + if (ERR_NONE != iErr) { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed, the error is %d", iErr); + return iErr; + } + + return iErr; +} +/* +* InitConstructAccessUnit +* Init before constructing an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to +* joint a collective access unit. +* parameter\ +* SBufferInfo: Buffer info +* return: +* 0 - success; otherwise returned error_no defined in error_no.h +*/ +int32_t InitConstructAccessUnit (PWelsDecoderContext pCtx, SBufferInfo* pDstInfo) { + int32_t iErr = ERR_NONE; + + iErr = WelsDecodeInitAccessUnitStart (pCtx, pDstInfo); + if (ERR_NONE != iErr) { + return iErr; + } + if (pCtx->bNewSeqBegin) { + iErr = AllocPicBuffOnNewSeqBegin (pCtx); if (ERR_NONE != iErr) { - WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed, the error is %d", iErr); return iErr; } } + return iErr; +} + +/* + * ConstructAccessUnit + * construct an access unit for given input bitstream, maybe partial NAL Unit, one or more Units are involved to + * joint a collective access unit. 
+ * parameter\ + * buf: bitstream data buffer + * bit_len: size in bit length of data + * buf_len: size in byte length of data + * coded_au: mark an Access Unit decoding finished + * return: + * 0 - success; otherwise returned error_no defined in error_no.h + */ +int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) { + int32_t iErr = ERR_NONE; + if (GetThreadCount (pCtx) <= 1) { + iErr = InitConstructAccessUnit (pCtx, pDstInfo); + if (ERR_NONE != iErr) { + return iErr; + } + } + if (pCtx->pCabacDecEngine == NULL) { + pCtx->pCabacDecEngine = (SWelsCabacDecEngine*)pCtx->pMemAlign->WelsMallocz (sizeof (SWelsCabacDecEngine), + "pCtx->pCabacDecEngine"); + WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, (NULL == pCtx->pCabacDecEngine)) + } + iErr = DecodeCurrentAccessUnit (pCtx, ppDst, pDstInfo); WelsDecodeAccessUnitEnd (pCtx); @@ -2317,12 +2416,14 @@ void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pS pCtx->bUsedAsRef = false; pCtx->iFrameNum = pSh->iFrameNum; - UpdateDecoderStatisticsForActiveParaset (& (pCtx->sDecoderStatistics), - pSps, pPps); + UpdateDecoderStatisticsForActiveParaset (pCtx->pDecoderStatistics, pSps, pPps); } int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) { int32_t iRet = ERR_NONE; + if (GetThreadCount (pCtx) > 1 && pCtx->bNewSeqBegin) { + WelsResetRefPic (pCtx); + } if (pCtx->eSliceType == B_SLICE) { iRet = WelsInitBSliceRefList (pCtx, iPoc); CreateImplicitWeightTable (pCtx); @@ -2377,13 +2478,27 @@ void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) { * Decode current access unit when current AU is completed. 
*/ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) { - int32_t iRefCount[LIST_A]; - PNalUnit pNalCur = NULL; + PNalUnit pNalCur = pCtx->pNalCur = NULL; PAccessUnit pCurAu = pCtx->pAccessUnitList; int32_t iIdx = pCurAu->uiStartPos; int32_t iEndIdx = pCurAu->uiEndPos; + //get current thread ctx + PWelsDecoderThreadCTX pThreadCtx = NULL; + if (pCtx->pThreadCtx != NULL) { + pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx; + } + //get last thread ctx + PWelsDecoderThreadCTX pLastThreadCtx = NULL; + if (pCtx->pLastThreadCtx != NULL) { + pLastThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pLastThreadCtx); + if (pLastThreadCtx->pDec == NULL) { + pLastThreadCtx->pDec = PrefetchLastPicForThread (pCtx->pPicBuff, + pLastThreadCtx->iPicBuffIdx); + } + } + int32_t iThreadCount = GetThreadCount (pCtx); int32_t iPpsId = 0; int32_t iRet = ERR_NONE; @@ -2393,12 +2508,12 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf const uint8_t kuiDependencyIdMax = (kuiTargetLayerDqId & 0x7F) >> 4; int16_t iLastIdD = -1, iLastIdQ = -1; int16_t iCurrIdD = 0, iCurrIdQ = 0; - uint8_t uiNalRefIdc = 0; + pCtx->uiNalRefIdc = 0; bool bFreshSliceAvailable = true; // Another fresh slice comingup for given dq layer, for multiple slices in case of header parts of slices sometimes loss over error-prone channels, 8/14/2008 //update pCurDqLayer at the starting of AU decoding - if (pCtx->bInitialDqLayersMem) { + if (pCtx->bInitialDqLayersMem || pCtx->pCurDqLayer == NULL) { pCtx->pCurDqLayer = pCtx->pDqLayersList[0]; } @@ -2411,7 +2526,47 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf PSliceHeaderExt pShExt = NULL; PSliceHeader pSh = NULL; + if (pLastThreadCtx != NULL) { + pSh = &pNalCur->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader; + if (pSh->iFirstMbInSlice == 0) { + if (pLastThreadCtx->pCtx->pDec != NULL && pLastThreadCtx->pCtx->pDec->bIsUngroupedMultiSlice) { + WAIT_EVENT 
(&pLastThreadCtx->sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE); + } + pCtx->pDec = NULL; + pCtx->iTotalNumMbRec = 0; + } else if (pLastThreadCtx->pCtx->pDec != NULL) { + if (pSh->iFrameNum == pLastThreadCtx->pCtx->pDec->iFrameNum + && pSh->iPicOrderCntLsb == pLastThreadCtx->pCtx->pDec->iFramePoc) { + WAIT_EVENT (&pLastThreadCtx->sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE); + pCtx->pDec = pLastThreadCtx->pCtx->pDec; + pCtx->pDec->bIsUngroupedMultiSlice = true; + pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic; + pCtx->iTotalNumMbRec = pLastThreadCtx->pCtx->iTotalNumMbRec; + } + } + } + bool isNewFrame = true; + if (iThreadCount > 1) { + isNewFrame = pCtx->pDec == NULL; + } if (pCtx->pDec == NULL) { + if (pLastThreadCtx != NULL && iIdx == 0) { + pLastThreadCtx->pDec->bUsedAsRef = pLastThreadCtx->pCtx->uiNalRefIdc > 0; + if (pLastThreadCtx->pDec->bUsedAsRef) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + uint32_t i = 0; + while (i < MAX_DPB_COUNT && pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]) { + pLastThreadCtx->pDec->pRefPic[listIdx][i] = pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]; + ++i; + } + } + pLastThreadCtx->pCtx->sTmpRefPic = pLastThreadCtx->pCtx->sRefPic; + WelsMarkAsRef (pLastThreadCtx->pCtx, pLastThreadCtx->pDec); + pCtx->sRefPic = pLastThreadCtx->pCtx->sTmpRefPic; + } else { + pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic; + } + } pCtx->pDec = PrefetchPic (pCtx->pPicBuff); if (pCtx->iTotalNumMbRec != 0) pCtx->iTotalNumMbRec = 0; @@ -2424,17 +2579,33 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf pCtx->iErrorCode |= dsOutOfMemory; return ERR_INFO_REF_COUNT_OVERFLOW; } + if (pThreadCtx != NULL) { + pCtx->pDec->bIsUngroupedMultiSlice = false; + pThreadCtx->pDec = pCtx->pDec; + if (iThreadCount > 1) ++pCtx->pDec->iRefCount; + uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4; + for (uint32_t i = 0; i < uiMbHeight; ++i) { + RESET_EVENT (&pCtx->pDec->pReadyEvent[i]); 
+ } + } pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding } else if (pCtx->iTotalNumMbRec == 0) { //pDec != NULL, already start pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding } pCtx->pDec->uiTimeStamp = pNalCur->uiTimeStamp; + pCtx->pDec->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp; + if (pThreadCtx != NULL) { + pThreadCtx->iPicBuffIdx = pCtx->pDec->iPicBuffIdx; + pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag = pCtx->pDec->pMbCorrectlyDecodedFlag; + } if (pCtx->iTotalNumMbRec == 0) { //Picture start to decode for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++ i) memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t))); memset (pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool)); memset (pCtx->pCurDqLayer->pMbRefConcealedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool)); + memset (pCtx->pDec->pRefPic[LIST_0], 0, sizeof (PPicture) * MAX_DPB_COUNT); + memset (pCtx->pDec->pRefPic[LIST_1], 0, sizeof (PPicture) * MAX_DPB_COUNT); pCtx->pDec->iMbNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight; pCtx->pDec->iMbEcedNum = 0; pCtx->pDec->iMbEcedPropNum = 0; @@ -2465,6 +2636,7 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf pCtx->pDec->iFrameNum = pSh->iFrameNum; pCtx->pDec->iFramePoc = pSh->iPicOrderCntLsb; // still can not obtain correct, because current do not support POCtype 2 pCtx->pDec->bIdrFlag = pNalCur->sNalHeaderExt.bIdrFlag; + pCtx->pDec->eSliceType = pSh->eSliceType; memcpy (&pLayerInfo.sSliceInLayer.sSliceHeaderExt, pShExt, sizeof (SSliceHeaderExt)); //confirmed_safe_unsafe_usage pLayerInfo.sSliceInLayer.bSliceHeaderExtFlag = pNalCur->sNalData.sVclNal.bSliceHeaderExtFlag; @@ -2472,7 +2644,7 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf pLayerInfo.sSliceInLayer.iLastMbQp = pSh->iSliceQp; dq_cur->pBitStringAux = 
&pNalCur->sNalData.sVclNal.sSliceBitsRead; - uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc; + pCtx->uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc; iPpsId = pSh->iPpsId; @@ -2497,11 +2669,9 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf bFreshSliceAvailable = (iCurrIdD != iLastIdD || iCurrIdQ != iLastIdQ); // do not need condition of (first_mb == 0) due multiple slices might be disorder + WelsDqLayerDecodeStart (pCtx, pNalCur, pLayerInfo.pSps, pLayerInfo.pPps); - if (iCurrIdQ == BASE_QUALITY_ID) { - ST64 (iRefCount, LD64 (pLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiRefCount)); - } if ((iLastIdD < 0) || //case 1: first layer (iLastIdD == iCurrIdD)) { //case 2: same uiDId @@ -2511,11 +2681,35 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf const bool kbIdrFlag = dq_cur->sLayerInfo.sNalHeaderExt.bIdrFlag || (dq_cur->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR); // Subclause 8.2.5.2 Decoding process for gaps in frame_num + int32_t iPrevFrameNum = pCtx->pLastDecPicInfo->iPrevFrameNum; + if (pLastThreadCtx != NULL) { + if (pCtx->bNewSeqBegin) { + iPrevFrameNum = 0; + } else if (pLastThreadCtx->pDec != NULL) { + if (pLastThreadCtx->pDec->uiTimeStamp == pCtx->uiTimeStamp - 1) { + iPrevFrameNum = pLastThreadCtx->pDec->iFrameNum; + if (iPrevFrameNum == -1) iPrevFrameNum = pLastThreadCtx->pCtx->iFrameNum; + } else { + int32_t id = pThreadCtx->sThreadInfo.uiThrNum; + for (int32_t i = 0; i < iThreadCount; ++i) { + if (pThreadCtx[i - id].pCtx->uiTimeStamp == pCtx->uiTimeStamp - 1) { + if (pThreadCtx[i - id].pDec != NULL) iPrevFrameNum = pThreadCtx[i - id].pDec->iFrameNum; + if (iPrevFrameNum == -1) iPrevFrameNum = pThreadCtx[i - id].pCtx->iFrameNum; + break; + } + } + } + } else { + iPrevFrameNum = pCtx->bNewSeqBegin ? 
0 : pLastThreadCtx->pCtx->iFrameNum; + } + } if (!kbIdrFlag && - pSh->iFrameNum != pCtx->iPrevFrameNum && - pSh->iFrameNum != ((pCtx->iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) - 1))) { + pSh->iFrameNum != iPrevFrameNum && + pSh->iFrameNum != ((iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) - + 1))) { WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, - "referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d", pCtx->iPrevFrameNum, + "referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d", + iPrevFrameNum, pSh->iFrameNum); bAllRefComplete = false; @@ -2531,8 +2725,8 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf } } - if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID) { - iRet = InitRefPicList (pCtx, uiNalRefIdc, pSh->iPicOrderCntLsb); + if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID && isNewFrame) { + iRet = InitRefPicList (pCtx, pCtx->uiNalRefIdc, pSh->iPicOrderCntLsb); if (iRet) { pCtx->bRPLRError = true; bAllRefComplete = false; // RPLR error, set ref pictures complete flag false @@ -2547,8 +2741,19 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf } } } - - iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur); + //calculate Colocated mv scaling factor for temporal direct prediction + if (pSh->eSliceType == B_SLICE && !pSh->iDirectSpatialMvPredFlag) + ComputeColocatedTemporalScaling (pCtx); + + if (iThreadCount > 1) { + if (iIdx == 0) { + memset (&pCtx->lastReadyHeightOffset[0][0], -1, LIST_A * MAX_REF_PIC_COUNT * sizeof (int16_t)); + SET_EVENT (&pThreadCtx->sSliceDecodeStart); + } + iRet = WelsDecodeAndConstructSlice (pCtx); + } else { + iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur); + } //Output good store_base reconstruction when enhancement quality layer occurred error for MGS key picture case if (iRet != ERR_NONE) { @@ 
-2564,17 +2769,19 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf } } - if (bReconstructSlice) { + if (iThreadCount <= 1 && bReconstructSlice) { if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) { pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false return iRet; } } if (bAllRefComplete && pCtx->eSliceType != I_SLICE) { - if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) { - bAllRefComplete &= CheckRefPicturesComplete (pCtx); - } else { - bAllRefComplete = false; + if (iThreadCount <= 1) { + if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) { + bAllRefComplete &= CheckRefPicturesComplete (pCtx); + } else { + bAllRefComplete = false; + } } } } @@ -2626,53 +2833,80 @@ int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBuf } } + if (iThreadCount >= 1) { + int32_t id = pThreadCtx->sThreadInfo.uiThrNum; + for (int32_t i = 0; i < iThreadCount; ++i) { + if (i == id || pThreadCtx[i - id].pCtx->uiDecodingTimeStamp == 0) continue; + if (pThreadCtx[i - id].pCtx->uiDecodingTimeStamp < pCtx->uiDecodingTimeStamp) { + WAIT_EVENT (&pThreadCtx[i - id].sSliceDecodeFinish, WELS_DEC_THREAD_WAIT_INFINITE); + } + } + pCtx->pLastDecPicInfo->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp; + } iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo); - if (iRet) + if (iRet) { + if (iThreadCount > 1) { + SET_EVENT (&pThreadCtx->sSliceDecodeFinish); + } return iRet; + } - pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC - pCtx->bUsedAsRef = false; - if (uiNalRefIdc > 0) { - pCtx->bUsedAsRef = true; - //save MBType, MV and RefIndex for use in B-Slice direct mode - memcpy (pCtx->pDec->pMbType, pCtx->pCurDqLayer->pMbType, pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint32_t)); - memcpy (pCtx->pDec->pMv[LIST_0], pCtx->pCurDqLayer->pMv[LIST_0], - pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM); - memcpy 
(pCtx->pDec->pMv[LIST_1], pCtx->pCurDqLayer->pMv[LIST_1], - pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM); - memcpy (pCtx->pDec->pRefIndex[LIST_0], pCtx->pCurDqLayer->pRefIndex[LIST_0], - pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM); - memcpy (pCtx->pDec->pRefIndex[LIST_1], pCtx->pCurDqLayer->pRefIndex[LIST_1], - pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM); - for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - for (uint32_t i = 0; i < pCtx->sRefPic.uiRefCount[listIdx]; ++i) { - pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i]; + pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC + pCtx->bUsedAsRef = pCtx->uiNalRefIdc > 0; + if (iThreadCount <= 1) { + if (pCtx->bUsedAsRef) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + uint32_t i = 0; + while (i < MAX_DPB_COUNT && pCtx->sRefPic.pRefList[listIdx][i]) { + pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i]; + ++i; + } } - } - iRet = WelsMarkAsRef (pCtx); - if (iRet != ERR_NONE) { - if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM) - pCtx->iErrorCode |= dsBitstreamError; - if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { - pCtx->pDec = NULL; - return iRet; + iRet = WelsMarkAsRef (pCtx); + if (iRet != ERR_NONE) { + if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM) + pCtx->iErrorCode |= dsBitstreamError; + if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + pCtx->pDec = NULL; + return iRet; + } } + if (!pCtx->pParam->bParseOnly) + ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel, + pCtx->pDec->iLinesize, + pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture); } - if (!pCtx->pParam->bParseOnly) - ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel, - pCtx->pDec->iLinesize, 
- pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture); + } else if (iThreadCount > 1) { + SET_EVENT (&pThreadCtx->sImageReady); } pCtx->pDec = NULL; //after frame decoding, always set to NULL } // need update frame_num due current frame is well decoded if (pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0) - pCtx->iPrevFrameNum = pSh->iFrameNum; - if (pCtx->bLastHasMmco5) - pCtx->iPrevFrameNum = 0; + pCtx->pLastDecPicInfo->iPrevFrameNum = pSh->iFrameNum; + if (pCtx->pLastDecPicInfo->bLastHasMmco5) + pCtx->pLastDecPicInfo->iPrevFrameNum = 0; + if (iThreadCount > 1) { + int32_t id = pThreadCtx->sThreadInfo.uiThrNum; + for (int32_t i = 0; i < iThreadCount; ++i) { + if (pThreadCtx[i - id].pCtx != NULL) { + unsigned long long uiTimeStamp = pThreadCtx[i - id].pCtx->uiTimeStamp; + if (uiTimeStamp > 0 && pThreadCtx[i - id].pCtx->sSpsPpsCtx.iSeqId > pCtx->sSpsPpsCtx.iSeqId) { + CopySpsPps (pThreadCtx[i - id].pCtx, pCtx); + if (pCtx->pPicBuff != pThreadCtx[i - id].pCtx->pPicBuff) { + pCtx->pPicBuff = pThreadCtx[i - id].pCtx->pPicBuff; + } + InitialDqLayersContext (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); + break; + } + } + } + } + if (iThreadCount > 1) { + SET_EVENT (&pThreadCtx->sSliceDecodeFinish); + } } - return ERR_NONE; } @@ -2682,7 +2916,8 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) { //VCL data, AU list should have data PNalUnit pCurNal = pAu->pNalUnitsList[pAu->uiEndPos]; bAuBoundaryFlag = (pCtx->iTotalNumMbRec != 0) - && (CheckAccessUnitBoundaryExt (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, &pCtx->sLastSliceHeader, + && (CheckAccessUnitBoundaryExt (&pCtx->pLastDecPicInfo->sLastNalHdrExt, &pCurNal->sNalHeaderExt, + &pCtx->pLastDecPicInfo->sLastSliceHeader, &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader)); } else { //non VCL if (pCtx->sCurNalHead.eNalUnitType == 
NAL_UNIT_AU_DELIMITER) { @@ -2690,11 +2925,11 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SEI) { bAuBoundaryFlag = true; } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SPS) { - bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SPS); + bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SPS); } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SUBSET_SPS) { - bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS); + bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_SUBSETSPS); } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_PPS) { - bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_PPS); + bAuBoundaryFlag = !! (pCtx->sSpsPpsCtx.iOverwriteFlags & OVERWRITE_PPS); } if (bAuBoundaryFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { //Construct remaining data first ConstructAccessUnit (pCtx, ppDst, pDstInfo); @@ -2710,16 +2945,20 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn pCtx->pDec->iPpsId = pCtx->pPps->iPpsId; DecodeFrameConstruction (pCtx, ppDst, pDstInfo); - pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use - if (pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) { - MarkECFrameAsRef (pCtx); + pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use + if (pCtx->pLastDecPicInfo->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) { + if (MarkECFrameAsRef (pCtx) == ERR_INFO_INVALID_PTR) { + pCtx->iErrorCode |= dsRefListNullPtrs; + return false; + } } } else if (pCtx->pParam->bParseOnly) { //clear parse only internal data status pCtx->pParserBsInfo->iNalNum = 0; pCtx->bFrameFinish = true; //clear frame pending status here! 
} else { if (DecodeFrameConstruction (pCtx, ppDst, pDstInfo)) { - if ((pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) && (pCtx->sLastNalHdrExt.uiTemporalId == 0)) + if ((pCtx->pLastDecPicInfo->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) + && (pCtx->pLastDecPicInfo->sLastNalHdrExt.uiTemporalId == 0)) pCtx->iErrorCode |= dsNoParamSets; else pCtx->iErrorCode |= dsBitstreamError; @@ -2729,9 +2968,9 @@ bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferIn } pCtx->pDec = NULL; if (pAu->pNalUnitsList[pAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0) - pCtx->iPrevFrameNum = pCtx->sLastSliceHeader.iFrameNum; //save frame_num - if (pCtx->bLastHasMmco5) - pCtx->iPrevFrameNum = 0; + pCtx->pLastDecPicInfo->iPrevFrameNum = pCtx->pLastDecPicInfo->sLastSliceHeader.iFrameNum; //save frame_num + if (pCtx->pLastDecPicInfo->bLastHasMmco5) + pCtx->pLastDecPicInfo->iPrevFrameNum = 0; } return ERR_NONE; } @@ -2742,28 +2981,37 @@ bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) { int32_t iRealMbIdx = pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice; for (int32_t iMbIdx = 0; bAllRefComplete && iMbIdx < pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice; iMbIdx++) { - switch (pCtx->pCurDqLayer->pMbType[iRealMbIdx]) { + switch (pCtx->pCurDqLayer->pDec->pMbType[iRealMbIdx]) { case MB_TYPE_SKIP: case MB_TYPE_16x16: - bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; break; case MB_TYPE_16x8: - bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; - bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ 
pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete; break; case MB_TYPE_8x16: - bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; - bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete; break; case MB_TYPE_8x8: case MB_TYPE_8x8_REF0: - bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; - bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete; - bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete; - bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete; + bAllRefComplete &= + pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pDec->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete; break; default: @@ -2774,6 +3022,7 @@ bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) { if (iRealMbIdx == -1) //caused by abnormal return of FmoNextMb() return false; } + return bAllRefComplete; } } // namespace WelsDec diff --git 
a/chromium/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp index b11f764028d..64da754693d 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/error_concealment.cpp @@ -83,7 +83,7 @@ void InitErrorCon (PWelsDecoderContext pCtx) { //Do error concealment using frame copy method void DoErrorConFrameCopy (PWelsDecoderContext pCtx) { PPicture pDstPic = pCtx->pDec; - PPicture pSrcPic = pCtx->pPreviousDecodedPictureInDpb; + PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb; uint32_t uiHeightInPixelY = (pCtx->pSps->iMbHeight) << 4; int32_t iStrideY = pDstPic->iLinesize[0]; int32_t iStrideUV = pDstPic->iLinesize[1]; @@ -109,7 +109,7 @@ void DoErrorConSliceCopy (PWelsDecoderContext pCtx) { int32_t iMbWidth = (int32_t) pCtx->pSps->iMbWidth; int32_t iMbHeight = (int32_t) pCtx->pSps->iMbHeight; PPicture pDstPic = pCtx->pDec; - PPicture pSrcPic = pCtx->pPreviousDecodedPictureInDpb; + PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb; if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY) && (pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.bIdrFlag)) pSrcPic = NULL; //no cross IDR method, should fill in data instead of copy @@ -245,7 +245,7 @@ void DoMbECMvCopy (PWelsDecoderContext pCtx, PPicture pDec, PPicture pRef, int32 } iMVs[0] = iFullMVx - (iMbXInPix << 2); iMVs[1] = iFullMVy - (iMbYInPix << 2); - BaseMC (pMCRefMem, iMbXInPix, iMbYInPix, &pCtx->sMcFunc, 16, 16, iMVs); + BaseMC (pCtx, pMCRefMem, -1, -1, iMbXInPix, iMbYInPix, &pCtx->sMcFunc, 16, 16, iMVs); } return; } @@ -266,40 +266,40 @@ void GetAvilInfoFromCorrectMb (PWelsDecoderContext pCtx) { for (int32_t iMbY = 0; iMbY < iMbHeight; ++iMbY) { for (int32_t iMbX = 0; iMbX < iMbWidth; ++iMbX) { iMbXyIndex = iMbY * iMbWidth + iMbX; - if (pMbCorrectlyDecodedFlag[iMbXyIndex] && IS_INTER 
(pCurDqLayer->pMbType[iMbXyIndex])) { - uint32_t iMBType = pCurDqLayer->pMbType[iMbXyIndex]; + if (pMbCorrectlyDecodedFlag[iMbXyIndex] && IS_INTER (pCurDqLayer->pDec->pMbType[iMbXyIndex])) { + uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMbXyIndex]; switch (iMBType) { case MB_TYPE_SKIP: case MB_TYPE_16x16: - iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][0]; - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][0][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][0][1]; + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1]; pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; iInterMbCorrectNum[iRefIdx]++; break; case MB_TYPE_16x8: - iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][0]; - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][0][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][0][1]; + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1]; pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; iInterMbCorrectNum[iRefIdx]++; - iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][8]; - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][8][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][8][1]; + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][8]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][8][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][8][1]; pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; iInterMbCorrectNum[iRefIdx]++; break; case MB_TYPE_8x16: - iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][0]; - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][0][0]; - 
pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][0][1]; + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][0]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][0][1]; pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; iInterMbCorrectNum[iRefIdx]++; - iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][2]; - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][2][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][2][1]; + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][2]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][2][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][2][1]; pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; iInterMbCorrectNum[iRefIdx]++; break; @@ -311,39 +311,39 @@ void GetAvilInfoFromCorrectMb (PWelsDecoderContext pCtx) { for (i = 0; i < 4; i++) { iSubMBType = pCurDqLayer->pSubMbType[iMbXyIndex][i]; iIIdx = ((i >> 1) << 3) + ((i & 1) << 1); - iRefIdx = pCurDqLayer->pRefIndex[0][iMbXyIndex][iIIdx]; + iRefIdx = pCurDqLayer->pDec->pRefIndex[0][iMbXyIndex][iIIdx]; pCtx->pECRefPic[iRefIdx] = pCtx->sRefPic.pRefList[LIST_0][iRefIdx]; switch (iSubMBType) { case SUB_MB_TYPE_8x8: - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][1]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1]; iInterMbCorrectNum[iRefIdx]++; break; case SUB_MB_TYPE_8x4: - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][1]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1]; - 
pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 4][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 4][1]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 4][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 4][1]; iInterMbCorrectNum[iRefIdx] += 2; break; case SUB_MB_TYPE_4x8: - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx][1]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx][1]; - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 1][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + 1][1]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 1][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + 1][1]; iInterMbCorrectNum[iRefIdx] += 2; break; case SUB_MB_TYPE_4x4: { for (j = 0; j < 4; j++) { iJIdx = ((j >> 1) << 2) + (j & 1); - pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + iJIdx][0]; - pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pMv[0][iMbXyIndex][iIIdx + iJIdx][1]; + pCtx->iECMVs[iRefIdx][0] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + iJIdx][0]; + pCtx->iECMVs[iRefIdx][1] += pCurDqLayer->pDec->pMv[0][iMbXyIndex][iIIdx + iJIdx][1]; } iInterMbCorrectNum[iRefIdx] += 4; } @@ -372,7 +372,7 @@ void DoErrorConSliceMVCopy (PWelsDecoderContext pCtx) { int32_t iMbWidth = (int32_t) pCtx->pSps->iMbWidth; int32_t iMbHeight = (int32_t) pCtx->pSps->iMbHeight; PPicture pDstPic = pCtx->pDec; - PPicture pSrcPic = pCtx->pPreviousDecodedPictureInDpb; + PPicture pSrcPic = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb; bool* pMbCorrectlyDecodedFlag = pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag; int32_t iMbXyIndex; diff --git 
a/chromium/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp index a40d2b09b81..571ce41d7d9 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/manage_dec_ref.cpp @@ -42,6 +42,7 @@ #include "manage_dec_ref.h" #include "error_concealment.h" #include "error_code.h" +#include "decoder.h" namespace WelsDec { @@ -50,10 +51,10 @@ static PPicture WelsDelLongFromList (PRefPic pRefPic, uint32_t uiLongTermFrameId static PPicture WelsDelShortFromListSetUnref (PRefPic pRefPic, int32_t iFrameNum); static PPicture WelsDelLongFromListSetUnref (PRefPic pRefPic, uint32_t uiLongTermFrameIdx); -static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking); -static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType, +static int32_t MMCO (PWelsDecoderContext pCtx, PRefPic pRefPic, PRefPicMarking pRefPicMarking); +static int32_t MMCOProcess (PWelsDecoderContext pCtx, PRefPic pRefPic, uint32_t uiMmcoType, int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx); -static int32_t SlidingWindow (PWelsDecoderContext pCtx); +static int32_t SlidingWindow (PWelsDecoderContext pCtx, PRefPic pRefPic); static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic); static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongTermFrameIdx, uint32_t uiLongTermPicNum); @@ -63,7 +64,7 @@ static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx); #ifdef LONG_TERM_REF int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum); #endif -static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx); +static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx, PRefPic pRefPic); static void SetUnRef (PPicture pRef) { if (NULL != pRef) { @@ -79,6 +80,20 @@ static void 
SetUnRef (PPicture pRef) { pRef->uiSpatialId = -1; pRef->iSpsId = -1; pRef->bIsComplete = false; + pRef->iRefCount = 0; + + if (pRef->eSliceType == I_SLICE) { + return; + } + int32_t lists = pRef->eSliceType == P_SLICE ? 1 : 2; + for (int32_t i = 0; i < MAX_DPB_COUNT; ++i) { + for (int32_t list = 0; list < lists; ++list) { + if (pRef->pRefPic[list][i] != NULL) { + pRef->pRefPic[list][i]->iRefCount = 0; + pRef->pRefPic[list][i] = NULL; + } + } + } } } @@ -111,9 +126,29 @@ void WelsResetRefPic (PWelsDecoderContext pCtx) { pRefPic->uiLongRefCount[LIST_0] = 0; } +void WelsResetRefPicWithoutUnRef (PWelsDecoderContext pCtx) { + int32_t i = 0; + PRefPic pRefPic = &pCtx->sRefPic; + pCtx->sRefPic.uiLongRefCount[LIST_0] = pCtx->sRefPic.uiShortRefCount[LIST_0] = 0; + + pRefPic->uiRefCount[LIST_0] = 0; + pRefPic->uiRefCount[LIST_1] = 0; + + for (i = 0; i < MAX_DPB_COUNT; i++) { + pRefPic->pShortRefList[LIST_0][i] = NULL; + } + pRefPic->uiShortRefCount[LIST_0] = 0; + + for (i = 0; i < MAX_DPB_COUNT; i++) { + pRefPic->pLongRefList[LIST_0][i] = NULL; + } + pRefPic->uiLongRefCount[LIST_0] = 0; +} + static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx) { - if ((pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] <= 0) && (pCtx->eSliceType != I_SLICE - && pCtx->eSliceType != SI_SLICE)) { + if ((pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] <= 0) + && (pCtx->eSliceType != I_SLICE + && pCtx->eSliceType != SI_SLICE)) { if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { //IDR lost!, recover it for future decoding with data all set to 0 PPicture pRef = PrefetchPic (pCtx->pPicBuff); @@ -122,30 +157,43 @@ static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx) { pRef->bIsComplete = false; // Set complete flag to false for lost IDR ref picture pRef->iSpsId = pCtx->pSps->iSpsId; pRef->iPpsId = pCtx->pPps->iPpsId; + if (pCtx->eSliceType == B_SLICE) { + //reset reference's references 
when IDR is lost + for (int32_t list = LIST_0; list < LIST_A; ++list) { + for (int32_t i = 0; i < MAX_DPB_COUNT; ++i) { + pRef->pRefPic[list][i] = NULL; + } + } + } pCtx->iErrorCode |= dsDataErrorConcealed; bool bCopyPrevious = ((ERROR_CON_FRAME_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc) || (ERROR_CON_SLICE_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc) || (ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc) || (ERROR_CON_SLICE_MV_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc) || (ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc)) - && (NULL != pCtx->pPreviousDecodedPictureInDpb); - bCopyPrevious = bCopyPrevious && (pRef->iWidthInPixel == pCtx->pPreviousDecodedPictureInDpb->iWidthInPixel) - && (pRef->iHeightInPixel == pCtx->pPreviousDecodedPictureInDpb->iHeightInPixel); + && (NULL != pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb); + bCopyPrevious = bCopyPrevious + && (pRef->iWidthInPixel == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iWidthInPixel) + && (pRef->iHeightInPixel == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iHeightInPixel); if (!bCopyPrevious) { memset (pRef->pData[0], 128, pRef->iLinesize[0] * pRef->iHeightInPixel); memset (pRef->pData[1], 128, pRef->iLinesize[1] * pRef->iHeightInPixel / 2); memset (pRef->pData[2], 128, pRef->iLinesize[2] * pRef->iHeightInPixel / 2); - } else if (pRef == pCtx->pPreviousDecodedPictureInDpb) { + } else if (pRef == pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb) { WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsInitRefList()::EC memcpy overlap."); } else { - memcpy (pRef->pData[0], pCtx->pPreviousDecodedPictureInDpb->pData[0], pRef->iLinesize[0] * pRef->iHeightInPixel); - memcpy (pRef->pData[1], pCtx->pPreviousDecodedPictureInDpb->pData[1], pRef->iLinesize[1] * pRef->iHeightInPixel / 2); - memcpy (pRef->pData[2], pCtx->pPreviousDecodedPictureInDpb->pData[2], pRef->iLinesize[2] * pRef->iHeightInPixel / 2); + memcpy 
(pRef->pData[0], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[0], + pRef->iLinesize[0] * pRef->iHeightInPixel); + memcpy (pRef->pData[1], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[1], + pRef->iLinesize[1] * pRef->iHeightInPixel / 2); + memcpy (pRef->pData[2], pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->pData[2], + pRef->iLinesize[2] * pRef->iHeightInPixel / 2); } pRef->iFrameNum = 0; pRef->iFramePoc = 0; pRef->uiTemporalId = pRef->uiQualityId = 0; + pRef->eSliceType = pCtx->eSliceType; ExpandReferencingPicture (pRef->pData, pRef->iWidthInPixel, pRef->iHeightInPixel, pRef->iLinesize, pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture); AddShortTermToList (&pCtx->sRefPic, pRef); @@ -346,8 +394,11 @@ int32_t WelsReorderRefList (PWelsDecoderContext pCtx) { for (int32_t listIdx = 0; listIdx < ListCount; ++listIdx) { PPicture pPic = NULL; PPicture* ppRefList = pCtx->sRefPic.pRefList[listIdx]; - int32_t iMaxRefIdx = pCtx->pSps->iNumRefFrames; - int32_t iRefCount = pCtx->sRefPic.uiRefCount[listIdx]; + int32_t iMaxRefIdx = pCtx->iPicQueueNumber; + if (iMaxRefIdx >= MAX_REF_PIC_COUNT) { + iMaxRefIdx = MAX_REF_PIC_COUNT - 1; + } + int32_t iRefCount = pSliceHeader->uiRefCount[listIdx]; int32_t iPredFrameNum = pSliceHeader->iFrameNum; int32_t iMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum; int32_t iAbsDiffPicNum = -1; @@ -520,13 +571,20 @@ int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx) { for (i = WELS_MAX (1, WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx])); i < iRefCount; i++) ppRefList[i] = ppRefList[i - 1]; - pCtx->sRefPic.uiRefCount[listIdx] = (uint8_t)WELS_MIN (WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx]), iRefCount); + pCtx->sRefPic.uiRefCount[listIdx] = (uint8_t)WELS_MIN (WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx]), + iRefCount); } return ERR_NONE; } -int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) { - PRefPic pRefPic = &pCtx->sRefPic; +int32_t WelsMarkAsRef 
(PWelsDecoderContext pCtx, PPicture pLastDec) { + PPicture pDec = pLastDec; + bool isThreadCtx = true; + if (pDec == NULL) { + pDec = pCtx->pDec; + isThreadCtx = false; + } + PRefPic pRefPic = isThreadCtx ? &pCtx->sTmpRefPic : &pCtx->sRefPic; PRefPicMarking pRefPicMarking = pCtx->pCurDqLayer->pRefPicMarking; PAccessUnit pCurAU = pCtx->pAccessUnitList; bool bIsIDRAU = false; @@ -534,10 +592,10 @@ int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) { int32_t iRet = ERR_NONE; - pCtx->pDec->uiQualityId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId; - pCtx->pDec->uiTemporalId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiTemporalId; - pCtx->pDec->iSpsId = pCtx->pSps->iSpsId; - pCtx->pDec->iPpsId = pCtx->pPps->iPpsId; + pDec->uiQualityId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId; + pDec->uiTemporalId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiTemporalId; + pDec->iSpsId = pCtx->pSps->iSpsId; + pDec->iPpsId = pCtx->pPps->iPpsId; for (j = pCurAU->uiStartPos; j <= pCurAU->uiEndPos; j++) { if (pCurAU->pNalUnitsList[j]->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR @@ -548,33 +606,33 @@ int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) { } if (bIsIDRAU) { if (pRefPicMarking->bLongTermRefFlag) { - pCtx->sRefPic.iMaxLongTermFrameIdx = 0; - AddLongTermToList (pRefPic, pCtx->pDec, 0, 0); + pRefPic->iMaxLongTermFrameIdx = 0; + AddLongTermToList (pRefPic, pDec, 0, 0); } else { - pCtx->sRefPic.iMaxLongTermFrameIdx = -1; + pRefPic->iMaxLongTermFrameIdx = -1; } } else { if (pRefPicMarking->bAdaptiveRefPicMarkingModeFlag) { - iRet = MMCO (pCtx, pRefPicMarking); + iRet = MMCO (pCtx, pRefPic, pRefPicMarking); if (iRet != ERR_NONE) { if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { - iRet = RemainOneBufferInDpbForEC (pCtx); + iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic); WELS_VERIFY_RETURN_IF (iRet, iRet); } else { return iRet; } } - if (pCtx->bLastHasMmco5) { - pCtx->pDec->iFrameNum = 0; - pCtx->pDec->iFramePoc = 0; + if 
(pCtx->pLastDecPicInfo->bLastHasMmco5) { + pDec->iFrameNum = 0; + pDec->iFramePoc = 0; } } else { - iRet = SlidingWindow (pCtx); + iRet = SlidingWindow (pCtx, pRefPic); if (iRet != ERR_NONE) { if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { - iRet = RemainOneBufferInDpbForEC (pCtx); + iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic); WELS_VERIFY_RETURN_IF (iRet, iRet); } else { return iRet; @@ -583,22 +641,22 @@ int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) { } } - if (!pCtx->pDec->bIsLongRef) { + if (!pDec->bIsLongRef) { if (pRefPic->uiLongRefCount[LIST_0] + pRefPic->uiShortRefCount[LIST_0] >= WELS_MAX (1, pCtx->pSps->iNumRefFrames)) { if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { - iRet = RemainOneBufferInDpbForEC (pCtx); + iRet = RemainOneBufferInDpbForEC (pCtx, pRefPic); WELS_VERIFY_RETURN_IF (iRet, iRet); } else { return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW; } } - iRet = AddShortTermToList (pRefPic, pCtx->pDec); + iRet = AddShortTermToList (pRefPic, pDec); } return iRet; } -static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) { +static int32_t MMCO (PWelsDecoderContext pCtx, PRefPic pRefPic, PRefPicMarking pRefPicMarking) { PSps pSps = pCtx->pCurDqLayer->sLayerInfo.pSps; int32_t i = 0; int32_t iRet = ERR_NONE; @@ -612,7 +670,8 @@ static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) { if (uiMmcoType > MMCO_LONG) { return ERR_INFO_INVALID_MMCO_OPCODE_BASE; } - iRet = MMCOProcess (pCtx, uiMmcoType, iShortFrameNum, uiLongTermPicNum, iLongTermFrameIdx, iMaxLongTermFrameIdx); + iRet = MMCOProcess (pCtx, pRefPic, uiMmcoType, iShortFrameNum, uiLongTermPicNum, iLongTermFrameIdx, + iMaxLongTermFrameIdx); if (iRet != ERR_NONE) { return iRet; } @@ -623,9 +682,8 @@ static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) { return ERR_NONE; } -static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType, +static int32_t MMCOProcess (PWelsDecoderContext pCtx, PRefPic 
pRefPic, uint32_t uiMmcoType, int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx) { - PRefPic pRefPic = &pCtx->sRefPic; PPicture pPic = NULL; int32_t i = 0; int32_t iRet = ERR_NONE; @@ -672,7 +730,7 @@ static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType, break; case MMCO_RESET: WelsResetRefPic (pCtx); - pCtx->bLastHasMmco5 = true; + pCtx->pLastDecPicInfo->bLastHasMmco5 = true; break; case MMCO_LONG: if (iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) { @@ -697,13 +755,12 @@ static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType, return iRet; } -static int32_t SlidingWindow (PWelsDecoderContext pCtx) { - PRefPic pRefPic = &pCtx->sRefPic; +static int32_t SlidingWindow (PWelsDecoderContext pCtx, PRefPic pRefPic) { PPicture pPic = NULL; int32_t i = 0; - if (pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] >= pCtx->pSps->iNumRefFrames) { - if (pCtx->sRefPic.uiShortRefCount[LIST_0] == 0) { + if (pRefPic->uiShortRefCount[LIST_0] + pRefPic->uiLongRefCount[LIST_0] >= pCtx->pSps->iNumRefFrames) { + if (pRefPic->uiShortRefCount[LIST_0] == 0) { WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "No reference picture in short term list when sliding window"); return ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH; } @@ -728,8 +785,8 @@ static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum) { for (i = 0; i < pRefPic->uiShortRefCount[LIST_0]; i++) { if (pRefPic->pShortRefList[LIST_0][i]->iFrameNum == iFrameNum) { iMoveSize = pRefPic->uiShortRefCount[LIST_0] - i - 1; - pRefPic->pShortRefList[LIST_0][i]->bUsedAsRef = false; pPic = pRefPic->pShortRefList[LIST_0][i]; + pPic->bUsedAsRef = false; pRefPic->pShortRefList[LIST_0][i] = NULL; if (iMoveSize > 0) { memmove (&pRefPic->pShortRefList[LIST_0][i], &pRefPic->pShortRefList[LIST_0][i + 1], @@ -740,7 +797,6 @@ static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum) { break; } } - return 
pPic; } @@ -788,6 +844,9 @@ static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic) { if (pRefPic->uiShortRefCount[LIST_0] > 0) { // Check the duplicate frame_num in short ref list for (int32_t iPos = 0; iPos < pRefPic->uiShortRefCount[LIST_0]; iPos++) { + if (!pRefPic->pShortRefList[LIST_0][iPos]) { + return ERR_INFO_INVALID_PTR; + } if (pPic->iFrameNum == pRefPic->pShortRefList[LIST_0][iPos]->iFrameNum) { // Replace the previous ref pic with the new one with the same frame_num pRefPic->pShortRefList[LIST_0][iPos] = pPic; @@ -815,6 +874,9 @@ static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongT pRefPic->pLongRefList[LIST_0][pRefPic->uiLongRefCount[LIST_0]] = pPic; } else { for (i = 0; i < pRefPic->uiLongRefCount[LIST_0]; i++) { + if (!pRefPic->pLongRefList[LIST_0][i]) { + return ERR_INFO_INVALID_PTR; + } if (pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx > pPic->iLongTermFrameIdx) { break; } @@ -860,14 +922,13 @@ int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum) { } #endif -static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx) { +static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx, PRefPic pRefPic) { int32_t iRet = ERR_NONE; - PRefPic pRefPic = &pCtx->sRefPic; if (pRefPic->uiShortRefCount[0] + pRefPic->uiLongRefCount[0] < pCtx->pSps->iNumRefFrames) return iRet; if (pRefPic->uiShortRefCount[0] > 0) { - iRet = SlidingWindow (pCtx); + iRet = SlidingWindow (pCtx, pRefPic); } else { //all LTR, remove the smallest long_term_frame_idx int32_t iLongTermFrameIdx = 0; int32_t iMaxLongTermFrameIdx = pRefPic->iMaxLongTermFrameIdx; diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp index e180bf132bd..642a982d915 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/mv_pred.cpp @@ -155,7 +155,7 @@ 
void CopyRectBlock4Cols (void* vdst, void* vsrc, const int32_t stride_dst, const memcpy (&dst[stride_dst * 3], &src[stride_src * 3], 16); } } -void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) { +void PredPSkipMvFromNeighbor (PDqLayer pCurDqLayer, int16_t iMvp[2]) { bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail; int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc; @@ -170,14 +170,14 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) { int8_t iMatchRef; int16_t iMvA[2], iMvB[2], iMvC[2], iMvD[2]; - iCurXy = pCurLayer->iMbXyIndex; - iCurX = pCurLayer->iMbX; - iCurY = pCurLayer->iMbY; - iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy]; + iCurXy = pCurDqLayer->iMbXyIndex; + iCurX = pCurDqLayer->iMbX; + iCurY = pCurDqLayer->iMbY; + iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy]; if (iCurX != 0) { iLeftXy = iCurXy - 1; - iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy]; + iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy]; bLeftAvail = (iLeftSliceIdc == iCurSliceIdc); } else { bLeftAvail = 0; @@ -185,19 +185,19 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) { } if (iCurY != 0) { - iTopXy = iCurXy - pCurLayer->iMbWidth; - iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy]; + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy]; bTopAvail = (iTopSliceIdc == iCurSliceIdc); if (iCurX != 0) { iLeftTopXy = iTopXy - 1; - iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy]; + iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy]; bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc); } else { bLeftTopAvail = 0; } - if (iCurX != (pCurLayer->iMbWidth - 1)) { + if (iCurX != (pCurDqLayer->iMbWidth - 1)) { iRightTopXy = iTopXy + 1; - iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy]; + iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy]; bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc); } else { bRightTopAvail = 0; @@ -208,18 +208,18 @@ void 
PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) { bRightTopAvail = 0; } - iLeftType = ((iCurX != 0 && bLeftAvail) ? pCurLayer->pMbType[iLeftXy] : 0); - iTopType = ((iCurY != 0 && bTopAvail) ? pCurLayer->pMbType[iTopXy] : 0); + iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0); + iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0); iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail) - ? pCurLayer->pMbType[iLeftTopXy] : 0); - iRightTopType = ((iCurX != pCurLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail) - ? pCurLayer->pMbType[iRightTopXy] : 0); + ? GetMbType (pCurDqLayer)[iLeftTopXy] : 0); + iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail) + ? GetMbType (pCurDqLayer)[iRightTopXy] : 0); /*get neb mv&iRefIdxArray*/ /*left*/ if (bLeftAvail && IS_INTER (iLeftType)) { - ST32 (iMvA, LD32 (pCurLayer->pMv[0][iLeftXy][3])); - iLeftRef = pCurLayer->pRefIndex[0][iLeftXy][3]; + ST32 (iMvA, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftXy][3] : pCurDqLayer->pMv[0][iLeftXy][3])); + iLeftRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iLeftXy][3] : pCurDqLayer->pRefIndex[0][iLeftXy][3]; } else { ST32 (iMvA, 0); if (0 == bLeftAvail) { //not available @@ -236,8 +236,8 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) { /*top*/ if (bTopAvail && IS_INTER (iTopType)) { - ST32 (iMvB, LD32 (pCurLayer->pMv[0][iTopXy][12])); - iTopRef = pCurLayer->pRefIndex[0][iTopXy][12]; + ST32 (iMvB, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iTopXy][12] : pCurDqLayer->pMv[0][iTopXy][12])); + iTopRef = pCurDqLayer->pDec ? 
pCurDqLayer->pDec->pRefIndex[0][iTopXy][12] : pCurDqLayer->pRefIndex[0][iTopXy][12]; } else { ST32 (iMvB, 0); if (0 == bTopAvail) { //not available @@ -254,8 +254,10 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) { /*right_top*/ if (bRightTopAvail && IS_INTER (iRightTopType)) { - ST32 (iMvC, LD32 (pCurLayer->pMv[0][iRightTopXy][12])); - iRightTopRef = pCurLayer->pRefIndex[0][iRightTopXy][12]; + ST32 (iMvC, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iRightTopXy][12] : + pCurDqLayer->pMv[0][iRightTopXy][12])); + iRightTopRef = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[0][iRightTopXy][12] : + pCurDqLayer->pRefIndex[0][iRightTopXy][12]; } else { ST32 (iMvC, 0); if (0 == bRightTopAvail) { //not available @@ -267,8 +269,9 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) { /*left_top*/ if (bLeftTopAvail && IS_INTER (iLeftTopType)) { - ST32 (iMvD, LD32 (pCurLayer->pMv[0][iLeftTopXy][15])); - iLeftTopRef = pCurLayer->pRefIndex[0][iLeftTopXy][15]; + ST32 (iMvD, LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[0][iLeftTopXy][15] : pCurDqLayer->pMv[0][iLeftTopXy][15])); + iLeftTopRef = pCurDqLayer->pDec ? 
pCurDqLayer->pDec->pRefIndex[0][iLeftTopXy][15] : + pCurDqLayer->pRefIndex[0][iLeftTopXy][15]; } else { ST32 (iMvD, 0); if (0 == bLeftTopAvail) { //not available @@ -305,13 +308,21 @@ void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) { } int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& subMbType) { - PDqLayer pCurLayer = pCtx->pCurDqLayer; - int32_t iMbXy = pCurLayer->iMbXyIndex; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; - uint32_t is8x8 = IS_Inter_8x8 (pCurLayer->pMbType[iMbXy]); - mbType = pCurLayer->pMbType[iMbXy]; + uint32_t is8x8 = IS_Inter_8x8 (GetMbType (pCurDqLayer)[iMbXy]); + mbType = GetMbType (pCurDqLayer)[iMbXy]; PPicture colocPic = pCtx->sRefPic.pRefList[LIST_1][0]; + if (GetThreadCount (pCtx) > 1) { + if (16 * pCurDqLayer->iMbY > pCtx->lastReadyHeightOffset[1][0]) { + if (colocPic->pReadyEvent[pCurDqLayer->iMbY].isSignaled != 1) { + WAIT_EVENT (&colocPic->pReadyEvent[pCurDqLayer->iMbY], WELS_DEC_THREAD_WAIT_INFINITE); + } + pCtx->lastReadyHeightOffset[1][0] = 16 * pCurDqLayer->iMbY; + } + } if (colocPic == NULL) { SLogContext* pLogCtx = & (pCtx->sLogCtx); @@ -320,7 +331,10 @@ int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& sub } MbType coloc_mbType = colocPic->pMbType[iMbXy]; - + if (coloc_mbType == MB_TYPE_SKIP) { + //This indicates the colocated MB is P SKIP MB + coloc_mbType |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P1L0; + } if (IS_Inter_8x8 (coloc_mbType) && !pCtx->pSps->bDirect8x8InferenceFlag) { subMbType = SUB_MB_TYPE_4x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT; mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1; @@ -333,43 +347,43 @@ int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& sub } if (IS_INTRA (coloc_mbType)) { - SetRectBlock (pCurLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t)); + SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t)); 
return ERR_NONE; } - SetRectBlock (pCurLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t)); + SetRectBlock (pCurDqLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t)); if (IS_INTER_16x16 (mbType)) { int16_t iMVZero[2] = { 0 }; int16_t* pMv = IS_TYPE_L1 (coloc_mbType) ? colocPic->pMv[LIST_1][iMbXy][0] : iMVZero; - ST32 (pCurLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0])); - ST32 (pCurLayer->iColocMv[LIST_1][0], LD32 (pMv)); - pCurLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0]; - pCurLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? colocPic->pRefIndex[LIST_1][iMbXy][0] : - REF_NOT_IN_LIST; + ST32 (pCurDqLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0])); + ST32 (pCurDqLayer->iColocMv[LIST_1][0], LD32 (pMv)); + pCurDqLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0]; + pCurDqLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? colocPic->pRefIndex[LIST_1][iMbXy][0] : + REF_NOT_IN_LIST; } else { if (!pCtx->pSps->bDirect8x8InferenceFlag) { - CopyRectBlock4Cols (pCurLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4); - CopyRectBlock4Cols (pCurLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1); + CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4); + CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1); if (IS_TYPE_L1 (coloc_mbType)) { - CopyRectBlock4Cols (pCurLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4); - CopyRectBlock4Cols (pCurLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1); + CopyRectBlock4Cols (pCurDqLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4); + CopyRectBlock4Cols (pCurDqLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1); } else { // only forward prediction - SetRectBlock 
(pCurLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1); + SetRectBlock (pCurDqLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1); } } else { for (int32_t listIdx = 0; listIdx < 1 + !! (coloc_mbType & MB_TYPE_L1); listIdx++) { - SetRectBlock (pCurLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4); - SetRectBlock (pCurLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4); - SetRectBlock (pCurLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4); - SetRectBlock (pCurLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4); - - SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1); - SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1); - SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1); - SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1); + SetRectBlock (pCurDqLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4); + SetRectBlock (pCurDqLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4); + SetRectBlock (pCurDqLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4); + SetRectBlock (pCurDqLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4); + + SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1); + SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1); + SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1); + SetRectBlock (&pCurDqLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1); 
} if (! (coloc_mbType & MB_TYPE_L1)) // only forward prediction - SetRectBlock (&pCurLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1); + SetRectBlock (&pCurDqLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1); } } return ERR_NONE; @@ -379,9 +393,9 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], SubMbType& subMbType) { int32_t ret = ERR_NONE; - PDqLayer pCurLayer = pCtx->pCurDqLayer; - int32_t iMbXy = pCurLayer->iMbXyIndex; - bool bSkipOrDirect = (IS_SKIP (pCurLayer->pMbType[iMbXy]) | IS_DIRECT (pCurLayer->pMbType[iMbXy])) > 0; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0; MbType mbType; ret = GetColocatedMb (pCtx, mbType, subMbType); @@ -401,15 +415,15 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t iDiagonalRef[LIST_A]; int16_t iMvA[LIST_A][2], iMvB[LIST_A][2], iMvC[LIST_A][2], iMvD[LIST_A][2]; - iCurXy = pCurLayer->iMbXyIndex; + iCurXy = pCurDqLayer->iMbXyIndex; - iCurX = pCurLayer->iMbX; - iCurY = pCurLayer->iMbY; - iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy]; + iCurX = pCurDqLayer->iMbX; + iCurY = pCurDqLayer->iMbY; + iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy]; if (iCurX != 0) { iLeftXy = iCurXy - 1; - iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy]; + iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy]; bLeftAvail = (iLeftSliceIdc == iCurSliceIdc); } else { bLeftAvail = 0; @@ -417,19 +431,19 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], } if (iCurY != 0) { - iTopXy = iCurXy - pCurLayer->iMbWidth; - iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy]; + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy]; bTopAvail = (iTopSliceIdc == iCurSliceIdc); if (iCurX != 0) { iLeftTopXy = iTopXy - 1; - iLeftTopSliceIdc = 
pCurLayer->pSliceIdc[iLeftTopXy]; + iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy]; bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc); } else { bLeftTopAvail = 0; } - if (iCurX != (pCurLayer->iMbWidth - 1)) { + if (iCurX != (pCurDqLayer->iMbWidth - 1)) { iRightTopXy = iTopXy + 1; - iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy]; + iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy]; bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc); } else { bRightTopAvail = 0; @@ -440,20 +454,22 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], bRightTopAvail = 0; } - iLeftType = ((iCurX != 0 && bLeftAvail) ? pCurLayer->pMbType[iLeftXy] : 0); - iTopType = ((iCurY != 0 && bTopAvail) ? pCurLayer->pMbType[iTopXy] : 0); + iLeftType = ((iCurX != 0 && bLeftAvail) ? GetMbType (pCurDqLayer)[iLeftXy] : 0); + iTopType = ((iCurY != 0 && bTopAvail) ? GetMbType (pCurDqLayer)[iTopXy] : 0); iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail) - ? pCurLayer->pMbType[iLeftTopXy] : 0); - iRightTopType = ((iCurX != pCurLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail) - ? pCurLayer->pMbType[iRightTopXy] : 0); + ? GetMbType (pCurDqLayer)[iLeftTopXy] : 0); + iRightTopType = ((iCurX != pCurDqLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail) + ? GetMbType (pCurDqLayer)[iRightTopXy] : 0); /*get neb mv&iRefIdxArray*/ for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { /*left*/ if (bLeftAvail && IS_INTER (iLeftType)) { - ST32 (iMvA[listIdx], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3])); - iLeftRef[listIdx] = pCurLayer->pRefIndex[listIdx][iLeftXy][3]; + ST32 (iMvA[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3] : + pCurDqLayer->pMv[listIdx][iLeftXy][3])); + iLeftRef[listIdx] = pCurDqLayer->pDec ? 
pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3] : + pCurDqLayer->pRefIndex[listIdx][iLeftXy][3]; } else { ST32 (iMvA[listIdx], 0); if (0 == bLeftAvail) { //not available @@ -465,8 +481,10 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], /*top*/ if (bTopAvail && IS_INTER (iTopType)) { - ST32 (iMvB[listIdx], LD32 (pCurLayer->pMv[listIdx][iTopXy][12])); - iTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iTopXy][12]; + ST32 (iMvB[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iTopXy][12] : + pCurDqLayer->pMv[listIdx][iTopXy][12])); + iTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12] : + pCurDqLayer->pRefIndex[listIdx][iTopXy][12]; } else { ST32 (iMvB[listIdx], 0); if (0 == bTopAvail) { //not available @@ -478,8 +496,10 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], /*right_top*/ if (bRightTopAvail && IS_INTER (iRightTopType)) { - ST32 (iMvC[listIdx], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12])); - iRightTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12]; + ST32 (iMvC[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12] : + pCurDqLayer->pMv[listIdx][iRightTopXy][12])); + iRightTopRef[listIdx] = pCurDqLayer->pDec ? pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12] : + pCurDqLayer->pRefIndex[listIdx][iRightTopXy][12]; } else { ST32 (iMvC[listIdx], 0); if (0 == bRightTopAvail) { //not available @@ -490,8 +510,10 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], } /*left_top*/ if (bLeftTopAvail && IS_INTER (iLeftTopType)) { - ST32 (iMvD[listIdx], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15])); - iLeftTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15]; + ST32 (iMvD[listIdx], LD32 (pCurDqLayer->pDec ? pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15] : + pCurDqLayer->pMv[listIdx][iLeftTopXy][15])); + iLeftTopRef[listIdx] = pCurDqLayer->pDec ? 
pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15] : + pCurDqLayer->pRefIndex[listIdx][iLeftTopXy][15]; } else { ST32 (iMvD[listIdx], 0); if (0 == bLeftTopAvail) { //not available @@ -540,7 +562,7 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], mbType &= ~MB_TYPE_L0; subMbType &= ~MB_TYPE_L0; } - pCurLayer->pMbType[iMbXy] = mbType; + GetMbType (pCurDqLayer)[iMbXy] = mbType; int16_t pMvd[4] = { 0 }; @@ -548,31 +570,31 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], if (IS_INTER_16x16 (mbType)) { if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) { - if (0 == pCurLayer->iColocIntra[0] && !bIsLongRef - && ((pCurLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurLayer->iColocMv[LIST_0][0][0] + 1) <= 2 - && (unsigned) (pCurLayer->iColocMv[LIST_0][0][1] + 1) <= 2) - || (pCurLayer->iColocRefIndex[LIST_0][0] < 0 && pCurLayer->iColocRefIndex[LIST_1][0] == 0 - && (unsigned) (pCurLayer->iColocMv[LIST_1][0][0] + 1) <= 2 - && (unsigned) (pCurLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) { + if (0 == pCurDqLayer->iColocIntra[0] && !bIsLongRef + && ((pCurDqLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][0] + 1) <= 2 + && (unsigned) (pCurDqLayer->iColocMv[LIST_0][0][1] + 1) <= 2) + || (pCurDqLayer->iColocRefIndex[LIST_0][0] < 0 && pCurDqLayer->iColocRefIndex[LIST_1][0] == 0 + && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][0] + 1) <= 2 + && (unsigned) (pCurDqLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) { if (0 >= ref[0]) * (uint32_t*)iMvp[LIST_0] = 0; if (0 >= ref[1]) * (uint32_t*)iMvp[LIST_1] = 0; } } - UpdateP16x16DirectCabac (pCurLayer); + UpdateP16x16DirectCabac (pCurDqLayer); for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { - UpdateP16x16MotionInfo (pCurLayer, listIdx, ref[listIdx], iMvp[listIdx]); - UpdateP16x16MvdCabac (pCurLayer, pMvd, listIdx); + UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref[listIdx], iMvp[listIdx]); + 
UpdateP16x16MvdCabac (pCurDqLayer, pMvd, listIdx); } } else { if (bSkipOrDirect) { int8_t pSubPartCount[4], pPartW[4]; for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv int16_t iIdx8 = i << 2; - pCurLayer->pSubMbType[iMbXy][i] = subMbType; + pCurDqLayer->pSubMbType[iMbXy][i] = subMbType; int8_t pRefIndex[LIST_A][30]; - UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0); - UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1); - UpdateP8x8DirectCabac (pCurLayer, iIdx8); + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0); + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1); + UpdateP8x8DirectCabac (pCurDqLayer, iIdx8); pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount; pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth; @@ -581,137 +603,90 @@ int32_t PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], pSubPartCount[i] = 4; pPartW[i] = 1; } - - int8_t iPartCount = pSubPartCount[i]; - int16_t iPartIdx, iBlockW = pPartW[i]; - - for (int32_t j = 0; j < iPartCount; j++) { - iPartIdx = iIdx8 + j * iBlockW; - uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; - uint8_t iColocIdx = g_kuiScan4[iPartIdx]; - //uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; - - int16_t pMV[4] = { 0 }; - if (IS_SUB_8x8 (subMbType)) { - * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_0]; - ST32 ((pMV + 2), LD32 (pMV)); - ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV)); - ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV)); - ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); - * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_1]; - ST32 ((pMV + 2), LD32 (pMV)); - ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV)); - ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV)); - ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); - } 
else { //SUB_4x4 - * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_0]; - ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV)); - ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_1]; - ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV)); - ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - } - if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) { - uint32_t uiColZeroFlag = (0 == pCurLayer->iColocIntra[iColocIdx]) && !bIsLongRef && - (pCurLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurLayer->iColocRefIndex[LIST_0][iColocIdx] < 0 - && pCurLayer->iColocRefIndex[LIST_1][iColocIdx] == 0)); - const int16_t (*mvColoc)[2] = 0 == pCurLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurLayer->iColocMv[LIST_0] : - pCurLayer->iColocMv[LIST_1]; - const int16_t* mv = mvColoc[iColocIdx]; - if (IS_SUB_8x8 (subMbType)) { - if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) { - if (ref[LIST_0] == 0) { - ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0); - ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0); - ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); - } - - if (ref[LIST_1] == 0) { - ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0); - ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0); - ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); - } - } - } else { - if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) { - if (ref[LIST_0] == 0) { - ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0); - ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - } - if (ref[LIST_1] == 0) { - ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0); - ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - } - } - } - } - } + FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, bIsLongRef, iMvp, ref, NULL, 
NULL); } } } return ret; } -int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A]) { +int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A], + SubMbType& subMbType) { int32_t ret = ERR_NONE; - PDqLayer pCurLayer = pCtx->pCurDqLayer; - int32_t iMbXy = pCurLayer->iMbXyIndex; - bool bSkipOrDirect = (IS_SKIP (pCurLayer->pMbType[iMbXy]) | IS_DIRECT (pCurLayer->pMbType[iMbXy])) > 0; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + bool bSkipOrDirect = (IS_SKIP (GetMbType (pCurDqLayer)[iMbXy]) | IS_DIRECT (GetMbType (pCurDqLayer)[iMbXy])) > 0; + MbType mbType; - SubMbType subMbType; ret = GetColocatedMb (pCtx, mbType, subMbType); if (ret != ERR_NONE) { return ret; } - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + + GetMbType (pCurDqLayer)[iMbXy] = mbType; + + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + int16_t pMvd[4] = { 0 }; + const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]); if (IS_INTER_16x16 (mbType)) { ref[LIST_0] = 0; ref[LIST_1] = 0; - UpdateP16x16RefIdx (pCurLayer, LIST_1, ref[LIST_1]); + UpdateP16x16DirectCabac (pCurDqLayer); + UpdateP16x16RefIdx (pCurDqLayer, LIST_1, ref[LIST_1]); ST64 (iMvp, 0); - if (pCurLayer->iColocIntra[0]) { - UpdateP16x16MotionOnly (pCurLayer, LIST_0, iMvp[LIST_0]); - UpdateP16x16MotionOnly (pCurLayer, LIST_1, iMvp[LIST_1]); - UpdateP16x16RefIdx (pCurLayer, LIST_0, ref[LIST_0]); + if (pCurDqLayer->iColocIntra[0]) { + UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]); + UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]); + UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]); } else { - ref[LIST_0] = pCurLayer->iColocRefIndex[LIST_0][0] >= 0 ? 
pCurLayer->iColocRefIndex[LIST_0][0] : - pCurLayer->iColocRefIndex[LIST_1][0]; - const int16_t (*mvColoc)[2] = 0 == ref[LIST_0] ? pCurLayer->iColocMv[LIST_0] : pCurLayer->iColocMv[LIST_1]; - const int16_t* mv = mvColoc[0]; - UpdateP16x16RefIdx (pCurLayer, LIST_0, ref[LIST_0]); + ref[LIST_0] = 0; + int16_t* mv = pCurDqLayer->iColocMv[LIST_0][0]; + int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][0]; + if (colocRefIndexL0 >= 0) { + ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count); + } else { + mv = pCurDqLayer->iColocMv[LIST_1][0]; + } + UpdateP16x16RefIdx (pCurDqLayer, LIST_0, ref[LIST_0]); iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8; iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8; - UpdateP16x16MotionOnly (pCurLayer, LIST_0, iMvp[LIST_0]); + UpdateP16x16MotionOnly (pCurDqLayer, LIST_0, iMvp[LIST_0]); iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0]; iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1]; - UpdateP16x16MotionOnly (pCurLayer, LIST_1, iMvp[LIST_1]); + UpdateP16x16MotionOnly (pCurDqLayer, LIST_1, iMvp[LIST_1]); } + UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0); + UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_1); } else { if (bSkipOrDirect) { int8_t pSubPartCount[4], pPartW[4]; + int8_t pRefIndex[LIST_A][30]; for (int32_t i = 0; i < 4; i++) { int16_t iIdx8 = i << 2; - pCurLayer->pSubMbType[iMbXy][i] = subMbType; + const uint8_t iScan4Idx = g_kuiScan4[iIdx8]; + pCurDqLayer->pSubMbType[iMbXy][i] = subMbType; + + int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0]; ref[LIST_1] = 0; - if (pCurLayer->iColocIntra[g_kuiScan4[iIdx8]]) { + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1); + if (pCurDqLayer->iColocIntra[iScan4Idx]) { ref[LIST_0] = 0; + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0); + ST64 (iMvp, 0); } else { - if (pCurLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) { - ref[LIST_0] = pCurLayer->iColocRefIndex[LIST_0][iIdx8]; 
+ ref[LIST_0] = 0; + int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][iScan4Idx]; + if (colocRefIndexL0 >= 0) { + ref[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count); } else { - ref[LIST_0] = pCurLayer->iColocRefIndex[LIST_1][iIdx8]; + mvColoc = pCurDqLayer->iColocMv[LIST_1]; } + UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0); } - int8_t pRefIndex[LIST_A][30]; - UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0); - UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1); - UpdateP8x8DirectCabac (pCurLayer, iIdx8); + UpdateP8x8DirectCabac (pCurDqLayer, iIdx8); pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount; pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth; @@ -720,46 +695,7 @@ int32_t PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], pSubPartCount[i] = 4; pPartW[i] = 1; } - - int8_t iPartCount = pSubPartCount[i]; - int16_t iPartIdx, iBlockW = pPartW[i]; - for (int32_t j = 0; j < iPartCount; j++) { - iPartIdx = iIdx8 + j * iBlockW; - uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; - uint8_t iColocIdx = g_kuiScan4[iPartIdx]; - - int16_t (*mvColoc)[2] = pCurLayer->iColocMv[LIST_0]; - int16_t* mv = mvColoc[iColocIdx]; - - int16_t pMV[4] = { 0 }; - if (IS_SUB_8x8 (subMbType)) { - iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8; - iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8; - ST32 (pMV, LD32 (iMvp[LIST_0])); - ST32 ((pMV + 2), LD32 (iMvp[LIST_0])); - ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV)); - ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV)); - ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); - iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0]; - iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1]; - ST32 (pMV, LD32 (iMvp[LIST_1])); - ST32 ((pMV + 2), LD32 (iMvp[LIST_1])); - ST64 
(pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV)); - ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV)); - ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); - } else { //SUB_4x4 - iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8; - iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8; - ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (iMvp[LIST_0])); - ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0]; - iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1]; - ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (iMvp[LIST_1])); - ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - } - } + FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], subMbType, ref, mvColoc, NULL, NULL); } } } @@ -868,14 +804,23 @@ void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef, //mb const uint8_t kuiScan4Idx = g_kuiScan4[i]; const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx; + if (pCurDqLayer->pDec != NULL) { + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } else { + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); - ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2); - ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); - - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4Idx ], kiMV32); - ST32 
(pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4IdxPlus4], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } } } @@ -891,8 +836,8 @@ void UpdateP16x16RefIdx (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef) { const uint8_t kuiScan4Idx = g_kuiScan4[i]; const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx; - ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); - ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); } } @@ -907,11 +852,17 @@ void UpdateP16x16MotionOnly (PDqLayer pCurDqLayer, int32_t listIdx, int16_t iMVs //mb const uint8_t kuiScan4Idx = g_kuiScan4[i]; const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx; - - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + if (pCurDqLayer->pDec != NULL) { + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } else { + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 
(pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } } } @@ -931,12 +882,21 @@ void UpdateP16x8MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][ const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx; //mb - ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2); - ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4Idx ], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4IdxPlus4], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + if (pCurDqLayer->pDec != NULL) { + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } else { + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } //cache ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2); ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2); @@ -962,12 +922,21 @@ void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][ const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx; //mb - ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2); - 
ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4Idx ], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][ kuiScan4IdxPlus4], kiMV32); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + if (pCurDqLayer->pDec != NULL) { + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } else { + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2); + ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32); + ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32); + } //cache ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2); ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2); @@ -978,4 +947,237 @@ void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][ } } +void FillSpatialDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, const int8_t& iPartW, + const SubMbType& subMbType, const bool& bIsLongRef, int16_t pMvDirect[LIST_A][2], int8_t iRef[LIST_A], + int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A]) { + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + for (int32_t j = 0; j < iPartCount; j++) { + int8_t iPartIdx = iIdx8 + j * iPartW; + uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + uint8_t iColocIdx 
= g_kuiScan4[iPartIdx]; + uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + int16_t pMV[4] = { 0 }; + if (IS_SUB_8x8 (subMbType)) { + * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0]; + ST32 ((pMV + 2), LD32 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV)); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV)); + ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV)); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_0][iCacheIdx], 0); + ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0); + } + * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1]; + ST32 ((pMV + 2), LD32 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV)); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV)); + ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV)); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_1][iCacheIdx], 0); + ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0); + } + } else { //SUB_4x4 + * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0]; + ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV)); + ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMV)); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_0][iCacheIdx], 0); + } + * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1]; + ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV)); + ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 
(pMV)); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_1][iCacheIdx], 0); + } + } + if ((* (int32_t*)pMvDirect[LIST_0] | * (int32_t*)pMvDirect[LIST_1])) { + uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef && + (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0 + && pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0)); + const int16_t (*mvColoc)[2] = 0 == pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurDqLayer->iColocMv[LIST_0] : + pCurDqLayer->iColocMv[LIST_1]; + const int16_t* mv = mvColoc[iColocIdx]; + if (IS_SUB_8x8 (subMbType)) { + if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) { + if (iRef[LIST_0] == 0) { + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_0][iCacheIdx], 0); + ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 0); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_0][iCacheIdx], 0); + ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0); + } + } + + if (iRef[LIST_1] == 0) { + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_1][iCacheIdx], 0); + ST64 (pMotionVector[LIST_1][iCacheIdx + 6], 0); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_1][iCacheIdx], 0); + ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0); + } + } + } + } else { + if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) { + if (iRef[LIST_0] == 0) { + ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], 0); + ST32 
(pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_0][iCacheIdx], 0); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_0][iCacheIdx], 0); + } + } + if (iRef[LIST_1] == 0) { + ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], 0); + ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_1][iCacheIdx], 0); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_1][iCacheIdx], 0); + } + } + } + } + } + } +} + +void FillTemporalDirect8x8Mv (PDqLayer pCurDqLayer, const int16_t& iIdx8, const int8_t& iPartCount, + const int8_t& iPartW, + const SubMbType& subMbType, int8_t iRef[LIST_A], int16_t (*mvColoc)[2], int16_t pMotionVector[LIST_A][30][MV_A], + int16_t pMvdCache[LIST_A][30][MV_A]) { + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + for (int32_t j = 0; j < iPartCount; j++) { + int8_t iPartIdx = iIdx8 + j * iPartW; + uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + uint8_t iColocIdx = g_kuiScan4[iPartIdx]; + uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + + int16_t* mv = mvColoc[iColocIdx]; + + int16_t pMV[4] = { 0 }; + if (IS_SUB_8x8 (subMbType)) { + if (!pCurDqLayer->iColocIntra[iColocIdx]) { + pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8; + pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8; + } + ST32 (pMV, LD32 (pMvDirect[LIST_0])); + ST32 ((pMV + 2), LD32 (pMvDirect[LIST_0])); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV)); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV)); + ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 
LD64 (pMV)); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_0][iCacheIdx], 0); + ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0); + } + if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) { + pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0]; + pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1]; + } + ST32 (pMV, LD32 (pMvDirect[LIST_1])); + ST32 ((pMV + 2), LD32 (pMvDirect[LIST_1])); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV)); + ST64 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV)); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); + if (pMotionVector != NULL) { + ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV)); + ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV)); + } + if (pMvdCache != NULL) { + ST64 (pMvdCache[LIST_1][iCacheIdx], 0); + ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0); + } + } else { //SUB_4x4 + if (!pCurDqLayer->iColocIntra[iColocIdx]) { + pMvDirect[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8; + pMvDirect[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8; + } + ST32 (pCurDqLayer->pDec->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_0])); + ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMvDirect[LIST_0])); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_0][iCacheIdx], 0); + } + if (!pCurDqLayer->iColocIntra[iColocIdx]) { + pMvDirect[LIST_1][0] = pMvDirect[LIST_0][0] - mv[0]; + pMvDirect[LIST_1][1] = pMvDirect[LIST_0][1] - mv[1]; + } + ST32 (pCurDqLayer->pDec->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMvDirect[LIST_1])); + ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + if (pMotionVector != NULL) { + ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMvDirect[LIST_1])); + } + if (pMvdCache != NULL) { + ST32 (pMvdCache[LIST_1][iCacheIdx], 0); + } + } + } +} +int8_t MapColToList0 
(PWelsDecoderContext& pCtx, const int8_t& colocRefIndexL0, + const int32_t& ref0Count) { //ISO/IEC 14496-10:2009(E) (8-193) + //When reference is lost, this function must be skipped. + if ((pCtx->iErrorCode & dsRefLost) == dsRefLost) { + return 0; + } + PPicture pic1 = pCtx->sRefPic.pRefList[LIST_1][0]; + if (pic1 && pic1->pRefPic[LIST_0][colocRefIndexL0]) { + const int32_t iFramePoc = pic1->pRefPic[LIST_0][colocRefIndexL0]->iFramePoc; + for (int32_t i = 0; i < ref0Count; i++) { + if (pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc == iFramePoc) { + return i; + } + } + } + return 0; +} +void Update8x8RefIdx (PDqLayer& pCurDqLayer, const int16_t& iPartIdx, const int32_t& listIdx, const int8_t& iRef) { + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; + pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 1] = + pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[listIdx][iMbXy][iScan4Idx + + 5] = iRef; + +} } // namespace WelsDec diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp index 65e53ff5bf7..690acd09e17 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cabac.cpp @@ -35,6 +35,7 @@ #include "mv_pred.h" #include "error_code.h" #include <stdio.h> + namespace WelsDec { #define IDX_UNUSED -1 @@ -110,8 +111,8 @@ void UpdateP16x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; const uint8_t iCacheIdx6 = 6 + iCacheIdx; //mb - ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes); - ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes); + ST32 
(&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes); + ST32 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes); //cache ST32 (&pRefIndex[iListIdx][iCacheIdx ], iRef4Bytes); ST32 (&pRefIndex[iListIdx][iCacheIdx6], iRef4Bytes); @@ -129,8 +130,8 @@ void UpdateP8x16RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], const uint8_t iScan4Idx4 = 4 + iScan4Idx; const uint8_t iCacheIdx6 = 6 + iCacheIdx; //mb - ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes); - ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes); + ST16 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes); + ST16 (&pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes); //cache ST16 (&pRefIndex[iListIdx][iCacheIdx ], iRef2Bytes); ST16 (&pRefIndex[iListIdx][iCacheIdx6], iRef2Bytes); @@ -141,8 +142,10 @@ void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], const int8_t iListIdx) { int32_t iMbXy = pCurDqLayer->iMbXyIndex; const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; - pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1] = - pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 5] = iRef; + pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1] + = + pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[iListIdx][iMbXy][iScan4Idx + + 5] = iRef; } void UpdateP8x8DirectCabac (PDqLayer pCurDqLayer, int32_t iPartIdx) { @@ -476,7 +479,7 @@ int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeigh uint32_t uiCode; int32_t iIdxA, iIdxB, iCtxInc; int8_t* pChromaPredMode = pCtx->pCurDqLayer->pChromaPredMode; - uint32_t* pMbType = pCtx->pCurDqLayer->pMbType; + uint32_t* pMbType = pCtx->pCurDqLayer->pDec->pMbType; int32_t 
iLeftAvail = uiNeighAvail & 0x04; int32_t iTopAvail = uiNeighAvail & 0x01; @@ -532,7 +535,9 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN pRefCount[0] = pSliceHeader->uiRefCount[0]; pRefCount[1] = pSliceHeader->uiRefCount[1]; - switch (pCurDqLayer->pMbType[iMbXy]) { + bool bIsPending = GetThreadCount (pCtx) > 1; + + switch (pCurDqLayer->pDec->pMbType[iMbXy]) { case MB_TYPE_16x16: { iPartIdx = 0; WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iPartIdx, pRefCount[0], 0, @@ -547,7 +552,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[0]] - && ppRefPic[iRef[0]]->bIsComplete); + && (ppRefPic[iRef[0]]->bIsComplete || bIsPending)); PredMv (pMotionVector, pRefIndex, LIST_0, 0, 4, iRef[0], pMv); WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0])); WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1])); @@ -573,7 +578,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[i]] - && ppRefPic[iRef[i]]->bIsComplete); + && (ppRefPic[iRef[i]]->bIsComplete || bIsPending)); UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0); } for (i = 0; i < 2; i++) { @@ -603,7 +608,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(ppRefPic[iRef[i]] - && ppRefPic[iRef[i]]->bIsComplete); + && (ppRefPic[iRef[i]]->bIsComplete || bIsPending)); UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0); } for (i = 0; i < 2; i++) { @@ -651,7 +656,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[pRefIdx[i]] - && ppRefPic[pRefIdx[i]]->bIsComplete); + && (ppRefPic[pRefIdx[i]]->bIsComplete || bIsPending)); UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, pRefIdx[i], LIST_0); } //mv @@ -677,8 +682,8 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN if (SUB_MB_TYPE_8x8 == uiSubMbType) { ST32 ((pMv + 2), LD32 (pMv)); ST32 ((pMvd + 2), LD32 (pMvd)); - ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx], LD64 (pMv)); - ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv)); + ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx], LD64 (pMv)); + ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv)); ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx], LD64 (pMvd)); ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD64 (pMvd)); ST64 (pMotionVector[0][iCacheIdx ], LD64 (pMv)); @@ -688,13 +693,13 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } else if (SUB_MB_TYPE_8x4 == uiSubMbType) { ST32 ((pMv + 2), LD32 (pMv)); ST32 ((pMvd + 2), LD32 (pMvd)); - ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx ], LD64 (pMv)); + ST64 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD64 (pMv)); ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD64 (pMvd)); ST64 (pMotionVector[0][iCacheIdx ], LD64 (pMv)); ST64 (pMvdCache[0][iCacheIdx ], LD64 (pMvd)); } else if (SUB_MB_TYPE_4x8 == uiSubMbType) { - ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv)); - ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv)); + ST32 
(pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv)); ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD32 (pMvd)); ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD32 (pMvd)); ST32 (pMotionVector[0][iCacheIdx ], LD32 (pMv)); @@ -702,7 +707,7 @@ int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN ST32 (pMvdCache[0][iCacheIdx ], LD32 (pMvd)); ST32 (pMvdCache[0][iCacheIdx + 6], LD32 (pMvd)); } else { //SUB_MB_TYPE_4x4 - ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][iScan4Idx ], LD32 (pMv)); ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx ], LD32 (pMvd)); ST32 (pMotionVector[0][iCacheIdx ], LD32 (pMv)); ST32 (pMvdCache[0][iCacheIdx ], LD32 (pMvd)); @@ -734,22 +739,23 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN pRefCount[0] = pSliceHeader->uiRefCount[0]; pRefCount[1] = pSliceHeader->uiRefCount[1]; - MbType mbType = pCurDqLayer->pMbType[iMbXy]; + MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy]; + + bool bIsPending = GetThreadCount (pCtx) > 1; if (IS_DIRECT (mbType)) { int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + SubMbType subMbType; if (pSliceHeader->iDirectSpatialMvPredFlag) { //predict direct spatial mv - SubMbType subMbType; int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, subMbType); if (ret != ERR_NONE) { return ret; } } else { //temporal direct 16x16 mode - ComputeColocated (pCtx); - int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef); + int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, subMbType); if (ret != ERR_NONE) { return ret; } @@ -773,7 +779,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]] - && pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]->bIsComplete); + && (pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]->bIsComplete || bIsPending)); } } for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { @@ -810,7 +816,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][ref_idx] - && pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete); + && (pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete || bIsPending)); } UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx); ref_idx_list[listIdx][i] = ref_idx; @@ -854,7 +860,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][ref_idx] - && pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete); + && (pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete || bIsPending)); } UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx); ref_idx_list[listIdx][i] = ref_idx; @@ -883,11 +889,18 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN uint32_t uiSubMbType; //sub_mb_type, partition int16_t pMvDirect[LIST_A][2] = { {0, 0}, {0, 0} }; + if (pCtx->sRefPic.pRefList[LIST_1][0] == NULL) { + SLogContext* pLogCtx = & (pCtx->sLogCtx); + WelsLog (pLogCtx, WELS_LOG_ERROR, "Colocated Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + } + bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef; + const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]); bool has_direct_called = false; SubMbType directSubMbType = 0; for (int32_t i = 0; i < 4; i++) { WELS_READ_VERIFY 
(ParseBSubMBTypeCabac (pCtx, pNeighAvail, uiSubMbType)); - if (uiSubMbType > 13) { //invalid sub_mb_type + if (uiSubMbType >= 13) { //invalid sub_mb_type return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE); } // pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType; @@ -908,8 +921,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } else { //temporal direct mode - ComputeColocated (pCtx); - int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef); + int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, directSubMbType); if (ret != ERR_NONE) { return ret; } @@ -926,151 +938,31 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } } for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv - int16_t iIdx8 = i << 2; if (IS_DIRECT (pCurDqLayer->pSubMbType[iMbXy][i])) { - - int8_t iPartCount = pSubPartCount[i]; - int16_t iPartIdx, iBlockW = pPartW[i]; - uint8_t iScan4Idx, iCacheIdx, iColocIdx; - iCacheIdx = g_kuiCache30ScanIdx[iIdx8]; - - if (!pSliceHeader->iDirectSpatialMvPredFlag) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, bIsLongRef, pMvDirect, iRef, + pMotionVector, pMvdCache); + } else { + int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0]; iRef[LIST_1] = 0; - if (pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) { + iRef[LIST_0] = 0; + const uint8_t uiColoc4Idx = g_kuiScan4[iIdx8]; + if (!pCurDqLayer->iColocIntra[uiColoc4Idx]) { iRef[LIST_0] = 0; - } else { - if (pCurDqLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) { - iRef[LIST_0] = pCurDqLayer->iColocRefIndex[LIST_0][iIdx8]; + int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][uiColoc4Idx]; + if (colocRefIndexL0 >= 0) { + iRef[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count); } else { - iRef[LIST_0] = pCurDqLayer->iColocRefIndex[LIST_1][iIdx8]; - } - } - } - for (int32_t j = 0; j < 
iPartCount; j++) { - iPartIdx = iIdx8 + j * iBlockW; - iColocIdx = g_kuiScan4[iPartIdx]; - iScan4Idx = g_kuiScan4[iPartIdx]; - iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; - - if (pSliceHeader->iDirectSpatialMvPredFlag) { - int16_t pMV[4] = { 0 }; - if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) { - * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0]; - ST32 ((pMV + 2), LD32 (pMV)); - ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV)); - ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV)); - ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); - ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV)); - ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV)); - ST64 (pMvdCache[LIST_0][iCacheIdx], 0); - ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0); - * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1]; - ST32 ((pMV + 2), LD32 (pMV)); - ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV)); - ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV)); - ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); - ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV)); - ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV)); - ST64 (pMvdCache[LIST_1][iCacheIdx], 0); - ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0); - } else { //SUB_4x4 - * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0]; - ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV)); - ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMV)); - ST32 (pMvdCache[LIST_0][iCacheIdx], 0); - * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1]; - ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV)); - ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMV)); - ST32 (pMvdCache[LIST_1][iCacheIdx], 0); - } - - if ((* (int32_t*)pMvDirect[LIST_0] | * 
(int32_t*)pMvDirect[LIST_1])) { - bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef; - uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef && - (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0 - && pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0)); - const int16_t (*mvColoc)[2] = pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 ? pCurDqLayer->iColocMv[LIST_0] : - pCurDqLayer->iColocMv[LIST_1]; - const int16_t* mv = mvColoc[iColocIdx]; - if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) { - if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) { - if (iRef[LIST_0] == 0) { - ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0); - ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0); - ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); - ST64 (pMotionVector[LIST_0][iCacheIdx], 0); - ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 0); - ST64 (pMvdCache[LIST_0][iCacheIdx], 0); - ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0); - } - - if (iRef[LIST_1] == 0) { - ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0); - ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0); - ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); - ST64 (pMotionVector[LIST_1][iCacheIdx], 0); - ST64 (pMotionVector[LIST_1][iCacheIdx + 6], 0); - ST64 (pMvdCache[LIST_1][iCacheIdx], 0); - ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0); - } - } - } else { - if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) { - if (iRef[LIST_0] == 0) { - ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0); - ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - ST32 (pMotionVector[LIST_0][iCacheIdx], 0); - ST32 (pMvdCache[LIST_0][iCacheIdx], 0); - } - if (iRef[LIST_1] == 0) { - ST32 
(pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0); - ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - ST32 (pMotionVector[LIST_1][iCacheIdx], 0); - ST32 (pMvdCache[LIST_1][iCacheIdx], 0); - } - } - } - } - } else { - int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0]; - int16_t* mv = mvColoc[iColocIdx]; - int16_t pMV[4] = { 0 }; - int16_t iMvp[LIST_A][2]; - if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) { - iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8; - iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8; - ST32 (pMV, LD32 (iMvp[LIST_0])); - ST32 ((pMV + 2), LD32 (iMvp[LIST_0])); - ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV)); - ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV)); - ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0); - iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0]; - iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1]; - ST32 (pMV, LD32 (iMvp[LIST_1])); - ST32 ((pMV + 2), LD32 (iMvp[LIST_1])); - ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV)); - ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV)); - ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); - ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0); - } else { //SUB_4x4 - iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8; - iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8; - ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (iMvp[LIST_0])); - ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0); - iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0]; - iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1]; - ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (iMvp[LIST_1])); - ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0); + mvColoc = pCurDqLayer->iColocMv[LIST_1]; } } + Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_0, iRef[LIST_0]); + Update8x8RefIdx 
(pCurDqLayer, iIdx8, LIST_1, iRef[LIST_1]); + UpdateP8x8RefCacheIdxCabac (pRefIndex, iIdx8, LIST_0, iRef[LIST_0]); + UpdateP8x8RefCacheIdxCabac (pRefIndex, iIdx8, LIST_1, iRef[LIST_1]); + FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, iRef, mvColoc, pMotionVector, + pMvdCache); } } } @@ -1083,18 +975,8 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN int8_t iref = REF_NOT_IN_LIST; if (IS_DIRECT (subMbType)) { if (pSliceHeader->iDirectSpatialMvPredFlag) { - iref = iRef[listIdx]; - } else { - iref = 0; - if (listIdx == LIST_0) { - if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) { - if (pCurDqLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) { - iref = pCurDqLayer->iColocRefIndex[LIST_0][iIdx8]; - } else { - iref = pCurDqLayer->iColocRefIndex[LIST_1][iIdx8]; - } - } - } + Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iRef[listIdx]); + ref_idx_list[listIdx][i] = iRef[listIdx]; } UpdateP8x8DirectCabac (pCurDqLayer, iIdx8); } else { @@ -1112,31 +994,32 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(pCtx->sRefPic.pRefList[listIdx][iref] - && pCtx->sRefPic.pRefList[listIdx][iref]->bIsComplete); + && (pCtx->sRefPic.pRefList[listIdx][iref]->bIsComplete || bIsPending)); } + Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iref); + ref_idx_list[listIdx][i] = iref; } - UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, iref, listIdx); - ref_idx_list[listIdx][i] = iref; } } //mv for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { for (int32_t i = 0; i < 4; i++) { - int8_t iPartCount = pSubPartCount[i]; - int16_t iPartIdx, iBlockW = pPartW[i]; - uint8_t iScan4Idx, iCacheIdx; + int16_t iIdx8 = i << 2; - iCacheIdx = g_kuiCache30ScanIdx[i << 2]; + uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i]; + if (IS_DIRECT (subMbType) && !pSliceHeader->iDirectSpatialMvPredFlag) + continue; int8_t iref = ref_idx_list[listIdx][i]; - pRefIndex[listIdx][iCacheIdx] = pRefIndex[listIdx][iCacheIdx + 1] - = pRefIndex[listIdx][iCacheIdx + 6] = pRefIndex[listIdx][iCacheIdx + 7] = iref; + UpdateP8x8RefCacheIdxCabac (pRefIndex, iIdx8, listIdx, iref); - uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i]; - if (IS_DIRECT (subMbType)) { + if (IS_DIRECT (subMbType)) continue; - } + bool is_dir = IS_DIR (subMbType, 0, listIdx) > 0; + int8_t iPartCount = pSubPartCount[i]; + int16_t iBlockW = pPartW[i]; + uint8_t iScan4Idx, iCacheIdx; for (int32_t j = 0; j < iPartCount; j++) { iPartIdx = (i << 2) + j * iBlockW; iScan4Idx = g_kuiScan4[iPartIdx]; @@ -1154,8 +1037,8 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8 ST32 ((pMv + 2), LD32 (pMv)); ST32 ((pMvd + 2), LD32 (pMvd)); - ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv)); - ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMv)); + ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv)); + ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMv)); ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], 
LD64 (pMvd)); ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMvd)); ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv)); @@ -1163,13 +1046,13 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN ST64 (pMvdCache[listIdx][iCacheIdx], LD64 (pMvd)); ST64 (pMvdCache[listIdx][iCacheIdx + 6], LD64 (pMvd)); } else if (IS_SUB_4x4 (subMbType)) { //MB_TYPE_4x4 - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv)); ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd)); ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv)); ST32 (pMvdCache[listIdx][iCacheIdx], LD32 (pMvd)); } else if (IS_SUB_4x8 (subMbType)) { //MB_TYPE_4x8 5, 7, 9 - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv)); - ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMv)); ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd)); ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMvd)); ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv)); @@ -1179,7 +1062,7 @@ int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pN } else { //MB_TYPE_8x4 4, 6, 8 ST32 ((pMv + 2), LD32 (pMv)); ST32 ((pMvd + 2), LD32 (pMvd)); - ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv)); + ST64 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv)); ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD64 (pMvd)); ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv)); ST64 (pMvdCache[listIdx][iCacheIdx], LD64 (pMvd)); @@ -1201,7 +1084,7 @@ int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t uiCode; int32_t iIdxA = 0, iIdxB = 0; int32_t iCtxInc = 0; - int8_t* pRefIdxInMB = 
pCtx->pCurDqLayer->pRefIndex[iListIdx][pCtx->pCurDqLayer->iMbXyIndex]; + int8_t* pRefIdxInMB = pCtx->pCurDqLayer->pDec->pRefIndex[iListIdx][pCtx->pCurDqLayer->iMbXyIndex]; int8_t* pDirect = pCtx->pCurDqLayer->pDirect[pCtx->pCurDqLayer->iMbXyIndex]; if (iZOrderIdx == 0) { iIdxB = (pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM @@ -1394,7 +1277,7 @@ int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int3 int32_t iTopBlkXy = iCurrBlkXy - pCtx->pCurDqLayer->iMbWidth; //default value: MB neighboring int32_t iLeftBlkXy = iCurrBlkXy - 1; //default value: MB neighboring uint16_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc; - uint32_t* pMbType = pCtx->pCurDqLayer->pMbType; + uint32_t* pMbType = pCtx->pCurDqLayer->pDec->pMbType; int32_t iCtxInc; uiCbfBit = 0; nA = nB = (int8_t)!!IS_INTRA (pMbType[iCurrBlkXy]); @@ -1617,12 +1500,12 @@ int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) { int32_t i; PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine; SBitStringAux* pBsAux = pCtx->pCurDqLayer->pBitStringAux; - SDqLayer* pCurLayer = pCtx->pCurDqLayer; - int32_t iDstStrideLuma = pCurLayer->pDec->iLinesize[0]; - int32_t iDstStrideChroma = pCurLayer->pDec->iLinesize[1]; - int32_t iMbX = pCurLayer->iMbX; - int32_t iMbY = pCurLayer->iMbY; - int32_t iMbXy = pCurLayer->iMbXyIndex; + SDqLayer* pCurDqLayer = pCtx->pCurDqLayer; + int32_t iDstStrideLuma = pCurDqLayer->pDec->iLinesize[0]; + int32_t iDstStrideChroma = pCurDqLayer->pDec->iLinesize[1]; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; int32_t iMbOffsetLuma = (iMbX + iMbY * iDstStrideLuma) << 4; int32_t iMbOffsetChroma = (iMbX + iMbY * iDstStrideChroma) << 3; @@ -1633,7 +1516,7 @@ int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) { uint8_t* pPtrSrc; - pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; + pCurDqLayer->pDec->pMbType[iMbXy] = MB_TYPE_INTRA_PCM; RestoreCabacDecEngineToBS (pCabacDecEngine, pBsAux); 
intX_t iBytesLeft = pBsAux->pEndBuf - pBsAux->pCurBuf; if (iBytesLeft < 384) { @@ -1660,13 +1543,19 @@ int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) { pBsAux->pCurBuf += 384; - pCurLayer->pLumaQp[iMbXy] = 0; - pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0; - memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy])); + pCurDqLayer->pLumaQp[iMbXy] = 0; + pCurDqLayer->pChromaQp[iMbXy][0] = pCurDqLayer->pChromaQp[iMbXy][1] = 0; + memset (pCurDqLayer->pNzc[iMbXy], 16, sizeof (pCurDqLayer->pNzc[iMbXy])); //step 4: cabac engine init WELS_READ_VERIFY (InitReadBits (pBsAux, 1)); WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCabacDecEngine, pBsAux)); return ERR_NONE; } +void UpdateP8x8RefCacheIdxCabac (int8_t pRefIndex[LIST_A][30], const int16_t& iPartIdx, + const int32_t& listIdx, const int8_t& iRef) { + const uint8_t uiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + pRefIndex[listIdx][uiCacheIdx] = pRefIndex[listIdx][uiCacheIdx + 1] = pRefIndex[listIdx][uiCacheIdx + 6] = + pRefIndex[listIdx][uiCacheIdx + 7] = iRef; +} } diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp index fc44b65e7e3..ba3a46d58a4 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/parse_mb_syn_cavlc.cpp @@ -53,20 +53,20 @@ typedef struct TagReadBitsCache { uint8_t* pBuf; } SReadBitsCache; -void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) { +void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurDqLayer) { int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc; int32_t iCurXy, iTopXy = 0, iLeftXy = 0, iLeftTopXy = 0, iRightTopXy = 0; int32_t iCurX, iCurY; - iCurXy = pCurLayer->iMbXyIndex; - iCurX = pCurLayer->iMbX; - iCurY = pCurLayer->iMbY; - iCurSliceIdc = 
pCurLayer->pSliceIdc[iCurXy]; + iCurXy = pCurDqLayer->iMbXyIndex; + iCurX = pCurDqLayer->iMbX; + iCurY = pCurDqLayer->iMbY; + iCurSliceIdc = pCurDqLayer->pSliceIdc[iCurXy]; if (iCurX != 0) { iLeftXy = iCurXy - 1; - iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy]; + iLeftSliceIdc = pCurDqLayer->pSliceIdc[iLeftXy]; pNeighAvail->iLeftAvail = (iLeftSliceIdc == iCurSliceIdc); - pNeighAvail->iLeftCbp = pNeighAvail->iLeftAvail ? pCurLayer->pCbp[iLeftXy] : 0; + pNeighAvail->iLeftCbp = pNeighAvail->iLeftAvail ? pCurDqLayer->pCbp[iLeftXy] : 0; } else { pNeighAvail->iLeftAvail = 0; pNeighAvail->iLeftTopAvail = 0; @@ -74,20 +74,20 @@ void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) { } if (iCurY != 0) { - iTopXy = iCurXy - pCurLayer->iMbWidth; - iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy]; + iTopXy = iCurXy - pCurDqLayer->iMbWidth; + iTopSliceIdc = pCurDqLayer->pSliceIdc[iTopXy]; pNeighAvail->iTopAvail = (iTopSliceIdc == iCurSliceIdc); - pNeighAvail->iTopCbp = pNeighAvail->iTopAvail ? pCurLayer->pCbp[iTopXy] : 0; + pNeighAvail->iTopCbp = pNeighAvail->iTopAvail ? pCurDqLayer->pCbp[iTopXy] : 0; if (iCurX != 0) { iLeftTopXy = iTopXy - 1; - iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy]; + iLeftTopSliceIdc = pCurDqLayer->pSliceIdc[iLeftTopXy]; pNeighAvail->iLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc); } else { pNeighAvail->iLeftTopAvail = 0; } - if (iCurX != (pCurLayer->iMbWidth - 1)) { + if (iCurX != (pCurDqLayer->iMbWidth - 1)) { iRightTopXy = iTopXy + 1; - iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy]; + iRightTopSliceIdc = pCurDqLayer->pSliceIdc[iRightTopXy]; pNeighAvail->iRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc); } else { pNeighAvail->iRightTopAvail = 0; @@ -99,18 +99,18 @@ void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) { pNeighAvail->iTopCbp = 0; } - pNeighAvail->iLeftType = (pNeighAvail->iLeftAvail ? pCurLayer->pMbType[iLeftXy] : 0); - pNeighAvail->iTopType = (pNeighAvail->iTopAvail ? 
pCurLayer->pMbType[iTopXy] : 0); - pNeighAvail->iLeftTopType = (pNeighAvail->iLeftTopAvail ? pCurLayer->pMbType[iLeftTopXy] : 0); - pNeighAvail->iRightTopType = (pNeighAvail->iRightTopAvail ? pCurLayer->pMbType[iRightTopXy] : 0); + pNeighAvail->iLeftType = (pNeighAvail->iLeftAvail ? pCurDqLayer->pDec->pMbType[iLeftXy] : 0); + pNeighAvail->iTopType = (pNeighAvail->iTopAvail ? pCurDqLayer->pDec->pMbType[iTopXy] : 0); + pNeighAvail->iLeftTopType = (pNeighAvail->iLeftTopAvail ? pCurDqLayer->pDec->pMbType[iLeftTopXy] : 0); + pNeighAvail->iRightTopType = (pNeighAvail->iRightTopAvail ? pCurDqLayer->pDec->pMbType[iRightTopXy] : 0); } void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, - PDqLayer pCurLayer) { //no matter slice type, intra_pred_constrained_flag - int32_t iCurXy = pCurLayer->iMbXyIndex; + PDqLayer pCurDqLayer) { //no matter slice type, intra_pred_constrained_flag + int32_t iCurXy = pCurDqLayer->iMbXyIndex; int32_t iTopXy = 0; int32_t iLeftXy = 0; if (pNeighAvail->iTopAvail) { - iTopXy = iCurXy - pCurLayer->iMbWidth; + iTopXy = iCurXy - pCurDqLayer->iMbWidth; } if (pNeighAvail->iLeftAvail) { iLeftXy = iCurXy - 1; @@ -118,10 +118,10 @@ void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo //stuff non_zero_coeff_count from pNeighAvail(left and top) if (pNeighAvail->iTopAvail) { - ST32 (&pNonZeroCount[1], LD32 (&pCurLayer->pNzc[iTopXy][12])); + ST32 (&pNonZeroCount[1], LD32 (&pCurDqLayer->pNzc[iTopXy][12])); pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0; - ST16 (&pNonZeroCount[6], LD16 (&pCurLayer->pNzc[iTopXy][20])); - ST16 (&pNonZeroCount[30], LD16 (&pCurLayer->pNzc[iTopXy][22])); + ST16 (&pNonZeroCount[6], LD16 (&pCurDqLayer->pNzc[iTopXy][20])); + ST16 (&pNonZeroCount[30], LD16 (&pCurDqLayer->pNzc[iTopXy][22])); } else { ST32 (&pNonZeroCount[1], 0xFFFFFFFFU); pNonZeroCount[0] = pNonZeroCount[5] = pNonZeroCount[29] = 0xFF; @@ -130,15 +130,15 @@ void WelsFillCacheNonZeroCount 
(PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo } if (pNeighAvail->iLeftAvail) { - pNonZeroCount[8 * 1] = pCurLayer->pNzc[iLeftXy][3]; - pNonZeroCount[8 * 2] = pCurLayer->pNzc[iLeftXy][7]; - pNonZeroCount[8 * 3] = pCurLayer->pNzc[iLeftXy][11]; - pNonZeroCount[8 * 4] = pCurLayer->pNzc[iLeftXy][15]; - - pNonZeroCount[5 + 8 * 1] = pCurLayer->pNzc[iLeftXy][17]; - pNonZeroCount[5 + 8 * 2] = pCurLayer->pNzc[iLeftXy][21]; - pNonZeroCount[5 + 8 * 4] = pCurLayer->pNzc[iLeftXy][19]; - pNonZeroCount[5 + 8 * 5] = pCurLayer->pNzc[iLeftXy][23]; + pNonZeroCount[8 * 1] = pCurDqLayer->pNzc[iLeftXy][3]; + pNonZeroCount[8 * 2] = pCurDqLayer->pNzc[iLeftXy][7]; + pNonZeroCount[8 * 3] = pCurDqLayer->pNzc[iLeftXy][11]; + pNonZeroCount[8 * 4] = pCurDqLayer->pNzc[iLeftXy][15]; + + pNonZeroCount[5 + 8 * 1] = pCurDqLayer->pNzc[iLeftXy][17]; + pNonZeroCount[5 + 8 * 2] = pCurDqLayer->pNzc[iLeftXy][21]; + pNonZeroCount[5 + 8 * 4] = pCurDqLayer->pNzc[iLeftXy][19]; + pNonZeroCount[5 + 8 * 5] = pCurDqLayer->pNzc[iLeftXy][23]; } else { pNonZeroCount[8 * 1] = pNonZeroCount[8 * 2] = @@ -153,16 +153,16 @@ void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCo } } void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode, - PDqLayer pCurLayer) { //no matter slice type - int32_t iCurXy = pCurLayer->iMbXyIndex; + PDqLayer pCurDqLayer) { //no matter slice type + int32_t iCurXy = pCurDqLayer->iMbXyIndex; int32_t iTopXy = 0; int32_t iLeftXy = 0; //stuff non_zero_coeff_count from pNeighAvail(left and top) - WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer); + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); if (pNeighAvail->iTopAvail) { - iTopXy = iCurXy - pCurLayer->iMbWidth; + iTopXy = iCurXy - pCurDqLayer->iMbWidth; } if (pNeighAvail->iLeftAvail) { iLeftXy = iCurXy - 1; @@ -170,7 +170,7 @@ void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon 
//intraNxN_pred_mode if (pNeighAvail->iTopAvail && IS_INTRANxN (pNeighAvail->iTopType)) { //top - ST32 (pIntraPredMode + 1, LD32 (&pCurLayer->pIntraPredMode[iTopXy][0])); + ST32 (pIntraPredMode + 1, LD32 (&pCurDqLayer->pIntraPredMode[iTopXy][0])); } else { int32_t iPred; if (IS_INTRA16x16 (pNeighAvail->iTopType) || (MB_TYPE_INTRA_PCM == pNeighAvail->iTopType)) @@ -181,10 +181,10 @@ void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon } if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left - pIntraPredMode[ 0 + 8 ] = pCurLayer->pIntraPredMode[iLeftXy][4]; - pIntraPredMode[ 0 + 8 * 2] = pCurLayer->pIntraPredMode[iLeftXy][5]; - pIntraPredMode[ 0 + 8 * 3] = pCurLayer->pIntraPredMode[iLeftXy][6]; - pIntraPredMode[ 0 + 8 * 4] = pCurLayer->pIntraPredMode[iLeftXy][3]; + pIntraPredMode[ 0 + 8 ] = pCurDqLayer->pIntraPredMode[iLeftXy][4]; + pIntraPredMode[ 0 + 8 * 2] = pCurDqLayer->pIntraPredMode[iLeftXy][5]; + pIntraPredMode[ 0 + 8 * 3] = pCurDqLayer->pIntraPredMode[iLeftXy][6]; + pIntraPredMode[ 0 + 8 * 4] = pCurDqLayer->pIntraPredMode[iLeftXy][3]; } else { int8_t iPred; if (IS_INTRA16x16 (pNeighAvail->iLeftType) || (MB_TYPE_INTRA_PCM == pNeighAvail->iLeftType)) @@ -199,16 +199,16 @@ void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon } void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode, - PDqLayer pCurLayer) { //no matter slice type - int32_t iCurXy = pCurLayer->iMbXyIndex; + PDqLayer pCurDqLayer) { //no matter slice type + int32_t iCurXy = pCurDqLayer->iMbXyIndex; int32_t iTopXy = 0; int32_t iLeftXy = 0; //stuff non_zero_coeff_count from pNeighAvail(left and top) - WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer); + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); if (pNeighAvail->iTopAvail) { - iTopXy = iCurXy - pCurLayer->iMbWidth; + iTopXy = iCurXy - pCurDqLayer->iMbWidth; } if 
(pNeighAvail->iLeftAvail) { iLeftXy = iCurXy - 1; @@ -216,7 +216,7 @@ void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon //intra4x4_pred_mode if (pNeighAvail->iTopAvail && IS_INTRANxN (pNeighAvail->iTopType)) { //top - ST32 (pIntraPredMode + 1, LD32 (&pCurLayer->pIntraPredMode[iTopXy][0])); + ST32 (pIntraPredMode + 1, LD32 (&pCurDqLayer->pIntraPredMode[iTopXy][0])); } else { int32_t iPred; if (pNeighAvail->iTopAvail) @@ -227,10 +227,10 @@ void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon } if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left - pIntraPredMode[ 0 + 8 * 1] = pCurLayer->pIntraPredMode[iLeftXy][4]; - pIntraPredMode[ 0 + 8 * 2] = pCurLayer->pIntraPredMode[iLeftXy][5]; - pIntraPredMode[ 0 + 8 * 3] = pCurLayer->pIntraPredMode[iLeftXy][6]; - pIntraPredMode[ 0 + 8 * 4] = pCurLayer->pIntraPredMode[iLeftXy][3]; + pIntraPredMode[ 0 + 8 * 1] = pCurDqLayer->pIntraPredMode[iLeftXy][4]; + pIntraPredMode[ 0 + 8 * 2] = pCurDqLayer->pIntraPredMode[iLeftXy][5]; + pIntraPredMode[ 0 + 8 * 3] = pCurDqLayer->pIntraPredMode[iLeftXy][6]; + pIntraPredMode[ 0 + 8 * 4] = pCurDqLayer->pIntraPredMode[iLeftXy][3]; } else { int8_t iPred; if (pNeighAvail->iLeftAvail) @@ -245,52 +245,52 @@ void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNon } void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int16_t iMvArray[LIST_A][30][MV_A], - int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer) { - int32_t iCurXy = pCurLayer->iMbXyIndex; + int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer) { + int32_t iCurXy = pCurDqLayer->iMbXyIndex; int32_t iTopXy = 0; int32_t iLeftXy = 0; int32_t iLeftTopXy = 0; int32_t iRightTopXy = 0; - PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; PSliceHeader pSliceHeader = 
&pSlice->sSliceHeaderExt.sSliceHeader; int32_t listCount = 1; if (pSliceHeader->eSliceType == B_SLICE) { listCount = 2; } //stuff non_zero_coeff_count from pNeighAvail(left and top) - WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer); + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); if (pNeighAvail->iTopAvail) { - iTopXy = iCurXy - pCurLayer->iMbWidth; + iTopXy = iCurXy - pCurDqLayer->iMbWidth; } if (pNeighAvail->iLeftAvail) { iLeftXy = iCurXy - 1; } if (pNeighAvail->iLeftTopAvail) { - iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth; + iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth; } if (pNeighAvail->iRightTopAvail) { - iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth; + iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth; } for (int32_t listIdx = 0; listIdx < listCount; ++listIdx) { //stuff mv_cache and iRefIdxArray from left and top (inter) if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) { - ST32 (iMvArray[listIdx][6], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3])); - ST32 (iMvArray[listIdx][12], LD32 (pCurLayer->pMv[listIdx][iLeftXy][7])); - ST32 (iMvArray[listIdx][18], LD32 (pCurLayer->pMv[listIdx][iLeftXy][11])); - ST32 (iMvArray[listIdx][24], LD32 (pCurLayer->pMv[listIdx][iLeftXy][15])); - - ST32 (iMvdCache[listIdx][6], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][3])); - ST32 (iMvdCache[listIdx][12], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][7])); - ST32 (iMvdCache[listIdx][18], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][11])); - ST32 (iMvdCache[listIdx][24], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][15])); - - iRefIdxArray[listIdx][6] = pCurLayer->pRefIndex[listIdx][iLeftXy][3]; - iRefIdxArray[listIdx][12] = pCurLayer->pRefIndex[listIdx][iLeftXy][7]; - iRefIdxArray[listIdx][18] = pCurLayer->pRefIndex[listIdx][iLeftXy][11]; - iRefIdxArray[listIdx][24] = pCurLayer->pRefIndex[listIdx][iLeftXy][15]; + ST32 (iMvArray[listIdx][6], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3])); + ST32 (iMvArray[listIdx][12], LD32 
(pCurDqLayer->pDec->pMv[listIdx][iLeftXy][7])); + ST32 (iMvArray[listIdx][18], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][11])); + ST32 (iMvArray[listIdx][24], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][15])); + + ST32 (iMvdCache[listIdx][6], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][3])); + ST32 (iMvdCache[listIdx][12], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][7])); + ST32 (iMvdCache[listIdx][18], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][11])); + ST32 (iMvdCache[listIdx][24], LD32 (pCurDqLayer->pMvd[listIdx][iLeftXy][15])); + + iRefIdxArray[listIdx][6] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3]; + iRefIdxArray[listIdx][12] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][7]; + iRefIdxArray[listIdx][18] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][11]; + iRefIdxArray[listIdx][24] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][15]; } else { ST32 (iMvArray[listIdx][6], 0); ST32 (iMvArray[listIdx][12], 0); @@ -316,9 +316,9 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun } } if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) { - ST32 (iMvArray[listIdx][0], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15])); - ST32 (iMvdCache[listIdx][0], LD32 (pCurLayer->pMvd[listIdx][iLeftTopXy][15])); - iRefIdxArray[listIdx][0] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15]; + ST32 (iMvArray[listIdx][0], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15])); + ST32 (iMvdCache[listIdx][0], LD32 (pCurDqLayer->pMvd[listIdx][iLeftTopXy][15])); + iRefIdxArray[listIdx][0] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15]; } else { ST32 (iMvArray[listIdx][0], 0); ST32 (iMvdCache[listIdx][0], 0); @@ -330,11 +330,11 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun } if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) { - ST64 (iMvArray[listIdx][1], LD64 (pCurLayer->pMv[listIdx][iTopXy][12])); - ST64 (iMvArray[listIdx][3], LD64 
(pCurLayer->pMv[listIdx][iTopXy][14])); - ST64 (iMvdCache[listIdx][1], LD64 (pCurLayer->pMvd[listIdx][iTopXy][12])); - ST64 (iMvdCache[listIdx][3], LD64 (pCurLayer->pMvd[listIdx][iTopXy][14])); - ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurLayer->pRefIndex[listIdx][iTopXy][12])); + ST64 (iMvArray[listIdx][1], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][12])); + ST64 (iMvArray[listIdx][3], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][14])); + ST64 (iMvdCache[listIdx][1], LD64 (pCurDqLayer->pMvd[listIdx][iTopXy][12])); + ST64 (iMvdCache[listIdx][3], LD64 (pCurDqLayer->pMvd[listIdx][iTopXy][14])); + ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12])); } else { ST64 (iMvArray[listIdx][1], 0); ST64 (iMvArray[listIdx][3], 0); @@ -354,9 +354,9 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun } if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) { - ST32 (iMvArray[listIdx][5], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12])); - ST32 (iMvdCache[listIdx][5], LD32 (pCurLayer->pMvd[listIdx][iRightTopXy][12])); - iRefIdxArray[listIdx][5] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12]; + ST32 (iMvArray[listIdx][5], LD32 (pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12])); + ST32 (iMvdCache[listIdx][5], LD32 (pCurDqLayer->pMvd[listIdx][iRightTopXy][12])); + iRefIdxArray[listIdx][5] = pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12]; } else { ST32 (iMvArray[listIdx][5], 0); if (0 == pNeighAvail->iRightTopAvail) { //not available @@ -385,151 +385,160 @@ void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCoun } } -void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurLayer) { +void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurDqLayer) { - int32_t iCurXy = pCurLayer->iMbXyIndex; + int32_t iCurXy = pCurDqLayer->iMbXyIndex; int32_t iTopXy = 0; int32_t iLeftXy = 0; int32_t 
iLeftTopXy = 0; int32_t iRightTopXy = 0; if (pNeighAvail->iTopAvail) { - iTopXy = iCurXy - pCurLayer->iMbWidth; + iTopXy = iCurXy - pCurDqLayer->iMbWidth; } if (pNeighAvail->iLeftAvail) { iLeftXy = iCurXy - 1; } if (pNeighAvail->iLeftTopAvail) { - iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth; + iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth; } if (pNeighAvail->iRightTopAvail) { - iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth; + iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth; } memset (iDirect, 0, 30); if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) { - iDirect[6] = pCurLayer->pDirect[iLeftXy][3]; - iDirect[12] = pCurLayer->pDirect[iLeftXy][7]; - iDirect[18] = pCurLayer->pDirect[iLeftXy][11]; - iDirect[24] = pCurLayer->pDirect[iLeftXy][15]; + iDirect[6] = pCurDqLayer->pDirect[iLeftXy][3]; + iDirect[12] = pCurDqLayer->pDirect[iLeftXy][7]; + iDirect[18] = pCurDqLayer->pDirect[iLeftXy][11]; + iDirect[24] = pCurDqLayer->pDirect[iLeftXy][15]; } if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) { - iDirect[0] = pCurLayer->pDirect[iLeftTopXy][15]; + iDirect[0] = pCurDqLayer->pDirect[iLeftTopXy][15]; } if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) { - ST32 (&iDirect[1], LD32 (&pCurLayer->pDirect[iTopXy][12])); + ST32 (&iDirect[1], LD32 (&pCurDqLayer->pDirect[iTopXy][12])); } if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) { - iDirect[5] = pCurLayer->pDirect[iRightTopXy][12]; + iDirect[5] = pCurDqLayer->pDirect[iRightTopXy][12]; } //right-top 4*4 block unavailable } void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, - int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer) { - int32_t iCurXy = pCurLayer->iMbXyIndex; + int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurDqLayer) { + int32_t iCurXy = pCurDqLayer->iMbXyIndex; int32_t iTopXy = 0; int32_t iLeftXy = 0; int32_t iLeftTopXy = 0; int32_t 
iRightTopXy = 0; + PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + int32_t listCount = 1; + if (pSliceHeader->eSliceType == B_SLICE) { + listCount = 2; + } + //stuff non_zero_coeff_count from pNeighAvail(left and top) - WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer); + WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurDqLayer); if (pNeighAvail->iTopAvail) { - iTopXy = iCurXy - pCurLayer->iMbWidth; + iTopXy = iCurXy - pCurDqLayer->iMbWidth; } if (pNeighAvail->iLeftAvail) { iLeftXy = iCurXy - 1; } if (pNeighAvail->iLeftTopAvail) { - iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth; + iLeftTopXy = iCurXy - 1 - pCurDqLayer->iMbWidth; } if (pNeighAvail->iRightTopAvail) { - iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth; + iRightTopXy = iCurXy + 1 - pCurDqLayer->iMbWidth; } - //stuff mv_cache and iRefIdxArray from left and top (inter) - if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) { - ST32 (iMvArray[0][ 6], LD32 (pCurLayer->pMv[0][iLeftXy][ 3])); - ST32 (iMvArray[0][12], LD32 (pCurLayer->pMv[0][iLeftXy][ 7])); - ST32 (iMvArray[0][18], LD32 (pCurLayer->pMv[0][iLeftXy][11])); - ST32 (iMvArray[0][24], LD32 (pCurLayer->pMv[0][iLeftXy][15])); - iRefIdxArray[0][ 6] = pCurLayer->pRefIndex[0][iLeftXy][ 3]; - iRefIdxArray[0][12] = pCurLayer->pRefIndex[0][iLeftXy][ 7]; - iRefIdxArray[0][18] = pCurLayer->pRefIndex[0][iLeftXy][11]; - iRefIdxArray[0][24] = pCurLayer->pRefIndex[0][iLeftXy][15]; - } else { - ST32 (iMvArray[0][ 6], 0); - ST32 (iMvArray[0][12], 0); - ST32 (iMvArray[0][18], 0); - ST32 (iMvArray[0][24], 0); - - if (0 == pNeighAvail->iLeftAvail) { //not available - iRefIdxArray[0][ 6] = - iRefIdxArray[0][12] = - iRefIdxArray[0][18] = - iRefIdxArray[0][24] = REF_NOT_AVAIL; - } else { //available but is intra mb type - iRefIdxArray[0][ 6] = - iRefIdxArray[0][12] = - iRefIdxArray[0][18] = - iRefIdxArray[0][24] = REF_NOT_IN_LIST; + for (int32_t listIdx = 
0; listIdx < listCount; ++listIdx) { + //stuff mv_cache and iRefIdxArray from left and top (inter) + if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) { + ST32 (iMvArray[listIdx][6], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][3])); + ST32 (iMvArray[listIdx][12], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][7])); + ST32 (iMvArray[listIdx][18], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][11])); + ST32 (iMvArray[listIdx][24], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftXy][15])); + iRefIdxArray[listIdx][6] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][3]; + iRefIdxArray[listIdx][12] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][7]; + iRefIdxArray[listIdx][18] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][11]; + iRefIdxArray[listIdx][24] = pCurDqLayer->pDec->pRefIndex[listIdx][iLeftXy][15]; + } else { + ST32 (iMvArray[listIdx][6], 0); + ST32 (iMvArray[listIdx][12], 0); + ST32 (iMvArray[listIdx][18], 0); + ST32 (iMvArray[listIdx][24], 0); + + if (0 == pNeighAvail->iLeftAvail) { //not available + iRefIdxArray[listIdx][6] = + iRefIdxArray[listIdx][12] = + iRefIdxArray[listIdx][18] = + iRefIdxArray[listIdx][24] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][6] = + iRefIdxArray[listIdx][12] = + iRefIdxArray[listIdx][18] = + iRefIdxArray[listIdx][24] = REF_NOT_IN_LIST; + } } - } - if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) { - ST32 (iMvArray[0][0], LD32 (pCurLayer->pMv[0][iLeftTopXy][15])); - iRefIdxArray[0][0] = pCurLayer->pRefIndex[0][iLeftTopXy][15]; - } else { - ST32 (iMvArray[0][0], 0); - if (0 == pNeighAvail->iLeftTopAvail) { //not available - iRefIdxArray[0][0] = REF_NOT_AVAIL; - } else { //available but is intra mb type - iRefIdxArray[0][0] = REF_NOT_IN_LIST; + if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) { + ST32 (iMvArray[listIdx][0], LD32 (pCurDqLayer->pDec->pMv[listIdx][iLeftTopXy][15])); + iRefIdxArray[listIdx][0] = 
pCurDqLayer->pDec->pRefIndex[listIdx][iLeftTopXy][15]; + } else { + ST32 (iMvArray[listIdx][0], 0); + if (0 == pNeighAvail->iLeftTopAvail) { //not available + iRefIdxArray[listIdx][0] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][0] = REF_NOT_IN_LIST; + } } - } - if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) { - ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12])); - ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14])); - ST32 (&iRefIdxArray[0][1], LD32 (&pCurLayer->pRefIndex[0][iTopXy][12])); - } else { - ST64 (iMvArray[0][1], 0); - ST64 (iMvArray[0][3], 0); - if (0 == pNeighAvail->iTopAvail) { //not available - iRefIdxArray[0][1] = - iRefIdxArray[0][2] = - iRefIdxArray[0][3] = - iRefIdxArray[0][4] = REF_NOT_AVAIL; - } else { //available but is intra mb type - iRefIdxArray[0][1] = - iRefIdxArray[0][2] = - iRefIdxArray[0][3] = - iRefIdxArray[0][4] = REF_NOT_IN_LIST; + if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) { + ST64 (iMvArray[listIdx][1], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][12])); + ST64 (iMvArray[listIdx][3], LD64 (pCurDqLayer->pDec->pMv[listIdx][iTopXy][14])); + ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurDqLayer->pDec->pRefIndex[listIdx][iTopXy][12])); + } else { + ST64 (iMvArray[listIdx][1], 0); + ST64 (iMvArray[listIdx][3], 0); + if (0 == pNeighAvail->iTopAvail) { //not available + iRefIdxArray[listIdx][1] = + iRefIdxArray[listIdx][2] = + iRefIdxArray[listIdx][3] = + iRefIdxArray[listIdx][4] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][1] = + iRefIdxArray[listIdx][2] = + iRefIdxArray[listIdx][3] = + iRefIdxArray[listIdx][4] = REF_NOT_IN_LIST; + } } - } - if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) { - ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12])); - iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12]; - } else { - ST32 (iMvArray[0][5], 0); - if (0 == 
pNeighAvail->iRightTopAvail) { //not available - iRefIdxArray[0][5] = REF_NOT_AVAIL; - } else { //available but is intra mb type - iRefIdxArray[0][5] = REF_NOT_IN_LIST; + if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) { + ST32 (iMvArray[listIdx][5], LD32 (pCurDqLayer->pDec->pMv[listIdx][iRightTopXy][12])); + iRefIdxArray[listIdx][5] = pCurDqLayer->pDec->pRefIndex[listIdx][iRightTopXy][12]; + } else { + ST32 (iMvArray[listIdx][5], 0); + if (0 == pNeighAvail->iRightTopAvail) { //not available + iRefIdxArray[listIdx][5] = REF_NOT_AVAIL; + } else { //available but is intra mb type + iRefIdxArray[listIdx][5] = REF_NOT_IN_LIST; + } } + //right-top 4*4 block unavailable + ST32 (iMvArray[listIdx][9], 0); + ST32 (iMvArray[listIdx][21], 0); + ST32 (iMvArray[listIdx][11], 0); + ST32 (iMvArray[listIdx][17], 0); + ST32 (iMvArray[listIdx][23], 0); + iRefIdxArray[listIdx][9] = + iRefIdxArray[listIdx][21] = + iRefIdxArray[listIdx][11] = + iRefIdxArray[listIdx][17] = + iRefIdxArray[listIdx][23] = REF_NOT_AVAIL; } - //right-top 4*4 block unavailable - ST32 (iMvArray[0][ 9], 0); - ST32 (iMvArray[0][21], 0); - ST32 (iMvArray[0][11], 0); - ST32 (iMvArray[0][17], 0); - ST32 (iMvArray[0][23], 0); - iRefIdxArray[0][ 9] = - iRefIdxArray[0][21] = - iRefIdxArray[0][11] = - iRefIdxArray[0][17] = - iRefIdxArray[0][23] = REF_NOT_AVAIL; } int32_t PredIntra4x4Mode (int8_t* pIntraPredMode, int32_t iIdx4) { @@ -1074,7 +1083,9 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M iRefCount[0] = pSliceHeader->uiRefCount[0]; iRefCount[1] = pSliceHeader->uiRefCount[1]; - switch (pCurDqLayer->pMbType[iMbXy]) { + bool bIsPending = GetThreadCount (pCtx) > 1; + + switch (pCurDqLayer->pDec->pMbType[iMbXy]) { case MB_TYPE_16x16: { int32_t iRefIdx = 0; if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { @@ -1096,7 +1107,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M } } pCtx->bMbRefConcealed = pCtx->bRPLRError 
|| pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx] - && ppRefPic[iRefIdx]->bIsComplete); + && (ppRefPic[iRefIdx]->bIsComplete || bIsPending)); } else { WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. "); return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); @@ -1137,7 +1148,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]] - && ppRefPic[iRefIdx[i]]->bIsComplete); + && (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending)); } for (i = 0; i < 2; i++) { PredInter16x8Mv (iMvArray, iRefIdxArray, LIST_0, i << 3, iRefIdx[i], iMv); @@ -1174,7 +1185,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]] - && ppRefPic[iRefIdx[i]]->bIsComplete); + && (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending)); } else { WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. 
"); return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); @@ -1198,7 +1209,7 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M int32_t iRefIdx[4] = {0}, iSubPartCount[4], iPartWidth[4]; uint32_t uiSubMbType; - if (MB_TYPE_8x8_REF0 == pCurDqLayer->pMbType[iMbXy]) { + if (MB_TYPE_8x8_REF0 == pCurDqLayer->pDec->pMbType[iMbXy]) { iRefCount[0] = iRefCount[1] = 1; } @@ -1226,8 +1237,8 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M } //iRefIdxArray - if (MB_TYPE_8x8_REF0 == pCurDqLayer->pMbType[iMbXy]) { - memset (pCurDqLayer->pRefIndex[0][iMbXy], 0, 16); + if (MB_TYPE_8x8_REF0 == pCurDqLayer->pDec->pMbType[iMbXy]) { + memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, 16); } else { for (i = 0; i < 4; i++) { int16_t iIndex8 = i << 2; @@ -1246,10 +1257,11 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M } } pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]] - && ppRefPic[iRefIdx[i]]->bIsComplete); + && (ppRefPic[iRefIdx[i]]->bIsComplete || bIsPending)); - pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx ] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 1] = - pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 5] = iRefIdx[i]; + pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx ] = pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 1] = + pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pDec->pRefIndex[0][iMbXy][uiScan4Idx + 5] = + iRefIdx[i]; } else { WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. 
"); return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); @@ -1281,26 +1293,26 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M iMv[1] += iCode; WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); if (SUB_MB_TYPE_8x8 == uiSubMbType) { - ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx], LD32 (iMv)); - ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv)); - ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv)); - ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 5], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 5], LD32 (iMv)); ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv)); ST32 (iMvArray[0][uiCacheIdx + 1], LD32 (iMv)); ST32 (iMvArray[0][uiCacheIdx + 6], LD32 (iMv)); ST32 (iMvArray[0][uiCacheIdx + 7], LD32 (iMv)); } else if (SUB_MB_TYPE_8x4 == uiSubMbType) { - ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv)); - ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 1], LD32 (iMv)); ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv)); ST32 (iMvArray[0][uiCacheIdx + 1], LD32 (iMv)); } else if (SUB_MB_TYPE_4x8 == uiSubMbType) { - ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv)); - ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx + 4], LD32 (iMv)); ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv)); ST32 (iMvArray[0][uiCacheIdx + 6], LD32 (iMv)); } else { //SUB_MB_TYPE_4x4 == uiSubMbType - ST32 (pCurDqLayer->pMv[0][iMbXy][uiScan4Idx ], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[0][iMbXy][uiScan4Idx ], 
LD32 (iMv)); ST32 (iMvArray[0][uiCacheIdx ], LD32 (iMv)); } } @@ -1313,5 +1325,401 @@ int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][M return ERR_NONE; } +int32_t ParseInterBInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], + int8_t iRefIdxArray[LIST_A][30], PBitStringAux pBs) { + PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer; + PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader; + PPicture* ppRefPic[2]; + ppRefPic[LIST_0] = pCtx->sRefPic.pRefList[LIST_0]; + ppRefPic[LIST_1] = pCtx->sRefPic.pRefList[LIST_1]; + int8_t ref_idx_list[LIST_A][4]; + int8_t iRef[2] = { 0, 0 }; + int32_t iRefCount[2]; + PDqLayer pCurDqLayer = pCtx->pCurDqLayer; + int32_t iMbXy = pCurDqLayer->iMbXyIndex; + uint8_t iMotionPredFlag[LIST_A][4]; + int16_t iMv[2]; + uint32_t uiCode; + int32_t iCode; + int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv; + int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv; + memset (ref_idx_list, -1, LIST_A * 4); + memset (iMotionPredFlag, (pSlice->sSliceHeaderExt.bDefaultMotionPredFlag ? 
1 : 0), LIST_A * 4); + iRefCount[0] = pSliceHeader->uiRefCount[0]; + iRefCount[1] = pSliceHeader->uiRefCount[1]; + bool bIsPending = GetThreadCount (pCtx) > 1; + + MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy]; + if (IS_DIRECT (mbType)) { + + int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + SubMbType subMbType; + if (pSliceHeader->iDirectSpatialMvPredFlag) { + //predict direct spatial mv + int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } else { + //temporal direct 16x16 mode + int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, subMbType); + if (ret != ERR_NONE) { + return ret; + } + } + } else if (IS_INTER_16x16 (mbType)) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (IS_DIR (mbType, 0, listIdx)) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ] + iMotionPredFlag[listIdx][0] = uiCode; + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (IS_DIR (mbType, 0, listIdx)) { + if (iMotionPredFlag[listIdx][0] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //motion_prediction_flag_l1[ mbPartIdx ] + ref_idx_list[listIdx][0] = uiCode; + // Security check: iRefIdx should be in range 0 to num_ref_idx_l0_active_minus1, includsive + // ref to standard section 7.4.5.1. iRefCount[0] is 1 + num_ref_idx_l0_active_minus1. + if ((ref_idx_list[listIdx][0] < 0) || (ref_idx_list[listIdx][0] >= iRefCount[listIdx]) + || (ppRefPic[listIdx][ref_idx_list[listIdx][0]] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + ref_idx_list[listIdx][0] = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! 
(ppRefPic[listIdx][ref_idx_list[listIdx][0]] + && (ppRefPic[listIdx][ref_idx_list[listIdx][0]]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. "); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + if (IS_DIR (mbType, 0, listIdx)) { + PredMv (iMvArray, iRefIdxArray, listIdx, 0, 4, ref_idx_list[listIdx][0], iMv); + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ] + iMv[1] += iCode; + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)iMv = 0; + } + UpdateP16x16MotionInfo (pCurDqLayer, listIdx, ref_idx_list[listIdx][0], iMv); + } + } else if (IS_INTER_16x8 (mbType)) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + if (IS_DIR (mbType, i, listIdx)) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ] + iMotionPredFlag[listIdx][i] = uiCode; + } + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + if (IS_DIR (mbType, i, listIdx)) { + if (iMotionPredFlag[listIdx][i] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //motion_prediction_flag_l1[ mbPartIdx ] + int32_t iRefIdx = uiCode; + // Security check: iRefIdx should be in range 0 to num_ref_idx_l0_active_minus1, includsive + // ref to standard section 7.4.5.1. iRefCount[0] is 1 + num_ref_idx_l0_active_minus1. 
+ if ((iRefIdx < 0) || (iRefIdx >= iRefCount[listIdx]) || (ppRefPic[listIdx][iRefIdx] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRefIdx = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + ref_idx_list[listIdx][i] = iRefIdx; + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx] + && (ppRefPic[listIdx][iRefIdx]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. "); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } + } + } + // Read mvd_L0 then mvd_L1 + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + // Partitions + for (int32_t i = 0; i < 2; i++) { + int iPartIdx = i << 3; + int32_t iRefIdx = ref_idx_list[listIdx][i]; + if (IS_DIR (mbType, i, listIdx)) { + PredInter16x8Mv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ listIdx ][x] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l{0,1}[ mbPartIdx ][ listIdx ][y] + iMv[1] += iCode; + + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)iMv = 0; + } + UpdateP16x8MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv); + } + } + } else if (IS_INTER_8x16 (mbType)) { + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + if (IS_DIR (mbType, i, listIdx)) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0/l1[ mbPartIdx ] + iMotionPredFlag[listIdx][i] = uiCode; + } + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; ++i) { + if 
(IS_DIR (mbType, i, listIdx)) { + if (iMotionPredFlag[listIdx][i] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //motion_prediction_flag_l1[ mbPartIdx ] + int32_t iRefIdx = uiCode; + // Security check: iRefIdx should be in range 0 to num_ref_idx_l0_active_minus1, includsive + // ref to standard section 7.4.5.1. iRefCount[0] is 1 + num_ref_idx_l0_active_minus1. + if ((iRefIdx < 0) || (iRefIdx >= iRefCount[listIdx]) || (ppRefPic[listIdx][iRefIdx] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iRefIdx = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + ref_idx_list[listIdx][i] = iRefIdx; + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iRefIdx] + && (ppRefPic[listIdx][iRefIdx]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. 
"); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } + } + } + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 2; i++) { + int iPartIdx = i << 2; + int32_t iRefIdx = ref_idx_list[listIdx][i]; + if (IS_DIR (mbType, i, listIdx)) { + PredInter8x16Mv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ] + iMv[1] += iCode; + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)iMv = 0; + } + UpdateP8x16MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, listIdx, iPartIdx, iRefIdx, iMv); + } + } + } else if (IS_Inter_8x8 (mbType)) { + int8_t pSubPartCount[4], pPartW[4]; + uint32_t uiSubMbType; + //sub_mb_type, partition + int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } }; + if (pCtx->sRefPic.pRefList[LIST_1][0] == NULL) { + SLogContext* pLogCtx = & (pCtx->sLogCtx); + WelsLog (pLogCtx, WELS_LOG_ERROR, "Colocated Ref Picture for B-Slice is lost, B-Slice decoding cannot be continued!"); + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + } + bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef; + const int32_t ref0Count = WELS_MIN (pSliceHeader->uiRefCount[LIST_0], pCtx->sRefPic.uiRefCount[LIST_0]); + bool has_direct_called = false; + SubMbType directSubMbType = 0; + + //uiSubMbType, partition + for (int32_t i = 0; i < 4; i++) { + WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //sub_mb_type[ mbPartIdx ] + uiSubMbType = uiCode; + if (uiSubMbType >= 13) { //invalid uiSubMbType + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE); + } + pSubPartCount[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartCount; + pPartW[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartWidth; + + // Need modification when B picture add in, 
reference to 7.3.5 + if (pSubPartCount[i] > 1) + pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = false; + + if (IS_DIRECT (g_ksInterBSubMbTypeInfo[uiSubMbType].iType)) { + if (!has_direct_called) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + int32_t ret = PredMvBDirectSpatial (pCtx, pMvDirect, iRef, directSubMbType); + if (ret != ERR_NONE) { + return ret; + } + + } else { + //temporal direct mode + int32_t ret = PredBDirectTemporal (pCtx, pMvDirect, iRef, directSubMbType); + if (ret != ERR_NONE) { + return ret; + } + } + has_direct_called = true; + } + pCurDqLayer->pSubMbType[iMbXy][i] = directSubMbType; + if (IS_SUB_4x4 (pCurDqLayer->pSubMbType[iMbXy][i])) { + pSubPartCount[i] = 4; + pPartW[i] = 1; + } + } else { + pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType; + } + } + if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) { + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 4; i++) { + bool is_dir = IS_DIR (pCurDqLayer->pSubMbType[iMbXy][i], 0, listIdx) > 0; + if (is_dir) { + WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //motion_prediction_flag_l0[ mbPartIdx ] + iMotionPredFlag[listIdx][i] = uiCode; + } + } + } + } + for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv + int16_t iIdx8 = i << 2; + if (IS_DIRECT (pCurDqLayer->pSubMbType[iMbXy][i])) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + FillSpatialDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, bIsLongRef, pMvDirect, iRef, + iMvArray, NULL); + } else { + int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0]; + iRef[LIST_1] = 0; + iRef[LIST_0] = 0; + const uint8_t uiColoc4Idx = g_kuiScan4[iIdx8]; + if (!pCurDqLayer->iColocIntra[uiColoc4Idx]) { + iRef[LIST_0] = 0; + int8_t colocRefIndexL0 = pCurDqLayer->iColocRefIndex[LIST_0][uiColoc4Idx]; + if (colocRefIndexL0 >= 0) { + iRef[LIST_0] = MapColToList0 (pCtx, colocRefIndexL0, ref0Count); + } else { + mvColoc = 
pCurDqLayer->iColocMv[LIST_1]; + } + } + Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_0, iRef[LIST_0]); + Update8x8RefIdx (pCurDqLayer, iIdx8, LIST_1, iRef[LIST_1]); + FillTemporalDirect8x8Mv (pCurDqLayer, iIdx8, pSubPartCount[i], pPartW[i], directSubMbType, iRef, mvColoc, iMvArray, + NULL); + } + } + } + //ref no-direct + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 4; i++) { + int16_t iIdx8 = i << 2; + int32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i]; + int8_t iref = REF_NOT_IN_LIST; + if (IS_DIRECT (subMbType)) { + if (pSliceHeader->iDirectSpatialMvPredFlag) { + Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iRef[listIdx]); + ref_idx_list[listIdx][i] = iRef[listIdx]; + } + } else { + if (IS_DIR (subMbType, 0, listIdx)) { + if (iMotionPredFlag[listIdx][i] == 0) { + WELS_READ_VERIFY (BsGetTe0 (pBs, iRefCount[listIdx], &uiCode)); //ref_idx_l0[ mbPartIdx ] + iref = uiCode; + if ((iref < 0) || (iref >= iRefCount[listIdx]) || (ppRefPic[listIdx][iref] == NULL)) { //error ref_idx + pCtx->bMbRefConcealed = true; + if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { + iref = 0; + pCtx->iErrorCode |= dsBitstreamError; + } else { + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX); + } + } + pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[listIdx][iref] + && (ppRefPic[listIdx][iref]->bIsComplete || bIsPending)); + } else { + WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. 
"); + return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP); + } + } + Update8x8RefIdx (pCurDqLayer, iIdx8, listIdx, iref); + ref_idx_list[listIdx][i] = iref; + } + } + } + //mv + for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { + for (int32_t i = 0; i < 4; i++) { + int8_t iPartCount = pSubPartCount[i]; + int16_t iPartIdx, iBlockW = pPartW[i]; + uint8_t uiScan4Idx, uiCacheIdx; + + uiCacheIdx = g_kuiCache30ScanIdx[i << 2]; + + int8_t iref = ref_idx_list[listIdx][i]; + iRefIdxArray[listIdx][uiCacheIdx] = iRefIdxArray[listIdx][uiCacheIdx + 1] = + iRefIdxArray[listIdx][uiCacheIdx + 6] = iRefIdxArray[listIdx][uiCacheIdx + 7] = iref; + + uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i]; + if (IS_DIRECT (subMbType)) { + continue; + } + bool is_dir = IS_DIR (subMbType, 0, listIdx) > 0; + for (int32_t j = 0; j < iPartCount; j++) { + iPartIdx = (i << 2) + j * iBlockW; + uiScan4Idx = g_kuiScan4[iPartIdx]; + uiCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; + if (is_dir) { + PredMv (iMvArray, iRefIdxArray, listIdx, iPartIdx, iBlockW, iref, iMv); + + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ subMbPartIdx ][ compIdx ] + iMv[0] += iCode; + WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ subMbPartIdx ][ compIdx ] + iMv[1] += iCode; + WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv"); + } else { + * (uint32_t*)iMv = 0; + } + if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8 + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 5], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 7], LD32 (iMv)); + } else if 
(IS_SUB_8x4 (subMbType)) { + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 1], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 1], LD32 (iMv)); + } else if (IS_SUB_4x8 (subMbType)) { + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx + 4], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx + 6], LD32 (iMv)); + } else { //SUB_MB_TYPE_4x4 == uiSubMbType + ST32 (pCurDqLayer->pDec->pMv[listIdx][iMbXy][uiScan4Idx], LD32 (iMv)); + ST32 (iMvArray[listIdx][uiCacheIdx], LD32 (iMv)); + } + } + } + } + } + return ERR_NONE; +} } // namespace WelsDec diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp index a2c5e7f08d9..475df0ac0d9 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/pic_queue.cpp @@ -106,13 +106,15 @@ PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const pPic->iWidthInPixel = kiPicWidth; pPic->iHeightInPixel = kiPicHeight; pPic->iFrameNum = -1; - pPic->bAvailableFlag = true; + pPic->iRefCount = 0; uint32_t uiMbWidth = (kiPicWidth + 15) >> 4; uint32_t uiMbHeight = (kiPicHeight + 15) >> 4; uint32_t uiMbCount = uiMbWidth * uiMbHeight; - pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t), - "pPic->pMbType"); + + pPic->pMbCorrectlyDecodedFlag = (bool*)pMa->WelsMallocz (uiMbCount * sizeof (bool), "pPic->pMbCorrectlyDecodedFlag"); + pPic->pNzc = GetThreadCount (pCtx) > 1 ? 
(int8_t (*)[24])pMa->WelsMallocz (uiMbCount * 24, "pPic->pNzc") : NULL; + pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t), "pPic->pMbType"); pPic->pMv[LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof ( int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]"); pPic->pMv[LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof ( @@ -121,6 +123,15 @@ PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]"); pPic->pRefIndex[LIST_1] = (int8_t (*)[16])pMa->WelsMallocz (uiMbCount * sizeof ( int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]"); + if (pCtx->pThreadCtx != NULL) { + pPic->pReadyEvent = (SWelsDecEvent*)pMa->WelsMallocz (uiMbHeight * sizeof (SWelsDecEvent), "pPic->pReadyEvent"); + for (uint32_t i = 0; i < uiMbHeight; ++i) { + CREATE_EVENT (&pPic->pReadyEvent[i], 1, 0, NULL); + } + } else { + pPic->pReadyEvent = NULL; + } + return pPic; } @@ -131,6 +142,16 @@ void FreePicture (PPicture pPic, CMemoryAlign* pMa) { pPic->pBuffer[0] = NULL; } + if (pPic->pMbCorrectlyDecodedFlag) { + pMa->WelsFree (pPic->pMbCorrectlyDecodedFlag, "pPic->pMbCorrectlyDecodedFlag"); + pPic->pMbCorrectlyDecodedFlag = NULL; + } + + if (pPic->pNzc) { + pMa->WelsFree (pPic->pNzc, "pPic->pNzc"); + pPic->pNzc = NULL; + } + if (pPic->pMbType) { pMa->WelsFree (pPic->pMbType, "pPic->pMbType"); pPic->pMbType = NULL; @@ -147,6 +168,14 @@ void FreePicture (PPicture pPic, CMemoryAlign* pMa) { pPic->pRefIndex[listIdx] = NULL; } } + if (pPic->pReadyEvent != NULL) { + uint32_t uiMbHeight = (pPic->iHeightInPixel + 15) >> 4; + for (uint32_t i = 0; i < uiMbHeight; ++i) { + CLOSE_EVENT (&pPic->pReadyEvent[i]); + } + pMa->WelsFree (pPic->pReadyEvent, "pPic->pReadyEvent"); + pPic->pReadyEvent = NULL; + } pMa->WelsFree (pPic, "pPic"); pPic = NULL; } @@ -160,25 +189,55 @@ PPicture PrefetchPic (PPicBuff pPicBuf) { } for (iPicIdx = pPicBuf->iCurrentIdx + 1; iPicIdx < pPicBuf->iCapacity ; 
++iPicIdx) { - if (pPicBuf->ppPic[iPicIdx] != NULL && pPicBuf->ppPic[iPicIdx]->bAvailableFlag - && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef) { + if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef + && pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) { pPic = pPicBuf->ppPic[iPicIdx]; break; } } if (pPic != NULL) { pPicBuf->iCurrentIdx = iPicIdx; + pPic->iPicBuffIdx = iPicIdx; return pPic; } for (iPicIdx = 0 ; iPicIdx <= pPicBuf->iCurrentIdx ; ++iPicIdx) { - if (pPicBuf->ppPic[iPicIdx] != NULL && pPicBuf->ppPic[iPicIdx]->bAvailableFlag - && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef) { + if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef + && pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) { pPic = pPicBuf->ppPic[iPicIdx]; break; } } pPicBuf->iCurrentIdx = iPicIdx; + if (pPic != NULL) { + pPic->iPicBuffIdx = iPicIdx; + } + return pPic; +} + +PPicture PrefetchPicForThread (PPicBuff pPicBuf) { + PPicture pPic = NULL; + + if (pPicBuf->iCapacity == 0) { + return NULL; + } + pPic = pPicBuf->ppPic[pPicBuf->iCurrentIdx]; + pPic->iPicBuffIdx = pPicBuf->iCurrentIdx; + if (++pPicBuf->iCurrentIdx >= pPicBuf->iCapacity) { + pPicBuf->iCurrentIdx = 0; + } + return pPic; +} + +PPicture PrefetchLastPicForThread (PPicBuff pPicBuf, const int32_t& iLastPicBuffIdx) { + PPicture pPic = NULL; + + if (pPicBuf->iCapacity == 0) { + return NULL; + } + if (iLastPicBuffIdx >= 0 && iLastPicBuffIdx < pPicBuf->iCapacity) { + pPic = pPicBuf->ppPic[iLastPicBuffIdx]; + } return pPic; } diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp index 157fb4cdb6b..9034cc4d7da 100644 --- a/chromium/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/rec_mb.cpp @@ -44,20 +44,20 @@ namespace WelsDec { -void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurLayer) { +void WelsFillRecNeededMbInfo 
(PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurDqLayer) { PPicture pCurPic = pCtx->pDec; int32_t iLumaStride = pCurPic->iLinesize[0]; int32_t iChromaStride = pCurPic->iLinesize[1]; - int32_t iMbX = pCurLayer->iMbX; - int32_t iMbY = pCurLayer->iMbY; + int32_t iMbX = pCurDqLayer->iMbX; + int32_t iMbY = pCurDqLayer->iMbY; - pCurLayer->iLumaStride = iLumaStride; - pCurLayer->iChromaStride = iChromaStride; + pCurDqLayer->iLumaStride = iLumaStride; + pCurDqLayer->iChromaStride = iChromaStride; if (bOutput) { - pCurLayer->pPred[0] = pCurPic->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); - pCurLayer->pPred[1] = pCurPic->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); - pCurLayer->pPred[2] = pCurPic->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); + pCurDqLayer->pPred[0] = pCurPic->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); + pCurDqLayer->pPred[1] = pCurPic->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); + pCurDqLayer->pPred[2] = pCurPic->pData[2] + ((iMbY * iChromaStride + iMbX) << 3); } } @@ -214,11 +214,10 @@ int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLe //according to current 8*8 block ref_index to gain reference picture -static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pCtx, int8_t* pRefIdxList, - int32_t iIndex, int32_t listIdx) { +static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pCtx, const int8_t& iRefIdx, + int32_t listIdx) { PPicture pRefPic; - int8_t iRefIdx = pRefIdxList[iIndex]; if (iRefIdx >= 0) { pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx]; @@ -229,7 +228,9 @@ static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pC pMCRefMem->pSrcY = pRefPic->pData[0]; pMCRefMem->pSrcU = pRefPic->pData[1]; pMCRefMem->pSrcV = pRefPic->pData[2]; - + if (!pMCRefMem->pSrcY || !pMCRefMem->pSrcU || !pMCRefMem->pSrcV) { + return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_DATA, ERR_INFO_REFERENCE_PIC_LOST); + } return ERR_NONE; } } @@ -240,7 +241,9 @@ 
static inline int32_t GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pC #ifndef MC_FLOW_SIMPLE_JUDGE #define MC_FLOW_SIMPLE_JUDGE 1 #endif //MC_FLOW_SIMPLE_JUDGE -void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc, +void BaseMC (PWelsDecoderContext pCtx, sMCRefMember* pMCRefMem, const int32_t& listIdx, const int8_t& iRefIdx, + int32_t iXOffset, int32_t iYOffset, + SMcFunc* pMCFunc, int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]) { int32_t iFullMVx = (iXOffset << 2) + iMVs[0]; //quarter pixel int32_t iFullMVy = (iYOffset << 2) + iMVs[1]; @@ -249,6 +252,27 @@ void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFun iFullMVy = WELS_CLIP3 (iFullMVy, ((-PADDING_LENGTH + 2) * (1 << 2)), ((pMCRefMem->iPicHeight + PADDING_LENGTH - 19) * (1 << 2))); + if (GetThreadCount (pCtx) > 1 && iRefIdx >= 0) { + // wait for the lines of reference macroblock (3 + 16). + PPicture pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx]; + if (pCtx->bNewSeqBegin && (pCtx->iErrorCode & dsRefLost)) { + //set event if refpic is lost to prevent from infinite waiting. 
+ if (!pRefPic->pReadyEvent[0].isSignaled) { + for (uint32_t ln = 0; ln < pCtx->sMb.iMbHeight; ++ln) { + SET_EVENT (&pRefPic->pReadyEvent[ln]); + } + } + } + int32_t offset = (iFullMVy >> 2) + iBlkHeight + 3 + 16; + if (offset > pCtx->lastReadyHeightOffset[listIdx][iRefIdx]) { + const int32_t down_line = WELS_MIN (offset >> 4, int32_t (pCtx->sMb.iMbHeight) - 1); + if (pRefPic->pReadyEvent[down_line].isSignaled != 1) { + WAIT_EVENT (&pRefPic->pReadyEvent[down_line], WELS_DEC_THREAD_WAIT_INFINITE); + } + pCtx->lastReadyHeightOffset[listIdx][iRefIdx] = offset; + } + } + int32_t iSrcPixOffsetLuma = (iFullMVx >> 2) + (iFullMVy >> 2) * pMCRefMem->iSrcLineLuma; int32_t iSrcPixOffsetChroma = (iFullMVx >> 3) + (iFullMVy >> 3) * pMCRefMem->iSrcLineChroma; @@ -435,7 +459,7 @@ static void BiPrediction (PDqLayer pCurDqLayer, sMCRefMember* pMCRefMem, sMCRefM } } -void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx) { +int32_t GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx) { sMCRefMember pMCRefMem; PDqLayer pCurDqLayer = pCtx->pCurDqLayer; SMcFunc* pMCFunc = &pCtx->sMcFunc; @@ -444,7 +468,7 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec int16_t iMVs[2] = {0}; - uint32_t iMBType = pCurDqLayer->pMbType[iMBXY]; + uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMBXY]; int32_t iMBOffsetX = pCurDqLayer->iMbX << 4; int32_t iMBOffsetY = pCurDqLayer->iMbY << 4; @@ -464,65 +488,66 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec pMCRefMem.iDstLineLuma = iDstLineLuma; pMCRefMem.iDstLineChroma = iDstLineChroma; - int32_t iRefIndex = 0; + int8_t iRefIndex = 0; switch (iMBType) { case MB_TYPE_SKIP: case MB_TYPE_16x16: - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1]; - GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0); - BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, 
pMCFunc, 16, 16, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { - iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0]; WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 16); } break; case MB_TYPE_16x8: - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1]; - GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0); - BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 8, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { - iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0]; WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8); } - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][8][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][8][1]; - GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 8, LIST_0); + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][8][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][8][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][8]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); pMCRefMem.pDstY = pPredY + (iDstLineLuma << 3); pMCRefMem.pDstU = pPredCb + (iDstLineChroma << 2); pMCRefMem.pDstV = pPredCr + (iDstLineChroma << 2); - BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY + 8, pMCFunc, 16, 8, iMVs); + BaseMC (pCtx, &pMCRefMem, 
LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY + 8, pMCFunc, 16, 8, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { - iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][8]; WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8); } break; case MB_TYPE_8x16: - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1]; - GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0); - BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 8, 16, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][0][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 8, 16, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { - iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0]; WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16); } - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][2][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][2][1]; - GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 2, LIST_0); + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][2][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][2][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][2]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); pMCRefMem.pDstY = pPredY + 8; pMCRefMem.pDstU = pPredCb + 4; pMCRefMem.pDstV = pPredCr + 4; - BaseMC (&pMCRefMem, iMBOffsetX + 8, iMBOffsetY, pMCFunc, 8, 16, iMVs); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iMBOffsetX + 8, iMBOffsetY, pMCFunc, 8, 16, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { - iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][2]; WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16); } break; @@ -539,9 +564,8 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec iYOffset = iMBOffsetY + iBlk8Y; iIIdx = ((i >> 1) << 3) + 
((i & 1) << 1); - GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], iIIdx, LIST_0); - iRefIndex = pCurDqLayer->bUseWeightPredictionFlag ? pCurDqLayer->pRefIndex[0][iMBXY][iIIdx] : 0; - + iRefIndex = pCurDqLayer->pDec->pRefIndex[0][iMBXY][iIIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, LIST_0)); pDstY = pPredY + iBlk8X + iBlk8Y * iDstLineLuma; pDstU = pPredCb + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma; pDstV = pPredCr + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma; @@ -550,9 +574,9 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec pMCRefMem.pDstV = pDstV; switch (iSubMBType) { case SUB_MB_TYPE_8x8: - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 8); @@ -560,21 +584,21 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec break; case SUB_MB_TYPE_8x4: - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4); } - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][1]; + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 4][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 
4][1]; pMCRefMem.pDstY += (iDstLineLuma << 2); pMCRefMem.pDstU += (iDstLineChroma << 1); pMCRefMem.pDstV += (iDstLineChroma << 1); - BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4); @@ -582,21 +606,21 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec break; case SUB_MB_TYPE_4x8: - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8); } - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][1]; + iMVs[0] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + 1][1]; pMCRefMem.pDstY += 4; pMCRefMem.pDstU += 2; pMCRefMem.pDstV += 2; - BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8); @@ -616,9 +640,9 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec pMCRefMem.pDstU = pDstU + iUVLineStride; pMCRefMem.pDstV = pDstV + iUVLineStride; - iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][0]; - iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][1]; - BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); + iMVs[0] = 
pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + iJIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[0][iMBXY][iIIdx + iJIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); if (pCurDqLayer->bUseWeightPredictionFlag) { WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 4); @@ -636,6 +660,7 @@ void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDec default: break; } + return ERR_NONE; } int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWelsDecoderContext pCtx) { @@ -649,7 +674,7 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels int16_t iMVs[2] = { 0 }; - uint32_t iMBType = pCurDqLayer->pMbType[iMBXY]; + uint32_t iMBType = pCurDqLayer->pDec->pMbType[iMBXY]; int32_t iMBOffsetX = pCurDqLayer->iMbX << 4; int32_t iMBOffsetY = pCurDqLayer->iMbY << 4; @@ -674,37 +699,38 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels pTempMCRefMem.pDstV = pTempPredYCbCr[2]; - int32_t iRefIndex1 = 0; - int32_t iRefIndex2 = 0; + int8_t iRefIndex0 = 0; + int8_t iRefIndex1 = 0; + int8_t iRefIndex = 0; bool bWeightedBipredIdcIs1 = pCurDqLayer->sLayerInfo.pPps->uiWeightedBipredIdc == 1; if (IS_INTER_16x16 (iMBType)) { if (IS_TYPE_L0 (iMBType) && IS_TYPE_L1 (iMBType)) { - iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][0][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][0][1]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_0][iMBXY], 0, LIST_0)); - BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); - - iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][0][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][0][1]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], 0, LIST_1)); - BaseMC (&pTempMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); - iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][0]; - iRefIndex2 = 
pCurDqLayer->pRefIndex[LIST_1][iMBXY][0]; + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][0][1]; + iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex0, LIST_0)); + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); + + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][0][1]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1)); + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); if (pCurDqLayer->bUseWeightedBiPredIdc) { - BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 16, 16); + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 16, 16); } else { BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 16, 16); } } else { int32_t listIdx = (iMBType & MB_TYPE_P0L0) ? 
LIST_0 : LIST_1; - iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][0][0]; - iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][0][1]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], 0, listIdx)); - BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][0][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][0][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][0]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx)); + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs); if (bWeightedBipredIdcIs1) { - int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][0]; WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 16, 16); } } @@ -716,29 +742,31 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { if (IS_DIR (iMBType, i, listIdx)) { lastListIdx = listIdx; - iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iPartIdx][0]; - iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iPartIdx][1]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], iPartIdx, listIdx)); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iPartIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iPartIdx][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iPartIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx)); if (i) { pMCRefMem.pDstY += (iDstLineLuma << 3); pMCRefMem.pDstU += (iDstLineChroma << 2); pMCRefMem.pDstV += (iDstLineChroma << 2); } - BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs); + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs); if (++listCount == 2) { - iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iPartIdx][0]; - iMVs[1] = 
pCurDqLayer->pMv[LIST_1][iMBXY][iPartIdx][1]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], iPartIdx, LIST_1)); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iPartIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iPartIdx][1]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iPartIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1)); if (i) { pTempMCRefMem.pDstY += (iDstLineLuma << 3); pTempMCRefMem.pDstU += (iDstLineChroma << 2); pTempMCRefMem.pDstV += (iDstLineChroma << 2); } - BaseMC (&pTempMCRefMem, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs); + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs); if (pCurDqLayer->bUseWeightedBiPredIdc) { - iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][iPartIdx]; - iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][iPartIdx]; - BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 16, 8); + iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][iPartIdx]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iPartIdx]; + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 16, 8); } else { BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 16, 8); } @@ -747,7 +775,7 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels } if (listCount == 1) { if (bWeightedBipredIdcIs1) { - int32_t iRefIndex = pCurDqLayer->pRefIndex[lastListIdx][iMBXY][iPartIdx]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[lastListIdx][iMBXY][iPartIdx]; WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 16, 8); } } @@ -759,29 +787,31 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { if (IS_DIR (iMBType, i, listIdx)) { lastListIdx = 
listIdx; - iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][i << 1][0]; - iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][i << 1][1]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], i << 1, listIdx)); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][i << 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][i << 1][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][i << 1]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx)); if (i) { pMCRefMem.pDstY += 8; pMCRefMem.pDstU += 4; pMCRefMem.pDstV += 4; } - BaseMC (&pMCRefMem, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs); + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs); if (++listCount == 2) { - iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][i << 1][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][i << 1][1]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], i << 1, LIST_1)); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][i << 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][i << 1][1]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][i << 1]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1)); if (i) { pTempMCRefMem.pDstY += 8; pTempMCRefMem.pDstU += 4; pTempMCRefMem.pDstV += 4; } - BaseMC (&pTempMCRefMem, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs); + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iMBOffsetX + (i ? 
8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs); if (pCurDqLayer->bUseWeightedBiPredIdc) { - iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][i << 1]; - iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][i << 1]; - BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 16); + iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][i << 1]; + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][i << 1]; + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 16); } else { BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 16); } @@ -790,7 +820,7 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels } if (listCount == 1) { if (bWeightedBipredIdcIs1) { - int32_t iRefIndex = pCurDqLayer->pRefIndex[lastListIdx][iMBXY][i << 1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[lastListIdx][iMBXY][i << 1]; WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 8, 16); } } @@ -827,53 +857,53 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels pTempMCRefMem.pDstV = pDstV2; if ((IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType))) { - iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][iIIdx]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_0][iMBXY], iIIdx, LIST_0)); + iRefIndex0 = pCurDqLayer->pDec->pRefIndex[LIST_0][iMBXY][iIIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex0, LIST_0)); - iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][iIIdx]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], iIIdx, LIST_1)); + iRefIndex1 = pCurDqLayer->pDec->pRefIndex[LIST_1][iMBXY][iIIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pTempMCRefMem, pCtx, iRefIndex1, LIST_1)); } else { int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? 
LIST_0 : LIST_1; - iRefIndex1 = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx]; - WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], iIIdx, listIdx)); + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; + WELS_B_MB_REC_VERIFY (GetRefPic (&pMCRefMem, pCtx, iRefIndex, listIdx)); } if (IS_SUB_8x8 (iSubMBType)) { if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { - iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); - iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1]; - BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); if (pCurDqLayer->bUseWeightedBiPredIdc) { - BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 8); + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 8); } else { BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 8); } } else { int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? 
LIST_0 : LIST_1; - iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs); if (bWeightedBipredIdcIs1) { - int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx]; WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 8); } } } else if (IS_SUB_8x4 (iSubMBType)) { if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_8x4 - iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); - iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1]; - BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); if (pCurDqLayer->bUseWeightedBiPredIdc) { - BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 4); + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 4); } else { BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 4); } @@ -881,49 +911,49 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels 
pMCRefMem.pDstY += (iDstLineLuma << 2); pMCRefMem.pDstU += (iDstLineChroma << 1); pMCRefMem.pDstV += (iDstLineChroma << 1); - iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 4][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 4][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 4][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 4][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); pTempMCRefMem.pDstY += (iDstLineLuma << 2); pTempMCRefMem.pDstU += (iDstLineChroma << 1); pTempMCRefMem.pDstV += (iDstLineChroma << 1); - iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 4][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 4][1]; - BaseMC (&pTempMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 4][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 4][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); if (pCurDqLayer->bUseWeightedBiPredIdc) { - BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 4); + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 8, 4); } else { BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 4); } } else { //B_L0_8x4 B_L1_8x4 int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? 
LIST_0 : LIST_1; - iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs); pMCRefMem.pDstY += (iDstLineLuma << 2); pMCRefMem.pDstU += (iDstLineChroma << 1); pMCRefMem.pDstV += (iDstLineChroma << 1); - iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 4][0]; - iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 4][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 4][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 4][1]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs); if (bWeightedBipredIdcIs1) { - int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx]; WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 4); } } } else if (IS_SUB_4x8 (iSubMBType)) { if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_4x8 - iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); - iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1]; - BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx][1]; + BaseMC (pCtx, 
&pTempMCRefMem, LIST_1, iRefIndex1, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); if (pCurDqLayer->bUseWeightedBiPredIdc) { - BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 8); + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 8); } else { BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 8); } @@ -931,35 +961,35 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels pMCRefMem.pDstY += 4; pMCRefMem.pDstU += 2; pMCRefMem.pDstV += 2; - iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 1][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 1][1]; - BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + 1][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); pTempMCRefMem.pDstY += 4; pTempMCRefMem.pDstU += 2; pTempMCRefMem.pDstV += 2; - iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 1][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 1][1]; - BaseMC (&pTempMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + 1][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); if (pCurDqLayer->bUseWeightedBiPredIdc) { - BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 8); + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 8); } else { BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 8); } } else { //B_L0_4x8 B_L1_4x8 int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? 
LIST_0 : LIST_1; - iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0]; - iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1]; - BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx][1]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs); pMCRefMem.pDstY += 4; pMCRefMem.pDstU += 2; pMCRefMem.pDstV += 2; - iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 1][0]; - iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 1][1]; - BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 1][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + 1][1]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs); if (bWeightedBipredIdcIs1) { - int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx]; WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 8); } } @@ -977,27 +1007,27 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels pMCRefMem.pDstU = pDstU + iUVLineStride; pMCRefMem.pDstV = pDstV + iUVLineStride; - iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + iJIdx][0]; - iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + iJIdx][1]; - BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + iJIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_0][iMBXY][iIIdx + iJIdx][1]; + BaseMC (pCtx, &pMCRefMem, LIST_0, iRefIndex0, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); pTempMCRefMem.pDstY = pDstY2 + iBlk8X + iBlk8Y * iDstLineLuma; pTempMCRefMem.pDstU = pDstU2 + iUVLineStride; pTempMCRefMem.pDstV = pDstV2 + iUVLineStride;; - iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + iJIdx][0]; - iMVs[1] = 
pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + iJIdx][1]; - BaseMC (&pTempMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + iJIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[LIST_1][iMBXY][iIIdx + iJIdx][1]; + BaseMC (pCtx, &pTempMCRefMem, LIST_1, iRefIndex1, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); if (pCurDqLayer->bUseWeightedBiPredIdc) { - BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 4); + BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex0, iRefIndex1, bWeightedBipredIdcIs1, 4, 4); } else { BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 4); } } } else { int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1; - int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx]; + iRefIndex = pCurDqLayer->pDec->pRefIndex[listIdx][iMBXY][iIIdx]; for (int32_t j = 0; j < 4; j++) { int32_t iUVLineStride; iJIdx = ((j >> 1) << 2) + (j & 1); @@ -1010,9 +1040,9 @@ int32_t GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWels pMCRefMem.pDstU = pDstU + iUVLineStride; pMCRefMem.pDstV = pDstV + iUVLineStride; - iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + iJIdx][0]; - iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + iJIdx][1]; - BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); + iMVs[0] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + iJIdx][0]; + iMVs[1] = pCurDqLayer->pDec->pMv[listIdx][iMBXY][iIIdx + iJIdx][1]; + BaseMC (pCtx, &pMCRefMem, listIdx, iRefIndex, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs); if (bWeightedBipredIdcIs1) { WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 4); } diff --git a/chromium/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp b/chromium/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp new file mode 100644 index 
00000000000..d05aa4515b3 --- /dev/null +++ b/chromium/third_party/openh264/src/codec/decoder/core/src/wels_decoder_thread.cpp @@ -0,0 +1,311 @@ +/*! + * \copy + * Copyright (c) 2009-2019, Cisco Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * \file wels_decoder_thread.cpp + * + * \brief Interfaces introduced in thread programming + * + * \date 08/06/2018 Created + * + ************************************************************************************* + */ + + +#ifdef __linux__ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <sched.h> +#elif !defined(_WIN32) && !defined(__CYGWIN__) +#include <sys/types.h> +#include <sys/param.h> +#include <unistd.h> +#ifndef __Fuchsia__ +#include <sys/sysctl.h> +#endif +#ifdef __APPLE__ +#define HW_NCPU_NAME "hw.logicalcpu" +#else +#define HW_NCPU_NAME "hw.ncpu" +#endif +#endif + +#include "wels_decoder_thread.h" +#include <stdio.h> +#include <stdlib.h> + +int32_t GetCPUCount() { + WelsLogicalProcessInfo pInfo; + pInfo.ProcessorCount = 1; + WelsQueryLogicalProcessInfo (&pInfo); + return pInfo.ProcessorCount; +} + +int ThreadCreate (SWelsDecThread* t, LPWELS_THREAD_ROUTINE tf, void* ta) { + WELS_THREAD_ATTR attr = 0; + return WelsThreadCreate (& (t->h), tf, ta, attr); +} + +int ThreadWait (SWelsDecThread* t) { + return WelsThreadJoin (t->h); +} + +#if defined(_WIN32) || defined(__CYGWIN__) + +int EventCreate (SWelsDecEvent* e, int manualReset, int initialState) { + e->h = CreateEvent (NULL, manualReset, initialState, NULL); + e->isSignaled = initialState; + return (e->h != NULL) ? 
0 : 1; +} + +void EventReset (SWelsDecEvent* e) { + ResetEvent (e->h); + e->isSignaled = 0; +} + +void EventPost (SWelsDecEvent* e) { + SetEvent (e->h); + e->isSignaled = 1; +} + +int EventWait (SWelsDecEvent* e, int32_t timeout) { + DWORD result; + if ((uint32_t)timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) + result = WaitForSingleObject (e->h, INFINITE); + else + result = WaitForSingleObject (e->h, timeout); + + if (result == WAIT_OBJECT_0) + return WELS_DEC_THREAD_WAIT_SIGNALED; + else + return WAIT_TIMEOUT; +} + +void EventDestroy (SWelsDecEvent* e) { + CloseHandle (e->h); + e->h = NULL; +} + +int SemCreate (SWelsDecSemphore* s, long value, long max) { + s->h = CreateSemaphore (NULL, value, max, NULL); + return (s->h != NULL) ? 0 : 1; +} + +int SemWait (SWelsDecSemphore* s, int32_t timeout) { + DWORD result; + if ((uint32_t)timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) + result = WaitForSingleObject (s->h, INFINITE); + else + result = WaitForSingleObject (s->h, timeout); + + if (result == WAIT_OBJECT_0) { + return WELS_DEC_THREAD_WAIT_SIGNALED; + } else { + return WELS_DEC_THREAD_WAIT_TIMEDOUT; + } +} + +void SemRelease (SWelsDecSemphore* s, long* prevcount) { + ReleaseSemaphore (s->h, 1, prevcount); +} + +void SemDestroy (SWelsDecSemphore* s) { + CloseHandle (s->h); + s->h = NULL; +} + +#else /* _WIN32 */ + +static void getTimespecFromTimeout (struct timespec* ts, int32_t timeout) { + struct timeval tv; + gettimeofday (&tv, 0); + ts->tv_nsec = tv.tv_usec * 1000 + timeout * 1000000; + ts->tv_sec = tv.tv_sec + ts->tv_nsec / 1000000000; + ts->tv_nsec %= 1000000000; +} +int EventCreate (SWelsDecEvent* e, int manualReset, int initialState) { + if (pthread_mutex_init (& (e->m), NULL)) + return 1; + if (pthread_cond_init (& (e->c), NULL)) + return 2; + + e->isSignaled = initialState; + e->manualReset = manualReset; + + return 0; +} + +void EventReset (SWelsDecEvent* e) { + pthread_mutex_lock (& (e->m)); + e->isSignaled = 0; + 
pthread_mutex_unlock (& (e->m)); +} + +void EventPost (SWelsDecEvent* e) { + pthread_mutex_lock (& (e->m)); + pthread_cond_broadcast (& (e->c)); + e->isSignaled = 1; + pthread_mutex_unlock (& (e->m)); +} + +int EventWait (SWelsDecEvent* e, int32_t timeout) { + pthread_mutex_lock (& (e->m)); + int signaled = e->isSignaled; + if (timeout == 0) { + pthread_mutex_unlock (& (e->m)); + if (signaled) + return WELS_DEC_THREAD_WAIT_SIGNALED; + else + return WELS_DEC_THREAD_WAIT_TIMEDOUT; + } + if (signaled) { + if (!e->manualReset) { + e->isSignaled = 0; + } + pthread_mutex_unlock (& (e->m)); + return WELS_DEC_THREAD_WAIT_SIGNALED; + } + int rc = 0; + if (timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) { + rc = pthread_cond_wait (& (e->c), & (e->m)); + } else { + struct timespec ts; + getTimespecFromTimeout (&ts, timeout); + rc = pthread_cond_timedwait (& (e->c), & (e->m), &ts); + } + if (!e->manualReset) { + e->isSignaled = 0; + } + pthread_mutex_unlock (& (e->m)); + if (rc == 0) + return WELS_DEC_THREAD_WAIT_SIGNALED; + else + return WELS_DEC_THREAD_WAIT_TIMEDOUT; +} + +void EventDestroy (SWelsDecEvent* e) { + pthread_mutex_destroy (& (e->m)); + pthread_cond_destroy (& (e->c)); +} + +int SemCreate (SWelsDecSemphore* s, long value, long max) { + s->v = value; + s->max = max; + if (pthread_mutex_init (& (s->m), NULL)) + return 1; + const char* event_name = ""; + if (WelsEventOpen (& (s->e), event_name)) { + return 2; + } + return 0; +} + +int SemWait (SWelsDecSemphore* s, int32_t timeout) { +#if defined(__APPLE__) + pthread_mutex_lock (& (s->m)); +#endif + int rc = 0; + if (timeout != 0) { + while ((s->v) == 0) { + if (timeout == WELS_DEC_THREAD_WAIT_INFINITE || timeout < 0) { + // infinite wait until released +#if defined(__APPLE__) + rc = pthread_cond_wait (& (s->e), & (s->m)); +#else + rc = sem_wait (s->e); + if (rc != 0) rc = errno; +#endif + } else { + struct timespec ts; + getTimespecFromTimeout (&ts, timeout); +#if defined(__APPLE__) + rc = 
pthread_cond_timedwait (& (s->e), & (s->m), &ts); +#else + rc = sem_timedwait (s->e, &ts); + if (rc != 0) rc = errno; +#endif + if (rc != EINTR) { + // if timed out we return to the caller + break; + } + } + } + // only decrement counter if semaphore was signaled + if (rc == 0) + s->v -= 1; + + } else { + // Special handling for timeout of 0 + if (s->v > 0) { + s->v -= 1; + rc = 0; + } else { + rc = 1; + } + } +#if defined(__APPLE__) + pthread_mutex_unlock (& (s->m)); +#endif + // set return value + if (rc == 0) + return WELS_DEC_THREAD_WAIT_SIGNALED; + else + return WELS_DEC_THREAD_WAIT_TIMEDOUT; +} + +void SemRelease (SWelsDecSemphore* s, long* o_pPrevCount) { + long prevcount; +#ifdef __APPLE__ + pthread_mutex_lock (& (s->m)); + prevcount = s->v; + if (s->v < s->max) + s->v += 1; + pthread_cond_signal (& (s->e)); + pthread_mutex_unlock (& (s->m)); +#else + prevcount = s->v; + if (s->v < s->max) + s->v += 1; + sem_post (s->e); +#endif + if (o_pPrevCount != NULL) { + *o_pPrevCount = prevcount; + } +} + +void SemDestroy (SWelsDecSemphore* s) { + pthread_mutex_destroy (& (s->m)); + const char* event_name = ""; + WelsEventClose (& (s->e), event_name); +} + +#endif /* !_WIN32 */ + diff --git a/chromium/third_party/openh264/src/codec/decoder/meson.build b/chromium/third_party/openh264/src/codec/decoder/meson.build index f93837eaf1a..1131022ffe4 100644 --- a/chromium/third_party/openh264/src/codec/decoder/meson.build +++ b/chromium/third_party/openh264/src/codec/decoder/meson.build @@ -19,15 +19,30 @@ cpp_sources = [ 'core/src/pic_queue.cpp', 'core/src/rec_mb.cpp', 'plus/src/welsDecoderExt.cpp', + 'core/src/wels_decoder_thread.cpp', ] -asm_sources = [ - 'core/x86/dct.asm', - 'core/x86/intra_pred.asm', -] - -objs_asm = asm_gen.process(asm_sources) +objs_asm = [] +if ['x86', 'x86_64'].contains(cpu_family) + asm_sources = [ + 'core/x86/dct.asm', + 'core/x86/intra_pred.asm', + ] + objs_asm = asm_gen.process(asm_sources) +elif cpu_family == 'arm' + cpp_sources += [ + 
'core/arm/block_add_neon.S', + 'core/arm/intra_pred_neon.S', + ] +elif cpu_family == 'aarch64' + cpp_sources += [ + 'core/arm64/block_add_aarch64_neon.S', + 'core/arm64/intra_pred_aarch64_neon.S', + ] +else + error('Unsupported cpu family @0@'.format(cpu_family)) +endif libdecoder = static_library('decoder', cpp_sources, objs_asm, - include_directories: [inc, decoder_inc], + include_directories: [inc, decoder_inc, casm_inc], dependencies: deps) diff --git a/chromium/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h b/chromium/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h index 6ed73acc119..cfacbc83f59 100644 --- a/chromium/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h +++ b/chromium/third_party/openh264/src/codec/decoder/plus/inc/welsDecoderExt.h @@ -109,32 +109,53 @@ class CWelsDecoder : public ISVCDecoder { virtual long EXTAPI SetOption (DECODER_OPTION eOptID, void* pOption); virtual long EXTAPI GetOption (DECODER_OPTION eOptID, void* pOption); - typedef struct tagPictInfo { - SBufferInfo sBufferInfo; - int32_t iPOC; - int32_t iFrameNum; - bool bLastGOP; - unsigned char* pData[3]; - } SPictInfo, *PPictInfo; + public: + DECODING_STATE DecodeFrame2WithCtx (PWelsDecoderContext pCtx, const unsigned char* kpSrc, const int kiSrcLen, + unsigned char** ppDst, SBufferInfo* pDstInfo); + DECODING_STATE ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx); private: - PWelsDecoderContext m_pDecContext; welsCodecTrace* m_pWelsTrace; + uint32_t m_uiDecodeTimeStamp; + bool m_bIsBaseline; + int32_t m_iCpuCount; + int32_t m_iThreadCount; + int32_t m_iCtxCount; + PPicBuff m_pPicBuff; + bool m_bParamSetsLostFlag; + bool m_bFreezeOutput; + int32_t m_DecCtxActiveCount; + PWelsDecoderThreadCTX m_pDecThrCtx; + PWelsDecoderThreadCTX m_pLastDecThrCtx; + int32_t m_iLastBufferedIdx; + WELS_MUTEX m_csDecoder; + SWelsDecEvent m_sBufferingEvent; + SWelsDecEvent m_sReleaseBufferEvent; + SWelsDecSemphore m_sIsBusy; SPictInfo 
m_sPictInfoList[16]; - int32_t m_iPictInfoIndex; - int32_t m_iMinPOC; - int32_t m_iNumOfPicts; - int32_t m_iLastGOPRemainPicts; - int32_t m_LastWrittenPOC; - int32_t m_iLargestBufferedPicIndex; + SPictReoderingStatus m_sReoderingStatus; + PWelsDecoderThreadCTX m_pDecThrCtxActive[WELS_DEC_MAX_NUM_CPU]; + SVlcTable m_sVlcTable; + SWelsLastDecPicInfo m_sLastDecPicInfo; + SDecoderStatistics m_sDecoderStatistics;// For real time debugging + private: int32_t InitDecoder (const SDecodingParam* pParam); void UninitDecoder (void); - int32_t ResetDecoder(); + int32_t InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam); + void UninitDecoderCtx (PWelsDecoderContext& pCtx); + int32_t ResetDecoder (PWelsDecoderContext& pCtx); + int32_t ThreadResetDecoder (PWelsDecoderContext& pCtx); void OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics); - DECODING_STATE ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo); - + DECODING_STATE ReorderPicturesInDisplay (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo); + int ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst, + SBufferInfo* pDstInfo); + void BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo); + void ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo); + + void OpenDecoderThreads(); + void CloseDecoderThreads(); #ifdef OUTPUT_BIT_STREAM WelsFileHandle* m_pFBS; WelsFileHandle* m_pFBSSize; diff --git a/chromium/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp b/chromium/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp index 0dbe117a030..85a10600886 100644 --- a/chromium/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp +++ b/chromium/third_party/openh264/src/codec/decoder/plus/src/welsDecoderExt.cpp @@ -51,6 +51,7 @@ //#include "macros.h" #include "decoder.h" #include 
"decoder_core.h" +#include "manage_dec_ref.h" #include "error_concealment.h" #include "measure_time.h" @@ -67,14 +68,11 @@ extern "C" { #include <stdio.h> #include <stdarg.h> #include <sys/types.h> +#include <malloc.h> #else #include <sys/time.h> #endif -#define _PICTURE_REORDERING_ 1 - -static int32_t sIMinInt32 = -0x7FFFFFFF; - namespace WelsDec { ////////////////////////////////////////////////////////////////////// @@ -90,15 +88,62 @@ namespace WelsDec { * * return: none ***************************************************************************/ +DECLARE_PROCTHREAD (pThrProcInit, p) { + SWelsDecThreadInfo* sThreadInfo = (SWelsDecThreadInfo*)p; +#if defined(WIN32) + _alloca (WELS_DEC_MAX_THREAD_STACK_SIZE * (sThreadInfo->uiThrNum + 1)); +#endif + return sThreadInfo->pThrProcMain (p); +} + +static DECODING_STATE ConstructAccessUnit (CWelsDecoder* pWelsDecoder, PWelsDecoderThreadCTX pThrCtx) { + int iRet = dsErrorFree; + //WelsMutexLock (&pWelsDecoder->m_csDecoder); + if (pThrCtx->pCtx->pLastThreadCtx != NULL) { + PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pThrCtx->pCtx->pLastThreadCtx); + WAIT_EVENT (&pLastThreadCtx->sSliceDecodeStart, WELS_DEC_THREAD_WAIT_INFINITE); + RESET_EVENT (&pLastThreadCtx->sSliceDecodeStart); + } + pThrCtx->pDec = NULL; + if (GetThreadCount (pThrCtx->pCtx) > 1) { + RESET_EVENT (&pThrCtx->sSliceDecodeFinish); + } + iRet |= pWelsDecoder->DecodeFrame2WithCtx (pThrCtx->pCtx, NULL, 0, pThrCtx->ppDst, &pThrCtx->sDstInfo); + + //WelsMutexUnlock (&pWelsDecoder->m_csDecoder); + return (DECODING_STATE)iRet; +} + +DECLARE_PROCTHREAD (pThrProcFrame, p) { + SWelsDecoderThreadCTX* pThrCtx = (SWelsDecoderThreadCTX*)p; + while (1) { + RELEASE_SEMAPHORE (pThrCtx->sThreadInfo.sIsBusy); + RELEASE_SEMAPHORE (&pThrCtx->sThreadInfo.sIsIdle); + WAIT_SEMAPHORE (&pThrCtx->sThreadInfo.sIsActivated, WELS_DEC_THREAD_WAIT_INFINITE); + if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_RUN) { + CWelsDecoder* pWelsDecoder = 
(CWelsDecoder*)pThrCtx->threadCtxOwner; + ConstructAccessUnit (pWelsDecoder, pThrCtx); + } else if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_ABORT) { + break; + } + } + return 0; +} + CWelsDecoder::CWelsDecoder (void) - : m_pDecContext (NULL), - m_pWelsTrace (NULL), - m_iPictInfoIndex (0), - m_iMinPOC (sIMinInt32), - m_iNumOfPicts (0), - m_iLastGOPRemainPicts (0), - m_LastWrittenPOC (sIMinInt32), - m_iLargestBufferedPicIndex (0) { + : m_pWelsTrace (NULL), + m_uiDecodeTimeStamp (0), + m_bIsBaseline (false), + m_iCpuCount (1), + m_iThreadCount (0), + m_iCtxCount (1), + m_pPicBuff (NULL), + m_bParamSetsLostFlag (false), + m_bFreezeOutput (false), + m_DecCtxActiveCount (0), + m_pDecThrCtx (NULL), + m_pLastDecThrCtx (NULL), + m_iLastBufferedIdx (0) { #ifdef OUTPUT_BIT_STREAM char chFileName[1024] = { 0 }; //for .264 int iBufUsed = 0; @@ -120,11 +165,18 @@ CWelsDecoder::CWelsDecoder (void) WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::CWelsDecoder() entry"); } - for (int32_t i = 0; i < 16; ++i) { - m_sPictInfoList[i].bLastGOP = false; - m_sPictInfoList[i].iPOC = sIMinInt32; + ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, true); + + m_iCpuCount = GetCPUCount(); + if (m_iCpuCount > WELS_DEC_MAX_NUM_CPU) { + m_iCpuCount = WELS_DEC_MAX_NUM_CPU; } + m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iCtxCount]; + memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iCtxCount); + for (int32_t i = 0; i < WELS_DEC_MAX_NUM_CPU; ++i) { + m_pDecThrCtxActive[i] = NULL; + } #ifdef OUTPUT_BIT_STREAM SWelsTime sCurTime; @@ -180,7 +232,7 @@ CWelsDecoder::~CWelsDecoder() { if (m_pWelsTrace != NULL) { WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::~CWelsDecoder()"); } - + CloseDecoderThreads(); UninitDecoder(); #ifdef OUTPUT_BIT_STREAM @@ -198,6 +250,10 @@ CWelsDecoder::~CWelsDecoder() { delete m_pWelsTrace; m_pWelsTrace = NULL; } + if (m_pDecThrCtx != NULL) { + delete[] m_pDecThrCtx; + m_pDecThrCtx = NULL; + } } 
long CWelsDecoder::Initialize (const SDecodingParam* pParam) { @@ -226,26 +282,88 @@ long CWelsDecoder::Uninitialize() { } void CWelsDecoder::UninitDecoder (void) { - if (NULL == m_pDecContext) - return; + for (int32_t i = 0; i < m_iCtxCount; ++i) { + if (m_pDecThrCtx[i].pCtx != NULL) { + if (i > 0) { + WelsResetRefPicWithoutUnRef (m_pDecThrCtx[i].pCtx); + } + UninitDecoderCtx (m_pDecThrCtx[i].pCtx); + } + } +} + +void CWelsDecoder::OpenDecoderThreads() { + if (m_iThreadCount >= 1) { + m_uiDecodeTimeStamp = 0; + CREATE_SEMAPHORE (&m_sIsBusy, m_iThreadCount, m_iThreadCount, NULL); + WelsMutexInit (&m_csDecoder); + CREATE_EVENT (&m_sBufferingEvent, 1, 0, NULL); + SET_EVENT (&m_sBufferingEvent); + CREATE_EVENT (&m_sReleaseBufferEvent, 1, 0, NULL); + SET_EVENT (&m_sReleaseBufferEvent); + for (int32_t i = 0; i < m_iThreadCount; ++i) { + m_pDecThrCtx[i].sThreadInfo.uiThrMaxNum = m_iThreadCount; + m_pDecThrCtx[i].sThreadInfo.uiThrNum = i; + m_pDecThrCtx[i].sThreadInfo.uiThrStackSize = WELS_DEC_MAX_THREAD_STACK_SIZE; + m_pDecThrCtx[i].sThreadInfo.pThrProcMain = pThrProcFrame; + m_pDecThrCtx[i].sThreadInfo.sIsBusy = &m_sIsBusy; + m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN; + m_pDecThrCtx[i].threadCtxOwner = this; + m_pDecThrCtx[i].kpSrc = NULL; + m_pDecThrCtx[i].kiSrcLen = 0; + m_pDecThrCtx[i].ppDst = NULL; + m_pDecThrCtx[i].pDec = NULL; + CREATE_EVENT (&m_pDecThrCtx[i].sImageReady, 1, 0, NULL); + CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart, 1, 0, NULL); + CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinish, 1, 0, NULL); + CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, 0, 1, NULL); + CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated, 0, 1, NULL); + CREATE_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle, pThrProcInit, (void*) (& (m_pDecThrCtx[i]))); + } + } +} +void CWelsDecoder::CloseDecoderThreads() { + if (m_iThreadCount >= 1) { + for (int32_t i = 0; i < m_iThreadCount; i++) { //waiting the completion begun slices + 
WAIT_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); + m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_ABORT; + RELEASE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated); + WAIT_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle); + CLOSE_EVENT (&m_pDecThrCtx[i].sImageReady); + CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart); + CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinish); + CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle); + CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated); + } + WelsMutexDestroy (&m_csDecoder); + CLOSE_EVENT (&m_sBufferingEvent); + CLOSE_EVENT (&m_sReleaseBufferEvent); + CLOSE_SEMAPHORE (&m_sIsBusy); + } +} - WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoder(), openh264 codec version = %s.", - VERSION_NUMBER); +void CWelsDecoder::UninitDecoderCtx (PWelsDecoderContext& pCtx) { + if (pCtx != NULL) { - WelsEndDecoder (m_pDecContext); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoderCtx(), openh264 codec version = %s.", + VERSION_NUMBER); - if (m_pDecContext->pMemAlign != NULL) { - WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, - "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..", - m_pDecContext->pMemAlign->WelsGetMemoryUsage()); - delete m_pDecContext->pMemAlign; - m_pDecContext->pMemAlign = NULL; - } + WelsEndDecoder (pCtx); - if (NULL != m_pDecContext) { - WelsFree (m_pDecContext, "m_pDecContext"); + if (pCtx->pMemAlign != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..", + pCtx->pMemAlign->WelsGetMemoryUsage()); + delete pCtx->pMemAlign; + pCtx->pMemAlign = NULL; + } - m_pDecContext = NULL; + if (NULL != pCtx) { + WelsFree (pCtx, "m_pDecContext"); + + pCtx = NULL; + } + if (m_iCtxCount <= 1) m_pDecThrCtx[0].pCtx = NULL; } } @@ -255,43 +373,97 @@ int32_t CWelsDecoder::InitDecoder (const 
SDecodingParam* pParam) { WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d", VERSION_NUMBER, (int32_t)pParam->bParseOnly); + if (m_iThreadCount >= 1 && pParam->bParseOnly) { + m_iThreadCount = 0; + } + OpenDecoderThreads(); + //reset decoder context + memset (&m_sDecoderStatistics, 0, sizeof (SDecoderStatistics)); + memset (&m_sLastDecPicInfo, 0, sizeof (SWelsLastDecPicInfo)); + memset (&m_sVlcTable, 0, sizeof (SVlcTable)); + UninitDecoder(); + WelsDecoderLastDecPicInfoDefaults (m_sLastDecPicInfo); + for (int32_t i = 0; i < m_iCtxCount; ++i) { + InitDecoderCtx (m_pDecThrCtx[i].pCtx, pParam); + if (m_iThreadCount >= 1) { + m_pDecThrCtx[i].pCtx->pThreadCtx = &m_pDecThrCtx[i]; + } + } + m_bParamSetsLostFlag = false; + m_bFreezeOutput = false; + return cmResultSuccess; +} + +// the return value of this function is not suitable, it need report failure info to upper layer. +int32_t CWelsDecoder::InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam) { + + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d", + VERSION_NUMBER, (int32_t)pParam->bParseOnly); //reset decoder context - if (m_pDecContext) //free - UninitDecoder(); - m_pDecContext = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext"); - if (NULL == m_pDecContext) + UninitDecoderCtx (pCtx); + pCtx = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext"); + if (NULL == pCtx) return cmMallocMemeError; int32_t iCacheLineSize = 16; // on chip cache line size in byte - m_pDecContext->pMemAlign = new CMemoryAlign (iCacheLineSize); - WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pMemAlign), UninitDecoder()) - + pCtx->pMemAlign = new CMemoryAlign (iCacheLineSize); + WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pMemAlign), UninitDecoderCtx (pCtx)) + if 
(m_iCtxCount <= 1) m_pDecThrCtx[0].pCtx = pCtx; //fill in default value into context - WelsDecoderDefaults (m_pDecContext, &m_pWelsTrace->m_sLogCtx); - + pCtx->pLastDecPicInfo = &m_sLastDecPicInfo; + pCtx->pDecoderStatistics = &m_sDecoderStatistics; + pCtx->pVlcTable = &m_sVlcTable; + pCtx->pPictInfoList = m_sPictInfoList; + pCtx->pPictReoderingStatus = &m_sReoderingStatus; + pCtx->pCsDecoder = &m_csDecoder; + WelsDecoderDefaults (pCtx, &m_pWelsTrace->m_sLogCtx); + WelsDecoderSpsPpsDefaults (pCtx->sSpsPpsCtx); //check param and update decoder context - m_pDecContext->pParam = (SDecodingParam*)m_pDecContext->pMemAlign->WelsMallocz (sizeof (SDecodingParam), - "SDecodingParam"); - WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pParam), UninitDecoder()); - int32_t iRet = DecoderConfigParam (m_pDecContext, pParam); + pCtx->pParam = (SDecodingParam*)pCtx->pMemAlign->WelsMallocz (sizeof (SDecodingParam), + "SDecodingParam"); + WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pParam), UninitDecoderCtx (pCtx)); + int32_t iRet = DecoderConfigParam (pCtx, pParam); WELS_VERIFY_RETURN_IFNEQ (iRet, cmResultSuccess); //init decoder - WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (m_pDecContext, &m_pWelsTrace->m_sLogCtx), - UninitDecoder()) - + WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (pCtx, &m_pWelsTrace->m_sLogCtx), + UninitDecoderCtx (pCtx)) + pCtx->pPicBuff = NULL; return cmResultSuccess; } -int32_t CWelsDecoder::ResetDecoder() { +int32_t CWelsDecoder::ResetDecoder (PWelsDecoderContext& pCtx) { // TBC: need to be modified when context and trace point are null - if (m_pDecContext != NULL && m_pWelsTrace != NULL) { - WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d", - m_pDecContext->iErrorCode); - SDecodingParam sPrevParam; - memcpy (&sPrevParam, m_pDecContext->pParam, sizeof (SDecodingParam)); + if (m_iThreadCount >= 1) { + ThreadResetDecoder (pCtx); + } else 
{ + if (pCtx != NULL && m_pWelsTrace != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d", + pCtx->iErrorCode); + SDecodingParam sPrevParam; + memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam)); + + WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoderCtx (pCtx, &sPrevParam), + UninitDecoderCtx (pCtx)); + } else if (m_pWelsTrace != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null"); + } + ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, false); + } + return ERR_INFO_UNINIT; +} - WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoder (&sPrevParam), UninitDecoder()); +int32_t CWelsDecoder::ThreadResetDecoder (PWelsDecoderContext& pCtx) { + // TBC: need to be modified when context and trace point are null + SDecodingParam sPrevParam; + if (pCtx != NULL && m_pWelsTrace != NULL) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d", pCtx->iErrorCode); + memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam)); + ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, true); + CloseDecoderThreads(); + UninitDecoder(); + InitDecoder (&sPrevParam); } else if (m_pWelsTrace != NULL) { WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null"); } @@ -303,71 +475,100 @@ int32_t CWelsDecoder::ResetDecoder() { */ long CWelsDecoder::SetOption (DECODER_OPTION eOptID, void* pOption) { int iVal = 0; + if (eOptID == DECODER_OPTION_NUM_OF_THREADS) { + if (pOption != NULL) { + int32_t threadCount = * ((int32_t*)pOption); + if (threadCount < 0) threadCount = 0; + if (threadCount > m_iCpuCount) { + threadCount = m_iCpuCount; + } + if (threadCount > 3) { + threadCount = 3; + } + if (threadCount != m_iThreadCount) { + m_iThreadCount = threadCount; + if (m_pDecThrCtx != NULL) { + delete [] m_pDecThrCtx; + m_iCtxCount = m_iThreadCount == 0 ? 
1 : m_iThreadCount; + m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iCtxCount]; + memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iCtxCount); + } + } + } + return cmResultSuccess; + } + for (int32_t i = 0; i < m_iCtxCount; ++i) { + PWelsDecoderContext pDecContext = m_pDecThrCtx[i].pCtx; + if (pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL && + eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT) + return dsInitialOptExpected; + if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded + if (pOption == NULL) + return cmInitParaError; - if (m_pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL && - eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT) - return dsInitialOptExpected; - if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded - if (pOption == NULL) - return cmInitParaError; + iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag - iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag + if (pDecContext == NULL) return dsInitialOptExpected; - m_pDecContext->bEndOfStreamFlag = iVal ? true : false; + pDecContext->bEndOfStreamFlag = iVal ? 
true : false; - return cmResultSuccess; - } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status - if (pOption == NULL) - return cmInitParaError; + return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status + if (pOption == NULL) + return cmInitParaError; + + if (pDecContext == NULL) return dsInitialOptExpected; + + iVal = * ((int*)pOption); // int value for error concealment idc + iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE); + if ((pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal); + return cmInitParaError; + } - iVal = * ((int*)pOption); // int value for error concealment idc - iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE); - if ((m_pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) { + pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal; + InitErrorCon (pDecContext); WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, - "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal); - return cmInitParaError; - } - - m_pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal; - InitErrorCon (m_pDecContext); - WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, - "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal); + "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal); - return cmResultSuccess; - } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) { - if (m_pWelsTrace) { - uint32_t level = * ((uint32_t*)pOption); - m_pWelsTrace->SetTraceLevel (level); - } - return cmResultSuccess; - } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) { - if (m_pWelsTrace) { - WelsTraceCallback callback = * ((WelsTraceCallback*)pOption); - 
m_pWelsTrace->SetTraceCallback (callback); - WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, - "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.", - callback); - } - return cmResultSuccess; - } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) { - if (m_pWelsTrace) { - void* ctx = * ((void**)pOption); - m_pWelsTrace->SetTraceCallbackContext (ctx); - } - return cmResultSuccess; - } else if (eOptID == DECODER_OPTION_GET_STATISTICS) { - WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, - "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!"); - return cmInitParaError; - } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) { - if (pOption) { - m_pDecContext->sDecoderStatistics.iStatisticsLogInterval = (* ((unsigned int*)pOption)); return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) { + if (m_pWelsTrace) { + uint32_t level = * ((uint32_t*)pOption); + m_pWelsTrace->SetTraceLevel (level); + } + return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) { + if (m_pWelsTrace) { + WelsTraceCallback callback = * ((WelsTraceCallback*)pOption); + m_pWelsTrace->SetTraceCallback (callback); + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, + "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.", + callback); + } + return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) { + if (m_pWelsTrace) { + void* ctx = * ((void**)pOption); + m_pWelsTrace->SetTraceCallbackContext (ctx); + } + return cmResultSuccess; + } else if (eOptID == DECODER_OPTION_GET_STATISTICS) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, + "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!"); + return cmInitParaError; + } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) { + if (pOption) { + if (pDecContext == NULL) return dsInitialOptExpected; + pDecContext->pDecoderStatistics->iStatisticsLogInterval = 
(* ((unsigned int*)pOption)); + return cmResultSuccess; + } + } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, + "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!"); + return cmInitParaError; } - } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) { - WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, - "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!"); - return cmInitParaError; } return cmInitParaError; } @@ -377,105 +578,109 @@ long CWelsDecoder::SetOption (DECODER_OPTION eOptID, void* pOption) { */ long CWelsDecoder::GetOption (DECODER_OPTION eOptID, void* pOption) { int iVal = 0; - - if (m_pDecContext == NULL) + if (DECODER_OPTION_NUM_OF_THREADS == eOptID) { + * ((int*)pOption) = m_iThreadCount; + return cmResultSuccess; + } + PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx; + if (pDecContext == NULL) return cmInitExpected; if (pOption == NULL) return cmInitParaError; if (DECODER_OPTION_END_OF_STREAM == eOptID) { - iVal = m_pDecContext->bEndOfStreamFlag; + iVal = pDecContext->bEndOfStreamFlag; * ((int*)pOption) = iVal; return cmResultSuccess; } #ifdef LONG_TERM_REF else if (DECODER_OPTION_IDR_PIC_ID == eOptID) { - iVal = m_pDecContext->uiCurIdrPicId; + iVal = pDecContext->uiCurIdrPicId; * ((int*)pOption) = iVal; return cmResultSuccess; } else if (DECODER_OPTION_FRAME_NUM == eOptID) { - iVal = m_pDecContext->iFrameNum; + iVal = pDecContext->iFrameNum; * ((int*)pOption) = iVal; return cmResultSuccess; } else if (DECODER_OPTION_LTR_MARKING_FLAG == eOptID) { - iVal = m_pDecContext->bCurAuContainLtrMarkSeFlag; + iVal = pDecContext->bCurAuContainLtrMarkSeFlag; * ((int*)pOption) = iVal; return cmResultSuccess; } else if (DECODER_OPTION_LTR_MARKED_FRAME_NUM == eOptID) { - iVal = m_pDecContext->iFrameNumOfAuMarkedLtr; + iVal = pDecContext->iFrameNumOfAuMarkedLtr; * ((int*)pOption) = iVal; return cmResultSuccess; } #endif else if 
(DECODER_OPTION_VCL_NAL == eOptID) { //feedback whether or not have VCL NAL in current AU - iVal = m_pDecContext->iFeedbackVclNalInAu; + iVal = pDecContext->iFeedbackVclNalInAu; * ((int*)pOption) = iVal; return cmResultSuccess; } else if (DECODER_OPTION_TEMPORAL_ID == eOptID) { //if have VCL NAL in current AU, then feedback the temporal ID - iVal = m_pDecContext->iFeedbackTidInAu; + iVal = pDecContext->iFeedbackTidInAu; * ((int*)pOption) = iVal; return cmResultSuccess; } else if (DECODER_OPTION_IS_REF_PIC == eOptID) { - iVal = m_pDecContext->iFeedbackNalRefIdc; + iVal = pDecContext->iFeedbackNalRefIdc; if (iVal > 0) iVal = 1; * ((int*)pOption) = iVal; return cmResultSuccess; } else if (DECODER_OPTION_ERROR_CON_IDC == eOptID) { - iVal = (int)m_pDecContext->pParam->eEcActiveIdc; + iVal = (int)pDecContext->pParam->eEcActiveIdc; * ((int*)pOption) = iVal; return cmResultSuccess; } else if (DECODER_OPTION_GET_STATISTICS == eOptID) { // get decoder statistics info for real time debugging SDecoderStatistics* pDecoderStatistics = (static_cast<SDecoderStatistics*> (pOption)); - memcpy (pDecoderStatistics, &m_pDecContext->sDecoderStatistics, sizeof (SDecoderStatistics)); + memcpy (pDecoderStatistics, pDecContext->pDecoderStatistics, sizeof (SDecoderStatistics)); - if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount != 0) { //not original status - pDecoderStatistics->fAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) / - (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount); - pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) / - (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount + m_pDecContext->sDecoderStatistics.uiFreezingIDRNum + - m_pDecContext->sDecoderStatistics.uiFreezingNonIDRNum); + if (pDecContext->pDecoderStatistics->uiDecodedFrameCount != 0) { //not original status + pDecoderStatistics->fAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) / + (pDecContext->pDecoderStatistics->uiDecodedFrameCount); + 
pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) / + (pDecContext->pDecoderStatistics->uiDecodedFrameCount + pDecContext->pDecoderStatistics->uiFreezingIDRNum + + pDecContext->pDecoderStatistics->uiFreezingNonIDRNum); } return cmResultSuccess; } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) { if (pOption) { - iVal = m_pDecContext->sDecoderStatistics.iStatisticsLogInterval; + iVal = pDecContext->pDecoderStatistics->iStatisticsLogInterval; * ((unsigned int*)pOption) = iVal; return cmResultSuccess; } } else if (DECODER_OPTION_GET_SAR_INFO == eOptID) { //get decoder SAR info in VUI PVuiSarInfo pVuiSarInfo = (static_cast<PVuiSarInfo> (pOption)); memset (pVuiSarInfo, 0, sizeof (SVuiSarInfo)); - if (!m_pDecContext->pSps) { + if (!pDecContext->pSps) { return cmInitExpected; } else { - pVuiSarInfo->uiSarWidth = m_pDecContext->pSps->sVui.uiSarWidth; - pVuiSarInfo->uiSarHeight = m_pDecContext->pSps->sVui.uiSarHeight; - pVuiSarInfo->bOverscanAppropriateFlag = m_pDecContext->pSps->sVui.bOverscanAppropriateFlag; + pVuiSarInfo->uiSarWidth = pDecContext->pSps->sVui.uiSarWidth; + pVuiSarInfo->uiSarHeight = pDecContext->pSps->sVui.uiSarHeight; + pVuiSarInfo->bOverscanAppropriateFlag = pDecContext->pSps->sVui.bOverscanAppropriateFlag; return cmResultSuccess; } } else if (DECODER_OPTION_PROFILE == eOptID) { - if (!m_pDecContext->pSps) { + if (!pDecContext->pSps) { return cmInitExpected; } - iVal = (int)m_pDecContext->pSps->uiProfileIdc; + iVal = (int)pDecContext->pSps->uiProfileIdc; * ((int*)pOption) = iVal; return cmResultSuccess; } else if (DECODER_OPTION_LEVEL == eOptID) { - if (!m_pDecContext->pSps) { + if (!pDecContext->pSps) { return cmInitExpected; } - iVal = (int)m_pDecContext->pSps->uiLevelIdc; + iVal = (int)pDecContext->pSps->uiLevelIdc; * ((int*)pOption) = iVal; return cmResultSuccess; } else if (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER == eOptID) { - if (m_pDecContext->pSps && m_pDecContext->pSps->uiProfileIdc != 66 && 
m_pDecContext->pPps->bEntropyCodingModeFlag) { - * ((int*)pOption) = m_iNumOfPicts > 0 ? m_iNumOfPicts : 0; - } else { - * ((int*)pOption) = 0; + for (int32_t activeThread = 0; activeThread < m_DecCtxActiveCount; ++activeThread) { + WAIT_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); + RELEASE_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle); } + * ((int*)pOption) = m_sReoderingStatus.iNumOfPicts; return cmResultSuccess; } @@ -486,7 +691,17 @@ DECODING_STATE CWelsDecoder::DecodeFrameNoDelay (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst, SBufferInfo* pDstInfo) { - int iRet; + int iRet = dsErrorFree; + if (m_iThreadCount >= 1) { + iRet = ThreadDecodeFrameInternal (kpSrc, kiSrcLen, ppDst, pDstInfo); + if (m_sReoderingStatus.iNumOfPicts) { + WAIT_EVENT (&m_sBufferingEvent, WELS_DEC_THREAD_WAIT_INFINITE); + RESET_EVENT (&m_sReleaseBufferEvent); + ReleaseBufferedReadyPicture (NULL, ppDst, pDstInfo); + SET_EVENT (&m_sReleaseBufferEvent); + } + return (DECODING_STATE)iRet; + } //SBufferInfo sTmpBufferInfo; //unsigned char* ppTmpDst[3] = {NULL, NULL, NULL}; iRet = (int)DecodeFrame2 (kpSrc, kiSrcLen, ppDst, pDstInfo); @@ -504,24 +719,24 @@ DECODING_STATE CWelsDecoder::DecodeFrameNoDelay (const unsigned char* kpSrc, return (DECODING_STATE)iRet; } -DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc, +DECODING_STATE CWelsDecoder::DecodeFrame2WithCtx (PWelsDecoderContext pDecContext, const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst, SBufferInfo* pDstInfo) { - if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) { + if (pDecContext == NULL || pDecContext->pParam == NULL) { if (m_pWelsTrace != NULL) { WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeFrame2 without Initialize.\n"); } return dsInitialOptExpected; } - if (m_pDecContext->pParam->bParseOnly) { + if (pDecContext->pParam->bParseOnly) { WelsLog 
(&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be false for this API calling! \n"); - m_pDecContext->iErrorCode |= dsInvalidArgument; + pDecContext->iErrorCode |= dsInvalidArgument; return dsInvalidArgument; } - if (CheckBsBuffer (m_pDecContext, kiSrcLen)) { - if (ResetDecoder()) + if (CheckBsBuffer (pDecContext, kiSrcLen)) { + if (ResetDecoder (pDecContext)) return dsOutOfMemory; return dsErrorFree; @@ -537,163 +752,243 @@ DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc, WelsFflush (m_pFBSSize); } #endif//OUTPUT_BIT_STREAM - m_pDecContext->bEndOfStreamFlag = false; + pDecContext->bEndOfStreamFlag = false; + if (GetThreadCount (pDecContext) <= 0) { + pDecContext->uiDecodingTimeStamp = ++m_uiDecodeTimeStamp; + } } else { //For application MODE, the error detection should be added for safe. //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL. - m_pDecContext->bEndOfStreamFlag = true; - m_pDecContext->bInstantDecFlag = true; + pDecContext->bEndOfStreamFlag = true; + pDecContext->bInstantDecFlag = true; } int64_t iStart, iEnd; iStart = WelsTime(); - ppDst[0] = ppDst[1] = ppDst[2] = NULL; - m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding. - m_pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize + if (GetThreadCount (pDecContext) <= 1) { + ppDst[0] = ppDst[1] = ppDst[2] = NULL; + } + pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding. 
+ pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize unsigned long long uiInBsTimeStamp = pDstInfo->uiInBsTimeStamp; - memset (pDstInfo, 0, sizeof (SBufferInfo)); + if (GetThreadCount (pDecContext) <= 1) { + memset (pDstInfo, 0, sizeof (SBufferInfo)); + } pDstInfo->uiInBsTimeStamp = uiInBsTimeStamp; #ifdef LONG_TERM_REF - m_pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR - m_pDecContext->bCurAuContainLtrMarkSeFlag = false; - m_pDecContext->iFrameNumOfAuMarkedLtr = 0; - m_pDecContext->iFrameNum = -1; //initialize + pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR + pDecContext->bCurAuContainLtrMarkSeFlag = false; + pDecContext->iFrameNumOfAuMarkedLtr = 0; + pDecContext->iFrameNum = -1; //initialize #endif - m_pDecContext->iFeedbackTidInAu = -1; //initialize - m_pDecContext->iFeedbackNalRefIdc = -1; //initialize + pDecContext->iFeedbackTidInAu = -1; //initialize + pDecContext->iFeedbackNalRefIdc = -1; //initialize if (pDstInfo) { pDstInfo->uiOutYuvTimeStamp = 0; - m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp; + pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp; } else { - m_pDecContext->uiTimeStamp = 0; + pDecContext->uiTimeStamp = 0; } - WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, ppDst, + WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, ppDst, pDstInfo, NULL); //iErrorCode has been modified in this function - m_pDecContext->bInstantDecFlag = false; //reset no-delay flag - if (m_pDecContext->iErrorCode) { + pDecContext->bInstantDecFlag = false; //reset no-delay flag + if (pDecContext->iErrorCode) { EWelsNalUnitType eNalType = NAL_UNIT_UNSPEC_0; //for NBR, IDR frames are expected to decode as followed if error decoding an IDR currently - eNalType = m_pDecContext->sCurNalHead.eNalUnitType; - - if (m_pDecContext->iErrorCode & dsOutOfMemory) { - if (ResetDecoder()) + eNalType = pDecContext->sCurNalHead.eNalUnitType; + if (pDecContext->iErrorCode & dsOutOfMemory) { + if (ResetDecoder (pDecContext)) { return 
dsOutOfMemory; - + } + return dsErrorFree; + } + if (pDecContext->iErrorCode & dsRefListNullPtrs) { + if (ResetDecoder (pDecContext)) { + return dsRefListNullPtrs; + } + return dsErrorFree; + } + if ((pDecContext->iErrorCode & (dsBitstreamError | dsDataErrorConcealed)) && pDecContext->eSliceType == B_SLICE) { + if (ResetDecoder (pDecContext)) { + pDstInfo->iBufferStatus = 0; + return (DECODING_STATE)pDecContext->iErrorCode; + } return dsErrorFree; } //for AVC bitstream (excluding AVC with temporal scalability, including TP), as long as error occur, SHOULD notify upper layer key frame loss. if ((IS_PARAM_SETS_NALS (eNalType) || NAL_UNIT_CODED_SLICE_IDR == eNalType) || - (VIDEO_BITSTREAM_AVC == m_pDecContext->eVideoType)) { - if (m_pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { + (VIDEO_BITSTREAM_AVC == pDecContext->eVideoType)) { + if (pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) { #ifdef LONG_TERM_REF - m_pDecContext->bParamSetsLostFlag = true; + pDecContext->bParamSetsLostFlag = true; #else - m_pDecContext->bReferenceLostAtT0Flag = true; + pDecContext->bReferenceLostAtT0Flag = true; #endif } } - if (m_pDecContext->bPrintFrameErrorTraceFlag) { + if (pDecContext->bPrintFrameErrorTraceFlag) { WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", - m_pDecContext->iErrorCode); - m_pDecContext->bPrintFrameErrorTraceFlag = false; + pDecContext->iErrorCode); + pDecContext->bPrintFrameErrorTraceFlag = false; } else { - m_pDecContext->iIgnoredErrorInfoPacketCount++; - if (m_pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) { + pDecContext->iIgnoredErrorInfoPacketCount++; + if (pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) { WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, "continuous error reached INT_MAX! 
Restart as 0."); - m_pDecContext->iIgnoredErrorInfoPacketCount = 0; + pDecContext->iIgnoredErrorInfoPacketCount = 0; } } - if ((m_pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) { + if ((pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) { //TODO after dec status updated - m_pDecContext->iErrorCode |= dsDataErrorConcealed; + pDecContext->iErrorCode |= dsDataErrorConcealed; - m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++; - if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t - ResetDecStatNums (&m_pDecContext->sDecoderStatistics); - m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++; + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; + if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t + ResetDecStatNums (pDecContext->pDecoderStatistics); + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; } - int32_t iMbConcealedNum = m_pDecContext->iMbEcedNum + m_pDecContext->iMbEcedPropNum; - m_pDecContext->sDecoderStatistics.uiAvgEcRatio = m_pDecContext->iMbNum == 0 ? - (m_pDecContext->sDecoderStatistics.uiAvgEcRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) : (( - m_pDecContext->sDecoderStatistics.uiAvgEcRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) + (( - iMbConcealedNum * 100) / m_pDecContext->iMbNum)); - m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio = m_pDecContext->iMbNum == 0 ? - (m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) : (( - m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) + (( - m_pDecContext->iMbEcedPropNum * 100) / m_pDecContext->iMbNum)); - m_pDecContext->sDecoderStatistics.uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1); - m_pDecContext->sDecoderStatistics.uiAvgEcRatio = m_pDecContext->sDecoderStatistics.uiEcFrameNum == 0 ? 
0 : - m_pDecContext->sDecoderStatistics.uiAvgEcRatio / m_pDecContext->sDecoderStatistics.uiEcFrameNum; - m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio = m_pDecContext->sDecoderStatistics.uiEcFrameNum == 0 ? 0 : - m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio / m_pDecContext->sDecoderStatistics.uiEcFrameNum; + int32_t iMbConcealedNum = pDecContext->iMbEcedNum + pDecContext->iMbEcedPropNum; + pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->iMbNum == 0 ? + (pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : (( + pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + (( + iMbConcealedNum * 100) / pDecContext->iMbNum)); + pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->iMbNum == 0 ? + (pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : (( + pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + (( + pDecContext->iMbEcedPropNum * 100) / pDecContext->iMbNum)); + pDecContext->pDecoderStatistics->uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1); + pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 : + pDecContext->pDecoderStatistics->uiAvgEcRatio / pDecContext->pDecoderStatistics->uiEcFrameNum; + pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 
0 : + pDecContext->pDecoderStatistics->uiAvgEcPropRatio / pDecContext->pDecoderStatistics->uiEcFrameNum; } iEnd = WelsTime(); - m_pDecContext->dDecTime += (iEnd - iStart) / 1e3; + pDecContext->dDecTime += (iEnd - iStart) / 1e3; - OutputStatisticsLog (m_pDecContext->sDecoderStatistics); + OutputStatisticsLog (*pDecContext->pDecoderStatistics); -#ifdef _PICTURE_REORDERING_ - ReorderPicturesInDisplay (ppDst, pDstInfo); -#endif + if (GetThreadCount (pDecContext) >= 1) { + WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE); + RESET_EVENT (&m_sBufferingEvent); + BufferingReadyPicture (pDecContext, ppDst, pDstInfo); + SET_EVENT (&m_sBufferingEvent); + } else { + ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo); + } - return (DECODING_STATE)m_pDecContext->iErrorCode; + return (DECODING_STATE)pDecContext->iErrorCode; } // else Error free, the current codec works well if (pDstInfo->iBufferStatus == 1) { - m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++; - if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t - ResetDecStatNums (&m_pDecContext->sDecoderStatistics); - m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++; + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; + if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t + ResetDecStatNums (pDecContext->pDecoderStatistics); + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; } - OutputStatisticsLog (m_pDecContext->sDecoderStatistics); + OutputStatisticsLog (*pDecContext->pDecoderStatistics); } iEnd = WelsTime(); - m_pDecContext->dDecTime += (iEnd - iStart) / 1e3; + pDecContext->dDecTime += (iEnd - iStart) / 1e3; -#ifdef _PICTURE_REORDERING_ - ReorderPicturesInDisplay (ppDst, pDstInfo); -#endif + if (GetThreadCount (pDecContext) >= 1) { + WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE); + RESET_EVENT (&m_sBufferingEvent); + BufferingReadyPicture (pDecContext, ppDst, 
pDstInfo); + SET_EVENT (&m_sBufferingEvent); + } else { + ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo); + } return dsErrorFree; } +DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc, + const int kiSrcLen, + unsigned char** ppDst, + SBufferInfo* pDstInfo) { + PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx; + return DecodeFrame2WithCtx (pDecContext, kpSrc, kiSrcLen, ppDst, pDstInfo); +} + DECODING_STATE CWelsDecoder::FlushFrame (unsigned char** ppDst, SBufferInfo* pDstInfo) { - if (m_pDecContext->bEndOfStreamFlag && m_iNumOfPicts > 0) { - m_iMinPOC = sIMinInt32; - for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) { - if (m_iMinPOC == sIMinInt32 && m_sPictInfoList[i].iPOC > sIMinInt32) { - m_iMinPOC = m_sPictInfoList[i].iPOC; - m_iPictInfoIndex = i; + bool bEndOfStreamFlag = true; + if (m_iThreadCount <= 1) { + for (int32_t j = 0; j < m_iCtxCount; ++j) { + if (!m_pDecThrCtx[j].pCtx->bEndOfStreamFlag) { + bEndOfStreamFlag = false; + } + } + } + if (bEndOfStreamFlag && m_sReoderingStatus.iNumOfPicts > 0) { + m_sReoderingStatus.iMinPOC = IMinInt32; + if (m_bIsBaseline) { + uint32_t uiDecodingTimeStamp = 0; + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC > IMinInt32) { + uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp; + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx = i; + break; + } + } + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].uiDecodingTimeStamp < uiDecodingTimeStamp) { + uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp; + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + } + } + } else { + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= 
m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx = i; + break; + } } - if (m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].iPOC < m_iMinPOC) { - m_iMinPOC = m_sPictInfoList[i].iPOC; - m_iPictInfoIndex = i; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + } } } } - if (m_iMinPOC > sIMinInt32) { - m_LastWrittenPOC = m_iMinPOC; + if (m_sReoderingStatus.iMinPOC > IMinInt32) { + m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC; #if defined (_DEBUG) #ifdef _MOTION_VECTOR_DUMP_ - fprintf (stderr, "Output POC: #%d\n", m_LastWrittenPOC); + fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sReoderingStatus.iLastWrittenPOC, + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].uiDecodingTimeStamp); #endif #endif - memcpy (pDstInfo, &m_sPictInfoList[m_iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); - ppDst[0] = m_sPictInfoList[m_iPictInfoIndex].pData[0]; - ppDst[1] = m_sPictInfoList[m_iPictInfoIndex].pData[1]; - ppDst[2] = m_sPictInfoList[m_iPictInfoIndex].pData[2]; - m_sPictInfoList[m_iPictInfoIndex].iPOC = sIMinInt32; - m_sPictInfoList[m_iPictInfoIndex].bLastGOP = false; - m_iMinPOC = sIMinInt32; - --m_iNumOfPicts; + memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); + ppDst[0] = pDstInfo->pDst[0]; + ppDst[1] = pDstInfo->pDst[1]; + ppDst[2] = pDstInfo->pDst[2]; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32; + PPicBuff pPicBuff = m_iThreadCount <= 1 ? 
m_pDecThrCtx[0].pCtx->pPicBuff : m_pPicBuff; + if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < pPicBuff->iCapacity) { + PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]; + --pPic->iRefCount; + } + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false; + m_sReoderingStatus.iMinPOC = IMinInt32; + --m_sReoderingStatus.iNumOfPicts; } + return dsErrorFree; } @@ -742,124 +1037,223 @@ void CWelsDecoder::OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics) } } -DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo) { - if (pDstInfo->iBufferStatus == 1 && m_pDecContext->pSps->uiProfileIdc != 66 - && m_pDecContext->pPps->bEntropyCodingModeFlag) { - if (m_pDecContext->pSliceHeader->iPicOrderCntLsb == 0) { - if (m_iNumOfPicts > 0) { - m_iLastGOPRemainPicts = m_iNumOfPicts; - for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) { - if (m_sPictInfoList[i].iPOC > sIMinInt32) { - m_sPictInfoList[i].bLastGOP = true; +void CWelsDecoder::BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, + SBufferInfo* pDstInfo) { + if (pDstInfo->iBufferStatus == 0) { + return; + } + m_bIsBaseline = pCtx->pSps->uiProfileIdc == 66 || pCtx->pSps->uiProfileIdc == 83; + if (!m_bIsBaseline) { + if (m_sReoderingStatus.iNumOfPicts && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb + && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin) { + m_sReoderingStatus.iLastGOPRemainPicts = m_sReoderingStatus.iNumOfPicts; + + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC > IMinInt32) { + m_sPictInfoList[i].bLastGOP = true; + } + } + } else { + if (m_sReoderingStatus.iNumOfPicts > 0) { + //This can happen when decoder moves to next GOP without being able to decoder first picture PicOrderCntLsb = 0 + bool hasGOPChanged = false; + for (int32_t i = 0; i <= 
m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC == pCtx->pSliceHeader->iPicOrderCntLsb) { + hasGOPChanged = true; + break; + } + } + if (hasGOPChanged) { + m_sReoderingStatus.iLastGOPRemainPicts = m_sReoderingStatus.iNumOfPicts; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC > IMinInt32) { + m_sPictInfoList[i].bLastGOP = true; + } } } } } - for (int32_t i = 0; i < 16; ++i) { - if (m_sPictInfoList[i].iPOC == sIMinInt32) { - memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo)); - m_sPictInfoList[i].pData[0] = ppDst[0]; - m_sPictInfoList[i].pData[1] = ppDst[1]; - m_sPictInfoList[i].pData[2] = ppDst[2]; - m_sPictInfoList[i].iPOC = m_pDecContext->pSliceHeader->iPicOrderCntLsb; - m_sPictInfoList[i].iFrameNum = m_pDecContext->pSliceHeader->iFrameNum; - m_sPictInfoList[i].bLastGOP = false; - pDstInfo->iBufferStatus = 0; - ++m_iNumOfPicts; - if (i > m_iLargestBufferedPicIndex) { - m_iLargestBufferedPicIndex = i; - } + } + for (int32_t i = 0; i < 16; ++i) { + if (m_sPictInfoList[i].iPOC == IMinInt32) { + memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo)); + m_sPictInfoList[i].iPOC = pCtx->pSliceHeader->iPicOrderCntLsb; + m_sPictInfoList[i].uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp; + m_sPictInfoList[i].iPicBuffIdx = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iPicBuffIdx; + if (GetThreadCount (pCtx) <= 1) ++pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iRefCount; + m_sPictInfoList[i].bLastGOP = false; + m_iLastBufferedIdx = i; + pDstInfo->iBufferStatus = 0; + ++m_sReoderingStatus.iNumOfPicts; + if (i > m_sReoderingStatus.iLargestBufferedPicIndex) { + m_sReoderingStatus.iLargestBufferedPicIndex = i; + } + break; + } + } +} + +void CWelsDecoder::ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, + SBufferInfo* pDstInfo) { + PPicBuff pPicBuff = pCtx ? 
pCtx->pPicBuff : m_pPicBuff; + if (pCtx == NULL && m_iThreadCount <= 1) { + pCtx = m_pDecThrCtx[0].pCtx; + } + if (!m_bIsBaseline && m_sReoderingStatus.iLastGOPRemainPicts > 0) { + m_sReoderingStatus.iMinPOC = IMinInt32; + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].bLastGOP) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx = i; break; } } - if (m_iLastGOPRemainPicts > 0) { - m_iMinPOC = sIMinInt32; - for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) { - if (m_iMinPOC == sIMinInt32 && m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].bLastGOP) { - m_iMinPOC = m_sPictInfoList[i].iPOC; - m_iPictInfoIndex = i; - } - if (m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].iPOC < m_iMinPOC && m_sPictInfoList[i].bLastGOP) { - m_iMinPOC = m_sPictInfoList[i].iPOC; - m_iPictInfoIndex = i; - } + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC + && m_sPictInfoList[i].bLastGOP) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; } - m_LastWrittenPOC = m_iMinPOC; + } + m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC; #if defined (_DEBUG) #ifdef _MOTION_VECTOR_DUMP_ - fprintf (stderr, "Output POC: #%d\n", m_LastWrittenPOC); + fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sReoderingStatus.iLastWrittenPOC, + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].uiDecodingTimeStamp); #endif #endif - memcpy (pDstInfo, &m_sPictInfoList[m_iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); - ppDst[0] = m_sPictInfoList[m_iPictInfoIndex].pData[0]; - ppDst[1] = 
m_sPictInfoList[m_iPictInfoIndex].pData[1]; - ppDst[2] = m_sPictInfoList[m_iPictInfoIndex].pData[2]; - m_sPictInfoList[m_iPictInfoIndex].iPOC = sIMinInt32; - m_sPictInfoList[m_iPictInfoIndex].bLastGOP = false; - m_iMinPOC = sIMinInt32; - --m_iNumOfPicts; - --m_iLastGOPRemainPicts; - if (m_iLastGOPRemainPicts == 0) { - m_LastWrittenPOC = sIMinInt32; + memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); + ppDst[0] = pDstInfo->pDst[0]; + ppDst[1] = pDstInfo->pDst[1]; + ppDst[2] = pDstInfo->pDst[2]; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32; + PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]; + --pPic->iRefCount; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false; + m_sReoderingStatus.iMinPOC = IMinInt32; + --m_sReoderingStatus.iNumOfPicts; + --m_sReoderingStatus.iLastGOPRemainPicts; + if (m_sReoderingStatus.iLastGOPRemainPicts == 0) { + m_sReoderingStatus.iLastWrittenPOC = IMinInt32; + } + return; + } + if (m_sReoderingStatus.iNumOfPicts && m_bIsBaseline) { + uint32_t uiDecodingTimeStamp = 0; + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sPictInfoList[i].iPOC > IMinInt32) { + uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx = i; + break; } - return dsErrorFree; } - if (m_iNumOfPicts > 0) { - m_iMinPOC = sIMinInt32; - for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) { - if (m_iMinPOC == sIMinInt32 && m_sPictInfoList[i].iPOC > sIMinInt32) { - m_iMinPOC = m_sPictInfoList[i].iPOC; - m_iPictInfoIndex = i; - } - if (m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].iPOC < m_iMinPOC) { - m_iMinPOC = m_sPictInfoList[i].iPOC; - m_iPictInfoIndex = i; - } + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if 
(m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].uiDecodingTimeStamp < uiDecodingTimeStamp) { + uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp; + m_sReoderingStatus.iPictInfoIndex = i; } } - if (m_iMinPOC > sIMinInt32) { - if ((m_LastWrittenPOC > sIMinInt32 && m_iMinPOC - m_LastWrittenPOC <= 1) - || m_iMinPOC < m_pDecContext->pSliceHeader->iPicOrderCntLsb) { - m_LastWrittenPOC = m_iMinPOC; + if (uiDecodingTimeStamp > 0) { #if defined (_DEBUG) #ifdef _MOTION_VECTOR_DUMP_ - fprintf (stderr, "Output POC: #%d\n", m_LastWrittenPOC); + fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC, + uiDecodingTimeStamp); #endif #endif - memcpy (pDstInfo, &m_sPictInfoList[m_iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); - ppDst[0] = m_sPictInfoList[m_iPictInfoIndex].pData[0]; - ppDst[1] = m_sPictInfoList[m_iPictInfoIndex].pData[1]; - ppDst[2] = m_sPictInfoList[m_iPictInfoIndex].pData[2]; - m_sPictInfoList[m_iPictInfoIndex].iPOC = sIMinInt32; - m_sPictInfoList[m_iPictInfoIndex].bLastGOP = false; - m_iMinPOC = sIMinInt32; - --m_iNumOfPicts; - return dsErrorFree; + memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); + ppDst[0] = pDstInfo->pDst[0]; + ppDst[1] = pDstInfo->pDst[1]; + ppDst[2] = pDstInfo->pDst[2]; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32; + PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]; + --pPic->iRefCount; + --m_sReoderingStatus.iNumOfPicts; + } + return; + } + if (m_sReoderingStatus.iNumOfPicts > 0) { + m_sReoderingStatus.iMinPOC = IMinInt32; + int32_t firstValidIdx = -1; + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + firstValidIdx 
= i; + break; } } + for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) { + if (i == firstValidIdx) continue; + if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC) { + m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC; + m_sReoderingStatus.iPictInfoIndex = i; + } + } + } + if (m_sReoderingStatus.iMinPOC > IMinInt32) { + int32_t iLastPOC = pCtx != NULL ? pCtx->pSliceHeader->iPicOrderCntLsb : m_sPictInfoList[m_iLastBufferedIdx].iPOC; + bool isReady = (m_sReoderingStatus.iLastWrittenPOC > IMinInt32 + && m_sReoderingStatus.iMinPOC - m_sReoderingStatus.iLastWrittenPOC <= 1) + || m_sReoderingStatus.iMinPOC < iLastPOC; + if (isReady) { + m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC; +#if defined (_DEBUG) +#ifdef _MOTION_VECTOR_DUMP_ + fprintf (stderr, "Output POC: #%d uiDecodingTimeStamp=%d\n", m_sReoderingStatus.iLastWrittenPOC, + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].uiDecodingTimeStamp); +#endif +#endif + memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo)); + ppDst[0] = pDstInfo->pDst[0]; + ppDst[1] = pDstInfo->pDst[1]; + ppDst[2] = pDstInfo->pDst[2]; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32; + PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]; + --pPic->iRefCount; + m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false; + m_sReoderingStatus.iMinPOC = IMinInt32; + --m_sReoderingStatus.iNumOfPicts; + } } +} - return dsErrorFree; +DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (PWelsDecoderContext pDecContext, unsigned char** ppDst, + SBufferInfo* pDstInfo) { + DECODING_STATE iRet = dsErrorFree; + if (pDstInfo->iBufferStatus == 1) { + m_bIsBaseline = pDecContext->pSps->uiProfileIdc == 66 || pDecContext->pSps->uiProfileIdc == 83; + if (!m_bIsBaseline) { + BufferingReadyPicture (pDecContext, ppDst, pDstInfo); + 
ReleaseBufferedReadyPicture (pDecContext, ppDst, pDstInfo); + } + } + return iRet; } -DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc, - const int kiSrcLen, - SParserBsInfo* pDstInfo) { - if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) { +DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc, const int kiSrcLen, SParserBsInfo* pDstInfo) { + PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx; + + if (pDecContext == NULL || pDecContext->pParam == NULL) { if (m_pWelsTrace != NULL) { WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeParser without Initialize.\n"); } return dsInitialOptExpected; } - if (!m_pDecContext->pParam->bParseOnly) { + if (!pDecContext->pParam->bParseOnly) { WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be true for this API calling! \n"); - m_pDecContext->iErrorCode |= dsInvalidArgument; + pDecContext->iErrorCode |= dsInvalidArgument; return dsInvalidArgument; } int64_t iEnd, iStart = WelsTime(); - if (CheckBsBuffer (m_pDecContext, kiSrcLen)) { - if (ResetDecoder()) + if (CheckBsBuffer (pDecContext, kiSrcLen)) { + if (ResetDecoder (pDecContext)) return dsOutOfMemory; return dsErrorFree; @@ -871,58 +1265,57 @@ DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc, WelsFflush (m_pFBS); } #endif//OUTPUT_BIT_STREAM - m_pDecContext->bEndOfStreamFlag = false; + pDecContext->bEndOfStreamFlag = false; } else { //For application MODE, the error detection should be added for safe. //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL. - m_pDecContext->bEndOfStreamFlag = true; - m_pDecContext->bInstantDecFlag = true; + pDecContext->bEndOfStreamFlag = true; + pDecContext->bInstantDecFlag = true; } - m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding. - m_pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here. 
- m_pDecContext->iFeedbackNalRefIdc = -1; //initialize - if (!m_pDecContext->bFramePending) { //frame complete - m_pDecContext->pParserBsInfo->iNalNum = 0; - memset (m_pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER); + pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding. + pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here. + pDecContext->iFeedbackNalRefIdc = -1; //initialize + if (!pDecContext->bFramePending) { //frame complete + pDecContext->pParserBsInfo->iNalNum = 0; + memset (pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER); } pDstInfo->iNalNum = 0; pDstInfo->iSpsWidthInPixel = pDstInfo->iSpsHeightInPixel = 0; if (pDstInfo) { - m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp; + pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp; pDstInfo->uiOutBsTimeStamp = 0; } else { - m_pDecContext->uiTimeStamp = 0; + pDecContext->uiTimeStamp = 0; } - WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo); - if (m_pDecContext->iErrorCode & dsOutOfMemory) { - if (ResetDecoder()) + WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo); + if (pDecContext->iErrorCode & dsOutOfMemory) { + if (ResetDecoder (pDecContext)) return dsOutOfMemory; return dsErrorFree; } - if (!m_pDecContext->bFramePending && m_pDecContext->pParserBsInfo->iNalNum) { - memcpy (pDstInfo, m_pDecContext->pParserBsInfo, sizeof (SParserBsInfo)); + if (!pDecContext->bFramePending && pDecContext->pParserBsInfo->iNalNum) { + memcpy (pDstInfo, pDecContext->pParserBsInfo, sizeof (SParserBsInfo)); - if (m_pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count - m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++; - if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t - ResetDecStatNums (&m_pDecContext->sDecoderStatistics); - m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++; + if 
(pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; + if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t + ResetDecStatNums (pDecContext->pDecoderStatistics); + pDecContext->pDecoderStatistics->uiDecodedFrameCount++; } } } - m_pDecContext->bInstantDecFlag = false; //reset no-delay flag + pDecContext->bInstantDecFlag = false; //reset no-delay flag - if (m_pDecContext->iErrorCode && m_pDecContext->bPrintFrameErrorTraceFlag) { - WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", m_pDecContext->iErrorCode); - m_pDecContext->bPrintFrameErrorTraceFlag = false; + if (pDecContext->iErrorCode && pDecContext->bPrintFrameErrorTraceFlag) { + WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", pDecContext->iErrorCode); + pDecContext->bPrintFrameErrorTraceFlag = false; } iEnd = WelsTime(); - m_pDecContext->dDecTime += (iEnd - iStart) / 1e3; - - return (DECODING_STATE) m_pDecContext->iErrorCode; + pDecContext->dDecTime += (iEnd - iStart) / 1e3; + return (DECODING_STATE)pDecContext->iErrorCode; } DECODING_STATE CWelsDecoder::DecodeFrame (const unsigned char* kpSrc, @@ -964,6 +1357,100 @@ DECODING_STATE CWelsDecoder::DecodeFrameEx (const unsigned char* kpSrc, return state; } +DECODING_STATE CWelsDecoder::ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx) { + sThreadCtx.pCtx->bHasNewSps = false; + sThreadCtx.pCtx->bParamSetsLostFlag = m_bParamSetsLostFlag; + sThreadCtx.pCtx->bFreezeOutput = m_bFreezeOutput; + sThreadCtx.pCtx->uiDecodingTimeStamp = ++m_uiDecodeTimeStamp; + bool bPicBuffChanged = false; + if (m_pLastDecThrCtx != NULL && sThreadCtx.pCtx->sSpsPpsCtx.iSeqId < m_pLastDecThrCtx->pCtx->sSpsPpsCtx.iSeqId) { + CopySpsPps (m_pLastDecThrCtx->pCtx, sThreadCtx.pCtx); + sThreadCtx.pCtx->iPicQueueNumber = m_pLastDecThrCtx->pCtx->iPicQueueNumber; + if (sThreadCtx.pCtx->pPicBuff != 
m_pPicBuff) { + bPicBuffChanged = true; + sThreadCtx.pCtx->pPicBuff = m_pPicBuff; + sThreadCtx.pCtx->bHaveGotMemory = m_pPicBuff != NULL; + sThreadCtx.pCtx->iImgWidthInPixel = m_pLastDecThrCtx->pCtx->iImgWidthInPixel; + sThreadCtx.pCtx->iImgHeightInPixel = m_pLastDecThrCtx->pCtx->iImgHeightInPixel; + } + } + + //if threadCount > 1, then each thread must contain exact one complete frame. + if (GetThreadCount (sThreadCtx.pCtx) > 1) { + sThreadCtx.pCtx->pAccessUnitList->uiAvailUnitsNum = 0; + sThreadCtx.pCtx->pAccessUnitList->uiActualUnitsNum = 0; + } + + int32_t iRet = DecodeFrame2WithCtx (sThreadCtx.pCtx, sThreadCtx.kpSrc, sThreadCtx.kiSrcLen, sThreadCtx.ppDst, + &sThreadCtx.sDstInfo); + + int32_t iErr = InitConstructAccessUnit (sThreadCtx.pCtx, &sThreadCtx.sDstInfo); + if (ERR_NONE != iErr) { + return (DECODING_STATE) (iRet | iErr); + } + if (sThreadCtx.pCtx->bNewSeqBegin) { + m_pPicBuff = sThreadCtx.pCtx->pPicBuff; + } else if (bPicBuffChanged) { + InitialDqLayersContext (sThreadCtx.pCtx, sThreadCtx.pCtx->pSps->iMbWidth << 4, sThreadCtx.pCtx->pSps->iMbHeight << 4); + } + m_bParamSetsLostFlag = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bParamSetsLostFlag; + m_bFreezeOutput = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bFreezeOutput; + return (DECODING_STATE)iErr; +} +/* +* Run decoding picture in separate thread. 
+*/ + +int CWelsDecoder::ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst, + SBufferInfo* pDstInfo) { + int state = dsErrorFree; + int32_t i, j; + int32_t signal = 0; + + //serial using of threads + if (m_DecCtxActiveCount < m_iThreadCount) { + signal = m_DecCtxActiveCount; + } else { + signal = m_pDecThrCtxActive[0]->sThreadInfo.uiThrNum; + } + + WAIT_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); + + for (i = 0; i < m_DecCtxActiveCount; ++i) { + if (m_pDecThrCtxActive[i] == &m_pDecThrCtx[signal]) { + m_pDecThrCtxActive[i] = NULL; + for (j = i; j < m_DecCtxActiveCount - 1; j++) { + m_pDecThrCtxActive[j] = m_pDecThrCtxActive[j + 1]; + m_pDecThrCtxActive[j + 1] = NULL; + } + --m_DecCtxActiveCount; + break; + } + } + + m_pDecThrCtxActive[m_DecCtxActiveCount++] = &m_pDecThrCtx[signal]; + if (m_pLastDecThrCtx != NULL) { + m_pDecThrCtx[signal].pCtx->pLastThreadCtx = m_pLastDecThrCtx; + } + m_pDecThrCtx[signal].kpSrc = const_cast<uint8_t*> (kpSrc); + m_pDecThrCtx[signal].kiSrcLen = kiSrcLen; + m_pDecThrCtx[signal].ppDst = ppDst; + memcpy (&m_pDecThrCtx[signal].sDstInfo, pDstInfo, sizeof (SBufferInfo)); + + ParseAccessUnit (m_pDecThrCtx[signal]); + if (m_iThreadCount > 1) { + m_pLastDecThrCtx = &m_pDecThrCtx[signal]; + } + m_pDecThrCtx[signal].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN; + RELEASE_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsActivated); + + // wait early picture + if (m_DecCtxActiveCount >= m_iThreadCount) { + WAIT_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); + RELEASE_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle); + } + return state; +} } // namespace WelsDec diff --git a/chromium/third_party/openh264/src/codec/decoder/targets.mk b/chromium/third_party/openh264/src/codec/decoder/targets.mk index eaf5d3c0780..88dc5afb123 100644 --- a/chromium/third_party/openh264/src/codec/decoder/targets.mk +++ 
b/chromium/third_party/openh264/src/codec/decoder/targets.mk @@ -22,6 +22,7 @@ DECODER_CPP_SRCS=\ $(DECODER_SRCDIR)/core/src/parse_mb_syn_cavlc.cpp\ $(DECODER_SRCDIR)/core/src/pic_queue.cpp\ $(DECODER_SRCDIR)/core/src/rec_mb.cpp\ + $(DECODER_SRCDIR)/core/src/wels_decoder_thread.cpp\ $(DECODER_SRCDIR)/plus/src/welsDecoderExt.cpp\ DECODER_OBJS += $(DECODER_CPP_SRCS:.cpp=.$(OBJ)) @@ -56,14 +57,24 @@ DECODER_OBJS += $(DECODER_OBJSARM64) endif OBJS += $(DECODER_OBJSARM64) -DECODER_ASM_MIPS_SRCS=\ +DECODER_ASM_MIPS_MMI_SRCS=\ $(DECODER_SRCDIR)/core/mips/dct_mmi.c\ -DECODER_OBJSMIPS += $(DECODER_ASM_MIPS_SRCS:.c=.$(OBJ)) +DECODER_OBJSMIPS_MMI += $(DECODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ)) + +DECODER_ASM_MIPS_MSA_SRCS=\ + +DECODER_OBJSMIPS_MSA += $(DECODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ)) ifeq ($(ASM_ARCH), mips) -DECODER_OBJS += $(DECODER_OBJSMIPS) +ifeq ($(ENABLE_MMI), Yes) +DECODER_OBJS += $(DECODER_OBJSMIPS_MMI) +endif +ifeq ($(ENABLE_MSA), Yes) +DECODER_OBJS += $(DECODER_OBJSMIPS_MSA) +endif endif -OBJS += $(DECODER_OBJSMIPS) +OBJS += $(DECODER_OBJSMIPS_MMI) +OBJS += $(DECODER_OBJSMIPS_MSA) OBJS += $(DECODER_OBJS) diff --git a/chromium/third_party/openh264/src/codec/encoder/core/inc/param_svc.h b/chromium/third_party/openh264/src/codec/encoder/core/inc/param_svc.h index a33830440d6..780b4df414d 100644 --- a/chromium/third_party/openh264/src/codec/encoder/core/inc/param_svc.h +++ b/chromium/third_party/openh264/src/codec/encoder/core/inc/param_svc.h @@ -365,7 +365,7 @@ typedef struct TagWelsSvcCodingParam: SEncParamExt { uiIntraPeriod = ((uiIntraPeriod + uiGopSize - 1) / uiGopSize) * uiGopSize; if (((pCodingParam.iNumRefFrame != AUTO_REF_PIC_COUNT) - && ((pCodingParam.iNumRefFrame > MAX_REF_PIC_COUNT) || (pCodingParam.iNumRefFrame < MIN_REF_PIC_COUNT))) + && !((pCodingParam.iNumRefFrame > MAX_REF_PIC_COUNT) || (pCodingParam.iNumRefFrame < MIN_REF_PIC_COUNT))) || ((iNumRefFrame != AUTO_REF_PIC_COUNT) && (pCodingParam.iNumRefFrame == AUTO_REF_PIC_COUNT))) { iNumRefFrame = 
pCodingParam.iNumRefFrame; } diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/au_set.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/au_set.cpp index ef2758cd6b3..a49df475203 100644 --- a/chromium/third_party/openh264/src/codec/encoder/core/src/au_set.cpp +++ b/chromium/third_party/openh264/src/codec/encoder/core/src/au_set.cpp @@ -134,7 +134,7 @@ static int32_t WelsCheckNumRefSetting (SLogContext* pLogCtx, SWelsSvcCodingParam int32_t WelsCheckRefFrameLimitationNumRefFirst (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam) { - if (WelsCheckNumRefSetting (pLogCtx, pParam, true)) { + if (WelsCheckNumRefSetting (pLogCtx, pParam, false)) { // we take num-ref as the honored setting but it conflicts with temporal and LTR return ENC_RETURN_UNSUPPORTED_PARA; } diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp index aec6b111788..8fd00ea6119 100644 --- a/chromium/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp +++ b/chromium/third_party/openh264/src/codec/encoder/core/src/deblocking.cpp @@ -783,6 +783,11 @@ void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero, int32_t iCpu) *pfSetNZCZero = WelsNonZeroCount_mmi; } #endif +#if defined(HAVE_MSA) + if (iCpu & WELS_CPU_MSA) { + *pfSetNZCZero = WelsNonZeroCount_msa; + } +#endif } void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) { @@ -860,6 +865,19 @@ void DeblockingInit (DeblockingFunc* pFunc, int32_t iCpu) { pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi; } #endif//HAVE_MMI + +#if defined(HAVE_MSA) + if (iCpu & WELS_CPU_MSA) { + pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_msa; + pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_msa; + pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_msa; + pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_msa; + pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa; + pFunc->pfChromaDeblockingEQ4Ver = 
DeblockChromaEq4V_msa; + pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa; + pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa; + } +#endif//HAVE_MSA } diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp index 6f11f36ebf6..f9bc6c4768f 100644 --- a/chromium/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp +++ b/chromium/third_party/openh264/src/codec/encoder/core/src/encode_mb_aux.cpp @@ -464,7 +464,7 @@ int32_t WelsHadamardQuant2x2Skip_AArch64_neon (int16_t* pRes, int16_t iFF, int1 void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { pFuncList->pfCopy8x8Aligned = WelsCopy8x8_c; pFuncList->pfCopy16x16Aligned = - pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_c; + pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_c; pFuncList->pfCopy16x8NotAligned = WelsCopy16x8_c; pFuncList->pfCopy8x16Aligned = WelsCopy8x16_c; pFuncList->pfCopy4x4 = WelsCopy4x4_c; @@ -612,5 +612,16 @@ void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) { pFuncList->pfDctFourT4 = WelsDctFourT4_mmi; } #endif//HAVE_MMI + +#if defined(HAVE_MSA) + if (uiCpuFlag & WELS_CPU_MSA) { + pFuncList->pfCopy8x8Aligned = WelsCopy8x8_msa; + pFuncList->pfCopy8x16Aligned = WelsCopy8x16_msa; + + pFuncList->pfCopy16x16Aligned = + pFuncList->pfCopy16x16NotAligned = WelsCopy16x16_msa; + pFuncList->pfCopy16x8NotAligned = WelsCopy16x8_msa; + } +#endif } } diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp index 9f79da89365..9bc6e103b73 100644 --- a/chromium/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp +++ b/chromium/third_party/openh264/src/codec/encoder/core/src/encoder_ext.cpp @@ -374,12 +374,12 @@ int32_t ParamValidation (SLogContext* pLogCtx, SWelsSvcCodingParam* pCfg) { pCfg->bEnableFrameSkip); if 
((pCfg->iMaxQp <= 0) || (pCfg->iMinQp <= 0)) { if (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME) { - WelsLog (pLogCtx, WELS_LOG_WARNING, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, MIN_SCREEN_QP, + WelsLog (pLogCtx, WELS_LOG_INFO, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, MIN_SCREEN_QP, MAX_SCREEN_QP); pCfg->iMinQp = MIN_SCREEN_QP; pCfg->iMaxQp = MAX_SCREEN_QP; } else { - WelsLog (pLogCtx, WELS_LOG_WARNING, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, + WelsLog (pLogCtx, WELS_LOG_INFO, "Change QP Range from(%d,%d) to (%d,%d)", pCfg->iMinQp, pCfg->iMaxQp, GOM_MIN_QP_MODE, MAX_LOW_BR_QP); pCfg->iMinQp = GOM_MIN_QP_MODE; pCfg->iMaxQp = MAX_LOW_BR_QP; diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp index 22fcb792041..80ba25aa83b 100644 --- a/chromium/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp +++ b/chromium/third_party/openh264/src/codec/encoder/core/src/ref_list_mgr_svc.cpp @@ -628,7 +628,6 @@ bool WelsBuildRefList (sWelsEncCtx* pCtx, const int32_t iPOC, int32_t iBestLtrRe WelsLog (& (pCtx->sLogCtx), WELS_LOG_DETAIL, "WelsBuildRefList pCtx->uiTemporalId = %d,pRef->iFrameNum = %d,pRef->uiTemporalId = %d", pCtx->uiTemporalId, pRef->iFrameNum, pRef->uiTemporalId); - break; } } } diff --git a/chromium/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp b/chromium/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp index 0bb9f141fe8..90139136f91 100644 --- a/chromium/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp +++ b/chromium/third_party/openh264/src/codec/encoder/core/src/svc_encode_slice.cpp @@ -101,7 +101,7 @@ void WelsSliceHeaderExtInit (sWelsEncCtx* pEncCtx, SDqLayer* pCurLayer, SSlice* if (P_SLICE == pEncCtx->eSliceType) { pCurSliceHeader->uiNumRefIdxL0Active = 1; if 
(pCurSliceHeader->uiRefCount > 0 && - pCurSliceHeader->uiRefCount < pCurLayer->sLayerInfo.pSpsP->iNumRefFrames) { + pCurSliceHeader->uiRefCount <= pCurLayer->sLayerInfo.pSpsP->iNumRefFrames) { pCurSliceHeader->bNumRefIdxActiveOverrideFlag = true; pCurSliceHeader->uiNumRefIdxL0Active = pCurSliceHeader->uiRefCount; } diff --git a/chromium/third_party/openh264/src/codec/encoder/meson.build b/chromium/third_party/openh264/src/codec/encoder/meson.build index 4e5c3314b6c..5f46854510b 100644 --- a/chromium/third_party/openh264/src/codec/encoder/meson.build +++ b/chromium/third_party/openh264/src/codec/encoder/meson.build @@ -33,19 +33,41 @@ cpp_sources = [ 'plus/src/welsEncoderExt.cpp', ] -asm_sources = [ - 'core/x86/coeff.asm', - 'core/x86/dct.asm', - 'core/x86/intra_pred.asm', - 'core/x86/matrix_transpose.asm', - 'core/x86/memzero.asm', - 'core/x86/quant.asm', - 'core/x86/sample_sc.asm', - 'core/x86/score.asm', -] - -objs_asm = asm_gen.process(asm_sources) +objs_asm = [] +if ['x86', 'x86_64'].contains(cpu_family) + asm_sources = [ + 'core/x86/coeff.asm', + 'core/x86/dct.asm', + 'core/x86/intra_pred.asm', + 'core/x86/matrix_transpose.asm', + 'core/x86/memzero.asm', + 'core/x86/quant.asm', + 'core/x86/sample_sc.asm', + 'core/x86/score.asm', + ] + objs_asm = asm_gen.process(asm_sources) +elif cpu_family == 'arm' + cpp_sources += [ + 'core/arm/intra_pred_neon.S', + 'core/arm/intra_pred_sad_3_opt_neon.S', + 'core/arm/memory_neon.S', + 'core/arm/pixel_neon.S', + 'core/arm/reconstruct_neon.S', + 'core/arm/svc_motion_estimation.S', + ] +elif cpu_family == 'aarch64' + cpp_sources += [ + 'core/arm64/intra_pred_aarch64_neon.S', + 'core/arm64/intra_pred_sad_3_opt_aarch64_neon.S', + 'core/arm64/memory_aarch64_neon.S', + 'core/arm64/pixel_aarch64_neon.S', + 'core/arm64/reconstruct_aarch64_neon.S', + 'core/arm64/svc_motion_estimation_aarch64_neon.S', + ] +else + error('Unsupported cpu family @0@'.format(cpu_family)) +endif libencoder = static_library('encoder', cpp_sources, objs_asm, 
- include_directories: [inc, processing_inc, encoder_inc], + include_directories: [inc, processing_inc, encoder_inc, casm_inc], dependencies: deps) diff --git a/chromium/third_party/openh264/src/codec/encoder/targets.mk b/chromium/third_party/openh264/src/codec/encoder/targets.mk index 1f053280e1e..4fb2e690ea4 100644 --- a/chromium/third_party/openh264/src/codec/encoder/targets.mk +++ b/chromium/third_party/openh264/src/codec/encoder/targets.mk @@ -82,16 +82,26 @@ ENCODER_OBJS += $(ENCODER_OBJSARM64) endif OBJS += $(ENCODER_OBJSARM64) -ENCODER_ASM_MIPS_SRCS=\ +ENCODER_ASM_MIPS_MMI_SRCS=\ $(ENCODER_SRCDIR)/core/mips/dct_mmi.c\ $(ENCODER_SRCDIR)/core/mips/quant_mmi.c\ $(ENCODER_SRCDIR)/core/mips/score_mmi.c\ -ENCODER_OBJSMIPS += $(ENCODER_ASM_MIPS_SRCS:.c=.$(OBJ)) +ENCODER_OBJSMIPS_MMI += $(ENCODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ)) + +ENCODER_ASM_MIPS_MSA_SRCS=\ + +ENCODER_OBJSMIPS_MSA += $(ENCODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ)) ifeq ($(ASM_ARCH), mips) -ENCODER_OBJS += $(ENCODER_OBJSMIPS) +ifeq ($(ENABLE_MMI), Yes) +ENCODER_OBJS += $(ENCODER_OBJSMIPS_MMI) +endif +ifeq ($(ENABLE_MSA), Yes) +ENCODER_OBJS += $(ENCODER_OBJSMIPS_MSA) +endif endif -OBJS += $(ENCODER_OBJSMIPS) +OBJS += $(ENCODER_OBJSMIPS_MMI) +OBJS += $(ENCODER_OBJSMIPS_MSA) OBJS += $(ENCODER_OBJS) diff --git a/chromium/third_party/openh264/src/codec/meson.build b/chromium/third_party/openh264/src/codec/meson.build index 7b610d24c4d..7a427f498df 100644 --- a/chromium/third_party/openh264/src/codec/meson.build +++ b/chromium/third_party/openh264/src/codec/meson.build @@ -2,5 +2,8 @@ subdir('common') subdir('decoder') subdir('encoder') subdir('processing') -subdir('console') +if not ['android', 'ios'].contains(system) + # also disabled in the Makefile for these platforms + subdir('console') +endif subdir('api') diff --git a/chromium/third_party/openh264/src/codec/processing/meson.build b/chromium/third_party/openh264/src/codec/processing/meson.build index b7560e3d69a..d38dfb1f2ef 100644 --- 
a/chromium/third_party/openh264/src/codec/processing/meson.build +++ b/chromium/third_party/openh264/src/codec/processing/meson.build @@ -18,14 +18,32 @@ cpp_sources = [ 'src/vaacalc/vaacalculation.cpp', ] -asm_sources = [ - 'src/x86/denoisefilter.asm', - 'src/x86/downsample_bilinear.asm', - 'src/x86/vaa.asm', -] - -objs_asm = asm_gen.process(asm_sources) +objs_asm = [] +if ['x86', 'x86_64'].contains(cpu_family) + asm_sources = [ + 'src/x86/denoisefilter.asm', + 'src/x86/downsample_bilinear.asm', + 'src/x86/vaa.asm', + ] + objs_asm = asm_gen.process(asm_sources) +elif cpu_family == 'arm' + cpp_sources += [ + 'src/arm/adaptive_quantization.S', + 'src/arm/down_sample_neon.S', + 'src/arm/pixel_sad_neon.S', + 'src/arm/vaa_calc_neon.S', + ] +elif cpu_family == 'aarch64' + cpp_sources += [ + 'src/arm64/adaptive_quantization_aarch64_neon.S', + 'src/arm64/down_sample_aarch64_neon.S', + 'src/arm64/pixel_sad_aarch64_neon.S', + 'src/arm64/vaa_calc_aarch64_neon.S', + ] +else + error('Unsupported cpu family @0@'.format(cpu_family)) +endif libprocessing = static_library('processing', cpp_sources, objs_asm, - include_directories: [inc, processing_inc], + include_directories: [inc, processing_inc, casm_inc], dependencies: deps) diff --git a/chromium/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h b/chromium/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h index ae146cff71b..78c225ee795 100644 --- a/chromium/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h +++ b/chromium/third_party/openh264/src/codec/processing/src/scenechangedetection/SceneChangeDetection.h @@ -89,6 +89,12 @@ class CSceneChangeDetectorVideo { } #endif +#ifdef HAVE_MMI + if (iCpuFlag & WELS_CPU_MMI) { + m_pfSad = WelsSampleSad8x8_mmi; + } +#endif + m_fSceneChangeMotionRatioLarge = SCENE_CHANGE_MOTION_RATIO_LARGE_VIDEO; m_fSceneChangeMotionRatioMedium = SCENE_CHANGE_MOTION_RATIO_MEDIUM; } diff 
--git a/chromium/third_party/openh264/src/codec/processing/targets.mk b/chromium/third_party/openh264/src/codec/processing/targets.mk index 300de2d803b..0f8873335aa 100644 --- a/chromium/third_party/openh264/src/codec/processing/targets.mk +++ b/chromium/third_party/openh264/src/codec/processing/targets.mk @@ -58,14 +58,24 @@ PROCESSING_OBJS += $(PROCESSING_OBJSARM64) endif OBJS += $(PROCESSING_OBJSARM64) -PROCESSING_ASM_MIPS_SRCS=\ +PROCESSING_ASM_MIPS_MMI_SRCS=\ $(PROCESSING_SRCDIR)/src/mips/vaa_mmi.c\ -PROCESSING_OBJSMIPS += $(PROCESSING_ASM_MIPS_SRCS:.c=.$(OBJ)) +PROCESSING_OBJSMIPS_MMI += $(PROCESSING_ASM_MIPS_MMI_SRCS:.c=.$(OBJ)) + +PROCESSING_ASM_MIPS_MSA_SRCS=\ + +PROCESSING_OBJSMIPS_MSA += $(PROCESSING_ASM_MIPS_MSA_SRCS:.c=.$(OBJ)) ifeq ($(ASM_ARCH), mips) -PROCESSING_OBJS += $(PROCESSING_OBJSMIPS) +ifeq ($(ENABLE_MMI), Yes) +PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MMI) +endif +ifeq ($(ENABLE_MSA), Yes) +PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MSA) +endif endif -OBJS += $(PROCESSING_OBJSMIPS) +OBJS += $(PROCESSING_OBJSMIPS_MMI) +OBJS += $(PROCESSING_OBJSMIPS_MSA) OBJS += $(PROCESSING_OBJS) diff --git a/chromium/third_party/openh264/src/gmpopenh264.info b/chromium/third_party/openh264/src/gmpopenh264.info index ad01420f0d9..7a666efb8be 100644 --- a/chromium/third_party/openh264/src/gmpopenh264.info +++ b/chromium/third_party/openh264/src/gmpopenh264.info @@ -1,4 +1,4 @@ Name: gmpopenh264 Description: GMP Plugin for OpenH264. 
-Version: 1.8.0 +Version: 2.1.0 APIs: encode-video[h264], decode-video[h264] diff --git a/chromium/third_party/openh264/src/include/wels/meson.build b/chromium/third_party/openh264/src/include/wels/meson.build index 1b0049222a5..73fcef3acfe 100644 --- a/chromium/third_party/openh264/src/include/wels/meson.build +++ b/chromium/third_party/openh264/src/include/wels/meson.build @@ -4,5 +4,5 @@ foreach header : api_headers api_header_deps += configure_file( input : header[1], output : header[0], - configuration : configuration_data()) + copy : true) endforeach diff --git a/chromium/third_party/openh264/src/meson.build b/chromium/third_party/openh264/src/meson.build index c5793dbca68..a8692285743 100644 --- a/chromium/third_party/openh264/src/meson.build +++ b/chromium/third_party/openh264/src/meson.build @@ -1,10 +1,10 @@ project('openh264', ['c', 'cpp'], - version : '1.8.0', - meson_version : '>= 0.43', + version : '2.1.0', + meson_version : '>= 0.47', default_options : [ 'warning_level=1', 'buildtype=debugoptimized' ]) -major_version = '4' +major_version = '6' cpp = meson.get_compiler('cpp') @@ -36,8 +36,6 @@ encoder_inc = include_directories([ join_paths('codec', 'encoder', 'plus', 'inc'), ]) -asm_inc = join_paths(meson.current_source_dir(), 'codec', 'common', 'x86', '') - nasm = find_program('nasm', 'nasm.exe') system = host_machine.system() @@ -54,24 +52,52 @@ deps = [dependency('threads')] c_args = [] cpp_args = [] asm_args = [] - -if system == 'linux' +asm_inc = [] +casm_inc = [] +cpp_lib = '-lstdc++' + +# TODO: should rely on dependency('threads') instead and change the pkg-config +# generator below +pthread_dep = cpp.find_library('pthread', required : false) +libm_dep = cpp.find_library('libm', required : false) +deps += [libm_dep] + +if ['linux', 'android', 'ios', 'darwin'].contains(system) + asm_format32 = 'elf' + asm_format64 = 'elf64' + if ['ios', 'darwin'].contains(system) + asm_format32 = 'macho32' + asm_format64 = 'macho64' + endif if cpu_family == 
'x86' - asm_format = 'elf' - asm_args += ['-DX86_32'] - add_project_arguments('-DX86_32_ASM', language: 'c') + asm_format = asm_format32 + asm_args += ['-DX86_32', '-DHAVE_AVX2'] + add_project_arguments('-DHAVE_AVX2', language: 'cpp') + add_project_arguments('-DHAVE_AVX2', '-DX86_ASM', '-DX86_32_ASM', language: 'c') + asm_inc = join_paths(meson.current_source_dir(), 'codec', 'common', 'x86', '') elif cpu_family == 'x86_64' - asm_format = 'elf64' - asm_args += ['-DUNIX64'] + asm_format = asm_format64 + asm_args += ['-DUNIX64', '-DHAVE_AVX2'] + add_project_arguments('-DHAVE_AVX2', language: 'cpp') + add_project_arguments('-DHAVE_AVX2', '-DX86_ASM', language: 'c') + asm_inc = join_paths(meson.current_source_dir(), 'codec', 'common', 'x86', '') + elif cpu_family == 'arm' + asm_format = asm_format32 + add_project_arguments('-DHAVE_NEON', language: 'c') + add_project_arguments('-DHAVE_NEON', language: 'c') + casm_inc = include_directories(join_paths('codec', 'common', 'arm')) + elif cpu_family == 'aarch64' + asm_format = asm_format64 + add_project_arguments('-DHAVE_NEON_ARM64', language: 'c') + add_project_arguments('-DHAVE_NEON_ARM64', language: 'cpp') + casm_inc = include_directories(join_paths('codec', 'common', 'arm64')) else - error ('FIXME: unhandled CPU family @0@ for Linux'.format(cpu_family)) + error ('FIXME: unhandled CPU family @0@ for @1@'.format(cpu_family, system)) endif - deps += [cpp.find_library('libm')] - - asm_args += ['-DHAVE_AVX2'] - add_project_arguments('-DHAVE_AVX2', language: 'cpp') - add_project_arguments('-DHAVE_AVX2', '-DX86_ASM', language: 'c') + if ['ios', 'darwin', 'android'].contains(system) + cpp_lib = '-lc++' + endif elif system == 'windows' if cpu_family == 'x86' asm_format = 'win32' @@ -82,17 +108,20 @@ elif system == 'windows' else error ('FIXME: unhandled CPU family @0@ for Windows'.format(cpu_family)) endif + asm_inc = join_paths(meson.current_source_dir(), 'codec', 'common', 'x86', '') else error ('FIXME: Unhandled system 
@0@'.format(system)) endif -asm_gen = generator(nasm, - output : '@BASENAME@.o', - arguments : [ - '-f', asm_format, - '-i', asm_inc, - '@INPUT@', - '-o', '@OUTPUT@'] + asm_args) +if ['x86', 'x86_64'].contains(cpu_family) + asm_gen = generator(nasm, + output : '@BASENAME@.o', + arguments : [ + '-f', asm_format, + '-i', asm_inc, + '@INPUT@', + '-o', '@OUTPUT@'] + asm_args) +endif api_headers = [] api_header_deps = [] @@ -112,6 +141,7 @@ libopenh264_shared = shared_library('openh264', install: true, soversion: major_version, version: meson.project_version(), + vs_module_defs: 'openh264.def', dependencies: deps) libopenh264_static = static_library('openh264', @@ -124,19 +154,23 @@ pkg_install_dir = '@0@/pkgconfig'.format(get_option('libdir')) foreach t : ['', '-static'] pkgconf = configuration_data() pkgconf.set('prefix', join_paths(get_option('prefix'))) + pkgconf.set('libdir', '${prefix}/@0@'.format(get_option('libdir'))) pkgconf.set('VERSION', meson.project_version()) + pkglibs = cpp_lib + if libm_dep.found() + pkglibs += ' -lm' + endif + if pthread_dep.found() + pkglibs += ' -lpthread' + endif if t == '-static' - do_install = false - pkgconf.set('LIBS', '-lstdc++ -lpthread -lm') + pkgconf.set('LIBS', pkglibs) pkgconf.set('LIBS_PRIVATE', '') else - do_install = true pkgconf.set('LIBS', '') - pkgconf.set('LIBS_PRIVATE', '-lstdc++ -lpthread -lm') + pkgconf.set('LIBS_PRIVATE', pkglibs) endif - message('do_install: @0@'.format(do_install)) - configure_file( input: 'openh264.pc.in', output: 'openh264@0@.pc'.format(t), diff --git a/chromium/third_party/openh264/src/meson_options.txt b/chromium/third_party/openh264/src/meson_options.txt new file mode 100644 index 00000000000..a2c14d168b5 --- /dev/null +++ b/chromium/third_party/openh264/src/meson_options.txt @@ -0,0 +1 @@ +option('tests', type : 'feature', value : 'auto', yield : true) diff --git a/chromium/third_party/openh264/src/openh264.pc.in b/chromium/third_party/openh264/src/openh264.pc.in index 
7fb5d0c13eb..f86225c0bca 100644 --- a/chromium/third_party/openh264/src/openh264.pc.in +++ b/chromium/third_party/openh264/src/openh264.pc.in @@ -1,5 +1,5 @@ prefix=@prefix@ -libdir=${prefix}/lib +libdir=@libdir@ includedir=${prefix}/include Name: OpenH264 diff --git a/chromium/third_party/openh264/src/openh264.rc b/chromium/third_party/openh264/src/openh264.rc index d06a147066b..7ff7ad803b3 100644 --- a/chromium/third_party/openh264/src/openh264.rc +++ b/chromium/third_party/openh264/src/openh264.rc @@ -24,8 +24,8 @@ LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,9,0,1806 - PRODUCTVERSION 1,9,0,1806 + FILEVERSION 2,1,0,2002 + PRODUCTVERSION 2,1,0,2002 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -43,12 +43,12 @@ BEGIN VALUE "Comments", "Cisco OpenH264 codec" VALUE "CompanyName", "Cisco Systems Inc." VALUE "FileDescription", "Cisco OpenH264 codec" - VALUE "FileVersion", "1.9.0.1806" + VALUE "FileVersion", "2.1.0.2002" VALUE "InternalName", "openh264.dll" VALUE "LegalCopyright", "© 2011-2015 Cisco and/or its affiliates. All rights reserved." VALUE "OriginalFilename", "openh264.dll" VALUE "ProductName", "Cisco OpenH264 codec" - VALUE "ProductVersion", "1.9.0.1806" + VALUE "ProductVersion", "2.1.0.2002" END END BLOCK "VarFileInfo" |